/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run |
11 | | // both before and after the DAG is legalized. |
12 | | // |
13 | | // This pass is not a substitute for the LLVM IR instcombine pass. This pass is |
14 | | // primarily intended to handle simplification opportunities that are implicit |
15 | | // in the LLVM IR and exposed by the various codegen lowering phases. |
16 | | // |
17 | | //===----------------------------------------------------------------------===// |
18 | | |
19 | | #include "llvm/ADT/APFloat.h" |
20 | | #include "llvm/ADT/APInt.h" |
21 | | #include "llvm/ADT/ArrayRef.h" |
22 | | #include "llvm/ADT/DenseMap.h" |
23 | | #include "llvm/ADT/None.h" |
24 | | #include "llvm/ADT/Optional.h" |
25 | | #include "llvm/ADT/STLExtras.h" |
26 | | #include "llvm/ADT/SetVector.h" |
27 | | #include "llvm/ADT/SmallBitVector.h" |
28 | | #include "llvm/ADT/SmallPtrSet.h" |
29 | | #include "llvm/ADT/SmallSet.h" |
30 | | #include "llvm/ADT/SmallVector.h" |
31 | | #include "llvm/ADT/Statistic.h" |
32 | | #include "llvm/Analysis/AliasAnalysis.h" |
33 | | #include "llvm/Analysis/MemoryLocation.h" |
34 | | #include "llvm/CodeGen/DAGCombine.h" |
35 | | #include "llvm/CodeGen/ISDOpcodes.h" |
36 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
37 | | #include "llvm/CodeGen/MachineFunction.h" |
38 | | #include "llvm/CodeGen/MachineMemOperand.h" |
39 | | #include "llvm/CodeGen/MachineValueType.h" |
40 | | #include "llvm/CodeGen/RuntimeLibcalls.h" |
41 | | #include "llvm/CodeGen/SelectionDAG.h" |
42 | | #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" |
43 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
44 | | #include "llvm/CodeGen/SelectionDAGTargetInfo.h" |
45 | | #include "llvm/CodeGen/ValueTypes.h" |
46 | | #include "llvm/IR/Attributes.h" |
47 | | #include "llvm/IR/Constant.h" |
48 | | #include "llvm/IR/DataLayout.h" |
49 | | #include "llvm/IR/DerivedTypes.h" |
50 | | #include "llvm/IR/Function.h" |
51 | | #include "llvm/IR/LLVMContext.h" |
52 | | #include "llvm/IR/Metadata.h" |
53 | | #include "llvm/Support/Casting.h" |
54 | | #include "llvm/Support/CodeGen.h" |
55 | | #include "llvm/Support/CommandLine.h" |
56 | | #include "llvm/Support/Compiler.h" |
57 | | #include "llvm/Support/Debug.h" |
58 | | #include "llvm/Support/ErrorHandling.h" |
59 | | #include "llvm/Support/KnownBits.h" |
60 | | #include "llvm/Support/MathExtras.h" |
61 | | #include "llvm/Support/raw_ostream.h" |
62 | | #include "llvm/Target/TargetLowering.h" |
63 | | #include "llvm/Target/TargetMachine.h" |
64 | | #include "llvm/Target/TargetOptions.h" |
65 | | #include "llvm/Target/TargetRegisterInfo.h" |
66 | | #include "llvm/Target/TargetSubtargetInfo.h" |
67 | | #include <algorithm> |
68 | | #include <cassert> |
69 | | #include <cstdint> |
70 | | #include <functional> |
71 | | #include <iterator> |
72 | | #include <string> |
73 | | #include <tuple> |
74 | | #include <utility> |
75 | | #include <vector> |
76 | | |
77 | | using namespace llvm; |
78 | | |
79 | | #define DEBUG_TYPE "dagcombine" |
80 | | |
81 | | STATISTIC(NodesCombined , "Number of dag nodes combined"); |
82 | | STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); |
83 | | STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); |
84 | | STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); |
85 | | STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); |
86 | | STATISTIC(SlicedLoads, "Number of load sliced"); |
87 | | |
88 | | static cl::opt<bool> |
89 | | CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, |
90 | | cl::desc("Enable DAG combiner's use of IR alias analysis")); |
91 | | |
92 | | static cl::opt<bool> |
93 | | UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), |
94 | | cl::desc("Enable DAG combiner's use of TBAA")); |
95 | | |
96 | | #ifndef NDEBUG |
97 | | static cl::opt<std::string> |
98 | | CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, |
99 | | cl::desc("Only use DAG-combiner alias analysis in this" |
100 | | " function")); |
101 | | #endif |
102 | | |
103 | | /// Hidden option to stress test load slicing, i.e., when this option |
104 | | /// is enabled, load slicing bypasses most of its profitability guards. |
105 | | static cl::opt<bool> |
106 | | StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, |
107 | | cl::desc("Bypass the profitability model of load slicing"), |
108 | | cl::init(false)); |
109 | | |
110 | | static cl::opt<bool> |
111 | | MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), |
112 | | cl::desc("DAG combiner may split indexing from loads")); |
113 | | |
114 | | namespace { |
115 | | |
116 | | class DAGCombiner { |
117 | | SelectionDAG &DAG; |
118 | | const TargetLowering &TLI; |
119 | | CombineLevel Level; |
120 | | CodeGenOpt::Level OptLevel; |
121 | | bool LegalOperations = false; |
122 | | bool LegalTypes = false; |
123 | | bool ForCodeSize; |
124 | | |
125 | | /// \brief Worklist of all of the nodes that need to be simplified. |
126 | | /// |
127 | | /// This must behave as a stack -- new nodes to process are pushed onto the |
128 | | /// back and when processing we pop off of the back. |
129 | | /// |
130 | | /// The worklist will not contain duplicates but may contain null entries |
131 | | /// due to nodes being deleted from the underlying DAG. |
132 | | SmallVector<SDNode *, 64> Worklist; |
133 | | |
134 | | /// \brief Mapping from an SDNode to its position on the worklist. |
135 | | /// |
136 | | /// This is used to find and remove nodes from the worklist (by nulling |
137 | | /// them) when they are deleted from the underlying DAG. It relies on |
138 | | /// stable indices of nodes within the worklist. |
139 | | DenseMap<SDNode *, unsigned> WorklistMap; |
140 | | |
141 | | /// \brief Set of nodes which have been combined (at least once). |
142 | | /// |
143 | | /// This is used to allow us to reliably add any operands of a DAG node |
144 | | /// which have not yet been combined to the worklist. |
145 | | SmallPtrSet<SDNode *, 32> CombinedNodes; |
146 | | |
147 | | // AA - Used for DAG load/store alias analysis. |
148 | | AliasAnalysis *AA; |
149 | | |
150 | | /// When an instruction is simplified, add all users of the instruction to |
151 | | /// the work lists because they might get more simplified now. |
152 | 7.28M | void AddUsersToWorklist(SDNode *N) { |
153 | 7.28M | for (SDNode *Node : N->uses()) |
154 | 11.1M | AddToWorklist(Node); |
155 | 7.28M | } |
156 | | |
157 | | /// Call the node-specific routine that folds each particular type of node. |
158 | | SDValue visit(SDNode *N); |
159 | | |
160 | | public: |
161 | | DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) |
162 | | : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), |
163 | 7.42M | OptLevel(OL), AA(AA) { |
164 | 7.42M | ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); |
165 | 7.42M | |
166 | 7.42M | MaximumLegalStoreInBits = 0; |
167 | 7.42M | for (MVT VT : MVT::all_valuetypes()) |
168 | 824M | if (824M EVT(VT).isSimple() && 824M VT != MVT::Other824M && |
169 | 816M | TLI.isTypeLegal(EVT(VT)) && |
170 | 145M | VT.getSizeInBits() >= MaximumLegalStoreInBits) |
171 | 81.2M | MaximumLegalStoreInBits = VT.getSizeInBits(); |
172 | 7.42M | } |
173 | | |
174 | | /// Add to the worklist making sure its instance is at the back (next to be |
175 | | /// processed.) |
176 | 496M | void AddToWorklist(SDNode *N) { |
177 | 496M | assert(N->getOpcode() != ISD::DELETED_NODE && |
178 | 496M | "Deleted Node added to Worklist"); |
179 | 496M | |
180 | 496M | // Skip handle nodes as they can't usefully be combined and confuse the |
181 | 496M | // zero-use deletion strategy. |
182 | 496M | if (N->getOpcode() == ISD::HANDLENODE) |
183 | 48.5k | return; |
184 | 496M | |
185 | 496M | if (496M WorklistMap.insert(std::make_pair(N, Worklist.size())).second496M ) |
186 | 195M | Worklist.push_back(N); |
187 | 496M | } |
188 | | |
189 | | /// Remove all instances of N from the worklist. |
190 | 23.1M | void removeFromWorklist(SDNode *N) { |
191 | 23.1M | CombinedNodes.erase(N); |
192 | 23.1M | |
193 | 23.1M | auto It = WorklistMap.find(N); |
194 | 23.1M | if (It == WorklistMap.end()) |
195 | 13.5M | return; // Not in the worklist. |
196 | 9.58M | |
197 | 9.58M | // Null out the entry rather than erasing it to avoid a linear operation. |
198 | 9.58M | Worklist[It->second] = nullptr; |
199 | 9.58M | WorklistMap.erase(It); |
200 | 9.58M | } |
201 | | |
202 | | void deleteAndRecombine(SDNode *N); |
203 | | bool recursivelyDeleteUnusedNodes(SDNode *N); |
204 | | |
205 | | /// Replaces all uses of the results of one DAG node with new values. |
206 | | SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, |
207 | | bool AddTo = true); |
208 | | |
209 | | /// Replaces all uses of the results of one DAG node with new values. |
210 | 1.68M | SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { |
211 | 1.68M | return CombineTo(N, &Res, 1, AddTo); |
212 | 1.68M | } |
213 | | |
214 | | /// Replaces all uses of the results of one DAG node with new values. |
215 | | SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, |
216 | 523k | bool AddTo = true) { |
217 | 523k | SDValue To[] = { Res0, Res1 }; |
218 | 523k | return CombineTo(N, To, 2, AddTo); |
219 | 523k | } |
220 | | |
221 | | void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); |
222 | | |
223 | | private: |
224 | | unsigned MaximumLegalStoreInBits; |
225 | | |
226 | | /// Check the specified integer node value to see if it can be simplified or |
227 | | /// if things it uses can be simplified by bit propagation. |
228 | | /// If so, return true. |
229 | 11.8M | bool SimplifyDemandedBits(SDValue Op) { |
230 | 11.8M | unsigned BitWidth = Op.getScalarValueSizeInBits(); |
231 | 11.8M | APInt Demanded = APInt::getAllOnesValue(BitWidth); |
232 | 11.8M | return SimplifyDemandedBits(Op, Demanded); |
233 | 11.8M | } |
234 | | |
235 | | bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); |
236 | | |
237 | | bool CombineToPreIndexedLoadStore(SDNode *N); |
238 | | bool CombineToPostIndexedLoadStore(SDNode *N); |
239 | | SDValue SplitIndexingFromLoad(LoadSDNode *LD); |
240 | | bool SliceUpLoad(SDNode *N); |
241 | | |
242 | | /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed |
243 | | /// load. |
244 | | /// |
245 | | /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. |
246 | | /// \param InVecVT type of the input vector to EVE with bitcasts resolved. |
247 | | /// \param EltNo index of the vector element to load. |
248 | | /// \param OriginalLoad load that EVE came from to be replaced. |
249 | | /// \returns EVE on success SDValue() on failure. |
250 | | SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( |
251 | | SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); |
252 | | void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); |
253 | | SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); |
254 | | SDValue SExtPromoteOperand(SDValue Op, EVT PVT); |
255 | | SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); |
256 | | SDValue PromoteIntBinOp(SDValue Op); |
257 | | SDValue PromoteIntShiftOp(SDValue Op); |
258 | | SDValue PromoteExtend(SDValue Op); |
259 | | bool PromoteLoad(SDValue Op); |
260 | | |
261 | | void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc, |
262 | | SDValue ExtLoad, const SDLoc &DL, |
263 | | ISD::NodeType ExtType); |
264 | | |
265 | | /// Call the node-specific routine that knows how to fold each |
266 | | /// particular type of node. If that doesn't do anything, try the |
267 | | /// target-specific DAG combines. |
268 | | SDValue combine(SDNode *N); |
269 | | |
270 | | // Visitation implementation - Implement dag node combining for different |
271 | | // node types. The semantics are as follows: |
272 | | // Return Value: |
273 | | // SDValue.getNode() == 0 - No change was made |
274 | | // SDValue.getNode() == N - N was replaced, is dead and has been handled. |
275 | | // otherwise - N should be replaced by the returned Operand. |
276 | | // |
277 | | SDValue visitTokenFactor(SDNode *N); |
278 | | SDValue visitMERGE_VALUES(SDNode *N); |
279 | | SDValue visitADD(SDNode *N); |
280 | | SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference); |
281 | | SDValue visitSUB(SDNode *N); |
282 | | SDValue visitADDC(SDNode *N); |
283 | | SDValue visitUADDO(SDNode *N); |
284 | | SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N); |
285 | | SDValue visitSUBC(SDNode *N); |
286 | | SDValue visitUSUBO(SDNode *N); |
287 | | SDValue visitADDE(SDNode *N); |
288 | | SDValue visitADDCARRY(SDNode *N); |
289 | | SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); |
290 | | SDValue visitSUBE(SDNode *N); |
291 | | SDValue visitSUBCARRY(SDNode *N); |
292 | | SDValue visitMUL(SDNode *N); |
293 | | SDValue useDivRem(SDNode *N); |
294 | | SDValue visitSDIV(SDNode *N); |
295 | | SDValue visitUDIV(SDNode *N); |
296 | | SDValue visitREM(SDNode *N); |
297 | | SDValue visitMULHU(SDNode *N); |
298 | | SDValue visitMULHS(SDNode *N); |
299 | | SDValue visitSMUL_LOHI(SDNode *N); |
300 | | SDValue visitUMUL_LOHI(SDNode *N); |
301 | | SDValue visitSMULO(SDNode *N); |
302 | | SDValue visitUMULO(SDNode *N); |
303 | | SDValue visitIMINMAX(SDNode *N); |
304 | | SDValue visitAND(SDNode *N); |
305 | | SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); |
306 | | SDValue visitOR(SDNode *N); |
307 | | SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); |
308 | | SDValue visitXOR(SDNode *N); |
309 | | SDValue SimplifyVBinOp(SDNode *N); |
310 | | SDValue visitSHL(SDNode *N); |
311 | | SDValue visitSRA(SDNode *N); |
312 | | SDValue visitSRL(SDNode *N); |
313 | | SDValue visitRotate(SDNode *N); |
314 | | SDValue visitABS(SDNode *N); |
315 | | SDValue visitBSWAP(SDNode *N); |
316 | | SDValue visitBITREVERSE(SDNode *N); |
317 | | SDValue visitCTLZ(SDNode *N); |
318 | | SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); |
319 | | SDValue visitCTTZ(SDNode *N); |
320 | | SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); |
321 | | SDValue visitCTPOP(SDNode *N); |
322 | | SDValue visitSELECT(SDNode *N); |
323 | | SDValue visitVSELECT(SDNode *N); |
324 | | SDValue visitSELECT_CC(SDNode *N); |
325 | | SDValue visitSETCC(SDNode *N); |
326 | | SDValue visitSETCCE(SDNode *N); |
327 | | SDValue visitSETCCCARRY(SDNode *N); |
328 | | SDValue visitSIGN_EXTEND(SDNode *N); |
329 | | SDValue visitZERO_EXTEND(SDNode *N); |
330 | | SDValue visitANY_EXTEND(SDNode *N); |
331 | | SDValue visitAssertZext(SDNode *N); |
332 | | SDValue visitSIGN_EXTEND_INREG(SDNode *N); |
333 | | SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); |
334 | | SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); |
335 | | SDValue visitTRUNCATE(SDNode *N); |
336 | | SDValue visitBITCAST(SDNode *N); |
337 | | SDValue visitBUILD_PAIR(SDNode *N); |
338 | | SDValue visitFADD(SDNode *N); |
339 | | SDValue visitFSUB(SDNode *N); |
340 | | SDValue visitFMUL(SDNode *N); |
341 | | SDValue visitFMA(SDNode *N); |
342 | | SDValue visitFDIV(SDNode *N); |
343 | | SDValue visitFREM(SDNode *N); |
344 | | SDValue visitFSQRT(SDNode *N); |
345 | | SDValue visitFCOPYSIGN(SDNode *N); |
346 | | SDValue visitSINT_TO_FP(SDNode *N); |
347 | | SDValue visitUINT_TO_FP(SDNode *N); |
348 | | SDValue visitFP_TO_SINT(SDNode *N); |
349 | | SDValue visitFP_TO_UINT(SDNode *N); |
350 | | SDValue visitFP_ROUND(SDNode *N); |
351 | | SDValue visitFP_ROUND_INREG(SDNode *N); |
352 | | SDValue visitFP_EXTEND(SDNode *N); |
353 | | SDValue visitFNEG(SDNode *N); |
354 | | SDValue visitFABS(SDNode *N); |
355 | | SDValue visitFCEIL(SDNode *N); |
356 | | SDValue visitFTRUNC(SDNode *N); |
357 | | SDValue visitFFLOOR(SDNode *N); |
358 | | SDValue visitFMINNUM(SDNode *N); |
359 | | SDValue visitFMAXNUM(SDNode *N); |
360 | | SDValue visitBRCOND(SDNode *N); |
361 | | SDValue visitBR_CC(SDNode *N); |
362 | | SDValue visitLOAD(SDNode *N); |
363 | | |
364 | | SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); |
365 | | SDValue replaceStoreOfFPConstant(StoreSDNode *ST); |
366 | | |
367 | | SDValue visitSTORE(SDNode *N); |
368 | | SDValue visitINSERT_VECTOR_ELT(SDNode *N); |
369 | | SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); |
370 | | SDValue visitBUILD_VECTOR(SDNode *N); |
371 | | SDValue visitCONCAT_VECTORS(SDNode *N); |
372 | | SDValue visitEXTRACT_SUBVECTOR(SDNode *N); |
373 | | SDValue visitVECTOR_SHUFFLE(SDNode *N); |
374 | | SDValue visitSCALAR_TO_VECTOR(SDNode *N); |
375 | | SDValue visitINSERT_SUBVECTOR(SDNode *N); |
376 | | SDValue visitMLOAD(SDNode *N); |
377 | | SDValue visitMSTORE(SDNode *N); |
378 | | SDValue visitMGATHER(SDNode *N); |
379 | | SDValue visitMSCATTER(SDNode *N); |
380 | | SDValue visitFP_TO_FP16(SDNode *N); |
381 | | SDValue visitFP16_TO_FP(SDNode *N); |
382 | | |
383 | | SDValue visitFADDForFMACombine(SDNode *N); |
384 | | SDValue visitFSUBForFMACombine(SDNode *N); |
385 | | SDValue visitFMULForFMADistributiveCombine(SDNode *N); |
386 | | |
387 | | SDValue XformToShuffleWithZero(SDNode *N); |
388 | | SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS, |
389 | | SDValue RHS); |
390 | | |
391 | | SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); |
392 | | |
393 | | SDValue foldSelectOfConstants(SDNode *N); |
394 | | SDValue foldVSelectOfConstants(SDNode *N); |
395 | | SDValue foldBinOpIntoSelect(SDNode *BO); |
396 | | bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); |
397 | | SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); |
398 | | SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); |
399 | | SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, |
400 | | SDValue N2, SDValue N3, ISD::CondCode CC, |
401 | | bool NotExtCompare = false); |
402 | | SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1, |
403 | | SDValue N2, SDValue N3, ISD::CondCode CC); |
404 | | SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, |
405 | | const SDLoc &DL); |
406 | | SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
407 | | const SDLoc &DL, bool foldBooleans = true); |
408 | | |
409 | | bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, |
410 | | SDValue &CC) const; |
411 | | bool isOneUseSetCC(SDValue N) const; |
412 | | |
413 | | SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, |
414 | | unsigned HiOp); |
415 | | SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); |
416 | | SDValue CombineExtLoad(SDNode *N); |
417 | | SDValue combineRepeatedFPDivisors(SDNode *N); |
418 | | SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); |
419 | | SDValue BuildSDIV(SDNode *N); |
420 | | SDValue BuildSDIVPow2(SDNode *N); |
421 | | SDValue BuildUDIV(SDNode *N); |
422 | | SDValue BuildLogBase2(SDValue Op, const SDLoc &DL); |
423 | | SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags); |
424 | | SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); |
425 | | SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); |
426 | | SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip); |
427 | | SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, |
428 | | SDNodeFlags Flags, bool Reciprocal); |
429 | | SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, |
430 | | SDNodeFlags Flags, bool Reciprocal); |
431 | | SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, |
432 | | bool DemandHighBits = true); |
433 | | SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); |
434 | | SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, |
435 | | SDValue InnerPos, SDValue InnerNeg, |
436 | | unsigned PosOpcode, unsigned NegOpcode, |
437 | | const SDLoc &DL); |
438 | | SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); |
439 | | SDValue MatchLoadCombine(SDNode *N); |
440 | | SDValue ReduceLoadWidth(SDNode *N); |
441 | | SDValue ReduceLoadOpStoreWidth(SDNode *N); |
442 | | SDValue splitMergedValStore(StoreSDNode *ST); |
443 | | SDValue TransformFPLoadStorePair(SDNode *N); |
444 | | SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); |
445 | | SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); |
446 | | SDValue reduceBuildVecToShuffle(SDNode *N); |
447 | | SDValue reduceBuildVecToTrunc(SDNode *N); |
448 | | SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, |
449 | | ArrayRef<int> VectorMask, SDValue VecIn1, |
450 | | SDValue VecIn2, unsigned LeftIdx); |
451 | | SDValue matchVSelectOpSizesWithSetCC(SDNode *N); |
452 | | |
453 | | /// Walk up chain skipping non-aliasing memory nodes, |
454 | | /// looking for aliasing nodes and adding them to the Aliases vector. |
455 | | void GatherAllAliases(SDNode *N, SDValue OriginalChain, |
456 | | SmallVectorImpl<SDValue> &Aliases); |
457 | | |
458 | | /// Return true if there is any possibility that the two addresses overlap. |
459 | | bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; |
460 | | |
461 | | /// Walk up chain skipping non-aliasing memory nodes, looking for a better |
462 | | /// chain (aliasing node.) |
463 | | SDValue FindBetterChain(SDNode *N, SDValue Chain); |
464 | | |
465 | | /// Try to replace a store and any possibly adjacent stores on |
466 | | /// consecutive chains with better chains. Return true only if St is |
467 | | /// replaced. |
468 | | /// |
469 | | /// Notice that other chains may still be replaced even if the function |
470 | | /// returns false. |
471 | | bool findBetterNeighborChains(StoreSDNode *St); |
472 | | |
473 | | /// Match "(X shl/srl V1) & V2" where V2 may not be present. |
474 | | bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask); |
475 | | |
476 | | /// Holds a pointer to an LSBaseSDNode as well as information on where it |
477 | | /// is located in a sequence of memory operations connected by a chain. |
478 | | struct MemOpLink { |
479 | | // Ptr to the mem node. |
480 | | LSBaseSDNode *MemNode; |
481 | | |
482 | | // Offset from the base ptr. |
483 | | int64_t OffsetFromBase; |
484 | | |
485 | | MemOpLink(LSBaseSDNode *N, int64_t Offset) |
486 | 21.9M | : MemNode(N), OffsetFromBase(Offset) {} |
487 | | }; |
488 | | |
489 | | /// This is a helper function for visitMUL to check the profitability |
490 | | /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). |
491 | | /// MulNode is the original multiply, AddNode is (add x, c1), |
492 | | /// and ConstNode is c2. |
493 | | bool isMulAddWithConstProfitable(SDNode *MulNode, |
494 | | SDValue &AddNode, |
495 | | SDValue &ConstNode); |
496 | | |
497 | | /// This is a helper function for visitAND and visitZERO_EXTEND. Returns |
498 | | /// true if the (and (load x) c) pattern matches an extload. ExtVT returns |
499 | | /// the type of the loaded value to be extended. LoadedVT returns the type |
500 | | /// of the original loaded value. NarrowLoad returns whether the load would |
501 | | /// need to be narrowed in order to match. |
502 | | bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, |
503 | | EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, |
504 | | bool &NarrowLoad); |
505 | | |
506 | | /// Helper function for MergeConsecutiveStores which merges the |
507 | | /// component store chains. |
508 | | SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, |
509 | | unsigned NumStores); |
510 | | |
511 | | /// This is a helper function for MergeConsecutiveStores. When the |
512 | | /// source elements of the consecutive stores are all constants or |
513 | | /// all extracted vector elements, try to merge them into one |
514 | | /// larger store introducing bitcasts if necessary. \return True |
515 | | /// if a merged store was created. |
516 | | bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, |
517 | | EVT MemVT, unsigned NumStores, |
518 | | bool IsConstantSrc, bool UseVector, |
519 | | bool UseTrunc); |
520 | | |
521 | | /// This is a helper function for MergeConsecutiveStores. Stores |
522 | | /// that potentially may be merged with St are placed in |
523 | | /// StoreNodes. |
524 | | void getStoreMergeCandidates(StoreSDNode *St, |
525 | | SmallVectorImpl<MemOpLink> &StoreNodes); |
526 | | |
527 | | /// Helper function for MergeConsecutiveStores. Checks if |
528 | | /// candidate stores have indirect dependency through their |
529 | | /// operands. \return True if safe to merge. |
530 | | bool checkMergeStoreCandidatesForDependencies( |
531 | | SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores); |
532 | | |
533 | | /// Merge consecutive store operations into a wide store. |
534 | | /// This optimization uses wide integers or vectors when possible. |
535 | | /// \return number of stores that were merged into a merged store (the |
536 | | /// affected nodes are stored as a prefix in \p StoreNodes). |
537 | | bool MergeConsecutiveStores(StoreSDNode *N); |
538 | | |
539 | | /// \brief Try to transform a truncation where C is a constant: |
540 | | /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) |
541 | | /// |
542 | | /// \p N needs to be a truncation and its first operand an AND. Other |
543 | | /// requirements are checked by the function (e.g. that trunc is |
544 | | /// single-use) and if missed an empty SDValue is returned. |
545 | | SDValue distributeTruncateThroughAnd(SDNode *N); |
546 | | |
547 | | public: |
548 | | /// Runs the dag combiner on all nodes in the work list |
549 | | void Run(CombineLevel AtLevel); |
550 | | |
551 | 181M | SelectionDAG &getDAG() const { return DAG; } |
552 | | |
553 | | /// Returns a type large enough to hold any valid shift amount - before type |
554 | | /// legalization these can be huge. |
555 | 40.4k | EVT getShiftAmountTy(EVT LHSTy) { |
556 | 40.4k | assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); |
557 | 40.4k | if (LHSTy.isVector()) |
558 | 358 | return LHSTy; |
559 | 40.0k | auto &DL = DAG.getDataLayout(); |
560 | 7.77k | return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy) |
561 | 32.3k | : TLI.getPointerTy(DL); |
562 | 40.4k | } |
563 | | |
564 | | /// This method returns true if we are running before type legalization or |
565 | | /// if the specified VT is legal. |
566 | 446k | bool isTypeLegal(const EVT &VT) { |
567 | 446k | if (!LegalTypes446k ) return true165k ; |
568 | 280k | return TLI.isTypeLegal(VT); |
569 | 280k | } |
570 | | |
571 | | /// Convenience wrapper around TargetLowering::getSetCCResultType |
572 | 2.58M | EVT getSetCCResultType(EVT VT) const { |
573 | 2.58M | return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); |
574 | 2.58M | } |
575 | | }; |
576 | | |
577 | | /// This class is a DAGUpdateListener that removes any deleted |
578 | | /// nodes from the worklist. |
579 | | class WorklistRemover : public SelectionDAG::DAGUpdateListener { |
580 | | DAGCombiner &DC; |
581 | | |
582 | | public: |
583 | | explicit WorklistRemover(DAGCombiner &dc) |
584 | 181M | : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} |
585 | | |
586 | 92.1k | void NodeDeleted(SDNode *N, SDNode *E) override { |
587 | 92.1k | DC.removeFromWorklist(N); |
588 | 92.1k | } |
589 | | }; |
590 | | |
591 | | } // end anonymous namespace |
592 | | |
593 | | //===----------------------------------------------------------------------===// |
594 | | // TargetLowering::DAGCombinerInfo implementation |
595 | | //===----------------------------------------------------------------------===// |
596 | | |
597 | 18.1k | void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { |
598 | 18.1k | ((DAGCombiner*)DC)->AddToWorklist(N); |
599 | 18.1k | } |
600 | | |
601 | | SDValue TargetLowering::DAGCombinerInfo:: |
602 | 1.67k | CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { |
603 | 1.67k | return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); |
604 | 1.67k | } |
605 | | |
606 | | SDValue TargetLowering::DAGCombinerInfo:: |
607 | 7.25k | CombineTo(SDNode *N, SDValue Res, bool AddTo) { |
608 | 7.25k | return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); |
609 | 7.25k | } |
610 | | |
611 | | SDValue TargetLowering::DAGCombinerInfo:: |
612 | 4.31k | CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { |
613 | 4.31k | return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); |
614 | 4.31k | } |
615 | | |
616 | | void TargetLowering::DAGCombinerInfo:: |
617 | 327 | CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { |
618 | 327 | return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); |
619 | 327 | } |
620 | | |
621 | | //===----------------------------------------------------------------------===// |
622 | | // Helper Functions |
623 | | //===----------------------------------------------------------------------===// |
624 | | |
625 | 2.94M | void DAGCombiner::deleteAndRecombine(SDNode *N) { |
626 | 2.94M | removeFromWorklist(N); |
627 | 2.94M | |
628 | 2.94M | // If the operands of this node are only used by the node, they will now be |
629 | 2.94M | // dead. Make sure to re-visit them and recursively delete dead nodes. |
630 | 2.94M | for (const SDValue &Op : N->ops()) |
631 | 2.94M | // For an operand generating multiple values, one of the values may |
632 | 2.94M | // become dead allowing further simplification (e.g. split index |
633 | 2.94M | // arithmetic from an indexed load). |
634 | 8.61M | if (8.61M Op->hasOneUse() || 8.61M Op->getNumValues() > 17.67M ) |
635 | 2.29M | AddToWorklist(Op.getNode()); |
636 | 2.94M | |
637 | 2.94M | DAG.DeleteNode(N); |
638 | 2.94M | } |
639 | | |
640 | | /// Return 1 if we can compute the negated form of the specified expression for |
641 | | /// the same cost as the expression itself, or 2 if we can compute the negated |
642 | | /// form more cheaply than the expression itself. |
643 | | static char isNegatibleForFree(SDValue Op, bool LegalOperations, |
644 | | const TargetLowering &TLI, |
645 | | const TargetOptions *Options, |
646 | 774k | unsigned Depth = 0) { |
647 | 774k | // fneg is removable even if it has multiple uses. |
648 | 774k | if (Op.getOpcode() == ISD::FNEG774k ) return 21.35k ; |
649 | 773k | |
650 | 773k | // Don't allow anything with multiple uses. |
651 | 773k | if (773k !Op.hasOneUse()773k ) return 0240k ; |
652 | 532k | |
653 | 532k | // Don't recurse exponentially. |
654 | 532k | if (532k Depth > 6532k ) return 01.73k ; |
655 | 530k | |
656 | 530k | switch (Op.getOpcode()) { |
657 | 309k | default: return false; |
658 | 43.2k | case ISD::ConstantFP: { |
659 | 43.2k | if (!LegalOperations) |
660 | 32.9k | return 1; |
661 | 10.3k | |
662 | 10.3k | // Don't invert constant FP values after legalization unless the target says |
663 | 10.3k | // the negated constant is legal. |
664 | 10.3k | EVT VT = Op.getValueType(); |
665 | 10.3k | return TLI.isOperationLegal(ISD::ConstantFP, VT) || |
666 | 9.79k | TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT); |
667 | 10.3k | } |
668 | 66.8k | case ISD::FADD: |
669 | 66.8k | // FIXME: determine better conditions for this xform. |
670 | 66.8k | if (!Options->UnsafeFPMath66.8k ) return 065.7k ; |
671 | 1.08k | |
672 | 1.08k | // After operation legalization, it might not be legal to create new FSUBs. |
673 | 1.08k | if (1.08k LegalOperations && |
674 | 427 | !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) |
675 | 0 | return 0; |
676 | 1.08k | |
677 | 1.08k | // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) |
678 | 1.08k | if (char 1.08k V1.08k = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, |
679 | 1.08k | Options, Depth + 1)) |
680 | 19 | return V; |
681 | 1.06k | // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) |
682 | 1.06k | return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, |
683 | 1.06k | Depth + 1); |
684 | 8.26k | case ISD::FSUB: |
685 | 8.26k | // We can't turn -(A-B) into B-A when we honor signed zeros. |
686 | 8.26k | if (!Options->NoSignedZerosFPMath && |
687 | 8.23k | !Op.getNode()->getFlags().hasNoSignedZeros()) |
688 | 7.68k | return 0; |
689 | 578 | |
690 | 578 | // fold (fneg (fsub A, B)) -> (fsub B, A) |
691 | 578 | return 1; |
692 | 578 | |
693 | 96.8k | case ISD::FMUL: |
694 | 96.8k | case ISD::FDIV: |
695 | 96.8k | if (Options->HonorSignDependentRoundingFPMath()96.8k ) return 00 ; |
696 | 96.8k | |
697 | 96.8k | // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) |
698 | 96.8k | if (char 96.8k V96.8k = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, |
699 | 96.8k | Options, Depth + 1)) |
700 | 697 | return V; |
701 | 96.1k | |
702 | 96.1k | return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, |
703 | 96.1k | Depth + 1); |
704 | 96.1k | |
705 | 5.48k | case ISD::FP_EXTEND: |
706 | 5.48k | case ISD::FP_ROUND: |
707 | 5.48k | case ISD::FSIN: |
708 | 5.48k | return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, |
709 | 5.48k | Depth + 1); |
710 | 0 | } |
711 | 0 | } |
712 | | |
713 | | /// If isNegatibleForFree returns true, return the newly negated expression. |
714 | | static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, |
715 | 2.59k | bool LegalOperations, unsigned Depth = 0) { |
716 | 2.59k | const TargetOptions &Options = DAG.getTarget().Options; |
717 | 2.59k | // fneg is removable even if it has multiple uses. |
718 | 2.59k | if (Op.getOpcode() == ISD::FNEG2.59k ) return Op.getOperand(0)684 ; |
719 | 1.90k | |
720 | 1.90k | // Don't allow anything with multiple uses. |
721 | 2.59k | assert(Op.hasOneUse() && "Unknown reuse!"); |
722 | 1.90k | |
723 | 1.90k | assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); |
724 | 1.90k | |
725 | 1.90k | const SDNodeFlags Flags = Op.getNode()->getFlags(); |
726 | 1.90k | |
727 | 1.90k | switch (Op.getOpcode()) { |
728 | 0 | default: 0 llvm_unreachable0 ("Unknown code"); |
729 | 732 | case ISD::ConstantFP: { |
730 | 732 | APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); |
731 | 732 | V.changeSign(); |
732 | 732 | return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); |
733 | 1.90k | } |
734 | 18 | case ISD::FADD: |
735 | 18 | // FIXME: determine better conditions for this xform. |
736 | 18 | assert(Options.UnsafeFPMath); |
737 | 18 | |
738 | 18 | // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) |
739 | 18 | if (isNegatibleForFree(Op.getOperand(0), LegalOperations, |
740 | 18 | DAG.getTargetLoweringInfo(), &Options, Depth+1)) |
741 | 18 | return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), |
742 | 18 | GetNegatedExpression(Op.getOperand(0), DAG, |
743 | 18 | LegalOperations, Depth+1), |
744 | 18 | Op.getOperand(1), Flags); |
745 | 0 | // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) |
746 | 0 | return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), |
747 | 0 | GetNegatedExpression(Op.getOperand(1), DAG, |
748 | 0 | LegalOperations, Depth+1), |
749 | 0 | Op.getOperand(0), Flags); |
750 | 19 | case ISD::FSUB: |
751 | 19 | // fold (fneg (fsub 0, B)) -> B |
752 | 19 | if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) |
753 | 5 | if (5 N0CFP->isZero()5 ) |
754 | 5 | return Op.getOperand(1); |
755 | 14 | |
756 | 14 | // fold (fneg (fsub A, B)) -> (fsub B, A) |
757 | 14 | return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), |
758 | 14 | Op.getOperand(1), Op.getOperand(0), Flags); |
759 | 14 | |
760 | 1.10k | case ISD::FMUL: |
761 | 1.10k | case ISD::FDIV: |
762 | 1.10k | assert(!Options.HonorSignDependentRoundingFPMath()); |
763 | 1.10k | |
764 | 1.10k | // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) |
765 | 1.10k | if (isNegatibleForFree(Op.getOperand(0), LegalOperations, |
766 | 1.10k | DAG.getTargetLoweringInfo(), &Options, Depth+1)) |
767 | 289 | return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), |
768 | 289 | GetNegatedExpression(Op.getOperand(0), DAG, |
769 | 289 | LegalOperations, Depth+1), |
770 | 289 | Op.getOperand(1), Flags); |
771 | 820 | |
772 | 820 | // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) |
773 | 820 | return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), |
774 | 820 | Op.getOperand(0), |
775 | 820 | GetNegatedExpression(Op.getOperand(1), DAG, |
776 | 820 | LegalOperations, Depth+1), Flags); |
777 | 820 | |
778 | 18 | case ISD::FP_EXTEND: |
779 | 18 | case ISD::FSIN: |
780 | 18 | return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), |
781 | 18 | GetNegatedExpression(Op.getOperand(0), DAG, |
782 | 18 | LegalOperations, Depth+1)); |
783 | 12 | case ISD::FP_ROUND: |
784 | 12 | return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), |
785 | 12 | GetNegatedExpression(Op.getOperand(0), DAG, |
786 | 12 | LegalOperations, Depth+1), |
787 | 12 | Op.getOperand(1)); |
788 | 0 | } |
789 | 0 | } |
790 | | |
791 | | // APInts must be the same size for most operations, this helper |
792 | | // function zero extends the shorter of the pair so that they match. |
793 | | // We provide an Offset so that we can create bitwidths that won't overflow. |
794 | 11.4k | static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { |
795 | 11.4k | unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth()); |
796 | 11.4k | LHS = LHS.zextOrSelf(Bits); |
797 | 11.4k | RHS = RHS.zextOrSelf(Bits); |
798 | 11.4k | } |
799 | | |
800 | | // Return true if this node is a setcc, or is a select_cc |
801 | | // that selects between the target values used for true and false, making it |
802 | | // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to |
803 | | // the appropriate nodes based on the type of node we are checking. This |
804 | | // simplifies life a bit for the callers. |
805 | | bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, |
806 | 2.25M | SDValue &CC) const { |
807 | 2.25M | if (N.getOpcode() == ISD::SETCC2.25M ) { |
808 | 1.12M | LHS = N.getOperand(0); |
809 | 1.12M | RHS = N.getOperand(1); |
810 | 1.12M | CC = N.getOperand(2); |
811 | 1.12M | return true; |
812 | 1.12M | } |
813 | 1.13M | |
814 | 1.13M | if (1.13M N.getOpcode() != ISD::SELECT_CC || |
815 | 5.05k | !TLI.isConstTrueVal(N.getOperand(2).getNode()) || |
816 | 294 | !TLI.isConstFalseVal(N.getOperand(3).getNode())) |
817 | 1.13M | return false; |
818 | 288 | |
819 | 288 | if (288 TLI.getBooleanContents(N.getValueType()) == |
820 | 288 | TargetLowering::UndefinedBooleanContent) |
821 | 0 | return false; |
822 | 288 | |
823 | 288 | LHS = N.getOperand(0); |
824 | 288 | RHS = N.getOperand(1); |
825 | 288 | CC = N.getOperand(4); |
826 | 288 | return true; |
827 | 288 | } |
828 | | |
829 | | /// Return true if this is a SetCC-equivalent operation with only one use. |
830 | | /// If this is true, it allows the users to invert the operation for free when |
831 | | /// it is profitable to do so. |
832 | 158 | bool DAGCombiner::isOneUseSetCC(SDValue N) const { |
833 | 158 | SDValue N0, N1, N2; |
834 | 158 | if (isSetCCEquivalent(N, N0, N1, N2) && 158 N.getNode()->hasOneUse()85 ) |
835 | 73 | return true; |
836 | 85 | return false; |
837 | 85 | } |
838 | | |
839 | | // \brief Returns the SDNode if it is a constant float BuildVector |
840 | | // or constant float. |
841 | 3.71M | static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { |
842 | 3.71M | if (isa<ConstantFPSDNode>(N)) |
843 | 138k | return N.getNode(); |
844 | 3.57M | if (3.57M ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())3.57M ) |
845 | 14.2k | return N.getNode(); |
846 | 3.56M | return nullptr; |
847 | 3.56M | } |
848 | | |
849 | | // Determines if it is a constant integer or a build vector of constant |
850 | | // integers (and undefs). |
851 | | // Do not permit build vector implicit truncation. |
852 | 23.8M | static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) { |
853 | 23.8M | if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N)) |
854 | 18.7M | return !(Const->isOpaque() && 18.7M NoOpaques13.8k ); |
855 | 5.15M | if (5.15M N.getOpcode() != ISD::BUILD_VECTOR5.15M ) |
856 | 5.02M | return false; |
857 | 124k | unsigned BitWidth = N.getScalarValueSizeInBits(); |
858 | 417k | for (const SDValue &Op : N->op_values()) { |
859 | 417k | if (Op.isUndef()) |
860 | 882 | continue; |
861 | 416k | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op); |
862 | 416k | if (!Const || 416k Const->getAPIntValue().getBitWidth() != BitWidth403k || |
863 | 399k | (Const->isOpaque() && 399k NoOpaques0 )) |
864 | 16.6k | return false; |
865 | 107k | } |
866 | 107k | return true; |
867 | 107k | } |
868 | | |
869 | | // Determines if it is a constant null integer or a splatted vector of a |
870 | | // constant null integer (with no undefs). |
871 | | // Build vector implicit truncation is not an issue for null values. |
872 | 1.51M | static bool isNullConstantOrNullSplatConstant(SDValue N) { |
873 | 1.51M | if (ConstantSDNode *Splat = isConstOrConstSplat(N)) |
874 | 159k | return Splat->isNullValue(); |
875 | 1.35M | return false; |
876 | 1.35M | } |
877 | | |
878 | | // Determines if it is a constant integer of one or a splatted vector of a |
879 | | // constant integer of one (with no undefs). |
880 | | // Do not permit build vector implicit truncation. |
881 | 4.32k | static bool isOneConstantOrOneSplatConstant(SDValue N) { |
882 | 4.32k | unsigned BitWidth = N.getScalarValueSizeInBits(); |
883 | 4.32k | if (ConstantSDNode *Splat = isConstOrConstSplat(N)) |
884 | 3.90k | return Splat->isOne() && 3.90k Splat->getAPIntValue().getBitWidth() == BitWidth1.98k ; |
885 | 415 | return false; |
886 | 415 | } |
887 | | |
888 | | // Determines if it is a constant integer of all ones or a splatted vector of a |
889 | | // constant integer of all ones (with no undefs). |
890 | | // Do not permit build vector implicit truncation. |
891 | 426k | static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) { |
892 | 426k | unsigned BitWidth = N.getScalarValueSizeInBits(); |
893 | 426k | if (ConstantSDNode *Splat = isConstOrConstSplat(N)) |
894 | 123k | return Splat->isAllOnesValue() && |
895 | 418 | Splat->getAPIntValue().getBitWidth() == BitWidth; |
896 | 303k | return false; |
897 | 303k | } |
898 | | |
899 | | // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with |
900 | | // undef's. |
901 | 18.6k | static bool isAnyConstantBuildVector(const SDNode *N) { |
902 | 18.6k | return ISD::isBuildVectorOfConstantSDNodes(N) || |
903 | 16.5k | ISD::isBuildVectorOfConstantFPSDNodes(N); |
904 | 18.6k | } |
905 | | |
906 | | // Attempt to match a unary predicate against a scalar/splat constant or |
907 | | // every element of a constant BUILD_VECTOR. |
908 | | static bool matchUnaryPredicate(SDValue Op, |
909 | 1.12M | std::function<bool(ConstantSDNode *)> Match) { |
910 | 1.12M | if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) |
911 | 1.03M | return Match(Cst); |
912 | 85.0k | |
913 | 85.0k | if (85.0k ISD::BUILD_VECTOR != Op.getOpcode()85.0k ) |
914 | 75.1k | return false; |
915 | 9.99k | |
916 | 9.99k | EVT SVT = Op.getValueType().getScalarType(); |
917 | 10.1k | for (unsigned i = 0, e = Op.getNumOperands(); i != e10.1k ; ++i140 ) { |
918 | 10.1k | auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); |
919 | 10.1k | if (!Cst || 10.1k Cst->getValueType(0) != SVT9.71k || !Match(Cst)8.63k ) |
920 | 9.96k | return false; |
921 | 10.1k | } |
922 | 28 | return true; |
923 | 1.12M | } |
924 | | |
925 | | // Attempt to match a binary predicate against a pair of scalar/splat constants |
926 | | // or every element of a pair of constant BUILD_VECTORs. |
927 | | static bool matchBinaryPredicate( |
928 | | SDValue LHS, SDValue RHS, |
929 | 16.4k | std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { |
930 | 16.4k | if (LHS.getValueType() != RHS.getValueType()) |
931 | 232 | return false; |
932 | 16.2k | |
933 | 16.2k | if (auto *16.2k LHSCst16.2k = dyn_cast<ConstantSDNode>(LHS)) |
934 | 14.0k | if (auto *14.0k RHSCst14.0k = dyn_cast<ConstantSDNode>(RHS)) |
935 | 12.5k | return Match(LHSCst, RHSCst); |
936 | 3.69k | |
937 | 3.69k | if (3.69k ISD::BUILD_VECTOR != LHS.getOpcode() || |
938 | 158 | ISD::BUILD_VECTOR != RHS.getOpcode()) |
939 | 3.53k | return false; |
940 | 158 | |
941 | 158 | EVT SVT = LHS.getValueType().getScalarType(); |
942 | 1.25k | for (unsigned i = 0, e = LHS.getNumOperands(); i != e1.25k ; ++i1.09k ) { |
943 | 1.11k | auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); |
944 | 1.11k | auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); |
945 | 1.11k | if (!LHSCst || 1.11k !RHSCst1.11k ) |
946 | 0 | return false; |
947 | 1.11k | if (1.11k LHSCst->getValueType(0) != SVT || |
948 | 1.11k | LHSCst->getValueType(0) != RHSCst->getValueType(0)) |
949 | 0 | return false; |
950 | 1.11k | if (1.11k !Match(LHSCst, RHSCst)1.11k ) |
951 | 19 | return false; |
952 | 1.11k | } |
953 | 139 | return true; |
954 | 16.4k | } |
955 | | |
956 | | SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, |
957 | 11.9M | SDValue N1) { |
958 | 11.9M | EVT VT = N0.getValueType(); |
959 | 11.9M | if (N0.getOpcode() == Opc11.9M ) { |
960 | 1.21M | if (SDNode *L1.21M = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { |
961 | 590k | if (SDNode *R590k = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { |
962 | 499k | // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) |
963 | 499k | if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) |
964 | 499k | return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); |
965 | 70 | return SDValue(); |
966 | 70 | } |
967 | 91.5k | if (91.5k N0.hasOneUse()91.5k ) { |
968 | 63.6k | // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one |
969 | 63.6k | // use |
970 | 63.6k | SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); |
971 | 63.6k | if (!OpNode.getNode()) |
972 | 0 | return SDValue(); |
973 | 63.6k | AddToWorklist(OpNode.getNode()); |
974 | 63.6k | return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); |
975 | 63.6k | } |
976 | 590k | } |
977 | 1.21M | } |
978 | 11.4M | |
979 | 11.4M | if (11.4M N1.getOpcode() == Opc11.4M ) { |
980 | 66.4k | if (SDNode *R66.4k = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { |
981 | 34.1k | if (SDNode *L34.1k = DAG.isConstantIntBuildVectorOrConstantInt(N0)) { |
982 | 0 | // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) |
983 | 0 | if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) |
984 | 0 | return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); |
985 | 0 | return SDValue(); |
986 | 0 | } |
987 | 34.1k | if (34.1k N1.hasOneUse()34.1k ) { |
988 | 24.2k | // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one |
989 | 24.2k | // use |
990 | 24.2k | SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0)); |
991 | 24.2k | if (!OpNode.getNode()) |
992 | 0 | return SDValue(); |
993 | 24.2k | AddToWorklist(OpNode.getNode()); |
994 | 24.2k | return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); |
995 | 24.2k | } |
996 | 34.1k | } |
997 | 66.4k | } |
998 | 11.3M | |
999 | 11.3M | return SDValue(); |
1000 | 11.3M | } |
1001 | | |
1002 | | SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, |
1003 | 2.20M | bool AddTo) { |
1004 | 2.20M | assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); |
1005 | 2.20M | ++NodesCombined; |
1006 | 2.20M | DEBUG(dbgs() << "\nReplacing.1 "; |
1007 | 2.20M | N->dump(&DAG); |
1008 | 2.20M | dbgs() << "\nWith: "; |
1009 | 2.20M | To[0].getNode()->dump(&DAG); |
1010 | 2.20M | dbgs() << " and " << NumTo-1 << " other values\n"); |
1011 | 4.94M | for (unsigned i = 0, e = NumTo; i != e4.94M ; ++i2.73M ) |
1012 | 2.20M | assert((!To[i].getNode() || |
1013 | 2.20M | N->getValueType(i) == To[i].getValueType()) && |
1014 | 2.20M | "Cannot combine value to value of different type!"); |
1015 | 2.20M | |
1016 | 2.20M | WorklistRemover DeadNodes(*this); |
1017 | 2.20M | DAG.ReplaceAllUsesWith(N, To); |
1018 | 2.20M | if (AddTo2.20M ) { |
1019 | 1.26M | // Push the new nodes and any users onto the worklist |
1020 | 3.05M | for (unsigned i = 0, e = NumTo; i != e3.05M ; ++i1.79M ) { |
1021 | 1.79M | if (To[i].getNode()1.79M ) { |
1022 | 1.79M | AddToWorklist(To[i].getNode()); |
1023 | 1.79M | AddUsersToWorklist(To[i].getNode()); |
1024 | 1.79M | } |
1025 | 1.79M | } |
1026 | 1.26M | } |
1027 | 2.20M | |
1028 | 2.20M | // Finally, if the node is now dead, remove it from the graph. The node |
1029 | 2.20M | // may not be dead if the replacement process recursively simplified to |
1030 | 2.20M | // something else needing this node. |
1031 | 2.20M | if (N->use_empty()) |
1032 | 2.20M | deleteAndRecombine(N); |
1033 | 2.20M | return SDValue(N, 0); |
1034 | 2.20M | } |
1035 | | |
1036 | | void DAGCombiner:: |
1037 | 458k | CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { |
1038 | 458k | // Replace all uses. If any nodes become isomorphic to other nodes and |
1039 | 458k | // are deleted, make sure to remove them from our worklist. |
1040 | 458k | WorklistRemover DeadNodes(*this); |
1041 | 458k | DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); |
1042 | 458k | |
1043 | 458k | // Push the new node and any (possibly new) users onto the worklist. |
1044 | 458k | AddToWorklist(TLO.New.getNode()); |
1045 | 458k | AddUsersToWorklist(TLO.New.getNode()); |
1046 | 458k | |
1047 | 458k | // Finally, if the node is now dead, remove it from the graph. The node |
1048 | 458k | // may not be dead if the replacement process recursively simplified to |
1049 | 458k | // something else needing this node. |
1050 | 458k | if (TLO.Old.getNode()->use_empty()) |
1051 | 452k | deleteAndRecombine(TLO.Old.getNode()); |
1052 | 458k | } |
1053 | | |
1054 | | /// Check the specified integer node value to see if it can be simplified or if |
1055 | | /// things it uses can be simplified by bit propagation. If so, return true. |
1056 | 12.7M | bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { |
1057 | 12.7M | TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); |
1058 | 12.7M | KnownBits Known; |
1059 | 12.7M | if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO)) |
1060 | 12.3M | return false; |
1061 | 458k | |
1062 | 458k | // Revisit the node. |
1063 | 458k | AddToWorklist(Op.getNode()); |
1064 | 458k | |
1065 | 458k | // Replace the old value with the new one. |
1066 | 458k | ++NodesCombined; |
1067 | 458k | DEBUG(dbgs() << "\nReplacing.2 "; |
1068 | 12.7M | TLO.Old.getNode()->dump(&DAG); |
1069 | 12.7M | dbgs() << "\nWith: "; |
1070 | 12.7M | TLO.New.getNode()->dump(&DAG); |
1071 | 12.7M | dbgs() << '\n'); |
1072 | 12.7M | |
1073 | 12.7M | CommitTargetLoweringOpt(TLO); |
1074 | 12.7M | return true; |
1075 | 12.7M | } |
1076 | | |
1077 | 50 | void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { |
1078 | 50 | SDLoc DL(Load); |
1079 | 50 | EVT VT = Load->getValueType(0); |
1080 | 50 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0)); |
1081 | 50 | |
1082 | 50 | DEBUG(dbgs() << "\nReplacing.9 "; |
1083 | 50 | Load->dump(&DAG); |
1084 | 50 | dbgs() << "\nWith: "; |
1085 | 50 | Trunc.getNode()->dump(&DAG); |
1086 | 50 | dbgs() << '\n'); |
1087 | 50 | WorklistRemover DeadNodes(*this); |
1088 | 50 | DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); |
1089 | 50 | DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); |
1090 | 50 | deleteAndRecombine(Load); |
1091 | 50 | AddToWorklist(Trunc.getNode()); |
1092 | 50 | } |
1093 | | |
1094 | 3.35k | SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { |
1095 | 3.35k | Replace = false; |
1096 | 3.35k | SDLoc DL(Op); |
1097 | 3.35k | if (ISD::isUNINDEXEDLoad(Op.getNode())3.35k ) { |
1098 | 103 | LoadSDNode *LD = cast<LoadSDNode>(Op); |
1099 | 103 | EVT MemVT = LD->getMemoryVT(); |
1100 | 103 | ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) |
1101 | 73 | ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? 73 ISD::ZEXTLOAD73 |
1102 | 73 | : ISD::EXTLOAD) |
1103 | 30 | : LD->getExtensionType(); |
1104 | 103 | Replace = true; |
1105 | 103 | return DAG.getExtLoad(ExtType, DL, PVT, |
1106 | 103 | LD->getChain(), LD->getBasePtr(), |
1107 | 103 | MemVT, LD->getMemOperand()); |
1108 | 103 | } |
1109 | 3.25k | |
1110 | 3.25k | unsigned Opc = Op.getOpcode(); |
1111 | 3.25k | switch (Opc) { |
1112 | 2.39k | default: break; |
1113 | 9 | case ISD::AssertSext: |
1114 | 9 | if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT)) |
1115 | 9 | return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1)); |
1116 | 0 | break; |
1117 | 33 | case ISD::AssertZext: |
1118 | 33 | if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT)) |
1119 | 33 | return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1)); |
1120 | 0 | break; |
1121 | 817 | case ISD::Constant: { |
1122 | 817 | unsigned ExtOpc = |
1123 | 817 | Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND817 : ISD::ZERO_EXTEND0 ; |
1124 | 817 | return DAG.getNode(ExtOpc, DL, PVT, Op); |
1125 | 2.39k | } |
1126 | 2.39k | } |
1127 | 2.39k | |
1128 | 2.39k | if (2.39k !TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)2.39k ) |
1129 | 0 | return SDValue(); |
1130 | 2.39k | return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op); |
1131 | 2.39k | } |
1132 | | |
1133 | 9 | SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { |
1134 | 9 | if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) |
1135 | 0 | return SDValue(); |
1136 | 9 | EVT OldVT = Op.getValueType(); |
1137 | 9 | SDLoc DL(Op); |
1138 | 9 | bool Replace = false; |
1139 | 9 | SDValue NewOp = PromoteOperand(Op, PVT, Replace); |
1140 | 9 | if (!NewOp.getNode()) |
1141 | 0 | return SDValue(); |
1142 | 9 | AddToWorklist(NewOp.getNode()); |
1143 | 9 | |
1144 | 9 | if (Replace) |
1145 | 0 | ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); |
1146 | 9 | return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp, |
1147 | 9 | DAG.getValueType(OldVT)); |
1148 | 9 | } |
1149 | | |
1150 | 770 | SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { |
1151 | 770 | EVT OldVT = Op.getValueType(); |
1152 | 770 | SDLoc DL(Op); |
1153 | 770 | bool Replace = false; |
1154 | 770 | SDValue NewOp = PromoteOperand(Op, PVT, Replace); |
1155 | 770 | if (!NewOp.getNode()) |
1156 | 0 | return SDValue(); |
1157 | 770 | AddToWorklist(NewOp.getNode()); |
1158 | 770 | |
1159 | 770 | if (Replace) |
1160 | 5 | ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); |
1161 | 770 | return DAG.getZeroExtendInReg(NewOp, DL, OldVT); |
1162 | 770 | } |
1163 | | |
1164 | | /// Promote the specified integer binary operation if the target indicates it is |
1165 | | /// beneficial. e.g. On x86, it's usually better to promote i16 operations to |
1166 | | /// i32 since i16 instructions are longer. |
1167 | 10.3M | SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { |
1168 | 10.3M | if (!LegalOperations) |
1169 | 5.85M | return SDValue(); |
1170 | 4.48M | |
1171 | 4.48M | EVT VT = Op.getValueType(); |
1172 | 4.48M | if (VT.isVector() || 4.48M !VT.isInteger()4.33M ) |
1173 | 145k | return SDValue(); |
1174 | 4.33M | |
1175 | 4.33M | // If operation type is 'undesirable', e.g. i16 on x86, consider |
1176 | 4.33M | // promoting it. |
1177 | 4.33M | unsigned Opc = Op.getOpcode(); |
1178 | 4.33M | if (TLI.isTypeDesirableForOp(Opc, VT)) |
1179 | 4.33M | return SDValue(); |
1180 | 1.41k | |
1181 | 1.41k | EVT PVT = VT; |
1182 | 1.41k | // Consult target whether it is a good idea to promote this operation and |
1183 | 1.41k | // what's the right type to promote it to. |
1184 | 1.41k | if (TLI.IsDesirableToPromoteOp(Op, PVT)1.41k ) { |
1185 | 1.24k | assert(PVT != VT && "Don't know what type to promote to!"); |
1186 | 1.24k | |
1187 | 1.24k | DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); |
1188 | 1.24k | |
1189 | 1.24k | bool Replace0 = false; |
1190 | 1.24k | SDValue N0 = Op.getOperand(0); |
1191 | 1.24k | SDValue NN0 = PromoteOperand(N0, PVT, Replace0); |
1192 | 1.24k | |
1193 | 1.24k | bool Replace1 = false; |
1194 | 1.24k | SDValue N1 = Op.getOperand(1); |
1195 | 1.24k | SDValue NN1 = PromoteOperand(N1, PVT, Replace1); |
1196 | 1.24k | SDLoc DL(Op); |
1197 | 1.24k | |
1198 | 1.24k | SDValue RV = |
1199 | 1.24k | DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1)); |
1200 | 1.24k | |
1201 | 1.24k | // We are always replacing N0/N1's use in N and only need |
1202 | 1.24k | // additional replacements if there are additional uses. |
1203 | 1.24k | Replace0 &= !N0->hasOneUse(); |
1204 | 1.23k | Replace1 &= (N0 != N1) && !N1->hasOneUse(); |
1205 | 1.24k | |
1206 | 1.24k | // Combine Op here so it is presreved past replacements. |
1207 | 1.24k | CombineTo(Op.getNode(), RV); |
1208 | 1.24k | |
1209 | 1.24k | // If operands have a use ordering, make sur we deal with |
1210 | 1.24k | // predecessor first. |
1211 | 1.24k | if (Replace0 && 1.24k Replace117 && N0.getNode()->isPredecessorOf(N1.getNode())0 ) { |
1212 | 0 | std::swap(N0, N1); |
1213 | 0 | std::swap(NN0, NN1); |
1214 | 0 | } |
1215 | 1.24k | |
1216 | 1.24k | if (Replace01.24k ) { |
1217 | 17 | AddToWorklist(NN0.getNode()); |
1218 | 17 | ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); |
1219 | 17 | } |
1220 | 1.24k | if (Replace11.24k ) { |
1221 | 11 | AddToWorklist(NN1.getNode()); |
1222 | 11 | ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); |
1223 | 11 | } |
1224 | 1.24k | return Op; |
1225 | 1.24k | } |
1226 | 170 | return SDValue(); |
1227 | 170 | } |
1228 | | |
1229 | | /// Promote the specified integer shift operation if the target indicates it is |
1230 | | /// beneficial. e.g. On x86, it's usually better to promote i16 operations to |
1231 | | /// i32 since i16 instructions are longer. |
1232 | 1.05M | SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { |
1233 | 1.05M | if (!LegalOperations) |
1234 | 572k | return SDValue(); |
1235 | 479k | |
1236 | 479k | EVT VT = Op.getValueType(); |
1237 | 479k | if (VT.isVector() || 479k !VT.isInteger()477k ) |
1238 | 1.63k | return SDValue(); |
1239 | 477k | |
1240 | 477k | // If operation type is 'undesirable', e.g. i16 on x86, consider |
1241 | 477k | // promoting it. |
1242 | 477k | unsigned Opc = Op.getOpcode(); |
1243 | 477k | if (TLI.isTypeDesirableForOp(Opc, VT)) |
1244 | 474k | return SDValue(); |
1245 | 2.87k | |
1246 | 2.87k | EVT PVT = VT; |
1247 | 2.87k | // Consult target whether it is a good idea to promote this operation and |
1248 | 2.87k | // what's the right type to promote it to. |
1249 | 2.87k | if (TLI.IsDesirableToPromoteOp(Op, PVT)2.87k ) { |
1250 | 827 | assert(PVT != VT && "Don't know what type to promote to!"); |
1251 | 827 | |
1252 | 827 | DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); |
1253 | 827 | |
1254 | 827 | bool Replace = false; |
1255 | 827 | SDValue N0 = Op.getOperand(0); |
1256 | 827 | SDValue N1 = Op.getOperand(1); |
1257 | 827 | if (Opc == ISD::SRA) |
1258 | 0 | N0 = SExtPromoteOperand(N0, PVT); |
1259 | 827 | else if (827 Opc == ISD::SRL827 ) |
1260 | 737 | N0 = ZExtPromoteOperand(N0, PVT); |
1261 | 827 | else |
1262 | 90 | N0 = PromoteOperand(N0, PVT, Replace); |
1263 | 827 | |
1264 | 827 | if (!N0.getNode()) |
1265 | 0 | return SDValue(); |
1266 | 827 | |
1267 | 827 | SDLoc DL(Op); |
1268 | 827 | SDValue RV = |
1269 | 827 | DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1)); |
1270 | 827 | |
1271 | 827 | AddToWorklist(N0.getNode()); |
1272 | 827 | if (Replace) |
1273 | 17 | ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); |
1274 | 827 | |
1275 | 827 | // Deal with Op being deleted. |
1276 | 827 | if (Op && 827 Op.getOpcode() != ISD::DELETED_NODE827 ) |
1277 | 826 | return RV; |
1278 | 2.05k | } |
1279 | 2.05k | return SDValue(); |
1280 | 2.05k | } |
1281 | | |
1282 | 787k | SDValue DAGCombiner::PromoteExtend(SDValue Op) { |
1283 | 787k | if (!LegalOperations) |
1284 | 466k | return SDValue(); |
1285 | 320k | |
1286 | 320k | EVT VT = Op.getValueType(); |
1287 | 320k | if (VT.isVector() || 320k !VT.isInteger()270k ) |
1288 | 50.2k | return SDValue(); |
1289 | 270k | |
1290 | 270k | // If operation type is 'undesirable', e.g. i16 on x86, consider |
1291 | 270k | // promoting it. |
1292 | 270k | unsigned Opc = Op.getOpcode(); |
1293 | 270k | if (TLI.isTypeDesirableForOp(Opc, VT)) |
1294 | 270k | return SDValue(); |
1295 | 108 | |
1296 | 108 | EVT PVT = VT; |
1297 | 108 | // Consult target whether it is a good idea to promote this operation and |
1298 | 108 | // what's the right type to promote it to. |
1299 | 108 | if (TLI.IsDesirableToPromoteOp(Op, PVT)108 ) { |
1300 | 104 | assert(PVT != VT && "Don't know what type to promote to!"); |
1301 | 104 | // fold (aext (aext x)) -> (aext x) |
1302 | 104 | // fold (aext (zext x)) -> (zext x) |
1303 | 104 | // fold (aext (sext x)) -> (sext x) |
1304 | 104 | DEBUG(dbgs() << "\nPromoting "; |
1305 | 104 | Op.getNode()->dump(&DAG)); |
1306 | 104 | return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); |
1307 | 104 | } |
1308 | 4 | return SDValue(); |
1309 | 4 | } |
1310 | | |
1311 | 5.67M | bool DAGCombiner::PromoteLoad(SDValue Op) { |
1312 | 5.67M | if (!LegalOperations) |
1313 | 3.40M | return false; |
1314 | 2.26M | |
1315 | 2.26M | if (2.26M !ISD::isUNINDEXEDLoad(Op.getNode())2.26M ) |
1316 | 10.3k | return false; |
1317 | 2.25M | |
1318 | 2.25M | EVT VT = Op.getValueType(); |
1319 | 2.25M | if (VT.isVector() || 2.25M !VT.isInteger()2.06M ) |
1320 | 300k | return false; |
1321 | 1.95M | |
1322 | 1.95M | // If operation type is 'undesirable', e.g. i16 on x86, consider |
1323 | 1.95M | // promoting it. |
1324 | 1.95M | unsigned Opc = Op.getOpcode(); |
1325 | 1.95M | if (TLI.isTypeDesirableForOp(Opc, VT)) |
1326 | 1.95M | return false; |
1327 | 1.42k | |
1328 | 1.42k | EVT PVT = VT; |
1329 | 1.42k | // Consult target whether it is a good idea to promote this operation and |
1330 | 1.42k | // what's the right type to promote it to. |
1331 | 1.42k | if (TLI.IsDesirableToPromoteOp(Op, PVT)1.42k ) { |
1332 | 0 | assert(PVT != VT && "Don't know what type to promote to!"); |
1333 | 0 |
|
1334 | 0 | SDLoc DL(Op); |
1335 | 0 | SDNode *N = Op.getNode(); |
1336 | 0 | LoadSDNode *LD = cast<LoadSDNode>(N); |
1337 | 0 | EVT MemVT = LD->getMemoryVT(); |
1338 | 0 | ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) |
1339 | 0 | ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? 0 ISD::ZEXTLOAD0 |
1340 | 0 | : ISD::EXTLOAD) |
1341 | 0 | : LD->getExtensionType(); |
1342 | 0 | SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT, |
1343 | 0 | LD->getChain(), LD->getBasePtr(), |
1344 | 0 | MemVT, LD->getMemOperand()); |
1345 | 0 | SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); |
1346 | 0 |
|
1347 | 0 | DEBUG(dbgs() << "\nPromoting "; |
1348 | 0 | N->dump(&DAG); |
1349 | 0 | dbgs() << "\nTo: "; |
1350 | 0 | Result.getNode()->dump(&DAG); |
1351 | 0 | dbgs() << '\n'); |
1352 | 0 | WorklistRemover DeadNodes(*this); |
1353 | 0 | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); |
1354 | 0 | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); |
1355 | 0 | deleteAndRecombine(N); |
1356 | 0 | AddToWorklist(Result.getNode()); |
1357 | 0 | return true; |
1358 | 0 | } |
1359 | 1.42k | return false; |
1360 | 1.42k | } |
1361 | | |
1362 | | /// \brief Recursively delete a node which has no uses and any operands for |
1363 | | /// which it is the only use. |
1364 | | /// |
1365 | | /// Note that this both deletes the nodes and removes them from the worklist. |
1366 | | /// It also adds any nodes who have had a user deleted to the worklist as they |
1367 | | /// may now have only one use and subject to other combines. |
1368 | 190M | bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { |
1369 | 190M | if (!N->use_empty()) |
1370 | 179M | return false; |
1371 | 11.8M | |
1372 | 11.8M | SmallSetVector<SDNode *, 16> Nodes; |
1373 | 11.8M | Nodes.insert(N); |
1374 | 44.9M | do { |
1375 | 44.9M | N = Nodes.pop_back_val(); |
1376 | 44.9M | if (!N) |
1377 | 0 | continue; |
1378 | 44.9M | |
1379 | 44.9M | if (44.9M N->use_empty()44.9M ) { |
1380 | 20.1M | for (const SDValue &ChildN : N->op_values()) |
1381 | 33.6M | Nodes.insert(ChildN.getNode()); |
1382 | 20.1M | |
1383 | 20.1M | removeFromWorklist(N); |
1384 | 20.1M | DAG.DeleteNode(N); |
1385 | 44.9M | } else { |
1386 | 24.7M | AddToWorklist(N); |
1387 | 24.7M | } |
1388 | 44.9M | } while (!Nodes.empty()); |
1389 | 190M | return true; |
1390 | 190M | } |
1391 | | |
1392 | | //===----------------------------------------------------------------------===// |
1393 | | // Main DAG Combiner implementation |
1394 | | //===----------------------------------------------------------------------===// |
1395 | | |
1396 | 7.42M | void DAGCombiner::Run(CombineLevel AtLevel) { |
1397 | 7.42M | // set the instance variables, so that the various visit routines may use it. |
1398 | 7.42M | Level = AtLevel; |
1399 | 7.42M | LegalOperations = Level >= AfterLegalizeVectorOps; |
1400 | 7.42M | LegalTypes = Level >= AfterLegalizeTypes; |
1401 | 7.42M | |
1402 | 7.42M | // Add all the dag nodes to the worklist. |
1403 | 7.42M | for (SDNode &Node : DAG.allnodes()) |
1404 | 170M | AddToWorklist(&Node); |
1405 | 7.42M | |
1406 | 7.42M | // Create a dummy node (which is not added to allnodes), that adds a reference |
1407 | 7.42M | // to the root node, preventing it from being deleted, and tracking any |
1408 | 7.42M | // changes of the root. |
1409 | 7.42M | HandleSDNode Dummy(DAG.getRoot()); |
1410 | 7.42M | |
1411 | 7.42M | // While the worklist isn't empty, find a node and try to combine it. |
1412 | 193M | while (!WorklistMap.empty()193M ) { |
1413 | 185M | SDNode *N; |
1414 | 185M | // The Worklist holds the SDNodes in order, but it may contain null entries. |
1415 | 195M | do { |
1416 | 195M | N = Worklist.pop_back_val(); |
1417 | 195M | } while (!N); |
1418 | 185M | |
1419 | 185M | bool GoodWorklistEntry = WorklistMap.erase(N); |
1420 | 185M | (void)GoodWorklistEntry; |
1421 | 185M | assert(GoodWorklistEntry && |
1422 | 185M | "Found a worklist entry without a corresponding map entry!"); |
1423 | 185M | |
1424 | 185M | // If N has no uses, it is dead. Make sure to revisit all N's operands once |
1425 | 185M | // N is deleted from the DAG, since they too may now be dead or may have a |
1426 | 185M | // reduced number of uses, allowing other xforms. |
1427 | 185M | if (recursivelyDeleteUnusedNodes(N)) |
1428 | 6.86M | continue; |
1429 | 179M | |
1430 | 179M | WorklistRemover DeadNodes(*this); |
1431 | 179M | |
1432 | 179M | // If this combine is running after legalizing the DAG, re-legalize any |
1433 | 179M | // nodes pulled off the worklist. |
1434 | 179M | if (Level == AfterLegalizeDAG179M ) { |
1435 | 72.8M | SmallSetVector<SDNode *, 16> UpdatedNodes; |
1436 | 72.8M | bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); |
1437 | 72.8M | |
1438 | 16.6k | for (SDNode *LN : UpdatedNodes) { |
1439 | 16.6k | AddToWorklist(LN); |
1440 | 16.6k | AddUsersToWorklist(LN); |
1441 | 16.6k | } |
1442 | 72.8M | if (!NIsValid) |
1443 | 7.46k | continue; |
1444 | 179M | } |
1445 | 179M | |
1446 | 179M | DEBUG179M (dbgs() << "\nCombining: "; N->dump(&DAG)); |
1447 | 179M | |
1448 | 179M | // Add any operands of the new node which have not yet been combined to the |
1449 | 179M | // worklist as well. Because the worklist uniques things already, this |
1450 | 179M | // won't repeatedly process the same operand. |
1451 | 179M | CombinedNodes.insert(N); |
1452 | 179M | for (const SDValue &ChildN : N->op_values()) |
1453 | 303M | if (303M !CombinedNodes.count(ChildN.getNode())303M ) |
1454 | 276M | AddToWorklist(ChildN.getNode()); |
1455 | 179M | |
1456 | 179M | SDValue RV = combine(N); |
1457 | 179M | |
1458 | 179M | if (!RV.getNode()) |
1459 | 172M | continue; |
1460 | 6.87M | |
1461 | 6.87M | ++NodesCombined; |
1462 | 6.87M | |
1463 | 6.87M | // If we get back the same node we passed in, rather than a new node or |
1464 | 6.87M | // zero, we know that the node must have defined multiple values and |
1465 | 6.87M | // CombineTo was used. Since CombineTo takes care of the worklist |
1466 | 6.87M | // mechanics for us, we have no work to do in this case. |
1467 | 6.87M | if (RV.getNode() == N) |
1468 | 1.90M | continue; |
1469 | 4.96M | |
1470 | 6.87M | assert(N->getOpcode() != ISD::DELETED_NODE && |
1471 | 4.96M | RV.getOpcode() != ISD::DELETED_NODE && |
1472 | 4.96M | "Node was deleted but visit returned new node!"); |
1473 | 4.96M | |
1474 | 4.96M | DEBUG(dbgs() << " ... into: "; |
1475 | 4.96M | RV.getNode()->dump(&DAG)); |
1476 | 4.96M | |
1477 | 4.96M | if (N->getNumValues() == RV.getNode()->getNumValues()) |
1478 | 4.89M | DAG.ReplaceAllUsesWith(N, RV.getNode()); |
1479 | 64.2k | else { |
1480 | 64.2k | assert(N->getValueType(0) == RV.getValueType() && |
1481 | 64.2k | N->getNumValues() == 1 && "Type mismatch"); |
1482 | 64.2k | DAG.ReplaceAllUsesWith(N, &RV); |
1483 | 64.2k | } |
1484 | 185M | |
1485 | 185M | // Push the new node and any users onto the worklist |
1486 | 185M | AddToWorklist(RV.getNode()); |
1487 | 185M | AddUsersToWorklist(RV.getNode()); |
1488 | 185M | |
1489 | 185M | // Finally, if the node is now dead, remove it from the graph. The node |
1490 | 185M | // may not be dead if the replacement process recursively simplified to |
1491 | 185M | // something else needing this node. This will also take care of adding any |
1492 | 185M | // operands which have lost a user to the worklist. |
1493 | 185M | recursivelyDeleteUnusedNodes(N); |
1494 | 185M | } |
1495 | 7.42M | |
1496 | 7.42M | // If the root changed (e.g. it was a dead load, update the root). |
1497 | 7.42M | DAG.setRoot(Dummy.getValue()); |
1498 | 7.42M | DAG.RemoveDeadNodes(); |
1499 | 7.42M | } |
1500 | | |
1501 | 179M | SDValue DAGCombiner::visit(SDNode *N) { |
1502 | 179M | switch (N->getOpcode()) { |
1503 | 133M | default: break; |
1504 | 5.93M | case ISD::TokenFactor: return visitTokenFactor(N); |
1505 | 54.0k | case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); |
1506 | 9.28M | case ISD::ADD: return visitADD(N); |
1507 | 419k | case ISD::SUB: return visitSUB(N); |
1508 | 302 | case ISD::ADDC: return visitADDC(N); |
1509 | 4.95k | case ISD::UADDO: return visitUADDO(N); |
1510 | 1.18k | case ISD::SUBC: return visitSUBC(N); |
1511 | 2.20k | case ISD::USUBO: return visitUSUBO(N); |
1512 | 333 | case ISD::ADDE: return visitADDE(N); |
1513 | 15.9k | case ISD::ADDCARRY: return visitADDCARRY(N); |
1514 | 145 | case ISD::SUBE: return visitSUBE(N); |
1515 | 972 | case ISD::SUBCARRY: return visitSUBCARRY(N); |
1516 | 478k | case ISD::MUL: return visitMUL(N); |
1517 | 18.9k | case ISD::SDIV: return visitSDIV(N); |
1518 | 22.7k | case ISD::UDIV: return visitUDIV(N); |
1519 | 8.48k | case ISD::SREM: |
1520 | 8.48k | case ISD::UREM: return visitREM(N); |
1521 | 15.8k | case ISD::MULHU: return visitMULHU(N); |
1522 | 1.87k | case ISD::MULHS: return visitMULHS(N); |
1523 | 647 | case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); |
1524 | 2.96k | case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); |
1525 | 82 | case ISD::SMULO: return visitSMULO(N); |
1526 | 2.49k | case ISD::UMULO: return visitUMULO(N); |
1527 | 12.6k | case ISD::SMIN: |
1528 | 12.6k | case ISD::SMAX: |
1529 | 12.6k | case ISD::UMIN: |
1530 | 12.6k | case ISD::UMAX: return visitIMINMAX(N); |
1531 | 1.11M | case ISD::AND: return visitAND(N); |
1532 | 255k | case ISD::OR: return visitOR(N); |
1533 | 874k | case ISD::XOR: return visitXOR(N); |
1534 | 740k | case ISD::SHL: return visitSHL(N); |
1535 | 86.4k | case ISD::SRA: return visitSRA(N); |
1536 | 303k | case ISD::SRL: return visitSRL(N); |
1537 | 5.68k | case ISD::ROTR: |
1538 | 5.68k | case ISD::ROTL: return visitRotate(N); |
1539 | 1.11k | case ISD::ABS: return visitABS(N); |
1540 | 2.66k | case ISD::BSWAP: return visitBSWAP(N); |
1541 | 634 | case ISD::BITREVERSE: return visitBITREVERSE(N); |
1542 | 5.57k | case ISD::CTLZ: return visitCTLZ(N); |
1543 | 4.14k | case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); |
1544 | 728 | case ISD::CTTZ: return visitCTTZ(N); |
1545 | 1.00k | case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); |
1546 | 1.18k | case ISD::CTPOP: return visitCTPOP(N); |
1547 | 177k | case ISD::SELECT: return visitSELECT(N); |
1548 | 30.3k | case ISD::VSELECT: return visitVSELECT(N); |
1549 | 195k | case ISD::SELECT_CC: return visitSELECT_CC(N); |
1550 | 759k | case ISD::SETCC: return visitSETCC(N); |
1551 | 1.05k | case ISD::SETCCE: return visitSETCCE(N); |
1552 | 648 | case ISD::SETCCCARRY: return visitSETCCCARRY(N); |
1553 | 518k | case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); |
1554 | 388k | case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); |
1555 | 168k | case ISD::ANY_EXTEND: return visitANY_EXTEND(N); |
1556 | 510k | case ISD::AssertZext: return visitAssertZext(N); |
1557 | 99.7k | case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); |
1558 | 3.09k | case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); |
1559 | 3.91k | case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); |
1560 | 849k | case ISD::TRUNCATE: return visitTRUNCATE(N); |
1561 | 394k | case ISD::BITCAST: return visitBITCAST(N); |
1562 | 25.6k | case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); |
1563 | 142k | case ISD::FADD: return visitFADD(N); |
1564 | 29.4k | case ISD::FSUB: return visitFSUB(N); |
1565 | 123k | case ISD::FMUL: return visitFMUL(N); |
1566 | 6.34k | case ISD::FMA: return visitFMA(N); |
1567 | 121k | case ISD::FDIV: return visitFDIV(N); |
1568 | 264 | case ISD::FREM: return visitFREM(N); |
1569 | 2.78k | case ISD::FSQRT: return visitFSQRT(N); |
1570 | 3.42k | case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); |
1571 | 127k | case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); |
1572 | 92.9k | case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); |
1573 | 12.2k | case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); |
1574 | 17.0k | case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); |
1575 | 23.5k | case ISD::FP_ROUND: return visitFP_ROUND(N); |
1576 | 0 | case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); |
1577 | 68.2k | case ISD::FP_EXTEND: return visitFP_EXTEND(N); |
1578 | 12.7k | case ISD::FNEG: return visitFNEG(N); |
1579 | 8.32k | case ISD::FABS: return visitFABS(N); |
1580 | 1.53k | case ISD::FFLOOR: return visitFFLOOR(N); |
1581 | 2.84k | case ISD::FMINNUM: return visitFMINNUM(N); |
1582 | 2.59k | case ISD::FMAXNUM: return visitFMAXNUM(N); |
1583 | 970 | case ISD::FCEIL: return visitFCEIL(N); |
1584 | 1.13k | case ISD::FTRUNC: return visitFTRUNC(N); |
1585 | 2.86M | case ISD::BRCOND: return visitBRCOND(N); |
1586 | 2.09M | case ISD::BR_CC: return visitBR_CC(N); |
1587 | 6.04M | case ISD::LOAD: return visitLOAD(N); |
1588 | 8.90M | case ISD::STORE: return visitSTORE(N); |
1589 | 60.5k | case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); |
1590 | 349k | case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); |
1591 | 366k | case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); |
1592 | 31.3k | case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); |
1593 | 248k | case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); |
1594 | 53.1k | case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); |
1595 | 35.7k | case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); |
1596 | 38.7k | case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); |
1597 | 642 | case ISD::MGATHER: return visitMGATHER(N); |
1598 | 725 | case ISD::MLOAD: return visitMLOAD(N); |
1599 | 282 | case ISD::MSCATTER: return visitMSCATTER(N); |
1600 | 369 | case ISD::MSTORE: return visitMSTORE(N); |
1601 | 3.38k | case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); |
1602 | 4.21k | case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); |
1603 | 133M | } |
1604 | 133M | return SDValue(); |
1605 | 133M | } |
1606 | | |
1607 | 179M | SDValue DAGCombiner::combine(SDNode *N) { |
1608 | 179M | SDValue RV = visit(N); |
1609 | 179M | |
1610 | 179M | // If nothing happened, try a target-specific DAG combine. |
1611 | 179M | if (!RV.getNode()179M ) { |
1612 | 172M | assert(N->getOpcode() != ISD::DELETED_NODE && |
1613 | 172M | "Node was deleted but visit returned NULL!"); |
1614 | 172M | |
1615 | 172M | if (N->getOpcode() >= ISD::BUILTIN_OP_END || |
1616 | 172M | TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())161M ) { |
1617 | 32.3M | |
1618 | 32.3M | // Expose the DAG combiner to the target combiner impls. |
1619 | 32.3M | TargetLowering::DAGCombinerInfo |
1620 | 32.3M | DagCombineInfo(DAG, Level, false, this); |
1621 | 32.3M | |
1622 | 32.3M | RV = TLI.PerformDAGCombine(N, DagCombineInfo); |
1623 | 32.3M | } |
1624 | 172M | } |
1625 | 179M | |
1626 | 179M | // If nothing happened still, try promoting the operation. |
1627 | 179M | if (!RV.getNode()179M ) { |
1628 | 172M | switch (N->getOpcode()) { |
1629 | 154M | default: break; |
1630 | 10.3M | case ISD::ADD: |
1631 | 10.3M | case ISD::SUB: |
1632 | 10.3M | case ISD::MUL: |
1633 | 10.3M | case ISD::AND: |
1634 | 10.3M | case ISD::OR: |
1635 | 10.3M | case ISD::XOR: |
1636 | 10.3M | RV = PromoteIntBinOp(SDValue(N, 0)); |
1637 | 10.3M | break; |
1638 | 1.05M | case ISD::SHL: |
1639 | 1.05M | case ISD::SRA: |
1640 | 1.05M | case ISD::SRL: |
1641 | 1.05M | RV = PromoteIntShiftOp(SDValue(N, 0)); |
1642 | 1.05M | break; |
1643 | 787k | case ISD::SIGN_EXTEND: |
1644 | 787k | case ISD::ZERO_EXTEND: |
1645 | 787k | case ISD::ANY_EXTEND: |
1646 | 787k | RV = PromoteExtend(SDValue(N, 0)); |
1647 | 787k | break; |
1648 | 5.67M | case ISD::LOAD: |
1649 | 5.67M | if (PromoteLoad(SDValue(N, 0))) |
1650 | 0 | RV = SDValue(N, 0); |
1651 | 10.3M | break; |
1652 | 179M | } |
1653 | 179M | } |
1654 | 179M | |
1655 | 179M | // If N is a commutative binary node, try eliminate it if the commuted |
1656 | 179M | // version is already present in the DAG. |
1657 | 179M | if (179M !RV.getNode() && 179M TLI.isCommutativeBinOp(N->getOpcode())172M && |
1658 | 179M | N->getNumValues() == 110.2M ) { |
1659 | 10.2M | SDValue N0 = N->getOperand(0); |
1660 | 10.2M | SDValue N1 = N->getOperand(1); |
1661 | 10.2M | |
1662 | 10.2M | // Constant operands are canonicalized to RHS. |
1663 | 10.2M | if (N0 != N1 && 10.2M (isa<ConstantSDNode>(N0) || 10.1M !isa<ConstantSDNode>(N1)10.1M )) { |
1664 | 2.37M | SDValue Ops[] = {N1, N0}; |
1665 | 2.37M | SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, |
1666 | 2.37M | N->getFlags()); |
1667 | 2.37M | if (CSENode) |
1668 | 378 | return SDValue(CSENode, 0); |
1669 | 179M | } |
1670 | 10.2M | } |
1671 | 179M | |
1672 | 179M | return RV; |
1673 | 179M | } |
1674 | | |
1675 | | /// Given a node, return its input chain if it has one, otherwise return a null |
1676 | | /// sd operand. |
1677 | 7.43M | static SDValue getInputChainForNode(SDNode *N) { |
1678 | 7.43M | if (unsigned NumOps7.43M = N->getNumOperands()) { |
1679 | 7.43M | if (N->getOperand(0).getValueType() == MVT::Other) |
1680 | 7.41M | return N->getOperand(0); |
1681 | 24.2k | if (24.2k N->getOperand(NumOps-1).getValueType() == MVT::Other24.2k ) |
1682 | 24.2k | return N->getOperand(NumOps-1); |
1683 | 0 | for (unsigned i = 1; 0 i < NumOps-10 ; ++i0 ) |
1684 | 0 | if (0 N->getOperand(i).getValueType() == MVT::Other0 ) |
1685 | 0 | return N->getOperand(i); |
1686 | 7.43M | } |
1687 | 1.16k | return SDValue(); |
1688 | 7.43M | } |
1689 | | |
1690 | 5.93M | SDValue DAGCombiner::visitTokenFactor(SDNode *N) { |
1691 | 5.93M | // If N has two operands, where one has an input chain equal to the other, |
1692 | 5.93M | // the 'other' chain is redundant. |
1693 | 5.93M | if (N->getNumOperands() == 25.93M ) { |
1694 | 3.74M | if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) |
1695 | 55.7k | return N->getOperand(0); |
1696 | 3.69M | if (3.69M getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)3.69M ) |
1697 | 39.2k | return N->getOperand(1); |
1698 | 5.83M | } |
1699 | 5.83M | |
1700 | 5.83M | SmallVector<SDNode *, 8> TFs; // List of token factors to visit. |
1701 | 5.83M | SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. |
1702 | 5.83M | SmallPtrSet<SDNode*, 16> SeenOps; |
1703 | 5.83M | bool Changed = false; // If we should replace this token factor. |
1704 | 5.83M | |
1705 | 5.83M | // Start out with this token factor. |
1706 | 5.83M | TFs.push_back(N); |
1707 | 5.83M | |
1708 | 5.83M | // Iterate through token factors. The TFs grows when new token factors are |
1709 | 5.83M | // encountered. |
1710 | 12.5M | for (unsigned i = 0; i < TFs.size()12.5M ; ++i6.69M ) { |
1711 | 6.69M | SDNode *TF = TFs[i]; |
1712 | 6.69M | |
1713 | 6.69M | // Check each of the operands. |
1714 | 24.6M | for (const SDValue &Op : TF->op_values()) { |
1715 | 24.6M | switch (Op.getOpcode()) { |
1716 | 6.10k | case ISD::EntryToken: |
1717 | 6.10k | // Entry tokens don't need to be added to the list. They are |
1718 | 6.10k | // redundant. |
1719 | 6.10k | Changed = true; |
1720 | 6.10k | break; |
1721 | 24.6M | |
1722 | 1.16M | case ISD::TokenFactor: |
1723 | 1.16M | if (Op.hasOneUse() && 1.16M !is_contained(TFs, Op.getNode())852k ) { |
1724 | 852k | // Queue up for processing. |
1725 | 852k | TFs.push_back(Op.getNode()); |
1726 | 852k | // Clean up in case the token factor is removed. |
1727 | 852k | AddToWorklist(Op.getNode()); |
1728 | 852k | Changed = true; |
1729 | 852k | break; |
1730 | 852k | } |
1731 | 315k | LLVM_FALLTHROUGH315k ; |
1732 | 315k | |
1733 | 23.7M | default: |
1734 | 23.7M | // Only add if it isn't already in the list. |
1735 | 23.7M | if (SeenOps.insert(Op.getNode()).second) |
1736 | 23.6M | Ops.push_back(Op); |
1737 | 23.7M | else |
1738 | 104k | Changed = true; |
1739 | 1.16M | break; |
1740 | 24.6M | } |
1741 | 24.6M | } |
1742 | 6.69M | } |
1743 | 5.83M | |
1744 | 5.83M | // Remove Nodes that are chained to another node in the list. Do so |
1745 | 5.83M | // by walking up chains breath-first stopping when we've seen |
1746 | 5.83M | // another operand. In general we must climb to the EntryNode, but we can exit |
1747 | 5.83M | // early if we find all remaining work is associated with just one operand as |
1748 | 5.83M | // no further pruning is possible. |
1749 | 5.83M | |
1750 | 5.83M | // List of nodes to search through and original Ops from which they originate. |
1751 | 5.83M | SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist; |
1752 | 5.83M | SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op. |
1753 | 5.83M | SmallPtrSet<SDNode *, 16> SeenChains; |
1754 | 5.83M | bool DidPruneOps = false; |
1755 | 5.83M | |
1756 | 5.83M | unsigned NumLeftToConsider = 0; |
1757 | 23.6M | for (const SDValue &Op : Ops) { |
1758 | 23.6M | Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++)); |
1759 | 23.6M | OpWorkCount.push_back(1); |
1760 | 23.6M | } |
1761 | 5.83M | |
1762 | 32.7M | auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) { |
1763 | 32.7M | // If this is an Op, we can remove the op from the list. Remark any |
1764 | 32.7M | // search associated with it as from the current OpNumber. |
1765 | 32.7M | if (SeenOps.count(Op) != 032.7M ) { |
1766 | 513k | Changed = true; |
1767 | 513k | DidPruneOps = true; |
1768 | 513k | unsigned OrigOpNumber = 0; |
1769 | 28.1M | while (OrigOpNumber < Ops.size() && 28.1M Ops[OrigOpNumber].getNode() != Op28.1M ) |
1770 | 27.6M | OrigOpNumber++; |
1771 | 513k | assert((OrigOpNumber != Ops.size()) && |
1772 | 513k | "expected to find TokenFactor Operand"); |
1773 | 513k | // Re-mark worklist from OrigOpNumber to OpNumber |
1774 | 22.6M | for (unsigned i = CurIdx + 1; i < Worklist.size()22.6M ; ++i22.1M ) { |
1775 | 22.1M | if (Worklist[i].second == OrigOpNumber22.1M ) { |
1776 | 206k | Worklist[i].second = OpNumber; |
1777 | 206k | } |
1778 | 22.1M | } |
1779 | 513k | OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber]; |
1780 | 513k | OpWorkCount[OrigOpNumber] = 0; |
1781 | 513k | NumLeftToConsider--; |
1782 | 513k | } |
1783 | 32.7M | // Add if it's a new chain |
1784 | 32.7M | if (SeenChains.insert(Op).second32.7M ) { |
1785 | 14.0M | OpWorkCount[OpNumber]++; |
1786 | 14.0M | Worklist.push_back(std::make_pair(Op, OpNumber)); |
1787 | 14.0M | } |
1788 | 32.7M | }; |
1789 | 5.83M | |
1790 | 38.6M | for (unsigned i = 0; i < Worklist.size() && 38.6M i < 102436.7M ; ++i32.7M ) { |
1791 | 36.7M | // We need at least be consider at least 2 Ops to prune. |
1792 | 36.7M | if (NumLeftToConsider <= 1) |
1793 | 4.00M | break; |
1794 | 32.7M | auto CurNode = Worklist[i].first; |
1795 | 32.7M | auto CurOpNumber = Worklist[i].second; |
1796 | 32.7M | assert((OpWorkCount[CurOpNumber] > 0) && |
1797 | 32.7M | "Node should not appear in worklist"); |
1798 | 32.7M | switch (CurNode->getOpcode()) { |
1799 | 1.84M | case ISD::EntryToken: |
1800 | 1.84M | // Hitting EntryToken is the only way for the search to terminate without |
1801 | 1.84M | // hitting |
1802 | 1.84M | // another operand's search. Prevent us from marking this operand |
1803 | 1.84M | // considered. |
1804 | 1.84M | NumLeftToConsider++; |
1805 | 1.84M | break; |
1806 | 1.26M | case ISD::TokenFactor: |
1807 | 1.26M | for (const SDValue &Op : CurNode->op_values()) |
1808 | 4.95M | AddToWorklist(i, Op.getNode(), CurOpNumber); |
1809 | 1.26M | break; |
1810 | 7.37M | case ISD::CopyFromReg: |
1811 | 7.37M | case ISD::CopyToReg: |
1812 | 7.37M | AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber); |
1813 | 7.37M | break; |
1814 | 22.2M | default: |
1815 | 22.2M | if (auto *MemNode = dyn_cast<MemSDNode>(CurNode)) |
1816 | 20.4M | AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber); |
1817 | 7.37M | break; |
1818 | 32.7M | } |
1819 | 32.7M | OpWorkCount[CurOpNumber]--; |
1820 | 32.7M | if (OpWorkCount[CurOpNumber] == 0) |
1821 | 19.3M | NumLeftToConsider--; |
1822 | 36.7M | } |
1823 | 5.83M | |
1824 | 5.83M | // If we've changed things around then replace token factor. |
1825 | 5.83M | if (5.83M Changed5.83M ) { |
1826 | 822k | SDValue Result; |
1827 | 822k | if (Ops.empty()822k ) { |
1828 | 315 | // The entry token is the only possible outcome. |
1829 | 315 | Result = DAG.getEntryNode(); |
1830 | 822k | } else { |
1831 | 822k | if (DidPruneOps822k ) { |
1832 | 173k | SmallVector<SDValue, 8> PrunedOps; |
1833 | 173k | // |
1834 | 1.12M | for (const SDValue &Op : Ops) { |
1835 | 1.12M | if (SeenChains.count(Op.getNode()) == 0) |
1836 | 867k | PrunedOps.push_back(Op); |
1837 | 1.12M | } |
1838 | 173k | Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps); |
1839 | 822k | } else { |
1840 | 649k | Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); |
1841 | 649k | } |
1842 | 822k | } |
1843 | 822k | return Result; |
1844 | 822k | } |
1845 | 5.01M | return SDValue(); |
1846 | 5.01M | } |
1847 | | |
1848 | | /// MERGE_VALUES can always be eliminated. |
1849 | 54.0k | SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { |
1850 | 54.0k | WorklistRemover DeadNodes(*this); |
1851 | 54.0k | // Replacing results may cause a different MERGE_VALUES to suddenly |
1852 | 54.0k | // be CSE'd with N, and carry its uses with it. Iterate until no |
1853 | 54.0k | // uses remain, to ensure that the node can be safely deleted. |
1854 | 54.0k | // First add the users of this node to the work list so that they |
1855 | 54.0k | // can be tried again once they have new operands. |
1856 | 54.0k | AddUsersToWorklist(N); |
1857 | 54.0k | do { |
1858 | 166k | for (unsigned i = 0, e = N->getNumOperands(); i != e166k ; ++i112k ) |
1859 | 112k | DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); |
1860 | 54.0k | } while (!N->use_empty()); |
1861 | 54.0k | deleteAndRecombine(N); |
1862 | 54.0k | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
1863 | 54.0k | } |
1864 | | |
1865 | | /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a |
1866 | | /// ConstantSDNode pointer else nullptr. |
1867 | 4.74M | static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { |
1868 | 4.74M | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); |
1869 | 4.74M | return Const != nullptr && !Const->isOpaque()874k ? Const874k : nullptr3.87M ; |
1870 | 4.74M | } |
1871 | | |
1872 | 14.0M | SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { |
1873 | 14.0M | auto BinOpcode = BO->getOpcode(); |
1874 | 14.0M | assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB || |
1875 | 14.0M | BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV || |
1876 | 14.0M | BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM || |
1877 | 14.0M | BinOpcode == ISD::UREM || BinOpcode == ISD::AND || |
1878 | 14.0M | BinOpcode == ISD::OR || BinOpcode == ISD::XOR || |
1879 | 14.0M | BinOpcode == ISD::SHL || BinOpcode == ISD::SRL || |
1880 | 14.0M | BinOpcode == ISD::SRA || BinOpcode == ISD::FADD || |
1881 | 14.0M | BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL || |
1882 | 14.0M | BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) && |
1883 | 14.0M | "Unexpected binary operator"); |
1884 | 14.0M | |
1885 | 14.0M | // Bail out if any constants are opaque because we can't constant fold those. |
1886 | 14.0M | SDValue C1 = BO->getOperand(1); |
1887 | 14.0M | if (!isConstantOrConstantVector(C1, true) && |
1888 | 3.16M | !isConstantFPBuildVectorOrConstantFP(C1)) |
1889 | 3.06M | return SDValue(); |
1890 | 10.9M | |
1891 | 10.9M | // Don't do this unless the old select is going away. We want to eliminate the |
1892 | 10.9M | // binary operator, not replace a binop with a select. |
1893 | 10.9M | // TODO: Handle ISD::SELECT_CC. |
1894 | 10.9M | SDValue Sel = BO->getOperand(0); |
1895 | 10.9M | if (Sel.getOpcode() != ISD::SELECT || 10.9M !Sel.hasOneUse()19.0k ) |
1896 | 10.9M | return SDValue(); |
1897 | 5.75k | |
1898 | 5.75k | SDValue CT = Sel.getOperand(1); |
1899 | 5.75k | if (!isConstantOrConstantVector(CT, true) && |
1900 | 5.43k | !isConstantFPBuildVectorOrConstantFP(CT)) |
1901 | 5.42k | return SDValue(); |
1902 | 332 | |
1903 | 332 | SDValue CF = Sel.getOperand(2); |
1904 | 332 | if (!isConstantOrConstantVector(CF, true) && |
1905 | 129 | !isConstantFPBuildVectorOrConstantFP(CF)) |
1906 | 115 | return SDValue(); |
1907 | 217 | |
1908 | 217 | // We have a select-of-constants followed by a binary operator with a |
1909 | 217 | // constant. Eliminate the binop by pulling the constant math into the select. |
1910 | 217 | // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1 |
1911 | 217 | EVT VT = Sel.getValueType(); |
1912 | 217 | SDLoc DL(Sel); |
1913 | 217 | SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1); |
1914 | 217 | assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) || |
1915 | 217 | isConstantFPBuildVectorOrConstantFP(NewCT)) && |
1916 | 217 | "Failed to constant fold a binop with constant operands"); |
1917 | 217 | |
1918 | 217 | SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1); |
1919 | 217 | assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) || |
1920 | 217 | isConstantFPBuildVectorOrConstantFP(NewCF)) && |
1921 | 217 | "Failed to constant fold a binop with constant operands"); |
1922 | 217 | |
1923 | 217 | return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); |
1924 | 217 | } |
1925 | | |
1926 | 9.28M | SDValue DAGCombiner::visitADD(SDNode *N) { |
1927 | 9.28M | SDValue N0 = N->getOperand(0); |
1928 | 9.28M | SDValue N1 = N->getOperand(1); |
1929 | 9.28M | EVT VT = N0.getValueType(); |
1930 | 9.28M | SDLoc DL(N); |
1931 | 9.28M | |
1932 | 9.28M | // fold vector ops |
1933 | 9.28M | if (VT.isVector()9.28M ) { |
1934 | 93.4k | if (SDValue FoldedVOp = SimplifyVBinOp(N)) |
1935 | 18 | return FoldedVOp; |
1936 | 93.4k | |
1937 | 93.4k | // fold (add x, 0) -> x, vector edition |
1938 | 93.4k | if (93.4k ISD::isBuildVectorAllZeros(N1.getNode())93.4k ) |
1939 | 76 | return N0; |
1940 | 93.3k | if (93.3k ISD::isBuildVectorAllZeros(N0.getNode())93.3k ) |
1941 | 87 | return N1; |
1942 | 9.28M | } |
1943 | 9.28M | |
1944 | 9.28M | // fold (add x, undef) -> undef |
1945 | 9.28M | if (9.28M N0.isUndef()9.28M ) |
1946 | 3 | return N0; |
1947 | 9.28M | |
1948 | 9.28M | if (9.28M N1.isUndef()9.28M ) |
1949 | 4 | return N1; |
1950 | 9.28M | |
1951 | 9.28M | if (9.28M DAG.isConstantIntBuildVectorOrConstantInt(N0)9.28M ) { |
1952 | 2.28k | // canonicalize constant to RHS |
1953 | 2.28k | if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) |
1954 | 959 | return DAG.getNode(ISD::ADD, DL, VT, N1, N0); |
1955 | 1.32k | // fold (add c1, c2) -> c1+c2 |
1956 | 1.32k | return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(), |
1957 | 1.32k | N1.getNode()); |
1958 | 1.32k | } |
1959 | 9.27M | |
1960 | 9.27M | // fold (add x, 0) -> x |
1961 | 9.27M | if (9.27M isNullConstant(N1)9.27M ) |
1962 | 30 | return N0; |
1963 | 9.27M | |
1964 | 9.27M | if (9.27M isConstantOrConstantVector(N1, /* NoOpaque */ true)9.27M ) { |
1965 | 7.67M | // fold ((c1-A)+c2) -> (c1+c2)-A |
1966 | 7.67M | if (N0.getOpcode() == ISD::SUB && |
1967 | 7.67M | isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)5.95k ) { |
1968 | 566 | // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic. |
1969 | 566 | return DAG.getNode(ISD::SUB, DL, VT, |
1970 | 566 | DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)), |
1971 | 566 | N0.getOperand(1)); |
1972 | 566 | } |
1973 | 7.67M | |
1974 | 7.67M | // add (sext i1 X), 1 -> zext (not i1 X) |
1975 | 7.67M | // We don't transform this pattern: |
1976 | 7.67M | // add (zext i1 X), -1 -> sext (not i1 X) |
1977 | 7.67M | // because most (?) targets generate better code for the zext form. |
1978 | 7.67M | if (7.67M N0.getOpcode() == ISD::SIGN_EXTEND && 7.67M N0.hasOneUse()14.3k && |
1979 | 7.67M | isOneConstantOrOneSplatConstant(N1)3.64k ) { |
1980 | 1.74k | SDValue X = N0.getOperand(0); |
1981 | 1.74k | if ((!LegalOperations || |
1982 | 366 | (TLI.isOperationLegal(ISD::XOR, X.getValueType()) && |
1983 | 366 | TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) && |
1984 | 1.74k | X.getScalarValueSizeInBits() == 11.39k ) { |
1985 | 17 | SDValue Not = DAG.getNOT(DL, X, X.getValueType()); |
1986 | 17 | return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); |
1987 | 17 | } |
1988 | 7.67M | } |
1989 | 7.67M | |
1990 | 7.67M | // Undo the add -> or combine to merge constant offsets from a frame index. |
1991 | 7.67M | if (7.67M N0.getOpcode() == ISD::OR && |
1992 | 13.4k | isa<FrameIndexSDNode>(N0.getOperand(0)) && |
1993 | 2.84k | isa<ConstantSDNode>(N0.getOperand(1)) && |
1994 | 7.67M | DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))2.84k ) { |
1995 | 2.84k | SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1)); |
1996 | 2.84k | return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); |
1997 | 2.84k | } |
1998 | 9.27M | } |
1999 | 9.27M | |
2000 | 9.27M | if (SDValue 9.27M NewSel9.27M = foldBinOpIntoSelect(N)) |
2001 | 8 | return NewSel; |
2002 | 9.27M | |
2003 | 9.27M | // reassociate add |
2004 | 9.27M | if (SDValue 9.27M RADD9.27M = ReassociateOps(ISD::ADD, DL, N0, N1)) |
2005 | 583k | return RADD; |
2006 | 8.69M | |
2007 | 8.69M | // fold ((0-A) + B) -> B-A |
2008 | 8.69M | if (8.69M N0.getOpcode() == ISD::SUB && |
2009 | 24.1k | isNullConstantOrNullSplatConstant(N0.getOperand(0))) |
2010 | 271 | return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1)); |
2011 | 8.69M | |
2012 | 8.69M | // fold (A + (0-B)) -> A-B |
2013 | 8.69M | if (8.69M N1.getOpcode() == ISD::SUB && |
2014 | 14.6k | isNullConstantOrNullSplatConstant(N1.getOperand(0))) |
2015 | 4.61k | return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1)); |
2016 | 8.68M | |
2017 | 8.68M | // fold (A+(B-A)) -> B |
2018 | 8.68M | if (8.68M N1.getOpcode() == ISD::SUB && 8.68M N0 == N1.getOperand(1)10.0k ) |
2019 | 6 | return N1.getOperand(0); |
2020 | 8.68M | |
2021 | 8.68M | // fold ((B-A)+A) -> B |
2022 | 8.68M | if (8.68M N0.getOpcode() == ISD::SUB && 8.68M N1 == N0.getOperand(1)23.8k ) |
2023 | 10 | return N0.getOperand(0); |
2024 | 8.68M | |
2025 | 8.68M | // fold (A+(B-(A+C))) to (B-C) |
2026 | 8.68M | if (8.68M N1.getOpcode() == ISD::SUB && 8.68M N1.getOperand(1).getOpcode() == ISD::ADD10.0k && |
2027 | 527 | N0 == N1.getOperand(1).getOperand(0)) |
2028 | 2 | return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), |
2029 | 2 | N1.getOperand(1).getOperand(1)); |
2030 | 8.68M | |
2031 | 8.68M | // fold (A+(B-(C+A))) to (B-C) |
2032 | 8.68M | if (8.68M N1.getOpcode() == ISD::SUB && 8.68M N1.getOperand(1).getOpcode() == ISD::ADD10.0k && |
2033 | 525 | N0 == N1.getOperand(1).getOperand(1)) |
2034 | 3 | return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0), |
2035 | 3 | N1.getOperand(1).getOperand(0)); |
2036 | 8.68M | |
2037 | 8.68M | // fold (A+((B-A)+or-C)) to (B+or-C) |
2038 | 8.68M | if (8.68M (N1.getOpcode() == ISD::SUB || 8.68M N1.getOpcode() == ISD::ADD8.67M ) && |
2039 | 47.7k | N1.getOperand(0).getOpcode() == ISD::SUB && |
2040 | 1.24k | N0 == N1.getOperand(0).getOperand(1)) |
2041 | 7 | return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0), |
2042 | 7 | N1.getOperand(1)); |
2043 | 8.68M | |
2044 | 8.68M | // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant |
2045 | 8.68M | if (8.68M N0.getOpcode() == ISD::SUB && 8.68M N1.getOpcode() == ISD::SUB23.8k ) { |
2046 | 1.09k | SDValue N00 = N0.getOperand(0); |
2047 | 1.09k | SDValue N01 = N0.getOperand(1); |
2048 | 1.09k | SDValue N10 = N1.getOperand(0); |
2049 | 1.09k | SDValue N11 = N1.getOperand(1); |
2050 | 1.09k | |
2051 | 1.09k | if (isConstantOrConstantVector(N00) || 1.09k isConstantOrConstantVector(N10)1.08k ) |
2052 | 17 | return DAG.getNode(ISD::SUB, DL, VT, |
2053 | 17 | DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), |
2054 | 17 | DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); |
2055 | 8.68M | } |
2056 | 8.68M | |
2057 | 8.68M | if (8.68M SimplifyDemandedBits(SDValue(N, 0))8.68M ) |
2058 | 8.28k | return SDValue(N, 0); |
2059 | 8.67M | |
2060 | 8.67M | // fold (a+b) -> (a|b) iff a and b share no bits. |
2061 | 8.67M | if (8.67M (!LegalOperations || 8.67M TLI.isOperationLegal(ISD::OR, VT)3.73M ) && |
2062 | 8.62M | DAG.haveNoCommonBitsSet(N0, N1)) |
2063 | 20.2k | return DAG.getNode(ISD::OR, DL, VT, N0, N1); |
2064 | 8.65M | |
2065 | 8.65M | if (SDValue 8.65M Combined8.65M = visitADDLike(N0, N1, N)) |
2066 | 4.08k | return Combined; |
2067 | 8.65M | |
2068 | 8.65M | if (SDValue 8.65M Combined8.65M = visitADDLike(N1, N0, N)) |
2069 | 220 | return Combined; |
2070 | 8.65M | |
2071 | 8.65M | return SDValue(); |
2072 | 8.65M | } |
2073 | | |
2074 | 729k | static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { |
2075 | 729k | bool Masked = false; |
2076 | 729k | |
2077 | 729k | // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization. |
2078 | 735k | while (true735k ) { |
2079 | 735k | if (V.getOpcode() == ISD::TRUNCATE || 735k V.getOpcode() == ISD::ZERO_EXTEND732k ) { |
2080 | 5.63k | V = V.getOperand(0); |
2081 | 5.63k | continue; |
2082 | 5.63k | } |
2083 | 730k | |
2084 | 730k | if (730k V.getOpcode() == ISD::AND && 730k isOneConstant(V.getOperand(1))7.21k ) { |
2085 | 1.14k | Masked = true; |
2086 | 1.14k | V = V.getOperand(0); |
2087 | 1.14k | continue; |
2088 | 1.14k | } |
2089 | 729k | |
2090 | 729k | break; |
2091 | 729k | } |
2092 | 729k | |
2093 | 729k | // If this is not a carry, return. |
2094 | 729k | if (V.getResNo() != 1) |
2095 | 726k | return SDValue(); |
2096 | 2.99k | |
2097 | 2.99k | if (2.99k V.getOpcode() != ISD::ADDCARRY && 2.99k V.getOpcode() != ISD::SUBCARRY2.72k && |
2098 | 2.99k | V.getOpcode() != ISD::UADDO2.72k && V.getOpcode() != ISD::USUBO2.51k ) |
2099 | 2.51k | return SDValue(); |
2100 | 478 | |
2101 | 478 | // If the result is masked, then no matter what kind of bool it is we can |
2102 | 478 | // return. If it isn't, then we need to make sure the bool type is either 0 or |
2103 | 478 | // 1 and not other values. |
2104 | 478 | if (478 Masked || |
2105 | 242 | TLI.getBooleanContents(V.getValueType()) == |
2106 | 242 | TargetLoweringBase::ZeroOrOneBooleanContent) |
2107 | 478 | return V; |
2108 | 0 |
|
2109 | 0 | return SDValue(); |
2110 | 0 | } |
2111 | | |
2112 | 17.3M | SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) { |
2113 | 17.3M | EVT VT = N0.getValueType(); |
2114 | 17.3M | SDLoc DL(LocReference); |
2115 | 17.3M | |
2116 | 17.3M | // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) |
2117 | 17.3M | if (N1.getOpcode() == ISD::SHL && 17.3M N1.getOperand(0).getOpcode() == ISD::SUB549k && |
2118 | 4.56k | isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) |
2119 | 359 | return DAG.getNode(ISD::SUB, DL, VT, N0, |
2120 | 359 | DAG.getNode(ISD::SHL, DL, VT, |
2121 | 359 | N1.getOperand(0).getOperand(1), |
2122 | 359 | N1.getOperand(1))); |
2123 | 17.3M | |
2124 | 17.3M | if (17.3M N1.getOpcode() == ISD::AND17.3M ) { |
2125 | 37.1k | SDValue AndOp0 = N1.getOperand(0); |
2126 | 37.1k | unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); |
2127 | 37.1k | unsigned DestBits = VT.getScalarSizeInBits(); |
2128 | 37.1k | |
2129 | 37.1k | // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) |
2130 | 37.1k | // and similar xforms where the inner op is either ~0 or 0. |
2131 | 37.1k | if (NumSignBits == DestBits && |
2132 | 677 | isOneConstantOrOneSplatConstant(N1->getOperand(1))) |
2133 | 240 | return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0); |
2134 | 17.3M | } |
2135 | 17.3M | |
2136 | 17.3M | // add (sext i1), X -> sub X, (zext i1) |
2137 | 17.3M | if (17.3M N0.getOpcode() == ISD::SIGN_EXTEND && |
2138 | 64.8k | N0.getOperand(0).getValueType() == MVT::i1 && |
2139 | 17.3M | !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)3.09k ) { |
2140 | 3.06k | SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); |
2141 | 3.06k | return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); |
2142 | 3.06k | } |
2143 | 17.3M | |
2144 | 17.3M | // add X, (sextinreg Y i1) -> sub X, (and Y 1) |
2145 | 17.3M | if (17.3M N1.getOpcode() == ISD::SIGN_EXTEND_INREG17.3M ) { |
2146 | 3.92k | VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); |
2147 | 3.92k | if (TN->getVT() == MVT::i13.92k ) { |
2148 | 627 | SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), |
2149 | 627 | DAG.getConstant(1, DL, VT)); |
2150 | 627 | return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); |
2151 | 627 | } |
2152 | 17.3M | } |
2153 | 17.3M | |
2154 | 17.3M | // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) |
2155 | 17.3M | if (17.3M N1.getOpcode() == ISD::ADDCARRY && 17.3M isNullConstant(N1.getOperand(1))19 ) |
2156 | 12 | return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), |
2157 | 12 | N0, N1.getOperand(0), N1.getOperand(2)); |
2158 | 17.3M | |
2159 | 17.3M | // (add X, Carry) -> (addcarry X, 0, Carry) |
2160 | 17.3M | if (17.3M TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)17.3M ) |
2161 | 692k | if (SDValue 692k Carry692k = getAsCarry(TLI, N1)) |
2162 | 4 | return DAG.getNode(ISD::ADDCARRY, DL, |
2163 | 4 | DAG.getVTList(VT, Carry.getValueType()), N0, |
2164 | 4 | DAG.getConstant(0, DL, VT), Carry); |
2165 | 17.3M | |
2166 | 17.3M | return SDValue(); |
2167 | 17.3M | } |
2168 | | |
2169 | 302 | SDValue DAGCombiner::visitADDC(SDNode *N) { |
2170 | 302 | SDValue N0 = N->getOperand(0); |
2171 | 302 | SDValue N1 = N->getOperand(1); |
2172 | 302 | EVT VT = N0.getValueType(); |
2173 | 302 | SDLoc DL(N); |
2174 | 302 | |
2175 | 302 | // If the flag result is dead, turn this into an ADD. |
2176 | 302 | if (!N->hasAnyUseOfValue(1)) |
2177 | 5 | return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), |
2178 | 5 | DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); |
2179 | 297 | |
2180 | 297 | // canonicalize constant to RHS. |
2181 | 297 | ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); |
2182 | 297 | ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); |
2183 | 297 | if (N0C && 297 !N1C0 ) |
2184 | 0 | return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0); |
2185 | 297 | |
2186 | 297 | // fold (addc x, 0) -> x + no carry out |
2187 | 297 | if (297 isNullConstant(N1)297 ) |
2188 | 0 | return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, |
2189 | 0 | DL, MVT::Glue)); |
2190 | 297 | |
2191 | 297 | // If it cannot overflow, transform into an add. |
2192 | 297 | if (297 DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never297 ) |
2193 | 2 | return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), |
2194 | 2 | DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); |
2195 | 295 | |
2196 | 295 | return SDValue(); |
2197 | 295 | } |
2198 | | |
2199 | 4.95k | SDValue DAGCombiner::visitUADDO(SDNode *N) { |
2200 | 4.95k | SDValue N0 = N->getOperand(0); |
2201 | 4.95k | SDValue N1 = N->getOperand(1); |
2202 | 4.95k | EVT VT = N0.getValueType(); |
2203 | 4.95k | if (VT.isVector()) |
2204 | 0 | return SDValue(); |
2205 | 4.95k | |
2206 | 4.95k | EVT CarryVT = N->getValueType(1); |
2207 | 4.95k | SDLoc DL(N); |
2208 | 4.95k | |
2209 | 4.95k | // If the flag result is dead, turn this into an ADD. |
2210 | 4.95k | if (!N->hasAnyUseOfValue(1)) |
2211 | 634 | return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), |
2212 | 634 | DAG.getUNDEF(CarryVT)); |
2213 | 4.31k | |
2214 | 4.31k | // canonicalize constant to RHS. |
2215 | 4.31k | ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); |
2216 | 4.31k | ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); |
2217 | 4.31k | if (N0C && 4.31k !N1C106 ) |
2218 | 2 | return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0); |
2219 | 4.31k | |
2220 | 4.31k | // fold (uaddo x, 0) -> x + no carry out |
2221 | 4.31k | if (4.31k isNullConstant(N1)4.31k ) |
2222 | 368 | return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); |
2223 | 3.94k | |
2224 | 3.94k | // If it cannot overflow, transform into an add. |
2225 | 3.94k | if (3.94k DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never3.94k ) |
2226 | 87 | return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), |
2227 | 87 | DAG.getConstant(0, DL, CarryVT)); |
2228 | 3.86k | |
2229 | 3.86k | if (SDValue 3.86k Combined3.86k = visitUADDOLike(N0, N1, N)) |
2230 | 69 | return Combined; |
2231 | 3.79k | |
2232 | 3.79k | if (SDValue 3.79k Combined3.79k = visitUADDOLike(N1, N0, N)) |
2233 | 8 | return Combined; |
2234 | 3.78k | |
2235 | 3.78k | return SDValue(); |
2236 | 3.78k | } |
2237 | | |
2238 | 7.65k | SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) { |
2239 | 7.65k | auto VT = N0.getValueType(); |
2240 | 7.65k | |
2241 | 7.65k | // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) |
2242 | 7.65k | // If Y + 1 cannot overflow. |
2243 | 7.65k | if (N1.getOpcode() == ISD::ADDCARRY && 7.65k isNullConstant(N1.getOperand(1))247 ) { |
2244 | 194 | SDValue Y = N1.getOperand(0); |
2245 | 194 | SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType()); |
2246 | 194 | if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never) |
2247 | 77 | return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y, |
2248 | 77 | N1.getOperand(2)); |
2249 | 7.57k | } |
2250 | 7.57k | |
2251 | 7.57k | // (uaddo X, Carry) -> (addcarry X, 0, Carry) |
2252 | 7.57k | if (7.57k TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)7.57k ) |
2253 | 6.64k | if (SDValue 6.64k Carry6.64k = getAsCarry(TLI, N1)) |
2254 | 0 | return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, |
2255 | 0 | DAG.getConstant(0, SDLoc(N), VT), Carry); |
2256 | 7.57k | |
2257 | 7.57k | return SDValue(); |
2258 | 7.57k | } |
2259 | | |
2260 | 333 | SDValue DAGCombiner::visitADDE(SDNode *N) { |
2261 | 333 | SDValue N0 = N->getOperand(0); |
2262 | 333 | SDValue N1 = N->getOperand(1); |
2263 | 333 | SDValue CarryIn = N->getOperand(2); |
2264 | 333 | |
2265 | 333 | // canonicalize constant to RHS |
2266 | 333 | ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); |
2267 | 333 | ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); |
2268 | 333 | if (N0C && 333 !N1C6 ) |
2269 | 4 | return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), |
2270 | 4 | N1, N0, CarryIn); |
2271 | 329 | |
2272 | 329 | // fold (adde x, y, false) -> (addc x, y) |
2273 | 329 | if (329 CarryIn.getOpcode() == ISD::CARRY_FALSE329 ) |
2274 | 2 | return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); |
2275 | 327 | |
2276 | 327 | return SDValue(); |
2277 | 327 | } |
2278 | | |
2279 | 15.9k | SDValue DAGCombiner::visitADDCARRY(SDNode *N) { |
2280 | 15.9k | SDValue N0 = N->getOperand(0); |
2281 | 15.9k | SDValue N1 = N->getOperand(1); |
2282 | 15.9k | SDValue CarryIn = N->getOperand(2); |
2283 | 15.9k | SDLoc DL(N); |
2284 | 15.9k | |
2285 | 15.9k | // canonicalize constant to RHS |
2286 | 15.9k | ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); |
2287 | 15.9k | ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); |
2288 | 15.9k | if (N0C && 15.9k !N1C604 ) |
2289 | 224 | return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn); |
2290 | 15.6k | |
2291 | 15.6k | // fold (addcarry x, y, false) -> (uaddo x, y) |
2292 | 15.6k | if (15.6k isNullConstant(CarryIn)15.6k ) |
2293 | 469 | return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1); |
2294 | 15.2k | |
2295 | 15.2k | // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry. |
2296 | 15.2k | if (15.2k isNullConstant(N0) && 15.2k isNullConstant(N1)293 ) { |
2297 | 288 | EVT VT = N0.getValueType(); |
2298 | 288 | EVT CarryVT = CarryIn.getValueType(); |
2299 | 288 | SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT); |
2300 | 288 | AddToWorklist(CarryExt.getNode()); |
2301 | 288 | return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt, |
2302 | 288 | DAG.getConstant(1, DL, VT)), |
2303 | 288 | DAG.getConstant(0, DL, CarryVT)); |
2304 | 288 | } |
2305 | 14.9k | |
2306 | 14.9k | if (SDValue 14.9k Combined14.9k = visitADDCARRYLike(N0, N1, CarryIn, N)) |
2307 | 51 | return Combined; |
2308 | 14.8k | |
2309 | 14.8k | if (SDValue 14.8k Combined14.8k = visitADDCARRYLike(N1, N0, CarryIn, N)) |
2310 | 0 | return Combined; |
2311 | 14.8k | |
2312 | 14.8k | return SDValue(); |
2313 | 14.8k | } |
2314 | | |
2315 | | SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, |
2316 | 29.8k | SDNode *N) { |
2317 | 29.8k | // Iff the flag result is dead: |
2318 | 29.8k | // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry) |
2319 | 29.8k | if ((N0.getOpcode() == ISD::ADD || |
2320 | 29.5k | (N0.getOpcode() == ISD::UADDO && 29.5k N0.getResNo() == 01.72k )) && |
2321 | 29.8k | isNullConstant(N1)1.96k && !N->hasAnyUseOfValue(1)1.13k ) |
2322 | 49 | return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), |
2323 | 49 | N0.getOperand(0), N0.getOperand(1), CarryIn); |
2324 | 29.7k | |
2325 | 29.7k | /** |
2326 | 29.7k | * When one of the addcarry argument is itself a carry, we may be facing |
2327 | 29.7k | * a diamond carry propagation. In which case we try to transform the DAG |
2328 | 29.7k | * to ensure linear carry propagation if that is possible. |
2329 | 29.7k | * |
2330 | 29.7k | * We are trying to get: |
2331 | 29.7k | * (addcarry X, 0, (addcarry A, B, Z):Carry) |
2332 | 29.7k | */ |
2333 | 29.7k | if (auto 29.7k Y29.7k = getAsCarry(TLI, N1)) { |
2334 | 474 | /** |
2335 | 474 | * (uaddo A, B) |
2336 | 474 | * / \ |
2337 | 474 | * Carry Sum |
2338 | 474 | * | \ |
2339 | 474 | * | (addcarry *, 0, Z) |
2340 | 474 | * | / |
2341 | 474 | * \ Carry |
2342 | 474 | * | / |
2343 | 474 | * (addcarry X, *, *) |
2344 | 474 | */ |
2345 | 474 | if (Y.getOpcode() == ISD::UADDO && |
2346 | 207 | CarryIn.getResNo() == 1 && |
2347 | 207 | CarryIn.getOpcode() == ISD::ADDCARRY && |
2348 | 4 | isNullConstant(CarryIn.getOperand(1)) && |
2349 | 474 | CarryIn.getOperand(0) == Y.getValue(0)2 ) { |
2350 | 2 | auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(), |
2351 | 2 | Y.getOperand(0), Y.getOperand(1), |
2352 | 2 | CarryIn.getOperand(2)); |
2353 | 2 | AddToWorklist(NewY.getNode()); |
2354 | 2 | return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, |
2355 | 2 | DAG.getConstant(0, SDLoc(N), N0.getValueType()), |
2356 | 2 | NewY.getValue(1)); |
2357 | 2 | } |
2358 | 29.7k | } |
2359 | 29.7k | |
2360 | 29.7k | return SDValue(); |
2361 | 29.7k | } |
2362 | | |
2363 | | // Since it may not be valid to emit a fold to zero for vector initializers |
2364 | | // check if we can before folding. |
2365 | | static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, |
2366 | | SelectionDAG &DAG, bool LegalOperations, |
2367 | 23 | bool LegalTypes) { |
2368 | 23 | if (!VT.isVector()) |
2369 | 13 | return DAG.getConstant(0, DL, VT); |
2370 | 10 | if (10 !LegalOperations || 10 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)0 ) |
2371 | 10 | return DAG.getConstant(0, DL, VT); |
2372 | 0 | return SDValue(); |
2373 | 0 | } |
2374 | | |
2375 | 419k | SDValue DAGCombiner::visitSUB(SDNode *N) { |
2376 | 419k | SDValue N0 = N->getOperand(0); |
2377 | 419k | SDValue N1 = N->getOperand(1); |
2378 | 419k | EVT VT = N0.getValueType(); |
2379 | 419k | SDLoc DL(N); |
2380 | 419k | |
2381 | 419k | // fold vector ops |
2382 | 419k | if (VT.isVector()419k ) { |
2383 | 15.3k | if (SDValue FoldedVOp = SimplifyVBinOp(N)) |
2384 | 3 | return FoldedVOp; |
2385 | 15.3k | |
2386 | 15.3k | // fold (sub x, 0) -> x, vector edition |
2387 | 15.3k | if (15.3k ISD::isBuildVectorAllZeros(N1.getNode())15.3k ) |
2388 | 8 | return N0; |
2389 | 419k | } |
2390 | 419k | |
2391 | 419k | // fold (sub x, x) -> 0 |
2392 | 419k | // FIXME: Refactor this and xor and other similar operations together. |
2393 | 419k | if (419k N0 == N1419k ) |
2394 | 10 | return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes); |
2395 | 419k | if (419k DAG.isConstantIntBuildVectorOrConstantInt(N0) && |
2396 | 419k | DAG.isConstantIntBuildVectorOrConstantInt(N1)112k ) { |
2397 | 2 | // fold (sub c1, c2) -> c1-c2 |
2398 | 2 | return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), |
2399 | 2 | N1.getNode()); |
2400 | 2 | } |
2401 | 419k | |
2402 | 419k | if (SDValue 419k NewSel419k = foldBinOpIntoSelect(N)) |
2403 | 2 | return NewSel; |
2404 | 419k | |
2405 | 419k | ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); |
2406 | 419k | |
2407 | 419k | // fold (sub x, c) -> (add x, -c) |
2408 | 419k | if (N1C419k ) { |
2409 | 4.42k | return DAG.getNode(ISD::ADD, DL, VT, N0, |
2410 | 4.42k | DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); |
2411 | 4.42k | } |
2412 | 415k | |
2413 | 415k | if (415k isNullConstantOrNullSplatConstant(N0)415k ) { |
2414 | 41.3k | unsigned BitWidth = VT.getScalarSizeInBits(); |
2415 | 41.3k | // Right-shifting everything out but the sign bit followed by negation is |
2416 | 41.3k | // the same as flipping arithmetic/logical shift type without the negation: |
2417 | 41.3k | // -(X >>u 31) -> (X >>s 31) |
2418 | 41.3k | // -(X >>s 31) -> (X >>u 31) |
2419 | 41.3k | if (N1->getOpcode() == ISD::SRA || 41.3k N1->getOpcode() == ISD::SRL35.3k ) { |
2420 | 6.00k | ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1)); |
2421 | 6.00k | if (ShiftAmt && 6.00k ShiftAmt->getZExtValue() == BitWidth - 15.85k ) { |
2422 | 10 | auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL2 : ISD::SRA8 ; |
2423 | 10 | if (!LegalOperations || 10 TLI.isOperationLegal(NewSh, VT)4 ) |
2424 | 10 | return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1)); |
2425 | 41.2k | } |
2426 | 6.00k | } |
2427 | 41.2k | |
2428 | 41.2k | // 0 - X --> 0 if the sub is NUW. |
2429 | 41.2k | if (41.2k N->getFlags().hasNoUnsignedWrap()41.2k ) |
2430 | 2 | return N0; |
2431 | 41.2k | |
2432 | 41.2k | if (41.2k DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))41.2k ) { |
2433 | 6 | // N1 is either 0 or the minimum signed value. If the sub is NSW, then |
2434 | 6 | // N1 must be 0 because negating the minimum signed value is undefined. |
2435 | 6 | if (N->getFlags().hasNoSignedWrap()) |
2436 | 2 | return N0; |
2437 | 4 | |
2438 | 4 | // 0 - X --> X if X is 0 or the minimum signed value. |
2439 | 4 | return N1; |
2440 | 4 | } |
2441 | 41.3k | } |
2442 | 415k | |
2443 | 415k | // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) |
2444 | 415k | if (415k isAllOnesConstantOrAllOnesSplatConstant(N0)415k ) |
2445 | 392 | return DAG.getNode(ISD::XOR, DL, VT, N1, N0); |
2446 | 414k | |
2447 | 414k | // fold A-(A-B) -> B |
2448 | 414k | if (414k N1.getOpcode() == ISD::SUB && 414k N0 == N1.getOperand(0)4.07k ) |
2449 | 597 | return N1.getOperand(1); |
2450 | 414k | |
2451 | 414k | // fold (A+B)-A -> B |
2452 | 414k | if (414k N0.getOpcode() == ISD::ADD && 414k N0.getOperand(0) == N115.5k ) |
2453 | 17 | return N0.getOperand(1); |
2454 | 414k | |
2455 | 414k | // fold (A+B)-B -> A |
2456 | 414k | if (414k N0.getOpcode() == ISD::ADD && 414k N0.getOperand(1) == N115.5k ) |
2457 | 5 | return N0.getOperand(0); |
2458 | 414k | |
2459 | 414k | // fold C2-(A+C1) -> (C2-C1)-A |
2460 | 414k | if (414k N1.getOpcode() == ISD::ADD414k ) { |
2461 | 5.04k | SDValue N11 = N1.getOperand(1); |
2462 | 5.04k | if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && |
2463 | 5.04k | isConstantOrConstantVector(N11, /* NoOpaques */ true)736 ) { |
2464 | 307 | SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11); |
2465 | 307 | return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); |
2466 | 307 | } |
2467 | 414k | } |
2468 | 414k | |
2469 | 414k | // fold ((A+(B+or-C))-B) -> A+or-C |
2470 | 414k | if (414k N0.getOpcode() == ISD::ADD && |
2471 | 15.5k | (N0.getOperand(1).getOpcode() == ISD::SUB || |
2472 | 15.5k | N0.getOperand(1).getOpcode() == ISD::ADD) && |
2473 | 896 | N0.getOperand(1).getOperand(0) == N1) |
2474 | 8 | return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0), |
2475 | 8 | N0.getOperand(1).getOperand(1)); |
2476 | 414k | |
2477 | 414k | // fold ((A+(C+B))-B) -> A+C |
2478 | 414k | if (414k N0.getOpcode() == ISD::ADD && 414k N0.getOperand(1).getOpcode() == ISD::ADD15.4k && |
2479 | 545 | N0.getOperand(1).getOperand(1) == N1) |
2480 | 1 | return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), |
2481 | 1 | N0.getOperand(1).getOperand(0)); |
2482 | 414k | |
2483 | 414k | // fold ((A-(B-C))-C) -> A-B |
2484 | 414k | if (414k N0.getOpcode() == ISD::SUB && 414k N0.getOperand(1).getOpcode() == ISD::SUB5.59k && |
2485 | 251 | N0.getOperand(1).getOperand(1) == N1) |
2486 | 3 | return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), |
2487 | 3 | N0.getOperand(1).getOperand(0)); |
2488 | 414k | |
2489 | 414k | // If either operand of a sub is undef, the result is undef |
2490 | 414k | if (414k N0.isUndef()414k ) |
2491 | 0 | return N0; |
2492 | 414k | if (414k N1.isUndef()414k ) |
2493 | 0 | return N1; |
2494 | 414k | |
2495 | 414k | // If the relocation model supports it, consider symbol offsets. |
2496 | 414k | if (GlobalAddressSDNode *414k GA414k = dyn_cast<GlobalAddressSDNode>(N0)) |
2497 | 24 | if (24 !LegalOperations && 24 TLI.isOffsetFoldingLegal(GA)24 ) { |
2498 | 0 | // fold (sub Sym, c) -> Sym-c |
2499 | 0 | if (N1C && 0 GA->getOpcode() == ISD::GlobalAddress0 ) |
2500 | 0 | return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, |
2501 | 0 | GA->getOffset() - |
2502 | 0 | (uint64_t)N1C->getSExtValue()); |
2503 | 0 | // fold (sub Sym+c1, Sym+c2) -> c1-c2 |
2504 | 0 | if (GlobalAddressSDNode *0 GB0 = dyn_cast<GlobalAddressSDNode>(N1)) |
2505 | 0 | if (0 GA->getGlobal() == GB->getGlobal()0 ) |
2506 | 0 | return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), |
2507 | 0 | DL, VT); |
2508 | 414k | } |
2509 | 414k | |
2510 | 414k | // sub X, (sextinreg Y i1) -> add X, (and Y 1) |
2511 | 414k | if (414k N1.getOpcode() == ISD::SIGN_EXTEND_INREG414k ) { |
2512 | 2.83k | VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); |
2513 | 2.83k | if (TN->getVT() == MVT::i12.83k ) { |
2514 | 54 | SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), |
2515 | 54 | DAG.getConstant(1, DL, VT)); |
2516 | 54 | return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); |
2517 | 54 | } |
2518 | 413k | } |
2519 | 413k | |
2520 | 413k | return SDValue(); |
2521 | 413k | } |
2522 | | |
2523 | 1.18k | SDValue DAGCombiner::visitSUBC(SDNode *N) { |
2524 | 1.18k | SDValue N0 = N->getOperand(0); |
2525 | 1.18k | SDValue N1 = N->getOperand(1); |
2526 | 1.18k | EVT VT = N0.getValueType(); |
2527 | 1.18k | SDLoc DL(N); |
2528 | 1.18k | |
2529 | 1.18k | // If the flag result is dead, turn this into an SUB. |
2530 | 1.18k | if (!N->hasAnyUseOfValue(1)) |
2531 | 0 | return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), |
2532 | 0 | DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); |
2533 | 1.18k | |
2534 | 1.18k | // fold (subc x, x) -> 0 + no borrow |
2535 | 1.18k | if (1.18k N0 == N11.18k ) |
2536 | 30 | return CombineTo(N, DAG.getConstant(0, DL, VT), |
2537 | 30 | DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); |
2538 | 1.15k | |
2539 | 1.15k | // fold (subc x, 0) -> x + no borrow |
2540 | 1.15k | if (1.15k isNullConstant(N1)1.15k ) |
2541 | 0 | return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); |
2542 | 1.15k | |
2543 | 1.15k | // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow |
2544 | 1.15k | if (1.15k isAllOnesConstant(N0)1.15k ) |
2545 | 0 | return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), |
2546 | 0 | DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); |
2547 | 1.15k | |
2548 | 1.15k | return SDValue(); |
2549 | 1.15k | } |
2550 | | |
2551 | 2.20k | SDValue DAGCombiner::visitUSUBO(SDNode *N) { |
2552 | 2.20k | SDValue N0 = N->getOperand(0); |
2553 | 2.20k | SDValue N1 = N->getOperand(1); |
2554 | 2.20k | EVT VT = N0.getValueType(); |
2555 | 2.20k | if (VT.isVector()) |
2556 | 0 | return SDValue(); |
2557 | 2.20k | |
2558 | 2.20k | EVT CarryVT = N->getValueType(1); |
2559 | 2.20k | SDLoc DL(N); |
2560 | 2.20k | |
2561 | 2.20k | // If the flag result is dead, turn this into an SUB. |
2562 | 2.20k | if (!N->hasAnyUseOfValue(1)) |
2563 | 30 | return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), |
2564 | 30 | DAG.getUNDEF(CarryVT)); |
2565 | 2.17k | |
2566 | 2.17k | // fold (usubo x, x) -> 0 + no borrow |
2567 | 2.17k | if (2.17k N0 == N12.17k ) |
2568 | 17 | return CombineTo(N, DAG.getConstant(0, DL, VT), |
2569 | 17 | DAG.getConstant(0, DL, CarryVT)); |
2570 | 2.16k | |
2571 | 2.16k | // fold (usubo x, 0) -> x + no borrow |
2572 | 2.16k | if (2.16k isNullConstant(N1)2.16k ) |
2573 | 26 | return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); |
2574 | 2.13k | |
2575 | 2.13k | // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow |
2576 | 2.13k | if (2.13k isAllOnesConstant(N0)2.13k ) |
2577 | 3 | return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), |
2578 | 3 | DAG.getConstant(0, DL, CarryVT)); |
2579 | 2.13k | |
2580 | 2.13k | return SDValue(); |
2581 | 2.13k | } |
2582 | | |
2583 | 145 | SDValue DAGCombiner::visitSUBE(SDNode *N) { |
2584 | 145 | SDValue N0 = N->getOperand(0); |
2585 | 145 | SDValue N1 = N->getOperand(1); |
2586 | 145 | SDValue CarryIn = N->getOperand(2); |
2587 | 145 | |
2588 | 145 | // fold (sube x, y, false) -> (subc x, y) |
2589 | 145 | if (CarryIn.getOpcode() == ISD::CARRY_FALSE) |
2590 | 0 | return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); |
2591 | 145 | |
2592 | 145 | return SDValue(); |
2593 | 145 | } |
2594 | | |
2595 | 972 | SDValue DAGCombiner::visitSUBCARRY(SDNode *N) { |
2596 | 972 | SDValue N0 = N->getOperand(0); |
2597 | 972 | SDValue N1 = N->getOperand(1); |
2598 | 972 | SDValue CarryIn = N->getOperand(2); |
2599 | 972 | |
2600 | 972 | // fold (subcarry x, y, false) -> (usubo x, y) |
2601 | 972 | if (isNullConstant(CarryIn)) |
2602 | 10 | return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1); |
2603 | 962 | |
2604 | 962 | return SDValue(); |
2605 | 962 | } |
2606 | | |
2607 | 478k | SDValue DAGCombiner::visitMUL(SDNode *N) { |
2608 | 478k | SDValue N0 = N->getOperand(0); |
2609 | 478k | SDValue N1 = N->getOperand(1); |
2610 | 478k | EVT VT = N0.getValueType(); |
2611 | 478k | |
2612 | 478k | // fold (mul x, undef) -> 0 |
2613 | 478k | if (N0.isUndef() || 478k N1.isUndef()478k ) |
2614 | 21 | return DAG.getConstant(0, SDLoc(N), VT); |
2615 | 478k | |
2616 | 478k | bool N0IsConst = false; |
2617 | 478k | bool N1IsConst = false; |
2618 | 478k | bool N1IsOpaqueConst = false; |
2619 | 478k | bool N0IsOpaqueConst = false; |
2620 | 478k | APInt ConstValue0, ConstValue1; |
2621 | 478k | // fold vector ops |
2622 | 478k | if (VT.isVector()478k ) { |
2623 | 13.3k | if (SDValue FoldedVOp = SimplifyVBinOp(N)) |
2624 | 3 | return FoldedVOp; |
2625 | 13.3k | |
2626 | 13.3k | N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); |
2627 | 13.3k | N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); |
2628 | 13.3k | assert((!N0IsConst || |
2629 | 13.3k | ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) && |
2630 | 13.3k | "Splat APInt should be element width"); |
2631 | 13.3k | assert((!N1IsConst || |
2632 | 13.3k | ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && |
2633 | 13.3k | "Splat APInt should be element width"); |
2634 | 478k | } else { |
2635 | 464k | N0IsConst = isa<ConstantSDNode>(N0); |
2636 | 464k | if (N0IsConst464k ) { |
2637 | 6 | ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); |
2638 | 6 | N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); |
2639 | 6 | } |
2640 | 464k | N1IsConst = isa<ConstantSDNode>(N1); |
2641 | 464k | if (N1IsConst464k ) { |
2642 | 237k | ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); |
2643 | 237k | N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); |
2644 | 237k | } |
2645 | 464k | } |
2646 | 478k | |
2647 | 478k | // fold (mul c1, c2) -> c1*c2 |
2648 | 478k | if (478k N0IsConst && 478k N1IsConst9 && !N0IsOpaqueConst6 && !N1IsOpaqueConst6 ) |
2649 | 6 | return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, |
2650 | 6 | N0.getNode(), N1.getNode()); |
2651 | 478k | |
2652 | 478k | // canonicalize constant to RHS (vector doesn't have to splat) |
2653 | 478k | if (478k DAG.isConstantIntBuildVectorOrConstantInt(N0) && |
2654 | 7 | !DAG.isConstantIntBuildVectorOrConstantInt(N1)) |
2655 | 7 | return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); |
2656 | 478k | // fold (mul x, 0) -> 0 |
2657 | 478k | if (478k N1IsConst && 478k ConstValue1.isNullValue()239k ) |
2658 | 22 | return N1; |
2659 | 478k | // fold (mul x, 1) -> x |
2660 | 478k | if (478k N1IsConst && 478k ConstValue1.isOneValue()239k ) |
2661 | 471 | return N0; |
2662 | 477k | |
2663 | 477k | if (SDValue 477k NewSel477k = foldBinOpIntoSelect(N)) |
2664 | 5 | return NewSel; |
2665 | 477k | |
2666 | 477k | // fold (mul x, -1) -> 0-x |
2667 | 477k | if (477k N1IsConst && 477k ConstValue1.isAllOnesValue()238k ) { |
2668 | 558 | SDLoc DL(N); |
2669 | 558 | return DAG.getNode(ISD::SUB, DL, VT, |
2670 | 558 | DAG.getConstant(0, DL, VT), N0); |
2671 | 558 | } |
2672 | 477k | // fold (mul x, (1 << c)) -> x << c |
2673 | 477k | if (477k isConstantOrConstantVector(N1, /*NoOpaques*/ true) && |
2674 | 477k | DAG.isKnownToBeAPowerOfTwo(N1)236k ) { |
2675 | 22.9k | SDLoc DL(N); |
2676 | 22.9k | SDValue LogBase2 = BuildLogBase2(N1, DL); |
2677 | 22.9k | AddToWorklist(LogBase2.getNode()); |
2678 | 22.9k | |
2679 | 22.9k | EVT ShiftVT = getShiftAmountTy(N0.getValueType()); |
2680 | 22.9k | SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); |
2681 | 22.9k | AddToWorklist(Trunc.getNode()); |
2682 | 22.9k | return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); |
2683 | 22.9k | } |
2684 | 454k | // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c |
2685 | 454k | if (454k N1IsConst && 454k !N1IsOpaqueConst215k && (-ConstValue1).isPowerOf2()213k ) { |
2686 | 3.22k | unsigned Log2Val = (-ConstValue1).logBase2(); |
2687 | 3.22k | SDLoc DL(N); |
2688 | 3.22k | // FIXME: If the input is something that is easily negated (e.g. a |
2689 | 3.22k | // single-use add), we should put the negate there. |
2690 | 3.22k | return DAG.getNode(ISD::SUB, DL, VT, |
2691 | 3.22k | DAG.getConstant(0, DL, VT), |
2692 | 3.22k | DAG.getNode(ISD::SHL, DL, VT, N0, |
2693 | 3.22k | DAG.getConstant(Log2Val, DL, |
2694 | 3.22k | getShiftAmountTy(N0.getValueType())))); |
2695 | 3.22k | } |
2696 | 450k | |
2697 | 450k | // (mul (shl X, c1), c2) -> (mul X, c2 << c1) |
2698 | 450k | if (450k N0.getOpcode() == ISD::SHL && |
2699 | 545 | isConstantOrConstantVector(N1, /* NoOpaques */ true) && |
2700 | 450k | isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)20 ) { |
2701 | 12 | SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); |
2702 | 12 | if (isConstantOrConstantVector(C3)) |
2703 | 12 | return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); |
2704 | 450k | } |
2705 | 450k | |
2706 | 450k | // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one |
2707 | 450k | // use. |
2708 | 450k | { |
2709 | 450k | SDValue Sh(nullptr, 0), Y(nullptr, 0); |
2710 | 450k | |
2711 | 450k | // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). |
2712 | 450k | if (N0.getOpcode() == ISD::SHL && |
2713 | 533 | isConstantOrConstantVector(N0.getOperand(1)) && |
2714 | 450k | N0.getNode()->hasOneUse()441 ) { |
2715 | 280 | Sh = N0; Y = N1; |
2716 | 450k | } else if (450k N1.getOpcode() == ISD::SHL && |
2717 | 56 | isConstantOrConstantVector(N1.getOperand(1)) && |
2718 | 450k | N1.getNode()->hasOneUse()47 ) { |
2719 | 5 | Sh = N1; Y = N0; |
2720 | 5 | } |
2721 | 450k | |
2722 | 450k | if (Sh.getNode()450k ) { |
2723 | 285 | SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); |
2724 | 285 | return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); |
2725 | 285 | } |
2726 | 450k | } |
2727 | 450k | |
2728 | 450k | // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) |
2729 | 450k | if (450k DAG.isConstantIntBuildVectorOrConstantInt(N1) && |
2730 | 212k | N0.getOpcode() == ISD::ADD && |
2731 | 9.44k | DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && |
2732 | 977 | isMulAddWithConstProfitable(N, N0, N1)) |
2733 | 541 | return DAG.getNode(ISD::ADD, SDLoc(N), VT, |
2734 | 541 | DAG.getNode(ISD::MUL, SDLoc(N0), VT, |
2735 | 541 | N0.getOperand(0), N1), |
2736 | 541 | DAG.getNode(ISD::MUL, SDLoc(N1), VT, |
2737 | 541 | N0.getOperand(1), N1)); |
2738 | 450k | |
2739 | 450k | // reassociate mul |
2740 | 450k | if (SDValue 450k RMUL450k = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) |
2741 | 44 | return RMUL; |
2742 | 450k | |
2743 | 450k | return SDValue(); |
2744 | 450k | } |
2745 | | |
2746 | | /// Return true if divmod libcall is available. |
2747 | | static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, |
2748 | 32.8k | const TargetLowering &TLI) { |
2749 | 32.8k | RTLIB::Libcall LC; |
2750 | 32.8k | EVT NodeType = Node->getValueType(0); |
2751 | 32.8k | if (!NodeType.isSimple()) |
2752 | 0 | return false; |
2753 | 32.8k | switch (NodeType.getSimpleVT().SimpleTy) { |
2754 | 0 | default: return false; // No libcall for vector types. |
2755 | 0 | case MVT::i8: LC= isSigned ? 0 RTLIB::SDIVREM_I80 : RTLIB::UDIVREM_I80 ; break; |
2756 | 4 | case MVT::i16: LC= isSigned ? 4 RTLIB::SDIVREM_I162 : RTLIB::UDIVREM_I162 ; break; |
2757 | 19.3k | case MVT::i32: LC= isSigned ? 19.3k RTLIB::SDIVREM_I3215.9k : RTLIB::UDIVREM_I323.38k ; break; |
2758 | 13.5k | case MVT::i64: LC= isSigned ? 13.5k RTLIB::SDIVREM_I641.02k : RTLIB::UDIVREM_I6412.4k ; break; |
2759 | 3 | case MVT::i128: LC= isSigned ? 3 RTLIB::SDIVREM_I1283 :RTLIB::UDIVREM_I1280 ; break; |
2760 | 32.8k | } |
2761 | 32.8k | |
2762 | 32.8k | return TLI.getLibcallName(LC) != nullptr; |
2763 | 32.8k | } |
2764 | | |
2765 | | /// Issue divrem if both quotient and remainder are needed. |
2766 | 36.9k | SDValue DAGCombiner::useDivRem(SDNode *Node) { |
2767 | 36.9k | if (Node->use_empty()) |
2768 | 0 | return SDValue(); // This is a dead node, leave it alone. |
2769 | 36.9k | |
2770 | 36.9k | unsigned Opcode = Node->getOpcode(); |
2771 | 22.5k | bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); |
2772 | 36.9k | unsigned DivRemOpc = isSigned ? ISD::SDIVREM18.9k : ISD::UDIVREM17.9k ; |
2773 | 36.9k | |
2774 | 36.9k | // DivMod lib calls can still work on non-legal types if using lib-calls. |
2775 | 36.9k | EVT VT = Node->getValueType(0); |
2776 | 36.9k | if (VT.isVector() || 36.9k !VT.isInteger()36.4k ) |
2777 | 497 | return SDValue(); |
2778 | 36.4k | |
2779 | 36.4k | if (36.4k !TLI.isTypeLegal(VT) && 36.4k !TLI.isOperationCustom(DivRemOpc, VT)899 ) |
2780 | 860 | return SDValue(); |
2781 | 35.5k | |
2782 | 35.5k | // If DIVREM is going to get expanded into a libcall, |
2783 | 35.5k | // but there is no libcall available, then don't combine. |
2784 | 35.5k | if (35.5k !TLI.isOperationLegalOrCustom(DivRemOpc, VT) && |
2785 | 32.8k | !isDivRemLibcallAvailable(Node, isSigned, TLI)) |
2786 | 32.4k | return SDValue(); |
2787 | 3.06k | |
2788 | 3.06k | // If div is legal, it's better to do the normal expansion |
2789 | 3.06k | unsigned OtherOpcode = 0; |
2790 | 3.06k | if ((Opcode == ISD::SDIV) || 3.06k (Opcode == ISD::UDIV)2.33k ) { |
2791 | 1.74k | OtherOpcode = isSigned ? ISD::SREM732 : ISD::UREM1.01k ; |
2792 | 1.74k | if (TLI.isOperationLegalOrCustom(Opcode, VT)) |
2793 | 374 | return SDValue(); |
2794 | 1.32k | } else { |
2795 | 1.32k | OtherOpcode = isSigned ? ISD::SDIV676 : ISD::UDIV648 ; |
2796 | 1.32k | if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) |
2797 | 214 | return SDValue(); |
2798 | 2.47k | } |
2799 | 2.47k | |
2800 | 2.47k | SDValue Op0 = Node->getOperand(0); |
2801 | 2.47k | SDValue Op1 = Node->getOperand(1); |
2802 | 2.47k | SDValue combined; |
2803 | 2.47k | for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), |
2804 | 5.61k | UE = Op0.getNode()->use_end(); UI != UE5.61k ;) { |
2805 | 3.13k | SDNode *User = *UI++; |
2806 | 3.13k | if (User == Node || 3.13k User->use_empty()641 ) |
2807 | 2.49k | continue; |
2808 | 636 | // Convert the other matching node(s), too; |
2809 | 636 | // otherwise, the DIVREM may get target-legalized into something |
2810 | 636 | // target-specific that we won't be able to recognize. |
2811 | 636 | unsigned UserOpc = User->getOpcode(); |
2812 | 636 | if ((UserOpc == Opcode || 636 UserOpc == OtherOpcode556 || UserOpc == DivRemOpc344 ) && |
2813 | 314 | User->getOperand(0) == Op0 && |
2814 | 636 | User->getOperand(1) == Op1168 ) { |
2815 | 168 | if (!combined168 ) { |
2816 | 168 | if (UserOpc == OtherOpcode168 ) { |
2817 | 168 | SDVTList VTs = DAG.getVTList(VT, VT); |
2818 | 168 | combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); |
2819 | 168 | } else if (0 UserOpc == DivRemOpc0 ) { |
2820 | 0 | combined = SDValue(User, 0); |
2821 | 0 | } else { |
2822 | 0 | assert(UserOpc == Opcode); |
2823 | 0 | continue; |
2824 | 0 | } |
2825 | 168 | } |
2826 | 168 | if (168 UserOpc == ISD::SDIV || 168 UserOpc == ISD::UDIV80 ) |
2827 | 133 | CombineTo(User, combined); |
2828 | 35 | else if (35 UserOpc == ISD::SREM || 35 UserOpc == ISD::UREM21 ) |
2829 | 35 | CombineTo(User, combined.getValue(1)); |
2830 | 168 | } |
2831 | 3.13k | } |
2832 | 36.9k | return combined; |
2833 | 36.9k | } |
2834 | | |
2835 | 50.1k | static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { |
2836 | 50.1k | SDValue N0 = N->getOperand(0); |
2837 | 50.1k | SDValue N1 = N->getOperand(1); |
2838 | 50.1k | EVT VT = N->getValueType(0); |
2839 | 50.1k | SDLoc DL(N); |
2840 | 50.1k | |
2841 | 50.1k | if (DAG.isUndef(N->getOpcode(), {N0, N1})) |
2842 | 1 | return DAG.getUNDEF(VT); |
2843 | 50.1k | |
2844 | 50.1k | // undef / X -> 0 |
2845 | 50.1k | // undef % X -> 0 |
2846 | 50.1k | if (50.1k N0.isUndef()50.1k ) |
2847 | 0 | return DAG.getConstant(0, DL, VT); |
2848 | 50.1k | |
2849 | 50.1k | return SDValue(); |
2850 | 50.1k | } |
2851 | | |
2852 | 18.9k | SDValue DAGCombiner::visitSDIV(SDNode *N) { |
2853 | 18.9k | SDValue N0 = N->getOperand(0); |
2854 | 18.9k | SDValue N1 = N->getOperand(1); |
2855 | 18.9k | EVT VT = N->getValueType(0); |
2856 | 18.9k | |
2857 | 18.9k | // fold vector ops |
2858 | 18.9k | if (VT.isVector()) |
2859 | 340 | if (SDValue 340 FoldedVOp340 = SimplifyVBinOp(N)) |
2860 | 1 | return FoldedVOp; |
2861 | 18.9k | |
2862 | 18.9k | SDLoc DL(N); |
2863 | 18.9k | |
2864 | 18.9k | // fold (sdiv c1, c2) -> c1/c2 |
2865 | 18.9k | ConstantSDNode *N0C = isConstOrConstSplat(N0); |
2866 | 18.9k | ConstantSDNode *N1C = isConstOrConstSplat(N1); |
2867 | 18.9k | if (N0C && 18.9k N1C5.84k && !N0C->isOpaque()0 && !N1C->isOpaque()0 ) |
2868 | 0 | return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); |
2869 | 18.9k | // fold (sdiv X, 1) -> X |
2870 | 18.9k | if (18.9k N1C && 18.9k N1C->isOne()4.64k ) |
2871 | 6 | return N0; |
2872 | 18.9k | // fold (sdiv X, -1) -> 0-X |
2873 | 18.9k | if (18.9k N1C && 18.9k N1C->isAllOnesValue()4.64k ) |
2874 | 3 | return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); |
2875 | 18.9k | |
2876 | 18.9k | if (SDValue 18.9k V18.9k = simplifyDivRem(N, DAG)) |
2877 | 0 | return V; |
2878 | 18.9k | |
2879 | 18.9k | if (SDValue 18.9k NewSel18.9k = foldBinOpIntoSelect(N)) |
2880 | 2 | return NewSel; |
2881 | 18.9k | |
2882 | 18.9k | // If we know the sign bits of both operands are zero, strength reduce to a |
2883 | 18.9k | // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 |
2884 | 18.9k | if (18.9k DAG.SignBitIsZero(N1) && 18.9k DAG.SignBitIsZero(N0)7.42k ) |
2885 | 16 | return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); |
2886 | 18.9k | |
2887 | 18.9k | // fold (sdiv X, pow2) -> simple ops after legalize |
2888 | 18.9k | // FIXME: We check for the exact bit here because the generic lowering gives |
2889 | 18.9k | // better results in that case. The target-specific lowering should learn how |
2890 | 18.9k | // to handle exact sdivs efficiently. |
2891 | 18.9k | if (18.9k N1C && 18.9k !N1C->isNullValue()4.62k && !N1C->isOpaque()4.62k && |
2892 | 18.9k | !N->getFlags().hasExact()4.62k && (N1C->getAPIntValue().isPowerOf2() || |
2893 | 18.9k | (-N1C->getAPIntValue()).isPowerOf2()1.52k )) { |
2894 | 3.08k | // Target-specific implementation of sdiv x, pow2. |
2895 | 3.08k | if (SDValue Res = BuildSDIVPow2(N)) |
2896 | 2.86k | return Res; |
2897 | 216 | |
2898 | 216 | unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); |
2899 | 216 | |
2900 | 216 | // Splat the sign bit into the register |
2901 | 216 | SDValue SGN = |
2902 | 216 | DAG.getNode(ISD::SRA, DL, VT, N0, |
2903 | 216 | DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, |
2904 | 216 | getShiftAmountTy(N0.getValueType()))); |
2905 | 216 | AddToWorklist(SGN.getNode()); |
2906 | 216 | |
2907 | 216 | // Add (N0 < 0) ? abs2 - 1 : 0; |
2908 | 216 | SDValue SRL = |
2909 | 216 | DAG.getNode(ISD::SRL, DL, VT, SGN, |
2910 | 216 | DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, |
2911 | 216 | getShiftAmountTy(SGN.getValueType()))); |
2912 | 216 | SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); |
2913 | 216 | AddToWorklist(SRL.getNode()); |
2914 | 216 | AddToWorklist(ADD.getNode()); // Divide by pow2 |
2915 | 216 | SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, |
2916 | 216 | DAG.getConstant(lg2, DL, |
2917 | 216 | getShiftAmountTy(ADD.getValueType()))); |
2918 | 216 | |
2919 | 216 | // If we're dividing by a positive value, we're done. Otherwise, we must |
2920 | 216 | // negate the result. |
2921 | 216 | if (N1C->getAPIntValue().isNonNegative()) |
2922 | 210 | return SRA; |
2923 | 6 | |
2924 | 6 | AddToWorklist(SRA.getNode()); |
2925 | 6 | return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); |
2926 | 6 | } |
2927 | 15.8k | |
2928 | 15.8k | // If integer divide is expensive and we satisfy the requirements, emit an |
2929 | 15.8k | // alternate sequence. Targets may check function attributes for size/speed |
2930 | 15.8k | // trade-offs. |
2931 | 15.8k | AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); |
2932 | 15.8k | if (N1C && 15.8k !TLI.isIntDivCheap(N->getValueType(0), Attr)1.54k ) |
2933 | 1.50k | if (SDValue 1.50k Op1.50k = BuildSDIV(N)) |
2934 | 1.34k | return Op; |
2935 | 14.4k | |
2936 | 14.4k | // sdiv, srem -> sdivrem |
2937 | 14.4k | // If the divisor is constant, then return DIVREM only if isIntDivCheap() is |
2938 | 14.4k | // true. Otherwise, we break the simplification logic in visitREM(). |
2939 | 14.4k | if (14.4k !N1C || 14.4k TLI.isIntDivCheap(N->getValueType(0), Attr)199 ) |
2940 | 14.3k | if (SDValue 14.3k DivRem14.3k = useDivRem(N)) |
2941 | 14 | return DivRem; |
2942 | 14.4k | |
2943 | 14.4k | return SDValue(); |
2944 | 14.4k | } |
2945 | | |
2946 | 22.7k | SDValue DAGCombiner::visitUDIV(SDNode *N) { |
2947 | 22.7k | SDValue N0 = N->getOperand(0); |
2948 | 22.7k | SDValue N1 = N->getOperand(1); |
2949 | 22.7k | EVT VT = N->getValueType(0); |
2950 | 22.7k | |
2951 | 22.7k | // fold vector ops |
2952 | 22.7k | if (VT.isVector()) |
2953 | 235 | if (SDValue 235 FoldedVOp235 = SimplifyVBinOp(N)) |
2954 | 0 | return FoldedVOp; |
2955 | 22.7k | |
2956 | 22.7k | SDLoc DL(N); |
2957 | 22.7k | |
2958 | 22.7k | // fold (udiv c1, c2) -> c1/c2 |
2959 | 22.7k | ConstantSDNode *N0C = isConstOrConstSplat(N0); |
2960 | 22.7k | ConstantSDNode *N1C = isConstOrConstSplat(N1); |
2961 | 22.7k | if (N0C && 22.7k N1C1.08k ) |
2962 | 0 | if (SDValue 0 Folded0 = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, |
2963 | 0 | N0C, N1C)) |
2964 | 0 | return Folded; |
2965 | 22.7k | |
2966 | 22.7k | if (SDValue 22.7k V22.7k = simplifyDivRem(N, DAG)) |
2967 | 0 | return V; |
2968 | 22.7k | |
2969 | 22.7k | if (SDValue 22.7k NewSel22.7k = foldBinOpIntoSelect(N)) |
2970 | 2 | return NewSel; |
2971 | 22.7k | |
2972 | 22.7k | // fold (udiv x, (1 << c)) -> x >>u c |
2973 | 22.7k | if (22.7k isConstantOrConstantVector(N1, /*NoOpaques*/ true) && |
2974 | 22.7k | DAG.isKnownToBeAPowerOfTwo(N1)4.30k ) { |
2975 | 39 | SDValue LogBase2 = BuildLogBase2(N1, DL); |
2976 | 39 | AddToWorklist(LogBase2.getNode()); |
2977 | 39 | |
2978 | 39 | EVT ShiftVT = getShiftAmountTy(N0.getValueType()); |
2979 | 39 | SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); |
2980 | 39 | AddToWorklist(Trunc.getNode()); |
2981 | 39 | return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); |
2982 | 39 | } |
2983 | 22.7k | |
2984 | 22.7k | // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 |
2985 | 22.7k | if (22.7k N1.getOpcode() == ISD::SHL22.7k ) { |
2986 | 41 | SDValue N10 = N1.getOperand(0); |
2987 | 41 | if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) && |
2988 | 41 | DAG.isKnownToBeAPowerOfTwo(N10)9 ) { |
2989 | 9 | SDValue LogBase2 = BuildLogBase2(N10, DL); |
2990 | 9 | AddToWorklist(LogBase2.getNode()); |
2991 | 9 | |
2992 | 9 | EVT ADDVT = N1.getOperand(1).getValueType(); |
2993 | 9 | SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); |
2994 | 9 | AddToWorklist(Trunc.getNode()); |
2995 | 9 | SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc); |
2996 | 9 | AddToWorklist(Add.getNode()); |
2997 | 9 | return DAG.getNode(ISD::SRL, DL, VT, N0, Add); |
2998 | 9 | } |
2999 | 22.7k | } |
3000 | 22.7k | |
3001 | 22.7k | // fold (udiv x, c) -> alternate |
3002 | 22.7k | AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); |
3003 | 22.7k | if (N1C && 22.7k !TLI.isIntDivCheap(N->getValueType(0), Attr)7.08k ) |
3004 | 7.04k | if (SDValue 7.04k Op7.04k = BuildUDIV(N)) |
3005 | 6.84k | return Op; |
3006 | 15.8k | |
3007 | 15.8k | // sdiv, srem -> sdivrem |
3008 | 15.8k | // If the divisor is constant, then return DIVREM only if isIntDivCheap() is |
3009 | 15.8k | // true. Otherwise, we break the simplification logic in visitREM(). |
3010 | 15.8k | if (15.8k !N1C || 15.8k TLI.isIntDivCheap(N->getValueType(0), Attr)242 ) |
3011 | 15.6k | if (SDValue 15.6k DivRem15.6k = useDivRem(N)) |
3012 | 21 | return DivRem; |
3013 | 15.8k | |
3014 | 15.8k | return SDValue(); |
3015 | 15.8k | } |
3016 | | |
3017 | | // handles ISD::SREM and ISD::UREM |
3018 | 8.48k | SDValue DAGCombiner::visitREM(SDNode *N) { |
3019 | 8.48k | unsigned Opcode = N->getOpcode(); |
3020 | 8.48k | SDValue N0 = N->getOperand(0); |
3021 | 8.48k | SDValue N1 = N->getOperand(1); |
3022 | 8.48k | EVT VT = N->getValueType(0); |
3023 | 8.48k | bool isSigned = (Opcode == ISD::SREM); |
3024 | 8.48k | SDLoc DL(N); |
3025 | 8.48k | |
3026 | 8.48k | // fold (rem c1, c2) -> c1%c2 |
3027 | 8.48k | ConstantSDNode *N0C = isConstOrConstSplat(N0); |
3028 | 8.48k | ConstantSDNode *N1C = isConstOrConstSplat(N1); |
3029 | 8.48k | if (N0C && 8.48k N1C79 ) |
3030 | 4 | if (SDValue 4 Folded4 = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) |
3031 | 4 | return Folded; |
3032 | 8.48k | |
3033 | 8.48k | if (SDValue 8.48k V8.48k = simplifyDivRem(N, DAG)) |
3034 | 1 | return V; |
3035 | 8.48k | |
3036 | 8.48k | if (SDValue 8.48k NewSel8.48k = foldBinOpIntoSelect(N)) |
3037 | 4 | return NewSel; |
3038 | 8.47k | |
3039 | 8.47k | if (8.47k isSigned8.47k ) { |
3040 | 5.44k | // If we know the sign bits of both operands are zero, strength reduce to a |
3041 | 5.44k | // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 |
3042 | 5.44k | if (DAG.SignBitIsZero(N1) && 5.44k DAG.SignBitIsZero(N0)3.70k ) |
3043 | 9 | return DAG.getNode(ISD::UREM, DL, VT, N0, N1); |
3044 | 3.03k | } else { |
3045 | 3.03k | SDValue NegOne = DAG.getAllOnesConstant(DL, VT); |
3046 | 3.03k | if (DAG.isKnownToBeAPowerOfTwo(N1)3.03k ) { |
3047 | 36 | // fold (urem x, pow2) -> (and x, pow2-1) |
3048 | 36 | SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); |
3049 | 36 | AddToWorklist(Add.getNode()); |
3050 | 36 | return DAG.getNode(ISD::AND, DL, VT, N0, Add); |
3051 | 36 | } |
3052 | 2.99k | if (2.99k N1.getOpcode() == ISD::SHL && |
3053 | 2.99k | DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))6 ) { |
3054 | 6 | // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) |
3055 | 6 | SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne); |
3056 | 6 | AddToWorklist(Add.getNode()); |
3057 | 6 | return DAG.getNode(ISD::AND, DL, VT, N0, Add); |
3058 | 6 | } |
3059 | 8.42k | } |
3060 | 8.42k | |
3061 | 8.42k | AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); |
3062 | 8.42k | |
3063 | 8.42k | // If X/C can be simplified by the division-by-constant logic, lower |
3064 | 8.42k | // X%C to the equivalent of X-X/C*C. |
3065 | 8.42k | // To avoid mangling nodes, this simplification requires that the combine() |
3066 | 8.42k | // call for the speculative DIV must not cause a DIVREM conversion. We guard |
3067 | 8.42k | // against this by skipping the simplification if isIntDivCheap(). When |
3068 | 8.42k | // div is not cheap, combine will not return a DIVREM. Regardless, |
3069 | 8.42k | // checking cheapness here makes sense since the simplification results in |
3070 | 8.42k | // fatter code. |
3071 | 8.42k | if (N1C && 8.42k !N1C->isNullValue()1.74k && !TLI.isIntDivCheap(VT, Attr)1.74k ) { |
3072 | 1.68k | unsigned DivOpcode = isSigned ? ISD::SDIV876 : ISD::UDIV809 ; |
3073 | 1.68k | SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); |
3074 | 1.68k | AddToWorklist(Div.getNode()); |
3075 | 1.68k | SDValue OptimizedDiv = combine(Div.getNode()); |
3076 | 1.68k | if (OptimizedDiv.getNode() && 1.68k OptimizedDiv.getNode() != Div.getNode()1.52k ) { |
3077 | 1.52k | assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && |
3078 | 1.52k | (OptimizedDiv.getOpcode() != ISD::SDIVREM)); |
3079 | 1.52k | SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); |
3080 | 1.52k | SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); |
3081 | 1.52k | AddToWorklist(Mul.getNode()); |
3082 | 1.52k | return Sub; |
3083 | 1.52k | } |
3084 | 6.89k | } |
3085 | 6.89k | |
3086 | 6.89k | // sdiv, srem -> sdivrem |
3087 | 6.89k | if (SDValue 6.89k DivRem6.89k = useDivRem(N)) |
3088 | 133 | return DivRem.getValue(1); |
3089 | 6.76k | |
3090 | 6.76k | return SDValue(); |
3091 | 6.76k | } |
3092 | | |
3093 | 1.87k | SDValue DAGCombiner::visitMULHS(SDNode *N) { |
3094 | 1.87k | SDValue N0 = N->getOperand(0); |
3095 | 1.87k | SDValue N1 = N->getOperand(1); |
3096 | 1.87k | EVT VT = N->getValueType(0); |
3097 | 1.87k | SDLoc DL(N); |
3098 | 1.87k | |
3099 | 1.87k | // fold (mulhs x, 0) -> 0 |
3100 | 1.87k | if (isNullConstant(N1)) |
3101 | 0 | return N1; |
3102 | 1.87k | // fold (mulhs x, 1) -> (sra x, size(x)-1) |
3103 | 1.87k | if (1.87k isOneConstant(N1)1.87k ) { |
3104 | 0 | SDLoc DL(N); |
3105 | 0 | return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, |
3106 | 0 | DAG.getConstant(N0.getValueSizeInBits() - 1, DL, |
3107 | 0 | getShiftAmountTy(N0.getValueType()))); |
3108 | 0 | } |
3109 | 1.87k | // fold (mulhs x, undef) -> 0 |
3110 | 1.87k | if (1.87k N0.isUndef() || 1.87k N1.isUndef()1.87k ) |
3111 | 0 | return DAG.getConstant(0, SDLoc(N), VT); |
3112 | 1.87k | |
3113 | 1.87k | // If the type twice as wide is legal, transform the mulhs to a wider multiply |
3114 | 1.87k | // plus a shift. |
3115 | 1.87k | if (1.87k VT.isSimple() && 1.87k !VT.isVector()1.87k ) { |
3116 | 1.60k | MVT Simple = VT.getSimpleVT(); |
3117 | 1.60k | unsigned SimpleSize = Simple.getSizeInBits(); |
3118 | 1.60k | EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); |
3119 | 1.60k | if (TLI.isOperationLegal(ISD::MUL, NewVT)1.60k ) { |
3120 | 918 | N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); |
3121 | 918 | N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); |
3122 | 918 | N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); |
3123 | 918 | N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, |
3124 | 918 | DAG.getConstant(SimpleSize, DL, |
3125 | 918 | getShiftAmountTy(N1.getValueType()))); |
3126 | 918 | return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); |
3127 | 918 | } |
3128 | 957 | } |
3129 | 957 | |
3130 | 957 | return SDValue(); |
3131 | 957 | } |
3132 | | |
3133 | 15.8k | SDValue DAGCombiner::visitMULHU(SDNode *N) { |
3134 | 15.8k | SDValue N0 = N->getOperand(0); |
3135 | 15.8k | SDValue N1 = N->getOperand(1); |
3136 | 15.8k | EVT VT = N->getValueType(0); |
3137 | 15.8k | SDLoc DL(N); |
3138 | 15.8k | |
3139 | 15.8k | // fold (mulhu x, 0) -> 0 |
3140 | 15.8k | if (isNullConstant(N1)) |
3141 | 0 | return N1; |
3142 | 15.8k | // fold (mulhu x, 1) -> 0 |
3143 | 15.8k | if (15.8k isOneConstant(N1)15.8k ) |
3144 | 0 | return DAG.getConstant(0, DL, N0.getValueType()); |
3145 | 15.8k | // fold (mulhu x, undef) -> 0 |
3146 | 15.8k | if (15.8k N0.isUndef() || 15.8k N1.isUndef()15.8k ) |
3147 | 0 | return DAG.getConstant(0, DL, VT); |
3148 | 15.8k | |
3149 | 15.8k | // If the type twice as wide is legal, transform the mulhu to a wider multiply |
3150 | 15.8k | // plus a shift. |
3151 | 15.8k | if (15.8k VT.isSimple() && 15.8k !VT.isVector()15.8k ) { |
3152 | 15.6k | MVT Simple = VT.getSimpleVT(); |
3153 | 15.6k | unsigned SimpleSize = Simple.getSizeInBits(); |
3154 | 15.6k | EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); |
3155 | 15.6k | if (TLI.isOperationLegal(ISD::MUL, NewVT)15.6k ) { |
3156 | 628 | N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); |
3157 | 628 | N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); |
3158 | 628 | N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); |
3159 | 628 | N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, |
3160 | 628 | DAG.getConstant(SimpleSize, DL, |
3161 | 628 | getShiftAmountTy(N1.getValueType()))); |
3162 | 628 | return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); |
3163 | 628 | } |
3164 | 15.2k | } |
3165 | 15.2k | |
3166 | 15.2k | return SDValue(); |
3167 | 15.2k | } |
3168 | | |
3169 | | /// Perform optimizations common to nodes that compute two values. LoOp and HiOp |
3170 | | /// give the opcodes for the two computations that are being performed. Return |
3171 | | /// true if a simplification was made. |
3172 | | SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, |
3173 | 3.61k | unsigned HiOp) { |
3174 | 3.61k | // If the high half is not needed, just compute the low half. |
3175 | 3.61k | bool HiExists = N->hasAnyUseOfValue(1); |
3176 | 3.61k | if (!HiExists && |
3177 | 51 | (!LegalOperations || |
3178 | 3.61k | TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0))7 )) { |
3179 | 44 | SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); |
3180 | 44 | return CombineTo(N, Res, Res); |
3181 | 44 | } |
3182 | 3.56k | |
3183 | 3.56k | // If the low half is not needed, just compute the high half. |
3184 | 3.56k | bool LoExists = N->hasAnyUseOfValue(0); |
3185 | 3.56k | if (!LoExists && |
3186 | 1.42k | (!LegalOperations || |
3187 | 3.56k | TLI.isOperationLegal(HiOp, N->getValueType(1))930 )) { |
3188 | 494 | SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); |
3189 | 494 | return CombineTo(N, Res, Res); |
3190 | 494 | } |
3191 | 3.07k | |
3192 | 3.07k | // If both halves are used, return as it is. |
3193 | 3.07k | if (3.07k LoExists && 3.07k HiExists2.14k ) |
3194 | 2.13k | return SDValue(); |
3195 | 937 | |
3196 | 937 | // If the two computed results can be simplified separately, separate them. |
3197 | 937 | if (937 LoExists937 ) { |
3198 | 7 | SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); |
3199 | 7 | AddToWorklist(Lo.getNode()); |
3200 | 7 | SDValue LoOpt = combine(Lo.getNode()); |
3201 | 7 | if (LoOpt.getNode() && 7 LoOpt.getNode() != Lo.getNode()0 && |
3202 | 0 | (!LegalOperations || |
3203 | 0 | TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) |
3204 | 0 | return CombineTo(N, LoOpt, LoOpt); |
3205 | 937 | } |
3206 | 937 | |
3207 | 937 | if (937 HiExists937 ) { |
3208 | 930 | SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); |
3209 | 930 | AddToWorklist(Hi.getNode()); |
3210 | 930 | SDValue HiOpt = combine(Hi.getNode()); |
3211 | 930 | if (HiOpt.getNode() && 930 HiOpt != Hi256 && |
3212 | 256 | (!LegalOperations || |
3213 | 256 | TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) |
3214 | 256 | return CombineTo(N, HiOpt, HiOpt); |
3215 | 681 | } |
3216 | 681 | |
3217 | 681 | return SDValue(); |
3218 | 681 | } |
3219 | | |
3220 | 647 | SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { |
3221 | 647 | if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) |
3222 | 255 | return Res; |
3223 | 392 | |
3224 | 392 | EVT VT = N->getValueType(0); |
3225 | 392 | SDLoc DL(N); |
3226 | 392 | |
3227 | 392 | // If the type is twice as wide is legal, transform the mulhu to a wider |
3228 | 392 | // multiply plus a shift. |
3229 | 392 | if (VT.isSimple() && 392 !VT.isVector()392 ) { |
3230 | 392 | MVT Simple = VT.getSimpleVT(); |
3231 | 392 | unsigned SimpleSize = Simple.getSizeInBits(); |
3232 | 392 | EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); |
3233 | 392 | if (TLI.isOperationLegal(ISD::MUL, NewVT)392 ) { |
3234 | 0 | SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); |
3235 | 0 | SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); |
3236 | 0 | Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); |
3237 | 0 | // Compute the high part as N1. |
3238 | 0 | Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, |
3239 | 0 | DAG.getConstant(SimpleSize, DL, |
3240 | 0 | getShiftAmountTy(Lo.getValueType()))); |
3241 | 0 | Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); |
3242 | 0 | // Compute the low part as N0. |
3243 | 0 | Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); |
3244 | 0 | return CombineTo(N, Lo, Hi); |
3245 | 0 | } |
3246 | 392 | } |
3247 | 392 | |
3248 | 392 | return SDValue(); |
3249 | 392 | } |
3250 | | |
3251 | 2.96k | SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { |
3252 | 2.96k | if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) |
3253 | 539 | return Res; |
3254 | 2.42k | |
3255 | 2.42k | EVT VT = N->getValueType(0); |
3256 | 2.42k | SDLoc DL(N); |
3257 | 2.42k | |
3258 | 2.42k | // If the type is twice as wide is legal, transform the mulhu to a wider |
3259 | 2.42k | // multiply plus a shift. |
3260 | 2.42k | if (VT.isSimple() && 2.42k !VT.isVector()2.42k ) { |
3261 | 2.42k | MVT Simple = VT.getSimpleVT(); |
3262 | 2.42k | unsigned SimpleSize = Simple.getSizeInBits(); |
3263 | 2.42k | EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); |
3264 | 2.42k | if (TLI.isOperationLegal(ISD::MUL, NewVT)2.42k ) { |
3265 | 0 | SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); |
3266 | 0 | SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); |
3267 | 0 | Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); |
3268 | 0 | // Compute the high part as N1. |
3269 | 0 | Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, |
3270 | 0 | DAG.getConstant(SimpleSize, DL, |
3271 | 0 | getShiftAmountTy(Lo.getValueType()))); |
3272 | 0 | Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); |
3273 | 0 | // Compute the low part as N0. |
3274 | 0 | Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); |
3275 | 0 | return CombineTo(N, Lo, Hi); |
3276 | 0 | } |
3277 | 2.42k | } |
3278 | 2.42k | |
3279 | 2.42k | return SDValue(); |
3280 | 2.42k | } |
3281 | | |
3282 | 82 | SDValue DAGCombiner::visitSMULO(SDNode *N) { |
3283 | 82 | // (smulo x, 2) -> (saddo x, x) |
3284 | 82 | if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) |
3285 | 7 | if (7 C2->getAPIntValue() == 27 ) |
3286 | 3 | return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), |
3287 | 3 | N->getOperand(0), N->getOperand(0)); |
3288 | 79 | |
3289 | 79 | return SDValue(); |
3290 | 79 | } |
3291 | | |
3292 | 2.49k | SDValue DAGCombiner::visitUMULO(SDNode *N) { |
3293 | 2.49k | // (umulo x, 2) -> (uaddo x, x) |
3294 | 2.49k | if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) |
3295 | 2.42k | if (2.42k C2->getAPIntValue() == 22.42k ) |
3296 | 16 | return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), |
3297 | 16 | N->getOperand(0), N->getOperand(0)); |
3298 | 2.47k | |
3299 | 2.47k | return SDValue(); |
3300 | 2.47k | } |
3301 | | |
3302 | 12.6k | SDValue DAGCombiner::visitIMINMAX(SDNode *N) { |
3303 | 12.6k | SDValue N0 = N->getOperand(0); |
3304 | 12.6k | SDValue N1 = N->getOperand(1); |
3305 | 12.6k | EVT VT = N0.getValueType(); |
3306 | 12.6k | |
3307 | 12.6k | // fold vector ops |
3308 | 12.6k | if (VT.isVector()) |
3309 | 8.53k | if (SDValue 8.53k FoldedVOp8.53k = SimplifyVBinOp(N)) |
3310 | 252 | return FoldedVOp; |
3311 | 12.4k | |
3312 | 12.4k | // fold operation with constant operands. |
3313 | 12.4k | ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); |
3314 | 12.4k | ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); |
3315 | 12.4k | if (N0C && 12.4k N1C0 ) |
3316 | 0 | return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); |
3317 | 12.4k | |
3318 | 12.4k | // canonicalize constant to RHS |
3319 | 12.4k | if (12.4k DAG.isConstantIntBuildVectorOrConstantInt(N0) && |
3320 | 10 | !DAG.isConstantIntBuildVectorOrConstantInt(N1)) |
3321 | 10 | return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); |
3322 | 12.4k | |
3323 | 12.4k | return SDValue(); |
3324 | 12.4k | } |
3325 | | |
3326 | | /// If this is a binary operator with two operands of the same opcode, try to |
3327 | | /// simplify it. |
3328 | 154k | SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { |
3329 | 154k | SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); |
3330 | 154k | EVT VT = N0.getValueType(); |
3331 | 154k | assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); |
3332 | 154k | |
3333 | 154k | // Bail early if none of these transforms apply. |
3334 | 154k | if (N0.getNumOperands() == 0154k ) return SDValue()11 ; |
3335 | 154k | |
3336 | 154k | // For each of OP in AND/OR/XOR: |
3337 | 154k | // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) |
3338 | 154k | // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) |
3339 | 154k | // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) |
3340 | 154k | // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) |
3341 | 154k | // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) |
3342 | 154k | // |
3343 | 154k | // do not sink logical op inside of a vector extend, since it may combine |
3344 | 154k | // into a vsetcc. |
3345 | 154k | EVT Op0VT = N0.getOperand(0).getValueType(); |
3346 | 154k | if ((N0.getOpcode() == ISD::ZERO_EXTEND || |
3347 | 153k | N0.getOpcode() == ISD::SIGN_EXTEND || |
3348 | 153k | N0.getOpcode() == ISD::BSWAP || |
3349 | 154k | // Avoid infinite looping with PromoteIntBinOp. |
3350 | 153k | (N0.getOpcode() == ISD::ANY_EXTEND && |
3351 | 153k | (!LegalTypes || 223 TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT)223 )) || |
3352 | 153k | (N0.getOpcode() == ISD::TRUNCATE && |
3353 | 2.23k | (!TLI.isZExtFree(VT, Op0VT) || |
3354 | 2.23k | !TLI.isTruncateFree(Op0VT, VT)) && |
3355 | 153k | TLI.isTypeLegal(Op0VT))) && |
3356 | 1.61k | !VT.isVector() && |
3357 | 1.34k | Op0VT == N1.getOperand(0).getValueType() && |
3358 | 154k | (!LegalOperations || 1.21k TLI.isOperationLegal(N->getOpcode(), Op0VT)88 )) { |
3359 | 1.21k | SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), |
3360 | 1.21k | N0.getOperand(0).getValueType(), |
3361 | 1.21k | N0.getOperand(0), N1.getOperand(0)); |
3362 | 1.21k | AddToWorklist(ORNode.getNode()); |
3363 | 1.21k | return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); |
3364 | 1.21k | } |
3365 | 152k | |
3366 | 152k | // For each of OP in SHL/SRL/SRA/AND... |
3367 | 152k | // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) |
3368 | 152k | // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) |
3369 | 152k | // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) |
3370 | 152k | if (152k (N0.getOpcode() == ISD::SHL || 152k N0.getOpcode() == ISD::SRL151k || |
3371 | 152k | N0.getOpcode() == ISD::SRA150k || N0.getOpcode() == ISD::AND150k ) && |
3372 | 152k | N0.getOperand(1) == N1.getOperand(1)19.8k ) { |
3373 | 204 | SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), |
3374 | 204 | N0.getOperand(0).getValueType(), |
3375 | 204 | N0.getOperand(0), N1.getOperand(0)); |
3376 | 204 | AddToWorklist(ORNode.getNode()); |
3377 | 204 | return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, |
3378 | 204 | ORNode, N0.getOperand(1)); |
3379 | 204 | } |
3380 | 152k | |
3381 | 152k | // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) |
3382 | 152k | // Only perform this optimization up until type legalization, before |
3383 | 152k | // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by |
3384 | 152k | // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and |
3385 | 152k | // we don't want to undo this promotion. |
3386 | 152k | // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper |
3387 | 152k | // on scalars. |
3388 | 152k | if (152k (N0.getOpcode() == ISD::BITCAST || |
3389 | 133k | N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && |
3390 | 152k | Level <= AfterLegalizeTypes19.2k ) { |
3391 | 581 | SDValue In0 = N0.getOperand(0); |
3392 | 581 | SDValue In1 = N1.getOperand(0); |
3393 | 581 | EVT In0Ty = In0.getValueType(); |
3394 | 581 | EVT In1Ty = In1.getValueType(); |
3395 | 581 | SDLoc DL(N); |
3396 | 581 | // If both incoming values are integers, and the original types are the |
3397 | 581 | // same. |
3398 | 581 | if (In0Ty.isInteger() && 581 In1Ty.isInteger()116 && In0Ty == In1Ty114 ) { |
3399 | 112 | SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); |
3400 | 112 | SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); |
3401 | 112 | AddToWorklist(Op.getNode()); |
3402 | 112 | return BC; |
3403 | 112 | } |
3404 | 152k | } |
3405 | 152k | |
3406 | 152k | // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). |
3407 | 152k | // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) |
3408 | 152k | // If both shuffles use the same mask, and both shuffle within a single |
3409 | 152k | // vector, then it is worthwhile to move the swizzle after the operation. |
3410 | 152k | // The type-legalizer generates this pattern when loading illegal |
3411 | 152k | // vector types from memory. In many cases this allows additional shuffle |
3412 | 152k | // optimizations. |
3413 | 152k | // There are other cases where moving the shuffle after the xor/and/or |
3414 | 152k | // is profitable even if shuffles don't perform a swizzle. |
3415 | 152k | // If both shuffles use the same mask, and both shuffles have the same first |
3416 | 152k | // or second operand, then it might still be profitable to move the shuffle |
3417 | 152k | // after the xor/and/or operation. |
3418 | 152k | if (152k N0.getOpcode() == ISD::VECTOR_SHUFFLE && 152k Level < AfterLegalizeDAG197 ) { |
3419 | 197 | ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); |
3420 | 197 | ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); |
3421 | 197 | |
3422 | 197 | assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && |
3423 | 197 | "Inputs to shuffles are not the same type"); |
3424 | 197 | |
3425 | 197 | // Check that both shuffles use the same mask. The masks are known to be of |
3426 | 197 | // the same length because the result vector type is the same. |
3427 | 197 | // Check also that shuffles have only one use to avoid introducing extra |
3428 | 197 | // instructions. |
3429 | 197 | if (SVN0->hasOneUse() && 197 SVN1->hasOneUse()197 && |
3430 | 197 | SVN0->getMask().equals(SVN1->getMask())149 ) { |
3431 | 133 | SDValue ShOp = N0->getOperand(1); |
3432 | 133 | |
3433 | 133 | // Don't try to fold this node if it requires introducing a |
3434 | 133 | // build vector of all zeros that might be illegal at this stage. |
3435 | 133 | if (N->getOpcode() == ISD::XOR && 133 !ShOp.isUndef()32 ) { |
3436 | 22 | if (!LegalTypes) |
3437 | 20 | ShOp = DAG.getConstant(0, SDLoc(N), VT); |
3438 | 22 | else |
3439 | 2 | ShOp = SDValue(); |
3440 | 22 | } |
3441 | 133 | |
3442 | 133 | // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) |
3443 | 133 | // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) |
3444 | 133 | // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) |
3445 | 133 | if (N0.getOperand(1) == N1.getOperand(1) && 133 ShOp.getNode()62 ) { |
3446 | 62 | SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, |
3447 | 62 | N0->getOperand(0), N1->getOperand(0)); |
3448 | 62 | AddToWorklist(NewNode.getNode()); |
3449 | 62 | return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, |
3450 | 62 | SVN0->getMask()); |
3451 | 62 | } |
3452 | 71 | |
3453 | 71 | // Don't try to fold this node if it requires introducing a |
3454 | 71 | // build vector of all zeros that might be illegal at this stage. |
3455 | 71 | ShOp = N0->getOperand(0); |
3456 | 71 | if (N->getOpcode() == ISD::XOR && 71 !ShOp.isUndef()12 ) { |
3457 | 12 | if (!LegalTypes) |
3458 | 10 | ShOp = DAG.getConstant(0, SDLoc(N), VT); |
3459 | 12 | else |
3460 | 2 | ShOp = SDValue(); |
3461 | 12 | } |
3462 | 71 | |
3463 | 71 | // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) |
3464 | 71 | // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) |
3465 | 71 | // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) |
3466 | 71 | if (N0->getOperand(0) == N1->getOperand(0) && 71 ShOp.getNode()30 ) { |
3467 | 30 | SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, |
3468 | 30 | N0->getOperand(1), N1->getOperand(1)); |
3469 | 30 | AddToWorklist(NewNode.getNode()); |
3470 | 30 | return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, |
3471 | 30 | SVN0->getMask()); |
3472 | 30 | } |
3473 | 152k | } |
3474 | 197 | } |
3475 | 152k | |
3476 | 152k | return SDValue(); |
3477 | 152k | } |
3478 | | |
3479 | | /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient. |
3480 | | SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, |
3481 | 1.15M | const SDLoc &DL) { |
3482 | 1.15M | SDValue LL, LR, RL, RR, N0CC, N1CC; |
3483 | 1.15M | if (!isSetCCEquivalent(N0, LL, LR, N0CC) || |
3484 | 322k | !isSetCCEquivalent(N1, RL, RR, N1CC)) |
3485 | 1.12M | return SDValue(); |
3486 | 36.0k | |
3487 | 1.15M | assert(N0.getValueType() == N1.getValueType() && |
3488 | 36.0k | "Unexpected operand types for bitwise logic op"); |
3489 | 36.0k | assert(LL.getValueType() == LR.getValueType() && |
3490 | 36.0k | RL.getValueType() == RR.getValueType() && |
3491 | 36.0k | "Unexpected operand types for setcc"); |
3492 | 36.0k | |
3493 | 36.0k | // If we're here post-legalization or the logic op type is not i1, the logic |
3494 | 36.0k | // op type must match a setcc result type. Also, all folds require new |
3495 | 36.0k | // operations on the left and right operands, so those types must match. |
3496 | 36.0k | EVT VT = N0.getValueType(); |
3497 | 36.0k | EVT OpVT = LL.getValueType(); |
3498 | 36.0k | if (LegalOperations || 36.0k VT != MVT::i135.6k ) |
3499 | 13.1k | if (13.1k VT != getSetCCResultType(OpVT)13.1k ) |
3500 | 414 | return SDValue(); |
3501 | 35.6k | if (35.6k OpVT != RL.getValueType()35.6k ) |
3502 | 11.8k | return SDValue(); |
3503 | 23.7k | |
3504 | 23.7k | ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get(); |
3505 | 23.7k | ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get(); |
3506 | 23.7k | bool IsInteger = OpVT.isInteger(); |
3507 | 23.7k | if (LR == RR && 23.7k CC0 == CC112.0k && IsInteger11.4k ) { |
3508 | 11.1k | bool IsZero = isNullConstantOrNullSplatConstant(LR); |
3509 | 11.1k | bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR); |
3510 | 11.1k | |
3511 | 11.1k | // All bits clear? |
3512 | 11.1k | bool AndEqZero = IsAnd && CC1 == ISD::SETEQ10.6k && IsZero10.4k ; |
3513 | 11.1k | // All sign bits clear? |
3514 | 11.1k | bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT10.6k && IsNeg147 ; |
3515 | 11.1k | // Any bits set? |
3516 | 11.1k | bool OrNeZero = !IsAnd && CC1 == ISD::SETNE504 && IsZero65 ; |
3517 | 11.1k | // Any sign bits set? |
3518 | 11.1k | bool OrLtZero = !IsAnd && CC1 == ISD::SETLT504 && IsZero37 ; |
3519 | 11.1k | |
3520 | 11.1k | // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0) |
3521 | 11.1k | // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1) |
3522 | 11.1k | // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0) |
3523 | 11.1k | // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0) |
3524 | 11.1k | if (AndEqZero || 11.1k AndGtNeg1719 || OrNeZero712 || OrLtZero656 ) { |
3525 | 10.5k | SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL); |
3526 | 10.5k | AddToWorklist(Or.getNode()); |
3527 | 10.5k | return DAG.getSetCC(DL, VT, Or, LR, CC1); |
3528 | 10.5k | } |
3529 | 649 | |
3530 | 649 | // All bits set? |
3531 | 649 | bool AndEqNeg1 = IsAnd && 649 CC1 == ISD::SETEQ208 && IsNeg119 ; |
3532 | 649 | // All sign bits set? |
3533 | 649 | bool AndLtZero = IsAnd && CC1 == ISD::SETLT208 && IsZero13 ; |
3534 | 649 | // Any bits clear? |
3535 | 649 | bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE441 && IsNeg19 ; |
3536 | 649 | // Any sign bits clear? |
3537 | 649 | bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT441 && IsNeg177 ; |
3538 | 649 | |
3539 | 649 | // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1) |
3540 | 649 | // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0) |
3541 | 649 | // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1) |
3542 | 649 | // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1) |
3543 | 649 | if (AndEqNeg1 || 649 AndLtZero642 || OrNeNeg1637 || OrGtNeg1632 ) { |
3544 | 22 | SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL); |
3545 | 22 | AddToWorklist(And.getNode()); |
3546 | 22 | return DAG.getSetCC(DL, VT, And, LR, CC1); |
3547 | 22 | } |
3548 | 13.2k | } |
3549 | 13.2k | |
3550 | 13.2k | // TODO: What is the 'or' equivalent of this fold? |
3551 | 13.2k | // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2) |
3552 | 13.2k | if (13.2k IsAnd && 13.2k LL == RL10.9k && CC0 == CC1427 && IsInteger147 && CC0 == ISD::SETNE135 && |
3553 | 102 | ((isNullConstant(LR) && 102 isAllOnesConstant(RR)4 ) || |
3554 | 13.2k | (isAllOnesConstant(LR) && 102 isNullConstant(RR)3 ))) { |
3555 | 3 | SDValue One = DAG.getConstant(1, DL, OpVT); |
3556 | 3 | SDValue Two = DAG.getConstant(2, DL, OpVT); |
3557 | 3 | SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One); |
3558 | 3 | AddToWorklist(Add.getNode()); |
3559 | 3 | return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE); |
3560 | 3 | } |
3561 | 13.2k | |
3562 | 13.2k | // Try more general transforms if the predicates match and the only user of |
3563 | 13.2k | // the compares is the 'and' or 'or'. |
3564 | 13.2k | if (13.2k IsInteger && 13.2k TLI.convertSetCCLogicToBitwiseLogic(OpVT)12.2k && CC0 == CC1583 && |
3565 | 13.2k | N0.hasOneUse()295 && N1.hasOneUse()279 ) { |
3566 | 279 | // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 |
3567 | 279 | // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0 |
3568 | 279 | if ((IsAnd && 279 CC1 == ISD::SETEQ34 ) || (!IsAnd && 272 CC1 == ISD::SETNE245 )) { |
3569 | 75 | SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR); |
3570 | 75 | SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR); |
3571 | 75 | SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR); |
3572 | 75 | SDValue Zero = DAG.getConstant(0, DL, OpVT); |
3573 | 75 | return DAG.getSetCC(DL, VT, Or, Zero, CC1); |
3574 | 75 | } |
3575 | 13.1k | } |
3576 | 13.1k | |
3577 | 13.1k | // Canonicalize equivalent operands to LL == RL. |
3578 | 13.1k | if (13.1k LL == RR && 13.1k LR == RL88 ) { |
3579 | 0 | CC1 = ISD::getSetCCSwappedOperands(CC1); |
3580 | 0 | std::swap(RL, RR); |
3581 | 0 | } |
3582 | 13.1k | |
3583 | 13.1k | // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) |
3584 | 13.1k | // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) |
3585 | 13.1k | if (LL == RL && 13.1k LR == RR829 ) { |
3586 | 27 | ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger) |
3587 | 37 | : ISD::getSetCCOrOperation(CC0, CC1, IsInteger); |
3588 | 64 | if (NewCC != ISD::SETCC_INVALID && |
3589 | 59 | (!LegalOperations || |
3590 | 27 | (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) && |
3591 | 27 | TLI.isOperationLegal(ISD::SETCC, OpVT)))) |
3592 | 32 | return DAG.getSetCC(DL, VT, LL, LR, NewCC); |
3593 | 13.1k | } |
3594 | 13.1k | |
3595 | 13.1k | return SDValue(); |
3596 | 13.1k | } |
3597 | | |
3598 | | /// This contains all DAGCombine rules which reduce two values combined by |
3599 | | /// an And operation to a single value. This makes them reusable in the context |
3600 | | /// of visitSELECT(). Rules involving constants are not included as |
3601 | | /// visitSELECT() already handles those cases. |
3602 | 903k | SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { |
3603 | 903k | EVT VT = N1.getValueType(); |
3604 | 903k | SDLoc DL(N); |
3605 | 903k | |
3606 | 903k | // fold (and x, undef) -> 0 |
3607 | 903k | if (N0.isUndef() || 903k N1.isUndef()903k ) |
3608 | 1 | return DAG.getConstant(0, DL, VT); |
3609 | 903k | |
3610 | 903k | if (SDValue 903k V903k = foldLogicOfSetCCs(true, N0, N1, DL)) |
3611 | 10.4k | return V; |
3612 | 892k | |
3613 | 892k | if (892k N0.getOpcode() == ISD::ADD && 892k N1.getOpcode() == ISD::SRL28.7k && |
3614 | 892k | VT.getSizeInBits() <= 6425 ) { |
3615 | 25 | if (ConstantSDNode *ADDI25 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { |
3616 | 25 | APInt ADDC = ADDI->getAPIntValue(); |
3617 | 25 | if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())25 ) { |
3618 | 6 | // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal |
3619 | 6 | // immediate for an add, but it is legal if its top c2 bits are set, |
3620 | 6 | // transform the ADD so the immediate doesn't need to be materialized |
3621 | 6 | // in a register. |
3622 | 6 | if (ConstantSDNode *SRLI6 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { |
3623 | 6 | APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), |
3624 | 6 | SRLI->getZExtValue()); |
3625 | 6 | if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)6 ) { |
3626 | 6 | ADDC |= Mask; |
3627 | 6 | if (TLI.isLegalAddImmediate(ADDC.getSExtValue())6 ) { |
3628 | 6 | SDLoc DL0(N0); |
3629 | 6 | SDValue NewAdd = |
3630 | 6 | DAG.getNode(ISD::ADD, DL0, VT, |
3631 | 6 | N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); |
3632 | 6 | CombineTo(N0.getNode(), NewAdd); |
3633 | 6 | // Return N so it doesn't get rechecked! |
3634 | 6 | return SDValue(N, 0); |
3635 | 6 | } |
3636 | 892k | } |
3637 | 6 | } |
3638 | 6 | } |
3639 | 25 | } |
3640 | 25 | } |
3641 | 892k | |
3642 | 892k | // Reduce bit extract of low half of an integer to the narrower type. |
3643 | 892k | // (and (srl i64:x, K), KMask) -> |
3644 | 892k | // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask) |
3645 | 892k | if (892k N0.getOpcode() == ISD::SRL && 892k N0.hasOneUse()44.7k ) { |
3646 | 38.0k | if (ConstantSDNode *CAnd38.0k = dyn_cast<ConstantSDNode>(N1)) { |
3647 | 36.8k | if (ConstantSDNode *CShift36.8k = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { |
3648 | 33.3k | unsigned Size = VT.getSizeInBits(); |
3649 | 33.3k | const APInt &AndMask = CAnd->getAPIntValue(); |
3650 | 33.3k | unsigned ShiftBits = CShift->getZExtValue(); |
3651 | 33.3k | |
3652 | 33.3k | // Bail out, this node will probably disappear anyway. |
3653 | 33.3k | if (ShiftBits == 0) |
3654 | 2 | return SDValue(); |
3655 | 33.3k | |
3656 | 33.3k | unsigned MaskBits = AndMask.countTrailingOnes(); |
3657 | 33.3k | EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); |
3658 | 33.3k | |
3659 | 33.3k | if (AndMask.isMask() && |
3660 | 33.3k | // Required bits must not span the two halves of the integer and |
3661 | 33.3k | // must fit in the half size type. |
3662 | 29.9k | (ShiftBits + MaskBits <= Size / 2) && |
3663 | 21.4k | TLI.isNarrowingProfitable(VT, HalfVT) && |
3664 | 264 | TLI.isTypeDesirableForOp(ISD::AND, HalfVT) && |
3665 | 232 | TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) && |
3666 | 232 | TLI.isTruncateFree(VT, HalfVT) && |
3667 | 33.3k | TLI.isZExtFree(HalfVT, VT)232 ) { |
3668 | 195 | // The isNarrowingProfitable is to avoid regressions on PPC and |
3669 | 195 | // AArch64 which match a few 64-bit bit insert / bit extract patterns |
3670 | 195 | // on downstream users of this. Those patterns could probably be |
3671 | 195 | // extended to handle extensions mixed in. |
3672 | 195 | |
3673 | 195 | SDValue SL(N0); |
3674 | 195 | assert(MaskBits <= Size); |
3675 | 195 | |
3676 | 195 | // Extracting the highest bit of the low half. |
3677 | 195 | EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); |
3678 | 195 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT, |
3679 | 195 | N0.getOperand(0)); |
3680 | 195 | |
3681 | 195 | SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT); |
3682 | 195 | SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT); |
3683 | 195 | SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK); |
3684 | 195 | SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask); |
3685 | 195 | return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And); |
3686 | 195 | } |
3687 | 892k | } |
3688 | 36.8k | } |
3689 | 38.0k | } |
3690 | 892k | |
3691 | 892k | return SDValue(); |
3692 | 892k | } |
3693 | | |
3694 | | bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, |
3695 | | EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, |
3696 | 42.8k | bool &NarrowLoad) { |
3697 | 42.8k | if (!AndC->getAPIntValue().isMask()) |
3698 | 24.7k | return false; |
3699 | 18.1k | |
3700 | 18.1k | unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes(); |
3701 | 18.1k | |
3702 | 18.1k | ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); |
3703 | 18.1k | LoadedVT = LoadN->getMemoryVT(); |
3704 | 18.1k | |
3705 | 18.1k | if (ExtVT == LoadedVT && |
3706 | 988 | (!LegalOperations || |
3707 | 18.1k | TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)826 )) { |
3708 | 162 | // ZEXTLOAD will match without needing to change the size of the value being |
3709 | 162 | // loaded. |
3710 | 162 | NarrowLoad = false; |
3711 | 162 | return true; |
3712 | 162 | } |
3713 | 18.0k | |
3714 | 18.0k | // Do not change the width of a volatile load. |
3715 | 18.0k | if (18.0k LoadN->isVolatile()18.0k ) |
3716 | 136 | return false; |
3717 | 17.8k | |
3718 | 17.8k | // Do not generate loads of non-round integer types since these can |
3719 | 17.8k | // be expensive (and would be wrong if the type is not byte sized). |
3720 | 17.8k | if (17.8k !LoadedVT.bitsGT(ExtVT) || 17.8k !ExtVT.isRound()17.0k ) |
3721 | 11.0k | return false; |
3722 | 6.79k | |
3723 | 6.79k | if (6.79k LegalOperations && |
3724 | 6.51k | !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) |
3725 | 3.62k | return false; |
3726 | 3.16k | |
3727 | 3.16k | if (3.16k !TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)3.16k ) |
3728 | 229 | return false; |
3729 | 2.93k | |
3730 | 2.93k | NarrowLoad = true; |
3731 | 2.93k | return true; |
3732 | 2.93k | } |
3733 | | |
3734 | 1.11M | SDValue DAGCombiner::visitAND(SDNode *N) { |
3735 | 1.11M | SDValue N0 = N->getOperand(0); |
3736 | 1.11M | SDValue N1 = N->getOperand(1); |
3737 | 1.11M | EVT VT = N1.getValueType(); |
3738 | 1.11M | |
3739 | 1.11M | // x & x --> x |
3740 | 1.11M | if (N0 == N1) |
3741 | 12 | return N0; |
3742 | 1.11M | |
3743 | 1.11M | // fold vector ops |
3744 | 1.11M | if (1.11M VT.isVector()1.11M ) { |
3745 | 74.9k | if (SDValue FoldedVOp = SimplifyVBinOp(N)) |
3746 | 973 | return FoldedVOp; |
3747 | 74.0k | |
3748 | 74.0k | // fold (and x, 0) -> 0, vector edition |
3749 | 74.0k | if (74.0k ISD::isBuildVectorAllZeros(N0.getNode())74.0k ) |
3750 | 74.0k | // do not return N0, because undef node may exist in N0 |
3751 | 54 | return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), |
3752 | 54 | SDLoc(N), N0.getValueType()); |
3753 | 73.9k | if (73.9k ISD::isBuildVectorAllZeros(N1.getNode())73.9k ) |
3754 | 73.9k | // do not return N1, because undef node may exist in N1 |
3755 | 1 | return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), |
3756 | 1 | SDLoc(N), N1.getValueType()); |
3757 | 73.9k | |
3758 | 73.9k | // fold (and x, -1) -> x, vector edition |
3759 | 73.9k | if (73.9k ISD::isBuildVectorAllOnes(N0.getNode())73.9k ) |
3760 | 30 | return N1; |
3761 | 73.9k | if (73.9k ISD::isBuildVectorAllOnes(N1.getNode())73.9k ) |
3762 | 1 | return N0; |
3763 | 1.11M | } |
3764 | 1.11M | |
3765 | 1.11M | // fold (and c1, c2) -> c1&c2 |
3766 | 1.11M | ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); |
3767 | 1.11M | ConstantSDNode *N1C = isConstOrConstSplat(N1); |
3768 | 1.11M | if (N0C && 1.11M N1C24 && !N1C->isOpaque()18 ) |
3769 | 18 | return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); |
3770 | 1.11M | // canonicalize constant to RHS |
3771 | 1.11M | if (1.11M DAG.isConstantIntBuildVectorOrConstantInt(N0) && |
3772 | 1.34k | !DAG.isConstantIntBuildVectorOrConstantInt(N1)) |
3773 | 1.34k | return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); |
3774 | 1.11M | // fold (and x, -1) -> x |
3775 | 1.11M | if (1.11M isAllOnesConstant(N1)1.11M ) |
3776 | 8 | return N0; |
3777 | 1.11M | // if (and x, c) is known to be zero, return 0 |
3778 | 1.11M | unsigned BitWidth = VT.getScalarSizeInBits(); |
3779 | 1.11M | if (N1C && 1.11M DAG.MaskedValueIsZero(SDValue(N, 0), |
3780 | 962k | APInt::getAllOnesValue(BitWidth))) |
3781 | 29 | return DAG.getConstant(0, SDLoc(N), VT); |
3782 | 1.11M | |
3783 | 1.11M | if (SDValue 1.11M NewSel1.11M = foldBinOpIntoSelect(N)) |
3784 | 73 | return NewSel; |
3785 | 1.11M | |
3786 | 1.11M | // reassociate and |
3787 | 1.11M | if (SDValue 1.11M RAND1.11M = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) |
3788 | 3.15k | return RAND; |
3789 | 1.10M | // fold (and (or x, C), D) -> D if (C & D) == D |
3790 | 1.10M | if (1.10M N1C && 1.10M N0.getOpcode() == ISD::OR959k ) |
3791 | 13.2k | if (ConstantSDNode *13.2k ORI13.2k = isConstOrConstSplat(N0.getOperand(1))) |
3792 | 5.86k | if (5.86k N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue())5.86k ) |
3793 | 2.25k | return N1; |
3794 | 1.10M | // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. |
3795 | 1.10M | if (1.10M N1C && 1.10M N0.getOpcode() == ISD::ANY_EXTEND957k ) { |
3796 | 20.9k | SDValue N0Op0 = N0.getOperand(0); |
3797 | 20.9k | APInt Mask = ~N1C->getAPIntValue(); |
3798 | 20.9k | Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits()); |
3799 | 20.9k | if (DAG.MaskedValueIsZero(N0Op0, Mask)20.9k ) { |
3800 | 9.61k | SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), |
3801 | 9.61k | N0.getValueType(), N0Op0); |
3802 | 9.61k | |
3803 | 9.61k | // Replace uses of the AND with uses of the Zero extend node. |
3804 | 9.61k | CombineTo(N, Zext); |
3805 | 9.61k | |
3806 | 9.61k | // We actually want to replace all uses of the any_extend with the |
3807 | 9.61k | // zero_extend, to avoid duplicating things. This will later cause this |
3808 | 9.61k | // AND to be folded. |
3809 | 9.61k | CombineTo(N0.getNode(), Zext); |
3810 | 9.61k | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
3811 | 9.61k | } |
3812 | 1.09M | } |
3813 | 1.09M | // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> |
3814 | 1.09M | // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must |
3815 | 1.09M | // already be zero by virtue of the width of the base type of the load. |
3816 | 1.09M | // |
3817 | 1.09M | // the 'X' node here can either be nothing or an extract_vector_elt to catch |
3818 | 1.09M | // more cases. |
3819 | 1.09M | if (1.09M (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
3820 | 9.30k | N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() && |
3821 | 7.60k | N0.getOperand(0).getOpcode() == ISD::LOAD && |
3822 | 2.88k | N0.getOperand(0).getResNo() == 0) || |
3823 | 1.09M | (N0.getOpcode() == ISD::LOAD && 1.09M N0.getResNo() == 0278k )) { |
3824 | 281k | LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? |
3825 | 281k | N0278k : N0.getOperand(0)2.88k ); |
3826 | 281k | |
3827 | 281k | // Get the constant (if applicable) the zero'th operand is being ANDed with. |
3828 | 281k | // This can be a pure constant or a vector splat, in which case we treat the |
3829 | 281k | // vector as a scalar and use the splat value. |
3830 | 281k | APInt Constant = APInt::getNullValue(1); |
3831 | 281k | if (const ConstantSDNode *C281k = dyn_cast<ConstantSDNode>(N1)) { |
3832 | 264k | Constant = C->getAPIntValue(); |
3833 | 281k | } else if (BuildVectorSDNode *16.5k Vector16.5k = dyn_cast<BuildVectorSDNode>(N1)) { |
3834 | 3.39k | APInt SplatValue, SplatUndef; |
3835 | 3.39k | unsigned SplatBitSize; |
3836 | 3.39k | bool HasAnyUndefs; |
3837 | 3.39k | bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, |
3838 | 3.39k | SplatBitSize, HasAnyUndefs); |
3839 | 3.39k | if (IsSplat3.39k ) { |
3840 | 3.39k | // Undef bits can contribute to a possible optimisation if set, so |
3841 | 3.39k | // set them. |
3842 | 3.39k | SplatValue |= SplatUndef; |
3843 | 3.39k | |
3844 | 3.39k | // The splat value may be something like "0x00FFFFFF", which means 0 for |
3845 | 3.39k | // the first vector value and FF for the rest, repeating. We need a mask |
3846 | 3.39k | // that will apply equally to all members of the vector, so AND all the |
3847 | 3.39k | // lanes of the constant together. |
3848 | 3.39k | EVT VT = Vector->getValueType(0); |
3849 | 3.39k | unsigned BitWidth = VT.getScalarSizeInBits(); |
3850 | 3.39k | |
3851 | 3.39k | // If the splat value has been compressed to a bitlength lower |
3852 | 3.39k | // than the size of the vector lane, we need to re-expand it to |
3853 | 3.39k | // the lane size. |
3854 | 3.39k | if (BitWidth > SplatBitSize) |
3855 | 1 | for (SplatValue = SplatValue.zextOrTrunc(BitWidth); |
3856 | 2 | SplatBitSize < BitWidth; |
3857 | 1 | SplatBitSize = SplatBitSize * 2) |
3858 | 1 | SplatValue |= SplatValue.shl(SplatBitSize); |
3859 | 3.39k | |
3860 | 3.39k | // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a |
3861 | 3.39k | // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. |
3862 | 3.39k | if (SplatBitSize % BitWidth == 03.39k ) { |
3863 | 3.39k | Constant = APInt::getAllOnesValue(BitWidth); |
3864 | 6.84k | for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n6.84k ; ++i3.45k ) |
3865 | 3.45k | Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); |
3866 | 3.39k | } |
3867 | 3.39k | } |
3868 | 16.5k | } |
3869 | 281k | |
3870 | 281k | // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is |
3871 | 281k | // actually legal and isn't going to get expanded, else this is a false |
3872 | 281k | // optimisation. |
3873 | 281k | bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, |
3874 | 281k | Load->getValueType(0), |
3875 | 281k | Load->getMemoryVT()); |
3876 | 281k | |
3877 | 281k | // Resize the constant to the same size as the original memory access before |
3878 | 281k | // extension. If it is still the AllOnesValue then this AND is completely |
3879 | 281k | // unneeded. |
3880 | 281k | Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits()); |
3881 | 281k | |
3882 | 281k | bool B; |
3883 | 281k | switch (Load->getExtensionType()) { |
3884 | 2.23k | default: B = false; break; |
3885 | 191k | case ISD::EXTLOAD: B = CanZextLoadProfitably; break; |
3886 | 87.4k | case ISD::ZEXTLOAD: |
3887 | 87.4k | case ISD::NON_EXTLOAD: B = true; break; |
3888 | 281k | } |
3889 | 281k | |
3890 | 281k | if (281k B && 281k Constant.isAllOnesValue()274k ) { |
3891 | 190k | // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to |
3892 | 190k | // preserve semantics once we get rid of the AND. |
3893 | 190k | SDValue NewLoad(Load, 0); |
3894 | 190k | |
3895 | 190k | // Fold the AND away. NewLoad may get replaced immediately. |
3896 | 190k | CombineTo(N, (N0.getNode() == Load) ? NewLoad190k : N01 ); |
3897 | 190k | |
3898 | 190k | if (Load->getExtensionType() == ISD::EXTLOAD190k ) { |
3899 | 181k | NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, |
3900 | 181k | Load->getValueType(0), SDLoc(Load), |
3901 | 181k | Load->getChain(), Load->getBasePtr(), |
3902 | 181k | Load->getOffset(), Load->getMemoryVT(), |
3903 | 181k | Load->getMemOperand()); |
3904 | 181k | // Replace uses of the EXTLOAD with the new ZEXTLOAD. |
3905 | 181k | if (Load->getNumValues() == 3181k ) { |
3906 | 0 | // PRE/POST_INC loads have 3 values. |
3907 | 0 | SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), |
3908 | 0 | NewLoad.getValue(2) }; |
3909 | 0 | CombineTo(Load, To, 3, true); |
3910 | 181k | } else { |
3911 | 181k | CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); |
3912 | 181k | } |
3913 | 181k | } |
3914 | 190k | |
3915 | 190k | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
3916 | 190k | } |
3917 | 905k | } |
3918 | 905k | |
3919 | 905k | // fold (and (load x), 255) -> (zextload x, i8) |
3920 | 905k | // fold (and (extload x, i16), 255) -> (zextload x, i8) |
3921 | 905k | // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) |
3922 | 905k | if (905k !VT.isVector() && 905k N1C834k && (N0.getOpcode() == ISD::LOAD || |
3923 | 663k | (N0.getOpcode() == ISD::ANY_EXTEND && |
3924 | 905k | N0.getOperand(0).getOpcode() == ISD::LOAD11.0k ))) { |
3925 | 72.9k | bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; |
3926 | 72.9k | LoadSDNode *LN0 = HasAnyExt |
3927 | 264 | ? cast<LoadSDNode>(N0.getOperand(0)) |
3928 | 72.6k | : cast<LoadSDNode>(N0); |
3929 | 72.9k | if (LN0->getExtensionType() != ISD::SEXTLOAD && |
3930 | 72.9k | LN0->isUnindexed()70.5k && N0.hasOneUse()70.5k && SDValue(LN0, 0).hasOneUse()42.7k ) { |
3931 | 42.6k | auto NarrowLoad = false; |
3932 | 42.6k | EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0)30 : VT42.6k ; |
3933 | 42.6k | EVT ExtVT, LoadedVT; |
3934 | 42.6k | if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, |
3935 | 42.6k | NarrowLoad)) { |
3936 | 3.08k | if (!NarrowLoad3.08k ) { |
3937 | 160 | SDValue NewLoad = |
3938 | 160 | DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, |
3939 | 160 | LN0->getChain(), LN0->getBasePtr(), ExtVT, |
3940 | 160 | LN0->getMemOperand()); |
3941 | 160 | AddToWorklist(N); |
3942 | 160 | CombineTo(LN0, NewLoad, NewLoad.getValue(1)); |
3943 | 160 | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
3944 | 0 | } else { |
3945 | 2.92k | EVT PtrType = LN0->getOperand(1).getValueType(); |
3946 | 2.92k | |
3947 | 2.92k | unsigned Alignment = LN0->getAlignment(); |
3948 | 2.92k | SDValue NewPtr = LN0->getBasePtr(); |
3949 | 2.92k | |
3950 | 2.92k | // For big endian targets, we need to add an offset to the pointer |
3951 | 2.92k | // to load the correct bytes. For little endian systems, we merely |
3952 | 2.92k | // need to read fewer bytes from the same pointer. |
3953 | 2.92k | if (DAG.getDataLayout().isBigEndian()2.92k ) { |
3954 | 4 | unsigned LVTStoreBytes = LoadedVT.getStoreSize(); |
3955 | 4 | unsigned EVTStoreBytes = ExtVT.getStoreSize(); |
3956 | 4 | unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; |
3957 | 4 | SDLoc DL(LN0); |
3958 | 4 | NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, |
3959 | 4 | NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); |
3960 | 4 | Alignment = MinAlign(Alignment, PtrOff); |
3961 | 4 | } |
3962 | 2.92k | |
3963 | 2.92k | AddToWorklist(NewPtr.getNode()); |
3964 | 2.92k | |
3965 | 2.92k | SDValue Load = DAG.getExtLoad( |
3966 | 2.92k | ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, |
3967 | 2.92k | LN0->getPointerInfo(), ExtVT, Alignment, |
3968 | 2.92k | LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); |
3969 | 2.92k | AddToWorklist(N); |
3970 | 2.92k | CombineTo(LN0, Load, Load.getValue(1)); |
3971 | 2.92k | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
3972 | 2.92k | } |
3973 | 902k | } |
3974 | 42.6k | } |
3975 | 72.9k | } |
3976 | 902k | |
3977 | 902k | if (SDValue 902k Combined902k = visitANDLike(N0, N1, N)) |
3978 | 10.6k | return Combined; |
3979 | 892k | |
3980 | 892k | // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) |
3981 | 892k | if (892k N0.getOpcode() == N1.getOpcode()892k ) |
3982 | 63.1k | if (SDValue 63.1k Tmp63.1k = SimplifyBinOpWithSameOpcodeHands(N)) |
3983 | 311 | return Tmp; |
3984 | 891k | |
3985 | 891k | // Masking the negated extension of a boolean is just the zero-extended |
3986 | 891k | // boolean: |
3987 | 891k | // and (sub 0, zext(bool X)), 1 --> zext(bool X) |
3988 | 891k | // and (sub 0, sext(bool X)), 1 --> zext(bool X) |
3989 | 891k | // |
3990 | 891k | // Note: the SimplifyDemandedBits fold below can make an information-losing |
3991 | 891k | // transform, and then we have no way to find this better fold. |
3992 | 891k | if (891k N1C && 891k N1C->isOne()753k && N0.getOpcode() == ISD::SUB404k ) { |
3993 | 50 | if (isNullConstantOrNullSplatConstant(N0.getOperand(0))50 ) { |
3994 | 8 | SDValue SubRHS = N0.getOperand(1); |
3995 | 8 | if (SubRHS.getOpcode() == ISD::ZERO_EXTEND && |
3996 | 3 | SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) |
3997 | 3 | return SubRHS; |
3998 | 5 | if (5 SubRHS.getOpcode() == ISD::SIGN_EXTEND && |
3999 | 3 | SubRHS.getOperand(0).getScalarValueSizeInBits() == 1) |
4000 | 3 | return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0)); |
4001 | 891k | } |
4002 | 50 | } |
4003 | 891k | |
4004 | 891k | // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) |
4005 | 891k | // fold (and (sra)) -> (and (srl)) when possible. |
4006 | 891k | if (891k SimplifyDemandedBits(SDValue(N, 0))891k ) |
4007 | 388k | return SDValue(N, 0); |
4008 | 503k | |
4009 | 503k | // fold (zext_inreg (extload x)) -> (zextload x) |
4010 | 503k | if (503k ISD::isEXTLoad(N0.getNode()) && 503k ISD::isUNINDEXEDLoad(N0.getNode())9.17k ) { |
4011 | 9.17k | LoadSDNode *LN0 = cast<LoadSDNode>(N0); |
4012 | 9.17k | EVT MemVT = LN0->getMemoryVT(); |
4013 | 9.17k | // If we zero all the possible extended bits, then we can turn this into |
4014 | 9.17k | // a zextload if we are running before legalize or the operation is legal. |
4015 | 9.17k | unsigned BitWidth = N1.getScalarValueSizeInBits(); |
4016 | 9.17k | if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, |
4017 | 9.17k | BitWidth - MemVT.getScalarSizeInBits())) && |
4018 | 8.72k | ((!LegalOperations && 8.72k !LN0->isVolatile()6.67k ) || |
4019 | 9.17k | TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)2.05k )) { |
4020 | 6.86k | SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, |
4021 | 6.86k | LN0->getChain(), LN0->getBasePtr(), |
4022 | 6.86k | MemVT, LN0->getMemOperand()); |
4023 | 6.86k | AddToWorklist(N); |
4024 | 6.86k | CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); |
4025 | 6.86k | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
4026 | 6.86k | } |
4027 | 496k | } |
4028 | 496k | // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use |
4029 | 496k | if (496k ISD::isSEXTLoad(N0.getNode()) && 496k ISD::isUNINDEXEDLoad(N0.getNode())2.23k && |
4030 | 496k | N0.hasOneUse()2.23k ) { |
4031 | 366 | LoadSDNode *LN0 = cast<LoadSDNode>(N0); |
4032 | 366 | EVT MemVT = LN0->getMemoryVT(); |
4033 | 366 | // If we zero all the possible extended bits, then we can turn this into |
4034 | 366 | // a zextload if we are running before legalize or the operation is legal. |
4035 | 366 | unsigned BitWidth = N1.getScalarValueSizeInBits(); |
4036 | 366 | if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, |
4037 | 366 | BitWidth - MemVT.getScalarSizeInBits())) && |
4038 | 234 | ((!LegalOperations && 234 !LN0->isVolatile()8 ) || |
4039 | 366 | TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)226 )) { |
4040 | 218 | SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, |
4041 | 218 | LN0->getChain(), LN0->getBasePtr(), |
4042 | 218 | MemVT, LN0->getMemOperand()); |
4043 | 218 | AddToWorklist(N); |
4044 | 218 | CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); |
4045 | 218 | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
4046 | 218 | } |
4047 | 495k | } |
4048 | 495k | // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) |
4049 | 495k | if (495k N1C && 495k N1C->getAPIntValue() == 0xffff360k && N0.getOpcode() == ISD::OR34.8k ) { |
4050 | 509 | if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), |
4051 | 509 | N0.getOperand(1), false)) |
4052 | 3 | return BSwap; |
4053 | 495k | } |
4054 | 495k | |
4055 | 495k | return SDValue(); |
4056 | 495k | } |
4057 | | |
4058 | | /// Match (a >> 8) | (a << 8) as (bswap a) >> 16. |
4059 | | SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, |
4060 | 260k | bool DemandHighBits) { |
4061 | 260k | if (!LegalOperations) |
4062 | 140k | return SDValue(); |
4063 | 120k | |
4064 | 120k | EVT VT = N->getValueType(0); |
4065 | 120k | if (VT != MVT::i64 && 120k VT != MVT::i3288.8k && VT != MVT::i1618.2k ) |
4066 | 17.6k | return SDValue(); |
4067 | 102k | if (102k !TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)102k ) |
4068 | 12.8k | return SDValue(); |
4069 | 89.7k | |
4070 | 89.7k | // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff) |
4071 | 89.7k | bool LookPassAnd0 = false; |
4072 | 89.7k | bool LookPassAnd1 = false; |
4073 | 89.7k | if (N0.getOpcode() == ISD::AND && 89.7k N0.getOperand(0).getOpcode() == ISD::SRL7.70k ) |
4074 | 1.11k | std::swap(N0, N1); |
4075 | 89.7k | if (N1.getOpcode() == ISD::AND && 89.7k N1.getOperand(0).getOpcode() == ISD::SHL6.68k ) |
4076 | 500 | std::swap(N0, N1); |
4077 | 89.7k | if (N0.getOpcode() == ISD::AND89.7k ) { |
4078 | 7.16k | if (!N0.getNode()->hasOneUse()) |
4079 | 993 | return SDValue(); |
4080 | 6.16k | ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); |
4081 | 6.16k | if (!N01C || 6.16k N01C->getZExtValue() != 0xFF005.75k ) |
4082 | 6.08k | return SDValue(); |
4083 | 85 | N0 = N0.getOperand(0); |
4084 | 85 | LookPassAnd0 = true; |
4085 | 85 | } |
4086 | 89.7k | |
4087 | 82.6k | if (82.6k N1.getOpcode() == ISD::AND82.6k ) { |
4088 | 3.80k | if (!N1.getNode()->hasOneUse()) |
4089 | 131 | return SDValue(); |
4090 | 3.67k | ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); |
4091 | 3.67k | if (!N11C || 3.67k N11C->getZExtValue() != 0xFF3.41k ) |
4092 | 3.52k | return SDValue(); |
4093 | 144 | N1 = N1.getOperand(0); |
4094 | 144 | LookPassAnd1 = true; |
4095 | 144 | } |
4096 | 82.6k | |
4097 | 78.9k | if (78.9k N0.getOpcode() == ISD::SRL && 78.9k N1.getOpcode() == ISD::SHL2.42k ) |
4098 | 1.60k | std::swap(N0, N1); |
4099 | 78.9k | if (N0.getOpcode() != ISD::SHL || 78.9k N1.getOpcode() != ISD::SRL15.1k ) |
4100 | 76.3k | return SDValue(); |
4101 | 2.66k | if (2.66k !N0.getNode()->hasOneUse() || 2.66k !N1.getNode()->hasOneUse()2.66k ) |
4102 | 119 | return SDValue(); |
4103 | 2.54k | |
4104 | 2.54k | ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); |
4105 | 2.54k | ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); |
4106 | 2.54k | if (!N01C || 2.54k !N11C1.50k ) |
4107 | 1.07k | return SDValue(); |
4108 | 1.46k | if (1.46k N01C->getZExtValue() != 8 || 1.46k N11C->getZExtValue() != 842 ) |
4109 | 1.43k | return SDValue(); |
4110 | 34 | |
4111 | 34 | // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) |
4112 | 34 | SDValue N00 = N0->getOperand(0); |
4113 | 34 | if (!LookPassAnd0 && 34 N00.getOpcode() == ISD::AND21 ) { |
4114 | 6 | if (!N00.getNode()->hasOneUse()) |
4115 | 0 | return SDValue(); |
4116 | 6 | ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); |
4117 | 6 | if (!N001C || 6 N001C->getZExtValue() != 0xFF6 ) |
4118 | 2 | return SDValue(); |
4119 | 4 | N00 = N00.getOperand(0); |
4120 | 4 | LookPassAnd0 = true; |
4121 | 4 | } |
4122 | 34 | |
4123 | 32 | SDValue N10 = N1->getOperand(0); |
4124 | 32 | if (!LookPassAnd1 && 32 N10.getOpcode() == ISD::AND24 ) { |
4125 | 5 | if (!N10.getNode()->hasOneUse()) |
4126 | 0 | return SDValue(); |
4127 | 5 | ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); |
4128 | 5 | if (!N101C || 5 N101C->getZExtValue() != 0xFF005 ) |
4129 | 0 | return SDValue(); |
4130 | 5 | N10 = N10.getOperand(0); |
4131 | 5 | LookPassAnd1 = true; |
4132 | 5 | } |
4133 | 32 | |
4134 | 32 | if (32 N00 != N1032 ) |
4135 | 0 | return SDValue(); |
4136 | 32 | |
4137 | 32 | // Make sure everything beyond the low halfword gets set to zero since the SRL |
4138 | 32 | // 16 will clear the top bits. |
4139 | 32 | unsigned OpSizeInBits = VT.getSizeInBits(); |
4140 | 32 | if (DemandHighBits && 32 OpSizeInBits > 1621 ) { |
4141 | 21 | // If the left-shift isn't masked out then the only way this is a bswap is |
4142 | 21 | // if all bits beyond the low 8 are 0. In that case the entire pattern |
4143 | 21 | // reduces to a left shift anyway: leave it for other parts of the combiner. |
4144 | 21 | if (!LookPassAnd0) |
4145 | 4 | return SDValue(); |
4146 | 17 | |
4147 | 17 | // However, if the right shift isn't masked out then it might be because |
4148 | 17 | // it's not needed. See if we can spot that too. |
4149 | 17 | if (17 !LookPassAnd1 && |
4150 | 10 | !DAG.MaskedValueIsZero( |
4151 | 10 | N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) |
4152 | 0 | return SDValue(); |
4153 | 28 | } |
4154 | 28 | |
4155 | 28 | SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); |
4156 | 28 | if (OpSizeInBits > 1628 ) { |
4157 | 28 | SDLoc DL(N); |
4158 | 28 | Res = DAG.getNode(ISD::SRL, DL, VT, Res, |
4159 | 28 | DAG.getConstant(OpSizeInBits - 16, DL, |
4160 | 28 | getShiftAmountTy(VT))); |
4161 | 28 | } |
4162 | 260k | return Res; |
4163 | 260k | } |
4164 | | |
4165 | | /// Return true if the specified node is an element that makes up a 32-bit |
4166 | | /// packed halfword byteswap. |
4167 | | /// ((x & 0x000000ff) << 8) | |
4168 | | /// ((x & 0x0000ff00) >> 8) | |
4169 | | /// ((x & 0x00ff0000) << 8) | |
4170 | | /// ((x & 0xff000000) >> 8) |
4171 | 5.52k | static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { |
4172 | 5.52k | if (!N.getNode()->hasOneUse()) |
4173 | 575 | return false; |
4174 | 4.95k | |
4175 | 4.95k | unsigned Opc = N.getOpcode(); |
4176 | 4.95k | if (Opc != ISD::AND && 4.95k Opc != ISD::SHL4.46k && Opc != ISD::SRL4.12k ) |
4177 | 3.88k | return false; |
4178 | 1.07k | |
4179 | 1.07k | SDValue N0 = N.getOperand(0); |
4180 | 1.07k | unsigned Opc0 = N0.getOpcode(); |
4181 | 1.07k | if (Opc0 != ISD::AND && 1.07k Opc0 != ISD::SHL1.05k && Opc0 != ISD::SRL952 ) |
4182 | 733 | return false; |
4183 | 338 | |
4184 | 338 | ConstantSDNode *N1C = nullptr; |
4185 | 338 | // SHL or SRL: look upstream for AND mask operand |
4186 | 338 | if (Opc == ISD::AND) |
4187 | 319 | N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); |
4188 | 19 | else if (19 Opc0 == ISD::AND19 ) |
4189 | 19 | N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); |
4190 | 338 | if (!N1C) |
4191 | 75 | return false; |
4192 | 263 | |
4193 | 263 | unsigned MaskByteOffset; |
4194 | 263 | switch (N1C->getZExtValue()) { |
4195 | 207 | default: |
4196 | 207 | return false; |
4197 | 10 | case 0xFF: MaskByteOffset = 0; break; |
4198 | 18 | case 0xFF00: MaskByteOffset = 1; break; |
4199 | 20 | case 0xFF0000: MaskByteOffset = 2; break; |
4200 | 8 | case 0xFF000000: MaskByteOffset = 3; break; |
4201 | 56 | } |
4202 | 56 | |
4203 | 56 | // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). |
4204 | 56 | if (56 Opc == ISD::AND56 ) { |
4205 | 42 | if (MaskByteOffset == 0 || 42 MaskByteOffset == 236 ) { |
4206 | 22 | // (x >> 8) & 0xff |
4207 | 22 | // (x >> 8) & 0xff0000 |
4208 | 22 | if (Opc0 != ISD::SRL) |
4209 | 10 | return false; |
4210 | 12 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); |
4211 | 12 | if (!C || 12 C->getZExtValue() != 812 ) |
4212 | 0 | return false; |
4213 | 20 | } else { |
4214 | 20 | // (x << 8) & 0xff00 |
4215 | 20 | // (x << 8) & 0xff000000 |
4216 | 20 | if (Opc0 != ISD::SHL) |
4217 | 8 | return false; |
4218 | 12 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); |
4219 | 12 | if (!C || 12 C->getZExtValue() != 812 ) |
4220 | 0 | return false; |
4221 | 56 | } |
4222 | 14 | } else if (14 Opc == ISD::SHL14 ) { |
4223 | 8 | // (x & 0xff) << 8 |
4224 | 8 | // (x & 0xff0000) << 8 |
4225 | 8 | if (MaskByteOffset != 0 && 8 MaskByteOffset != 24 ) |
4226 | 0 | return false; |
4227 | 8 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); |
4228 | 8 | if (!C || 8 C->getZExtValue() != 88 ) |
4229 | 0 | return false; |
4230 | 6 | } else { // Opc == ISD::SRL |
4231 | 6 | // (x & 0xff00) >> 8 |
4232 | 6 | // (x & 0xff000000) >> 8 |
4233 | 6 | if (MaskByteOffset != 1 && 6 MaskByteOffset != 32 ) |
4234 | 0 | return false; |
4235 | 6 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); |
4236 | 6 | if (!C || 6 C->getZExtValue() != 86 ) |
4237 | 0 | return false; |
4238 | 38 | } |
4239 | 38 | |
4240 | 38 | if (38 Parts[MaskByteOffset]38 ) |
4241 | 0 | return false; |
4242 | 38 | |
4243 | 38 | Parts[MaskByteOffset] = N0.getOperand(0).getNode(); |
4244 | 38 | return true; |
4245 | 38 | } |
4246 | | |
4247 | | /// Match a 32-bit packed halfword bswap. That is |
4248 | | /// ((x & 0x000000ff) << 8) | |
4249 | | /// ((x & 0x0000ff00) >> 8) | |
4250 | | /// ((x & 0x00ff0000) << 8) | |
4251 | | /// ((x & 0xff000000) >> 8) |
4252 | | /// => (rotl (bswap x), 16) |
4253 | 254k | SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { |
4254 | 254k | if (!LegalOperations) |
4255 | 137k | return SDValue(); |
4256 | 117k | |
4257 | 117k | EVT VT = N->getValueType(0); |
4258 | 117k | if (VT != MVT::i32) |
4259 | 49.6k | return SDValue(); |
4260 | 67.5k | if (67.5k !TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)67.5k ) |
4261 | 9.55k | return SDValue(); |
4262 | 57.9k | |
4263 | 57.9k | // Look for either |
4264 | 57.9k | // (or (or (and), (and)), (or (and), (and))) |
4265 | 57.9k | // (or (or (or (and), (and)), (and)), (and)) |
4266 | 57.9k | if (57.9k N0.getOpcode() != ISD::OR57.9k ) |
4267 | 52.4k | return SDValue(); |
4268 | 5.49k | SDValue N00 = N0.getOperand(0); |
4269 | 5.49k | SDValue N01 = N0.getOperand(1); |
4270 | 5.49k | SDNode *Parts[4] = {}; |
4271 | 5.49k | |
4272 | 5.49k | if (N1.getOpcode() == ISD::OR && |
4273 | 5.49k | N00.getNumOperands() == 289 && N01.getNumOperands() == 288 ) { |
4274 | 86 | // (or (or (and), (and)), (or (and), (and))) |
4275 | 86 | if (!isBSwapHWordElement(N00, Parts)) |
4276 | 80 | return SDValue(); |
4277 | 6 | |
4278 | 6 | if (6 !isBSwapHWordElement(N01, Parts)6 ) |
4279 | 0 | return SDValue(); |
4280 | 6 | SDValue N10 = N1.getOperand(0); |
4281 | 6 | if (!isBSwapHWordElement(N10, Parts)) |
4282 | 0 | return SDValue(); |
4283 | 6 | SDValue N11 = N1.getOperand(1); |
4284 | 6 | if (!isBSwapHWordElement(N11, Parts)) |
4285 | 2 | return SDValue(); |
4286 | 5.41k | } else { |
4287 | 5.41k | // (or (or (or (and), (and)), (and)), (and)) |
4288 | 5.41k | if (!isBSwapHWordElement(N1, Parts)) |
4289 | 5.40k | return SDValue(); |
4290 | 4 | if (4 !isBSwapHWordElement(N01, Parts)4 ) |
4291 | 0 | return SDValue(); |
4292 | 4 | if (4 N00.getOpcode() != ISD::OR4 ) |
4293 | 0 | return SDValue(); |
4294 | 4 | SDValue N000 = N00.getOperand(0); |
4295 | 4 | if (!isBSwapHWordElement(N000, Parts)) |
4296 | 0 | return SDValue(); |
4297 | 4 | SDValue N001 = N00.getOperand(1); |
4298 | 4 | if (!isBSwapHWordElement(N001, Parts)) |
4299 | 0 | return SDValue(); |
4300 | 8 | } |
4301 | 8 | |
4302 | 8 | // Make sure the parts are all coming from the same node. |
4303 | 8 | if (8 Parts[0] != Parts[1] || 8 Parts[0] != Parts[2]8 || Parts[0] != Parts[3]8 ) |
4304 | 0 | return SDValue(); |
4305 | 8 | |
4306 | 8 | SDLoc DL(N); |
4307 | 8 | SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, |
4308 | 8 | SDValue(Parts[0], 0)); |
4309 | 8 | |
4310 | 8 | // Result of the bswap should be rotated by 16. If it's not legal, then |
4311 | 8 | // do (x << 16) | (x >> 16). |
4312 | 8 | SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); |
4313 | 8 | if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) |
4314 | 4 | return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); |
4315 | 4 | if (4 TLI.isOperationLegalOrCustom(ISD::ROTR, VT)4 ) |
4316 | 4 | return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); |
4317 | 0 | return DAG.getNode(ISD::OR, DL, VT, |
4318 | 0 | DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), |
4319 | 0 | DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); |
4320 | 0 | } |
4321 | | |
4322 | | /// This contains all DAGCombine rules which reduce two values combined by |
4323 | | /// an Or operation to a single value \see visitANDLike(). |
4324 | 254k | SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { |
4325 | 254k | EVT VT = N1.getValueType(); |
4326 | 254k | SDLoc DL(N); |
4327 | 254k | |
4328 | 254k | // fold (or x, undef) -> -1 |
4329 | 254k | if (!LegalOperations && 254k (N0.isUndef() || 137k N1.isUndef()137k )) |
4330 | 11 | return DAG.getAllOnesConstant(DL, VT); |
4331 | 254k | |
4332 | 254k | if (SDValue 254k V254k = foldLogicOfSetCCs(false, N0, N1, DL)) |
4333 | 159 | return V; |
4334 | 254k | |
4335 | 254k | // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. |
4336 | 254k | if (254k N0.getOpcode() == ISD::AND && 254k N1.getOpcode() == ISD::AND36.3k && |
4337 | 254k | // Don't increase # computations. |
4338 | 254k | (N0.getNode()->hasOneUse() || 17.4k N1.getNode()->hasOneUse()81 )) { |
4339 | 17.4k | // We can only do this xform if we know that bits from X that are set in C2 |
4340 | 17.4k | // but not in C1 are already zero. Likewise for Y. |
4341 | 17.4k | if (const ConstantSDNode *N0O1C = |
4342 | 8.63k | getAsNonOpaqueConstant(N0.getOperand(1))) { |
4343 | 8.63k | if (const ConstantSDNode *N1O1C = |
4344 | 8.49k | getAsNonOpaqueConstant(N1.getOperand(1))) { |
4345 | 8.49k | // We can only do this xform if we know that bits from X that are set in |
4346 | 8.49k | // C2 but not in C1 are already zero. Likewise for Y. |
4347 | 8.49k | const APInt &LHSMask = N0O1C->getAPIntValue(); |
4348 | 8.49k | const APInt &RHSMask = N1O1C->getAPIntValue(); |
4349 | 8.49k | |
4350 | 8.49k | if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && |
4351 | 8.49k | DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)389 ) { |
4352 | 41 | SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, |
4353 | 41 | N0.getOperand(0), N1.getOperand(0)); |
4354 | 41 | return DAG.getNode(ISD::AND, DL, VT, X, |
4355 | 41 | DAG.getConstant(LHSMask | RHSMask, DL, VT)); |
4356 | 41 | } |
4357 | 254k | } |
4358 | 8.63k | } |
4359 | 17.4k | } |
4360 | 254k | |
4361 | 254k | // (or (and X, M), (and X, N)) -> (and X, (or M, N)) |
4362 | 254k | if (254k N0.getOpcode() == ISD::AND && |
4363 | 36.2k | N1.getOpcode() == ISD::AND && |
4364 | 17.4k | N0.getOperand(0) == N1.getOperand(0) && |
4365 | 254k | // Don't increase # computations. |
4366 | 254k | (N0.getNode()->hasOneUse() || 88 N1.getNode()->hasOneUse()4 )) { |
4367 | 86 | SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, |
4368 | 86 | N0.getOperand(1), N1.getOperand(1)); |
4369 | 86 | return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X); |
4370 | 86 | } |
4371 | 254k | |
4372 | 254k | return SDValue(); |
4373 | 254k | } |
4374 | | |
4375 | 255k | SDValue DAGCombiner::visitOR(SDNode *N) { |
4376 | 255k | SDValue N0 = N->getOperand(0); |
4377 | 255k | SDValue N1 = N->getOperand(1); |
4378 | 255k | EVT VT = N1.getValueType(); |
4379 | 255k | |
4380 | 255k | // x | x --> x |
4381 | 255k | if (N0 == N1) |
4382 | 9 | return N0; |
4383 | 255k | |
4384 | 255k | // fold vector ops |
4385 | 255k | if (255k VT.isVector()255k ) { |
4386 | 22.1k | if (SDValue FoldedVOp = SimplifyVBinOp(N)) |
4387 | 7 | return FoldedVOp; |
4388 | 22.1k | |
4389 | 22.1k | // fold (or x, 0) -> x, vector edition |
4390 | 22.1k | if (22.1k ISD::isBuildVectorAllZeros(N0.getNode())22.1k ) |
4391 | 58 | return N1; |
4392 | 22.1k | if (22.1k ISD::isBuildVectorAllZeros(N1.getNode())22.1k ) |
4393 | 66 | return N0; |
4394 | 22.0k | |
4395 | 22.0k | // fold (or x, -1) -> -1, vector edition |
4396 | 22.0k | if (22.0k ISD::isBuildVectorAllOnes(N0.getNode())22.0k ) |
4397 | 22.0k | // do not return N0, because undef node may exist in N0 |
4398 | 1 | return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType()); |
4399 | 22.0k | if (22.0k ISD::isBuildVectorAllOnes(N1.getNode())22.0k ) |
4400 | 22.0k | // do not return N1, because undef node may exist in N1 |
4401 | 0 | return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType()); |
4402 | 22.0k | |
4403 | 22.0k | // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) |
4404 | 22.0k | // Do this only if the resulting shuffle is legal. |
4405 | 22.0k | if (22.0k isa<ShuffleVectorSDNode>(N0) && |
4406 | 203 | isa<ShuffleVectorSDNode>(N1) && |
4407 | 22.0k | // Avoid folding a node with illegal type. |
4408 | 22.0k | TLI.isTypeLegal(VT)109 ) { |
4409 | 108 | bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); |
4410 | 108 | bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); |
4411 | 108 | bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); |
4412 | 108 | bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode()); |
4413 | 108 | // Ensure both shuffles have a zero input. |
4414 | 108 | if ((ZeroN00 != ZeroN01) && 108 (ZeroN10 != ZeroN11)61 ) { |
4415 | 61 | assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); |
4416 | 61 | assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); |
4417 | 61 | const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); |
4418 | 61 | const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); |
4419 | 61 | bool CanFold = true; |
4420 | 61 | int NumElts = VT.getVectorNumElements(); |
4421 | 61 | SmallVector<int, 4> Mask(NumElts); |
4422 | 61 | |
4423 | 275 | for (int i = 0; i != NumElts275 ; ++i214 ) { |
4424 | 219 | int M0 = SV0->getMaskElt(i); |
4425 | 219 | int M1 = SV1->getMaskElt(i); |
4426 | 219 | |
4427 | 219 | // Determine if either index is pointing to a zero vector. |
4428 | 218 | bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts)); |
4429 | 218 | bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts)); |
4430 | 219 | |
4431 | 219 | // If one element is zero and the otherside is undef, keep undef. |
4432 | 219 | // This also handles the case that both are undef. |
4433 | 219 | if ((M0Zero && 219 M1 < 089 ) || (M1Zero && 218 M0 < 0129 )) { |
4434 | 1 | Mask[i] = -1; |
4435 | 1 | continue; |
4436 | 1 | } |
4437 | 218 | |
4438 | 218 | // Make sure only one of the elements is zero. |
4439 | 218 | if (218 M0Zero == M1Zero218 ) { |
4440 | 5 | CanFold = false; |
4441 | 5 | break; |
4442 | 5 | } |
4443 | 213 | |
4444 | 218 | assert((M0 >= 0 || M1 >= 0) && "Undef index!"); |
4445 | 213 | |
4446 | 213 | // We have a zero and non-zero element. If the non-zero came from |
4447 | 213 | // SV0 make the index a LHS index. If it came from SV1, make it |
4448 | 213 | // a RHS index. We need to mod by NumElts because we don't care |
4449 | 213 | // which operand it came from in the original shuffles. |
4450 | 213 | Mask[i] = M1Zero ? M0 % NumElts127 : (M1 % NumElts) + NumElts86 ; |
4451 | 219 | } |
4452 | 61 | |
4453 | 61 | if (CanFold61 ) { |
4454 | 56 | SDValue NewLHS = ZeroN00 ? N0.getOperand(1)2 : N0.getOperand(0)54 ; |
4455 | 56 | SDValue NewRHS = ZeroN10 ? N1.getOperand(1)2 : N1.getOperand(0)54 ; |
4456 | 56 | |
4457 | 56 | bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); |
4458 | 56 | if (!LegalMask56 ) { |
4459 | 0 | std::swap(NewLHS, NewRHS); |
4460 | 0 | ShuffleVectorSDNode::commuteMask(Mask); |
4461 | 0 | LegalMask = TLI.isShuffleMaskLegal(Mask, VT); |
4462 | 0 | } |
4463 | 56 | |
4464 | 56 | if (LegalMask) |
4465 | 56 | return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask); |
4466 | 255k | } |
4467 | 61 | } |
4468 | 108 | } |
4469 | 22.1k | } |
4470 | 255k | |
4471 | 255k | // fold (or c1, c2) -> c1|c2 |
4472 | 255k | ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); |
4473 | 255k | ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); |
4474 | 255k | if (N0C && 255k N1C895 && !N1C->isOpaque()124 ) |
4475 | 124 | return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); |
4476 | 255k | // canonicalize constant to RHS |
4477 | 255k | if (255k DAG.isConstantIntBuildVectorOrConstantInt(N0) && |
4478 | 779 | !DAG.isConstantIntBuildVectorOrConstantInt(N1)) |
4479 | 775 | return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); |
4480 | 254k | // fold (or x, 0) -> x |
4481 | 254k | if (254k isNullConstant(N1)254k ) |
4482 | 205 | return N0; |
4483 | 254k | // fold (or x, -1) -> -1 |
4484 | 254k | if (254k isAllOnesConstant(N1)254k ) |
4485 | 80 | return N1; |
4486 | 254k | |
4487 | 254k | if (SDValue 254k NewSel254k = foldBinOpIntoSelect(N)) |
4488 | 41 | return NewSel; |
4489 | 254k | |
4490 | 254k | // fold (or x, c) -> c iff (x & ~c) == 0 |
4491 | 254k | if (254k N1C && 254k DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())88.4k ) |
4492 | 2 | return N1; |
4493 | 254k | |
4494 | 254k | if (SDValue 254k Combined254k = visitORLike(N0, N1, N)) |
4495 | 291 | return Combined; |
4496 | 254k | |
4497 | 254k | // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) |
4498 | 254k | if (SDValue 254k BSwap254k = MatchBSwapHWord(N, N0, N1)) |
4499 | 8 | return BSwap; |
4500 | 254k | if (SDValue 254k BSwap254k = MatchBSwapHWordLow(N, N0, N1)) |
4501 | 17 | return BSwap; |
4502 | 254k | |
4503 | 254k | // reassociate or |
4504 | 254k | if (SDValue 254k ROR254k = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) |
4505 | 60 | return ROR; |
4506 | 254k | |
4507 | 254k | // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) |
4508 | 254k | // iff (c1 & c2) != 0. |
4509 | 254k | if (254k N1C && 254k N0.getOpcode() == ISD::AND88.3k && N0.getNode()->hasOneUse()5.52k ) { |
4510 | 4.09k | if (ConstantSDNode *C14.09k = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { |
4511 | 4.05k | if (C1->getAPIntValue().intersects(N1C->getAPIntValue())4.05k ) { |
4512 | 22 | if (SDValue COR = |
4513 | 22 | DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1)) |
4514 | 22 | return DAG.getNode( |
4515 | 22 | ISD::AND, SDLoc(N), VT, |
4516 | 22 | DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); |
4517 | 0 | return SDValue(); |
4518 | 0 | } |
4519 | 4.05k | } |
4520 | 4.09k | } |
4521 | 254k | |
4522 | 254k | // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) |
4523 | 254k | if (254k N0.getOpcode() == N1.getOpcode()254k ) |
4524 | 75.6k | if (SDValue 75.6k Tmp75.6k = SimplifyBinOpWithSameOpcodeHands(N)) |
4525 | 903 | return Tmp; |
4526 | 253k | |
4527 | 253k | // See if this is some rotate idiom. |
4528 | 253k | if (SDNode *253k Rot253k = MatchRotate(N0, N1, SDLoc(N))) |
4529 | 1.96k | return SDValue(Rot, 0); |
4530 | 251k | |
4531 | 251k | if (SDValue 251k Load251k = MatchLoadCombine(N)) |
4532 | 756 | return Load; |
4533 | 250k | |
4534 | 250k | // Simplify the operands using demanded-bits information. |
4535 | 250k | if (250k SimplifyDemandedBits(SDValue(N, 0))250k ) |
4536 | 4.48k | return SDValue(N, 0); |
4537 | 246k | |
4538 | 246k | return SDValue(); |
4539 | 246k | } |
4540 | | |
4541 | | /// Match "(X shl/srl V1) & V2" where V2 may not be present. |
4542 | 258k | bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { |
4543 | 258k | if (Op.getOpcode() == ISD::AND258k ) { |
4544 | 26.8k | if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))26.8k ) { |
4545 | 24.3k | Mask = Op.getOperand(1); |
4546 | 24.3k | Op = Op.getOperand(0); |
4547 | 26.8k | } else { |
4548 | 2.44k | return false; |
4549 | 2.44k | } |
4550 | 256k | } |
4551 | 256k | |
4552 | 256k | if (256k Op.getOpcode() == ISD::SRL || 256k Op.getOpcode() == ISD::SHL240k ) { |
4553 | 54.0k | Shift = Op; |
4554 | 54.0k | return true; |
4555 | 54.0k | } |
4556 | 201k | |
4557 | 201k | return false; |
4558 | 201k | } |
4559 | | |
4560 | | // Return true if we can prove that, whenever Neg and Pos are both in the |
4561 | | // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that |
4562 | | // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: |
4563 | | // |
4564 | | // (or (shift1 X, Neg), (shift2 X, Pos)) |
4565 | | // |
4566 | | // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate |
4567 | | // in direction shift1 by Neg. The range [0, EltSize) means that we only need |
4568 | | // to consider shift amounts with defined behavior. |
4569 | 708 | static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { |
4570 | 708 | // If EltSize is a power of 2 then: |
4571 | 708 | // |
4572 | 708 | // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) |
4573 | 708 | // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). |
4574 | 708 | // |
4575 | 708 | // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check |
4576 | 708 | // for the stronger condition: |
4577 | 708 | // |
4578 | 708 | // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] |
4579 | 708 | // |
4580 | 708 | // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) |
4581 | 708 | // we can just replace Neg with Neg' for the rest of the function. |
4582 | 708 | // |
4583 | 708 | // In other cases we check for the even stronger condition: |
4584 | 708 | // |
4585 | 708 | // Neg == EltSize - Pos [B] |
4586 | 708 | // |
4587 | 708 | // for all Neg and Pos. Note that the (or ...) then invokes undefined |
4588 | 708 | // behavior if Pos == 0 (and consequently Neg == EltSize). |
4589 | 708 | // |
4590 | 708 | // We could actually use [A] whenever EltSize is a power of 2, but the |
4591 | 708 | // only extra cases that it would match are those uninteresting ones |
4592 | 708 | // where Neg and Pos are never in range at the same time. E.g. for |
4593 | 708 | // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) |
4594 | 708 | // as well as (sub 32, Pos), but: |
4595 | 708 | // |
4596 | 708 | // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) |
4597 | 708 | // |
4598 | 708 | // always invokes undefined behavior for 32-bit X. |
4599 | 708 | // |
4600 | 708 | // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. |
4601 | 708 | unsigned MaskLoBits = 0; |
4602 | 708 | if (Neg.getOpcode() == ISD::AND && 708 isPowerOf2_64(EltSize)42 ) { |
4603 | 42 | if (ConstantSDNode *NegC42 = isConstOrConstSplat(Neg.getOperand(1))) { |
4604 | 42 | if (NegC->getAPIntValue() == EltSize - 142 ) { |
4605 | 40 | Neg = Neg.getOperand(0); |
4606 | 40 | MaskLoBits = Log2_64(EltSize); |
4607 | 40 | } |
4608 | 42 | } |
4609 | 42 | } |
4610 | 708 | |
4611 | 708 | // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. |
4612 | 708 | if (Neg.getOpcode() != ISD::SUB) |
4613 | 414 | return false; |
4614 | 294 | ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); |
4615 | 294 | if (!NegC) |
4616 | 0 | return false; |
4617 | 294 | SDValue NegOp1 = Neg.getOperand(1); |
4618 | 294 | |
4619 | 294 | // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with |
4620 | 294 | // Pos'. The truncation is redundant for the purpose of the equality. |
4621 | 294 | if (MaskLoBits && 294 Pos.getOpcode() == ISD::AND22 ) |
4622 | 16 | if (ConstantSDNode *16 PosC16 = isConstOrConstSplat(Pos.getOperand(1))) |
4623 | 16 | if (16 PosC->getAPIntValue() == EltSize - 116 ) |
4624 | 16 | Pos = Pos.getOperand(0); |
4625 | 294 | |
4626 | 294 | // The condition we need is now: |
4627 | 294 | // |
4628 | 294 | // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask |
4629 | 294 | // |
4630 | 294 | // If NegOp1 == Pos then we need: |
4631 | 294 | // |
4632 | 294 | // EltSize & Mask == NegC & Mask |
4633 | 294 | // |
4634 | 294 | // (because "x & Mask" is a truncation and distributes through subtraction). |
4635 | 294 | APInt Width; |
4636 | 294 | if (Pos == NegOp1) |
4637 | 247 | Width = NegC->getAPIntValue(); |
4638 | 294 | |
4639 | 294 | // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. |
4640 | 294 | // Then the condition we want to prove becomes: |
4641 | 294 | // |
4642 | 294 | // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask |
4643 | 294 | // |
4644 | 294 | // which, again because "x & Mask" is a truncation, becomes: |
4645 | 294 | // |
4646 | 294 | // NegC & Mask == (EltSize - PosC) & Mask |
4647 | 294 | // EltSize & Mask == (NegC + PosC) & Mask |
4648 | 47 | else if (47 Pos.getOpcode() == ISD::ADD && 47 Pos.getOperand(0) == NegOp111 ) { |
4649 | 11 | if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) |
4650 | 11 | Width = PosC->getAPIntValue() + NegC->getAPIntValue(); |
4651 | 11 | else |
4652 | 0 | return false; |
4653 | 47 | } else |
4654 | 36 | return false; |
4655 | 258 | |
4656 | 258 | // Now we just need to check that EltSize & Mask == Width & Mask. |
4657 | 258 | if (258 MaskLoBits258 ) |
4658 | 258 | // EltSize & Mask is 0 since Mask is EltSize - 1. |
4659 | 22 | return Width.getLoBits(MaskLoBits) == 0; |
4660 | 236 | return Width == EltSize; |
4661 | 236 | } |
4662 | | |
4663 | | // A subroutine of MatchRotate used once we have found an OR of two opposite |
4664 | | // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces |
4665 | | // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the |
4666 | | // former being preferred if supported. InnerPos and InnerNeg are Pos and |
4667 | | // Neg with outer conversions stripped away. |
4668 | | SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, |
4669 | | SDValue Neg, SDValue InnerPos, |
4670 | | SDValue InnerNeg, unsigned PosOpcode, |
4671 | 708 | unsigned NegOpcode, const SDLoc &DL) { |
4672 | 708 | // fold (or (shl x, (*ext y)), |
4673 | 708 | // (srl x, (*ext (sub 32, y)))) -> |
4674 | 708 | // (rotl x, y) or (rotr x, (sub 32, y)) |
4675 | 708 | // |
4676 | 708 | // fold (or (shl x, (*ext (sub 32, y))), |
4677 | 708 | // (srl x, (*ext y))) -> |
4678 | 708 | // (rotr x, y) or (rotl x, (sub 32, y)) |
4679 | 708 | EVT VT = Shifted.getValueType(); |
4680 | 708 | if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())708 ) { |
4681 | 238 | bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); |
4682 | 238 | return DAG.getNode(HasPos ? PosOpcode201 : NegOpcode37 , DL, VT, Shifted, |
4683 | 238 | HasPos ? Pos201 : Neg37 ).getNode(); |
4684 | 238 | } |
4685 | 470 | |
4686 | 470 | return nullptr; |
4687 | 470 | } |
4688 | | |
4689 | | // MatchRotate - Handle an 'or' of two operands. If this is one of the many |
4690 | | // idioms for rotate, and if the target supports rotation instructions, generate |
4691 | | // a rot[lr]. |
4692 | 253k | SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { |
4693 | 253k | // Must be a legal type. Expanded 'n promoted things won't work with rotates. |
4694 | 253k | EVT VT = LHS.getValueType(); |
4695 | 253k | if (!TLI.isTypeLegal(VT)253k ) return nullptr9.63k ; |
4696 | 243k | |
4697 | 243k | // The target must have at least one rotate flavor. |
4698 | 243k | bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); |
4699 | 243k | bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); |
4700 | 243k | if (!HasROTL && 243k !HasROTR213k ) return nullptr27.0k ; |
4701 | 216k | |
4702 | 216k | // Match "(X shl/srl V1) & V2" where V2 may not be present. |
4703 | 216k | SDValue LHSShift; // The shift. |
4704 | 216k | SDValue LHSMask; // AND value if any. |
4705 | 216k | if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) |
4706 | 174k | return nullptr; // Not part of a rotate. |
4707 | 41.8k | |
4708 | 41.8k | SDValue RHSShift; // The shift. |
4709 | 41.8k | SDValue RHSMask; // AND value if any. |
4710 | 41.8k | if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) |
4711 | 29.7k | return nullptr; // Not part of a rotate. |
4712 | 12.1k | |
4713 | 12.1k | if (12.1k LHSShift.getOperand(0) != RHSShift.getOperand(0)12.1k ) |
4714 | 9.47k | return nullptr; // Not shifting the same value. |
4715 | 2.66k | |
4716 | 2.66k | if (2.66k LHSShift.getOpcode() == RHSShift.getOpcode()2.66k ) |
4717 | 262 | return nullptr; // Shifts must disagree. |
4718 | 2.39k | |
4719 | 2.39k | // Canonicalize shl to left side in a shl/srl pair. |
4720 | 2.39k | if (2.39k RHSShift.getOpcode() == ISD::SHL2.39k ) { |
4721 | 963 | std::swap(LHS, RHS); |
4722 | 963 | std::swap(LHSShift, RHSShift); |
4723 | 963 | std::swap(LHSMask, RHSMask); |
4724 | 963 | } |
4725 | 2.39k | |
4726 | 2.39k | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
4727 | 2.39k | SDValue LHSShiftArg = LHSShift.getOperand(0); |
4728 | 2.39k | SDValue LHSShiftAmt = LHSShift.getOperand(1); |
4729 | 2.39k | SDValue RHSShiftArg = RHSShift.getOperand(0); |
4730 | 2.39k | SDValue RHSShiftAmt = RHSShift.getOperand(1); |
4731 | 2.39k | |
4732 | 2.39k | // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) |
4733 | 2.39k | // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) |
4734 | 2.39k | auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, |
4735 | 3.01k | ConstantSDNode *RHS) { |
4736 | 3.01k | return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; |
4737 | 3.01k | }; |
4738 | 2.39k | if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)2.39k ) { |
4739 | 1.72k | SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL308 : ISD::ROTR1.42k , DL, VT, |
4740 | 1.72k | LHSShiftArg, HasROTL ? LHSShiftAmt308 : RHSShiftAmt1.42k ); |
4741 | 1.72k | |
4742 | 1.72k | // If there is an AND of either shifted operand, apply it to the result. |
4743 | 1.72k | if (LHSMask.getNode() || 1.72k RHSMask.getNode()1.68k ) { |
4744 | 47 | SDValue AllOnes = DAG.getAllOnesConstant(DL, VT); |
4745 | 47 | SDValue Mask = AllOnes; |
4746 | 47 | |
4747 | 47 | if (LHSMask.getNode()47 ) { |
4748 | 47 | SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt); |
4749 | 47 | Mask = DAG.getNode(ISD::AND, DL, VT, Mask, |
4750 | 47 | DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits)); |
4751 | 47 | } |
4752 | 47 | if (RHSMask.getNode()47 ) { |
4753 | 34 | SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt); |
4754 | 34 | Mask = DAG.getNode(ISD::AND, DL, VT, Mask, |
4755 | 34 | DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits)); |
4756 | 34 | } |
4757 | 47 | |
4758 | 47 | Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); |
4759 | 47 | } |
4760 | 1.72k | |
4761 | 1.72k | return Rot.getNode(); |
4762 | 1.72k | } |
4763 | 670 | |
4764 | 670 | // If there is a mask here, and we have a variable shift, we can't be sure |
4765 | 670 | // that we're masking out the right stuff. |
4766 | 670 | if (670 LHSMask.getNode() || 670 RHSMask.getNode()552 ) |
4767 | 253 | return nullptr; |
4768 | 417 | |
4769 | 417 | // If the shift amount is sign/zext/any-extended just peel it off. |
4770 | 417 | SDValue LExtOp0 = LHSShiftAmt; |
4771 | 417 | SDValue RExtOp0 = RHSShiftAmt; |
4772 | 417 | if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || |
4773 | 417 | LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || |
4774 | 385 | LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || |
4775 | 385 | LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && |
4776 | 87 | (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || |
4777 | 87 | RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || |
4778 | 55 | RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || |
4779 | 417 | RHSShiftAmt.getOpcode() == ISD::TRUNCATE55 )) { |
4780 | 79 | LExtOp0 = LHSShiftAmt.getOperand(0); |
4781 | 79 | RExtOp0 = RHSShiftAmt.getOperand(0); |
4782 | 79 | } |
4783 | 417 | |
4784 | 417 | SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, |
4785 | 417 | LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); |
4786 | 417 | if (TryL) |
4787 | 126 | return TryL; |
4788 | 291 | |
4789 | 291 | SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, |
4790 | 291 | RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); |
4791 | 291 | if (TryR) |
4792 | 112 | return TryR; |
4793 | 179 | |
4794 | 179 | return nullptr; |
4795 | 179 | } |
4796 | | |
4797 | | namespace { |
4798 | | |
4799 | | /// Represents known origin of an individual byte in load combine pattern. The |
4800 | | /// value of the byte is either constant zero or comes from memory. |
4801 | | struct ByteProvider { |
4802 | | // For constant zero providers Load is set to nullptr. For memory providers |
4803 | | // Load represents the node which loads the byte from memory. |
4804 | | // ByteOffset is the offset of the byte in the value produced by the load. |
4805 | | LoadSDNode *Load = nullptr; |
4806 | | unsigned ByteOffset = 0; |
4807 | | |
4808 | | ByteProvider() = default; |
4809 | | |
4810 | 65.5k | static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) { |
4811 | 65.5k | return ByteProvider(Load, ByteOffset); |
4812 | 65.5k | } |
4813 | | |
4814 | 56.6k | static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); } |
4815 | | |
4816 | 74.7k | bool isConstantZero() const { return !Load; } |
4817 | 31.9k | bool isMemory() const { return Load; } |
4818 | | |
4819 | 0 | bool operator==(const ByteProvider &Other) const { |
4820 | 0 | return Other.Load == Load && Other.ByteOffset == ByteOffset; |
4821 | 0 | } |
4822 | | |
4823 | | private: |
4824 | | ByteProvider(LoadSDNode *Load, unsigned ByteOffset) |
4825 | 122k | : Load(Load), ByteOffset(ByteOffset) {} |
4826 | | }; |
4827 | | |
4828 | | } // end anonymous namespace |
4829 | | |
4830 | | /// Recursively traverses the expression calculating the origin of the requested |
4831 | | /// byte of the given value. Returns None if the provider can't be calculated. |
4832 | | /// |
4833 | | /// For all the values except the root of the expression verifies that the value |
4834 | | /// has exactly one use and if it's not true return None. This way if the origin |
4835 | | /// of the byte is returned it's guaranteed that the values which contribute to |
4836 | | /// the byte are not used outside of this expression. |
4837 | | /// |
4838 | | /// Because the parts of the expression are not allowed to have more than one |
4839 | | /// use this function iterates over trees, not DAGs. So it never visits the same |
4840 | | /// node more than once. |
4841 | | static const Optional<ByteProvider> |
4842 | | calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, |
4843 | 648k | bool Root = false) { |
4844 | 648k | // Typical i64 by i8 pattern requires recursion up to 8 calls depth |
4845 | 648k | if (Depth == 10) |
4846 | 839 | return None; |
4847 | 647k | |
4848 | 647k | if (647k !Root && 647k !Op.hasOneUse()412k ) |
4849 | 72.6k | return None; |
4850 | 575k | |
4851 | 647k | assert(Op.getValueType().isScalarInteger() && "can't handle other types"); |
4852 | 575k | unsigned BitWidth = Op.getValueSizeInBits(); |
4853 | 575k | if (BitWidth % 8 != 0) |
4854 | 0 | return None; |
4855 | 575k | unsigned ByteWidth = BitWidth / 8; |
4856 | 575k | assert(Index < ByteWidth && "invalid index requested"); |
4857 | 575k | (void) ByteWidth; |
4858 | 575k | |
4859 | 575k | switch (Op.getOpcode()) { |
4860 | 275k | case ISD::OR: { |
4861 | 275k | auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1); |
4862 | 275k | if (!LHS) |
4863 | 187k | return None; |
4864 | 88.6k | auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1); |
4865 | 88.6k | if (!RHS) |
4866 | 38.7k | return None; |
4867 | 49.8k | |
4868 | 49.8k | if (49.8k LHS->isConstantZero()49.8k ) |
4869 | 25.0k | return RHS; |
4870 | 24.8k | if (24.8k RHS->isConstantZero()24.8k ) |
4871 | 23.3k | return LHS; |
4872 | 1.55k | return None; |
4873 | 1.55k | } |
4874 | 76.2k | case ISD::SHL: { |
4875 | 76.2k | auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1)); |
4876 | 76.2k | if (!ShiftOp) |
4877 | 4.02k | return None; |
4878 | 72.2k | |
4879 | 72.2k | uint64_t BitShift = ShiftOp->getZExtValue(); |
4880 | 72.2k | if (BitShift % 8 != 0) |
4881 | 16.5k | return None; |
4882 | 55.6k | uint64_t ByteShift = BitShift / 8; |
4883 | 55.6k | |
4884 | 55.6k | return Index < ByteShift |
4885 | 30.6k | ? ByteProvider::getConstantZero() |
4886 | 24.9k | : calculateByteProvider(Op->getOperand(0), Index - ByteShift, |
4887 | 24.9k | Depth + 1); |
4888 | 55.6k | } |
4889 | 35.6k | case ISD::ANY_EXTEND: |
4890 | 35.6k | case ISD::SIGN_EXTEND: |
4891 | 35.6k | case ISD::ZERO_EXTEND: { |
4892 | 35.6k | SDValue NarrowOp = Op->getOperand(0); |
4893 | 35.6k | unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits(); |
4894 | 35.6k | if (NarrowBitWidth % 8 != 0) |
4895 | 240 | return None; |
4896 | 35.4k | uint64_t NarrowByteWidth = NarrowBitWidth / 8; |
4897 | 35.4k | |
4898 | 35.4k | if (Index >= NarrowByteWidth) |
4899 | 11.5k | return Op.getOpcode() == ISD::ZERO_EXTEND |
4900 | 11.5k | ? Optional<ByteProvider>(ByteProvider::getConstantZero()) |
4901 | 16 | : None; |
4902 | 23.9k | return calculateByteProvider(NarrowOp, Index, Depth + 1); |
4903 | 23.9k | } |
4904 | 50 | case ISD::BSWAP: |
4905 | 50 | return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1, |
4906 | 50 | Depth + 1); |
4907 | 81.1k | case ISD::LOAD: { |
4908 | 81.1k | auto L = cast<LoadSDNode>(Op.getNode()); |
4909 | 81.1k | if (L->isVolatile() || 81.1k L->isIndexed()81.1k ) |
4910 | 328 | return None; |
4911 | 80.8k | |
4912 | 80.8k | unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); |
4913 | 80.8k | if (NarrowBitWidth % 8 != 0) |
4914 | 124 | return None; |
4915 | 80.7k | uint64_t NarrowByteWidth = NarrowBitWidth / 8; |
4916 | 80.7k | |
4917 | 80.7k | if (Index >= NarrowByteWidth) |
4918 | 15.2k | return L->getExtensionType() == ISD::ZEXTLOAD |
4919 | 14.4k | ? Optional<ByteProvider>(ByteProvider::getConstantZero()) |
4920 | 744 | : None; |
4921 | 65.5k | return ByteProvider::getMemory(L, Index); |
4922 | 65.5k | } |
4923 | 106k | } |
4924 | 106k | |
4925 | 106k | return None; |
4926 | 106k | } |
4927 | | |
4928 | | /// Match a pattern where a wide type scalar value is loaded by several narrow |
4929 | | /// loads and combined by shifts and ors. Fold it into a single load or a load |
4930 | | /// and a BSWAP if the targets supports it. |
4931 | | /// |
4932 | | /// Assuming little endian target: |
4933 | | /// i8 *a = ... |
4934 | | /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24) |
4935 | | /// => |
4936 | | /// i32 val = *((i32)a) |
4937 | | /// |
4938 | | /// i8 *a = ... |
4939 | | /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3] |
4940 | | /// => |
4941 | | /// i32 val = BSWAP(*((i32)a)) |
4942 | | /// |
4943 | | /// TODO: This rule matches complex patterns with OR node roots and doesn't |
4944 | | /// interact well with the worklist mechanism. When a part of the pattern is |
4945 | | /// updated (e.g. one of the loads) its direct users are put into the worklist, |
4946 | | /// but the root node of the pattern which triggers the load combine is not |
4947 | | /// necessarily a direct user of the changed node. For example, once the address |
4948 | | /// of t28 load is reassociated load combine won't be triggered: |
4949 | | /// t25: i32 = add t4, Constant:i32<2> |
4950 | | /// t26: i64 = sign_extend t25 |
4951 | | /// t27: i64 = add t2, t26 |
4952 | | /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64 |
4953 | | /// t29: i32 = zero_extend t28 |
4954 | | /// t32: i32 = shl t29, Constant:i8<8> |
4955 | | /// t33: i32 = or t23, t32 |
4956 | | /// As a possible fix visitLoad can check if the load can be a part of a load |
4957 | | /// combine pattern and add corresponding OR roots to the worklist. |
4958 | 251k | SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { |
4959 | 251k | assert(N->getOpcode() == ISD::OR && |
4960 | 251k | "Can only match load combining against OR nodes"); |
4961 | 251k | |
4962 | 251k | // Handles simple types only |
4963 | 251k | EVT VT = N->getValueType(0); |
4964 | 251k | if (VT != MVT::i16 && 251k VT != MVT::i32247k && VT != MVT::i64101k ) |
4965 | 27.0k | return SDValue(); |
4966 | 224k | unsigned ByteWidth = VT.getSizeInBits() / 8; |
4967 | 224k | |
4968 | 224k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
4969 | 224k | // Before legalize we can introduce too wide illegal loads which will be later |
4970 | 224k | // split into legal sized loads. This enables us to combine i64 load by i8 |
4971 | 224k | // patterns to a couple of i32 loads on 32 bit targets. |
4972 | 224k | if (LegalOperations && 224k !TLI.isOperationLegal(ISD::LOAD, VT)99.2k ) |
4973 | 12.6k | return SDValue(); |
4974 | 211k | |
4975 | 211k | std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = []( |
4976 | 34.2k | unsigned BW, unsigned i) { return i; }; |
4977 | 211k | std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = []( |
4978 | 8.39k | unsigned BW, unsigned i) { return BW - i - 1; }; |
4979 | 211k | |
4980 | 211k | bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); |
4981 | 27.0k | auto MemoryByteOffset = [&] (ByteProvider P) { |
4982 | 27.0k | assert(P.isMemory() && "Must be a memory byte provider"); |
4983 | 27.0k | unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits(); |
4984 | 27.0k | assert(LoadBitWidth % 8 == 0 && |
4985 | 27.0k | "can only analyze providers for individual bytes not bit"); |
4986 | 27.0k | unsigned LoadByteWidth = LoadBitWidth / 8; |
4987 | 27.0k | return IsBigEndianTarget |
4988 | 606 | ? BigEndianByteAt(LoadByteWidth, P.ByteOffset) |
4989 | 26.4k | : LittleEndianByteAt(LoadByteWidth, P.ByteOffset); |
4990 | 27.0k | }; |
4991 | 211k | |
4992 | 211k | Optional<BaseIndexOffset> Base; |
4993 | 211k | SDValue Chain; |
4994 | 211k | |
4995 | 211k | SmallSet<LoadSDNode *, 8> Loads; |
4996 | 211k | Optional<ByteProvider> FirstByteProvider; |
4997 | 211k | int64_t FirstOffset = INT64_MAX; |
4998 | 211k | |
4999 | 211k | // Check if all the bytes of the OR we are looking at are loaded from the same |
5000 | 211k | // base address. Collect bytes offsets from Base address in ByteOffsets. |
5001 | 211k | SmallVector<int64_t, 4> ByteOffsets(ByteWidth); |
5002 | 236k | for (unsigned i = 0; i < ByteWidth236k ; i++25.3k ) { |
5003 | 235k | auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); |
5004 | 235k | if (!P || 235k !P->isMemory()31.9k ) // All the bytes must be loaded from memory |
5005 | 206k | return SDValue(); |
5006 | 28.5k | |
5007 | 28.5k | LoadSDNode *L = P->Load; |
5008 | 28.5k | assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() && |
5009 | 28.5k | "Must be enforced by calculateByteProvider"); |
5010 | 28.5k | assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); |
5011 | 28.5k | |
5012 | 28.5k | // All loads must share the same chain |
5013 | 28.5k | SDValue LChain = L->getChain(); |
5014 | 28.5k | if (!Chain) |
5015 | 9.42k | Chain = LChain; |
5016 | 19.1k | else if (19.1k Chain != LChain19.1k ) |
5017 | 1.95k | return SDValue(); |
5018 | 26.6k | |
5019 | 26.6k | // Loads must share the same base address |
5020 | 26.6k | BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG); |
5021 | 26.6k | int64_t ByteOffsetFromBase = 0; |
5022 | 26.6k | if (!Base) |
5023 | 9.42k | Base = Ptr; |
5024 | 17.1k | else if (17.1k !Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)17.1k ) |
5025 | 1.32k | return SDValue(); |
5026 | 25.3k | |
5027 | 25.3k | // Calculate the offset of the current byte from the base address |
5028 | 25.3k | ByteOffsetFromBase += MemoryByteOffset(*P); |
5029 | 25.3k | ByteOffsets[i] = ByteOffsetFromBase; |
5030 | 25.3k | |
5031 | 25.3k | // Remember the first byte load |
5032 | 25.3k | if (ByteOffsetFromBase < FirstOffset25.3k ) { |
5033 | 12.3k | FirstByteProvider = P; |
5034 | 12.3k | FirstOffset = ByteOffsetFromBase; |
5035 | 12.3k | } |
5036 | 235k | |
5037 | 235k | Loads.insert(L); |
5038 | 235k | } |
5039 | 1.78k | assert(!Loads.empty() && "All the bytes of the value must be loaded from " |
5040 | 1.78k | "memory, so there must be at least one load which produces the value"); |
5041 | 1.78k | assert(Base && "Base address of the accessed memory location must be set"); |
5042 | 1.78k | assert(FirstOffset != INT64_MAX && "First byte offset must be set"); |
5043 | 1.78k | |
5044 | 1.78k | // Check if the bytes of the OR we are looking at match with either big or |
5045 | 1.78k | // little endian value load |
5046 | 1.78k | bool BigEndian = true, LittleEndian = true; |
5047 | 9.55k | for (unsigned i = 0; i < ByteWidth9.55k ; i++7.76k ) { |
5048 | 7.78k | int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset; |
5049 | 7.78k | LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i); |
5050 | 7.78k | BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i); |
5051 | 7.78k | if (!BigEndian && 7.78k !LittleEndian6.30k ) |
5052 | 22 | return SDValue(); |
5053 | 7.78k | } |
5054 | 1.76k | assert((BigEndian != LittleEndian) && "should be either or"); |
5055 | 1.76k | assert(FirstByteProvider && "must be set"); |
5056 | 1.76k | |
5057 | 1.76k | // Ensure that the first byte is loaded from zero offset of the first load. |
5058 | 1.76k | // So the combined value can be loaded from the first load address. |
5059 | 1.76k | if (MemoryByteOffset(*FirstByteProvider) != 0) |
5060 | 6 | return SDValue(); |
5061 | 1.75k | LoadSDNode *FirstLoad = FirstByteProvider->Load; |
5062 | 1.75k | |
5063 | 1.75k | // The node we are looking at matches with the pattern, check if we can |
5064 | 1.75k | // replace it with a single load and bswap if needed. |
5065 | 1.75k | |
5066 | 1.75k | // If the load needs byte swap check if the target supports it |
5067 | 1.75k | bool NeedsBswap = IsBigEndianTarget != BigEndian; |
5068 | 1.75k | |
5069 | 1.75k | // Before legalize we can introduce illegal bswaps which will be later |
5070 | 1.75k | // converted to an explicit bswap sequence. This way we end up with a single |
5071 | 1.75k | // load and byte shuffling instead of several loads and byte shuffling. |
5072 | 1.75k | if (NeedsBswap && 1.75k LegalOperations322 && !TLI.isOperationLegal(ISD::BSWAP, VT)4 ) |
5073 | 0 | return SDValue(); |
5074 | 1.75k | |
5075 | 1.75k | // Check that a load of the wide type is both allowed and fast on the target |
5076 | 1.75k | bool Fast = false; |
5077 | 1.75k | bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), |
5078 | 1.75k | VT, FirstLoad->getAddressSpace(), |
5079 | 1.75k | FirstLoad->getAlignment(), &Fast); |
5080 | 1.75k | if (!Allowed || 1.75k !Fast880 ) |
5081 | 1.00k | return SDValue(); |
5082 | 756 | |
5083 | 756 | SDValue NewLoad = |
5084 | 756 | DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(), |
5085 | 756 | FirstLoad->getPointerInfo(), FirstLoad->getAlignment()); |
5086 | 756 | |
5087 | 756 | // Transfer chain users from old loads to the new load. |
5088 | 756 | for (LoadSDNode *L : Loads) |
5089 | 2.97k | DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); |
5090 | 756 | |
5091 | 756 | return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad)322 : NewLoad434 ; |
5092 | 251k | } |
5093 | | |
5094 | 874k | SDValue DAGCombiner::visitXOR(SDNode *N) { |
5095 | 874k | SDValue N0 = N->getOperand(0); |
5096 | 874k | SDValue N1 = N->getOperand(1); |
5097 | 874k | EVT VT = N0.getValueType(); |
5098 | 874k | |
5099 | 874k | // fold vector ops |
5100 | 874k | if (VT.isVector()874k ) { |
5101 | 19.0k | if (SDValue FoldedVOp = SimplifyVBinOp(N)) |
5102 | 1 | return FoldedVOp; |
5103 | 19.0k | |
5104 | 19.0k | // fold (xor x, 0) -> x, vector edition |
5105 | 19.0k | if (19.0k ISD::isBuildVectorAllZeros(N0.getNode())19.0k ) |
5106 | 2 | return N1; |
5107 | 19.0k | if (19.0k ISD::isBuildVectorAllZeros(N1.getNode())19.0k ) |
5108 | 7 | return N0; |
5109 | 874k | } |
5110 | 874k | |
5111 | 874k | // fold (xor undef, undef) -> 0. This is a common idiom (misuse). |
5112 | 874k | if (874k N0.isUndef() && 874k N1.isUndef()0 ) |
5113 | 0 | return DAG.getConstant(0, SDLoc(N), VT); |
5114 | 874k | // fold (xor x, undef) -> undef |
5115 | 874k | if (874k N0.isUndef()874k ) |
5116 | 0 | return N0; |
5117 | 874k | if (874k N1.isUndef()874k ) |
5118 | 0 | return N1; |
5119 | 874k | // fold (xor c1, c2) -> c1^c2 |
5120 | 874k | ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); |
5121 | 874k | ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); |
5122 | 874k | if (N0C && 874k N1C869 ) |
5123 | 868 | return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); |
5124 | 873k | // canonicalize constant to RHS |
5125 | 873k | if (873k DAG.isConstantIntBuildVectorOrConstantInt(N0) && |
5126 | 10 | !DAG.isConstantIntBuildVectorOrConstantInt(N1)) |
5127 | 9 | return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); |
5128 | 873k | // fold (xor x, 0) -> x |
5129 | 873k | if (873k isNullConstant(N1)873k ) |
5130 | 0 | return N0; |
5131 | 873k | |
5132 | 873k | if (SDValue 873k NewSel873k = foldBinOpIntoSelect(N)) |
5133 | 5 | return NewSel; |
5134 | 873k | |
5135 | 873k | // reassociate xor |
5136 | 873k | if (SDValue 873k RXOR873k = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) |
5137 | 350 | return RXOR; |
5138 | 872k | |
5139 | 872k | // fold !(x cc y) -> (x !cc y) |
5140 | 872k | SDValue LHS, RHS, CC; |
5141 | 872k | if (TLI.isConstTrueVal(N1.getNode()) && 872k isSetCCEquivalent(N0, LHS, RHS, CC)774k ) { |
5142 | 764k | bool isInt = LHS.getValueType().isInteger(); |
5143 | 764k | ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), |
5144 | 764k | isInt); |
5145 | 764k | |
5146 | 764k | if (!LegalOperations || |
5147 | 764k | TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())7 ) { |
5148 | 764k | switch (N0.getOpcode()) { |
5149 | 0 | default: |
5150 | 0 | llvm_unreachable("Unhandled SetCC Equivalent!"); |
5151 | 764k | case ISD::SETCC: |
5152 | 764k | return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC); |
5153 | 0 | case ISD::SELECT_CC: |
5154 | 0 | return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2), |
5155 | 0 | N0.getOperand(3), NotCC); |
5156 | 108k | } |
5157 | 108k | } |
5158 | 764k | } |
5159 | 108k | |
5160 | 108k | // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) |
5161 | 108k | if (108k isOneConstant(N1) && 108k N0.getOpcode() == ISD::ZERO_EXTEND2.03k && |
5162 | 16 | N0.getNode()->hasOneUse() && |
5163 | 108k | isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)7 ){ |
5164 | 3 | SDValue V = N0.getOperand(0); |
5165 | 3 | SDLoc DL(N0); |
5166 | 3 | V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, |
5167 | 3 | DAG.getConstant(1, DL, V.getValueType())); |
5168 | 3 | AddToWorklist(V.getNode()); |
5169 | 3 | return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); |
5170 | 3 | } |
5171 | 108k | |
5172 | 108k | // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc |
5173 | 108k | if (108k isOneConstant(N1) && 108k VT == MVT::i12.03k && |
5174 | 108k | (N0.getOpcode() == ISD::OR || 392 N0.getOpcode() == ISD::AND326 )) { |
5175 | 97 | SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); |
5176 | 97 | if (isOneUseSetCC(RHS) || 97 isOneUseSetCC(LHS)61 ) { |
5177 | 73 | unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR25 : ISD::AND48 ; |
5178 | 73 | LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS |
5179 | 73 | RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS |
5180 | 73 | AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); |
5181 | 73 | return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); |
5182 | 73 | } |
5183 | 107k | } |
5184 | 107k | // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants |
5185 | 107k | if (107k isAllOnesConstant(N1) && |
5186 | 107k | (N0.getOpcode() == ISD::OR || 29.2k N0.getOpcode() == ISD::AND28.6k )) { |
5187 | 818 | SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); |
5188 | 818 | if (isa<ConstantSDNode>(RHS) || 818 isa<ConstantSDNode>(LHS)710 ) { |
5189 | 108 | unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR104 : ISD::AND4 ; |
5190 | 108 | LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS |
5191 | 108 | RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS |
5192 | 108 | AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); |
5193 | 108 | return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); |
5194 | 108 | } |
5195 | 107k | } |
5196 | 107k | // fold (xor (and x, y), y) -> (and (not x), y) |
5197 | 107k | if (107k N0.getOpcode() == ISD::AND && 107k N0.getNode()->hasOneUse()3.02k && |
5198 | 107k | N0->getOperand(1) == N12.72k ) { |
5199 | 543 | SDValue X = N0->getOperand(0); |
5200 | 543 | SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); |
5201 | 543 | AddToWorklist(NotX.getNode()); |
5202 | 543 | return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); |
5203 | 543 | } |
5204 | 107k | // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) |
5205 | 107k | if (107k N1C && 107k N0.getOpcode() == ISD::XOR42.9k ) { |
5206 | 344 | if (const ConstantSDNode *N00C344 = getAsNonOpaqueConstant(N0.getOperand(0))) { |
5207 | 0 | SDLoc DL(N); |
5208 | 0 | return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), |
5209 | 0 | DAG.getConstant(N1C->getAPIntValue() ^ |
5210 | 0 | N00C->getAPIntValue(), DL, VT)); |
5211 | 0 | } |
5212 | 344 | if (const ConstantSDNode *344 N01C344 = getAsNonOpaqueConstant(N0.getOperand(1))) { |
5213 | 0 | SDLoc DL(N); |
5214 | 0 | return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), |
5215 | 0 | DAG.getConstant(N1C->getAPIntValue() ^ |
5216 | 0 | N01C->getAPIntValue(), DL, VT)); |
5217 | 0 | } |
5218 | 107k | } |
5219 | 107k | |
5220 | 107k | // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) |
5221 | 107k | unsigned OpSizeInBits = VT.getScalarSizeInBits(); |
5222 | 107k | if (N0.getOpcode() == ISD::ADD && 107k N0.getOperand(1) == N117.4k && |
5223 | 107k | N1.getOpcode() == ISD::SRA16.2k && N1.getOperand(0) == N0.getOperand(0)15.5k && |
5224 | 107k | TLI.isOperationLegalOrCustom(ISD::ABS, VT)15.4k ) { |
5225 | 231 | if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) |
5226 | 229 | if (229 C->getAPIntValue() == (OpSizeInBits - 1)229 ) |
5227 | 229 | return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0)); |
5228 | 107k | } |
5229 | 107k | |
5230 | 107k | // fold (xor x, x) -> 0 |
5231 | 107k | if (107k N0 == N1107k ) |
5232 | 13 | return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); |
5233 | 107k | |
5234 | 107k | // fold (xor (shl 1, x), -1) -> (rotl ~1, x) |
5235 | 107k | // Here is a concrete example of this equivalence: |
5236 | 107k | // i16 x == 14 |
5237 | 107k | // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 |
5238 | 107k | // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 |
5239 | 107k | // |
5240 | 107k | // => |
5241 | 107k | // |
5242 | 107k | // i16 ~1 == 0b1111111111111110 |
5243 | 107k | // i16 rol(~1, 14) == 0b1011111111111111 |
5244 | 107k | // |
5245 | 107k | // Some additional tips to help conceptualize this transform: |
5246 | 107k | // - Try to see the operation as placing a single zero in a value of all ones. |
5247 | 107k | // - There exists no value for x which would allow the result to contain zero. |
5248 | 107k | // - Values of x larger than the bitwidth are undefined and do not require a |
5249 | 107k | // consistent result. |
5250 | 107k | // - Pushing the zero left requires shifting one bits in from the right. |
5251 | 107k | // A rotate left of ~1 is a nice way of achieving the desired result. |
5252 | 107k | if (107k TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && 107k N0.getOpcode() == ISD::SHL13.4k |
5253 | 107k | && isAllOnesConstant(N1)363 && isOneConstant(N0.getOperand(0))198 ) { |
5254 | 185 | SDLoc DL(N); |
5255 | 185 | return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), |
5256 | 185 | N0.getOperand(1)); |
5257 | 185 | } |
5258 | 106k | |
5259 | 106k | // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) |
5260 | 106k | if (106k N0.getOpcode() == N1.getOpcode()106k ) |
5261 | 15.2k | if (SDValue 15.2k Tmp15.2k = SimplifyBinOpWithSameOpcodeHands(N)) |
5262 | 409 | return Tmp; |
5263 | 106k | |
5264 | 106k | // Simplify the expression using non-local knowledge. |
5265 | 106k | if (106k SimplifyDemandedBits(SDValue(N, 0))106k ) |
5266 | 405 | return SDValue(N, 0); |
5267 | 106k | |
5268 | 106k | return SDValue(); |
5269 | 106k | } |
5270 | | |
5271 | | /// Handle transforms common to the three shifts, when the shift amount is a |
5272 | | /// constant. |
5273 | 987k | SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { |
5274 | 987k | SDNode *LHS = N->getOperand(0).getNode(); |
5275 | 987k | if (!LHS->hasOneUse()987k ) return SDValue()499k ; |
5276 | 488k | |
5277 | 488k | // We want to pull some binops through shifts, so that we have (and (shift)) |
5278 | 488k | // instead of (shift (and)), likewise for add, or, xor, etc. This sort of |
5279 | 488k | // thing happens with address calculations, so it's important to canonicalize |
5280 | 488k | // it. |
5281 | 488k | bool HighBitSet = false; // Can we transform this if the high bit is set? |
5282 | 488k | |
5283 | 488k | switch (LHS->getOpcode()) { |
5284 | 440k | default: return SDValue(); |
5285 | 3.05k | case ISD::OR: |
5286 | 3.05k | case ISD::XOR: |
5287 | 3.05k | HighBitSet = false; // We can only transform sra if the high bit is clear. |
5288 | 3.05k | break; |
5289 | 16.0k | case ISD::AND: |
5290 | 16.0k | HighBitSet = true; // We can only transform sra if the high bit is set. |
5291 | 16.0k | break; |
5292 | 28.7k | case ISD::ADD: |
5293 | 28.7k | if (N->getOpcode() != ISD::SHL) |
5294 | 18.7k | return SDValue(); // only shl(add) not sr[al](add). |
5295 | 9.94k | HighBitSet = false; // We can only transform sra if the high bit is clear. |
5296 | 9.94k | break; |
5297 | 29.0k | } |
5298 | 29.0k | |
5299 | 29.0k | // We require the RHS of the binop to be a constant and not opaque as well. |
5300 | 29.0k | ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); |
5301 | 29.0k | if (!BinOpCst29.0k ) return SDValue()13.0k ; |
5302 | 15.9k | |
5303 | 15.9k | // FIXME: disable this unless the input to the binop is a shift by a constant |
5304 | 15.9k | // or is copy/select.Enable this in other cases when figure out it's exactly profitable. |
5305 | 15.9k | SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); |
5306 | 15.9k | bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL || |
5307 | 15.7k | BinOpLHSVal->getOpcode() == ISD::SRA || |
5308 | 15.7k | BinOpLHSVal->getOpcode() == ISD::SRL; |
5309 | 15.9k | bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg || |
5310 | 14.8k | BinOpLHSVal->getOpcode() == ISD::SELECT; |
5311 | 15.9k | |
5312 | 15.9k | if ((!isShift || 15.9k !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))2.43k ) && |
5313 | 13.5k | !isCopyOrSelect) |
5314 | 12.3k | return SDValue(); |
5315 | 3.58k | |
5316 | 3.58k | if (3.58k isCopyOrSelect && 3.58k N->hasOneUse()1.16k ) |
5317 | 1.09k | return SDValue(); |
5318 | 2.49k | |
5319 | 2.49k | EVT VT = N->getValueType(0); |
5320 | 2.49k | |
5321 | 2.49k | // If this is a signed shift right, and the high bit is modified by the |
5322 | 2.49k | // logical operation, do not perform the transformation. The highBitSet |
5323 | 2.49k | // boolean indicates the value of the high bit of the constant which would |
5324 | 2.49k | // cause it to be modified for this operation. |
5325 | 2.49k | if (N->getOpcode() == ISD::SRA2.49k ) { |
5326 | 24 | bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); |
5327 | 24 | if (BinOpRHSSignSet != HighBitSet) |
5328 | 22 | return SDValue(); |
5329 | 2.47k | } |
5330 | 2.47k | |
5331 | 2.47k | if (2.47k !TLI.isDesirableToCommuteWithShift(LHS)2.47k ) |
5332 | 1.64k | return SDValue(); |
5333 | 822 | |
5334 | 822 | // Fold the constants, shifting the binop RHS by the shift amount. |
5335 | 822 | SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), |
5336 | 822 | N->getValueType(0), |
5337 | 822 | LHS->getOperand(1), N->getOperand(1)); |
5338 | 822 | assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); |
5339 | 822 | |
5340 | 822 | // Create the new shift. |
5341 | 822 | SDValue NewShift = DAG.getNode(N->getOpcode(), |
5342 | 822 | SDLoc(LHS->getOperand(0)), |
5343 | 822 | VT, LHS->getOperand(0), N->getOperand(1)); |
5344 | 822 | |
5345 | 822 | // Create the new binop. |
5346 | 822 | return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); |
5347 | 822 | } |
5348 | | |
5349 | 676 | SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { |
5350 | 676 | assert(N->getOpcode() == ISD::TRUNCATE); |
5351 | 676 | assert(N->getOperand(0).getOpcode() == ISD::AND); |
5352 | 676 | |
5353 | 676 | // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) |
5354 | 676 | if (N->hasOneUse() && 676 N->getOperand(0).hasOneUse()661 ) { |
5355 | 259 | SDValue N01 = N->getOperand(0).getOperand(1); |
5356 | 259 | if (isConstantOrConstantVector(N01, /* NoOpaques */ true)259 ) { |
5357 | 256 | SDLoc DL(N); |
5358 | 256 | EVT TruncVT = N->getValueType(0); |
5359 | 256 | SDValue N00 = N->getOperand(0).getOperand(0); |
5360 | 256 | SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00); |
5361 | 256 | SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01); |
5362 | 256 | AddToWorklist(Trunc00.getNode()); |
5363 | 256 | AddToWorklist(Trunc01.getNode()); |
5364 | 256 | return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01); |
5365 | 256 | } |
5366 | 420 | } |
5367 | 420 | |
5368 | 420 | return SDValue(); |
5369 | 420 | } |
5370 | | |
5371 | 5.68k | SDValue DAGCombiner::visitRotate(SDNode *N) { |
5372 | 5.68k | SDLoc dl(N); |
5373 | 5.68k | SDValue N0 = N->getOperand(0); |
5374 | 5.68k | SDValue N1 = N->getOperand(1); |
5375 | 5.68k | EVT VT = N->getValueType(0); |
5376 | 5.68k | unsigned Bitsize = VT.getScalarSizeInBits(); |
5377 | 5.68k | |
5378 | 5.68k | // fold (rot x, 0) -> x |
5379 | 5.68k | if (isNullConstantOrNullSplatConstant(N1)) |
5380 | 2 | return N0; |
5381 | 5.68k | |
5382 | 5.68k | // fold (rot x, c) -> (rot x, c % BitSize) |
5383 | 5.68k | if (ConstantSDNode *5.68k Cst5.68k = isConstOrConstSplat(N1)) { |
5384 | 4.17k | if (Cst->getAPIntValue().uge(Bitsize)4.17k ) { |
5385 | 2 | uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize); |
5386 | 2 | return DAG.getNode(N->getOpcode(), dl, VT, N0, |
5387 | 2 | DAG.getConstant(RotAmt, dl, N1.getValueType())); |
5388 | 2 | } |
5389 | 5.67k | } |
5390 | 5.67k | |
5391 | 5.67k | // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). |
5392 | 5.67k | if (5.67k N1.getOpcode() == ISD::TRUNCATE && |
5393 | 5.67k | N1.getOperand(0).getOpcode() == ISD::AND618 ) { |
5394 | 24 | if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) |
5395 | 24 | return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1); |
5396 | 5.65k | } |
5397 | 5.65k | |
5398 | 5.65k | unsigned NextOp = N0.getOpcode(); |
5399 | 5.65k | // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize) |
5400 | 5.65k | if (NextOp == ISD::ROTL || 5.65k NextOp == ISD::ROTR5.64k ) { |
5401 | 11 | SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); |
5402 | 11 | SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); |
5403 | 11 | if (C1 && 11 C211 && C1->getValueType(0) == C2->getValueType(0)11 ) { |
5404 | 11 | EVT ShiftVT = C1->getValueType(0); |
5405 | 11 | bool SameSide = (N->getOpcode() == NextOp); |
5406 | 11 | unsigned CombineOp = SameSide ? ISD::ADD11 : ISD::SUB0 ; |
5407 | 11 | if (SDValue CombinedShift = |
5408 | 11 | DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) { |
5409 | 11 | SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); |
5410 | 11 | SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( |
5411 | 11 | ISD::SREM, dl, ShiftVT, CombinedShift.getNode(), |
5412 | 11 | BitsizeC.getNode()); |
5413 | 11 | return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), |
5414 | 11 | CombinedShiftNorm); |
5415 | 11 | } |
5416 | 5.64k | } |
5417 | 11 | } |
5418 | 5.64k | return SDValue(); |
5419 | 5.64k | } |
5420 | | |
5421 | 740k | SDValue DAGCombiner::visitSHL(SDNode *N) { |
5422 | 740k | SDValue N0 = N->getOperand(0); |
5423 | 740k | SDValue N1 = N->getOperand(1); |
5424 | 740k | EVT VT = N0.getValueType(); |
5425 | 740k | unsigned OpSizeInBits = VT.getScalarSizeInBits(); |
5426 | 740k | |
5427 | 740k | // fold vector ops |
5428 | 740k | if (VT.isVector()740k ) { |
5429 | 4.56k | if (SDValue FoldedVOp = SimplifyVBinOp(N)) |
5430 | 4 | return FoldedVOp; |
5431 | 4.55k | |
5432 | 4.55k | BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); |
5433 | 4.55k | // If setcc produces all-one true value then: |
5434 | 4.55k | // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) |
5435 | 4.55k | if (N1CV && 4.55k N1CV->isConstant()3.11k ) { |
5436 | 2.92k | if (N0.getOpcode() == ISD::AND2.92k ) { |
5437 | 26 | SDValue N00 = N0->getOperand(0); |
5438 | 26 | SDValue N01 = N0->getOperand(1); |
5439 | 26 | BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); |
5440 | 26 | |
5441 | 26 | if (N01CV && 26 N01CV->isConstant()26 && N00.getOpcode() == ISD::SETCC26 && |
5442 | 12 | TLI.getBooleanContents(N00.getOperand(0).getValueType()) == |
5443 | 26 | TargetLowering::ZeroOrNegativeOneBooleanContent) { |
5444 | 12 | if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, |
5445 | 12 | N01CV, N1CV)) |
5446 | 12 | return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); |
5447 | 739k | } |
5448 | 26 | } |
5449 | 2.92k | } |
5450 | 4.56k | } |
5451 | 739k | |
5452 | 739k | ConstantSDNode *N1C = isConstOrConstSplat(N1); |
5453 | 739k | |
5454 | 739k | // fold (shl c1, c2) -> c1<<c2 |
5455 | 739k | ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); |
5456 | 739k | if (N0C && 739k N1C20.5k && !N1C->isOpaque()3.67k ) |
5457 | 3.67k | return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); |
5458 | 736k | // fold (shl 0, x) -> 0 |
5459 | 736k | if (736k isNullConstantOrNullSplatConstant(N0)736k ) |
5460 | 193 | return N0; |
5461 | 736k | // fold (shl x, c >= size(x)) -> undef |
5462 | 736k | // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. |
5463 | 736k | auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) 736k { |
5464 | 690k | return Val->getAPIntValue().uge(OpSizeInBits); |
5465 | 690k | }; |
5466 | 736k | if (matchUnaryPredicate(N1, MatchShiftTooBig)) |
5467 | 43 | return DAG.getUNDEF(VT); |
5468 | 736k | // fold (shl x, 0) -> x |
5469 | 736k | if (736k N1C && 736k N1C->isNullValue()689k ) |
5470 | 414 | return N0; |
5471 | 735k | // fold (shl undef, x) -> 0 |
5472 | 735k | if (735k N0.isUndef()735k ) |
5473 | 28 | return DAG.getConstant(0, SDLoc(N), VT); |
5474 | 735k | |
5475 | 735k | if (SDValue 735k NewSel735k = foldBinOpIntoSelect(N)) |
5476 | 56 | return NewSel; |
5477 | 735k | |
5478 | 735k | // if (shl x, c) is known to be zero, return 0 |
5479 | 735k | if (735k DAG.MaskedValueIsZero(SDValue(N, 0), |
5480 | 735k | APInt::getAllOnesValue(OpSizeInBits))) |
5481 | 2.07k | return DAG.getConstant(0, SDLoc(N), VT); |
5482 | 733k | // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). |
5483 | 733k | if (733k N1.getOpcode() == ISD::TRUNCATE && |
5484 | 733k | N1.getOperand(0).getOpcode() == ISD::AND3.76k ) { |
5485 | 568 | if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) |
5486 | 163 | return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); |
5487 | 733k | } |
5488 | 733k | |
5489 | 733k | if (733k N1C && 733k SimplifyDemandedBits(SDValue(N, 0))686k ) |
5490 | 5.96k | return SDValue(N, 0); |
5491 | 727k | |
5492 | 727k | // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) |
5493 | 727k | if (727k N0.getOpcode() == ISD::SHL727k ) { |
5494 | 886 | auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, |
5495 | 460 | ConstantSDNode *RHS) { |
5496 | 460 | APInt c1 = LHS->getAPIntValue(); |
5497 | 460 | APInt c2 = RHS->getAPIntValue(); |
5498 | 460 | zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); |
5499 | 460 | return (c1 + c2).uge(OpSizeInBits); |
5500 | 460 | }; |
5501 | 886 | if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) |
5502 | 4 | return DAG.getConstant(0, SDLoc(N), VT); |
5503 | 882 | |
5504 | 882 | auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, |
5505 | 463 | ConstantSDNode *RHS) { |
5506 | 463 | APInt c1 = LHS->getAPIntValue(); |
5507 | 463 | APInt c2 = RHS->getAPIntValue(); |
5508 | 463 | zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); |
5509 | 463 | return (c1 + c2).ult(OpSizeInBits); |
5510 | 463 | }; |
5511 | 882 | if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)882 ) { |
5512 | 448 | SDLoc DL(N); |
5513 | 448 | EVT ShiftVT = N1.getValueType(); |
5514 | 448 | SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); |
5515 | 448 | return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum); |
5516 | 448 | } |
5517 | 726k | } |
5518 | 726k | |
5519 | 726k | // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) |
5520 | 726k | // For this to be valid, the second form must not preserve any of the bits |
5521 | 726k | // that are shifted out by the inner shift in the first form. This means |
5522 | 726k | // the outer shift size must be >= the number of bits added by the ext. |
5523 | 726k | // As a corollary, we don't care what kind of ext it is. |
5524 | 726k | if (726k N1C && 726k (N0.getOpcode() == ISD::ZERO_EXTEND || |
5525 | 627k | N0.getOpcode() == ISD::ANY_EXTEND || |
5526 | 680k | N0.getOpcode() == ISD::SIGN_EXTEND) && |
5527 | 726k | N0.getOperand(0).getOpcode() == ISD::SHL137k ) { |
5528 | 887 | SDValue N0Op0 = N0.getOperand(0); |
5529 | 887 | if (ConstantSDNode *N0Op0C1887 = isConstOrConstSplat(N0Op0.getOperand(1))) { |
5530 | 807 | APInt c1 = N0Op0C1->getAPIntValue(); |
5531 | 807 | APInt c2 = N1C->getAPIntValue(); |
5532 | 807 | zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); |
5533 | 807 | |
5534 | 807 | EVT InnerShiftVT = N0Op0.getValueType(); |
5535 | 807 | uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); |
5536 | 807 | if (c2.uge(OpSizeInBits - InnerShiftSize)807 ) { |
5537 | 21 | SDLoc DL(N0); |
5538 | 21 | APInt Sum = c1 + c2; |
5539 | 21 | if (Sum.uge(OpSizeInBits)) |
5540 | 4 | return DAG.getConstant(0, DL, VT); |
5541 | 17 | |
5542 | 17 | return DAG.getNode( |
5543 | 17 | ISD::SHL, DL, VT, |
5544 | 17 | DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)), |
5545 | 17 | DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); |
5546 | 17 | } |
5547 | 807 | } |
5548 | 887 | } |
5549 | 726k | |
5550 | 726k | // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) |
5551 | 726k | // Only fold this if the inner zext has no other uses to avoid increasing |
5552 | 726k | // the total number of instructions. |
5553 | 726k | if (726k N1C && 726k N0.getOpcode() == ISD::ZERO_EXTEND680k && N0.hasOneUse()52.6k && |
5554 | 726k | N0.getOperand(0).getOpcode() == ISD::SRL39.3k ) { |
5555 | 1.37k | SDValue N0Op0 = N0.getOperand(0); |
5556 | 1.37k | if (ConstantSDNode *N0Op0C11.37k = isConstOrConstSplat(N0Op0.getOperand(1))) { |
5557 | 1.19k | if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())1.19k ) { |
5558 | 1.19k | uint64_t c1 = N0Op0C1->getZExtValue(); |
5559 | 1.19k | uint64_t c2 = N1C->getZExtValue(); |
5560 | 1.19k | if (c1 == c21.19k ) { |
5561 | 50 | SDValue NewOp0 = N0.getOperand(0); |
5562 | 50 | EVT CountVT = NewOp0.getOperand(1).getValueType(); |
5563 | 50 | SDLoc DL(N); |
5564 | 50 | SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), |
5565 | 50 | NewOp0, |
5566 | 50 | DAG.getConstant(c2, DL, CountVT)); |
5567 | 50 | AddToWorklist(NewSHL.getNode()); |
5568 | 50 | return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); |
5569 | 50 | } |
5570 | 726k | } |
5571 | 1.19k | } |
5572 | 1.37k | } |
5573 | 726k | |
5574 | 726k | // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 |
5575 | 726k | // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 |
5576 | 726k | if (726k N1C && 726k (N0.getOpcode() == ISD::SRL || 680k N0.getOpcode() == ISD::SRA677k ) && |
5577 | 726k | N0->getFlags().hasExact()5.21k ) { |
5578 | 661 | if (ConstantSDNode *N0C1661 = isConstOrConstSplat(N0.getOperand(1))) { |
5579 | 653 | uint64_t C1 = N0C1->getZExtValue(); |
5580 | 653 | uint64_t C2 = N1C->getZExtValue(); |
5581 | 653 | SDLoc DL(N); |
5582 | 653 | if (C1 <= C2) |
5583 | 56 | return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), |
5584 | 56 | DAG.getConstant(C2 - C1, DL, N1.getValueType())); |
5585 | 597 | return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), |
5586 | 597 | DAG.getConstant(C1 - C2, DL, N1.getValueType())); |
5587 | 597 | } |
5588 | 661 | } |
5589 | 726k | |
5590 | 726k | // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or |
5591 | 726k | // (and (srl x, (sub c1, c2), MASK) |
5592 | 726k | // Only fold this if the inner shift has no other uses -- if it does, folding |
5593 | 726k | // this will increase the total number of instructions. |
5594 | 726k | if (726k N1C && 726k N0.getOpcode() == ISD::SRL679k && N0.hasOneUse()3.07k ) { |
5595 | 2.06k | if (ConstantSDNode *N0C12.06k = isConstOrConstSplat(N0.getOperand(1))) { |
5596 | 1.99k | uint64_t c1 = N0C1->getZExtValue(); |
5597 | 1.99k | if (c1 < OpSizeInBits1.99k ) { |
5598 | 1.99k | uint64_t c2 = N1C->getZExtValue(); |
5599 | 1.99k | APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); |
5600 | 1.99k | SDValue Shift; |
5601 | 1.99k | if (c2 > c11.99k ) { |
5602 | 236 | Mask <<= c2 - c1; |
5603 | 236 | SDLoc DL(N); |
5604 | 236 | Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), |
5605 | 236 | DAG.getConstant(c2 - c1, DL, N1.getValueType())); |
5606 | 1.99k | } else { |
5607 | 1.76k | Mask.lshrInPlace(c1 - c2); |
5608 | 1.76k | SDLoc DL(N); |
5609 | 1.76k | Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), |
5610 | 1.76k | DAG.getConstant(c1 - c2, DL, N1.getValueType())); |
5611 | 1.76k | } |
5612 | 1.99k | SDLoc DL(N0); |
5613 | 1.99k | return DAG.getNode(ISD::AND, DL, VT, Shift, |
5614 | 1.99k | DAG.getConstant(Mask, DL, VT)); |
5615 | 1.99k | } |
5616 | 724k | } |
5617 | 2.06k | } |
5618 | 724k | |
5619 | 724k | // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) |
5620 | 724k | if (724k N0.getOpcode() == ISD::SRA && 724k N1 == N0.getOperand(1)1.51k && |
5621 | 724k | isConstantOrConstantVector(N1, /* No Opaques */ true)175 ) { |
5622 | 163 | SDLoc DL(N); |
5623 | 163 | SDValue AllBits = DAG.getAllOnesConstant(DL, VT); |
5624 | 163 | SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1); |
5625 | 163 | return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask); |
5626 | 163 | } |
5627 | 724k | |
5628 | 724k | // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) |
5629 | 724k | // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) |
5630 | 724k | // Variant of version done on multiply, except mul by a power of 2 is turned |
5631 | 724k | // into a shift. |
5632 | 724k | if (724k (N0.getOpcode() == ISD::ADD || 724k N0.getOpcode() == ISD::OR696k ) && |
5633 | 31.3k | N0.getNode()->hasOneUse() && |
5634 | 23.2k | isConstantOrConstantVector(N1, /* No Opaques */ true) && |
5635 | 724k | isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)22.7k ) { |
5636 | 11.2k | SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); |
5637 | 11.2k | SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); |
5638 | 11.2k | AddToWorklist(Shl0.getNode()); |
5639 | 11.2k | AddToWorklist(Shl1.getNode()); |
5640 | 11.2k | return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1); |
5641 | 11.2k | } |
5642 | 712k | |
5643 | 712k | // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) |
5644 | 712k | if (712k N0.getOpcode() == ISD::MUL && 712k N0.getNode()->hasOneUse()2.27k && |
5645 | 1.69k | isConstantOrConstantVector(N1, /* No Opaques */ true) && |
5646 | 712k | isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)1.12k ) { |
5647 | 334 | SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); |
5648 | 334 | if (isConstantOrConstantVector(Shl)) |
5649 | 334 | return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl); |
5650 | 712k | } |
5651 | 712k | |
5652 | 712k | if (712k N1C && 712k !N1C->isOpaque()666k ) |
5653 | 666k | if (SDValue 666k NewSHL666k = visitShiftByConstant(N, N1C)) |
5654 | 186 | return NewSHL; |
5655 | 712k | |
5656 | 712k | return SDValue(); |
5657 | 712k | } |
5658 | | |
5659 | 86.4k | SDValue DAGCombiner::visitSRA(SDNode *N) { |
5660 | 86.4k | SDValue N0 = N->getOperand(0); |
5661 | 86.4k | SDValue N1 = N->getOperand(1); |
5662 | 86.4k | EVT VT = N0.getValueType(); |
5663 | 86.4k | unsigned OpSizeInBits = VT.getScalarSizeInBits(); |
5664 | 86.4k | |
5665 | 86.4k | // Arithmetic shifting an all-sign-bit value is a no-op. |
5666 | 86.4k | // fold (sra 0, x) -> 0 |
5667 | 86.4k | // fold (sra -1, x) -> -1 |
5668 | 86.4k | if (DAG.ComputeNumSignBits(N0) == OpSizeInBits) |
5669 | 448 | return N0; |
5670 | 86.0k | |
5671 | 86.0k | // fold vector ops |
5672 | 86.0k | if (86.0k VT.isVector()86.0k ) |
5673 | 3.27k | if (SDValue 3.27k FoldedVOp3.27k = SimplifyVBinOp(N)) |
5674 | 2 | return FoldedVOp; |
5675 | 86.0k | |
5676 | 86.0k | ConstantSDNode *N1C = isConstOrConstSplat(N1); |
5677 | 86.0k | |
5678 | 86.0k | // fold (sra c1, c2) -> (sra c1, c2) |
5679 | 86.0k | ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); |
5680 | 86.0k | if (N0C && 86.0k N1C126 && !N1C->isOpaque()0 ) |
5681 | 0 | return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); |
5682 | 86.0k | // fold (sra x, c >= size(x)) -> undef |
5683 | 86.0k | // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. |
5684 | 86.0k | auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) 86.0k { |
5685 | 78.3k | return Val->getAPIntValue().uge(OpSizeInBits); |
5686 | 78.3k | }; |
5687 | 86.0k | if (matchUnaryPredicate(N1, MatchShiftTooBig)) |
5688 | 21 | return DAG.getUNDEF(VT); |
5689 | 86.0k | // fold (sra x, 0) -> x |
5690 | 86.0k | if (86.0k N1C && 86.0k N1C->isNullValue()78.1k ) |
5691 | 8 | return N0; |
5692 | 86.0k | |
5693 | 86.0k | if (SDValue 86.0k NewSel86.0k = foldBinOpIntoSelect(N)) |
5694 | 2 | return NewSel; |
5695 | 86.0k | |
5696 | 86.0k | // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports |
5697 | 86.0k | // sext_inreg. |
5698 | 86.0k | if (86.0k N1C && 86.0k N0.getOpcode() == ISD::SHL78.1k && N1 == N0.getOperand(1)15.4k ) { |
5699 | 12.2k | unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); |
5700 | 12.2k | EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); |
5701 | 12.2k | if (VT.isVector()) |
5702 | 126 | ExtVT = EVT::getVectorVT(*DAG.getContext(), |
5703 | 126 | ExtVT, VT.getVectorNumElements()); |
5704 | 12.2k | if ((!LegalOperations || |
5705 | 1.73k | TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) |
5706 | 10.4k | return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, |
5707 | 10.4k | N0.getOperand(0), DAG.getValueType(ExtVT)); |
5708 | 75.5k | } |
5709 | 75.5k | |
5710 | 75.5k | // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) |
5711 | 75.5k | if (75.5k N0.getOpcode() == ISD::SRA75.5k ) { |
5712 | 1.19k | SDLoc DL(N); |
5713 | 1.19k | EVT ShiftVT = N1.getValueType(); |
5714 | 1.19k | |
5715 | 1.19k | auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, |
5716 | 947 | ConstantSDNode *RHS) { |
5717 | 947 | APInt c1 = LHS->getAPIntValue(); |
5718 | 947 | APInt c2 = RHS->getAPIntValue(); |
5719 | 947 | zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); |
5720 | 947 | return (c1 + c2).uge(OpSizeInBits); |
5721 | 947 | }; |
5722 | 1.19k | if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) |
5723 | 925 | return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), |
5724 | 925 | DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT)); |
5725 | 268 | |
5726 | 268 | auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, |
5727 | 27 | ConstantSDNode *RHS) { |
5728 | 27 | APInt c1 = LHS->getAPIntValue(); |
5729 | 27 | APInt c2 = RHS->getAPIntValue(); |
5730 | 27 | zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); |
5731 | 27 | return (c1 + c2).ult(OpSizeInBits); |
5732 | 27 | }; |
5733 | 268 | if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)268 ) { |
5734 | 10 | SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); |
5735 | 10 | return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum); |
5736 | 10 | } |
5737 | 74.5k | } |
5738 | 74.5k | |
5739 | 74.5k | // fold (sra (shl X, m), (sub result_size, n)) |
5740 | 74.5k | // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for |
5741 | 74.5k | // result_size - n != m. |
5742 | 74.5k | // If truncate is free for the target sext(shl) is likely to result in better |
5743 | 74.5k | // code. |
5744 | 74.5k | if (74.5k N0.getOpcode() == ISD::SHL && 74.5k N1C5.21k ) { |
5745 | 4.97k | // Get the two constanst of the shifts, CN0 = m, CN = n. |
5746 | 4.97k | const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); |
5747 | 4.97k | if (N01C4.97k ) { |
5748 | 4.84k | LLVMContext &Ctx = *DAG.getContext(); |
5749 | 4.84k | // Determine what the truncate's result bitsize and type would be. |
5750 | 4.84k | EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); |
5751 | 4.84k | |
5752 | 4.84k | if (VT.isVector()) |
5753 | 43 | TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); |
5754 | 4.84k | |
5755 | 4.84k | // Determine the residual right-shift amount. |
5756 | 4.84k | int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); |
5757 | 4.84k | |
5758 | 4.84k | // If the shift is not a no-op (in which case this should be just a sign |
5759 | 4.84k | // extend already), the truncated to type is legal, sign_extend is legal |
5760 | 4.84k | // on that type, and the truncate to that type is both legal and free, |
5761 | 4.84k | // perform the transform. |
5762 | 4.84k | if ((ShiftAmt > 0) && |
5763 | 2.19k | TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && |
5764 | 154 | TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && |
5765 | 4.84k | TLI.isTruncateFree(VT, TruncVT)154 ) { |
5766 | 84 | SDLoc DL(N); |
5767 | 84 | SDValue Amt = DAG.getConstant(ShiftAmt, DL, |
5768 | 84 | getShiftAmountTy(N0.getOperand(0).getValueType())); |
5769 | 84 | SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, |
5770 | 84 | N0.getOperand(0), Amt); |
5771 | 84 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, |
5772 | 84 | Shift); |
5773 | 84 | return DAG.getNode(ISD::SIGN_EXTEND, DL, |
5774 | 84 | N->getValueType(0), Trunc); |
5775 | 84 | } |
5776 | 74.5k | } |
5777 | 4.97k | } |
5778 | 74.5k | |
5779 | 74.5k | // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). |
5780 | 74.5k | if (74.5k N1.getOpcode() == ISD::TRUNCATE && |
5781 | 74.5k | N1.getOperand(0).getOpcode() == ISD::AND348 ) { |
5782 | 15 | if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) |
5783 | 13 | return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); |
5784 | 74.4k | } |
5785 | 74.4k | |
5786 | 74.4k | // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) |
5787 | 74.4k | // if c1 is equal to the number of bits the trunc removes |
5788 | 74.4k | if (74.4k N0.getOpcode() == ISD::TRUNCATE && |
5789 | 6.25k | (N0.getOperand(0).getOpcode() == ISD::SRL || |
5790 | 6.25k | N0.getOperand(0).getOpcode() == ISD::SRA) && |
5791 | 3.26k | N0.getOperand(0).hasOneUse() && |
5792 | 2.84k | N0.getOperand(0).getOperand(1).hasOneUse() && |
5793 | 74.4k | N1C413 ) { |
5794 | 408 | SDValue N0Op0 = N0.getOperand(0); |
5795 | 408 | if (ConstantSDNode *LargeShift408 = isConstOrConstSplat(N0Op0.getOperand(1))) { |
5796 | 408 | unsigned LargeShiftVal = LargeShift->getZExtValue(); |
5797 | 408 | EVT LargeVT = N0Op0.getValueType(); |
5798 | 408 | |
5799 | 408 | if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal408 ) { |
5800 | 401 | SDLoc DL(N); |
5801 | 401 | SDValue Amt = |
5802 | 401 | DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL, |
5803 | 401 | getShiftAmountTy(N0Op0.getOperand(0).getValueType())); |
5804 | 401 | SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, |
5805 | 401 | N0Op0.getOperand(0), Amt); |
5806 | 401 | return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); |
5807 | 401 | } |
5808 | 74.0k | } |
5809 | 408 | } |
5810 | 74.0k | |
5811 | 74.0k | // Simplify, based on bits shifted out of the LHS. |
5812 | 74.0k | if (74.0k N1C && 74.0k SimplifyDemandedBits(SDValue(N, 0))66.2k ) |
5813 | 1.18k | return SDValue(N, 0); |
5814 | 72.9k | |
5815 | 72.9k | // If the sign bit is known to be zero, switch this to a SRL. |
5816 | 72.9k | if (72.9k DAG.SignBitIsZero(N0)72.9k ) |
5817 | 117 | return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); |
5818 | 72.7k | |
5819 | 72.7k | if (72.7k N1C && 72.7k !N1C->isOpaque()65.0k ) |
5820 | 65.0k | if (SDValue 65.0k NewSRA65.0k = visitShiftByConstant(N, N1C)) |
5821 | 2 | return NewSRA; |
5822 | 72.7k | |
5823 | 72.7k | return SDValue(); |
5824 | 72.7k | } |
5825 | | |
5826 | 303k | SDValue DAGCombiner::visitSRL(SDNode *N) { |
5827 | 303k | SDValue N0 = N->getOperand(0); |
5828 | 303k | SDValue N1 = N->getOperand(1); |
5829 | 303k | EVT VT = N0.getValueType(); |
5830 | 303k | unsigned OpSizeInBits = VT.getScalarSizeInBits(); |
5831 | 303k | |
5832 | 303k | // fold vector ops |
5833 | 303k | if (VT.isVector()) |
5834 | 5.84k | if (SDValue 5.84k FoldedVOp5.84k = SimplifyVBinOp(N)) |
5835 | 2 | return FoldedVOp; |
5836 | 303k | |
5837 | 303k | ConstantSDNode *N1C = isConstOrConstSplat(N1); |
5838 | 303k | |
5839 | 303k | // fold (srl c1, c2) -> c1 >>u c2 |
5840 | 303k | ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); |
5841 | 303k | if (N0C && N1C && !N1C->isOpaque())
5842 | 2.51k | return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); |
5843 | 301k | // fold (srl 0, x) -> 0 |
5844 | 301k | if (301k isNullConstantOrNullSplatConstant(N0)301k ) |
5845 | 64 | return N0; |
5846 | 301k | // fold (srl x, c >= size(x)) -> undef |
5847 | 301k | // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. |
5848 | 301k | auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) 301k { |
5849 | 278k | return Val->getAPIntValue().uge(OpSizeInBits); |
5850 | 278k | }; |
5851 | 301k | if (matchUnaryPredicate(N1, MatchShiftTooBig)) |
5852 | 20 | return DAG.getUNDEF(VT); |
5853 | 301k | // fold (srl x, 0) -> x |
5854 | 301k | if (301k N1C && 301k N1C->isNullValue()278k ) |
5855 | 1.73k | return N0; |
5856 | 299k | |
5857 | 299k | if (SDValue 299k NewSel299k = foldBinOpIntoSelect(N)) |
5858 | 3 | return NewSel; |
5859 | 299k | |
5860 | 299k | // if (srl x, c) is known to be zero, return 0 |
5861 | 299k | if (299k N1C && 299k DAG.MaskedValueIsZero(SDValue(N, 0), |
5862 | 276k | APInt::getAllOnesValue(OpSizeInBits))) |
5863 | 342 | return DAG.getConstant(0, SDLoc(N), VT); |
5864 | 299k | |
5865 | 299k | // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) |
5866 | 299k | if (299k N0.getOpcode() == ISD::SRL299k ) { |
5867 | 5.43k | auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, |
5868 | 4.38k | ConstantSDNode *RHS) { |
5869 | 4.38k | APInt c1 = LHS->getAPIntValue(); |
5870 | 4.38k | APInt c2 = RHS->getAPIntValue(); |
5871 | 4.38k | zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); |
5872 | 4.38k | return (c1 + c2).uge(OpSizeInBits); |
5873 | 4.38k | }; |
5874 | 5.43k | if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) |
5875 | 4 | return DAG.getConstant(0, SDLoc(N), VT); |
5876 | 5.43k | |
5877 | 5.43k | auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, |
5878 | 4.39k | ConstantSDNode *RHS) { |
5879 | 4.39k | APInt c1 = LHS->getAPIntValue(); |
5880 | 4.39k | APInt c2 = RHS->getAPIntValue(); |
5881 | 4.39k | zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); |
5882 | 4.39k | return (c1 + c2).ult(OpSizeInBits); |
5883 | 4.39k | }; |
5884 | 5.43k | if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)5.43k ) { |
5885 | 4.37k | SDLoc DL(N); |
5886 | 4.37k | EVT ShiftVT = N1.getValueType(); |
5887 | 4.37k | SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); |
5888 | 4.37k | return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum); |
5889 | 4.37k | } |
5890 | 294k | } |
5891 | 294k | |
5892 | 294k | // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) |
5893 | 294k | if (294k N1C && 294k N0.getOpcode() == ISD::TRUNCATE271k && |
5894 | 294k | N0.getOperand(0).getOpcode() == ISD::SRL16.0k ) { |
5895 | 4.83k | if (auto N001C4.83k = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) { |
5896 | 4.83k | uint64_t c1 = N001C->getZExtValue(); |
5897 | 4.83k | uint64_t c2 = N1C->getZExtValue(); |
5898 | 4.83k | EVT InnerShiftVT = N0.getOperand(0).getValueType(); |
5899 | 4.83k | EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType(); |
5900 | 4.83k | uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); |
5901 | 4.83k | // This is only valid if the OpSizeInBits + c1 = size of inner shift. |
5902 | 4.83k | if (c1 + OpSizeInBits == InnerShiftSize4.83k ) { |
5903 | 4.23k | SDLoc DL(N0); |
5904 | 4.23k | if (c1 + c2 >= InnerShiftSize) |
5905 | 0 | return DAG.getConstant(0, DL, VT); |
5906 | 4.23k | return DAG.getNode(ISD::TRUNCATE, DL, VT, |
5907 | 4.23k | DAG.getNode(ISD::SRL, DL, InnerShiftVT, |
5908 | 4.23k | N0.getOperand(0).getOperand(0), |
5909 | 4.23k | DAG.getConstant(c1 + c2, DL, |
5910 | 4.23k | ShiftCountVT))); |
5911 | 4.23k | } |
5912 | 4.83k | } |
5913 | 4.83k | } |
5914 | 290k | |
5915 | 290k | // fold (srl (shl x, c), c) -> (and x, cst2) |
5916 | 290k | if (290k N0.getOpcode() == ISD::SHL && 290k N0.getOperand(1) == N11.01k && |
5917 | 290k | isConstantOrConstantVector(N1, /* NoOpaques */ true)216 ) { |
5918 | 186 | SDLoc DL(N); |
5919 | 186 | SDValue Mask = |
5920 | 186 | DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1); |
5921 | 186 | AddToWorklist(Mask.getNode()); |
5922 | 186 | return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask); |
5923 | 186 | } |
5924 | 290k | |
5925 | 290k | // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) |
5926 | 290k | if (290k N1C && 290k N0.getOpcode() == ISD::ANY_EXTEND267k ) { |
5927 | 1.23k | // Shifting in all undef bits? |
5928 | 1.23k | EVT SmallVT = N0.getOperand(0).getValueType(); |
5929 | 1.23k | unsigned BitSize = SmallVT.getScalarSizeInBits(); |
5930 | 1.23k | if (N1C->getZExtValue() >= BitSize) |
5931 | 0 | return DAG.getUNDEF(VT); |
5932 | 1.23k | |
5933 | 1.23k | if (1.23k !LegalTypes || 1.23k TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)857 ) { |
5934 | 457 | uint64_t ShiftAmt = N1C->getZExtValue(); |
5935 | 457 | SDLoc DL0(N0); |
5936 | 457 | SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, |
5937 | 457 | N0.getOperand(0), |
5938 | 457 | DAG.getConstant(ShiftAmt, DL0, |
5939 | 457 | getShiftAmountTy(SmallVT))); |
5940 | 457 | AddToWorklist(SmallShift.getNode()); |
5941 | 457 | APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt); |
5942 | 457 | SDLoc DL(N); |
5943 | 457 | return DAG.getNode(ISD::AND, DL, VT, |
5944 | 457 | DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), |
5945 | 457 | DAG.getConstant(Mask, DL, VT)); |
5946 | 457 | } |
5947 | 289k | } |
5948 | 289k | |
5949 | 289k | // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign |
5950 | 289k | // bit, which is unmodified by sra. |
5951 | 289k | if (289k N1C && 289k N1C->getZExtValue() + 1 == OpSizeInBits266k ) { |
5952 | 17.2k | if (N0.getOpcode() == ISD::SRA) |
5953 | 1.13k | return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); |
5954 | 288k | } |
5955 | 288k | |
5956 | 288k | // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). |
5957 | 288k | if (288k N1C && 288k N0.getOpcode() == ISD::CTLZ265k && |
5958 | 288k | N1C->getAPIntValue() == Log2_32(OpSizeInBits)49 ) { |
5959 | 49 | KnownBits Known; |
5960 | 49 | DAG.computeKnownBits(N0.getOperand(0), Known); |
5961 | 49 | |
5962 | 49 | // If any of the input bits are KnownOne, then the input couldn't be all |
5963 | 49 | // zeros, thus the result of the srl will always be zero. |
5964 | 49 | if (Known.One.getBoolValue()49 ) return DAG.getConstant(0, SDLoc(N0), VT)0 ; |
5965 | 49 | |
5966 | 49 | // If all of the bits input the to ctlz node are known to be zero, then |
5967 | 49 | // the result of the ctlz is "32" and the result of the shift is one. |
5968 | 49 | APInt UnknownBits = ~Known.Zero; |
5969 | 49 | if (UnknownBits == 049 ) return DAG.getConstant(1, SDLoc(N0), VT)0 ; |
5970 | 49 | |
5971 | 49 | // Otherwise, check to see if there is exactly one bit input to the ctlz. |
5972 | 49 | if (49 UnknownBits.isPowerOf2()49 ) { |
5973 | 2 | // Okay, we know that only that the single bit specified by UnknownBits |
5974 | 2 | // could be set on input to the CTLZ node. If this bit is set, the SRL |
5975 | 2 | // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair |
5976 | 2 | // to an SRL/XOR pair, which is likely to simplify more. |
5977 | 2 | unsigned ShAmt = UnknownBits.countTrailingZeros(); |
5978 | 2 | SDValue Op = N0.getOperand(0); |
5979 | 2 | |
5980 | 2 | if (ShAmt2 ) { |
5981 | 2 | SDLoc DL(N0); |
5982 | 2 | Op = DAG.getNode(ISD::SRL, DL, VT, Op, |
5983 | 2 | DAG.getConstant(ShAmt, DL, |
5984 | 2 | getShiftAmountTy(Op.getValueType()))); |
5985 | 2 | AddToWorklist(Op.getNode()); |
5986 | 2 | } |
5987 | 2 | |
5988 | 2 | SDLoc DL(N); |
5989 | 2 | return DAG.getNode(ISD::XOR, DL, VT, |
5990 | 2 | Op, DAG.getConstant(1, DL, VT)); |
5991 | 2 | } |
5992 | 288k | } |
5993 | 288k | |
5994 | 288k | // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). |
5995 | 288k | if (288k N1.getOpcode() == ISD::TRUNCATE && |
5996 | 288k | N1.getOperand(0).getOpcode() == ISD::AND2.22k ) { |
5997 | 69 | if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) |
5998 | 56 | return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); |
5999 | 288k | } |
6000 | 288k | |
6001 | 288k | // fold operands of srl based on knowledge that the low bits are not |
6002 | 288k | // demanded. |
6003 | 288k | if (288k N1C && 288k SimplifyDemandedBits(SDValue(N, 0))265k ) |
6004 | 8.82k | return SDValue(N, 0); |
6005 | 279k | |
6006 | 279k | if (279k N1C && 279k !N1C->isOpaque()256k ) |
6007 | 256k | if (SDValue 256k NewSRL256k = visitShiftByConstant(N, N1C)) |
6008 | 634 | return NewSRL; |
6009 | 279k | |
6010 | 279k | // Attempt to convert a srl of a load into a narrower zero-extending load. |
6011 | 279k | if (SDValue 279k NarrowLoad279k = ReduceLoadWidth(N)) |
6012 | 143 | return NarrowLoad; |
6013 | 278k | |
6014 | 278k | // Here is a common situation. We want to optimize: |
6015 | 278k | // |
6016 | 278k | // %a = ... |
6017 | 278k | // %b = and i32 %a, 2 |
6018 | 278k | // %c = srl i32 %b, 1 |
6019 | 278k | // brcond i32 %c ... |
6020 | 278k | // |
6021 | 278k | // into |
6022 | 278k | // |
6023 | 278k | // %a = ... |
6024 | 278k | // %b = and %a, 2 |
6025 | 278k | // %c = setcc eq %b, 0 |
6026 | 278k | // brcond %c ... |
6027 | 278k | // |
6028 | 278k | // However when after the source operand of SRL is optimized into AND, the SRL |
6029 | 278k | // itself may not be optimized further. Look for it and add the BRCOND into |
6030 | 278k | // the worklist. |
6031 | 278k | if (278k N->hasOneUse()278k ) { |
6032 | 248k | SDNode *Use = *N->use_begin(); |
6033 | 248k | if (Use->getOpcode() == ISD::BRCOND) |
6034 | 336 | AddToWorklist(Use); |
6035 | 248k | else if (248k Use->getOpcode() == ISD::TRUNCATE && 248k Use->hasOneUse()94.0k ) { |
6036 | 80.9k | // Also look pass the truncate. |
6037 | 80.9k | Use = *Use->use_begin(); |
6038 | 80.9k | if (Use->getOpcode() == ISD::BRCOND) |
6039 | 57 | AddToWorklist(Use); |
6040 | 248k | } |
6041 | 248k | } |
6042 | 303k | |
6043 | 303k | return SDValue(); |
6044 | 303k | } |
6045 | | |
6046 | 1.11k | SDValue DAGCombiner::visitABS(SDNode *N) { |
6047 | 1.11k | SDValue N0 = N->getOperand(0); |
6048 | 1.11k | EVT VT = N->getValueType(0); |
6049 | 1.11k | |
6050 | 1.11k | // fold (abs c1) -> c2 |
6051 | 1.11k | if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) |
6052 | 0 | return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0); |
6053 | 1.11k | // fold (abs (abs x)) -> (abs x) |
6054 | 1.11k | if (1.11k N0.getOpcode() == ISD::ABS1.11k ) |
6055 | 8 | return N0; |
6056 | 1.11k | // fold (abs x) -> x iff not-negative |
6057 | 1.11k | if (1.11k DAG.SignBitIsZero(N0)1.11k ) |
6058 | 8 | return N0; |
6059 | 1.10k | return SDValue(); |
6060 | 1.10k | } |
6061 | | |
6062 | 2.66k | SDValue DAGCombiner::visitBSWAP(SDNode *N) { |
6063 | 2.66k | SDValue N0 = N->getOperand(0); |
6064 | 2.66k | EVT VT = N->getValueType(0); |
6065 | 2.66k | |
6066 | 2.66k | // fold (bswap c1) -> c2 |
6067 | 2.66k | if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) |
6068 | 0 | return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); |
6069 | 2.66k | // fold (bswap (bswap x)) -> x |
6070 | 2.66k | if (2.66k N0.getOpcode() == ISD::BSWAP2.66k ) |
6071 | 28 | return N0->getOperand(0); |
6072 | 2.64k | return SDValue(); |
6073 | 2.64k | } |
6074 | | |
6075 | 634 | SDValue DAGCombiner::visitBITREVERSE(SDNode *N) { |
6076 | 634 | SDValue N0 = N->getOperand(0); |
6077 | 634 | EVT VT = N->getValueType(0); |
6078 | 634 | |
6079 | 634 | // fold (bitreverse c1) -> c2 |
6080 | 634 | if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) |
6081 | 0 | return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0); |
6082 | 634 | // fold (bitreverse (bitreverse x)) -> x |
6083 | 634 | if (634 N0.getOpcode() == ISD::BITREVERSE634 ) |
6084 | 4 | return N0.getOperand(0); |
6085 | 630 | return SDValue(); |
6086 | 630 | } |
6087 | | |
6088 | 5.57k | SDValue DAGCombiner::visitCTLZ(SDNode *N) { |
6089 | 5.57k | SDValue N0 = N->getOperand(0); |
6090 | 5.57k | EVT VT = N->getValueType(0); |
6091 | 5.57k | |
6092 | 5.57k | // fold (ctlz c1) -> c2 |
6093 | 5.57k | if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) |
6094 | 0 | return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); |
6095 | 5.57k | return SDValue(); |
6096 | 5.57k | } |
6097 | | |
6098 | 4.14k | SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { |
6099 | 4.14k | SDValue N0 = N->getOperand(0); |
6100 | 4.14k | EVT VT = N->getValueType(0); |
6101 | 4.14k | |
6102 | 4.14k | // fold (ctlz_zero_undef c1) -> c2 |
6103 | 4.14k | if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) |
6104 | 0 | return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); |
6105 | 4.14k | return SDValue(); |
6106 | 4.14k | } |
6107 | | |
6108 | 728 | SDValue DAGCombiner::visitCTTZ(SDNode *N) { |
6109 | 728 | SDValue N0 = N->getOperand(0); |
6110 | 728 | EVT VT = N->getValueType(0); |
6111 | 728 | |
6112 | 728 | // fold (cttz c1) -> c2 |
6113 | 728 | if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) |
6114 | 0 | return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); |
6115 | 728 | return SDValue(); |
6116 | 728 | } |
6117 | | |
6118 | 1.00k | SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { |
6119 | 1.00k | SDValue N0 = N->getOperand(0); |
6120 | 1.00k | EVT VT = N->getValueType(0); |
6121 | 1.00k | |
6122 | 1.00k | // fold (cttz_zero_undef c1) -> c2 |
6123 | 1.00k | if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) |
6124 | 0 | return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); |
6125 | 1.00k | return SDValue(); |
6126 | 1.00k | } |
6127 | | |
6128 | 1.18k | SDValue DAGCombiner::visitCTPOP(SDNode *N) { |
6129 | 1.18k | SDValue N0 = N->getOperand(0); |
6130 | 1.18k | EVT VT = N->getValueType(0); |
6131 | 1.18k | |
6132 | 1.18k | // fold (ctpop c1) -> c2 |
6133 | 1.18k | if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) |
6134 | 0 | return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); |
6135 | 1.18k | return SDValue(); |
6136 | 1.18k | } |
6137 | | |
6138 | | /// \brief Generate Min/Max node |
6139 | | static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, |
6140 | | SDValue RHS, SDValue True, SDValue False, |
6141 | | ISD::CondCode CC, const TargetLowering &TLI, |
6142 | 99 | SelectionDAG &DAG) { |
6143 | 99 | if (!(LHS == True && 99 RHS == False50 ) && !(LHS == False && 50 RHS == True34 )) |
6144 | 16 | return SDValue(); |
6145 | 83 | |
6146 | 83 | switch (CC) { |
6147 | 43 | case ISD::SETOLT: |
6148 | 43 | case ISD::SETOLE: |
6149 | 43 | case ISD::SETLT: |
6150 | 43 | case ISD::SETLE: |
6151 | 43 | case ISD::SETULT: |
6152 | 43 | case ISD::SETULE: { |
6153 | 43 | unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
6154 | 43 | if (TLI.isOperationLegal(Opcode, VT)) |
6155 | 15 | return DAG.getNode(Opcode, DL, VT, LHS, RHS); |
6156 | 28 | return SDValue(); |
6157 | 28 | } |
6158 | 40 | case ISD::SETOGT: |
6159 | 40 | case ISD::SETOGE: |
6160 | 40 | case ISD::SETGT: |
6161 | 40 | case ISD::SETGE: |
6162 | 40 | case ISD::SETUGT: |
6163 | 40 | case ISD::SETUGE: { |
6164 | 40 | unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
6165 | 40 | if (TLI.isOperationLegal(Opcode, VT)) |
6166 | 12 | return DAG.getNode(Opcode, DL, VT, LHS, RHS); |
6167 | 28 | return SDValue(); |
6168 | 28 | } |
6169 | 0 | default: |
6170 | 0 | return SDValue(); |
6171 | 0 | } |
6172 | 0 | } |
6173 | | |
6174 | 177k | SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { |
6175 | 177k | SDValue Cond = N->getOperand(0); |
6176 | 177k | SDValue N1 = N->getOperand(1); |
6177 | 177k | SDValue N2 = N->getOperand(2); |
6178 | 177k | EVT VT = N->getValueType(0); |
6179 | 177k | EVT CondVT = Cond.getValueType(); |
6180 | 177k | SDLoc DL(N); |
6181 | 177k | |
6182 | 177k | if (!VT.isInteger()) |
6183 | 8.39k | return SDValue(); |
6184 | 169k | |
6185 | 169k | auto *C1 = dyn_cast<ConstantSDNode>(N1); |
6186 | 169k | auto *C2 = dyn_cast<ConstantSDNode>(N2); |
6187 | 169k | if (!C1 || 169k !C248.8k ) |
6188 | 145k | return SDValue(); |
6189 | 24.1k | |
6190 | 24.1k | // Only do this before legalization to avoid conflicting with target-specific |
6191 | 24.1k | // transforms in the other direction (create a select from a zext/sext). There |
6192 | 24.1k | // is also a target-independent combine here in DAGCombiner in the other |
6193 | 24.1k | // direction for (select Cond, -1, 0) when the condition is not i1. |
6194 | 24.1k | if (24.1k CondVT == MVT::i1 && 24.1k !LegalOperations18.6k ) { |
6195 | 16.9k | if (C1->isNullValue() && 16.9k C2->isOne()7.34k ) { |
6196 | 31 | // select Cond, 0, 1 --> zext (!Cond) |
6197 | 31 | SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); |
6198 | 31 | if (VT != MVT::i1) |
6199 | 30 | NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond); |
6200 | 31 | return NotCond; |
6201 | 31 | } |
6202 | 16.9k | if (16.9k C1->isNullValue() && 16.9k C2->isAllOnesValue()7.31k ) { |
6203 | 25 | // select Cond, 0, -1 --> sext (!Cond) |
6204 | 25 | SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); |
6205 | 25 | if (VT != MVT::i1) |
6206 | 25 | NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond); |
6207 | 25 | return NotCond; |
6208 | 25 | } |
6209 | 16.9k | if (16.9k C1->isOne() && 16.9k C2->isNullValue()439 ) { |
6210 | 35 | // select Cond, 1, 0 --> zext (Cond) |
6211 | 35 | if (VT != MVT::i1) |
6212 | 35 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); |
6213 | 35 | return Cond; |
6214 | 35 | } |
6215 | 16.8k | if (16.8k C1->isAllOnesValue() && 16.8k C2->isNullValue()228 ) { |
6216 | 56 | // select Cond, -1, 0 --> sext (Cond) |
6217 | 56 | if (VT != MVT::i1) |
6218 | 56 | Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); |
6219 | 56 | return Cond; |
6220 | 56 | } |
6221 | 16.8k | |
6222 | 16.8k | // For any constants that differ by 1, we can transform the select into an |
6223 | 16.8k | // extend and add. Use a target hook because some targets may prefer to |
6224 | 16.8k | // transform in the other direction. |
6225 | 16.8k | if (16.8k TLI.convertSelectOfConstantsToMath(VT)16.8k ) { |
6226 | 540 | if (C1->getAPIntValue() - 1 == C2->getAPIntValue()540 ) { |
6227 | 54 | // select Cond, C1, C1-1 --> add (zext Cond), C1-1 |
6228 | 54 | if (VT != MVT::i1) |
6229 | 54 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); |
6230 | 54 | return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); |
6231 | 54 | } |
6232 | 486 | if (486 C1->getAPIntValue() + 1 == C2->getAPIntValue()486 ) { |
6233 | 78 | // select Cond, C1, C1+1 --> add (sext Cond), C1+1 |
6234 | 78 | if (VT != MVT::i1) |
6235 | 78 | Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); |
6236 | 78 | return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); |
6237 | 78 | } |
6238 | 16.6k | } |
6239 | 16.6k | |
6240 | 16.6k | return SDValue(); |
6241 | 16.6k | } |
6242 | 7.18k | |
6243 | 7.18k | // fold (select Cond, 0, 1) -> (xor Cond, 1) |
6244 | 7.18k | // We can't do this reliably if integer based booleans have different contents |
6245 | 7.18k | // to floating point based booleans. This is because we can't tell whether we |
6246 | 7.18k | // have an integer-based boolean or a floating-point-based boolean unless we |
6247 | 7.18k | // can find the SETCC that produced it and inspect its operands. This is |
6248 | 7.18k | // fairly easy if C is the SETCC node, but it can potentially be |
6249 | 7.18k | // undiscoverable (or not reasonably discoverable). For example, it could be |
6250 | 7.18k | // in another basic block or it could require searching a complicated |
6251 | 7.18k | // expression. |
6252 | 7.18k | if (7.18k CondVT.isInteger() && |
6253 | 7.18k | TLI.getBooleanContents(false, true) == |
6254 | 7.18k | TargetLowering::ZeroOrOneBooleanContent && |
6255 | 5.34k | TLI.getBooleanContents(false, false) == |
6256 | 5.34k | TargetLowering::ZeroOrOneBooleanContent && |
6257 | 7.18k | C1->isNullValue()5.34k && C2->isOne()399 ) { |
6258 | 0 | SDValue NotCond = |
6259 | 0 | DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); |
6260 | 0 | if (VT.bitsEq(CondVT)) |
6261 | 0 | return NotCond; |
6262 | 0 | return DAG.getZExtOrTrunc(NotCond, DL, VT); |
6263 | 0 | } |
6264 | 7.18k | |
6265 | 7.18k | return SDValue(); |
6266 | 7.18k | } |
6267 | | |
6268 | 177k | SDValue DAGCombiner::visitSELECT(SDNode *N) { |
6269 | 177k | SDValue N0 = N->getOperand(0); |
6270 | 177k | SDValue N1 = N->getOperand(1); |
6271 | 177k | SDValue N2 = N->getOperand(2); |
6272 | 177k | EVT VT = N->getValueType(0); |
6273 | 177k | EVT VT0 = N0.getValueType(); |
6274 | 177k | SDLoc DL(N); |
6275 | 177k | |
6276 | 177k | // fold (select C, X, X) -> X |
6277 | 177k | if (N1 == N2) |
6278 | 59 | return N1; |
6279 | 177k | |
6280 | 177k | if (const ConstantSDNode *177k N0C177k = dyn_cast<const ConstantSDNode>(N0)) { |
6281 | 58 | // fold (select true, X, Y) -> X |
6282 | 58 | // fold (select false, X, Y) -> Y |
6283 | 58 | return !N0C->isNullValue() ? N17 : N251 ; |
6284 | 58 | } |
6285 | 177k | |
6286 | 177k | // fold (select X, X, Y) -> (or X, Y) |
6287 | 177k | // fold (select X, 1, Y) -> (or C, Y) |
6288 | 177k | if (177k VT == VT0 && 177k VT == MVT::i126.9k && (N0 == N1 || 83 isOneConstant(N1)77 )) |
6289 | 9 | return DAG.getNode(ISD::OR, DL, VT, N0, N2); |
6290 | 177k | |
6291 | 177k | if (SDValue 177k V177k = foldSelectOfConstants(N)) |
6292 | 279 | return V; |
6293 | 177k | |
6294 | 177k | // fold (select C, 0, X) -> (and (not C), X) |
6295 | 177k | if (177k VT == VT0 && 177k VT == MVT::i126.9k && isNullConstant(N1)73 ) { |
6296 | 6 | SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); |
6297 | 6 | AddToWorklist(NOTNode.getNode()); |
6298 | 6 | return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2); |
6299 | 6 | } |
6300 | 177k | // fold (select C, X, 1) -> (or (not C), X) |
6301 | 177k | if (177k VT == VT0 && 177k VT == MVT::i126.9k && isOneConstant(N2)67 ) { |
6302 | 2 | SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); |
6303 | 2 | AddToWorklist(NOTNode.getNode()); |
6304 | 2 | return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1); |
6305 | 2 | } |
6306 | 177k | // fold (select X, Y, X) -> (and X, Y) |
6307 | 177k | // fold (select X, Y, 0) -> (and X, Y) |
6308 | 177k | if (177k VT == VT0 && 177k VT == MVT::i126.9k && (N0 == N2 || 65 isNullConstant(N2)63 )) |
6309 | 3 | return DAG.getNode(ISD::AND, DL, VT, N0, N1); |
6310 | 177k | |
6311 | 177k | // If we can fold this based on the true/false value, do so. |
6312 | 177k | if (177k SimplifySelectOps(N, N1, N2)177k ) |
6313 | 168 | return SDValue(N, 0); // Don't revisit N. |
6314 | 177k | |
6315 | 177k | if (177k VT0 == MVT::i1177k ) { |
6316 | 143k | // The code in this block deals with the following 2 equivalences: |
6317 | 143k | // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) |
6318 | 143k | // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) |
6319 | 143k | // The target can specify its preferred form with the |
6320 | 143k | // shouldNormalizeToSelectSequence() callback. However we always transform |
6321 | 143k | // to the right anyway if we find the inner select exists in the DAG anyway |
6322 | 143k | // and we always transform to the left side if we know that we can further |
6323 | 143k | // optimize the combination of the conditions. |
6324 | 143k | bool normalizeToSequence = |
6325 | 143k | TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); |
6326 | 143k | // select (and Cond0, Cond1), X, Y |
6327 | 143k | // -> select Cond0, (select Cond1, X, Y), Y |
6328 | 143k | if (N0->getOpcode() == ISD::AND && 143k N0->hasOneUse()9.60k ) { |
6329 | 3.50k | SDValue Cond0 = N0->getOperand(0); |
6330 | 3.50k | SDValue Cond1 = N0->getOperand(1); |
6331 | 3.50k | SDValue InnerSelect = |
6332 | 3.50k | DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); |
6333 | 3.50k | if (normalizeToSequence || 3.50k !InnerSelect.use_empty()3.32k ) |
6334 | 181 | return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, |
6335 | 181 | InnerSelect, N2); |
6336 | 143k | } |
6337 | 143k | // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) |
6338 | 143k | if (143k N0->getOpcode() == ISD::OR && 143k N0->hasOneUse()564 ) { |
6339 | 443 | SDValue Cond0 = N0->getOperand(0); |
6340 | 443 | SDValue Cond1 = N0->getOperand(1); |
6341 | 443 | SDValue InnerSelect = |
6342 | 443 | DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); |
6343 | 443 | if (normalizeToSequence || 443 !InnerSelect.use_empty()424 ) |
6344 | 20 | return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1, |
6345 | 20 | InnerSelect); |
6346 | 143k | } |
6347 | 143k | |
6348 | 143k | // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y |
6349 | 143k | if (143k N1->getOpcode() == ISD::SELECT && 143k N1->hasOneUse()4.06k ) { |
6350 | 1.16k | SDValue N1_0 = N1->getOperand(0); |
6351 | 1.16k | SDValue N1_1 = N1->getOperand(1); |
6352 | 1.16k | SDValue N1_2 = N1->getOperand(2); |
6353 | 1.16k | if (N1_2 == N2 && 1.16k N0.getValueType() == N1_0.getValueType()321 ) { |
6354 | 321 | // Create the actual and node if we can generate good code for it. |
6355 | 321 | if (!normalizeToSequence321 ) { |
6356 | 0 | SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0); |
6357 | 0 | return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2); |
6358 | 0 | } |
6359 | 321 | // Otherwise see if we can optimize the "and" to a better pattern. |
6360 | 321 | if (SDValue 321 Combined321 = visitANDLike(N0, N1_0, N)) |
6361 | 2 | return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1, |
6362 | 2 | N2); |
6363 | 143k | } |
6364 | 1.16k | } |
6365 | 143k | // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y |
6366 | 143k | if (143k N2->getOpcode() == ISD::SELECT && 143k N2->hasOneUse()7.77k ) { |
6367 | 1.69k | SDValue N2_0 = N2->getOperand(0); |
6368 | 1.69k | SDValue N2_1 = N2->getOperand(1); |
6369 | 1.69k | SDValue N2_2 = N2->getOperand(2); |
6370 | 1.69k | if (N2_1 == N1 && 1.69k N0.getValueType() == N2_0.getValueType()43 ) { |
6371 | 43 | // Create the actual or node if we can generate good code for it. |
6372 | 43 | if (!normalizeToSequence43 ) { |
6373 | 17 | SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); |
6374 | 17 | return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2); |
6375 | 17 | } |
6376 | 26 | // Otherwise see if we can optimize to a better pattern. |
6377 | 26 | if (SDValue 26 Combined26 = visitORLike(N0, N2_0, N)) |
6378 | 6 | return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1, |
6379 | 6 | N2_2); |
6380 | 176k | } |
6381 | 1.69k | } |
6382 | 143k | } |
6383 | 176k | |
6384 | 176k | // select (xor Cond, 1), X, Y -> select Cond, Y, X |
6385 | 176k | if (176k VT0 == MVT::i1176k ) { |
6386 | 143k | if (N0->getOpcode() == ISD::XOR143k ) { |
6387 | 61 | if (auto *C61 = dyn_cast<ConstantSDNode>(N0->getOperand(1))) { |
6388 | 8 | SDValue Cond0 = N0->getOperand(0); |
6389 | 8 | if (C->isOne()) |
6390 | 8 | return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1); |
6391 | 176k | } |
6392 | 61 | } |
6393 | 143k | } |
6394 | 176k | |
6395 | 176k | // fold selects based on a setcc into other things, such as min/max/abs |
6396 | 176k | if (176k N0.getOpcode() == ISD::SETCC176k ) { |
6397 | 151k | // select x, y (fcmp lt x, y) -> fminnum x, y |
6398 | 151k | // select x, y (fcmp gt x, y) -> fmaxnum x, y |
6399 | 151k | // |
6400 | 151k | // This is OK if we don't care about what happens if either operand is a |
6401 | 151k | // NaN. |
6402 | 151k | // |
6403 | 151k | |
6404 | 151k | // FIXME: Instead of testing for UnsafeFPMath, this should be checking for |
6405 | 151k | // no signed zeros as well as no nans. |
6406 | 151k | const TargetOptions &Options = DAG.getTarget().Options; |
6407 | 151k | if (Options.UnsafeFPMath && 151k VT.isFloatingPoint()411 && N0.hasOneUse()186 && |
6408 | 151k | DAG.isKnownNeverNaN(N1)182 && DAG.isKnownNeverNaN(N2)126 ) { |
6409 | 99 | ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); |
6410 | 99 | |
6411 | 99 | if (SDValue FMinMax = combineMinNumMaxNum( |
6412 | 99 | DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) |
6413 | 27 | return FMinMax; |
6414 | 151k | } |
6415 | 151k | |
6416 | 151k | if (151k (!LegalOperations && |
6417 | 137k | TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || |
6418 | 38.3k | TLI.isOperationLegal(ISD::SELECT_CC, VT)) |
6419 | 112k | return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0), |
6420 | 112k | N0.getOperand(1), N1, N2, N0.getOperand(2)); |
6421 | 38.3k | return SimplifySelect(DL, N0, N1, N2); |
6422 | 38.3k | } |
6423 | 25.5k | |
6424 | 25.5k | return SDValue(); |
6425 | 25.5k | } |
6426 | | |
6427 | | static |
6428 | 8 | std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { |
6429 | 8 | SDLoc DL(N); |
6430 | 8 | EVT LoVT, HiVT; |
6431 | 8 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); |
6432 | 8 | |
6433 | 8 | // Split the inputs. |
6434 | 8 | SDValue Lo, Hi, LL, LH, RL, RH; |
6435 | 8 | std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); |
6436 | 8 | std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); |
6437 | 8 | |
6438 | 8 | Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); |
6439 | 8 | Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); |
6440 | 8 | |
6441 | 8 | return std::make_pair(Lo, Hi); |
6442 | 8 | } |
6443 | | |
6444 | | // This function assumes all the vselect's arguments are CONCAT_VECTOR |
6445 | | // nodes and that the condition is a BV of ConstantSDNodes (or undefs). |
6446 | 42 | static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { |
6447 | 42 | SDLoc DL(N); |
6448 | 42 | SDValue Cond = N->getOperand(0); |
6449 | 42 | SDValue LHS = N->getOperand(1); |
6450 | 42 | SDValue RHS = N->getOperand(2); |
6451 | 42 | EVT VT = N->getValueType(0); |
6452 | 42 | int NumElems = VT.getVectorNumElements(); |
6453 | 42 | assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && |
6454 | 42 | RHS.getOpcode() == ISD::CONCAT_VECTORS && |
6455 | 42 | Cond.getOpcode() == ISD::BUILD_VECTOR); |
6456 | 42 | |
6457 | 42 | // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about |
6458 | 42 | // binary ones here. |
6459 | 42 | if (LHS->getNumOperands() != 2 || 42 RHS->getNumOperands() != 232 ) |
6460 | 10 | return SDValue(); |
6461 | 32 | |
6462 | 32 | // We're sure we have an even number of elements due to the |
6463 | 32 | // concat_vectors we have as arguments to vselect. |
6464 | 32 | // Skip BV elements until we find one that's not an UNDEF |
6465 | 32 | // After we find an UNDEF element, keep looping until we get to half the |
6466 | 32 | // length of the BV and see if all the non-undef nodes are the same. |
6467 | 32 | ConstantSDNode *BottomHalf = nullptr; |
6468 | 98 | for (int i = 0; i < NumElems / 298 ; ++i66 ) { |
6469 | 85 | if (Cond->getOperand(i)->isUndef()) |
6470 | 0 | continue; |
6471 | 85 | |
6472 | 85 | if (85 BottomHalf == nullptr85 ) |
6473 | 32 | BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); |
6474 | 53 | else if (53 Cond->getOperand(i).getNode() != BottomHalf53 ) |
6475 | 19 | return SDValue(); |
6476 | 85 | } |
6477 | 32 | |
6478 | 32 | // Do the same for the second half of the BuildVector |
6479 | 13 | ConstantSDNode *TopHalf = nullptr; |
6480 | 48 | for (int i = NumElems / 2; i < NumElems48 ; ++i35 ) { |
6481 | 38 | if (Cond->getOperand(i)->isUndef()) |
6482 | 0 | continue; |
6483 | 38 | |
6484 | 38 | if (38 TopHalf == nullptr38 ) |
6485 | 13 | TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); |
6486 | 25 | else if (25 Cond->getOperand(i).getNode() != TopHalf25 ) |
6487 | 3 | return SDValue(); |
6488 | 38 | } |
6489 | 13 | |
6490 | 10 | assert(TopHalf && BottomHalf && |
6491 | 10 | "One half of the selector was all UNDEFs and the other was all the " |
6492 | 10 | "same value. This should have been addressed before this function."); |
6493 | 10 | return DAG.getNode( |
6494 | 10 | ISD::CONCAT_VECTORS, DL, VT, |
6495 | 10 | BottomHalf->isNullValue() ? RHS->getOperand(0)8 : LHS->getOperand(0)2 , |
6496 | 10 | TopHalf->isNullValue() ? RHS->getOperand(1)2 : LHS->getOperand(1)8 ); |
6497 | 42 | } |
6498 | | |
6499 | 282 | SDValue DAGCombiner::visitMSCATTER(SDNode *N) { |
6500 | 282 | if (Level >= AfterLegalizeTypes) |
6501 | 217 | return SDValue(); |
6502 | 65 | |
6503 | 65 | MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); |
6504 | 65 | SDValue Mask = MSC->getMask(); |
6505 | 65 | SDValue Data = MSC->getValue(); |
6506 | 65 | SDLoc DL(N); |
6507 | 65 | |
6508 | 65 | // If the MSCATTER data type requires splitting and the mask is provided by a |
6509 | 65 | // SETCC, then split both nodes and its operands before legalization. This |
6510 | 65 | // prevents the type legalizer from unrolling SETCC into scalar comparisons |
6511 | 65 | // and enables future optimizations (e.g. min/max pattern matching on X86). |
6512 | 65 | if (Mask.getOpcode() != ISD::SETCC) |
6513 | 65 | return SDValue(); |
6514 | 0 |
|
6515 | 0 | // Check if any splitting is required. |
6516 | 0 | if (0 TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != |
6517 | 0 | TargetLowering::TypeSplitVector) |
6518 | 0 | return SDValue(); |
6519 | 0 | SDValue MaskLo, MaskHi, Lo, Hi; |
6520 | 0 | std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); |
6521 | 0 |
|
6522 | 0 | EVT LoVT, HiVT; |
6523 | 0 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); |
6524 | 0 |
|
6525 | 0 | SDValue Chain = MSC->getChain(); |
6526 | 0 |
|
6527 | 0 | EVT MemoryVT = MSC->getMemoryVT(); |
6528 | 0 | unsigned Alignment = MSC->getOriginalAlignment(); |
6529 | 0 |
|
6530 | 0 | EVT LoMemVT, HiMemVT; |
6531 | 0 | std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); |
6532 | 0 |
|
6533 | 0 | SDValue DataLo, DataHi; |
6534 | 0 | std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); |
6535 | 0 |
|
6536 | 0 | SDValue BasePtr = MSC->getBasePtr(); |
6537 | 0 | SDValue IndexLo, IndexHi; |
6538 | 0 | std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); |
6539 | 0 |
|
6540 | 0 | MachineMemOperand *MMO = DAG.getMachineFunction(). |
6541 | 0 | getMachineMemOperand(MSC->getPointerInfo(), |
6542 | 0 | MachineMemOperand::MOStore, LoMemVT.getStoreSize(), |
6543 | 0 | Alignment, MSC->getAAInfo(), MSC->getRanges()); |
6544 | 0 |
|
6545 | 0 | SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; |
6546 | 0 | Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), |
6547 | 0 | DL, OpsLo, MMO); |
6548 | 0 |
|
6549 | 0 | SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; |
6550 | 0 | Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), |
6551 | 0 | DL, OpsHi, MMO); |
6552 | 0 |
|
6553 | 0 | AddToWorklist(Lo.getNode()); |
6554 | 0 | AddToWorklist(Hi.getNode()); |
6555 | 0 |
|
6556 | 0 | return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); |
6557 | 0 | } |
6558 | | |
6559 | 369 | SDValue DAGCombiner::visitMSTORE(SDNode *N) { |
6560 | 369 | if (Level >= AfterLegalizeTypes) |
6561 | 239 | return SDValue(); |
6562 | 130 | |
6563 | 130 | MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); |
6564 | 130 | SDValue Mask = MST->getMask(); |
6565 | 130 | SDValue Data = MST->getValue(); |
6566 | 130 | EVT VT = Data.getValueType(); |
6567 | 130 | SDLoc DL(N); |
6568 | 130 | |
6569 | 130 | // If the MSTORE data type requires splitting and the mask is provided by a |
6570 | 130 | // SETCC, then split both nodes and its operands before legalization. This |
6571 | 130 | // prevents the type legalizer from unrolling SETCC into scalar comparisons |
6572 | 130 | // and enables future optimizations (e.g. min/max pattern matching on X86). |
6573 | 130 | if (Mask.getOpcode() == ISD::SETCC130 ) { |
6574 | 36 | // Check if any splitting is required. |
6575 | 36 | if (TLI.getTypeAction(*DAG.getContext(), VT) != |
6576 | 36 | TargetLowering::TypeSplitVector) |
6577 | 34 | return SDValue(); |
6578 | 2 | |
6579 | 2 | SDValue MaskLo, MaskHi, Lo, Hi; |
6580 | 2 | std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); |
6581 | 2 | |
6582 | 2 | SDValue Chain = MST->getChain(); |
6583 | 2 | SDValue Ptr = MST->getBasePtr(); |
6584 | 2 | |
6585 | 2 | EVT MemoryVT = MST->getMemoryVT(); |
6586 | 2 | unsigned Alignment = MST->getOriginalAlignment(); |
6587 | 2 | |
6588 | 2 | // if Alignment is equal to the vector size, |
6589 | 2 | // take the half of it for the second part |
6590 | 2 | unsigned SecondHalfAlignment = |
6591 | 2 | (Alignment == VT.getSizeInBits() / 8) ? Alignment / 22 : Alignment0 ; |
6592 | 36 | |
6593 | 36 | EVT LoMemVT, HiMemVT; |
6594 | 36 | std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); |
6595 | 36 | |
6596 | 36 | SDValue DataLo, DataHi; |
6597 | 36 | std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); |
6598 | 36 | |
6599 | 36 | MachineMemOperand *MMO = DAG.getMachineFunction(). |
6600 | 36 | getMachineMemOperand(MST->getPointerInfo(), |
6601 | 36 | MachineMemOperand::MOStore, LoMemVT.getStoreSize(), |
6602 | 36 | Alignment, MST->getAAInfo(), MST->getRanges()); |
6603 | 36 | |
6604 | 36 | Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, |
6605 | 36 | MST->isTruncatingStore(), |
6606 | 36 | MST->isCompressingStore()); |
6607 | 36 | |
6608 | 36 | Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, |
6609 | 36 | MST->isCompressingStore()); |
6610 | 36 | |
6611 | 36 | MMO = DAG.getMachineFunction(). |
6612 | 36 | getMachineMemOperand(MST->getPointerInfo(), |
6613 | 36 | MachineMemOperand::MOStore, HiMemVT.getStoreSize(), |
6614 | 36 | SecondHalfAlignment, MST->getAAInfo(), |
6615 | 36 | MST->getRanges()); |
6616 | 36 | |
6617 | 36 | Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, |
6618 | 36 | MST->isTruncatingStore(), |
6619 | 36 | MST->isCompressingStore()); |
6620 | 36 | |
6621 | 36 | AddToWorklist(Lo.getNode()); |
6622 | 36 | AddToWorklist(Hi.getNode()); |
6623 | 36 | |
6624 | 36 | return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); |
6625 | 36 | } |
6626 | 94 | return SDValue(); |
6627 | 94 | } |
6628 | | |
6629 | 642 | SDValue DAGCombiner::visitMGATHER(SDNode *N) { |
6630 | 642 | if (Level >= AfterLegalizeTypes) |
6631 | 435 | return SDValue(); |
6632 | 207 | |
6633 | 207 | MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N); |
6634 | 207 | SDValue Mask = MGT->getMask(); |
6635 | 207 | SDLoc DL(N); |
6636 | 207 | |
6637 | 207 | // If the MGATHER result requires splitting and the mask is provided by a |
6638 | 207 | // SETCC, then split both nodes and its operands before legalization. This |
6639 | 207 | // prevents the type legalizer from unrolling SETCC into scalar comparisons |
6640 | 207 | // and enables future optimizations (e.g. min/max pattern matching on X86). |
6641 | 207 | |
6642 | 207 | if (Mask.getOpcode() != ISD::SETCC) |
6643 | 207 | return SDValue(); |
6644 | 0 |
|
6645 | 0 | EVT VT = N->getValueType(0); |
6646 | 0 |
|
6647 | 0 | // Check if any splitting is required. |
6648 | 0 | if (TLI.getTypeAction(*DAG.getContext(), VT) != |
6649 | 0 | TargetLowering::TypeSplitVector) |
6650 | 0 | return SDValue(); |
6651 | 0 |
|
6652 | 0 | SDValue MaskLo, MaskHi, Lo, Hi; |
6653 | 0 | std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); |
6654 | 0 |
|
6655 | 0 | SDValue Src0 = MGT->getValue(); |
6656 | 0 | SDValue Src0Lo, Src0Hi; |
6657 | 0 | std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); |
6658 | 0 |
|
6659 | 0 | EVT LoVT, HiVT; |
6660 | 0 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
6661 | 0 |
|
6662 | 0 | SDValue Chain = MGT->getChain(); |
6663 | 0 | EVT MemoryVT = MGT->getMemoryVT(); |
6664 | 0 | unsigned Alignment = MGT->getOriginalAlignment(); |
6665 | 0 |
|
6666 | 0 | EVT LoMemVT, HiMemVT; |
6667 | 0 | std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); |
6668 | 0 |
|
6669 | 0 | SDValue BasePtr = MGT->getBasePtr(); |
6670 | 0 | SDValue Index = MGT->getIndex(); |
6671 | 0 | SDValue IndexLo, IndexHi; |
6672 | 0 | std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); |
6673 | 0 |
|
6674 | 0 | MachineMemOperand *MMO = DAG.getMachineFunction(). |
6675 | 0 | getMachineMemOperand(MGT->getPointerInfo(), |
6676 | 0 | MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), |
6677 | 0 | Alignment, MGT->getAAInfo(), MGT->getRanges()); |
6678 | 0 |
|
6679 | 0 | SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; |
6680 | 0 | Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, |
6681 | 0 | MMO); |
6682 | 0 |
|
6683 | 0 | SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; |
6684 | 0 | Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, |
6685 | 0 | MMO); |
6686 | 0 |
|
6687 | 0 | AddToWorklist(Lo.getNode()); |
6688 | 0 | AddToWorklist(Hi.getNode()); |
6689 | 0 |
|
6690 | 0 | // Build a factor node to remember that this load is independent of the |
6691 | 0 | // other one. |
6692 | 0 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), |
6693 | 0 | Hi.getValue(1)); |
6694 | 0 |
|
6695 | 0 | // Legalized the chain result - switch anything that used the old chain to |
6696 | 0 | // use the new one. |
6697 | 0 | DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); |
6698 | 0 |
|
6699 | 0 | SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); |
6700 | 0 |
|
6701 | 0 | SDValue RetOps[] = { GatherRes, Chain }; |
6702 | 0 | return DAG.getMergeValues(RetOps, DL); |
6703 | 0 | } |
6704 | | |
6705 | 725 | SDValue DAGCombiner::visitMLOAD(SDNode *N) { |
6706 | 725 | if (Level >= AfterLegalizeTypes) |
6707 | 463 | return SDValue(); |
6708 | 262 | |
6709 | 262 | MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); |
6710 | 262 | SDValue Mask = MLD->getMask(); |
6711 | 262 | SDLoc DL(N); |
6712 | 262 | |
6713 | 262 | // If the MLOAD result requires splitting and the mask is provided by a |
6714 | 262 | // SETCC, then split both nodes and its operands before legalization. This |
6715 | 262 | // prevents the type legalizer from unrolling SETCC into scalar comparisons |
6716 | 262 | // and enables future optimizations (e.g. min/max pattern matching on X86). |
6717 | 262 | if (Mask.getOpcode() == ISD::SETCC262 ) { |
6718 | 76 | EVT VT = N->getValueType(0); |
6719 | 76 | |
6720 | 76 | // Check if any splitting is required. |
6721 | 76 | if (TLI.getTypeAction(*DAG.getContext(), VT) != |
6722 | 76 | TargetLowering::TypeSplitVector) |
6723 | 70 | return SDValue(); |
6724 | 6 | |
6725 | 6 | SDValue MaskLo, MaskHi, Lo, Hi; |
6726 | 6 | std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); |
6727 | 6 | |
6728 | 6 | SDValue Src0 = MLD->getSrc0(); |
6729 | 6 | SDValue Src0Lo, Src0Hi; |
6730 | 6 | std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); |
6731 | 6 | |
6732 | 6 | EVT LoVT, HiVT; |
6733 | 6 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); |
6734 | 6 | |
6735 | 6 | SDValue Chain = MLD->getChain(); |
6736 | 6 | SDValue Ptr = MLD->getBasePtr(); |
6737 | 6 | EVT MemoryVT = MLD->getMemoryVT(); |
6738 | 6 | unsigned Alignment = MLD->getOriginalAlignment(); |
6739 | 6 | |
6740 | 6 | // if Alignment is equal to the vector size, |
6741 | 6 | // take the half of it for the second part |
6742 | 6 | unsigned SecondHalfAlignment = |
6743 | 6 | (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? |
6744 | 6 | Alignment/24 : Alignment2 ; |
6745 | 76 | |
6746 | 76 | EVT LoMemVT, HiMemVT; |
6747 | 76 | std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); |
6748 | 76 | |
6749 | 76 | MachineMemOperand *MMO = DAG.getMachineFunction(). |
6750 | 76 | getMachineMemOperand(MLD->getPointerInfo(), |
6751 | 76 | MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), |
6752 | 76 | Alignment, MLD->getAAInfo(), MLD->getRanges()); |
6753 | 76 | |
6754 | 76 | Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, |
6755 | 76 | ISD::NON_EXTLOAD, MLD->isExpandingLoad()); |
6756 | 76 | |
6757 | 76 | Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, |
6758 | 76 | MLD->isExpandingLoad()); |
6759 | 76 | |
6760 | 76 | MMO = DAG.getMachineFunction(). |
6761 | 76 | getMachineMemOperand(MLD->getPointerInfo(), |
6762 | 76 | MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), |
6763 | 76 | SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); |
6764 | 76 | |
6765 | 76 | Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, |
6766 | 76 | ISD::NON_EXTLOAD, MLD->isExpandingLoad()); |
6767 | 76 | |
6768 | 76 | AddToWorklist(Lo.getNode()); |
6769 | 76 | AddToWorklist(Hi.getNode()); |
6770 | 76 | |
6771 | 76 | // Build a factor node to remember that this load is independent of the |
6772 | 76 | // other one. |
6773 | 76 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), |
6774 | 76 | Hi.getValue(1)); |
6775 | 76 | |
6776 | 76 | // Legalized the chain result - switch anything that used the old chain to |
6777 | 76 | // use the new one. |
6778 | 76 | DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); |
6779 | 76 | |
6780 | 76 | SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); |
6781 | 76 | |
6782 | 76 | SDValue RetOps[] = { LoadRes, Chain }; |
6783 | 76 | return DAG.getMergeValues(RetOps, DL); |
6784 | 76 | } |
6785 | 186 | return SDValue(); |
6786 | 186 | } |
6787 | | |
6788 | | /// A vector select of 2 constant vectors can be simplified to math/logic to |
6789 | | /// avoid a variable select instruction and possibly avoid constant loads. |
6790 | 29.8k | SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { |
6791 | 29.8k | SDValue Cond = N->getOperand(0); |
6792 | 29.8k | SDValue N1 = N->getOperand(1); |
6793 | 29.8k | SDValue N2 = N->getOperand(2); |
6794 | 29.8k | EVT VT = N->getValueType(0); |
6795 | 29.8k | if (!Cond.hasOneUse() || 29.8k Cond.getScalarValueSizeInBits() != 127.0k || |
6796 | 17.4k | !TLI.convertSelectOfConstantsToMath(VT) || |
6797 | 1.47k | !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) || |
6798 | 217 | !ISD::isBuildVectorOfConstantSDNodes(N2.getNode())) |
6799 | 29.7k | return SDValue(); |
6800 | 158 | |
6801 | 158 | // Check if we can use the condition value to increment/decrement a single |
6802 | 158 | // constant value. This simplifies a select to an add and removes a constant |
6803 | 158 | // load/materialization from the general case. |
6804 | 158 | bool AllAddOne = true; |
6805 | 158 | bool AllSubOne = true; |
6806 | 158 | unsigned Elts = VT.getVectorNumElements(); |
6807 | 910 | for (unsigned i = 0; i != Elts910 ; ++i752 ) { |
6808 | 752 | SDValue N1Elt = N1.getOperand(i); |
6809 | 752 | SDValue N2Elt = N2.getOperand(i); |
6810 | 752 | if (N1Elt.isUndef() || 752 N2Elt.isUndef()752 ) |
6811 | 0 | continue; |
6812 | 752 | |
6813 | 752 | const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue(); |
6814 | 752 | const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue(); |
6815 | 752 | if (C1 != C2 + 1) |
6816 | 662 | AllAddOne = false; |
6817 | 752 | if (C1 != C2 - 1) |
6818 | 612 | AllSubOne = false; |
6819 | 752 | } |
6820 | 158 | |
6821 | 158 | // Further simplifications for the extra-special cases where the constants are |
6822 | 158 | // all 0 or all -1 should be implemented as folds of these patterns. |
6823 | 158 | SDLoc DL(N); |
6824 | 158 | if (AllAddOne || 158 AllSubOne140 ) { |
6825 | 40 | // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C |
6826 | 40 | // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C |
6827 | 40 | auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND18 : ISD::SIGN_EXTEND22 ; |
6828 | 40 | SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond); |
6829 | 40 | return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); |
6830 | 40 | } |
6831 | 118 | |
6832 | 118 | // The general case for select-of-constants: |
6833 | 118 | // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 |
6834 | 118 | // ...but that only makes sense if a vselect is slower than 2 logic ops, so |
6835 | 118 | // leave that to a machine-specific pass. |
6836 | 118 | return SDValue(); |
6837 | 118 | } |
6838 | | |
6839 | 30.3k | SDValue DAGCombiner::visitVSELECT(SDNode *N) { |
6840 | 30.3k | SDValue N0 = N->getOperand(0); |
6841 | 30.3k | SDValue N1 = N->getOperand(1); |
6842 | 30.3k | SDValue N2 = N->getOperand(2); |
6843 | 30.3k | SDLoc DL(N); |
6844 | 30.3k | |
6845 | 30.3k | // fold (vselect C, X, X) -> X |
6846 | 30.3k | if (N1 == N2) |
6847 | 4 | return N1; |
6848 | 30.3k | |
6849 | 30.3k | // Canonicalize integer abs. |
6850 | 30.3k | // vselect (setg[te] X, 0), X, -X -> |
6851 | 30.3k | // vselect (setgt X, -1), X, -X -> |
6852 | 30.3k | // vselect (setl[te] X, 0), -X, X -> |
6853 | 30.3k | // Y = sra (X, size(X)-1); xor (add (X, Y), Y) |
6854 | 30.3k | if (30.3k N0.getOpcode() == ISD::SETCC30.3k ) { |
6855 | 6.45k | SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); |
6856 | 6.45k | ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); |
6857 | 6.45k | bool isAbs = false; |
6858 | 6.45k | bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); |
6859 | 6.45k | |
6860 | 6.45k | if (((RHSIsAllZeros && 6.45k (CC == ISD::SETGT || 1.63k CC == ISD::SETGE1.39k )) || |
6861 | 6.17k | (ISD::isBuildVectorAllOnes(RHS.getNode()) && 6.17k CC == ISD::SETGT39 )) && |
6862 | 6.45k | N1 == LHS312 && N2.getOpcode() == ISD::SUB246 && N1 == N2.getOperand(1)246 ) |
6863 | 246 | isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); |
6864 | 6.20k | else if (6.20k (RHSIsAllZeros && 6.20k (CC == ISD::SETLT || 1.42k CC == ISD::SETLE1.18k )) && |
6865 | 6.20k | N2 == LHS284 && N1.getOpcode() == ISD::SUB129 && N2 == N1.getOperand(1)129 ) |
6866 | 129 | isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); |
6867 | 6.45k | |
6868 | 6.45k | if (isAbs6.45k ) { |
6869 | 375 | EVT VT = LHS.getValueType(); |
6870 | 375 | if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) |
6871 | 247 | return DAG.getNode(ISD::ABS, DL, VT, LHS); |
6872 | 128 | |
6873 | 128 | SDValue Shift = DAG.getNode( |
6874 | 128 | ISD::SRA, DL, VT, LHS, |
6875 | 128 | DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); |
6876 | 128 | SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); |
6877 | 128 | AddToWorklist(Shift.getNode()); |
6878 | 128 | AddToWorklist(Add.getNode()); |
6879 | 128 | return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); |
6880 | 128 | } |
6881 | 6.45k | } |
6882 | 29.9k | |
6883 | 29.9k | if (29.9k SimplifySelectOps(N, N1, N2)29.9k ) |
6884 | 6 | return SDValue(N, 0); // Don't revisit N. |
6885 | 29.9k | |
6886 | 29.9k | // Fold (vselect (build_vector all_ones), N1, N2) -> N1 |
6887 | 29.9k | if (29.9k ISD::isBuildVectorAllOnes(N0.getNode())29.9k ) |
6888 | 33 | return N1; |
6889 | 29.9k | // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 |
6890 | 29.9k | if (29.9k ISD::isBuildVectorAllZeros(N0.getNode())29.9k ) |
6891 | 56 | return N2; |
6892 | 29.8k | |
6893 | 29.8k | // The ConvertSelectToConcatVector function is assuming both the above |
6894 | 29.8k | // checks for (vselect (build_vector all{ones,zeros) ...) have been made |
6895 | 29.8k | // and addressed. |
6896 | 29.8k | if (29.8k N1.getOpcode() == ISD::CONCAT_VECTORS && |
6897 | 714 | N2.getOpcode() == ISD::CONCAT_VECTORS && |
6898 | 29.8k | ISD::isBuildVectorOfConstantSDNodes(N0.getNode())391 ) { |
6899 | 42 | if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) |
6900 | 10 | return CV; |
6901 | 29.8k | } |
6902 | 29.8k | |
6903 | 29.8k | if (SDValue 29.8k V29.8k = foldVSelectOfConstants(N)) |
6904 | 40 | return V; |
6905 | 29.8k | |
6906 | 29.8k | return SDValue(); |
6907 | 29.8k | } |
6908 | | |
6909 | 195k | SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { |
6910 | 195k | SDValue N0 = N->getOperand(0); |
6911 | 195k | SDValue N1 = N->getOperand(1); |
6912 | 195k | SDValue N2 = N->getOperand(2); |
6913 | 195k | SDValue N3 = N->getOperand(3); |
6914 | 195k | SDValue N4 = N->getOperand(4); |
6915 | 195k | ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); |
6916 | 195k | |
6917 | 195k | // fold select_cc lhs, rhs, x, x, cc -> x |
6918 | 195k | if (N2 == N3) |
6919 | 24 | return N2; |
6920 | 195k | |
6921 | 195k | // Determine if the condition we're dealing with is constant |
6922 | 195k | if (SDValue 195k SCC195k = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, |
6923 | 2.16k | CC, SDLoc(N), false)) { |
6924 | 2.16k | AddToWorklist(SCC.getNode()); |
6925 | 2.16k | |
6926 | 2.16k | if (ConstantSDNode *SCCC2.16k = dyn_cast<ConstantSDNode>(SCC.getNode())) { |
6927 | 31 | if (!SCCC->isNullValue()) |
6928 | 17 | return N2; // cond always true -> true val |
6929 | 31 | else |
6930 | 14 | return N3; // cond always false -> false val |
6931 | 2.13k | } else if (2.13k SCC->isUndef()2.13k ) { |
6932 | 0 | // When the condition is UNDEF, just return the first operand. This is |
6933 | 0 | // coherent the DAG creation, no setcc node is created in this case |
6934 | 0 | return N2; |
6935 | 2.13k | } else if (2.13k SCC.getOpcode() == ISD::SETCC2.13k ) { |
6936 | 2.10k | // Fold to a simpler select_cc |
6937 | 2.10k | return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), |
6938 | 2.10k | SCC.getOperand(0), SCC.getOperand(1), N2, N3, |
6939 | 2.10k | SCC.getOperand(2)); |
6940 | 2.10k | } |
6941 | 192k | } |
6942 | 192k | |
6943 | 192k | // If we can fold this based on the true/false value, do so. |
6944 | 192k | if (192k SimplifySelectOps(N, N2, N3)192k ) |
6945 | 0 | return SDValue(N, 0); // Don't revisit N. |
6946 | 192k | |
6947 | 192k | // fold select_cc into other things, such as min/max/abs |
6948 | 192k | return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); |
6949 | 192k | } |
6950 | | |
6951 | 759k | SDValue DAGCombiner::visitSETCC(SDNode *N) { |
6952 | 759k | return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), |
6953 | 759k | cast<CondCodeSDNode>(N->getOperand(2))->get(), |
6954 | 759k | SDLoc(N)); |
6955 | 759k | } |
6956 | | |
6957 | 1.05k | SDValue DAGCombiner::visitSETCCE(SDNode *N) { |
6958 | 1.05k | SDValue LHS = N->getOperand(0); |
6959 | 1.05k | SDValue RHS = N->getOperand(1); |
6960 | 1.05k | SDValue Carry = N->getOperand(2); |
6961 | 1.05k | SDValue Cond = N->getOperand(3); |
6962 | 1.05k | |
6963 | 1.05k | // If Carry is false, fold to a regular SETCC. |
6964 | 1.05k | if (Carry.getOpcode() == ISD::CARRY_FALSE) |
6965 | 30 | return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); |
6966 | 1.02k | |
6967 | 1.02k | return SDValue(); |
6968 | 1.02k | } |
6969 | | |
6970 | 648 | SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { |
6971 | 648 | SDValue LHS = N->getOperand(0); |
6972 | 648 | SDValue RHS = N->getOperand(1); |
6973 | 648 | SDValue Carry = N->getOperand(2); |
6974 | 648 | SDValue Cond = N->getOperand(3); |
6975 | 648 | |
6976 | 648 | // If Carry is false, fold to a regular SETCC. |
6977 | 648 | if (isNullConstant(Carry)) |
6978 | 30 | return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); |
6979 | 618 | |
6980 | 618 | return SDValue(); |
6981 | 618 | } |
6982 | | |
6983 | | /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or |
6984 | | /// a build_vector of constants. |
6985 | | /// This function is called by the DAGCombiner when visiting sext/zext/aext |
6986 | | /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). |
6987 | | /// Vector extends are not folded if operations are legal; this is to |
6988 | | /// avoid introducing illegal build_vector dag nodes. |
6989 | | static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, |
6990 | | SelectionDAG &DAG, bool LegalTypes, |
6991 | 1.08M | bool LegalOperations) { |
6992 | 1.08M | unsigned Opcode = N->getOpcode(); |
6993 | 1.08M | SDValue N0 = N->getOperand(0); |
6994 | 1.08M | EVT VT = N->getValueType(0); |
6995 | 1.08M | |
6996 | 1.08M | assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || |
6997 | 1.08M | Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || |
6998 | 1.08M | Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) |
6999 | 1.08M | && "Expected EXTEND dag node in input!"); |
7000 | 1.08M | |
7001 | 1.08M | // fold (sext c1) -> c1 |
7002 | 1.08M | // fold (zext c1) -> c1 |
7003 | 1.08M | // fold (aext c1) -> c1 |
7004 | 1.08M | if (isa<ConstantSDNode>(N0)) |
7005 | 257 | return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); |
7006 | 1.08M | |
7007 | 1.08M | // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) |
7008 | 1.08M | // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) |
7009 | 1.08M | // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) |
7010 | 1.08M | EVT SVT = VT.getScalarType(); |
7011 | 1.08M | if (!(VT.isVector() && |
7012 | 80.8k | (!LegalTypes || 80.8k (!LegalOperations && 62.1k TLI.isTypeLegal(SVT)9.18k )) && |
7013 | 26.3k | ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) |
7014 | 1.08M | return nullptr; |
7015 | 231 | |
7016 | 231 | // We can fold this node into a build_vector. |
7017 | 231 | unsigned VTBits = SVT.getSizeInBits(); |
7018 | 231 | unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits(); |
7019 | 231 | SmallVector<SDValue, 8> Elts; |
7020 | 231 | unsigned NumElts = VT.getVectorNumElements(); |
7021 | 231 | SDLoc DL(N); |
7022 | 231 | |
7023 | 1.37k | for (unsigned i=0; i != NumElts1.37k ; ++i1.14k ) { |
7024 | 1.14k | SDValue Op = N0->getOperand(i); |
7025 | 1.14k | if (Op->isUndef()1.14k ) { |
7026 | 86 | Elts.push_back(DAG.getUNDEF(SVT)); |
7027 | 86 | continue; |
7028 | 86 | } |
7029 | 1.06k | |
7030 | 1.06k | SDLoc DL(Op); |
7031 | 1.06k | // Get the constant value and if needed trunc it to the size of the type. |
7032 | 1.06k | // Nodes like build_vector might have constants wider than the scalar type. |
7033 | 1.06k | APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits); |
7034 | 1.06k | if (Opcode == ISD::SIGN_EXTEND || 1.06k Opcode == ISD::SIGN_EXTEND_VECTOR_INREG644 ) |
7035 | 478 | Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); |
7036 | 1.06k | else |
7037 | 584 | Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); |
7038 | 1.14k | } |
7039 | 1.08M | |
7040 | 1.08M | return DAG.getBuildVector(VT, DL, Elts).getNode(); |
7041 | 1.08M | } |
7042 | | |
7043 | | // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: |
7044 | | // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" |
7045 | | // transformation. Returns true if extension are possible and the above |
7046 | | // mentioned transformation is profitable. |
7047 | | static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, |
7048 | | unsigned ExtOpc, |
7049 | | SmallVectorImpl<SDNode *> &ExtendNodes, |
7050 | 66.0k | const TargetLowering &TLI) { |
7051 | 66.0k | bool HasCopyToRegUses = false; |
7052 | 66.0k | bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); |
7053 | 66.0k | for (SDNode::use_iterator UI = N0.getNode()->use_begin(), |
7054 | 66.0k | UE = N0.getNode()->use_end(); |
7055 | 223k | UI != UE223k ; ++UI157k ) { |
7056 | 159k | SDNode *User = *UI; |
7057 | 159k | if (User == N) |
7058 | 64.2k | continue; |
7059 | 95.6k | if (95.6k UI.getUse().getResNo() != N0.getResNo()95.6k ) |
7060 | 16.2k | continue; |
7061 | 79.4k | // FIXME: Only extend SETCC N, N and SETCC N, c for now. |
7062 | 79.4k | if (79.4k ExtOpc != ISD::ANY_EXTEND && 79.4k User->getOpcode() == ISD::SETCC70.3k ) { |
7063 | 16.5k | ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); |
7064 | 16.5k | if (ExtOpc == ISD::ZERO_EXTEND && 16.5k ISD::isSignedIntSetCC(CC)14.0k ) |
7065 | 16.5k | // Sign bits will be lost after a zext. |
7066 | 217 | return false; |
7067 | 16.3k | bool Add = false; |
7068 | 47.0k | for (unsigned i = 0; i != 247.0k ; ++i30.7k ) { |
7069 | 32.0k | SDValue UseOp = User->getOperand(i); |
7070 | 32.0k | if (UseOp == N0) |
7071 | 15.7k | continue; |
7072 | 16.3k | if (16.3k !isa<ConstantSDNode>(UseOp)16.3k ) |
7073 | 1.32k | return false; |
7074 | 14.9k | Add = true; |
7075 | 14.9k | } |
7076 | 14.9k | if (14.9k Add14.9k ) |
7077 | 14.9k | ExtendNodes.push_back(User); |
7078 | 14.9k | continue; |
7079 | 62.8k | } |
7080 | 62.8k | // If truncates aren't free and there are users we can't |
7081 | 62.8k | // extend, it isn't worthwhile. |
7082 | 62.8k | if (62.8k !isTruncFree62.8k ) |
7083 | 460 | return false; |
7084 | 62.4k | // Remember if this value is live-out. |
7085 | 62.4k | if (62.4k User->getOpcode() == ISD::CopyToReg62.4k ) |
7086 | 11.6k | HasCopyToRegUses = true; |
7087 | 159k | } |
7088 | 66.0k | |
7089 | 64.0k | if (64.0k HasCopyToRegUses64.0k ) { |
7090 | 11.5k | bool BothLiveOut = false; |
7091 | 11.5k | for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); |
7092 | 17.0k | UI != UE17.0k ; ++UI5.44k ) { |
7093 | 12.2k | SDUse &Use = UI.getUse(); |
7094 | 12.2k | if (Use.getResNo() == 0 && 12.2k Use.getUser()->getOpcode() == ISD::CopyToReg12.2k ) { |
7095 | 6.84k | BothLiveOut = true; |
7096 | 6.84k | break; |
7097 | 6.84k | } |
7098 | 12.2k | } |
7099 | 11.5k | if (BothLiveOut) |
7100 | 11.5k | // Both unextended and extended values are live out. There had better be |
7101 | 11.5k | // a good reason for the transformation. |
7102 | 6.84k | return ExtendNodes.size(); |
7103 | 57.1k | } |
7104 | 57.1k | return true; |
7105 | 57.1k | } |
7106 | | |
7107 | | void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, |
7108 | | SDValue Trunc, SDValue ExtLoad, |
7109 | 235k | const SDLoc &DL, ISD::NodeType ExtType) { |
7110 | 235k | // Extend SetCC uses if necessary. |
7111 | 250k | for (unsigned i = 0, e = SetCCs.size(); i != e250k ; ++i14.9k ) { |
7112 | 14.9k | SDNode *SetCC = SetCCs[i]; |
7113 | 14.9k | SmallVector<SDValue, 4> Ops; |
7114 | 14.9k | |
7115 | 44.9k | for (unsigned j = 0; j != 244.9k ; ++j29.9k ) { |
7116 | 29.9k | SDValue SOp = SetCC->getOperand(j); |
7117 | 29.9k | if (SOp == Trunc) |
7118 | 0 | Ops.push_back(ExtLoad); |
7119 | 29.9k | else |
7120 | 29.9k | Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); |
7121 | 29.9k | } |
7122 | 14.9k | |
7123 | 14.9k | Ops.push_back(SetCC->getOperand(2)); |
7124 | 14.9k | CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); |
7125 | 14.9k | } |
7126 | 235k | } |
7127 | | |
7128 | | // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). |
7129 | 654k | SDValue DAGCombiner::CombineExtLoad(SDNode *N) { |
7130 | 654k | SDValue N0 = N->getOperand(0); |
7131 | 654k | EVT DstVT = N->getValueType(0); |
7132 | 654k | EVT SrcVT = N0.getValueType(); |
7133 | 654k | |
7134 | 654k | assert((N->getOpcode() == ISD::SIGN_EXTEND || |
7135 | 654k | N->getOpcode() == ISD::ZERO_EXTEND) && |
7136 | 654k | "Unexpected node type (not an extend)!"); |
7137 | 654k | |
7138 | 654k | // fold (sext (load x)) to multiple smaller sextloads; same for zext. |
7139 | 654k | // For example, on a target with legal v4i32, but illegal v8i32, turn: |
7140 | 654k | // (v8i32 (sext (v8i16 (load x)))) |
7141 | 654k | // into: |
7142 | 654k | // (v8i32 (concat_vectors (v4i32 (sextload x)), |
7143 | 654k | // (v4i32 (sextload (x + 16))))) |
7144 | 654k | // Where uses of the original load, i.e.: |
7145 | 654k | // (v8i16 (load x)) |
7146 | 654k | // are replaced with: |
7147 | 654k | // (v8i16 (truncate |
7148 | 654k | // (v8i32 (concat_vectors (v4i32 (sextload x)), |
7149 | 654k | // (v4i32 (sextload (x + 16))))))) |
7150 | 654k | // |
7151 | 654k | // This combine is only applicable to illegal, but splittable, vectors. |
7152 | 654k | // All legal types, and illegal non-vector types, are handled elsewhere. |
7153 | 654k | // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. |
7154 | 654k | // |
7155 | 654k | if (N0->getOpcode() != ISD::LOAD) |
7156 | 633k | return SDValue(); |
7157 | 20.6k | |
7158 | 20.6k | LoadSDNode *LN0 = cast<LoadSDNode>(N0); |
7159 | 20.6k | |
7160 | 20.6k | if (!ISD::isNON_EXTLoad(LN0) || 20.6k !ISD::isUNINDEXEDLoad(LN0)19.3k || |
7161 | 20.6k | !N0.hasOneUse()19.3k || LN0->isVolatile()10.6k || !DstVT.isVector()10.6k || |
7162 | 20.6k | !DstVT.isPow2VectorType()10.4k || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))10.2k ) |
7163 | 20.0k | return SDValue(); |
7164 | 584 | |
7165 | 584 | SmallVector<SDNode *, 4> SetCCs; |
7166 | 584 | if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) |
7167 | 0 | return SDValue(); |
7168 | 584 | |
7169 | 584 | ISD::LoadExtType ExtType = |
7170 | 584 | N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD307 : ISD::ZEXTLOAD277 ; |
7171 | 584 | |
7172 | 584 | // Try to split the vector types to get down to legal types. |
7173 | 584 | EVT SplitSrcVT = SrcVT; |
7174 | 584 | EVT SplitDstVT = DstVT; |
7175 | 1.48k | while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && |
7176 | 1.26k | SplitSrcVT.getVectorNumElements() > 11.26k ) { |
7177 | 905 | SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; |
7178 | 905 | SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; |
7179 | 905 | } |
7180 | 584 | |
7181 | 584 | if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) |
7182 | 356 | return SDValue(); |
7183 | 228 | |
7184 | 228 | SDLoc DL(N); |
7185 | 228 | const unsigned NumSplits = |
7186 | 228 | DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); |
7187 | 228 | const unsigned Stride = SplitSrcVT.getStoreSize(); |
7188 | 228 | SmallVector<SDValue, 4> Loads; |
7189 | 228 | SmallVector<SDValue, 4> Chains; |
7190 | 228 | |
7191 | 228 | SDValue BasePtr = LN0->getBasePtr(); |
7192 | 727 | for (unsigned Idx = 0; Idx < NumSplits727 ; Idx++499 ) { |
7193 | 499 | const unsigned Offset = Idx * Stride; |
7194 | 499 | const unsigned Align = MinAlign(LN0->getAlignment(), Offset); |
7195 | 499 | |
7196 | 499 | SDValue SplitLoad = DAG.getExtLoad( |
7197 | 499 | ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, |
7198 | 499 | LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, |
7199 | 499 | LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); |
7200 | 499 | |
7201 | 499 | BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, |
7202 | 499 | DAG.getConstant(Stride, DL, BasePtr.getValueType())); |
7203 | 499 | |
7204 | 499 | Loads.push_back(SplitLoad.getValue(0)); |
7205 | 499 | Chains.push_back(SplitLoad.getValue(1)); |
7206 | 499 | } |
7207 | 654k | |
7208 | 654k | SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); |
7209 | 654k | SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); |
7210 | 654k | |
7211 | 654k | // Simplify TF. |
7212 | 654k | AddToWorklist(NewChain.getNode()); |
7213 | 654k | |
7214 | 654k | CombineTo(N, NewValue); |
7215 | 654k | |
7216 | 654k | // Replace uses of the original load (before extension) |
7217 | 654k | // with a truncate of the concatenated sextloaded vectors. |
7218 | 654k | SDValue Trunc = |
7219 | 654k | DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); |
7220 | 654k | CombineTo(N0.getNode(), Trunc, NewChain); |
7221 | 654k | ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, |
7222 | 654k | (ISD::NodeType)N->getOpcode()); |
7223 | 654k | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
7224 | 654k | } |
7225 | | |
7226 | | /// If we're narrowing or widening the result of a vector select and the final |
7227 | | /// size is the same size as a setcc (compare) feeding the select, then try to |
7228 | | /// apply the cast operation to the select's operands because matching vector |
7229 | | /// sizes for a select condition and other operands should be more efficient. |
7230 | 1.52M | SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { |
7231 | 1.52M | unsigned CastOpcode = Cast->getOpcode(); |
7232 | 1.52M | assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND || |
7233 | 1.52M | CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND || |
7234 | 1.52M | CastOpcode == ISD::FP_ROUND) && |
7235 | 1.52M | "Unexpected opcode for vector select narrowing/widening"); |
7236 | 1.52M | |
7237 | 1.52M | // We only do this transform before legal ops because the pattern may be |
7238 | 1.52M | // obfuscated by target-specific operations after legalization. Do not create |
7239 | 1.52M | // an illegal select op, however, because that may be difficult to lower. |
7240 | 1.52M | EVT VT = Cast->getValueType(0); |
7241 | 1.52M | if (LegalOperations || 1.52M !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)896k ) |
7242 | 808k | return SDValue(); |
7243 | 713k | |
7244 | 713k | SDValue VSel = Cast->getOperand(0); |
7245 | 713k | if (VSel.getOpcode() != ISD::VSELECT || 713k !VSel.hasOneUse()19 || |
7246 | 19 | VSel.getOperand(0).getOpcode() != ISD::SETCC) |
7247 | 713k | return SDValue(); |
7248 | 19 | |
7249 | 19 | // Does the setcc have the same vector size as the casted select? |
7250 | 19 | SDValue SetCC = VSel.getOperand(0); |
7251 | 19 | EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType()); |
7252 | 19 | if (SetCCVT.getSizeInBits() != VT.getSizeInBits()) |
7253 | 3 | return SDValue(); |
7254 | 16 | |
7255 | 16 | // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B) |
7256 | 16 | SDValue A = VSel.getOperand(1); |
7257 | 16 | SDValue B = VSel.getOperand(2); |
7258 | 16 | SDValue CastA, CastB; |
7259 | 16 | SDLoc DL(Cast); |
7260 | 16 | if (CastOpcode == ISD::FP_ROUND16 ) { |
7261 | 4 | // FP_ROUND (fptrunc) has an extra flag operand to pass along. |
7262 | 4 | CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1)); |
7263 | 4 | CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1)); |
7264 | 16 | } else { |
7265 | 12 | CastA = DAG.getNode(CastOpcode, DL, VT, A); |
7266 | 12 | CastB = DAG.getNode(CastOpcode, DL, VT, B); |
7267 | 12 | } |
7268 | 1.52M | return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); |
7269 | 1.52M | } |
7270 | | |
7271 | 518k | SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { |
7272 | 518k | SDValue N0 = N->getOperand(0); |
7273 | 518k | EVT VT = N->getValueType(0); |
7274 | 518k | SDLoc DL(N); |
7275 | 518k | |
7276 | 518k | if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, |
7277 | 518k | LegalOperations)) |
7278 | 146 | return SDValue(Res, 0); |
7279 | 518k | |
7280 | 518k | // fold (sext (sext x)) -> (sext x) |
7281 | 518k | // fold (sext (aext x)) -> (sext x) |
7282 | 518k | if (518k N0.getOpcode() == ISD::SIGN_EXTEND || 518k N0.getOpcode() == ISD::ANY_EXTEND518k ) |
7283 | 6 | return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0)); |
7284 | 518k | |
7285 | 518k | if (518k N0.getOpcode() == ISD::TRUNCATE518k ) { |
7286 | 10.1k | // fold (sext (truncate (load x))) -> (sext (smaller load x)) |
7287 | 10.1k | // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) |
7288 | 10.1k | if (SDValue NarrowLoad10.1k = ReduceLoadWidth(N0.getNode())) { |
7289 | 20 | SDNode *oye = N0.getOperand(0).getNode(); |
7290 | 20 | if (NarrowLoad.getNode() != N0.getNode()20 ) { |
7291 | 20 | CombineTo(N0.getNode(), NarrowLoad); |
7292 | 20 | // CombineTo deleted the truncate, if needed, but not what's under it. |
7293 | 20 | AddToWorklist(oye); |
7294 | 20 | } |
7295 | 20 | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
7296 | 20 | } |
7297 | 10.1k | |
7298 | 10.1k | // See if the value being truncated is already sign extended. If so, just |
7299 | 10.1k | // eliminate the trunc/sext pair. |
7300 | 10.1k | SDValue Op = N0.getOperand(0); |
7301 | 10.1k | unsigned OpBits = Op.getScalarValueSizeInBits(); |
7302 | 10.1k | unsigned MidBits = N0.getScalarValueSizeInBits(); |
7303 | 10.1k | unsigned DestBits = VT.getScalarSizeInBits(); |
7304 | 10.1k | unsigned NumSignBits = DAG.ComputeNumSignBits(Op); |
7305 | 10.1k | |
7306 | 10.1k | if (OpBits == DestBits10.1k ) { |
7307 | 6.50k | // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign |
7308 | 6.50k | // bits, it is already ready. |
7309 | 6.50k | if (NumSignBits > DestBits-MidBits) |
7310 | 1.65k | return Op; |
7311 | 3.67k | } else if (3.67k OpBits < DestBits3.67k ) { |
7312 | 2.86k | // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign |
7313 | 2.86k | // bits, just sext from i32. |
7314 | 2.86k | if (NumSignBits > OpBits-MidBits) |
7315 | 530 | return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op); |
7316 | 813 | } else { |
7317 | 813 | // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign |
7318 | 813 | // bits, just truncate to i32. |
7319 | 813 | if (NumSignBits > OpBits-MidBits) |
7320 | 274 | return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); |
7321 | 7.71k | } |
7322 | 7.71k | |
7323 | 7.71k | // fold (sext (truncate x)) -> (sextinreg x). |
7324 | 7.71k | if (7.71k !LegalOperations || 7.71k TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, |
7325 | 7.71k | N0.getValueType())) { |
7326 | 7.39k | if (OpBits < DestBits) |
7327 | 2.33k | Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); |
7328 | 5.06k | else if (5.06k OpBits > DestBits5.06k ) |
7329 | 539 | Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); |
7330 | 7.39k | return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op, |
7331 | 7.39k | DAG.getValueType(N0.getValueType())); |
7332 | 7.39k | } |
7333 | 508k | } |
7334 | 508k | |
7335 | 508k | // fold (sext (load x)) -> (sext (truncate (sextload x))) |
7336 | 508k | // Only generate vector extloads when 1) they're legal, and 2) they are |
7337 | 508k | // deemed desirable by the target. |
7338 | 508k | if (508k ISD::isNON_EXTLoad(N0.getNode()) && 508k ISD::isUNINDEXEDLoad(N0.getNode())124k && |
7339 | 124k | ((!LegalOperations && 124k !VT.isVector()120k && |
7340 | 117k | !cast<LoadSDNode>(N0)->isVolatile()) || |
7341 | 508k | TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType())6.99k )) { |
7342 | 121k | bool DoXform = true; |
7343 | 121k | SmallVector<SDNode*, 4> SetCCs; |
7344 | 121k | if (!N0.hasOneUse()) |
7345 | 25.7k | DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); |
7346 | 121k | if (VT.isVector()) |
7347 | 1.02k | DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); |
7348 | 121k | if (DoXform121k ) { |
7349 | 114k | LoadSDNode *LN0 = cast<LoadSDNode>(N0); |
7350 | 114k | SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), |
7351 | 114k | LN0->getBasePtr(), N0.getValueType(), |
7352 | 114k | LN0->getMemOperand()); |
7353 | 114k | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), |
7354 | 114k | N0.getValueType(), ExtLoad); |
7355 | 114k | ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); |
7356 | 114k | // If the load value is used only by N, replace it via CombineTo N. |
7357 | 114k | bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); |
7358 | 114k | CombineTo(N, ExtLoad); |
7359 | 114k | if (NoReplaceTrunc) |
7360 | 97.1k | DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); |
7361 | 114k | else |
7362 | 17.8k | CombineTo(LN0, Trunc, ExtLoad.getValue(1)); |
7363 | 114k | return SDValue(N, 0); |
7364 | 114k | } |
7365 | 393k | } |
7366 | 393k | |
7367 | 393k | // fold (sext (load x)) to multiple smaller sextloads. |
7368 | 393k | // Only on illegal but splittable vectors. |
7369 | 393k | if (SDValue 393k ExtLoad393k = CombineExtLoad(N)) |
7370 | 167 | return ExtLoad; |
7371 | 393k | |
7372 | 393k | // fold (sext (sextload x)) -> (sext (truncate (sextload x))) |
7373 | 393k | // fold (sext ( extload x)) -> (sext (truncate (sextload x))) |
7374 | 393k | if (393k (ISD::isSEXTLoad(N0.getNode()) || 393k ISD::isEXTLoad(N0.getNode())392k ) && |
7375 | 393k | ISD::isUNINDEXEDLoad(N0.getNode())397 && N0.hasOneUse()397 ) { |
7376 | 37 | LoadSDNode *LN0 = cast<LoadSDNode>(N0); |
7377 | 37 | EVT MemVT = LN0->getMemoryVT(); |
7378 | 37 | if ((!LegalOperations && 37 !LN0->isVolatile()6 ) || |
7379 | 37 | TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)31 ) { |
7380 | 7 | SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), |
7381 | 7 | LN0->getBasePtr(), MemVT, |
7382 | 7 | LN0->getMemOperand()); |
7383 | 7 | CombineTo(N, ExtLoad); |
7384 | 7 | CombineTo(N0.getNode(), |
7385 | 7 | DAG.getNode(ISD::TRUNCATE, SDLoc(N0), |
7386 | 7 | N0.getValueType(), ExtLoad), |
7387 | 7 | ExtLoad.getValue(1)); |
7388 | 7 | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
7389 | 7 | } |
7390 | 393k | } |
7391 | 393k | |
7392 | 393k | // fold (sext (and/or/xor (load x), cst)) -> |
7393 | 393k | // (and/or/xor (sextload x), (sext cst)) |
7394 | 393k | if (393k (N0.getOpcode() == ISD::AND || 393k N0.getOpcode() == ISD::OR390k || |
7395 | 388k | N0.getOpcode() == ISD::XOR) && |
7396 | 16.0k | isa<LoadSDNode>(N0.getOperand(0)) && |
7397 | 55 | N0.getOperand(1).getOpcode() == ISD::Constant && |
7398 | 38 | TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) && |
7399 | 393k | (!LegalOperations && 38 TLI.isOperationLegal(N0.getOpcode(), VT)25 )) { |
7400 | 0 | LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); |
7401 | 0 | if (LN0->getExtensionType() != ISD::ZEXTLOAD && 0 LN0->isUnindexed()0 ) { |
7402 | 0 | bool DoXform = true; |
7403 | 0 | SmallVector<SDNode*, 4> SetCCs; |
7404 | 0 | if (!N0.hasOneUse()) |
7405 | 0 | DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, |
7406 | 0 | SetCCs, TLI); |
7407 | 0 | if (DoXform0 ) { |
7408 | 0 | SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, |
7409 | 0 | LN0->getChain(), LN0->getBasePtr(), |
7410 | 0 | LN0->getMemoryVT(), |
7411 | 0 | LN0->getMemOperand()); |
7412 | 0 | APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); |
7413 | 0 | Mask = Mask.sext(VT.getSizeInBits()); |
7414 | 0 | SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, |
7415 | 0 | ExtLoad, DAG.getConstant(Mask, DL, VT)); |
7416 | 0 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, |
7417 | 0 | SDLoc(N0.getOperand(0)), |
7418 | 0 | N0.getOperand(0).getValueType(), ExtLoad); |
7419 | 0 | ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); |
7420 | 0 | bool NoReplaceTruncAnd = !N0.hasOneUse(); |
7421 | 0 | bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); |
7422 | 0 | CombineTo(N, And); |
7423 | 0 | // If N0 has multiple uses, change other uses as well. |
7424 | 0 | if (NoReplaceTruncAnd0 ) { |
7425 | 0 | SDValue TruncAnd = |
7426 | 0 | DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); |
7427 | 0 | CombineTo(N0.getNode(), TruncAnd); |
7428 | 0 | } |
7429 | 0 | if (NoReplaceTrunc) |
7430 | 0 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); |
7431 | 0 | else |
7432 | 0 | CombineTo(LN0, Trunc, ExtLoad.getValue(1)); |
7433 | 0 | return SDValue(N,0); // Return N so it doesn't get rechecked! |
7434 | 0 | } |
7435 | 393k | } |
7436 | 0 | } |
7437 | 393k | |
7438 | 393k | if (393k N0.getOpcode() == ISD::SETCC393k ) { |
7439 | 4.10k | SDValue N00 = N0.getOperand(0); |
7440 | 4.10k | SDValue N01 = N0.getOperand(1); |
7441 | 4.10k | ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); |
7442 | 4.10k | EVT N00VT = N0.getOperand(0).getValueType(); |
7443 | 4.10k | |
7444 | 4.10k | // sext(setcc) -> sext_in_reg(vsetcc) for vectors. |
7445 | 4.10k | // Only do this before legalize for now. |
7446 | 4.10k | if (VT.isVector() && 4.10k !LegalOperations2.43k && |
7447 | 2.43k | TLI.getBooleanContents(N00VT) == |
7448 | 4.10k | TargetLowering::ZeroOrNegativeOneBooleanContent) { |
7449 | 2.43k | // On some architectures (such as SSE/NEON/etc) the SETCC result type is |
7450 | 2.43k | // of the same size as the compared operands. Only optimize sext(setcc()) |
7451 | 2.43k | // if this is the case. |
7452 | 2.43k | EVT SVT = getSetCCResultType(N00VT); |
7453 | 2.43k | |
7454 | 2.43k | // We know that the # elements of the results is the same as the |
7455 | 2.43k | // # elements of the compare (and the # elements of the compare result |
7456 | 2.43k | // for that matter). Check to see that they are the same size. If so, |
7457 | 2.43k | // we know that the element size of the sext'd result matches the |
7458 | 2.43k | // element size of the compare operands. |
7459 | 2.43k | if (VT.getSizeInBits() == SVT.getSizeInBits()) |
7460 | 2.05k | return DAG.getSetCC(DL, VT, N00, N01, CC); |
7461 | 377 | |
7462 | 377 | // If the desired elements are smaller or larger than the source |
7463 | 377 | // elements, we can use a matching integer vector type and then |
7464 | 377 | // truncate/sign extend. |
7465 | 377 | EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); |
7466 | 377 | if (SVT == MatchingVecType377 ) { |
7467 | 254 | SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC); |
7468 | 254 | return DAG.getSExtOrTrunc(VsetCC, DL, VT); |
7469 | 254 | } |
7470 | 1.78k | } |
7471 | 1.78k | |
7472 | 1.78k | // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0) |
7473 | 1.78k | // Here, T can be 1 or -1, depending on the type of the setcc and |
7474 | 1.78k | // getBooleanContents(). |
7475 | 1.78k | unsigned SetCCWidth = N0.getScalarValueSizeInBits(); |
7476 | 1.78k | |
7477 | 1.78k | // To determine the "true" side of the select, we need to know the high bit |
7478 | 1.78k | // of the value returned by the setcc if it evaluates to true. |
7479 | 1.78k | // If the type of the setcc is i1, then the true case of the select is just |
7480 | 1.78k | // sext(i1 1), that is, -1. |
7481 | 1.78k | // If the type of the setcc is larger (say, i8) then the value of the high |
7482 | 1.78k | // bit depends on getBooleanContents(), so ask TLI for a real "true" value |
7483 | 1.78k | // of the appropriate width. |
7484 | 1.78k | SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT) |
7485 | 1 | : TLI.getConstTrueVal(DAG, VT, DL); |
7486 | 1.78k | SDValue Zero = DAG.getConstant(0, DL, VT); |
7487 | 1.78k | if (SDValue SCC = |
7488 | 1.78k | SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) |
7489 | 12 | return SCC; |
7490 | 1.77k | |
7491 | 1.77k | if (1.77k !VT.isVector() && 1.77k !TLI.convertSelectOfConstantsToMath(VT)1.65k ) { |
7492 | 948 | EVT SetCCVT = getSetCCResultType(N00VT); |
7493 | 948 | // Don't do this transform for i1 because there's a select transform |
7494 | 948 | // that would reverse it. |
7495 | 948 | // TODO: We should not do this transform at all without a target hook |
7496 | 948 | // because a sext is likely cheaper than a select? |
7497 | 948 | if (SetCCVT.getScalarSizeInBits() != 1 && |
7498 | 948 | (!LegalOperations || 300 TLI.isOperationLegal(ISD::SETCC, N00VT)0 )) { |
7499 | 300 | SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC); |
7500 | 300 | return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero); |
7501 | 300 | } |
7502 | 390k | } |
7503 | 4.10k | } |
7504 | 390k | |
7505 | 390k | // fold (sext x) -> (zext x) if the sign bit is known zero. |
7506 | 390k | if (390k (!LegalOperations || 390k TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)172k ) && |
7507 | 390k | DAG.SignBitIsZero(N0)) |
7508 | 513 | return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0); |
7509 | 390k | |
7510 | 390k | if (SDValue 390k NewVSel390k = matchVSelectOpSizesWithSetCC(N)) |
7511 | 4 | return NewVSel; |
7512 | 390k | |
7513 | 390k | return SDValue(); |
7514 | 390k | } |
7515 | | |
7516 | | // isTruncateOf - If N is a truncate of some other value, return true, record |
7517 | | // the value being truncated in Op and which of Op's bits are zero/one in Known. |
7518 | | // This function computes KnownBits to avoid a duplicated call to |
7519 | | // computeKnownBits in the caller. |
7520 | | static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, |
7521 | 353k | KnownBits &Known) { |
7522 | 353k | if (N->getOpcode() == ISD::TRUNCATE353k ) { |
7523 | 27.6k | Op = N->getOperand(0); |
7524 | 27.6k | DAG.computeKnownBits(Op, Known); |
7525 | 27.6k | return true; |
7526 | 27.6k | } |
7527 | 325k | |
7528 | 325k | if (325k N->getOpcode() != ISD::SETCC || 325k N->getValueType(0) != MVT::i122.8k || |
7529 | 18.1k | cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) |
7530 | 322k | return false; |
7531 | 3.40k | |
7532 | 3.40k | SDValue Op0 = N->getOperand(0); |
7533 | 3.40k | SDValue Op1 = N->getOperand(1); |
7534 | 3.40k | assert(Op0.getValueType() == Op1.getValueType()); |
7535 | 3.40k | |
7536 | 3.40k | if (isNullConstant(Op0)) |
7537 | 0 | Op = Op1; |
7538 | 3.40k | else if (3.40k isNullConstant(Op1)3.40k ) |
7539 | 2.29k | Op = Op0; |
7540 | 3.40k | else |
7541 | 1.10k | return false; |
7542 | 2.29k | |
7543 | 2.29k | DAG.computeKnownBits(Op, Known); |
7544 | 2.29k | |
7545 | 2.29k | if (!(Known.Zero | 1).isAllOnesValue()) |
7546 | 2.04k | return false; |
7547 | 250 | |
7548 | 250 | return true; |
7549 | 250 | } |
7550 | | |
7551 | 388k | SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { |
7552 | 388k | SDValue N0 = N->getOperand(0); |
7553 | 388k | EVT VT = N->getValueType(0); |
7554 | 388k | |
7555 | 388k | if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, |
7556 | 388k | LegalOperations)) |
7557 | 138 | return SDValue(Res, 0); |
7558 | 388k | |
7559 | 388k | // fold (zext (zext x)) -> (zext x) |
7560 | 388k | // fold (zext (aext x)) -> (zext x) |
7561 | 388k | if (388k N0.getOpcode() == ISD::ZERO_EXTEND || 388k N0.getOpcode() == ISD::ANY_EXTEND388k ) |
7562 | 44 | return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, |
7563 | 44 | N0.getOperand(0)); |
7564 | 388k | |
7565 | 388k | // fold (zext (truncate x)) -> (zext x) or |
7566 | 388k | // (zext (truncate x)) -> (truncate x) |
7567 | 388k | // This is valid when the truncated bits of x are already zero. |
7568 | 388k | // FIXME: We should extend this to work for vectors too. |
7569 | 388k | SDValue Op; |
7570 | 388k | KnownBits Known; |
7571 | 388k | if (!VT.isVector() && 388k isTruncateOf(DAG, N0, Op, Known)353k ) { |
7572 | 27.8k | APInt TruncatedBits = |
7573 | 27.8k | (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? |
7574 | 2 | APInt(Op.getValueSizeInBits(), 0) : |
7575 | 27.8k | APInt::getBitsSet(Op.getValueSizeInBits(), |
7576 | 27.8k | N0.getValueSizeInBits(), |
7577 | 27.8k | std::min(Op.getValueSizeInBits(), |
7578 | 27.8k | VT.getSizeInBits())); |
7579 | 27.8k | if (TruncatedBits.isSubsetOf(Known.Zero)) |
7580 | 20.5k | return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); |
7581 | 367k | } |
7582 | 367k | |
7583 | 367k | // fold (zext (truncate x)) -> (and x, mask) |
7584 | 367k | if (367k N0.getOpcode() == ISD::TRUNCATE367k ) { |
7585 | 7.46k | // fold (zext (truncate (load x))) -> (zext (smaller load x)) |
7586 | 7.46k | // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) |
7587 | 7.46k | if (SDValue NarrowLoad7.46k = ReduceLoadWidth(N0.getNode())) { |
7588 | 46 | SDNode *oye = N0.getOperand(0).getNode(); |
7589 | 46 | if (NarrowLoad.getNode() != N0.getNode()46 ) { |
7590 | 46 | CombineTo(N0.getNode(), NarrowLoad); |
7591 | 46 | // CombineTo deleted the truncate, if needed, but not what's under it. |
7592 | 46 | AddToWorklist(oye); |
7593 | 46 | } |
7594 | 46 | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
7595 | 46 | } |
7596 | 7.41k | |
7597 | 7.41k | EVT SrcVT = N0.getOperand(0).getValueType(); |
7598 | 7.41k | EVT MinVT = N0.getValueType(); |
7599 | 7.41k | |
7600 | 7.41k | // Try to mask before the extension to avoid having to generate a larger mask, |
7601 | 7.41k | // possibly over several sub-vectors. |
7602 | 7.41k | if (SrcVT.bitsLT(VT)7.41k ) { |
7603 | 1.98k | if (!LegalOperations || 1.98k (TLI.isOperationLegal(ISD::AND, SrcVT) && |
7604 | 1.98k | TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)31 )) { |
7605 | 1.98k | SDValue Op = N0.getOperand(0); |
7606 | 1.98k | Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); |
7607 | 1.98k | AddToWorklist(Op.getNode()); |
7608 | 1.98k | return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); |
7609 | 1.98k | } |
7610 | 5.43k | } |
7611 | 5.43k | |
7612 | 5.43k | if (5.43k !LegalOperations || 5.43k TLI.isOperationLegal(ISD::AND, VT)235 ) { |
7613 | 5.39k | SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); |
7614 | 5.39k | AddToWorklist(Op.getNode()); |
7615 | 5.39k | return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); |
7616 | 5.39k | } |
7617 | 360k | } |
7618 | 360k | |
7619 | 360k | // Fold (zext (and (trunc x), cst)) -> (and x, cst), |
7620 | 360k | // if either of the casts is not free. |
7621 | 360k | if (360k N0.getOpcode() == ISD::AND && |
7622 | 50.9k | N0.getOperand(0).getOpcode() == ISD::TRUNCATE && |
7623 | 3.71k | N0.getOperand(1).getOpcode() == ISD::Constant && |
7624 | 752 | (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), |
7625 | 752 | N0.getValueType()) || |
7626 | 360k | !TLI.isZExtFree(N0.getValueType(), VT)372 )) { |
7627 | 526 | SDValue X = N0.getOperand(0).getOperand(0); |
7628 | 526 | X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT); |
7629 | 526 | APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); |
7630 | 526 | Mask = Mask.zext(VT.getSizeInBits()); |
7631 | 526 | SDLoc DL(N); |
7632 | 526 | return DAG.getNode(ISD::AND, DL, VT, |
7633 | 526 | X, DAG.getConstant(Mask, DL, VT)); |
7634 | 526 | } |
7635 | 359k | |
7636 | 359k | // fold (zext (load x)) -> (zext (truncate (zextload x))) |
7637 | 359k | // Only generate vector extloads when 1) they're legal, and 2) they are |
7638 | 359k | // deemed desirable by the target. |
7639 | 359k | if (359k ISD::isNON_EXTLoad(N0.getNode()) && 359k ISD::isUNINDEXEDLoad(N0.getNode())108k && |
7640 | 108k | ((!LegalOperations && 108k !VT.isVector()105k && |
7641 | 100k | !cast<LoadSDNode>(N0)->isVolatile()) || |
7642 | 359k | TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType())7.89k )) { |
7643 | 102k | bool DoXform = true; |
7644 | 102k | SmallVector<SDNode*, 4> SetCCs; |
7645 | 102k | if (!N0.hasOneUse()) |
7646 | 35.9k | DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); |
7647 | 102k | if (VT.isVector()) |
7648 | 937 | DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); |
7649 | 102k | if (DoXform102k ) { |
7650 | 99.1k | LoadSDNode *LN0 = cast<LoadSDNode>(N0); |
7651 | 99.1k | SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, |
7652 | 99.1k | LN0->getChain(), |
7653 | 99.1k | LN0->getBasePtr(), N0.getValueType(), |
7654 | 99.1k | LN0->getMemOperand()); |
7655 | 99.1k | |
7656 | 99.1k | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), |
7657 | 99.1k | N0.getValueType(), ExtLoad); |
7658 | 99.1k | ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); |
7659 | 99.1k | // If the load value is used only by N, replace it via CombineTo N. |
7660 | 99.1k | bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); |
7661 | 99.1k | CombineTo(N, ExtLoad); |
7662 | 99.1k | if (NoReplaceTrunc) |
7663 | 78.2k | DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); |
7664 | 99.1k | else |
7665 | 20.9k | CombineTo(LN0, Trunc, ExtLoad.getValue(1)); |
7666 | 99.1k | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
7667 | 99.1k | } |
7668 | 260k | } |
7669 | 260k | |
7670 | 260k | // fold (zext (load x)) to multiple smaller zextloads. |
7671 | 260k | // Only on illegal but splittable vectors. |
7672 | 260k | if (SDValue 260k ExtLoad260k = CombineExtLoad(N)) |
7673 | 61 | return ExtLoad; |
7674 | 260k | |
7675 | 260k | // fold (zext (and/or/xor (load x), cst)) -> |
7676 | 260k | // (and/or/xor (zextload x), (zext cst)) |
7677 | 260k | // Unless (and (load x) cst) will match as a zextload already and has |
7678 | 260k | // additional users. |
7679 | 260k | if (260k (N0.getOpcode() == ISD::AND || 260k N0.getOpcode() == ISD::OR210k || |
7680 | 207k | N0.getOpcode() == ISD::XOR) && |
7681 | 61.0k | isa<LoadSDNode>(N0.getOperand(0)) && |
7682 | 6.82k | N0.getOperand(1).getOpcode() == ISD::Constant && |
7683 | 6.58k | TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && |
7684 | 260k | (!LegalOperations && 6.53k TLI.isOperationLegal(N0.getOpcode(), VT)6.45k )) { |
7685 | 6.34k | LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); |
7686 | 6.34k | if (LN0->getExtensionType() != ISD::SEXTLOAD && 6.34k LN0->isUnindexed()6.31k ) { |
7687 | 6.31k | bool DoXform = true; |
7688 | 6.31k | SmallVector<SDNode*, 4> SetCCs; |
7689 | 6.31k | if (!N0.hasOneUse()6.31k ) { |
7690 | 254 | if (N0.getOpcode() == ISD::AND254 ) { |
7691 | 250 | auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); |
7692 | 250 | auto NarrowLoad = false; |
7693 | 250 | EVT LoadResultTy = AndC->getValueType(0); |
7694 | 250 | EVT ExtVT, LoadedVT; |
7695 | 250 | if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, |
7696 | 250 | NarrowLoad)) |
7697 | 11 | DoXform = false; |
7698 | 250 | } |
7699 | 254 | if (DoXform) |
7700 | 243 | DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), |
7701 | 243 | ISD::ZERO_EXTEND, SetCCs, TLI); |
7702 | 254 | } |
7703 | 6.31k | if (DoXform6.31k ) { |
7704 | 6.30k | SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, |
7705 | 6.30k | LN0->getChain(), LN0->getBasePtr(), |
7706 | 6.30k | LN0->getMemoryVT(), |
7707 | 6.30k | LN0->getMemOperand()); |
7708 | 6.30k | APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); |
7709 | 6.30k | Mask = Mask.zext(VT.getSizeInBits()); |
7710 | 6.30k | SDLoc DL(N); |
7711 | 6.30k | SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, |
7712 | 6.30k | ExtLoad, DAG.getConstant(Mask, DL, VT)); |
7713 | 6.30k | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, |
7714 | 6.30k | SDLoc(N0.getOperand(0)), |
7715 | 6.30k | N0.getOperand(0).getValueType(), ExtLoad); |
7716 | 6.30k | ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); |
7717 | 6.30k | bool NoReplaceTruncAnd = !N0.hasOneUse(); |
7718 | 6.30k | bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); |
7719 | 6.30k | CombineTo(N, And); |
7720 | 6.30k | // If N0 has multiple uses, change other uses as well. |
7721 | 6.30k | if (NoReplaceTruncAnd6.30k ) { |
7722 | 243 | SDValue TruncAnd = |
7723 | 243 | DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); |
7724 | 243 | CombineTo(N0.getNode(), TruncAnd); |
7725 | 243 | } |
7726 | 6.30k | if (NoReplaceTrunc) |
7727 | 3.28k | DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); |
7728 | 6.30k | else |
7729 | 3.02k | CombineTo(LN0, Trunc, ExtLoad.getValue(1)); |
7730 | 6.30k | return SDValue(N,0); // Return N so it doesn't get rechecked! |
7731 | 6.30k | } |
7732 | 254k | } |
7733 | 6.34k | } |
7734 | 254k | |
7735 | 254k | // fold (zext (zextload x)) -> (zext (truncate (zextload x))) |
7736 | 254k | // fold (zext ( extload x)) -> (zext (truncate (zextload x))) |
7737 | 254k | if (254k (ISD::isZEXTLoad(N0.getNode()) || 254k ISD::isEXTLoad(N0.getNode())253k ) && |
7738 | 254k | ISD::isUNINDEXEDLoad(N0.getNode())740 && N0.hasOneUse()740 ) { |
7739 | 65 | LoadSDNode *LN0 = cast<LoadSDNode>(N0); |
7740 | 65 | EVT MemVT = LN0->getMemoryVT(); |
7741 | 65 | if ((!LegalOperations && 65 !LN0->isVolatile()33 ) || |
7742 | 65 | TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)35 ) { |
7743 | 35 | SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, |
7744 | 35 | LN0->getChain(), |
7745 | 35 | LN0->getBasePtr(), MemVT, |
7746 | 35 | LN0->getMemOperand()); |
7747 | 35 | CombineTo(N, ExtLoad); |
7748 | 35 | CombineTo(N0.getNode(), |
7749 | 35 | DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), |
7750 | 35 | ExtLoad), |
7751 | 35 | ExtLoad.getValue(1)); |
7752 | 35 | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
7753 | 35 | } |
7754 | 254k | } |
7755 | 254k | |
7756 | 254k | if (254k N0.getOpcode() == ISD::SETCC254k ) { |
7757 | 22.8k | // Only do this before legalize for now. |
7758 | 22.8k | if (!LegalOperations && 22.8k VT.isVector()22.0k && |
7759 | 22.8k | N0.getValueType().getVectorElementType() == MVT::i1280 ) { |
7760 | 279 | EVT N00VT = N0.getOperand(0).getValueType(); |
7761 | 279 | if (getSetCCResultType(N00VT) == N0.getValueType()) |
7762 | 13 | return SDValue(); |
7763 | 266 | |
7764 | 266 | // We know that the # elements of the results is the same as the # |
7765 | 266 | // elements of the compare (and the # elements of the compare result for |
7766 | 266 | // that matter). Check to see that they are the same size. If so, we know |
7767 | 266 | // that the element size of the sext'd result matches the element size of |
7768 | 266 | // the compare operands. |
7769 | 266 | SDLoc DL(N); |
7770 | 266 | SDValue VecOnes = DAG.getConstant(1, DL, VT); |
7771 | 266 | if (VT.getSizeInBits() == N00VT.getSizeInBits()266 ) { |
7772 | 218 | // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. |
7773 | 218 | SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0), |
7774 | 218 | N0.getOperand(1), N0.getOperand(2)); |
7775 | 218 | return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes); |
7776 | 218 | } |
7777 | 48 | |
7778 | 48 | // If the desired elements are smaller or larger than the source |
7779 | 48 | // elements we can use a matching integer vector type and then |
7780 | 48 | // truncate/sign extend. |
7781 | 48 | EVT MatchingElementType = EVT::getIntegerVT( |
7782 | 48 | *DAG.getContext(), N00VT.getScalarSizeInBits()); |
7783 | 48 | EVT MatchingVectorType = EVT::getVectorVT( |
7784 | 48 | *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements()); |
7785 | 48 | SDValue VsetCC = |
7786 | 48 | DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), |
7787 | 48 | N0.getOperand(1), N0.getOperand(2)); |
7788 | 48 | return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT), |
7789 | 48 | VecOnes); |
7790 | 48 | } |
7791 | 22.5k | |
7792 | 22.5k | // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc |
7793 | 22.5k | SDLoc DL(N); |
7794 | 22.5k | if (SDValue SCC = SimplifySelectCC( |
7795 | 22.5k | DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT), |
7796 | 22.5k | DAG.getConstant(0, DL, VT), |
7797 | 22.5k | cast<CondCodeSDNode>(N0.getOperand(2))->get(), true)) |
7798 | 59 | return SCC; |
7799 | 254k | } |
7800 | 254k | |
7801 | 254k | // (zext (shl (zext x), cst)) -> (shl (zext x), cst) |
7802 | 254k | if (254k (N0.getOpcode() == ISD::SHL || 254k N0.getOpcode() == ISD::SRL250k ) && |
7803 | 9.31k | isa<ConstantSDNode>(N0.getOperand(1)) && |
7804 | 8.87k | N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && |
7805 | 254k | N0.hasOneUse()100 ) { |
7806 | 67 | SDValue ShAmt = N0.getOperand(1); |
7807 | 67 | unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); |
7808 | 67 | if (N0.getOpcode() == ISD::SHL67 ) { |
7809 | 23 | SDValue InnerZExt = N0.getOperand(0); |
7810 | 23 | // If the original shl may be shifting out bits, do not perform this |
7811 | 23 | // transformation. |
7812 | 23 | unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() - |
7813 | 23 | InnerZExt.getOperand(0).getValueSizeInBits(); |
7814 | 23 | if (ShAmtVal > KnownZeroBits) |
7815 | 0 | return SDValue(); |
7816 | 67 | } |
7817 | 67 | |
7818 | 67 | SDLoc DL(N); |
7819 | 67 | |
7820 | 67 | // Ensure that the shift amount is wide enough for the shifted value. |
7821 | 67 | if (VT.getSizeInBits() >= 256) |
7822 | 0 | ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); |
7823 | 67 | |
7824 | 67 | return DAG.getNode(N0.getOpcode(), DL, VT, |
7825 | 67 | DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), |
7826 | 67 | ShAmt); |
7827 | 67 | } |
7828 | 253k | |
7829 | 253k | if (SDValue 253k NewVSel253k = matchVSelectOpSizesWithSetCC(N)) |
7830 | 2 | return NewVSel; |
7831 | 253k | |
7832 | 253k | return SDValue(); |
7833 | 253k | } |
7834 | | |
/// Combine an ISD::ANY_EXTEND node: fold aext-of-ext, aext-of-truncate,
/// aext-of-load (forming extending loads) and aext-of-setcc patterns.
/// Returns the replacement value, SDValue(N, 0) when N was rewritten via
/// CombineTo, or an empty SDValue when no combine applies.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant (or build-vector-of-constants) operands fold away entirely.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  // The inner extension already establishes the high bits, so keep its
  // (stronger or equal) semantics and extend straight to VT.
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x)) -> extend, truncate, or forward x depending on
  // the relative widths of x and VT.
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    // Zero-extend the mask so the AND still clears the same bits in VT.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, only transform if all other users can also be
    // rewritten to consume the extended value (setcc users are extendable).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc)
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      else
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the existing extending load at the wider result type and
      // feed the old narrow users through a truncate of it.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}
7980 | | |
// TODO: These transforms should work with AssertSext too.
// Change the function name, comments, opcode references, and caller.
/// Combine an ISD::AssertZext node: collapse redundant nested asserts and
/// merge an assert-truncate-assert sandwich into a single, strongest assert.
SDValue DAGCombiner::visitAssertZext(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
  if (N0.getOpcode() == ISD::AssertZext &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  // hasOneUse: rewriting a multi-use truncate would pessimize other users.
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::AssertZext) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting on the smallest asserted type to the larger source type.
    // This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits from a type larger than the "
           "truncated destination does not provide information");

    SDLoc DL(N);
    // Keep the narrower of the two asserted widths -- it implies the other.
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(ISD::AssertZext, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  return SDValue();
}
8016 | | |
/// If the result of a wider load is shifted to right of N  bits and then
/// truncated to a narrower type and where N is a multiple of number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
/// bits of new type. If the result is to be extended, also fold the extension
/// to form a extending load.
///
/// \param N a TRUNCATE, SIGN_EXTEND_INREG, or SRL node whose (transitive)
///          operand may be a wider load.
/// \returns the narrowed load value (possibly re-shifted left), or an empty
///          SDValue when the transform does not apply.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain).  Don't transform a pre-increment
  // load, for example, which produces an extra value.  Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  // Give the target a veto (e.g. when the narrow load would be slower).
  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}
8192 | | |
/// Combine an ISD::SIGN_EXTEND_INREG node: drop redundant extensions, fold
/// into surrounding extends/shifts, and form sign-extending loads.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: local named 'EVT' deliberately shadows the type name; kept as-is.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned EVTBits = EVT.getScalarSizeInBits();

  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x)
  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
      return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
  }

  // fold (sext_in_reg (zext x)) -> (sext x)
  // iff we are extending the source sign bit.
  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getScalarValueSizeInBits() == EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}
8318 | | |
8319 | 3.09k | SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { |
8320 | 3.09k | SDValue N0 = N->getOperand(0); |
8321 | 3.09k | EVT VT = N->getValueType(0); |
8322 | 3.09k | |
8323 | 3.09k | if (N0.isUndef()) |
8324 | 0 | return DAG.getUNDEF(VT); |
8325 | 3.09k | |
8326 | 3.09k | if (SDNode *3.09k Res3.09k = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, |
8327 | 3.09k | LegalOperations)) |
8328 | 28 | return SDValue(Res, 0); |
8329 | 3.06k | |
8330 | 3.06k | return SDValue(); |
8331 | 3.06k | } |
8332 | | |
8333 | 3.91k | SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { |
8334 | 3.91k | SDValue N0 = N->getOperand(0); |
8335 | 3.91k | EVT VT = N->getValueType(0); |
8336 | 3.91k | |
8337 | 3.91k | if (N0.isUndef()) |
8338 | 0 | return DAG.getUNDEF(VT); |
8339 | 3.91k | |
8340 | 3.91k | if (SDNode *3.91k Res3.91k = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, |
8341 | 3.91k | LegalOperations)) |
8342 | 31 | return SDValue(Res, 0); |
8343 | 3.88k | |
8344 | 3.88k | return SDValue(); |
8345 | 3.88k | } |
8346 | | |
8347 | 849k | SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { |
8348 | 849k | SDValue N0 = N->getOperand(0); |
8349 | 849k | EVT VT = N->getValueType(0); |
8350 | 849k | bool isLE = DAG.getDataLayout().isLittleEndian(); |
8351 | 849k | |
8352 | 849k | // noop truncate |
8353 | 849k | if (N0.getValueType() == N->getValueType(0)) |
8354 | 0 | return N0; |
8355 | 849k | // fold (truncate c1) -> c1 |
8356 | 849k | if (849k DAG.isConstantIntBuildVectorOrConstantInt(N0)849k ) |
8357 | 2.76k | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); |
8358 | 846k | // fold (truncate (truncate x)) -> (truncate x) |
8359 | 846k | if (846k N0.getOpcode() == ISD::TRUNCATE846k ) |
8360 | 1.73k | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); |
8361 | 845k | // fold (truncate (ext x)) -> (ext x) or (truncate x) or x |
8362 | 845k | if (845k N0.getOpcode() == ISD::ZERO_EXTEND || |
8363 | 844k | N0.getOpcode() == ISD::SIGN_EXTEND || |
8364 | 845k | N0.getOpcode() == ISD::ANY_EXTEND843k ) { |
8365 | 11.5k | // if the source is smaller than the dest, we still need an extend. |
8366 | 11.5k | if (N0.getOperand(0).getValueType().bitsLT(VT)) |
8367 | 91 | return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); |
8368 | 11.4k | // if the source is larger than the dest, than we just need the truncate. |
8369 | 11.4k | if (11.4k N0.getOperand(0).getValueType().bitsGT(VT)11.4k ) |
8370 | 707 | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); |
8371 | 10.7k | // if the source and dest are the same type, we can drop both the extend |
8372 | 10.7k | // and the truncate. |
8373 | 10.7k | return N0.getOperand(0); |
8374 | 10.7k | } |
8375 | 833k | |
8376 | 833k | // If this is anyext(trunc), don't fold it, allow ourselves to be folded. |
8377 | 833k | if (833k N->hasOneUse() && 833k (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)690k ) |
8378 | 1.42k | return SDValue(); |
8379 | 832k | |
8380 | 832k | // Fold extract-and-trunc into a narrow extract. For example: |
8381 | 832k | // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) |
8382 | 832k | // i32 y = TRUNCATE(i64 x) |
8383 | 832k | // -- becomes -- |
8384 | 832k | // v16i8 b = BITCAST (v2i64 val) |
8385 | 832k | // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) |
8386 | 832k | // |
8387 | 832k | // Note: We only run this optimization after type legalization (which often |
8388 | 832k | // creates this pattern) and before operation legalization after which |
8389 | 832k | // we need to be more careful about the vector instructions that we generate. |
8390 | 832k | if (832k N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
8391 | 832k | LegalTypes14.0k && !LegalOperations13.8k && N0->hasOneUse()11.1k && VT != MVT::i18.17k ) { |
8392 | 8.17k | EVT VecTy = N0.getOperand(0).getValueType(); |
8393 | 8.17k | EVT ExTy = N0.getValueType(); |
8394 | 8.17k | EVT TrTy = N->getValueType(0); |
8395 | 8.17k | |
8396 | 8.17k | unsigned NumElem = VecTy.getVectorNumElements(); |
8397 | 8.17k | unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); |
8398 | 8.17k | |
8399 | 8.17k | EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); |
8400 | 8.17k | assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); |
8401 | 8.17k | |
8402 | 8.17k | SDValue EltNo = N0->getOperand(1); |
8403 | 8.17k | if (isa<ConstantSDNode>(EltNo) && 8.17k isTypeLegal(NVT)8.16k ) { |
8404 | 7.95k | int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); |
8405 | 7.95k | EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); |
8406 | 7.95k | int Index = isLE ? (Elt*SizeRatio)7.37k : (Elt*SizeRatio + (SizeRatio-1))578 ; |
8407 | 7.95k | |
8408 | 7.95k | SDLoc DL(N); |
8409 | 7.95k | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, |
8410 | 7.95k | DAG.getBitcast(NVT, N0.getOperand(0)), |
8411 | 7.95k | DAG.getConstant(Index, DL, IndexTy)); |
8412 | 7.95k | } |
8413 | 824k | } |
8414 | 824k | |
8415 | 824k | // trunc (select c, a, b) -> select c, (trunc a), (trunc b) |
8416 | 824k | if (824k N0.getOpcode() == ISD::SELECT && 824k N0.hasOneUse()1.25k ) { |
8417 | 883 | EVT SrcVT = N0.getValueType(); |
8418 | 883 | if ((!LegalOperations || 883 TLI.isOperationLegal(ISD::SELECT, SrcVT)5 ) && |
8419 | 883 | TLI.isTruncateFree(SrcVT, VT)883 ) { |
8420 | 827 | SDLoc SL(N0); |
8421 | 827 | SDValue Cond = N0.getOperand(0); |
8422 | 827 | SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); |
8423 | 827 | SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); |
8424 | 827 | return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); |
8425 | 827 | } |
8426 | 823k | } |
8427 | 823k | |
8428 | 823k | // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits() |
8429 | 823k | if (823k N0.getOpcode() == ISD::SHL && 823k N0.hasOneUse()1.78k && |
8430 | 1.11k | (!LegalOperations || 1.11k TLI.isOperationLegalOrCustom(ISD::SHL, VT)483 ) && |
8431 | 823k | TLI.isTypeDesirableForOp(ISD::SHL, VT)1.11k ) { |
8432 | 1.03k | SDValue Amt = N0.getOperand(1); |
8433 | 1.03k | KnownBits Known; |
8434 | 1.03k | DAG.computeKnownBits(Amt, Known); |
8435 | 1.03k | unsigned Size = VT.getScalarSizeInBits(); |
8436 | 1.03k | if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)1.03k ) { |
8437 | 666 | SDLoc SL(N); |
8438 | 666 | EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); |
8439 | 666 | |
8440 | 666 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); |
8441 | 666 | if (AmtVT != Amt.getValueType()666 ) { |
8442 | 8 | Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT); |
8443 | 8 | AddToWorklist(Amt.getNode()); |
8444 | 8 | } |
8445 | 666 | return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt); |
8446 | 666 | } |
8447 | 822k | } |
8448 | 822k | |
8449 | 822k | // Fold a series of buildvector, bitcast, and truncate if possible. |
8450 | 822k | // For example fold |
8451 | 822k | // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to |
8452 | 822k | // (2xi32 (buildvector x, y)). |
8453 | 822k | if (822k Level == AfterLegalizeVectorOps && 822k VT.isVector()18.7k && |
8454 | 822k | N0.getOpcode() == ISD::BITCAST7.41k && N0.hasOneUse()555 && |
8455 | 555 | N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && |
8456 | 822k | N0.getOperand(0).hasOneUse()521 ) { |
8457 | 521 | SDValue BuildVect = N0.getOperand(0); |
8458 | 521 | EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); |
8459 | 521 | EVT TruncVecEltTy = VT.getVectorElementType(); |
8460 | 521 | |
8461 | 521 | // Check that the element types match. |
8462 | 521 | if (BuildVectEltTy == TruncVecEltTy521 ) { |
8463 | 1 | // Now we only need to compute the offset of the truncated elements. |
8464 | 1 | unsigned BuildVecNumElts = BuildVect.getNumOperands(); |
8465 | 1 | unsigned TruncVecNumElts = VT.getVectorNumElements(); |
8466 | 1 | unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; |
8467 | 1 | |
8468 | 1 | assert((BuildVecNumElts % TruncVecNumElts) == 0 && |
8469 | 1 | "Invalid number of elements"); |
8470 | 1 | |
8471 | 1 | SmallVector<SDValue, 8> Opnds; |
8472 | 3 | for (unsigned i = 0, e = BuildVecNumElts; i != e3 ; i += TruncEltOffset2 ) |
8473 | 2 | Opnds.push_back(BuildVect.getOperand(i)); |
8474 | 1 | |
8475 | 1 | return DAG.getBuildVector(VT, SDLoc(N), Opnds); |
8476 | 1 | } |
8477 | 822k | } |
8478 | 822k | |
8479 | 822k | // See if we can simplify the input to this truncate through knowledge that |
8480 | 822k | // only the low bits are being used. |
8481 | 822k | // For example "trunc (or (shl x, 8), y)" // -> trunc y |
8482 | 822k | // Currently we only perform this optimization on scalars because vectors |
8483 | 822k | // may have different active low bits. |
8484 | 822k | if (822k !VT.isVector()822k ) { |
8485 | 789k | APInt Mask = |
8486 | 789k | APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()); |
8487 | 789k | if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask)) |
8488 | 1.58k | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); |
8489 | 821k | } |
8490 | 821k | |
8491 | 821k | // fold (truncate (load x)) -> (smaller load x) |
8492 | 821k | // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) |
8493 | 821k | if (821k !LegalTypes || 821k TLI.isTypeDesirableForOp(N0.getOpcode(), VT)376k ) { |
8494 | 813k | if (SDValue Reduced = ReduceLoadWidth(N)) |
8495 | 2.81k | return Reduced; |
8496 | 811k | |
8497 | 811k | // Handle the case where the load remains an extending load even |
8498 | 811k | // after truncation. |
8499 | 811k | if (811k N0.hasOneUse() && 811k ISD::isUNINDEXEDLoad(N0.getNode())509k ) { |
8500 | 2.23k | LoadSDNode *LN0 = cast<LoadSDNode>(N0); |
8501 | 2.23k | if (!LN0->isVolatile() && |
8502 | 2.23k | LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()2.12k ) { |
8503 | 221 | SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), |
8504 | 221 | VT, LN0->getChain(), LN0->getBasePtr(), |
8505 | 221 | LN0->getMemoryVT(), |
8506 | 221 | LN0->getMemOperand()); |
8507 | 221 | DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); |
8508 | 221 | return NewLoad; |
8509 | 221 | } |
8510 | 818k | } |
8511 | 813k | } |
8512 | 818k | |
8513 | 818k | // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), |
8514 | 818k | // where ... are all 'undef'. |
8515 | 818k | if (818k N0.getOpcode() == ISD::CONCAT_VECTORS && 818k !LegalTypes830 ) { |
8516 | 235 | SmallVector<EVT, 8> VTs; |
8517 | 235 | SDValue V; |
8518 | 235 | unsigned Idx = 0; |
8519 | 235 | unsigned NumDefs = 0; |
8520 | 235 | |
8521 | 493 | for (unsigned i = 0, e = N0.getNumOperands(); i != e493 ; ++i258 ) { |
8522 | 486 | SDValue X = N0.getOperand(i); |
8523 | 486 | if (!X.isUndef()486 ) { |
8524 | 463 | V = X; |
8525 | 463 | Idx = i; |
8526 | 463 | NumDefs++; |
8527 | 463 | } |
8528 | 486 | // Stop if more than one members are non-undef. |
8529 | 486 | if (NumDefs > 1) |
8530 | 228 | break; |
8531 | 258 | VTs.push_back(EVT::getVectorVT(*DAG.getContext(), |
8532 | 258 | VT.getVectorElementType(), |
8533 | 258 | X.getValueType().getVectorNumElements())); |
8534 | 258 | } |
8535 | 235 | |
8536 | 235 | if (NumDefs == 0) |
8537 | 0 | return DAG.getUNDEF(VT); |
8538 | 235 | |
8539 | 235 | if (235 NumDefs == 1235 ) { |
8540 | 7 | assert(V.getNode() && "The single defined operand is empty!"); |
8541 | 7 | SmallVector<SDValue, 8> Opnds; |
8542 | 37 | for (unsigned i = 0, e = VTs.size(); i != e37 ; ++i30 ) { |
8543 | 30 | if (i != Idx30 ) { |
8544 | 23 | Opnds.push_back(DAG.getUNDEF(VTs[i])); |
8545 | 23 | continue; |
8546 | 23 | } |
8547 | 7 | SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); |
8548 | 7 | AddToWorklist(NV.getNode()); |
8549 | 7 | Opnds.push_back(NV); |
8550 | 7 | } |
8551 | 7 | return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); |
8552 | 7 | } |
8553 | 818k | } |
8554 | 818k | |
8555 | 818k | // Fold truncate of a bitcast of a vector to an extract of the low vector |
8556 | 818k | // element. |
8557 | 818k | // |
8558 | 818k | // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx |
8559 | 818k | if (818k N0.getOpcode() == ISD::BITCAST && 818k !VT.isVector()12.8k ) { |
8560 | 11.9k | SDValue VecSrc = N0.getOperand(0); |
8561 | 11.9k | EVT SrcVT = VecSrc.getValueType(); |
8562 | 11.9k | if (SrcVT.isVector() && 11.9k SrcVT.getScalarType() == VT10.4k && |
8563 | 9.72k | (!LegalOperations || |
8564 | 11.9k | TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT)8.82k )) { |
8565 | 9.69k | SDLoc SL(N); |
8566 | 9.69k | |
8567 | 9.69k | EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); |
8568 | 9.69k | unsigned Idx = isLE ? 09.68k : SrcVT.getVectorNumElements() - 13 ; |
8569 | 9.69k | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, |
8570 | 9.69k | VecSrc, DAG.getConstant(Idx, SL, IdxVT)); |
8571 | 9.69k | } |
8572 | 808k | } |
8573 | 808k | |
8574 | 808k | // Simplify the operands using demanded-bits information. |
8575 | 808k | if (808k !VT.isVector() && |
8576 | 775k | SimplifyDemandedBits(SDValue(N, 0))) |
8577 | 20.9k | return SDValue(N, 0); |
8578 | 787k | |
8579 | 787k | // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry) |
8580 | 787k | // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry) |
8581 | 787k | // When the adde's carry is not used. |
8582 | 787k | if (787k (N0.getOpcode() == ISD::ADDE || 787k N0.getOpcode() == ISD::ADDCARRY787k ) && |
8583 | 787k | N0.hasOneUse()15 && !N0.getNode()->hasAnyUseOfValue(1)9 && |
8584 | 787k | (!LegalOperations || 9 TLI.isOperationLegal(N0.getOpcode(), VT)0 )) { |
8585 | 9 | SDLoc SL(N); |
8586 | 9 | auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0)); |
8587 | 9 | auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); |
8588 | 9 | auto VTs = DAG.getVTList(VT, N0->getValueType(1)); |
8589 | 9 | return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); |
8590 | 9 | } |
8591 | 787k | |
8592 | 787k | if (SDValue 787k NewVSel787k = matchVSelectOpSizesWithSetCC(N)) |
8593 | 4 | return NewVSel; |
8594 | 787k | |
8595 | 787k | return SDValue(); |
8596 | 787k | } |
8597 | | |
8598 | 57.9k | static SDNode *getBuildPairElt(SDNode *N, unsigned i) { |
8599 | 57.9k | SDValue Elt = N->getOperand(i); |
8600 | 57.9k | if (Elt.getOpcode() != ISD::MERGE_VALUES) |
8601 | 57.8k | return Elt.getNode(); |
8602 | 98 | return Elt.getOperand(Elt.getResNo()).getNode(); |
8603 | 98 | } |
8604 | | |
8605 | | /// build_pair (load, load) -> load |
8606 | | /// if load locations are consecutive. |
8607 | 28.9k | SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { |
8608 | 28.9k | assert(N->getOpcode() == ISD::BUILD_PAIR); |
8609 | 28.9k | |
8610 | 28.9k | LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); |
8611 | 28.9k | LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); |
8612 | 28.9k | if (!LD1 || 28.9k !LD23.03k || !ISD::isNON_EXTLoad(LD1)2.54k || !LD1->hasOneUse()2.54k || |
8613 | 2.09k | LD1->getAddressSpace() != LD2->getAddressSpace()) |
8614 | 26.9k | return SDValue(); |
8615 | 2.09k | EVT LD1VT = LD1->getValueType(0); |
8616 | 2.09k | unsigned LD1Bytes = LD1VT.getSizeInBits() / 8; |
8617 | 2.09k | if (ISD::isNON_EXTLoad(LD2) && 2.09k LD2->hasOneUse()2.09k && |
8618 | 2.09k | DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)2.09k ) { |
8619 | 1.95k | unsigned Align = LD1->getAlignment(); |
8620 | 1.95k | unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( |
8621 | 1.95k | VT.getTypeForEVT(*DAG.getContext())); |
8622 | 1.95k | |
8623 | 1.95k | if (NewAlign <= Align && |
8624 | 1.80k | (!LegalOperations || 1.80k TLI.isOperationLegal(ISD::LOAD, VT)0 )) |
8625 | 1.80k | return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), |
8626 | 1.80k | LD1->getPointerInfo(), Align); |
8627 | 293 | } |
8628 | 293 | |
8629 | 293 | return SDValue(); |
8630 | 293 | } |
8631 | | |
8632 | 10 | static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { |
8633 | 10 | // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi |
8634 | 10 | // and Lo parts; on big-endian machines it doesn't. |
8635 | 10 | return DAG.getDataLayout().isBigEndian() ? 16 : 04 ; |
8636 | 10 | } |
8637 | | |
8638 | | static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, |
8639 | 365k | const TargetLowering &TLI) { |
8640 | 365k | // If this is not a bitcast to an FP type or if the target doesn't have |
8641 | 365k | // IEEE754-compliant FP logic, we're done. |
8642 | 365k | EVT VT = N->getValueType(0); |
8643 | 365k | if (!VT.isFloatingPoint() || 365k !TLI.hasBitPreservingFPLogic(VT)86.9k ) |
8644 | 335k | return SDValue(); |
8645 | 30.2k | |
8646 | 30.2k | // TODO: Use splat values for the constant-checking below and remove this |
8647 | 30.2k | // restriction. |
8648 | 30.2k | SDValue N0 = N->getOperand(0); |
8649 | 30.2k | EVT SourceVT = N0.getValueType(); |
8650 | 30.2k | if (SourceVT.isVector()) |
8651 | 13.6k | return SDValue(); |
8652 | 16.5k | |
8653 | 16.5k | unsigned FPOpcode; |
8654 | 16.5k | APInt SignMask; |
8655 | 16.5k | switch (N0.getOpcode()) { |
8656 | 91 | case ISD::AND: |
8657 | 91 | FPOpcode = ISD::FABS; |
8658 | 91 | SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits()); |
8659 | 91 | break; |
8660 | 222 | case ISD::XOR: |
8661 | 222 | FPOpcode = ISD::FNEG; |
8662 | 222 | SignMask = APInt::getSignMask(SourceVT.getSizeInBits()); |
8663 | 222 | break; |
8664 | 16.5k | // TODO: ISD::OR --> ISD::FNABS? |
8665 | 16.2k | default: |
8666 | 16.2k | return SDValue(); |
8667 | 313 | } |
8668 | 313 | |
8669 | 313 | // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X |
8670 | 313 | // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X |
8671 | 313 | SDValue LogicOp0 = N0.getOperand(0); |
8672 | 313 | ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); |
8673 | 313 | if (LogicOp1 && 313 LogicOp1->getAPIntValue() == SignMask63 && |
8674 | 52 | LogicOp0.getOpcode() == ISD::BITCAST && |
8675 | 50 | LogicOp0->getOperand(0).getValueType() == VT) |
8676 | 50 | return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0)); |
8677 | 263 | |
8678 | 263 | return SDValue(); |
8679 | 263 | } |
8680 | | |
8681 | 394k | SDValue DAGCombiner::visitBITCAST(SDNode *N) { |
8682 | 394k | SDValue N0 = N->getOperand(0); |
8683 | 394k | EVT VT = N->getValueType(0); |
8684 | 394k | |
8685 | 394k | if (N0.isUndef()) |
8686 | 109 | return DAG.getUNDEF(VT); |
8687 | 393k | |
8688 | 393k | // If the input is a BUILD_VECTOR with all constant elements, fold this now. |
8689 | 393k | // Only do this before legalize, since afterward the target may be depending |
8690 | 393k | // on the bitconvert. |
8691 | 393k | // First check to see if this is all constant. |
8692 | 393k | if (393k !LegalTypes && |
8693 | 393k | N0.getOpcode() == ISD::BUILD_VECTOR55.9k && N0.getNode()->hasOneUse()2.54k && |
8694 | 393k | VT.isVector()1.92k ) { |
8695 | 1.23k | bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); |
8696 | 1.23k | |
8697 | 1.23k | EVT DestEltVT = N->getValueType(0).getVectorElementType(); |
8698 | 1.23k | assert(!DestEltVT.isVector() && |
8699 | 1.23k | "Element type of vector ValueType must not be vector!"); |
8700 | 1.23k | if (isSimple) |
8701 | 377 | return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); |
8702 | 393k | } |
8703 | 393k | |
8704 | 393k | // If the input is a constant, let getNode fold it. |
8705 | 393k | if (393k isa<ConstantSDNode>(N0) || 393k isa<ConstantFPSDNode>(N0)391k ) { |
8706 | 2.49k | // If we can't allow illegal operations, we need to check that this is just |
8707 | 2.49k | // a fp -> int or int -> conversion and that the resulting operation will |
8708 | 2.49k | // be legal. |
8709 | 2.49k | if (!LegalOperations || |
8710 | 2.30k | (isa<ConstantSDNode>(N0) && 2.30k VT.isFloatingPoint()2.26k && !VT.isVector()13 && |
8711 | 2.30k | TLI.isOperationLegal(ISD::ConstantFP, VT)) || |
8712 | 2.30k | (isa<ConstantFPSDNode>(N0) && 2.30k VT.isInteger()44 && !VT.isVector()44 && |
8713 | 26 | TLI.isOperationLegal(ISD::Constant, VT))) |
8714 | 220 | return DAG.getBitcast(VT, N0); |
8715 | 393k | } |
8716 | 393k | |
8717 | 393k | // (conv (conv x, t1), t2) -> (conv x, t2) |
8718 | 393k | if (393k N0.getOpcode() == ISD::BITCAST393k ) |
8719 | 24.0k | return DAG.getBitcast(VT, N0.getOperand(0)); |
8720 | 369k | |
8721 | 369k | // fold (conv (load x)) -> (load (conv*)x) |
8722 | 369k | // If the resultant load doesn't need a higher alignment than the original! |
8723 | 369k | if (369k ISD::isNormalLoad(N0.getNode()) && 369k N0.hasOneUse()79.6k && |
8724 | 369k | // Do not change the width of a volatile load. |
8725 | 77.0k | !cast<LoadSDNode>(N0)->isVolatile() && |
8726 | 369k | // Do not remove the cast if the types differ in endian layout. |
8727 | 72.9k | TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == |
8728 | 72.9k | TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && |
8729 | 72.9k | (!LegalOperations || 72.9k TLI.isOperationLegal(ISD::LOAD, VT)65.3k ) && |
8730 | 369k | TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)8.73k ) { |
8731 | 3.68k | LoadSDNode *LN0 = cast<LoadSDNode>(N0); |
8732 | 3.68k | unsigned OrigAlign = LN0->getAlignment(); |
8733 | 3.68k | |
8734 | 3.68k | bool Fast = false; |
8735 | 3.68k | if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, |
8736 | 3.68k | LN0->getAddressSpace(), OrigAlign, &Fast) && |
8737 | 3.68k | Fast3.67k ) { |
8738 | 3.65k | SDValue Load = |
8739 | 3.65k | DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), |
8740 | 3.65k | LN0->getPointerInfo(), OrigAlign, |
8741 | 3.65k | LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); |
8742 | 3.65k | DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); |
8743 | 3.65k | return Load; |
8744 | 3.65k | } |
8745 | 365k | } |
8746 | 365k | |
8747 | 365k | if (SDValue 365k V365k = foldBitcastedFPLogic(N, DAG, TLI)) |
8748 | 50 | return V; |
8749 | 365k | |
8750 | 365k | // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) |
8751 | 365k | // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) |
8752 | 365k | // |
8753 | 365k | // For ppc_fp128: |
8754 | 365k | // fold (bitcast (fneg x)) -> |
8755 | 365k | // flipbit = signbit |
8756 | 365k | // (xor (bitcast x) (build_pair flipbit, flipbit)) |
8757 | 365k | // |
8758 | 365k | // fold (bitcast (fabs x)) -> |
8759 | 365k | // flipbit = (and (extract_element (bitcast x), 0), signbit) |
8760 | 365k | // (xor (bitcast x) (build_pair flipbit, flipbit)) |
8761 | 365k | // This often reduces constant pool loads. |
8762 | 365k | if (365k ((N0.getOpcode() == ISD::FNEG && 365k !TLI.isFNegFree(N0.getValueType())415 ) || |
8763 | 365k | (N0.getOpcode() == ISD::FABS && 365k !TLI.isFAbsFree(N0.getValueType())321 )) && |
8764 | 365k | N0.getNode()->hasOneUse()372 && VT.isInteger()222 && |
8765 | 365k | !VT.isVector()197 && !N0.getValueType().isVector()102 ) { |
8766 | 66 | SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0)); |
8767 | 66 | AddToWorklist(NewConv.getNode()); |
8768 | 66 | |
8769 | 66 | SDLoc DL(N); |
8770 | 66 | if (N0.getValueType() == MVT::ppcf128 && 66 !LegalTypes10 ) { |
8771 | 10 | assert(VT.getSizeInBits() == 128); |
8772 | 10 | SDValue SignBit = DAG.getConstant( |
8773 | 10 | APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); |
8774 | 10 | SDValue FlipBit; |
8775 | 10 | if (N0.getOpcode() == ISD::FNEG10 ) { |
8776 | 5 | FlipBit = SignBit; |
8777 | 5 | AddToWorklist(FlipBit.getNode()); |
8778 | 10 | } else { |
8779 | 5 | assert(N0.getOpcode() == ISD::FABS); |
8780 | 5 | SDValue Hi = |
8781 | 5 | DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, |
8782 | 5 | DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), |
8783 | 5 | SDLoc(NewConv))); |
8784 | 5 | AddToWorklist(Hi.getNode()); |
8785 | 5 | FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); |
8786 | 5 | AddToWorklist(FlipBit.getNode()); |
8787 | 5 | } |
8788 | 10 | SDValue FlipBits = |
8789 | 10 | DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); |
8790 | 10 | AddToWorklist(FlipBits.getNode()); |
8791 | 10 | return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); |
8792 | 10 | } |
8793 | 56 | APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); |
8794 | 56 | if (N0.getOpcode() == ISD::FNEG) |
8795 | 22 | return DAG.getNode(ISD::XOR, DL, VT, |
8796 | 22 | NewConv, DAG.getConstant(SignBit, DL, VT)); |
8797 | 0 | assert(N0.getOpcode() == ISD::FABS); |
8798 | 34 | return DAG.getNode(ISD::AND, DL, VT, |
8799 | 34 | NewConv, DAG.getConstant(~SignBit, DL, VT)); |
8800 | 34 | } |
8801 | 365k | |
8802 | 365k | // fold (bitconvert (fcopysign cst, x)) -> |
8803 | 365k | // (or (and (bitconvert x), sign), (and cst, (not sign))) |
8804 | 365k | // Note that we don't handle (copysign x, cst) because this can always be |
8805 | 365k | // folded to an fneg or fabs. |
8806 | 365k | // |
8807 | 365k | // For ppc_fp128: |
8808 | 365k | // fold (bitcast (fcopysign cst, x)) -> |
8809 | 365k | // flipbit = (and (extract_element |
8810 | 365k | // (xor (bitcast cst), (bitcast x)), 0), |
8811 | 365k | // signbit) |
8812 | 365k | // (xor (bitcast cst) (build_pair flipbit, flipbit)) |
8813 | 365k | if (365k N0.getOpcode() == ISD::FCOPYSIGN && 365k N0.getNode()->hasOneUse()232 && |
8814 | 214 | isa<ConstantFPSDNode>(N0.getOperand(0)) && |
8815 | 365k | VT.isInteger()6 && !VT.isVector()6 ) { |
8816 | 6 | unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits(); |
8817 | 6 | EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); |
8818 | 6 | if (isTypeLegal(IntXVT)6 ) { |
8819 | 6 | SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1)); |
8820 | 6 | AddToWorklist(X.getNode()); |
8821 | 6 | |
8822 | 6 | // If X has a different width than the result/lhs, sext it or truncate it. |
8823 | 6 | unsigned VTWidth = VT.getSizeInBits(); |
8824 | 6 | if (OrigXWidth < VTWidth6 ) { |
8825 | 0 | X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); |
8826 | 0 | AddToWorklist(X.getNode()); |
8827 | 6 | } else if (6 OrigXWidth > VTWidth6 ) { |
8828 | 0 | // To get the sign bit in the right place, we have to shift it right |
8829 | 0 | // before truncating. |
8830 | 0 | SDLoc DL(X); |
8831 | 0 | X = DAG.getNode(ISD::SRL, DL, |
8832 | 0 | X.getValueType(), X, |
8833 | 0 | DAG.getConstant(OrigXWidth-VTWidth, DL, |
8834 | 0 | X.getValueType())); |
8835 | 0 | AddToWorklist(X.getNode()); |
8836 | 0 | X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); |
8837 | 0 | AddToWorklist(X.getNode()); |
8838 | 0 | } |
8839 | 6 | |
8840 | 6 | if (N0.getValueType() == MVT::ppcf128 && 6 !LegalTypes5 ) { |
8841 | 5 | APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2); |
8842 | 5 | SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); |
8843 | 5 | AddToWorklist(Cst.getNode()); |
8844 | 5 | SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); |
8845 | 5 | AddToWorklist(X.getNode()); |
8846 | 5 | SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); |
8847 | 5 | AddToWorklist(XorResult.getNode()); |
8848 | 5 | SDValue XorResult64 = DAG.getNode( |
8849 | 5 | ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, |
8850 | 5 | DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), |
8851 | 5 | SDLoc(XorResult))); |
8852 | 5 | AddToWorklist(XorResult64.getNode()); |
8853 | 5 | SDValue FlipBit = |
8854 | 5 | DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, |
8855 | 5 | DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); |
8856 | 5 | AddToWorklist(FlipBit.getNode()); |
8857 | 5 | SDValue FlipBits = |
8858 | 5 | DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); |
8859 | 5 | AddToWorklist(FlipBits.getNode()); |
8860 | 5 | return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); |
8861 | 5 | } |
8862 | 1 | APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); |
8863 | 1 | X = DAG.getNode(ISD::AND, SDLoc(X), VT, |
8864 | 1 | X, DAG.getConstant(SignBit, SDLoc(X), VT)); |
8865 | 1 | AddToWorklist(X.getNode()); |
8866 | 1 | |
8867 | 1 | SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); |
8868 | 1 | Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, |
8869 | 1 | Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); |
8870 | 1 | AddToWorklist(Cst.getNode()); |
8871 | 1 | |
8872 | 1 | return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); |
8873 | 1 | } |
8874 | 6 | } |
8875 | 365k | |
8876 | 365k | // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. |
8877 | 365k | if (365k N0.getOpcode() == ISD::BUILD_PAIR365k ) |
8878 | 3.36k | if (SDValue 3.36k CombineLD3.36k = CombineConsecutiveLoads(N0.getNode(), VT)) |
8879 | 38 | return CombineLD; |
8880 | 365k | |
8881 | 365k | // Remove double bitcasts from shuffles - this is often a legacy of |
8882 | 365k | // XformToShuffleWithZero being used to combine bitmaskings (of |
8883 | 365k | // float vectors bitcast to integer vectors) into shuffles. |
8884 | 365k | // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) |
8885 | 365k | if (365k Level < AfterLegalizeDAG && 365k TLI.isTypeLegal(VT)128k && VT.isVector()119k && |
8886 | 89.5k | N0->getOpcode() == ISD::VECTOR_SHUFFLE && |
8887 | 10.4k | VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && |
8888 | 365k | !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())678 ) { |
8889 | 678 | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); |
8890 | 678 | |
8891 | 678 | // If operands are a bitcast, peek through if it casts the original VT. |
8892 | 678 | // If operands are a constant, just bitcast back to original VT. |
8893 | 1.35k | auto PeekThroughBitcast = [&](SDValue Op) { |
8894 | 1.35k | if (Op.getOpcode() == ISD::BITCAST && |
8895 | 721 | Op.getOperand(0).getValueType() == VT) |
8896 | 271 | return SDValue(Op.getOperand(0)); |
8897 | 1.08k | if (1.08k Op.isUndef() || 1.08k ISD::isBuildVectorOfConstantSDNodes(Op.getNode())778 || |
8898 | 766 | ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) |
8899 | 326 | return DAG.getBitcast(VT, Op); |
8900 | 759 | return SDValue(); |
8901 | 759 | }; |
8902 | 678 | |
8903 | 678 | // FIXME: If either input vector is bitcast, try to convert the shuffle to |
8904 | 678 | // the result type of this bitcast. This would eliminate at least one |
8905 | 678 | // bitcast. See the transform in InstCombine. |
8906 | 678 | SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); |
8907 | 678 | SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); |
8908 | 678 | if (!(SV0 && 678 SV1153 )) |
8909 | 530 | return SDValue(); |
8910 | 148 | |
8911 | 148 | int MaskScale = |
8912 | 148 | VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); |
8913 | 148 | SmallVector<int, 8> NewMask; |
8914 | 148 | for (int M : SVN->getMask()) |
8915 | 1.89k | for (int i = 0; 598 i != MaskScale1.89k ; ++i1.29k ) |
8916 | 1.29k | NewMask.push_back(M < 0 ? 1.29k -120 : M * MaskScale + i1.27k ); |
8917 | 148 | |
8918 | 148 | bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); |
8919 | 148 | if (!LegalMask148 ) { |
8920 | 0 | std::swap(SV0, SV1); |
8921 | 0 | ShuffleVectorSDNode::commuteMask(NewMask); |
8922 | 0 | LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); |
8923 | 0 | } |
8924 | 148 | |
8925 | 148 | if (LegalMask) |
8926 | 148 | return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); |
8927 | 364k | } |
8928 | 364k | |
8929 | 364k | return SDValue(); |
8930 | 364k | } |
8931 | | |
8932 | 25.6k | SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { |
8933 | 25.6k | EVT VT = N->getValueType(0); |
8934 | 25.6k | return CombineConsecutiveLoads(N, VT); |
8935 | 25.6k | } |
8936 | | |
8937 | | /// We know that BV is a build_vector node with Constant, ConstantFP or Undef |
8938 | | /// operands. DstEltVT indicates the destination element value type. |
8939 | | SDValue DAGCombiner:: |
8940 | 502 | ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { |
8941 | 502 | EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); |
8942 | 502 | |
8943 | 502 | // If this is already the right type, we're done. |
8944 | 502 | if (SrcEltVT == DstEltVT502 ) return SDValue(BV, 0)0 ; |
8945 | 502 | |
8946 | 502 | unsigned SrcBitSize = SrcEltVT.getSizeInBits(); |
8947 | 502 | unsigned DstBitSize = DstEltVT.getSizeInBits(); |
8948 | 502 | |
8949 | 502 | // If this is a conversion of N elements of one type to N elements of another |
8950 | 502 | // type, convert each element. This handles FP<->INT cases. |
8951 | 502 | if (SrcBitSize == DstBitSize502 ) { |
8952 | 148 | EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, |
8953 | 148 | BV->getValueType(0).getVectorNumElements()); |
8954 | 148 | |
8955 | 148 | // Due to the FP element handling below calling this routine recursively, |
8956 | 148 | // we can end up with a scalar-to-vector node here. |
8957 | 148 | if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) |
8958 | 0 | return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, |
8959 | 0 | DAG.getBitcast(DstEltVT, BV->getOperand(0))); |
8960 | 148 | |
8961 | 148 | SmallVector<SDValue, 8> Ops; |
8962 | 451 | for (SDValue Op : BV->op_values()) { |
8963 | 451 | // If the vector element type is not legal, the BUILD_VECTOR operands |
8964 | 451 | // are promoted and implicitly truncated. Make that explicit here. |
8965 | 451 | if (Op.getValueType() != SrcEltVT) |
8966 | 0 | Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); |
8967 | 451 | Ops.push_back(DAG.getBitcast(DstEltVT, Op)); |
8968 | 451 | AddToWorklist(Ops.back().getNode()); |
8969 | 451 | } |
8970 | 148 | return DAG.getBuildVector(VT, SDLoc(BV), Ops); |
8971 | 148 | } |
8972 | 354 | |
8973 | 354 | // Otherwise, we're growing or shrinking the elements. To avoid having to |
8974 | 354 | // handle annoying details of growing/shrinking FP values, we convert them to |
8975 | 354 | // int first. |
8976 | 354 | if (354 SrcEltVT.isFloatingPoint()354 ) { |
8977 | 43 | // Convert the input float vector to a int vector where the elements are the |
8978 | 43 | // same sizes. |
8979 | 43 | EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); |
8980 | 43 | BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); |
8981 | 43 | SrcEltVT = IntVT; |
8982 | 43 | } |
8983 | 354 | |
8984 | 354 | // Now we know the input is an integer vector. If the output is a FP type, |
8985 | 354 | // convert to integer first, then to FP of the right size. |
8986 | 354 | if (DstEltVT.isFloatingPoint()354 ) { |
8987 | 41 | EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); |
8988 | 41 | SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); |
8989 | 41 | |
8990 | 41 | // Next, convert to FP elements of the same size. |
8991 | 41 | return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); |
8992 | 41 | } |
8993 | 313 | |
8994 | 313 | SDLoc DL(BV); |
8995 | 313 | |
8996 | 313 | // Okay, we know the src/dst types are both integers of differing types. |
8997 | 313 | // Handling growing first. |
8998 | 313 | assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); |
8999 | 313 | if (SrcBitSize < DstBitSize313 ) { |
9000 | 208 | unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; |
9001 | 208 | |
9002 | 208 | SmallVector<SDValue, 8> Ops; |
9003 | 788 | for (unsigned i = 0, e = BV->getNumOperands(); i != e; |
9004 | 580 | i += NumInputsPerOutput580 ) { |
9005 | 580 | bool isLE = DAG.getDataLayout().isLittleEndian(); |
9006 | 580 | APInt NewBits = APInt(DstBitSize, 0); |
9007 | 580 | bool EltIsUndef = true; |
9008 | 2.18k | for (unsigned j = 0; j != NumInputsPerOutput2.18k ; ++j1.60k ) { |
9009 | 1.60k | // Shift the previously computed bits over. |
9010 | 1.60k | NewBits <<= SrcBitSize; |
9011 | 1.60k | SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1)1.14k : j468 )); |
9012 | 1.60k | if (Op.isUndef()1.60k ) continue146 ; |
9013 | 1.46k | EltIsUndef = false; |
9014 | 1.46k | |
9015 | 1.46k | NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). |
9016 | 1.46k | zextOrTrunc(SrcBitSize).zext(DstBitSize); |
9017 | 1.46k | } |
9018 | 580 | |
9019 | 580 | if (EltIsUndef) |
9020 | 12 | Ops.push_back(DAG.getUNDEF(DstEltVT)); |
9021 | 580 | else |
9022 | 568 | Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); |
9023 | 580 | } |
9024 | 208 | |
9025 | 208 | EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); |
9026 | 208 | return DAG.getBuildVector(VT, DL, Ops); |
9027 | 208 | } |
9028 | 105 | |
9029 | 105 | // Finally, this must be the case where we are shrinking elements: each input |
9030 | 105 | // turns into multiple outputs. |
9031 | 105 | unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; |
9032 | 105 | EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, |
9033 | 105 | NumOutputsPerInput*BV->getNumOperands()); |
9034 | 105 | SmallVector<SDValue, 8> Ops; |
9035 | 105 | |
9036 | 458 | for (const SDValue &Op : BV->op_values()) { |
9037 | 458 | if (Op.isUndef()458 ) { |
9038 | 10 | Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); |
9039 | 10 | continue; |
9040 | 10 | } |
9041 | 448 | |
9042 | 448 | APInt OpVal = cast<ConstantSDNode>(Op)-> |
9043 | 448 | getAPIntValue().zextOrTrunc(SrcBitSize); |
9044 | 448 | |
9045 | 1.82k | for (unsigned j = 0; j != NumOutputsPerInput1.82k ; ++j1.38k ) { |
9046 | 1.38k | APInt ThisVal = OpVal.trunc(DstBitSize); |
9047 | 1.38k | Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); |
9048 | 1.38k | OpVal.lshrInPlace(DstBitSize); |
9049 | 1.38k | } |
9050 | 448 | |
9051 | 448 | // For big endian targets, swap the order of the pieces of each element. |
9052 | 448 | if (DAG.getDataLayout().isBigEndian()) |
9053 | 32 | std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); |
9054 | 458 | } |
9055 | 502 | |
9056 | 502 | return DAG.getBuildVector(VT, DL, Ops); |
9057 | 502 | } |
9058 | | |
9059 | 141k | static bool isContractable(SDNode *N) { |
9060 | 141k | SDNodeFlags F = N->getFlags(); |
9061 | 141k | return F.hasAllowContract() || F.hasUnsafeAlgebra(); |
9062 | 141k | } |
9063 | | |
/// Try to perform FMA combining on a given FADD node, i.e. rewrite
/// (fadd (fmul x, y), z) and related patterns into fused multiply-adds.
/// Returns the replacement node, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Fusion is allowed everywhere when the global options say so; otherwise we
  // fall back to per-node fast-math flags (checked via isContractable below).
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              Options.UnsafeFPMath || HasFMAD);
  // If the addition is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Some subtargets prefer to form FMAs later, in the machine combiner.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };
  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
    if (N0.getNode()->use_size() > N1.getNode()->use_size())
      std::swap(N0, N1);
  }

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0);
  }

  // Look through FP_EXTEND nodes to do more combining.
  if (LookThroughFPExt) {
    // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (isContractableFMUL(N00))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)), N1);
    }

    // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (isContractableFMUL(N10))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)), N0);
    }
  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath &&
        N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1));
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath &&
        N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL &&
        N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0));
    }

    if (LookThroughFPExt) {
      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y, (fma (fpext u), (fpext v), z))
      auto FoldFAddFMAFPExtFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (isContractableFMUL(N020))
            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                        N020.getOperand(0), N020.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      auto FoldFAddFPExtFMAFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (isContractableFMUL(N002))
            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                        N002.getOperand(0), N002.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
      //   -> (fma y, z, (fma (fpext u), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode) {
        SDValue N12 = N1.getOperand(2);
        if (N12.getOpcode() == ISD::FP_EXTEND) {
          SDValue N120 = N12.getOperand(0);
          if (isContractableFMUL(N120))
            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                        N120.getOperand(0), N120.getOperand(1),
                                        N0);
        }
      }

      // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
      //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == PreferredFusedOpcode) {
          SDValue N102 = N10.getOperand(2);
          if (isContractableFMUL(N102))
            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                        N102.getOperand(0), N102.getOperand(1),
                                        N0);
        }
      }
    }
  }

  return SDValue();
}
9266 | | |
/// Try to perform FMA combining on a given FSUB node, i.e. rewrite
/// (fsub (fmul x, y), z) and related patterns into fused multiply-adds,
/// negating operands as needed. Returns the replacement node, or an empty
/// SDValue if no fold applies.
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  const TargetOptions &Options = DAG.getTarget().Options;
  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Fusion is allowed everywhere when the global options say so; otherwise we
  // fall back to per-node fast-math flags (checked via isContractable below).
  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                              Options.UnsafeFPMath || HasFMAD);
  // If the subtraction is not contractable, do not combine.
  if (!AllowFusionGlobally && !isContractable(N))
    return SDValue();

  // Some subtargets prefer to form FMAs later, in the machine combiner.
  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
  if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // Is the node an FMUL and contractable either due to global flags or
  // SDNodeFlags.
  auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
    if (N.getOpcode() != ISD::FMUL)
      return false;
    return AllowFusionGlobally || isContractable(N.getNode());
  };

  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
  if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1),
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
  // Note: Commutes FSUB operands.
  if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT,
                                   N1.getOperand(0)),
                       N1.getOperand(1), N0);

  // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
      (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
    SDValue N00 = N0.getOperand(0).getOperand(0);
    SDValue N01 = N0.getOperand(0).getOperand(1);
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
                       DAG.getNode(ISD::FNEG, SL, VT, N1));
  }

  // Look through FP_EXTEND nodes to do more combining.
  if (LookThroughFPExt) {
    // fold (fsub (fpext (fmul x, y)), z)
    //   -> (fma (fpext x), (fpext y), (fneg z))
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (isContractableFMUL(N00))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)),
                           DAG.getNode(ISD::FNEG, SL, VT, N1));
    }

    // fold (fsub x, (fpext (fmul y, z)))
    //   -> (fma (fneg (fpext y)), (fpext z), x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (isContractableFMUL(N10))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                   N10.getOperand(0))),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)),
                           N0);
    }

    // fold (fsub (fpext (fneg (fmul, x, y))), z)
    //   -> (fneg (fma (fpext x), (fpext y), z))
    // Note: This could be removed with appropriate canonicalization of the
    // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
    // from implementing the canonicalization in visitFSUB.
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FNEG) {
        SDValue N000 = N00.getOperand(0);
        if (isContractableFMUL(N000)) {
          return DAG.getNode(ISD::FNEG, SL, VT,
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(1)),
                                         N1));
        }
      }
    }

    // fold (fsub (fneg (fpext (fmul, x, y))), z)
    //   -> (fneg (fma (fpext x)), (fpext y), z)
    // Note: This could be removed with appropriate canonicalization of the
    // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
    // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
    // from implementing the canonicalization in visitFSUB.
    if (N0.getOpcode() == ISD::FNEG) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FP_EXTEND) {
        SDValue N000 = N00.getOperand(0);
        if (isContractableFMUL(N000)) {
          return DAG.getNode(ISD::FNEG, SL, VT,
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(0)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N000.getOperand(1)),
                                         N1));
        }
      }
    }

  }

  // More folding opportunities when target permits.
  if (Aggressive) {
    // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y (fma u, v, (fneg z)))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
        N0.getOperand(2)->hasOneUse()) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     DAG.getNode(ISD::FNEG, SL, VT,
                                                 N1)));
    }

    // fold (fsub x, (fma y, z, (fmul u, v)))
    //   -> (fma (fneg y), z, (fma (fneg u), v, x))
    // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
    // are currently only supported on binary nodes.
    if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
        isContractableFMUL(N1.getOperand(2))) {
      SDValue N20 = N1.getOperand(2).getOperand(0);
      SDValue N21 = N1.getOperand(2).getOperand(1);
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         DAG.getNode(ISD::FNEG, SL, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     DAG.getNode(ISD::FNEG, SL, VT, N20),
                                     N21, N0));
    }

    if (LookThroughFPExt) {
      // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (isContractableFMUL(N020))
            return DAG.getNode(PreferredFusedOpcode, SL, VT,
                               N0.getOperand(0), N0.getOperand(1),
                               DAG.getNode(PreferredFusedOpcode, SL, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N020.getOperand(0)),
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N020.getOperand(1)),
                                           DAG.getNode(ISD::FNEG, SL, VT,
                                                       N1)));
        }
      }

      // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y),
      //           (fma (fpext u), (fpext v), (fneg z)))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (isContractableFMUL(N002))
            return DAG.getNode(PreferredFusedOpcode, SL, VT,
                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                           N00.getOperand(0)),
                               DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                           N00.getOperand(1)),
                               DAG.getNode(PreferredFusedOpcode, SL, VT,
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N002.getOperand(0)),
                                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                       N002.getOperand(1)),
                                           DAG.getNode(ISD::FNEG, SL, VT,
                                                       N1)));
        }
      }

      // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
      //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode &&
          N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
        SDValue N120 = N1.getOperand(2).getOperand(0);
        if (isContractableFMUL(N120)) {
          SDValue N1200 = N120.getOperand(0);
          SDValue N1201 = N120.getOperand(1);
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
                             N1.getOperand(1),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     DAG.getNode(ISD::FP_EXTEND, SL,
                                                                 VT, N1200)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N1201),
                                         N0));
        }
      }

      // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
      //   -> (fma (fneg (fpext y)), (fpext z),
      //           (fma (fneg (fpext u)), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND &&
          N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
        SDValue N100 = N1.getOperand(0).getOperand(0);
        SDValue N101 = N1.getOperand(0).getOperand(1);
        SDValue N102 = N1.getOperand(0).getOperand(2);
        if (isContractableFMUL(N102)) {
          SDValue N1020 = N102.getOperand(0);
          SDValue N1021 = N102.getOperand(1);
          return DAG.getNode(PreferredFusedOpcode, SL, VT,
                             DAG.getNode(ISD::FNEG, SL, VT,
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N100)),
                             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
                             DAG.getNode(PreferredFusedOpcode, SL, VT,
                                         DAG.getNode(ISD::FNEG, SL, VT,
                                                     DAG.getNode(ISD::FP_EXTEND, SL,
                                                                 VT, N1020)),
                                         DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                                     N1021),
                                         N0));
        }
      }
    }
  }

  return SDValue();
}
9547 | | |
/// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
/// subtraction instead of addition).
/// Returns the replacement node, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");

  const TargetOptions &Options = DAG.getTarget().Options;

  // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a nan.
  if (!Options.NoInfsFPMath)
    return SDValue();

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA =
      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));

  // Floating-point multiply-add with intermediate rounding. This can result
  // in a less precise result due to the changed rounding order.
  bool HasFMAD = Options.UnsafeFPMath &&
                 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);

  // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
  // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
  auto FuseFADD = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
      if (XC1 && XC1->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
      if (XC1 && XC1->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
    }
    return SDValue();
  };

  // Try both operand orders, since FMUL is commutative.
  if (SDValue FMA = FuseFADD(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFADD(N1, N0))
    return FMA;

  // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
  // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
  // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
  // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
  auto FuseFSUB = [&](SDValue X, SDValue Y) {
    if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
      // The +/-1.0 constant may be on either side of the subtraction.
      auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
      if (XC0 && XC0->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                           Y);
      if (XC0 && XC0->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));

      auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
      if (XC1 && XC1->isExactlyValue(+1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
                           DAG.getNode(ISD::FNEG, SL, VT, Y));
      if (XC1 && XC1->isExactlyValue(-1.0))
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
    }
    return SDValue();
  };

  // Try both operand orders, since FMUL is commutative.
  if (SDValue FMA = FuseFSUB(N0, N1))
    return FMA;
  if (SDValue FMA = FuseFSUB(N1, N0))
    return FMA;

  return SDValue();
}
9637 | | |
9638 | 283k | static bool isFMulNegTwo(SDValue &N) { |
9639 | 283k | if (N.getOpcode() != ISD::FMUL) |
9640 | 222k | return false; |
9641 | 61.0k | if (ConstantFPSDNode *61.0k CFP61.0k = isConstOrConstSplatFP(N.getOperand(1))) |
9642 | 11.1k | return CFP->isExactlyValue(-2.0); |
9643 | 49.8k | return false; |
9644 | 49.8k | } |
9645 | | |
9646 | 142k | SDValue DAGCombiner::visitFADD(SDNode *N) { |
9647 | 142k | SDValue N0 = N->getOperand(0); |
9648 | 142k | SDValue N1 = N->getOperand(1); |
9649 | 142k | bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); |
9650 | 142k | bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); |
9651 | 142k | EVT VT = N->getValueType(0); |
9652 | 142k | SDLoc DL(N); |
9653 | 142k | const TargetOptions &Options = DAG.getTarget().Options; |
9654 | 142k | const SDNodeFlags Flags = N->getFlags(); |
9655 | 142k | |
9656 | 142k | // fold vector ops |
9657 | 142k | if (VT.isVector()) |
9658 | 27.1k | if (SDValue 27.1k FoldedVOp27.1k = SimplifyVBinOp(N)) |
9659 | 2 | return FoldedVOp; |
9660 | 142k | |
9661 | 142k | // fold (fadd c1, c2) -> c1 + c2 |
9662 | 142k | if (142k N0CFP && 142k N1CFP216 ) |
9663 | 3 | return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); |
9664 | 142k | |
9665 | 142k | // canonicalize constant to RHS |
9666 | 142k | if (142k N0CFP && 142k !N1CFP213 ) |
9667 | 213 | return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); |
9668 | 142k | |
9669 | 142k | if (SDValue 142k NewSel142k = foldBinOpIntoSelect(N)) |
9670 | 5 | return NewSel; |
9671 | 142k | |
9672 | 142k | // fold (fadd A, (fneg B)) -> (fsub A, B) |
9673 | 142k | if (142k (!LegalOperations || 142k TLI.isOperationLegalOrCustom(ISD::FSUB, VT)64.7k ) && |
9674 | 139k | isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) |
9675 | 214 | return DAG.getNode(ISD::FSUB, DL, VT, N0, |
9676 | 214 | GetNegatedExpression(N1, DAG, LegalOperations), Flags); |
9677 | 142k | |
9678 | 142k | // fold (fadd (fneg A), B) -> (fsub B, A) |
9679 | 142k | if (142k (!LegalOperations || 142k TLI.isOperationLegalOrCustom(ISD::FSUB, VT)64.7k ) && |
9680 | 139k | isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) |
9681 | 256 | return DAG.getNode(ISD::FSUB, DL, VT, N1, |
9682 | 256 | GetNegatedExpression(N0, DAG, LegalOperations), Flags); |
9683 | 141k | |
9684 | 141k | // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)) |
9685 | 141k | // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B)) |
9686 | 141k | if (141k (isFMulNegTwo(N0) && 141k N0.hasOneUse()14 ) || |
9687 | 141k | (isFMulNegTwo(N1) && 141k N1.hasOneUse()33 )) { |
9688 | 45 | bool N1IsFMul = isFMulNegTwo(N1); |
9689 | 45 | SDValue AddOp = N1IsFMul ? N1.getOperand(0)31 : N0.getOperand(0)14 ; |
9690 | 45 | SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags); |
9691 | 45 | return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N031 : N114 , Add, Flags); |
9692 | 45 | } |
9693 | 141k | |
9694 | 141k | // FIXME: Auto-upgrade the target/function-level option. |
9695 | 141k | if (141k Options.NoSignedZerosFPMath || 141k N->getFlags().hasNoSignedZeros()141k ) { |
9696 | 1.32k | // fold (fadd A, 0) -> A |
9697 | 1.32k | if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) |
9698 | 156 | if (156 N1C->isZero()156 ) |
9699 | 14 | return N0; |
9700 | 141k | } |
9701 | 141k | |
9702 | 141k | // If 'unsafe math' is enabled, fold lots of things. |
9703 | 141k | if (141k Options.UnsafeFPMath141k ) { |
9704 | 2.06k | // No FP constant should be created after legalization as Instruction |
9705 | 2.06k | // Selection pass has a hard time dealing with FP constants. |
9706 | 2.06k | bool AllowNewConst = (Level < AfterLegalizeDAG); |
9707 | 2.06k | |
9708 | 2.06k | // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) |
9709 | 2.06k | if (N1CFP && 2.06k N0.getOpcode() == ISD::FADD99 && N0.getNode()->hasOneUse()2 && |
9710 | 2 | isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) |
9711 | 2 | return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), |
9712 | 2 | DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, |
9713 | 2 | Flags), |
9714 | 2 | Flags); |
9715 | 2.05k | |
9716 | 2.05k | // If allowed, fold (fadd (fneg x), x) -> 0.0 |
9717 | 2.05k | if (2.05k AllowNewConst && 2.05k N0.getOpcode() == ISD::FNEG1.31k && N0.getOperand(0) == N10 ) |
9718 | 0 | return DAG.getConstantFP(0.0, DL, VT); |
9719 | 2.05k | |
9720 | 2.05k | // If allowed, fold (fadd x, (fneg x)) -> 0.0 |
9721 | 2.05k | if (2.05k AllowNewConst && 2.05k N1.getOpcode() == ISD::FNEG1.31k && N1.getOperand(0) == N00 ) |
9722 | 0 | return DAG.getConstantFP(0.0, DL, VT); |
9723 | 2.05k | |
9724 | 2.05k | // We can fold chains of FADD's of the same value into multiplications. |
9725 | 2.05k | // This transform is not safe in general because we are reducing the number |
9726 | 2.05k | // of rounding steps. |
9727 | 2.05k | if (2.05k TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && 2.05k !N0CFP2.00k && !N1CFP2.00k ) { |
9728 | 1.91k | if (N0.getOpcode() == ISD::FMUL1.91k ) { |
9729 | 436 | bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); |
9730 | 436 | bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); |
9731 | 436 | |
9732 | 436 | // (fadd (fmul x, c), x) -> (fmul x, c+1) |
9733 | 436 | if (CFP01 && 436 !CFP0054 && N0.getOperand(0) == N154 ) { |
9734 | 2 | SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), |
9735 | 2 | DAG.getConstantFP(1.0, DL, VT), Flags); |
9736 | 2 | return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); |
9737 | 2 | } |
9738 | 434 | |
9739 | 434 | // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) |
9740 | 434 | if (434 CFP01 && 434 !CFP0052 && N1.getOpcode() == ISD::FADD52 && |
9741 | 4 | N1.getOperand(0) == N1.getOperand(1) && |
9742 | 434 | N0.getOperand(0) == N1.getOperand(0)4 ) { |
9743 | 4 | SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), |
9744 | 4 | DAG.getConstantFP(2.0, DL, VT), Flags); |
9745 | 4 | return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); |
9746 | 4 | } |
9747 | 1.90k | } |
9748 | 1.90k | |
9749 | 1.90k | if (1.90k N1.getOpcode() == ISD::FMUL1.90k ) { |
9750 | 581 | bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); |
9751 | 581 | bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); |
9752 | 581 | |
9753 | 581 | // (fadd x, (fmul x, c)) -> (fmul x, c+1) |
9754 | 581 | if (CFP11 && 581 !CFP1018 && N1.getOperand(0) == N018 ) { |
9755 | 3 | SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), |
9756 | 3 | DAG.getConstantFP(1.0, DL, VT), Flags); |
9757 | 3 | return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); |
9758 | 3 | } |
9759 | 578 | |
9760 | 578 | // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) |
9761 | 578 | if (578 CFP11 && 578 !CFP1015 && N0.getOpcode() == ISD::FADD15 && |
9762 | 4 | N0.getOperand(0) == N0.getOperand(1) && |
9763 | 578 | N1.getOperand(0) == N0.getOperand(0)4 ) { |
9764 | 4 | SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), |
9765 | 4 | DAG.getConstantFP(2.0, DL, VT), Flags); |
9766 | 4 | return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); |
9767 | 4 | } |
9768 | 1.89k | } |
9769 | 1.89k | |
9770 | 1.89k | if (1.89k N0.getOpcode() == ISD::FADD && 1.89k AllowNewConst406 ) { |
9771 | 236 | bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); |
9772 | 236 | // (fadd (fadd x, x), x) -> (fmul x, 3.0) |
9773 | 236 | if (!CFP00 && 236 N0.getOperand(0) == N0.getOperand(1)236 && |
9774 | 236 | (N0.getOperand(0) == N1)32 ) { |
9775 | 6 | return DAG.getNode(ISD::FMUL, DL, VT, |
9776 | 6 | N1, DAG.getConstantFP(3.0, DL, VT), Flags); |
9777 | 6 | } |
9778 | 1.89k | } |
9779 | 1.89k | |
9780 | 1.89k | if (1.89k N1.getOpcode() == ISD::FADD && 1.89k AllowNewConst265 ) { |
9781 | 173 | bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); |
9782 | 173 | // (fadd x, (fadd x, x)) -> (fmul x, 3.0) |
9783 | 173 | if (!CFP10 && 173 N1.getOperand(0) == N1.getOperand(1)173 && |
9784 | 173 | N1.getOperand(0) == N05 ) { |
9785 | 2 | return DAG.getNode(ISD::FMUL, DL, VT, |
9786 | 2 | N0, DAG.getConstantFP(3.0, DL, VT), Flags); |
9787 | 2 | } |
9788 | 1.88k | } |
9789 | 1.88k | |
9790 | 1.88k | // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) |
9791 | 1.88k | if (1.88k AllowNewConst && |
9792 | 1.88k | N0.getOpcode() == ISD::FADD1.15k && N1.getOpcode() == ISD::FADD230 && |
9793 | 17 | N0.getOperand(0) == N0.getOperand(1) && |
9794 | 3 | N1.getOperand(0) == N1.getOperand(1) && |
9795 | 1.88k | N0.getOperand(0) == N1.getOperand(0)3 ) { |
9796 | 3 | return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), |
9797 | 3 | DAG.getConstantFP(4.0, DL, VT), Flags); |
9798 | 3 | } |
9799 | 141k | } |
9800 | 2.06k | } // enable-unsafe-fp-math |
9801 | 141k | |
9802 | 141k | // FADD -> FMA combines: |
9803 | 141k | if (SDValue 141k Fused141k = visitFADDForFMACombine(N)) { |
9804 | 1.90k | AddToWorklist(Fused.getNode()); |
9805 | 1.90k | return Fused; |
9806 | 1.90k | } |
9807 | 139k | return SDValue(); |
9808 | 139k | } |
9809 | | |
9810 | 29.4k | SDValue DAGCombiner::visitFSUB(SDNode *N) { |
9811 | 29.4k | SDValue N0 = N->getOperand(0); |
9812 | 29.4k | SDValue N1 = N->getOperand(1); |
9813 | 29.4k | ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); |
9814 | 29.4k | ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); |
9815 | 29.4k | EVT VT = N->getValueType(0); |
9816 | 29.4k | SDLoc DL(N); |
9817 | 29.4k | const TargetOptions &Options = DAG.getTarget().Options; |
9818 | 29.4k | const SDNodeFlags Flags = N->getFlags(); |
9819 | 29.4k | |
9820 | 29.4k | // fold vector ops |
9821 | 29.4k | if (VT.isVector()) |
9822 | 3.99k | if (SDValue 3.99k FoldedVOp3.99k = SimplifyVBinOp(N)) |
9823 | 1 | return FoldedVOp; |
9824 | 29.4k | |
9825 | 29.4k | // fold (fsub c1, c2) -> c1-c2 |
9826 | 29.4k | if (29.4k N0CFP && 29.4k N1CFP4.08k ) |
9827 | 0 | return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags); |
9828 | 29.4k | |
9829 | 29.4k | if (SDValue 29.4k NewSel29.4k = foldBinOpIntoSelect(N)) |
9830 | 2 | return NewSel; |
9831 | 29.4k | |
9832 | 29.4k | // fold (fsub A, (fneg B)) -> (fadd A, B) |
9833 | 29.4k | if (29.4k isNegatibleForFree(N1, LegalOperations, TLI, &Options)29.4k ) |
9834 | 847 | return DAG.getNode(ISD::FADD, DL, VT, N0, |
9835 | 847 | GetNegatedExpression(N1, DAG, LegalOperations), Flags); |
9836 | 28.5k | |
9837 | 28.5k | // FIXME: Auto-upgrade the target/function-level option. |
9838 | 28.5k | if (28.5k Options.NoSignedZerosFPMath || 28.5k N->getFlags().hasNoSignedZeros()28.3k ) { |
9839 | 721 | // (fsub 0, B) -> -B |
9840 | 721 | if (N0CFP && 721 N0CFP->isZero()266 ) { |
9841 | 20 | if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) |
9842 | 0 | return GetNegatedExpression(N1, DAG, LegalOperations); |
9843 | 20 | if (20 !LegalOperations || 20 TLI.isOperationLegal(ISD::FNEG, VT)0 ) |
9844 | 20 | return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); |
9845 | 28.5k | } |
9846 | 721 | } |
9847 | 28.5k | |
9848 | 28.5k | // If 'unsafe math' is enabled, fold lots of things. |
9849 | 28.5k | if (28.5k Options.UnsafeFPMath28.5k ) { |
9850 | 826 | // (fsub A, 0) -> A |
9851 | 826 | if (N1CFP && 826 N1CFP->isZero()7 ) |
9852 | 1 | return N0; |
9853 | 825 | |
9854 | 825 | // (fsub x, x) -> 0.0 |
9855 | 825 | if (825 N0 == N1825 ) |
9856 | 5 | return DAG.getConstantFP(0.0f, DL, VT); |
9857 | 820 | |
9858 | 820 | // (fsub x, (fadd x, y)) -> (fneg y) |
9859 | 820 | // (fsub x, (fadd y, x)) -> (fneg y) |
9860 | 820 | if (820 N1.getOpcode() == ISD::FADD820 ) { |
9861 | 8 | SDValue N10 = N1->getOperand(0); |
9862 | 8 | SDValue N11 = N1->getOperand(1); |
9863 | 8 | |
9864 | 8 | if (N10 == N0 && 8 isNegatibleForFree(N11, LegalOperations, TLI, &Options)0 ) |
9865 | 0 | return GetNegatedExpression(N11, DAG, LegalOperations); |
9866 | 8 | |
9867 | 8 | if (8 N11 == N0 && 8 isNegatibleForFree(N10, LegalOperations, TLI, &Options)0 ) |
9868 | 0 | return GetNegatedExpression(N10, DAG, LegalOperations); |
9869 | 28.5k | } |
9870 | 826 | } |
9871 | 28.5k | |
9872 | 28.5k | // FSUB -> FMA combines: |
9873 | 28.5k | if (SDValue 28.5k Fused28.5k = visitFSUBForFMACombine(N)) { |
9874 | 872 | AddToWorklist(Fused.getNode()); |
9875 | 872 | return Fused; |
9876 | 872 | } |
9877 | 27.6k | |
9878 | 27.6k | return SDValue(); |
9879 | 27.6k | } |
9880 | | |
9881 | 123k | SDValue DAGCombiner::visitFMUL(SDNode *N) { |
9882 | 123k | SDValue N0 = N->getOperand(0); |
9883 | 123k | SDValue N1 = N->getOperand(1); |
9884 | 123k | ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); |
9885 | 123k | ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); |
9886 | 123k | EVT VT = N->getValueType(0); |
9887 | 123k | SDLoc DL(N); |
9888 | 123k | const TargetOptions &Options = DAG.getTarget().Options; |
9889 | 123k | const SDNodeFlags Flags = N->getFlags(); |
9890 | 123k | |
9891 | 123k | // fold vector ops |
9892 | 123k | if (VT.isVector()123k ) { |
9893 | 18.0k | // This just handles C1 * C2 for vectors. Other vector folds are below. |
9894 | 18.0k | if (SDValue FoldedVOp = SimplifyVBinOp(N)) |
9895 | 2 | return FoldedVOp; |
9896 | 123k | } |
9897 | 123k | |
9898 | 123k | // fold (fmul c1, c2) -> c1*c2 |
9899 | 123k | if (123k N0CFP && 123k N1CFP105 ) |
9900 | 0 | return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); |
9901 | 123k | |
9902 | 123k | // canonicalize constant to RHS |
9903 | 123k | if (123k isConstantFPBuildVectorOrConstantFP(N0) && |
9904 | 179 | !isConstantFPBuildVectorOrConstantFP(N1)) |
9905 | 179 | return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); |
9906 | 123k | |
9907 | 123k | // fold (fmul A, 1.0) -> A |
9908 | 123k | if (123k N1CFP && 123k N1CFP->isExactlyValue(1.0)33.9k ) |
9909 | 79 | return N0; |
9910 | 123k | |
9911 | 123k | if (SDValue 123k NewSel123k = foldBinOpIntoSelect(N)) |
9912 | 3 | return NewSel; |
9913 | 123k | |
9914 | 123k | if (123k Options.UnsafeFPMath123k ) { |
9915 | 2.95k | // fold (fmul A, 0) -> 0 |
9916 | 2.95k | if (N1CFP && 2.95k N1CFP->isZero()319 ) |
9917 | 2 | return N1; |
9918 | 2.95k | |
9919 | 2.95k | // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) |
9920 | 2.95k | if (2.95k N0.getOpcode() == ISD::FMUL2.95k ) { |
9921 | 543 | // Fold scalars or any vector constants (not just splats). |
9922 | 543 | // This fold is done in general by InstCombine, but extra fmul insts |
9923 | 543 | // may have been generated during lowering. |
9924 | 543 | SDValue N00 = N0.getOperand(0); |
9925 | 543 | SDValue N01 = N0.getOperand(1); |
9926 | 543 | auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); |
9927 | 543 | auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); |
9928 | 543 | auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); |
9929 | 543 | |
9930 | 543 | // Check 1: Make sure that the first operand of the inner multiply is NOT |
9931 | 543 | // a constant. Otherwise, we may induce infinite looping. |
9932 | 543 | if (!(isConstOrConstSplatFP(N00) || 543 (BV00 && 541 BV00->isConstant()22 ))) { |
9933 | 519 | // Check 2: Make sure that the second operand of the inner multiply and |
9934 | 519 | // the second operand of the outer multiply are constants. |
9935 | 519 | if ((N1CFP && 519 isConstOrConstSplatFP(N01)58 ) || |
9936 | 519 | (BV1 && 505 BV0127 && BV1->isConstant()7 && BV01->isConstant()7 )) { |
9937 | 21 | SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); |
9938 | 21 | return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); |
9939 | 21 | } |
9940 | 2.93k | } |
9941 | 543 | } |
9942 | 2.93k | |
9943 | 2.93k | // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) |
9944 | 2.93k | // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs |
9945 | 2.93k | // during an early run of DAGCombiner can prevent folding with fmuls |
9946 | 2.93k | // inserted during lowering. |
9947 | 2.93k | if (2.93k N0.getOpcode() == ISD::FADD && |
9948 | 269 | (N0.getOperand(0) == N0.getOperand(1)) && |
9949 | 2.93k | N0.hasOneUse()38 ) { |
9950 | 18 | const SDValue Two = DAG.getConstantFP(2.0, DL, VT); |
9951 | 18 | SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); |
9952 | 18 | return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); |
9953 | 18 | } |
9954 | 123k | } |
9955 | 123k | |
9956 | 123k | // fold (fmul X, 2.0) -> (fadd X, X) |
9957 | 123k | if (123k N1CFP && 123k N1CFP->isExactlyValue(+2.0)33.8k ) |
9958 | 527 | return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); |
9959 | 123k | |
9960 | 123k | // fold (fmul X, -1.0) -> (fneg X) |
9961 | 123k | if (123k N1CFP && 123k N1CFP->isExactlyValue(-1.0)33.3k ) |
9962 | 41 | if (41 !LegalOperations || 41 TLI.isOperationLegal(ISD::FNEG, VT)0 ) |
9963 | 41 | return DAG.getNode(ISD::FNEG, DL, VT, N0); |
9964 | 123k | |
9965 | 123k | // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) |
9966 | 123k | if (char 123k LHSNeg123k = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { |
9967 | 1.04k | if (char RHSNeg1.04k = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { |
9968 | 223 | // Both can be negated for free, check to see if at least one is cheaper |
9969 | 223 | // negated. |
9970 | 223 | if (LHSNeg == 2 || 223 RHSNeg == 2204 ) |
9971 | 19 | return DAG.getNode(ISD::FMUL, DL, VT, |
9972 | 19 | GetNegatedExpression(N0, DAG, LegalOperations), |
9973 | 19 | GetNegatedExpression(N1, DAG, LegalOperations), |
9974 | 19 | Flags); |
9975 | 123k | } |
9976 | 1.04k | } |
9977 | 123k | |
9978 | 123k | // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) |
9979 | 123k | // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) |
9980 | 123k | if (123k Flags.hasNoNaNs() && 123k Flags.hasNoSignedZeros()1.70k && |
9981 | 1.54k | (N0.getOpcode() == ISD::SELECT || 1.54k N1.getOpcode() == ISD::SELECT1.54k ) && |
9982 | 123k | TLI.isOperationLegal(ISD::FABS, VT)2 ) { |
9983 | 2 | SDValue Select = N0, X = N1; |
9984 | 2 | if (Select.getOpcode() != ISD::SELECT) |
9985 | 2 | std::swap(Select, X); |
9986 | 2 | |
9987 | 2 | SDValue Cond = Select.getOperand(0); |
9988 | 2 | auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1)); |
9989 | 2 | auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2)); |
9990 | 2 | |
9991 | 2 | if (TrueOpnd && 2 FalseOpnd2 && |
9992 | 2 | Cond.getOpcode() == ISD::SETCC2 && Cond.getOperand(0) == X2 && |
9993 | 2 | isa<ConstantFPSDNode>(Cond.getOperand(1)) && |
9994 | 2 | cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)2 ) { |
9995 | 2 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); |
9996 | 2 | switch (CC) { |
9997 | 0 | default: break; |
9998 | 1 | case ISD::SETOLT: |
9999 | 1 | case ISD::SETULT: |
10000 | 1 | case ISD::SETOLE: |
10001 | 1 | case ISD::SETULE: |
10002 | 1 | case ISD::SETLT: |
10003 | 1 | case ISD::SETLE: |
10004 | 1 | std::swap(TrueOpnd, FalseOpnd); |
10005 | 1 | // Fall through |
10006 | 2 | case ISD::SETOGT: |
10007 | 2 | case ISD::SETUGT: |
10008 | 2 | case ISD::SETOGE: |
10009 | 2 | case ISD::SETUGE: |
10010 | 2 | case ISD::SETGT: |
10011 | 2 | case ISD::SETGE: |
10012 | 2 | if (TrueOpnd->isExactlyValue(-1.0) && 2 FalseOpnd->isExactlyValue(1.0)1 && |
10013 | 1 | TLI.isOperationLegal(ISD::FNEG, VT)) |
10014 | 1 | return DAG.getNode(ISD::FNEG, DL, VT, |
10015 | 1 | DAG.getNode(ISD::FABS, DL, VT, X)); |
10016 | 1 | if (1 TrueOpnd->isExactlyValue(1.0) && 1 FalseOpnd->isExactlyValue(-1.0)1 ) |
10017 | 1 | return DAG.getNode(ISD::FABS, DL, VT, X); |
10018 | 0 |
|
10019 | 0 | break; |
10020 | 2 | } |
10021 | 2 | } |
10022 | 2 | } |
10023 | 123k | |
10024 | 123k | // FMUL -> FMA combines: |
10025 | 123k | if (SDValue 123k Fused123k = visitFMULForFMADistributiveCombine(N)) { |
10026 | 142 | AddToWorklist(Fused.getNode()); |
10027 | 142 | return Fused; |
10028 | 142 | } |
10029 | 122k | |
10030 | 122k | return SDValue(); |
10031 | 122k | } |
10032 | | |
10033 | 6.34k | SDValue DAGCombiner::visitFMA(SDNode *N) { |
10034 | 6.34k | SDValue N0 = N->getOperand(0); |
10035 | 6.34k | SDValue N1 = N->getOperand(1); |
10036 | 6.34k | SDValue N2 = N->getOperand(2); |
10037 | 6.34k | ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); |
10038 | 6.34k | ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); |
10039 | 6.34k | EVT VT = N->getValueType(0); |
10040 | 6.34k | SDLoc DL(N); |
10041 | 6.34k | const TargetOptions &Options = DAG.getTarget().Options; |
10042 | 6.34k | |
10043 | 6.34k | // Constant fold FMA. |
10044 | 6.34k | if (isa<ConstantFPSDNode>(N0) && |
10045 | 58 | isa<ConstantFPSDNode>(N1) && |
10046 | 6.34k | isa<ConstantFPSDNode>(N2)18 ) { |
10047 | 0 | return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); |
10048 | 0 | } |
10049 | 6.34k | |
10050 | 6.34k | if (6.34k Options.UnsafeFPMath6.34k ) { |
10051 | 1.90k | if (N0CFP && 1.90k N0CFP->isZero()0 ) |
10052 | 0 | return N2; |
10053 | 1.90k | if (1.90k N1CFP && 1.90k N1CFP->isZero()40 ) |
10054 | 0 | return N2; |
10055 | 6.34k | } |
10056 | 6.34k | // TODO: The FMA node should have flags that propagate to these nodes. |
10057 | 6.34k | if (6.34k N0CFP && 6.34k N0CFP->isExactlyValue(1.0)58 ) |
10058 | 0 | return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); |
10059 | 6.34k | if (6.34k N1CFP && 6.34k N1CFP->isExactlyValue(1.0)317 ) |
10060 | 2 | return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); |
10061 | 6.34k | |
10062 | 6.34k | // Canonicalize (fma c, x, y) -> (fma x, c, y) |
10063 | 6.34k | if (6.34k isConstantFPBuildVectorOrConstantFP(N0) && |
10064 | 75 | !isConstantFPBuildVectorOrConstantFP(N1)) |
10065 | 57 | return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); |
10066 | 6.28k | |
10067 | 6.28k | // TODO: FMA nodes should have flags that propagate to the created nodes. |
10068 | 6.28k | // For now, create a Flags object for use with all unsafe math transforms. |
10069 | 6.28k | SDNodeFlags Flags; |
10070 | 6.28k | Flags.setUnsafeAlgebra(true); |
10071 | 6.28k | |
10072 | 6.28k | if (Options.UnsafeFPMath6.28k ) { |
10073 | 1.89k | // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) |
10074 | 1.89k | if (N2.getOpcode() == ISD::FMUL && 1.89k N0 == N2.getOperand(0)92 && |
10075 | 17 | isConstantFPBuildVectorOrConstantFP(N1) && |
10076 | 1.89k | isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))17 ) { |
10077 | 17 | return DAG.getNode(ISD::FMUL, DL, VT, N0, |
10078 | 17 | DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1), |
10079 | 17 | Flags), Flags); |
10080 | 17 | } |
10081 | 1.87k | |
10082 | 1.87k | // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) |
10083 | 1.87k | if (1.87k N0.getOpcode() == ISD::FMUL && |
10084 | 58 | isConstantFPBuildVectorOrConstantFP(N1) && |
10085 | 1.87k | isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))17 ) { |
10086 | 17 | return DAG.getNode(ISD::FMA, DL, VT, |
10087 | 17 | N0.getOperand(0), |
10088 | 17 | DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1), |
10089 | 17 | Flags), |
10090 | 17 | N2); |
10091 | 17 | } |
10092 | 6.25k | } |
10093 | 6.25k | |
10094 | 6.25k | // (fma x, 1, y) -> (fadd x, y) |
10095 | 6.25k | // (fma x, -1, y) -> (fadd (fneg x), y) |
10096 | 6.25k | if (6.25k N1CFP6.25k ) { |
10097 | 313 | if (N1CFP->isExactlyValue(1.0)) |
10098 | 313 | // TODO: The FMA node should have flags that propagate to this node. |
10099 | 0 | return DAG.getNode(ISD::FADD, DL, VT, N0, N2); |
10100 | 313 | |
10101 | 313 | if (313 N1CFP->isExactlyValue(-1.0) && |
10102 | 313 | (!LegalOperations || 1 TLI.isOperationLegal(ISD::FNEG, VT)0 )) { |
10103 | 1 | SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); |
10104 | 1 | AddToWorklist(RHSNeg.getNode()); |
10105 | 1 | // TODO: The FMA node should have flags that propagate to this node. |
10106 | 1 | return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); |
10107 | 1 | } |
10108 | 6.25k | } |
10109 | 6.25k | |
10110 | 6.25k | if (6.25k Options.UnsafeFPMath6.25k ) { |
10111 | 1.86k | // (fma x, c, x) -> (fmul x, (c+1)) |
10112 | 1.86k | if (N1CFP && 1.86k N0 == N236 ) { |
10113 | 1 | return DAG.getNode(ISD::FMUL, DL, VT, N0, |
10114 | 1 | DAG.getNode(ISD::FADD, DL, VT, N1, |
10115 | 1 | DAG.getConstantFP(1.0, DL, VT), Flags), |
10116 | 1 | Flags); |
10117 | 1 | } |
10118 | 1.86k | |
10119 | 1.86k | // (fma x, c, (fneg x)) -> (fmul x, (c-1)) |
10120 | 1.86k | if (1.86k N1CFP && 1.86k N2.getOpcode() == ISD::FNEG35 && N2.getOperand(0) == N01 ) { |
10121 | 1 | return DAG.getNode(ISD::FMUL, DL, VT, N0, |
10122 | 1 | DAG.getNode(ISD::FADD, DL, VT, N1, |
10123 | 1 | DAG.getConstantFP(-1.0, DL, VT), Flags), |
10124 | 1 | Flags); |
10125 | 1 | } |
10126 | 6.24k | } |
10127 | 6.24k | |
10128 | 6.24k | return SDValue(); |
10129 | 6.24k | } |
10130 | | |
10131 | | // Combine multiple FDIVs with the same divisor into multiple FMULs by the |
10132 | | // reciprocal. |
10133 | | // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) |
10134 | | // Notice that this is not always beneficial. One reason is different targets |
10135 | | // may have different costs for FDIV and FMUL, so sometimes the cost of two |
10136 | | // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason |
10137 | | // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". |
10138 | 121k | SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { |
10139 | 121k | bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; |
10140 | 121k | const SDNodeFlags Flags = N->getFlags(); |
10141 | 121k | if (!UnsafeMath && 121k !Flags.hasAllowReciprocal()120k ) |
10142 | 120k | return SDValue(); |
10143 | 593 | |
10144 | 593 | // Skip if current node is a reciprocal. |
10145 | 593 | SDValue N0 = N->getOperand(0); |
10146 | 593 | ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); |
10147 | 593 | if (N0CFP && 593 N0CFP->isExactlyValue(1.0)105 ) |
10148 | 102 | return SDValue(); |
10149 | 491 | |
10150 | 491 | // Exit early if the target does not want this transform or if there can't |
10151 | 491 | // possibly be enough uses of the divisor to make the transform worthwhile. |
10152 | 491 | SDValue N1 = N->getOperand(1); |
10153 | 491 | unsigned MinUses = TLI.combineRepeatedFPDivisors(); |
10154 | 491 | if (!MinUses || 491 N1->use_size() < MinUses251 ) |
10155 | 470 | return SDValue(); |
10156 | 21 | |
10157 | 21 | // Find all FDIV users of the same divisor. |
10158 | 21 | // Use a set because duplicates may be present in the user list. |
10159 | 21 | SetVector<SDNode *> Users; |
10160 | 47 | for (auto *U : N1->uses()) { |
10161 | 47 | if (U->getOpcode() == ISD::FDIV && 47 U->getOperand(1) == N144 ) { |
10162 | 44 | // This division is eligible for optimization only if global unsafe math |
10163 | 44 | // is enabled or if this division allows reciprocal formation. |
10164 | 44 | if (UnsafeMath || 44 U->getFlags().hasAllowReciprocal()23 ) |
10165 | 40 | Users.insert(U); |
10166 | 44 | } |
10167 | 47 | } |
10168 | 21 | |
10169 | 21 | // Now that we have the actual number of divisor uses, make sure it meets |
10170 | 21 | // the minimum threshold specified by the target. |
10171 | 21 | if (Users.size() < MinUses) |
10172 | 8 | return SDValue(); |
10173 | 13 | |
10174 | 13 | EVT VT = N->getValueType(0); |
10175 | 13 | SDLoc DL(N); |
10176 | 13 | SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); |
10177 | 13 | SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); |
10178 | 13 | |
10179 | 13 | // Dividend / Divisor -> Dividend * Reciprocal |
10180 | 31 | for (auto *U : Users) { |
10181 | 31 | SDValue Dividend = U->getOperand(0); |
10182 | 31 | if (Dividend != FPOne31 ) { |
10183 | 30 | SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, |
10184 | 30 | Reciprocal, Flags); |
10185 | 30 | CombineTo(U, NewNode); |
10186 | 31 | } else if (1 U != Reciprocal.getNode()1 ) { |
10187 | 0 | // In the absence of fast-math-flags, this user node is always the |
10188 | 0 | // same node as Reciprocal, but with FMF they may be different nodes. |
10189 | 0 | CombineTo(U, Reciprocal); |
10190 | 0 | } |
10191 | 31 | } |
10192 | 121k | return SDValue(N, 0); // N was replaced. |
10193 | 121k | } |
10194 | | |
10195 | 121k | SDValue DAGCombiner::visitFDIV(SDNode *N) { |
10196 | 121k | SDValue N0 = N->getOperand(0); |
10197 | 121k | SDValue N1 = N->getOperand(1); |
10198 | 121k | ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); |
10199 | 121k | ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); |
10200 | 121k | EVT VT = N->getValueType(0); |
10201 | 121k | SDLoc DL(N); |
10202 | 121k | const TargetOptions &Options = DAG.getTarget().Options; |
10203 | 121k | SDNodeFlags Flags = N->getFlags(); |
10204 | 121k | |
10205 | 121k | // fold vector ops |
10206 | 121k | if (VT.isVector()) |
10207 | 29.2k | if (SDValue 29.2k FoldedVOp29.2k = SimplifyVBinOp(N)) |
10208 | 0 | return FoldedVOp; |
10209 | 121k | |
10210 | 121k | // fold (fdiv c1, c2) -> c1/c2 |
10211 | 121k | if (121k N0CFP && 121k N1CFP30.8k ) |
10212 | 3 | return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); |
10213 | 121k | |
10214 | 121k | if (SDValue 121k NewSel121k = foldBinOpIntoSelect(N)) |
10215 | 2 | return NewSel; |
10216 | 121k | |
10217 | 121k | if (121k Options.UnsafeFPMath121k ) { |
10218 | 601 | // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. |
10219 | 601 | if (N1CFP601 ) { |
10220 | 21 | // Compute the reciprocal 1.0 / c2. |
10221 | 21 | const APFloat &N1APF = N1CFP->getValueAPF(); |
10222 | 21 | APFloat Recip(N1APF.getSemantics(), 1); // 1.0 |
10223 | 21 | APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); |
10224 | 21 | // Only do the transform if the reciprocal is a legal fp immediate that |
10225 | 21 | // isn't too nasty (eg NaN, denormal, ...). |
10226 | 21 | if ((st == APFloat::opOK || 21 st == APFloat::opInexact12 ) && // Not too nasty |
10227 | 17 | (!LegalOperations || |
10228 | 17 | // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM |
10229 | 17 | // backend)... we should handle this gracefully after Legalize. |
10230 | 17 | // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || |
10231 | 0 | TLI.isOperationLegal(ISD::ConstantFP, VT) || |
10232 | 0 | TLI.isFPImmLegal(Recip, VT))) |
10233 | 17 | return DAG.getNode(ISD::FMUL, DL, VT, N0, |
10234 | 17 | DAG.getConstantFP(Recip, DL, VT), Flags); |
10235 | 584 | } |
10236 | 584 | |
10237 | 584 | // If this FDIV is part of a reciprocal square root, it may be folded |
10238 | 584 | // into a target-specific square root estimate instruction. |
10239 | 584 | if (584 N1.getOpcode() == ISD::FSQRT584 ) { |
10240 | 84 | if (SDValue RV84 = buildRsqrtEstimate(N1.getOperand(0), Flags)) { |
10241 | 43 | return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); |
10242 | 43 | } |
10243 | 500 | } else if (500 N1.getOpcode() == ISD::FP_EXTEND && |
10244 | 500 | N1.getOperand(0).getOpcode() == ISD::FSQRT2 ) { |
10245 | 2 | if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), |
10246 | 2 | Flags)) { |
10247 | 2 | RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); |
10248 | 2 | AddToWorklist(RV.getNode()); |
10249 | 2 | return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); |
10250 | 2 | } |
10251 | 498 | } else if (498 N1.getOpcode() == ISD::FP_ROUND && |
10252 | 498 | N1.getOperand(0).getOpcode() == ISD::FSQRT2 ) { |
10253 | 2 | if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), |
10254 | 2 | Flags)) { |
10255 | 2 | RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); |
10256 | 2 | AddToWorklist(RV.getNode()); |
10257 | 2 | return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); |
10258 | 2 | } |
10259 | 496 | } else if (496 N1.getOpcode() == ISD::FMUL496 ) { |
10260 | 2 | // Look through an FMUL. Even though this won't remove the FDIV directly, |
10261 | 2 | // it's still worthwhile to get rid of the FSQRT if possible. |
10262 | 2 | SDValue SqrtOp; |
10263 | 2 | SDValue OtherOp; |
10264 | 2 | if (N1.getOperand(0).getOpcode() == ISD::FSQRT2 ) { |
10265 | 2 | SqrtOp = N1.getOperand(0); |
10266 | 2 | OtherOp = N1.getOperand(1); |
10267 | 2 | } else if (0 N1.getOperand(1).getOpcode() == ISD::FSQRT0 ) { |
10268 | 0 | SqrtOp = N1.getOperand(1); |
10269 | 0 | OtherOp = N1.getOperand(0); |
10270 | 0 | } |
10271 | 2 | if (SqrtOp.getNode()2 ) { |
10272 | 2 | // We found a FSQRT, so try to make this fold: |
10273 | 2 | // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) |
10274 | 2 | if (SDValue RV2 = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { |
10275 | 2 | RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); |
10276 | 2 | AddToWorklist(RV.getNode()); |
10277 | 2 | return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); |
10278 | 2 | } |
10279 | 535 | } |
10280 | 500 | } |
10281 | 535 | |
10282 | 535 | // Fold into a reciprocal estimate and multiply instead of a real divide. |
10283 | 535 | if (SDValue 535 RV535 = BuildReciprocalEstimate(N1, Flags)) { |
10284 | 229 | AddToWorklist(RV.getNode()); |
10285 | 229 | return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); |
10286 | 229 | } |
10287 | 121k | } |
10288 | 121k | |
10289 | 121k | // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) |
10290 | 121k | if (char 121k LHSNeg121k = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { |
10291 | 5.94k | if (char RHSNeg5.94k = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { |
10292 | 87 | // Both can be negated for free, check to see if at least one is cheaper |
10293 | 87 | // negated. |
10294 | 87 | if (LHSNeg == 2 || 87 RHSNeg == 286 ) |
10295 | 9 | return DAG.getNode(ISD::FDIV, SDLoc(N), VT, |
10296 | 9 | GetNegatedExpression(N0, DAG, LegalOperations), |
10297 | 9 | GetNegatedExpression(N1, DAG, LegalOperations), |
10298 | 9 | Flags); |
10299 | 121k | } |
10300 | 5.94k | } |
10301 | 121k | |
10302 | 121k | if (SDValue 121k CombineRepeatedDivisors121k = combineRepeatedFPDivisors(N)) |
10303 | 13 | return CombineRepeatedDivisors; |
10304 | 121k | |
10305 | 121k | return SDValue(); |
10306 | 121k | } |
10307 | | |
10308 | 264 | SDValue DAGCombiner::visitFREM(SDNode *N) { |
10309 | 264 | SDValue N0 = N->getOperand(0); |
10310 | 264 | SDValue N1 = N->getOperand(1); |
10311 | 264 | ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); |
10312 | 264 | ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); |
10313 | 264 | EVT VT = N->getValueType(0); |
10314 | 264 | |
10315 | 264 | // fold (frem c1, c2) -> fmod(c1,c2) |
10316 | 264 | if (N0CFP && 264 N1CFP13 ) |
10317 | 9 | return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags()); |
10318 | 255 | |
10319 | 255 | if (SDValue 255 NewSel255 = foldBinOpIntoSelect(N)) |
10320 | 2 | return NewSel; |
10321 | 253 | |
10322 | 253 | return SDValue(); |
10323 | 253 | } |
10324 | | |
10325 | 2.78k | SDValue DAGCombiner::visitFSQRT(SDNode *N) { |
10326 | 2.78k | if (!DAG.getTarget().Options.UnsafeFPMath) |
10327 | 2.60k | return SDValue(); |
10328 | 179 | |
10329 | 179 | SDValue N0 = N->getOperand(0); |
10330 | 179 | if (TLI.isFsqrtCheap(N0, DAG)) |
10331 | 52 | return SDValue(); |
10332 | 127 | |
10333 | 127 | // TODO: FSQRT nodes should have flags that propagate to the created nodes. |
10334 | 127 | // For now, create a Flags object for use with all unsafe math transforms. |
10335 | 127 | SDNodeFlags Flags; |
10336 | 127 | Flags.setUnsafeAlgebra(true); |
10337 | 127 | return buildSqrtEstimate(N0, Flags); |
10338 | 127 | } |
10339 | | |
10340 | | /// copysign(x, fp_extend(y)) -> copysign(x, y) |
10341 | | /// copysign(x, fp_round(y)) -> copysign(x, y) |
10342 | 3.38k | static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { |
10343 | 3.38k | SDValue N1 = N->getOperand(1); |
10344 | 3.38k | if ((N1.getOpcode() == ISD::FP_EXTEND || |
10345 | 3.38k | N1.getOpcode() == ISD::FP_ROUND3.35k )) { |
10346 | 82 | // Do not optimize out type conversion of f128 type yet. |
10347 | 82 | // For some targets like x86_64, configuration is changed to keep one f128 |
10348 | 82 | // value in one SSE register, but instruction selection cannot handle |
10349 | 82 | // FCOPYSIGN on SSE registers yet. |
10350 | 82 | EVT N1VT = N1->getValueType(0); |
10351 | 82 | EVT N1Op0VT = N1->getOperand(0)->getValueType(0); |
10352 | 82 | return (N1VT == N1Op0VT || N1Op0VT != MVT::f128); |
10353 | 82 | } |
10354 | 3.30k | return false; |
10355 | 3.30k | } |
10356 | | |
10357 | 3.42k | SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { |
10358 | 3.42k | SDValue N0 = N->getOperand(0); |
10359 | 3.42k | SDValue N1 = N->getOperand(1); |
10360 | 3.42k | ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); |
10361 | 3.42k | ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); |
10362 | 3.42k | EVT VT = N->getValueType(0); |
10363 | 3.42k | |
10364 | 3.42k | if (N0CFP && 3.42k N1CFP1.07k ) // Constant fold |
10365 | 0 | return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); |
10366 | 3.42k | |
10367 | 3.42k | if (3.42k N1CFP3.42k ) { |
10368 | 24 | const APFloat &V = N1CFP->getValueAPF(); |
10369 | 24 | // copysign(x, c1) -> fabs(x) iff ispos(c1) |
10370 | 24 | // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) |
10371 | 24 | if (!V.isNegative()24 ) { |
10372 | 24 | if (!LegalOperations || 24 TLI.isOperationLegal(ISD::FABS, VT)0 ) |
10373 | 24 | return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); |
10374 | 0 | } else { |
10375 | 0 | if (!LegalOperations || 0 TLI.isOperationLegal(ISD::FNEG, VT)0 ) |
10376 | 0 | return DAG.getNode(ISD::FNEG, SDLoc(N), VT, |
10377 | 0 | DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); |
10378 | 3.39k | } |
10379 | 24 | } |
10380 | 3.39k | |
10381 | 3.39k | // copysign(fabs(x), y) -> copysign(x, y) |
10382 | 3.39k | // copysign(fneg(x), y) -> copysign(x, y) |
10383 | 3.39k | // copysign(copysign(x,z), y) -> copysign(x, y) |
10384 | 3.39k | if (3.39k N0.getOpcode() == ISD::FABS || 3.39k N0.getOpcode() == ISD::FNEG3.39k || |
10385 | 3.39k | N0.getOpcode() == ISD::FCOPYSIGN) |
10386 | 6 | return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); |
10387 | 3.39k | |
10388 | 3.39k | // copysign(x, abs(y)) -> abs(x) |
10389 | 3.39k | if (3.39k N1.getOpcode() == ISD::FABS3.39k ) |
10390 | 2 | return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); |
10391 | 3.38k | |
10392 | 3.38k | // copysign(x, copysign(y,z)) -> copysign(x, z) |
10393 | 3.38k | if (3.38k N1.getOpcode() == ISD::FCOPYSIGN3.38k ) |
10394 | 2 | return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); |
10395 | 3.38k | |
10396 | 3.38k | // copysign(x, fp_extend(y)) -> copysign(x, y) |
10397 | 3.38k | // copysign(x, fp_round(y)) -> copysign(x, y) |
10398 | 3.38k | if (3.38k CanCombineFCOPYSIGN_EXTEND_ROUND(N)3.38k ) |
10399 | 71 | return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); |
10400 | 3.31k | |
10401 | 3.31k | return SDValue(); |
10402 | 3.31k | } |
10403 | | |
10404 | 127k | SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { |
10405 | 127k | SDValue N0 = N->getOperand(0); |
10406 | 127k | EVT VT = N->getValueType(0); |
10407 | 127k | EVT OpVT = N0.getValueType(); |
10408 | 127k | |
10409 | 127k | // fold (sint_to_fp c1) -> c1fp |
10410 | 127k | if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && |
10411 | 127k | // ...but only if the target supports immediate floating-point values |
10412 | 5 | (!LegalOperations || |
10413 | 3 | TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) |
10414 | 4 | return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); |
10415 | 127k | |
10416 | 127k | // If the input is a legal type, and SINT_TO_FP is not legal on this target, |
10417 | 127k | // but UINT_TO_FP is legal on this target, try to convert. |
10418 | 127k | if (127k !TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && |
10419 | 127k | TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)3.97k ) { |
10420 | 104 | // If the sign bit is known to be zero, we can change this to UINT_TO_FP. |
10421 | 104 | if (DAG.SignBitIsZero(N0)) |
10422 | 0 | return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); |
10423 | 127k | } |
10424 | 127k | |
10425 | 127k | // The next optimizations are desirable only if SELECT_CC can be lowered. |
10426 | 127k | if (127k TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || 127k !LegalOperations67.0k ) { |
10427 | 89.0k | // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) |
10428 | 89.0k | if (N0.getOpcode() == ISD::SETCC && 89.0k N0.getValueType() == MVT::i1124 && |
10429 | 4 | !VT.isVector() && |
10430 | 4 | (!LegalOperations || |
10431 | 89.0k | TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)0 )) { |
10432 | 4 | SDLoc DL(N); |
10433 | 4 | SDValue Ops[] = |
10434 | 4 | { N0.getOperand(0), N0.getOperand(1), |
10435 | 4 | DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), |
10436 | 4 | N0.getOperand(2) }; |
10437 | 4 | return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); |
10438 | 4 | } |
10439 | 89.0k | |
10440 | 89.0k | // fold (sint_to_fp (zext (setcc x, y, cc))) -> |
10441 | 89.0k | // (select_cc x, y, 1.0, 0.0,, cc) |
10442 | 89.0k | if (89.0k N0.getOpcode() == ISD::ZERO_EXTEND && |
10443 | 89.0k | N0.getOperand(0).getOpcode() == ISD::SETCC502 &&!VT.isVector()27 && |
10444 | 22 | (!LegalOperations || |
10445 | 89.0k | TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)0 )) { |
10446 | 22 | SDLoc DL(N); |
10447 | 22 | SDValue Ops[] = |
10448 | 22 | { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), |
10449 | 22 | DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), |
10450 | 22 | N0.getOperand(0).getOperand(2) }; |
10451 | 22 | return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); |
10452 | 22 | } |
10453 | 127k | } |
10454 | 127k | |
10455 | 127k | return SDValue(); |
10456 | 127k | } |
10457 | | |
10458 | 92.9k | SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { |
10459 | 92.9k | SDValue N0 = N->getOperand(0); |
10460 | 92.9k | EVT VT = N->getValueType(0); |
10461 | 92.9k | EVT OpVT = N0.getValueType(); |
10462 | 92.9k | |
10463 | 92.9k | // fold (uint_to_fp c1) -> c1fp |
10464 | 92.9k | if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && |
10465 | 92.9k | // ...but only if the target supports immediate floating-point values |
10466 | 0 | (!LegalOperations || |
10467 | 0 | TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) |
10468 | 0 | return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); |
10469 | 92.9k | |
10470 | 92.9k | // If the input is a legal type, and UINT_TO_FP is not legal on this target, |
10471 | 92.9k | // but SINT_TO_FP is legal on this target, try to convert. |
10472 | 92.9k | if (92.9k !TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && |
10473 | 92.9k | TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)4.96k ) { |
10474 | 154 | // If the sign bit is known to be zero, we can change this to SINT_TO_FP. |
10475 | 154 | if (DAG.SignBitIsZero(N0)) |
10476 | 20 | return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); |
10477 | 92.9k | } |
10478 | 92.9k | |
10479 | 92.9k | // The next optimizations are desirable only if SELECT_CC can be lowered. |
10480 | 92.9k | if (92.9k TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || 92.9k !LegalOperations35.6k ) { |
10481 | 71.3k | // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) |
10482 | 71.3k | if (N0.getOpcode() == ISD::SETCC && 71.3k !VT.isVector()509 && |
10483 | 445 | (!LegalOperations || |
10484 | 71.3k | TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)0 )) { |
10485 | 445 | SDLoc DL(N); |
10486 | 445 | SDValue Ops[] = |
10487 | 445 | { N0.getOperand(0), N0.getOperand(1), |
10488 | 445 | DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), |
10489 | 445 | N0.getOperand(2) }; |
10490 | 445 | return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); |
10491 | 445 | } |
10492 | 92.5k | } |
10493 | 92.5k | |
10494 | 92.5k | return SDValue(); |
10495 | 92.5k | } |
10496 | | |
10497 | | // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x |
10498 | 29.2k | static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { |
10499 | 29.2k | SDValue N0 = N->getOperand(0); |
10500 | 29.2k | EVT VT = N->getValueType(0); |
10501 | 29.2k | |
10502 | 29.2k | if (N0.getOpcode() != ISD::UINT_TO_FP && 29.2k N0.getOpcode() != ISD::SINT_TO_FP29.2k ) |
10503 | 29.2k | return SDValue(); |
10504 | 35 | |
10505 | 35 | SDValue Src = N0.getOperand(0); |
10506 | 35 | EVT SrcVT = Src.getValueType(); |
10507 | 35 | bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP; |
10508 | 35 | bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT; |
10509 | 35 | |
10510 | 35 | // We can safely assume the conversion won't overflow the output range, |
10511 | 35 | // because (for example) (uint8_t)18293.f is undefined behavior. |
10512 | 35 | |
10513 | 35 | // Since we can assume the conversion won't overflow, our decision as to |
10514 | 35 | // whether the input will fit in the float should depend on the minimum |
10515 | 35 | // of the input range and output range. |
10516 | 35 | |
10517 | 35 | // This means this is also safe for a signed input and unsigned output, since |
10518 | 35 | // a negative input would lead to undefined behavior. |
10519 | 35 | unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; |
10520 | 35 | unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; |
10521 | 35 | unsigned ActualSize = std::min(InputSize, OutputSize); |
10522 | 35 | const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); |
10523 | 35 | |
10524 | 35 | // We can only fold away the float conversion if the input range can be |
10525 | 35 | // represented exactly in the float range. |
10526 | 35 | if (APFloat::semanticsPrecision(sem) >= ActualSize35 ) { |
10527 | 5 | if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()5 ) { |
10528 | 3 | unsigned ExtOp = IsInputSigned && IsOutputSigned1 ? ISD::SIGN_EXTEND1 |
10529 | 2 | : ISD::ZERO_EXTEND; |
10530 | 3 | return DAG.getNode(ExtOp, SDLoc(N), VT, Src); |
10531 | 3 | } |
10532 | 2 | if (2 VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()2 ) |
10533 | 1 | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); |
10534 | 1 | return DAG.getBitcast(VT, Src); |
10535 | 1 | } |
10536 | 30 | return SDValue(); |
10537 | 30 | } |
10538 | | |
10539 | 12.2k | SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { |
10540 | 12.2k | SDValue N0 = N->getOperand(0); |
10541 | 12.2k | EVT VT = N->getValueType(0); |
10542 | 12.2k | |
10543 | 12.2k | // fold (fp_to_sint c1fp) -> c1 |
10544 | 12.2k | if (isConstantFPBuildVectorOrConstantFP(N0)) |
10545 | 5 | return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); |
10546 | 12.2k | |
10547 | 12.2k | return FoldIntToFPToInt(N, DAG); |
10548 | 12.2k | } |
10549 | | |
10550 | 17.0k | SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { |
10551 | 17.0k | SDValue N0 = N->getOperand(0); |
10552 | 17.0k | EVT VT = N->getValueType(0); |
10553 | 17.0k | |
10554 | 17.0k | // fold (fp_to_uint c1fp) -> c1 |
10555 | 17.0k | if (isConstantFPBuildVectorOrConstantFP(N0)) |
10556 | 7 | return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); |
10557 | 17.0k | |
10558 | 17.0k | return FoldIntToFPToInt(N, DAG); |
10559 | 17.0k | } |
10560 | | |
10561 | 23.5k | SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { |
10562 | 23.5k | SDValue N0 = N->getOperand(0); |
10563 | 23.5k | SDValue N1 = N->getOperand(1); |
10564 | 23.5k | ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); |
10565 | 23.5k | EVT VT = N->getValueType(0); |
10566 | 23.5k | |
10567 | 23.5k | // fold (fp_round c1fp) -> c1fp |
10568 | 23.5k | if (N0CFP) |
10569 | 2 | return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); |
10570 | 23.5k | |
10571 | 23.5k | // fold (fp_round (fp_extend x)) -> x |
10572 | 23.5k | if (23.5k N0.getOpcode() == ISD::FP_EXTEND && 23.5k VT == N0.getOperand(0).getValueType()203 ) |
10573 | 203 | return N0.getOperand(0); |
10574 | 23.3k | |
10575 | 23.3k | // fold (fp_round (fp_round x)) -> (fp_round x) |
10576 | 23.3k | if (23.3k N0.getOpcode() == ISD::FP_ROUND23.3k ) { |
10577 | 19 | const bool NIsTrunc = N->getConstantOperandVal(1) == 1; |
10578 | 19 | const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1; |
10579 | 19 | |
10580 | 19 | // Skip this folding if it results in an fp_round from f80 to f16. |
10581 | 19 | // |
10582 | 19 | // f80 to f16 always generates an expensive (and as yet, unimplemented) |
10583 | 19 | // libcall to __truncxfhf2 instead of selecting native f16 conversion |
10584 | 19 | // instructions from f32 or f64. Moreover, the first (value-preserving) |
10585 | 19 | // fp_round from f80 to either f32 or f64 may become a NOP in platforms like |
10586 | 19 | // x86. |
10587 | 19 | if (N0.getOperand(0).getValueType() == MVT::f80 && 19 VT == MVT::f167 ) |
10588 | 1 | return SDValue(); |
10589 | 18 | |
10590 | 18 | // If the first fp_round isn't a value preserving truncation, it might |
10591 | 18 | // introduce a tie in the second fp_round, that wouldn't occur in the |
10592 | 18 | // single-step fp_round we want to fold to. |
10593 | 18 | // In other words, double rounding isn't the same as rounding. |
10594 | 18 | // Also, this is a value preserving truncation iff both fp_round's are. |
10595 | 18 | if (18 DAG.getTarget().Options.UnsafeFPMath || 18 N0IsTrunc16 ) { |
10596 | 7 | SDLoc DL(N); |
10597 | 7 | return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), |
10598 | 0 | DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); |
10599 | 7 | } |
10600 | 23.3k | } |
10601 | 23.3k | |
10602 | 23.3k | // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) |
10603 | 23.3k | if (23.3k N0.getOpcode() == ISD::FCOPYSIGN && 23.3k N0.getNode()->hasOneUse()8 ) { |
10604 | 8 | SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, |
10605 | 8 | N0.getOperand(0), N1); |
10606 | 8 | AddToWorklist(Tmp.getNode()); |
10607 | 8 | return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, |
10608 | 8 | Tmp, N0.getOperand(1)); |
10609 | 8 | } |
10610 | 23.3k | |
10611 | 23.3k | if (SDValue 23.3k NewVSel23.3k = matchVSelectOpSizesWithSetCC(N)) |
10612 | 4 | return NewVSel; |
10613 | 23.3k | |
10614 | 23.3k | return SDValue(); |
10615 | 23.3k | } |
10616 | | |
10617 | 0 | SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { |
10618 | 0 | SDValue N0 = N->getOperand(0); |
10619 | 0 | EVT VT = N->getValueType(0); |
10620 | 0 | EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); |
10621 | 0 | ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); |
10622 | 0 |
|
10623 | 0 | // fold (fp_round_inreg c1fp) -> c1fp |
10624 | 0 | if (N0CFP && 0 isTypeLegal(EVT)0 ) { |
10625 | 0 | SDLoc DL(N); |
10626 | 0 | SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); |
10627 | 0 | return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); |
10628 | 0 | } |
10629 | 0 |
|
10630 | 0 | return SDValue(); |
10631 | 0 | } |
10632 | | |
10633 | 68.2k | SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { |
10634 | 68.2k | SDValue N0 = N->getOperand(0); |
10635 | 68.2k | EVT VT = N->getValueType(0); |
10636 | 68.2k | |
10637 | 68.2k | // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. |
10638 | 68.2k | if (N->hasOneUse() && |
10639 | 65.6k | N->use_begin()->getOpcode() == ISD::FP_ROUND) |
10640 | 32 | return SDValue(); |
10641 | 68.1k | |
10642 | 68.1k | // fold (fp_extend c1fp) -> c1fp |
10643 | 68.1k | if (68.1k isConstantFPBuildVectorOrConstantFP(N0)68.1k ) |
10644 | 10 | return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); |
10645 | 68.1k | |
10646 | 68.1k | // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) |
10647 | 68.1k | if (68.1k N0.getOpcode() == ISD::FP16_TO_FP && |
10648 | 692 | TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) |
10649 | 4 | return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); |
10650 | 68.1k | |
10651 | 68.1k | // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the |
10652 | 68.1k | // value of X. |
10653 | 68.1k | if (68.1k N0.getOpcode() == ISD::FP_ROUND |
10654 | 68.1k | && N0.getConstantOperandVal(1) == 114.4k ) { |
10655 | 126 | SDValue In = N0.getOperand(0); |
10656 | 126 | if (In.getValueType() == VT126 ) return In120 ; |
10657 | 6 | if (6 VT.bitsLT(In.getValueType())6 ) |
10658 | 5 | return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, |
10659 | 5 | In, N0.getOperand(1)); |
10660 | 1 | return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); |
10661 | 1 | } |
10662 | 68.0k | |
10663 | 68.0k | // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) |
10664 | 68.0k | if (68.0k ISD::isNormalLoad(N0.getNode()) && 68.0k N0.hasOneUse()10.9k && |
10665 | 68.0k | TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())10.4k ) { |
10666 | 382 | LoadSDNode *LN0 = cast<LoadSDNode>(N0); |
10667 | 382 | SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, |
10668 | 382 | LN0->getChain(), |
10669 | 382 | LN0->getBasePtr(), N0.getValueType(), |
10670 | 382 | LN0->getMemOperand()); |
10671 | 382 | CombineTo(N, ExtLoad); |
10672 | 382 | CombineTo(N0.getNode(), |
10673 | 382 | DAG.getNode(ISD::FP_ROUND, SDLoc(N0), |
10674 | 382 | N0.getValueType(), ExtLoad, |
10675 | 382 | DAG.getIntPtrConstant(1, SDLoc(N0))), |
10676 | 382 | ExtLoad.getValue(1)); |
10677 | 382 | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
10678 | 382 | } |
10679 | 67.6k | |
10680 | 67.6k | if (SDValue 67.6k NewVSel67.6k = matchVSelectOpSizesWithSetCC(N)) |
10681 | 2 | return NewVSel; |
10682 | 67.6k | |
10683 | 67.6k | return SDValue(); |
10684 | 67.6k | } |
10685 | | |
10686 | 970 | SDValue DAGCombiner::visitFCEIL(SDNode *N) { |
10687 | 970 | SDValue N0 = N->getOperand(0); |
10688 | 970 | EVT VT = N->getValueType(0); |
10689 | 970 | |
10690 | 970 | // fold (fceil c1) -> fceil(c1) |
10691 | 970 | if (isConstantFPBuildVectorOrConstantFP(N0)) |
10692 | 0 | return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); |
10693 | 970 | |
10694 | 970 | return SDValue(); |
10695 | 970 | } |
10696 | | |
10697 | 1.13k | SDValue DAGCombiner::visitFTRUNC(SDNode *N) { |
10698 | 1.13k | SDValue N0 = N->getOperand(0); |
10699 | 1.13k | EVT VT = N->getValueType(0); |
10700 | 1.13k | |
10701 | 1.13k | // fold (ftrunc c1) -> ftrunc(c1) |
10702 | 1.13k | if (isConstantFPBuildVectorOrConstantFP(N0)) |
10703 | 0 | return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); |
10704 | 1.13k | |
10705 | 1.13k | return SDValue(); |
10706 | 1.13k | } |
10707 | | |
10708 | 1.53k | SDValue DAGCombiner::visitFFLOOR(SDNode *N) { |
10709 | 1.53k | SDValue N0 = N->getOperand(0); |
10710 | 1.53k | EVT VT = N->getValueType(0); |
10711 | 1.53k | |
10712 | 1.53k | // fold (ffloor c1) -> ffloor(c1) |
10713 | 1.53k | if (isConstantFPBuildVectorOrConstantFP(N0)) |
10714 | 0 | return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); |
10715 | 1.53k | |
10716 | 1.53k | return SDValue(); |
10717 | 1.53k | } |
10718 | | |
10719 | | // FIXME: FNEG and FABS have a lot in common; refactor. |
10720 | 12.7k | SDValue DAGCombiner::visitFNEG(SDNode *N) { |
10721 | 12.7k | SDValue N0 = N->getOperand(0); |
10722 | 12.7k | EVT VT = N->getValueType(0); |
10723 | 12.7k | |
10724 | 12.7k | // Constant fold FNEG. |
10725 | 12.7k | if (isConstantFPBuildVectorOrConstantFP(N0)) |
10726 | 0 | return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); |
10727 | 12.7k | |
10728 | 12.7k | if (12.7k isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), |
10729 | 12.7k | &DAG.getTarget().Options)) |
10730 | 62 | return GetNegatedExpression(N0, DAG, LegalOperations); |
10731 | 12.6k | |
10732 | 12.6k | // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading |
10733 | 12.6k | // constant pool values. |
10734 | 12.6k | if (12.6k !TLI.isFNegFree(VT) && |
10735 | 9.98k | N0.getOpcode() == ISD::BITCAST && |
10736 | 12.6k | N0.getNode()->hasOneUse()240 ) { |
10737 | 237 | SDValue Int = N0.getOperand(0); |
10738 | 237 | EVT IntVT = Int.getValueType(); |
10739 | 237 | if (IntVT.isInteger() && 237 !IntVT.isVector()224 ) { |
10740 | 30 | APInt SignMask; |
10741 | 30 | if (N0.getValueType().isVector()30 ) { |
10742 | 23 | // For a vector, get a mask such as 0x80... per scalar element |
10743 | 23 | // and splat it. |
10744 | 23 | SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits()); |
10745 | 23 | SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); |
10746 | 30 | } else { |
10747 | 7 | // For a scalar, just generate 0x80... |
10748 | 7 | SignMask = APInt::getSignMask(IntVT.getSizeInBits()); |
10749 | 7 | } |
10750 | 30 | SDLoc DL0(N0); |
10751 | 30 | Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, |
10752 | 30 | DAG.getConstant(SignMask, DL0, IntVT)); |
10753 | 30 | AddToWorklist(Int.getNode()); |
10754 | 30 | return DAG.getBitcast(VT, Int); |
10755 | 30 | } |
10756 | 12.6k | } |
10757 | 12.6k | |
10758 | 12.6k | // (fneg (fmul c, x)) -> (fmul -c, x) |
10759 | 12.6k | if (12.6k N0.getOpcode() == ISD::FMUL && |
10760 | 12.6k | (N0.getNode()->hasOneUse() || 563 !TLI.isFNegFree(VT)78 )) { |
10761 | 515 | ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); |
10762 | 515 | if (CFP1515 ) { |
10763 | 18 | APFloat CVal = CFP1->getValueAPF(); |
10764 | 18 | CVal.changeSign(); |
10765 | 18 | if (Level >= AfterLegalizeDAG && |
10766 | 8 | (TLI.isFPImmLegal(CVal, VT) || |
10767 | 5 | TLI.isOperationLegal(ISD::ConstantFP, VT))) |
10768 | 3 | return DAG.getNode( |
10769 | 3 | ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), |
10770 | 3 | DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), |
10771 | 3 | N0->getFlags()); |
10772 | 12.6k | } |
10773 | 515 | } |
10774 | 12.6k | |
10775 | 12.6k | return SDValue(); |
10776 | 12.6k | } |
10777 | | |
10778 | 2.84k | SDValue DAGCombiner::visitFMINNUM(SDNode *N) { |
10779 | 2.84k | SDValue N0 = N->getOperand(0); |
10780 | 2.84k | SDValue N1 = N->getOperand(1); |
10781 | 2.84k | EVT VT = N->getValueType(0); |
10782 | 2.84k | const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); |
10783 | 2.84k | const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); |
10784 | 2.84k | |
10785 | 2.84k | if (N0CFP && 2.84k N1CFP3 ) { |
10786 | 3 | const APFloat &C0 = N0CFP->getValueAPF(); |
10787 | 3 | const APFloat &C1 = N1CFP->getValueAPF(); |
10788 | 3 | return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT); |
10789 | 3 | } |
10790 | 2.84k | |
10791 | 2.84k | // Canonicalize to constant on RHS. |
10792 | 2.84k | if (2.84k isConstantFPBuildVectorOrConstantFP(N0) && |
10793 | 8 | !isConstantFPBuildVectorOrConstantFP(N1)) |
10794 | 2 | return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0); |
10795 | 2.83k | |
10796 | 2.83k | return SDValue(); |
10797 | 2.83k | } |
10798 | | |
10799 | 2.59k | SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { |
10800 | 2.59k | SDValue N0 = N->getOperand(0); |
10801 | 2.59k | SDValue N1 = N->getOperand(1); |
10802 | 2.59k | EVT VT = N->getValueType(0); |
10803 | 2.59k | const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); |
10804 | 2.59k | const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); |
10805 | 2.59k | |
10806 | 2.59k | if (N0CFP && 2.59k N1CFP3 ) { |
10807 | 3 | const APFloat &C0 = N0CFP->getValueAPF(); |
10808 | 3 | const APFloat &C1 = N1CFP->getValueAPF(); |
10809 | 3 | return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT); |
10810 | 3 | } |
10811 | 2.59k | |
10812 | 2.59k | // Canonicalize to constant on RHS. |
10813 | 2.59k | if (2.59k isConstantFPBuildVectorOrConstantFP(N0) && |
10814 | 8 | !isConstantFPBuildVectorOrConstantFP(N1)) |
10815 | 2 | return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0); |
10816 | 2.59k | |
10817 | 2.59k | return SDValue(); |
10818 | 2.59k | } |
10819 | | |
10820 | 8.32k | SDValue DAGCombiner::visitFABS(SDNode *N) { |
10821 | 8.32k | SDValue N0 = N->getOperand(0); |
10822 | 8.32k | EVT VT = N->getValueType(0); |
10823 | 8.32k | |
10824 | 8.32k | // fold (fabs c1) -> fabs(c1) |
10825 | 8.32k | if (isConstantFPBuildVectorOrConstantFP(N0)) |
10826 | 0 | return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); |
10827 | 8.32k | |
10828 | 8.32k | // fold (fabs (fabs x)) -> (fabs x) |
10829 | 8.32k | if (8.32k N0.getOpcode() == ISD::FABS8.32k ) |
10830 | 0 | return N->getOperand(0); |
10831 | 8.32k | |
10832 | 8.32k | // fold (fabs (fneg x)) -> (fabs x) |
10833 | 8.32k | // fold (fabs (fcopysign x, y)) -> (fabs x) |
10834 | 8.32k | if (8.32k N0.getOpcode() == ISD::FNEG || 8.32k N0.getOpcode() == ISD::FCOPYSIGN8.32k ) |
10835 | 0 | return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); |
10836 | 8.32k | |
10837 | 8.32k | // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading |
10838 | 8.32k | // constant pool values. |
10839 | 8.32k | if (8.32k !TLI.isFAbsFree(VT) && |
10840 | 6.67k | N0.getOpcode() == ISD::BITCAST && |
10841 | 8.32k | N0.getNode()->hasOneUse()312 ) { |
10842 | 295 | SDValue Int = N0.getOperand(0); |
10843 | 295 | EVT IntVT = Int.getValueType(); |
10844 | 295 | if (IntVT.isInteger() && 295 !IntVT.isVector()284 ) { |
10845 | 262 | APInt SignMask; |
10846 | 262 | if (N0.getValueType().isVector()262 ) { |
10847 | 36 | // For a vector, get a mask such as 0x7f... per scalar element |
10848 | 36 | // and splat it. |
10849 | 36 | SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits()); |
10850 | 36 | SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); |
10851 | 262 | } else { |
10852 | 226 | // For a scalar, just generate 0x7f... |
10853 | 226 | SignMask = ~APInt::getSignMask(IntVT.getSizeInBits()); |
10854 | 226 | } |
10855 | 262 | SDLoc DL(N0); |
10856 | 262 | Int = DAG.getNode(ISD::AND, DL, IntVT, Int, |
10857 | 262 | DAG.getConstant(SignMask, DL, IntVT)); |
10858 | 262 | AddToWorklist(Int.getNode()); |
10859 | 262 | return DAG.getBitcast(N->getValueType(0), Int); |
10860 | 262 | } |
10861 | 8.06k | } |
10862 | 8.06k | |
10863 | 8.06k | return SDValue(); |
10864 | 8.06k | } |
10865 | | |
10866 | 2.86M | SDValue DAGCombiner::visitBRCOND(SDNode *N) { |
10867 | 2.86M | SDValue Chain = N->getOperand(0); |
10868 | 2.86M | SDValue N1 = N->getOperand(1); |
10869 | 2.86M | SDValue N2 = N->getOperand(2); |
10870 | 2.86M | |
10871 | 2.86M | // If N is a constant we could fold this into a fallthrough or unconditional |
10872 | 2.86M | // branch. However that doesn't happen very often in normal code, because |
10873 | 2.86M | // Instcombine/SimplifyCFG should have handled the available opportunities. |
10874 | 2.86M | // If we did this folding here, it would be necessary to update the |
10875 | 2.86M | // MachineBasicBlock CFG, which is awkward. |
10876 | 2.86M | |
10877 | 2.86M | // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal |
10878 | 2.86M | // on the target. |
10879 | 2.86M | if (N1.getOpcode() == ISD::SETCC && |
10880 | 1.98M | TLI.isOperationLegalOrCustom(ISD::BR_CC, |
10881 | 2.86M | N1.getOperand(0).getValueType())) { |
10882 | 1.70M | return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, |
10883 | 1.70M | Chain, N1.getOperand(2), |
10884 | 1.70M | N1.getOperand(0), N1.getOperand(1), N2); |
10885 | 1.70M | } |
10886 | 1.15M | |
10887 | 1.15M | if (1.15M (N1.hasOneUse() && 1.15M N1.getOpcode() == ISD::SRL1.15M ) || |
10888 | 1.15M | ((N1.getOpcode() == ISD::TRUNCATE && 1.15M N1.hasOneUse()24.9k ) && |
10889 | 24.9k | (N1.getOperand(0).hasOneUse() && |
10890 | 1.15M | N1.getOperand(0).getOpcode() == ISD::SRL24.8k ))) { |
10891 | 392 | SDNode *Trunc = nullptr; |
10892 | 392 | if (N1.getOpcode() == ISD::TRUNCATE392 ) { |
10893 | 54 | // Look pass the truncate. |
10894 | 54 | Trunc = N1.getNode(); |
10895 | 54 | N1 = N1.getOperand(0); |
10896 | 54 | } |
10897 | 392 | |
10898 | 392 | // Match this pattern so that we can generate simpler code: |
10899 | 392 | // |
10900 | 392 | // %a = ... |
10901 | 392 | // %b = and i32 %a, 2 |
10902 | 392 | // %c = srl i32 %b, 1 |
10903 | 392 | // brcond i32 %c ... |
10904 | 392 | // |
10905 | 392 | // into |
10906 | 392 | // |
10907 | 392 | // %a = ... |
10908 | 392 | // %b = and i32 %a, 2 |
10909 | 392 | // %c = setcc eq %b, 0 |
10910 | 392 | // brcond %c ... |
10911 | 392 | // |
10912 | 392 | // This applies only when the AND constant value has one bit set and the |
10913 | 392 | // SRL constant is equal to the log2 of the AND constant. The back-end is |
10914 | 392 | // smart enough to convert the result into a TEST/JMP sequence. |
10915 | 392 | SDValue Op0 = N1.getOperand(0); |
10916 | 392 | SDValue Op1 = N1.getOperand(1); |
10917 | 392 | |
10918 | 392 | if (Op0.getOpcode() == ISD::AND && |
10919 | 392 | Op1.getOpcode() == ISD::Constant345 ) { |
10920 | 345 | SDValue AndOp1 = Op0.getOperand(1); |
10921 | 345 | |
10922 | 345 | if (AndOp1.getOpcode() == ISD::Constant345 ) { |
10923 | 345 | const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); |
10924 | 345 | |
10925 | 345 | if (AndConst.isPowerOf2() && |
10926 | 345 | cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()345 ) { |
10927 | 345 | SDLoc DL(N); |
10928 | 345 | SDValue SetCC = |
10929 | 345 | DAG.getSetCC(DL, |
10930 | 345 | getSetCCResultType(Op0.getValueType()), |
10931 | 345 | Op0, DAG.getConstant(0, DL, Op0.getValueType()), |
10932 | 345 | ISD::SETNE); |
10933 | 345 | |
10934 | 345 | SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, |
10935 | 345 | MVT::Other, Chain, SetCC, N2); |
10936 | 345 | // Don't add the new BRCond into the worklist or else SimplifySelectCC |
10937 | 345 | // will convert it back to (X & C1) >> C2. |
10938 | 345 | CombineTo(N, NewBRCond, false); |
10939 | 345 | // Truncate is dead. |
10940 | 345 | if (Trunc) |
10941 | 29 | deleteAndRecombine(Trunc); |
10942 | 345 | // Replace the uses of SRL with SETCC |
10943 | 345 | WorklistRemover DeadNodes(*this); |
10944 | 345 | DAG.ReplaceAllUsesOfValueWith(N1, SetCC); |
10945 | 345 | deleteAndRecombine(N1.getNode()); |
10946 | 345 | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
10947 | 345 | } |
10948 | 47 | } |
10949 | 345 | } |
10950 | 47 | |
10951 | 47 | if (47 Trunc47 ) |
10952 | 47 | // Restore N1 if the above transformation doesn't match. |
10953 | 25 | N1 = N->getOperand(1); |
10954 | 392 | } |
10955 | 1.15M | |
10956 | 1.15M | // Transform br(xor(x, y)) -> br(x != y) |
10957 | 1.15M | // Transform br(xor(xor(x,y), 1)) -> br (x == y) |
10958 | 1.15M | if (1.15M N1.hasOneUse() && 1.15M N1.getOpcode() == ISD::XOR1.15M ) { |
10959 | 805k | SDNode *TheXor = N1.getNode(); |
10960 | 805k | SDValue Op0 = TheXor->getOperand(0); |
10961 | 805k | SDValue Op1 = TheXor->getOperand(1); |
10962 | 805k | if (Op0.getOpcode() == Op1.getOpcode()805k ) { |
10963 | 152 | // Avoid missing important xor optimizations. |
10964 | 152 | if (SDValue Tmp152 = visitXOR(TheXor)) { |
10965 | 1 | if (Tmp.getNode() != TheXor1 ) { |
10966 | 0 | DEBUG(dbgs() << "\nReplacing.8 "; |
10967 | 0 | TheXor->dump(&DAG); |
10968 | 0 | dbgs() << "\nWith: "; |
10969 | 0 | Tmp.getNode()->dump(&DAG); |
10970 | 0 | dbgs() << '\n'); |
10971 | 0 | WorklistRemover DeadNodes(*this); |
10972 | 0 | DAG.ReplaceAllUsesOfValueWith(N1, Tmp); |
10973 | 0 | deleteAndRecombine(TheXor); |
10974 | 0 | return DAG.getNode(ISD::BRCOND, SDLoc(N), |
10975 | 0 | MVT::Other, Chain, Tmp, N2); |
10976 | 0 | } |
10977 | 1 | |
10978 | 1 | // visitXOR has changed XOR's operands or replaced the XOR completely, |
10979 | 1 | // bail out. |
10980 | 1 | return SDValue(N, 0); |
10981 | 1 | } |
10982 | 152 | } |
10983 | 805k | |
10984 | 805k | if (805k Op0.getOpcode() != ISD::SETCC && 805k Op1.getOpcode() != ISD::SETCC41.0k ) { |
10985 | 41.0k | bool Equal = false; |
10986 | 41.0k | if (isOneConstant(Op0) && 41.0k Op0.hasOneUse()0 && |
10987 | 41.0k | Op0.getOpcode() == ISD::XOR0 ) { |
10988 | 0 | TheXor = Op0.getNode(); |
10989 | 0 | Equal = true; |
10990 | 0 | } |
10991 | 41.0k | |
10992 | 41.0k | EVT SetCCVT = N1.getValueType(); |
10993 | 41.0k | if (LegalTypes) |
10994 | 27 | SetCCVT = getSetCCResultType(SetCCVT); |
10995 | 41.0k | SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), |
10996 | 41.0k | SetCCVT, |
10997 | 41.0k | Op0, Op1, |
10998 | 41.0k | Equal ? ISD::SETEQ0 : ISD::SETNE41.0k ); |
10999 | 41.0k | // Replace the uses of XOR with SETCC |
11000 | 41.0k | WorklistRemover DeadNodes(*this); |
11001 | 41.0k | DAG.ReplaceAllUsesOfValueWith(N1, SetCC); |
11002 | 41.0k | deleteAndRecombine(N1.getNode()); |
11003 | 41.0k | return DAG.getNode(ISD::BRCOND, SDLoc(N), |
11004 | 41.0k | MVT::Other, Chain, SetCC, N2); |
11005 | 41.0k | } |
11006 | 1.11M | } |
11007 | 1.11M | |
11008 | 1.11M | return SDValue(); |
11009 | 1.11M | } |
11010 | | |
11011 | | // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. |
11012 | | // |
11013 | 2.09M | SDValue DAGCombiner::visitBR_CC(SDNode *N) { |
11014 | 2.09M | CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); |
11015 | 2.09M | SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); |
11016 | 2.09M | |
11017 | 2.09M | // If N is a constant we could fold this into a fallthrough or unconditional |
11018 | 2.09M | // branch. However that doesn't happen very often in normal code, because |
11019 | 2.09M | // Instcombine/SimplifyCFG should have handled the available opportunities. |
11020 | 2.09M | // If we did this folding here, it would be necessary to update the |
11021 | 2.09M | // MachineBasicBlock CFG, which is awkward. |
11022 | 2.09M | |
11023 | 2.09M | // Use SimplifySetCC to simplify SETCC's. |
11024 | 2.09M | SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), |
11025 | 2.09M | CondLHS, CondRHS, CC->get(), SDLoc(N), |
11026 | 2.09M | false); |
11027 | 2.09M | if (Simp.getNode()2.09M ) AddToWorklist(Simp.getNode())172k ; |
11028 | 2.09M | |
11029 | 2.09M | // fold to a simpler setcc |
11030 | 2.09M | if (Simp.getNode() && 2.09M Simp.getOpcode() == ISD::SETCC172k ) |
11031 | 171k | return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, |
11032 | 171k | N->getOperand(0), Simp.getOperand(2), |
11033 | 171k | Simp.getOperand(0), Simp.getOperand(1), |
11034 | 171k | N->getOperand(4)); |
11035 | 1.92M | |
11036 | 1.92M | return SDValue(); |
11037 | 1.92M | } |
11038 | | |
11039 | | /// Return true if 'Use' is a load or a store that uses N as its base pointer |
11040 | | /// and that N may be folded in the load / store addressing mode. |
11041 | | static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, |
11042 | | SelectionDAG &DAG, |
11043 | 940k | const TargetLowering &TLI) { |
11044 | 940k | EVT VT; |
11045 | 940k | unsigned AS; |
11046 | 940k | |
11047 | 940k | if (LoadSDNode *LD940k = dyn_cast<LoadSDNode>(Use)) { |
11048 | 306k | if (LD->isIndexed() || 306k LD->getBasePtr().getNode() != N306k ) |
11049 | 4 | return false; |
11050 | 306k | VT = LD->getMemoryVT(); |
11051 | 306k | AS = LD->getAddressSpace(); |
11052 | 940k | } else if (StoreSDNode *634k ST634k = dyn_cast<StoreSDNode>(Use)) { |
11053 | 436k | if (ST->isIndexed() || 436k ST->getBasePtr().getNode() != N436k ) |
11054 | 2.30k | return false; |
11055 | 433k | VT = ST->getMemoryVT(); |
11056 | 433k | AS = ST->getAddressSpace(); |
11057 | 433k | } else |
11058 | 198k | return false; |
11059 | 739k | |
11060 | 739k | TargetLowering::AddrMode AM; |
11061 | 739k | if (N->getOpcode() == ISD::ADD739k ) { |
11062 | 739k | ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
11063 | 739k | if (Offset) |
11064 | 739k | // [reg +/- imm] |
11065 | 739k | AM.BaseOffs = Offset->getSExtValue(); |
11066 | 739k | else |
11067 | 739k | // [reg +/- reg] |
11068 | 646 | AM.Scale = 1; |
11069 | 0 | } else if (0 N->getOpcode() == ISD::SUB0 ) { |
11070 | 0 | ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
11071 | 0 | if (Offset) |
11072 | 0 | // [reg +/- imm] |
11073 | 0 | AM.BaseOffs = -Offset->getSExtValue(); |
11074 | 0 | else |
11075 | 0 | // [reg +/- reg] |
11076 | 0 | AM.Scale = 1; |
11077 | 0 | } else |
11078 | 0 | return false; |
11079 | 739k | |
11080 | 739k | return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, |
11081 | 739k | VT.getTypeForEVT(*DAG.getContext()), AS); |
11082 | 739k | } |
11083 | | |
11084 | | /// Try turning a load/store into a pre-indexed load/store when the base |
11085 | | /// pointer is an add or subtract and it has other uses besides the load/store. |
11086 | | /// After the transformation, the new indexed load/store has effectively folded |
11087 | | /// the add/subtract in and all of its other uses are redirected to the |
11088 | | /// new load/store. |
11089 | 14.0M | bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { |
11090 | 14.0M | if (Level < AfterLegalizeDAG) |
11091 | 9.85M | return false; |
11092 | 4.23M | |
11093 | 4.23M | bool isLoad = true; |
11094 | 4.23M | SDValue Ptr; |
11095 | 4.23M | EVT VT; |
11096 | 4.23M | if (LoadSDNode *LD4.23M = dyn_cast<LoadSDNode>(N)) { |
11097 | 2.29M | if (LD->isIndexed()) |
11098 | 10.3k | return false; |
11099 | 2.28M | VT = LD->getMemoryVT(); |
11100 | 2.28M | if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && |
11101 | 265k | !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) |
11102 | 265k | return false; |
11103 | 2.02M | Ptr = LD->getBasePtr(); |
11104 | 4.23M | } else if (StoreSDNode *1.94M ST1.94M = dyn_cast<StoreSDNode>(N)) { |
11105 | 1.94M | if (ST->isIndexed()) |
11106 | 12.8k | return false; |
11107 | 1.92M | VT = ST->getMemoryVT(); |
11108 | 1.92M | if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && |
11109 | 317k | !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) |
11110 | 317k | return false; |
11111 | 1.61M | Ptr = ST->getBasePtr(); |
11112 | 1.61M | isLoad = false; |
11113 | 1.94M | } else { |
11114 | 0 | return false; |
11115 | 0 | } |
11116 | 3.63M | |
11117 | 3.63M | // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail |
11118 | 3.63M | // out. There is no reason to make this a preinc/predec. |
11119 | 3.63M | if (3.63M (Ptr.getOpcode() != ISD::ADD && 3.63M Ptr.getOpcode() != ISD::SUB994k ) || |
11120 | 2.63M | Ptr.getNode()->hasOneUse()) |
11121 | 3.28M | return false; |
11122 | 346k | |
11123 | 346k | // Ask the target to do addressing mode selection. |
11124 | 346k | SDValue BasePtr; |
11125 | 346k | SDValue Offset; |
11126 | 346k | ISD::MemIndexedMode AM = ISD::UNINDEXED; |
11127 | 346k | if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) |
11128 | 109k | return false; |
11129 | 237k | |
11130 | 237k | // Backends without true r+i pre-indexed forms may need to pass a |
11131 | 237k | // constant base with a variable offset so that constant coercion |
11132 | 237k | // will work with the patterns in canonical form. |
11133 | 237k | bool Swapped = false; |
11134 | 237k | if (isa<ConstantSDNode>(BasePtr)237k ) { |
11135 | 1 | std::swap(BasePtr, Offset); |
11136 | 1 | Swapped = true; |
11137 | 1 | } |
11138 | 237k | |
11139 | 237k | // Don't create a indexed load / store with zero offset. |
11140 | 237k | if (isNullConstant(Offset)) |
11141 | 0 | return false; |
11142 | 237k | |
11143 | 237k | // Try turning it into a pre-indexed load / store except when: |
11144 | 237k | // 1) The new base ptr is a frame index. |
11145 | 237k | // 2) If N is a store and the new base ptr is either the same as or is a |
11146 | 237k | // predecessor of the value being stored. |
11147 | 237k | // 3) Another use of old base ptr is a predecessor of N. If ptr is folded |
11148 | 237k | // that would create a cycle. |
11149 | 237k | // 4) All uses are load / store ops that use it as old base ptr. |
11150 | 237k | |
11151 | 237k | // Check #1. Preinc'ing a frame index would require copying the stack pointer |
11152 | 237k | // (plus the implicit offset) to a register to preinc anyway. |
11153 | 237k | if (237k isa<FrameIndexSDNode>(BasePtr) || 237k isa<RegisterSDNode>(BasePtr)230k ) |
11154 | 7.32k | return false; |
11155 | 230k | |
11156 | 230k | // Check #2. |
11157 | 230k | if (230k !isLoad230k ) { |
11158 | 90.4k | SDValue Val = cast<StoreSDNode>(N)->getValue(); |
11159 | 90.4k | if (Val == BasePtr || 90.4k BasePtr.getNode()->isPredecessorOf(Val.getNode())89.9k ) |
11160 | 63.5k | return false; |
11161 | 166k | } |
11162 | 166k | |
11163 | 166k | // Caches for hasPredecessorHelper. |
11164 | 166k | SmallPtrSet<const SDNode *, 32> Visited; |
11165 | 166k | SmallVector<const SDNode *, 16> Worklist; |
11166 | 166k | Worklist.push_back(N); |
11167 | 166k | |
11168 | 166k | // If the offset is a constant, there may be other adds of constants that |
11169 | 166k | // can be folded with this one. We should do this to avoid having to keep |
11170 | 166k | // a copy of the original base pointer. |
11171 | 166k | SmallVector<SDNode *, 16> OtherUses; |
11172 | 166k | if (isa<ConstantSDNode>(Offset)) |
11173 | 166k | for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(), |
11174 | 166k | UE = BasePtr.getNode()->use_end(); |
11175 | 642k | UI != UE642k ; ++UI476k ) { |
11176 | 520k | SDUse &Use = UI.getUse(); |
11177 | 520k | // Skip the use that is Ptr and uses of other results from BasePtr's |
11178 | 520k | // node (important for nodes that return multiple results). |
11179 | 520k | if (Use.getUser() == Ptr.getNode() || 520k Use != BasePtr376k ) |
11180 | 152k | continue; |
11181 | 368k | |
11182 | 368k | if (368k SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist)368k ) |
11183 | 110k | continue; |
11184 | 257k | |
11185 | 257k | if (257k Use.getUser()->getOpcode() != ISD::ADD && |
11186 | 257k | Use.getUser()->getOpcode() != ISD::SUB43.9k ) { |
11187 | 43.9k | OtherUses.clear(); |
11188 | 43.9k | break; |
11189 | 43.9k | } |
11190 | 213k | |
11191 | 213k | SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1); |
11192 | 213k | if (!isa<ConstantSDNode>(Op1)213k ) { |
11193 | 251 | OtherUses.clear(); |
11194 | 251 | break; |
11195 | 251 | } |
11196 | 213k | |
11197 | 213k | // FIXME: In some cases, we can be smarter about this. |
11198 | 213k | if (213k Op1.getValueType() != Offset.getValueType()213k ) { |
11199 | 0 | OtherUses.clear(); |
11200 | 0 | break; |
11201 | 0 | } |
11202 | 213k | |
11203 | 213k | OtherUses.push_back(Use.getUser()); |
11204 | 213k | } |
11205 | 166k | |
11206 | 166k | if (Swapped) |
11207 | 1 | std::swap(BasePtr, Offset); |
11208 | 166k | |
11209 | 166k | // Now check for #3 and #4. |
11210 | 166k | bool RealUse = false; |
11211 | 166k | |
11212 | 348k | for (SDNode *Use : Ptr.getNode()->uses()) { |
11213 | 348k | if (Use == N) |
11214 | 150k | continue; |
11215 | 198k | if (198k SDNode::hasPredecessorHelper(Use, Visited, Worklist)198k ) |
11216 | 45.9k | return false; |
11217 | 152k | |
11218 | 152k | // If Ptr may be folded in addressing mode of other use, then it's |
11219 | 152k | // not profitable to do this transformation. |
11220 | 152k | if (152k !canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)152k ) |
11221 | 36.9k | RealUse = true; |
11222 | 348k | } |
11223 | 166k | |
11224 | 120k | if (120k !RealUse120k ) |
11225 | 87.2k | return false; |
11226 | 33.4k | |
11227 | 33.4k | SDValue Result; |
11228 | 33.4k | if (isLoad) |
11229 | 27.7k | Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), |
11230 | 27.7k | BasePtr, Offset, AM); |
11231 | 33.4k | else |
11232 | 5.71k | Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), |
11233 | 5.71k | BasePtr, Offset, AM); |
11234 | 33.4k | ++PreIndexedNodes; |
11235 | 33.4k | ++NodesCombined; |
11236 | 33.4k | DEBUG(dbgs() << "\nReplacing.4 "; |
11237 | 33.4k | N->dump(&DAG); |
11238 | 33.4k | dbgs() << "\nWith: "; |
11239 | 33.4k | Result.getNode()->dump(&DAG); |
11240 | 33.4k | dbgs() << '\n'); |
11241 | 33.4k | WorklistRemover DeadNodes(*this); |
11242 | 33.4k | if (isLoad33.4k ) { |
11243 | 27.7k | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); |
11244 | 27.7k | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); |
11245 | 33.4k | } else { |
11246 | 5.71k | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); |
11247 | 5.71k | } |
11248 | 33.4k | |
11249 | 33.4k | // Finally, since the node is now dead, remove it from the graph. |
11250 | 33.4k | deleteAndRecombine(N); |
11251 | 33.4k | |
11252 | 33.4k | if (Swapped) |
11253 | 1 | std::swap(BasePtr, Offset); |
11254 | 33.4k | |
11255 | 33.4k | // Replace other uses of BasePtr that can be updated to use Ptr |
11256 | 35.9k | for (unsigned i = 0, e = OtherUses.size(); i != e35.9k ; ++i2.53k ) { |
11257 | 2.53k | unsigned OffsetIdx = 1; |
11258 | 2.53k | if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode()) |
11259 | 0 | OffsetIdx = 0; |
11260 | 2.53k | assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == |
11261 | 2.53k | BasePtr.getNode() && "Expected BasePtr operand"); |
11262 | 2.53k | |
11263 | 2.53k | // We need to replace ptr0 in the following expression: |
11264 | 2.53k | // x0 * offset0 + y0 * ptr0 = t0 |
11265 | 2.53k | // knowing that |
11266 | 2.53k | // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) |
11267 | 2.53k | // |
11268 | 2.53k | // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the |
11269 | 2.53k | // indexed load/store and the expression that needs to be re-written. |
11270 | 2.53k | // |
11271 | 2.53k | // Therefore, we have: |
11272 | 2.53k | // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 |
11273 | 2.53k | |
11274 | 2.53k | ConstantSDNode *CN = |
11275 | 2.53k | cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); |
11276 | 2.53k | int X0, X1, Y0, Y1; |
11277 | 2.53k | const APInt &Offset0 = CN->getAPIntValue(); |
11278 | 2.53k | APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); |
11279 | 2.53k | |
11280 | 2.53k | X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 10 ) ? -10 : 12.53k ; |
11281 | 2.53k | Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 00 ) ? -10 : 12.53k ; |
11282 | 2.53k | X1 = (AM == ISD::PRE_DEC && !Swapped12 ) ? -112 : 12.52k ; |
11283 | 2.53k | Y1 = (AM == ISD::PRE_DEC && Swapped12 ) ? -10 : 12.53k ; |
11284 | 2.53k | |
11285 | 2.53k | unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB0 : ISD::ADD2.53k ; |
11286 | 2.53k | |
11287 | 2.53k | APInt CNV = Offset0; |
11288 | 2.53k | if (X0 < 02.53k ) CNV = -CNV0 ; |
11289 | 2.53k | if (X1 * Y0 * Y1 < 02.53k ) CNV = CNV + Offset112 ; |
11290 | 2.52k | else CNV = CNV - Offset1; |
11291 | 2.53k | |
11292 | 2.53k | SDLoc DL(OtherUses[i]); |
11293 | 2.53k | |
11294 | 2.53k | // We can now generate the new expression. |
11295 | 2.53k | SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); |
11296 | 2.53k | SDValue NewOp2 = Result.getValue(isLoad ? 11.43k : 01.10k ); |
11297 | 2.53k | |
11298 | 2.53k | SDValue NewUse = DAG.getNode(Opcode, |
11299 | 2.53k | DL, |
11300 | 2.53k | OtherUses[i]->getValueType(0), NewOp1, NewOp2); |
11301 | 2.53k | DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); |
11302 | 2.53k | deleteAndRecombine(OtherUses[i]); |
11303 | 2.53k | } |
11304 | 33.4k | |
11305 | 33.4k | // Replace the uses of Ptr with uses of the updated base value. |
11306 | 33.4k | DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 127.7k : 05.71k )); |
11307 | 14.0M | deleteAndRecombine(Ptr.getNode()); |
11308 | 14.0M | |
11309 | 14.0M | return true; |
11310 | 14.0M | } |
11311 | | |
11312 | | /// Try to combine a load/store with a add/sub of the base pointer node into a |
11313 | | /// post-indexed load/store. The transformation folded the add/subtract into the |
11314 | | /// new indexed load/store effectively and all of its uses are redirected to the |
11315 | | /// new load/store. |
11316 | 14.0M | bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { |
11317 | 14.0M | if (Level < AfterLegalizeDAG) |
11318 | 9.85M | return false; |
11319 | 4.20M | |
11320 | 4.20M | bool isLoad = true; |
11321 | 4.20M | SDValue Ptr; |
11322 | 4.20M | EVT VT; |
11323 | 4.20M | if (LoadSDNode *LD4.20M = dyn_cast<LoadSDNode>(N)) { |
11324 | 2.26M | if (LD->isIndexed()) |
11325 | 10.3k | return false; |
11326 | 2.25M | VT = LD->getMemoryVT(); |
11327 | 2.25M | if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && |
11328 | 260k | !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) |
11329 | 260k | return false; |
11330 | 1.99M | Ptr = LD->getBasePtr(); |
11331 | 4.20M | } else if (StoreSDNode *1.93M ST1.93M = dyn_cast<StoreSDNode>(N)) { |
11332 | 1.93M | if (ST->isIndexed()) |
11333 | 12.8k | return false; |
11334 | 1.92M | VT = ST->getMemoryVT(); |
11335 | 1.92M | if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && |
11336 | 317k | !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) |
11337 | 317k | return false; |
11338 | 1.60M | Ptr = ST->getBasePtr(); |
11339 | 1.60M | isLoad = false; |
11340 | 1.93M | } else { |
11341 | 0 | return false; |
11342 | 0 | } |
11343 | 3.60M | |
11344 | 3.60M | if (3.60M Ptr.getNode()->hasOneUse()3.60M ) |
11345 | 2.84M | return false; |
11346 | 760k | |
11347 | 760k | for (SDNode *Op : Ptr.getNode()->uses()) 760k { |
11348 | 3.18M | if (Op == N || |
11349 | 2.45M | (Op->getOpcode() != ISD::ADD && 2.45M Op->getOpcode() != ISD::SUB1.47M )) |
11350 | 2.19M | continue; |
11351 | 986k | |
11352 | 986k | SDValue BasePtr; |
11353 | 986k | SDValue Offset; |
11354 | 986k | ISD::MemIndexedMode AM = ISD::UNINDEXED; |
11355 | 986k | if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)986k ) { |
11356 | 811k | // Don't create a indexed load / store with zero offset. |
11357 | 811k | if (isNullConstant(Offset)) |
11358 | 0 | continue; |
11359 | 811k | |
11360 | 811k | // Try turning it into a post-indexed load / store except when |
11361 | 811k | // 1) All uses are load / store ops that use it as base ptr (and |
11362 | 811k | // it may be folded as addressing mmode). |
11363 | 811k | // 2) Op must be independent of N, i.e. Op is neither a predecessor |
11364 | 811k | // nor a successor of N. Otherwise, if Op is folded that would |
11365 | 811k | // create a cycle. |
11366 | 811k | |
11367 | 811k | if (811k isa<FrameIndexSDNode>(BasePtr) || 811k isa<RegisterSDNode>(BasePtr)623k ) |
11368 | 188k | continue; |
11369 | 623k | |
11370 | 623k | // Check for #1. |
11371 | 623k | bool TryNext = false; |
11372 | 1.07M | for (SDNode *Use : BasePtr.getNode()->uses()) { |
11373 | 1.07M | if (Use == Ptr.getNode()) |
11374 | 0 | continue; |
11375 | 1.07M | |
11376 | 1.07M | // If all the uses are load / store addresses, then don't do the |
11377 | 1.07M | // transformation. |
11378 | 1.07M | if (1.07M Use->getOpcode() == ISD::ADD || 1.07M Use->getOpcode() == ISD::SUB346k ){ |
11379 | 729k | bool RealUse = false; |
11380 | 788k | for (SDNode *UseUse : Use->uses()) { |
11381 | 788k | if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) |
11382 | 165k | RealUse = true; |
11383 | 788k | } |
11384 | 729k | |
11385 | 729k | if (!RealUse729k ) { |
11386 | 565k | TryNext = true; |
11387 | 565k | break; |
11388 | 565k | } |
11389 | 623k | } |
11390 | 1.07M | } |
11391 | 623k | |
11392 | 623k | if (TryNext) |
11393 | 565k | continue; |
11394 | 57.0k | |
11395 | 57.0k | // Check for #2 |
11396 | 57.0k | if (57.0k !Op->isPredecessorOf(N) && 57.0k !N->isPredecessorOf(Op)55.0k ) { |
11397 | 55.0k | SDValue Result = isLoad |
11398 | 37.1k | ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), |
11399 | 37.1k | BasePtr, Offset, AM) |
11400 | 17.8k | : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), |
11401 | 17.8k | BasePtr, Offset, AM); |
11402 | 55.0k | ++PostIndexedNodes; |
11403 | 55.0k | ++NodesCombined; |
11404 | 55.0k | DEBUG(dbgs() << "\nReplacing.5 "; |
11405 | 55.0k | N->dump(&DAG); |
11406 | 55.0k | dbgs() << "\nWith: "; |
11407 | 55.0k | Result.getNode()->dump(&DAG); |
11408 | 55.0k | dbgs() << '\n'); |
11409 | 55.0k | WorklistRemover DeadNodes(*this); |
11410 | 55.0k | if (isLoad55.0k ) { |
11411 | 37.1k | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); |
11412 | 37.1k | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); |
11413 | 55.0k | } else { |
11414 | 17.8k | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); |
11415 | 17.8k | } |
11416 | 55.0k | |
11417 | 55.0k | // Finally, since the node is now dead, remove it from the graph. |
11418 | 55.0k | deleteAndRecombine(N); |
11419 | 55.0k | |
11420 | 55.0k | // Replace the uses of Use with uses of the updated base value. |
11421 | 55.0k | DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), |
11422 | 55.0k | Result.getValue(isLoad ? 137.1k : 017.8k )); |
11423 | 55.0k | deleteAndRecombine(Op); |
11424 | 55.0k | return true; |
11425 | 55.0k | } |
11426 | 705k | } |
11427 | 3.18M | } |
11428 | 705k | |
11429 | 705k | return false; |
11430 | 705k | } |
11431 | | |
11432 | | /// \brief Return the base-pointer arithmetic from an indexed \p LD. |
11433 | 4 | SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { |
11434 | 4 | ISD::MemIndexedMode AM = LD->getAddressingMode(); |
11435 | 4 | assert(AM != ISD::UNINDEXED); |
11436 | 4 | SDValue BP = LD->getOperand(1); |
11437 | 4 | SDValue Inc = LD->getOperand(2); |
11438 | 4 | |
11439 | 4 | // Some backends use TargetConstants for load offsets, but don't expect |
11440 | 4 | // TargetConstants in general ADD nodes. We can convert these constants into |
11441 | 4 | // regular Constants (if the constant is not opaque). |
11442 | 4 | assert((Inc.getOpcode() != ISD::TargetConstant || |
11443 | 4 | !cast<ConstantSDNode>(Inc)->isOpaque()) && |
11444 | 4 | "Cannot split out indexing using opaque target constants"); |
11445 | 4 | if (Inc.getOpcode() == ISD::TargetConstant4 ) { |
11446 | 3 | ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc); |
11447 | 3 | Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), |
11448 | 3 | ConstInc->getValueType(0)); |
11449 | 3 | } |
11450 | 4 | |
11451 | 4 | unsigned Opc = |
11452 | 4 | (AM == ISD::PRE_INC || AM == ISD::POST_INC0 ? ISD::ADD4 : ISD::SUB0 ); |
11453 | 4 | return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); |
11454 | 4 | } |
11455 | | |
11456 | 6.04M | SDValue DAGCombiner::visitLOAD(SDNode *N) { |
11457 | 6.04M | LoadSDNode *LD = cast<LoadSDNode>(N); |
11458 | 6.04M | SDValue Chain = LD->getChain(); |
11459 | 6.04M | SDValue Ptr = LD->getBasePtr(); |
11460 | 6.04M | |
11461 | 6.04M | // If load is not volatile and there are no uses of the loaded value (and |
11462 | 6.04M | // the updated indexed value in case of indexed loads), change uses of the |
11463 | 6.04M | // chain value into uses of the chain input (i.e. delete the dead load). |
11464 | 6.04M | if (!LD->isVolatile()6.04M ) { |
11465 | 6.01M | if (N->getValueType(1) == MVT::Other6.01M ) { |
11466 | 6.00M | // Unindexed loads. |
11467 | 6.00M | if (!N->hasAnyUseOfValue(0)6.00M ) { |
11468 | 5.21k | // It's not safe to use the two value CombineTo variant here. e.g. |
11469 | 5.21k | // v1, chain2 = load chain1, loc |
11470 | 5.21k | // v2, chain3 = load chain2, loc |
11471 | 5.21k | // v3 = add v2, c |
11472 | 5.21k | // Now we replace use of chain2 with chain1. This makes the second load |
11473 | 5.21k | // isomorphic to the one we are deleting, and thus makes this load live. |
11474 | 5.21k | DEBUG(dbgs() << "\nReplacing.6 "; |
11475 | 5.21k | N->dump(&DAG); |
11476 | 5.21k | dbgs() << "\nWith chain: "; |
11477 | 5.21k | Chain.getNode()->dump(&DAG); |
11478 | 5.21k | dbgs() << "\n"); |
11479 | 5.21k | WorklistRemover DeadNodes(*this); |
11480 | 5.21k | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); |
11481 | 5.21k | AddUsersToWorklist(Chain.getNode()); |
11482 | 5.21k | if (N->use_empty()) |
11483 | 5.21k | deleteAndRecombine(N); |
11484 | 5.21k | |
11485 | 5.21k | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
11486 | 5.21k | } |
11487 | 10.3k | } else { |
11488 | 10.3k | // Indexed loads. |
11489 | 10.3k | assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); |
11490 | 10.3k | |
11491 | 10.3k | // If this load has an opaque TargetConstant offset, then we cannot split |
11492 | 10.3k | // the indexing into an add/sub directly (that TargetConstant may not be |
11493 | 10.3k | // valid for a different type of node, and we cannot convert an opaque |
11494 | 10.3k | // target constant into a regular constant). |
11495 | 10.3k | bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant && |
11496 | 49 | cast<ConstantSDNode>(LD->getOperand(2))->isOpaque(); |
11497 | 10.3k | |
11498 | 10.3k | if (!N->hasAnyUseOfValue(0) && |
11499 | 10.3k | ((MaySplitLoadIndex && 4 !HasOTCInc4 ) || !N->hasAnyUseOfValue(1)0 )) { |
11500 | 4 | SDValue Undef = DAG.getUNDEF(N->getValueType(0)); |
11501 | 4 | SDValue Index; |
11502 | 4 | if (N->hasAnyUseOfValue(1) && 4 MaySplitLoadIndex4 && !HasOTCInc4 ) { |
11503 | 4 | Index = SplitIndexingFromLoad(LD); |
11504 | 4 | // Try to fold the base pointer arithmetic into subsequent loads and |
11505 | 4 | // stores. |
11506 | 4 | AddUsersToWorklist(N); |
11507 | 4 | } else |
11508 | 0 | Index = DAG.getUNDEF(N->getValueType(1)); |
11509 | 4 | DEBUG(dbgs() << "\nReplacing.7 "; |
11510 | 4 | N->dump(&DAG); |
11511 | 4 | dbgs() << "\nWith: "; |
11512 | 4 | Undef.getNode()->dump(&DAG); |
11513 | 4 | dbgs() << " and 2 other values\n"); |
11514 | 4 | WorklistRemover DeadNodes(*this); |
11515 | 4 | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); |
11516 | 4 | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); |
11517 | 4 | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); |
11518 | 4 | deleteAndRecombine(N); |
11519 | 4 | return SDValue(N, 0); // Return N so it doesn't get rechecked! |
11520 | 4 | } |
11521 | 6.04M | } |
11522 | 6.01M | } |
11523 | 6.04M | |
11524 | 6.04M | // If this load is directly stored, replace the load value with the stored |
11525 | 6.04M | // value. |
11526 | 6.04M | // TODO: Handle store large -> read small portion. |
11527 | 6.04M | // TODO: Handle TRUNCSTORE/LOADEXT |
11528 | 6.04M | if (6.04M OptLevel != CodeGenOpt::None && |
11529 | 6.04M | ISD::isNormalLoad(N)6.02M && !LD->isVolatile()4.52M ) { |
11530 | 4.49M | if (ISD::isNON_TRUNCStore(Chain.getNode())4.49M ) { |
11531 | 407k | StoreSDNode *PrevST = cast<StoreSDNode>(Chain); |
11532 | 407k | if (PrevST->getBasePtr() == Ptr && |
11533 | 7.49k | PrevST->getValue().getValueType() == N->getValueType(0)) |
11534 | 4.30k | return CombineTo(N, PrevST->getOperand(1), Chain); |
11535 | 6.03M | } |
11536 | 4.49M | } |
11537 | 6.03M | |
11538 | 6.03M | // Try to infer better alignment information than the load already has. |
11539 | 6.03M | if (6.03M OptLevel != CodeGenOpt::None && 6.03M LD->isUnindexed()6.02M ) { |
11540 | 6.00M | if (unsigned Align6.00M = DAG.InferPtrAlignment(Ptr)) { |
11541 | 883k | if (Align > LD->getMemOperand()->getBaseAlignment()883k ) { |
11542 | 51.6k | SDValue NewLoad = DAG.getExtLoad( |
11543 | 51.6k | LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, |
11544 | 51.6k | LD->getPointerInfo(), LD->getMemoryVT(), Align, |
11545 | 51.6k | LD->getMemOperand()->getFlags(), LD->getAAInfo()); |
11546 | 51.6k | if (NewLoad.getNode() != N) |
11547 | 0 | return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); |
11548 | 6.03M | } |
11549 | 883k | } |
11550 | 6.00M | } |
11551 | 6.03M | |
11552 | 6.03M | if (6.03M LD->isUnindexed()6.03M ) { |
11553 | 6.02M | // Walk up chain skipping non-aliasing memory nodes. |
11554 | 6.02M | SDValue BetterChain = FindBetterChain(N, Chain); |
11555 | 6.02M | |
11556 | 6.02M | // If there is a better chain. |
11557 | 6.02M | if (Chain != BetterChain6.02M ) { |
11558 | 272k | SDValue ReplLoad; |
11559 | 272k | |
11560 | 272k | // Replace the chain to void dependency. |
11561 | 272k | if (LD->getExtensionType() == ISD::NON_EXTLOAD272k ) { |
11562 | 224k | ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD), |
11563 | 224k | BetterChain, Ptr, LD->getMemOperand()); |
11564 | 272k | } else { |
11565 | 47.9k | ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), |
11566 | 47.9k | LD->getValueType(0), |
11567 | 47.9k | BetterChain, Ptr, LD->getMemoryVT(), |
11568 | 47.9k | LD->getMemOperand()); |
11569 | 47.9k | } |
11570 | 272k | |
11571 | 272k | // Create token factor to keep old chain connected. |
11572 | 272k | SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), |
11573 | 272k | MVT::Other, Chain, ReplLoad.getValue(1)); |
11574 | 272k | |
11575 | 272k | // Replace uses with load result and token factor |
11576 | 272k | return CombineTo(N, ReplLoad.getValue(0), Token); |
11577 | 272k | } |
11578 | 5.76M | } |
11579 | 5.76M | |
11580 | 5.76M | // Try transforming N to an indexed load. |
11581 | 5.76M | if (5.76M CombineToPreIndexedLoadStore(N) || 5.76M CombineToPostIndexedLoadStore(N)5.73M ) |
11582 | 64.8k | return SDValue(N, 0); |
11583 | 5.69M | |
11584 | 5.69M | // Try to slice up N to more direct loads if the slices are mapped to |
11585 | 5.69M | // different register banks or pairing can take place. |
11586 | 5.69M | if (5.69M SliceUpLoad(N)5.69M ) |
11587 | 16.7k | return SDValue(N, 0); |
11588 | 5.68M | |
11589 | 5.68M | return SDValue(); |
11590 | 5.68M | } |
11591 | | |
11592 | | namespace { |
11593 | | |
11594 | | /// \brief Helper structure used to slice a load in smaller loads. |
11595 | | /// Basically a slice is obtained from the following sequence: |
11596 | | /// Origin = load Ty1, Base |
11597 | | /// Shift = srl Ty1 Origin, CstTy Amount |
11598 | | /// Inst = trunc Shift to Ty2 |
11599 | | /// |
11600 | | /// Then, it will be rewritten into: |
11601 | | /// Slice = load SliceTy, Base + SliceOffset |
11602 | | /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 |
11603 | | /// |
11604 | | /// SliceTy is deduced from the number of bits that are actually used to |
11605 | | /// build Inst. |
11606 | | struct LoadedSlice { |
11607 | | /// \brief Helper structure used to compute the cost of a slice. |
11608 | | struct Cost { |
11609 | | /// Are we optimizing for code size. |
11610 | | bool ForCodeSize; |
11611 | | |
11612 | | /// Various cost. |
11613 | | unsigned Loads = 0; |
11614 | | unsigned Truncates = 0; |
11615 | | unsigned CrossRegisterBanksCopies = 0; |
11616 | | unsigned ZExts = 0; |
11617 | | unsigned Shift = 0; |
11618 | | |
11619 | 33.5k | Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} |
11620 | | |
11621 | | /// \brief Get the cost of one isolated slice. |
11622 | | Cost(const LoadedSlice &LS, bool ForCodeSize = false) |
11623 | 33.5k | : ForCodeSize(ForCodeSize), Loads(1) { |
11624 | 33.5k | EVT TruncType = LS.Inst->getValueType(0); |
11625 | 33.5k | EVT LoadedType = LS.getLoadedType(); |
11626 | 33.5k | if (TruncType != LoadedType && |
11627 | 0 | !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) |
11628 | 0 | ZExts = 1; |
11629 | 33.5k | } |
11630 | | |
11631 | | /// \brief Account for slicing gain in the current cost. |
11632 | | /// Slicing provide a few gains like removing a shift or a |
11633 | | /// truncate. This method allows to grow the cost of the original |
11634 | | /// load with the gain from this slice. |
11635 | 33.5k | void addSliceGain(const LoadedSlice &LS) { |
11636 | 33.5k | // Each slice saves a truncate. |
11637 | 33.5k | const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); |
11638 | 33.5k | if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), |
11639 | 33.5k | LS.Inst->getValueType(0))) |
11640 | 12 | ++Truncates; |
11641 | 33.5k | // If there is a shift amount, this slice gets rid of it. |
11642 | 33.5k | if (LS.Shift) |
11643 | 16.7k | ++Shift; |
11644 | 33.5k | // If this slice can merge a cross register bank copy, account for it. |
11645 | 33.5k | if (LS.canMergeExpensiveCrossRegisterBankCopy()) |
11646 | 4 | ++CrossRegisterBanksCopies; |
11647 | 33.5k | } |
11648 | | |
11649 | 33.5k | Cost &operator+=(const Cost &RHS) { |
11650 | 33.5k | Loads += RHS.Loads; |
11651 | 33.5k | Truncates += RHS.Truncates; |
11652 | 33.5k | CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; |
11653 | 33.5k | ZExts += RHS.ZExts; |
11654 | 33.5k | Shift += RHS.Shift; |
11655 | 33.5k | return *this; |
11656 | 33.5k | } |
11657 | | |
11658 | 0 | bool operator==(const Cost &RHS) const { |
11659 | 0 | return Loads == RHS.Loads && Truncates == RHS.Truncates && |
11660 | 0 | CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && |
11661 | 0 | ZExts == RHS.ZExts && Shift == RHS.Shift; |
11662 | 0 | } |
11663 | | |
11664 | 0 | bool operator!=(const Cost &RHS) const { return !(*this == RHS); } |
11665 | | |
11666 | 16.7k | bool operator<(const Cost &RHS) const { |
11667 | 16.7k | // Assume cross register banks copies are as expensive as loads. |
11668 | 16.7k | // FIXME: Do we want some more target hooks? |
11669 | 16.7k | unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; |
11670 | 16.7k | unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; |
11671 | 16.7k | // Unless we are optimizing for code size, consider the |
11672 | 16.7k | // expensive operation first. |
11673 | 16.7k | if (!ForCodeSize && 16.7k ExpensiveOpsLHS != ExpensiveOpsRHS16.7k ) |
11674 | 41 | return ExpensiveOpsLHS < ExpensiveOpsRHS; |
11675 | 16.7k | return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < |
11676 | 16.7k | (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); |
11677 | 16.7k | } |
11678 | | |
11679 | 16.7k | bool operator>(const Cost &RHS) const { return RHS < *this; } |
11680 | | |
11681 | 0 | bool operator<=(const Cost &RHS) const { return !(RHS < *this); } |
11682 | | |
11683 | 0 | bool operator>=(const Cost &RHS) const { return !(*this < RHS); } |
11684 | | }; |
11685 | | |
11686 | | // The last instruction that represent the slice. This should be a |
11687 | | // truncate instruction. |
11688 | | SDNode *Inst; |
11689 | | |
11690 | | // The original load instruction. |
11691 | | LoadSDNode *Origin; |
11692 | | |
11693 | | // The right shift amount in bits from the original load. |
11694 | | unsigned Shift; |
11695 | | |
11696 | | // The DAG from which Origin came from. |
11697 | | // This is used to get some contextual information about legal types, etc. |
11698 | | SelectionDAG *DAG; |
11699 | | |
11700 | | LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr, |
11701 | | unsigned Shift = 0, SelectionDAG *DAG = nullptr) |
11702 | 37.7k | : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} |
11703 | | |
11704 | | /// \brief Get the bits used in a chunk of bits \p BitWidth large. |
11705 | | /// \return Result is \p BitWidth and has used bits set to 1 and |
11706 | | /// not used bits set to 0. |
11707 | 209k | APInt getUsedBits() const { |
11708 | 209k | // Reproduce the trunc(lshr) sequence: |
11709 | 209k | // - Start from the truncated value. |
11710 | 209k | // - Zero extend to the desired bit width. |
11711 | 209k | // - Shift left. |
11712 | 209k | assert(Origin && "No original load to compare against."); |
11713 | 209k | unsigned BitWidth = Origin->getValueSizeInBits(0); |
11714 | 209k | assert(Inst && "This slice is not bound to an instruction"); |
11715 | 209k | assert(Inst->getValueSizeInBits(0) <= BitWidth && |
11716 | 209k | "Extracted slice is bigger than the whole type!"); |
11717 | 209k | APInt UsedBits(Inst->getValueSizeInBits(0), 0); |
11718 | 209k | UsedBits.setAllBits(); |
11719 | 209k | UsedBits = UsedBits.zext(BitWidth); |
11720 | 209k | UsedBits <<= Shift; |
11721 | 209k | return UsedBits; |
11722 | 209k | } |
11723 | | |
11724 | | /// \brief Get the size of the slice to be loaded in bytes. |
11725 | 138k | unsigned getLoadedSize() const { |
11726 | 138k | unsigned SliceSize = getUsedBits().countPopulation(); |
11727 | 138k | assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); |
11728 | 138k | return SliceSize / 8; |
11729 | 138k | } |
11730 | | |
11731 | | /// \brief Get the type that will be loaded for this slice. |
11732 | | /// Note: This may not be the final type for the slice. |
11733 | 138k | EVT getLoadedType() const { |
11734 | 138k | assert(DAG && "Missing context"); |
11735 | 138k | LLVMContext &Ctxt = *DAG->getContext(); |
11736 | 138k | return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); |
11737 | 138k | } |
11738 | | |
11739 | | /// \brief Get the alignment of the load used for this slice. |
11740 | 50.2k | unsigned getAlignment() const { |
11741 | 50.2k | unsigned Alignment = Origin->getAlignment(); |
11742 | 50.2k | unsigned Offset = getOffsetFromBase(); |
11743 | 50.2k | if (Offset != 0) |
11744 | 16.7k | Alignment = MinAlign(Alignment, Alignment + Offset); |
11745 | 50.2k | return Alignment; |
11746 | 50.2k | } |
11747 | | |
11748 | | /// \brief Check if this slice can be rewritten with legal operations. |
11749 | 37.7k | bool isLegal() const { |
11750 | 37.7k | // An invalid slice is not legal. |
11751 | 37.7k | if (!Origin || 37.7k !Inst37.7k || !DAG37.7k ) |
11752 | 0 | return false; |
11753 | 37.7k | |
11754 | 37.7k | // Offsets are for indexed load only, we do not handle that. |
11755 | 37.7k | if (37.7k !Origin->getOffset().isUndef()37.7k ) |
11756 | 0 | return false; |
11757 | 37.7k | |
11758 | 37.7k | const TargetLowering &TLI = DAG->getTargetLoweringInfo(); |
11759 | 37.7k | |
11760 | 37.7k | // Check that the type is legal. |
11761 | 37.7k | EVT SliceType = getLoadedType(); |
11762 | 37.7k | if (!TLI.isTypeLegal(SliceType)) |
11763 | 56 | return false; |
11764 | 37.6k | |
11765 | 37.6k | // Check that the load is legal for this type. |
11766 | 37.6k | if (37.6k !TLI.isOperationLegal(ISD::LOAD, SliceType)37.6k ) |
11767 | 296 | return false; |
11768 | 37.3k | |
11769 | 37.3k | // Check that the offset can be computed. |
11770 | 37.3k | // 1. Check its type. |
11771 | 37.3k | EVT PtrType = Origin->getBasePtr().getValueType(); |
11772 | 37.3k | if (PtrType == MVT::Untyped || 37.3k PtrType.isExtended()37.3k ) |
11773 | 0 | return false; |
11774 | 37.3k | |
11775 | 37.3k | // 2. Check that it fits in the immediate. |
11776 | 37.3k | if (37.3k !TLI.isLegalAddImmediate(getOffsetFromBase())37.3k ) |
11777 | 0 | return false; |
11778 | 37.3k | |
11779 | 37.3k | // 3. Check that the computation is legal. |
11780 | 37.3k | if (37.3k !TLI.isOperationLegal(ISD::ADD, PtrType)37.3k ) |
11781 | 0 | return false; |
11782 | 37.3k | |
11783 | 37.3k | // Check that the zext is legal if it needs one. |
11784 | 37.3k | EVT TruncateType = Inst->getValueType(0); |
11785 | 37.3k | if (TruncateType != SliceType && |
11786 | 289 | !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) |
11787 | 0 | return false; |
11788 | 37.3k | |
11789 | 37.3k | return true; |
11790 | 37.3k | } |
11791 | | |
11792 | | /// \brief Get the offset in bytes of this slice in the original chunk of |
11793 | | /// bits. |
11794 | | /// \pre DAG != nullptr. |
11795 | 154k | uint64_t getOffsetFromBase() const { |
11796 | 154k | assert(DAG && "Missing context."); |
11797 | 154k | bool IsBigEndian = DAG->getDataLayout().isBigEndian(); |
11798 | 154k | assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); |
11799 | 154k | uint64_t Offset = Shift / 8; |
11800 | 154k | unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; |
11801 | 154k | assert(!(Origin->getValueSizeInBits(0) & 0x7) && |
11802 | 154k | "The size of the original loaded type is not a multiple of a" |
11803 | 154k | " byte."); |
11804 | 154k | // If Offset is bigger than TySizeInBytes, it means we are loading all |
11805 | 154k | // zeros. This should have been optimized before in the process. |
11806 | 154k | assert(TySizeInBytes > Offset && |
11807 | 154k | "Invalid shift amount for given loaded size"); |
11808 | 154k | if (IsBigEndian) |
11809 | 86 | Offset = TySizeInBytes - Offset - getLoadedSize(); |
11810 | 154k | return Offset; |
11811 | 154k | } |
11812 | | |
11813 | | /// \brief Generate the sequence of instructions to load the slice |
11814 | | /// represented by this object and redirect the uses of this slice to |
11815 | | /// this new sequence of instructions. |
11816 | | /// \pre this->Inst && this->Origin are valid Instructions and this |
11817 | | /// object passed the legal check: LoadedSlice::isLegal returned true. |
11818 | | /// \return The last instruction of the sequence used to load the slice. |
11819 | 33.5k | SDValue loadSlice() const { |
11820 | 33.5k | assert(Inst && Origin && "Unable to replace a non-existing slice."); |
11821 | 33.5k | const SDValue &OldBaseAddr = Origin->getBasePtr(); |
11822 | 33.5k | SDValue BaseAddr = OldBaseAddr; |
11823 | 33.5k | // Get the offset in that chunk of bytes w.r.t. the endianness. |
11824 | 33.5k | int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); |
11825 | 33.5k | assert(Offset >= 0 && "Offset too big to fit in int64_t!"); |
11826 | 33.5k | if (Offset33.5k ) { |
11827 | 16.7k | // BaseAddr = BaseAddr + Offset. |
11828 | 16.7k | EVT ArithType = BaseAddr.getValueType(); |
11829 | 16.7k | SDLoc DL(Origin); |
11830 | 16.7k | BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, |
11831 | 16.7k | DAG->getConstant(Offset, DL, ArithType)); |
11832 | 16.7k | } |
11833 | 33.5k | |
11834 | 33.5k | // Create the type of the loaded slice according to its size. |
11835 | 33.5k | EVT SliceType = getLoadedType(); |
11836 | 33.5k | |
11837 | 33.5k | // Create the load for the slice. |
11838 | 33.5k | SDValue LastInst = |
11839 | 33.5k | DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, |
11840 | 33.5k | Origin->getPointerInfo().getWithOffset(Offset), |
11841 | 33.5k | getAlignment(), Origin->getMemOperand()->getFlags()); |
11842 | 33.5k | // If the final type is not the same as the loaded type, this means that |
11843 | 33.5k | // we have to pad with zero. Create a zero extend for that. |
11844 | 33.5k | EVT FinalType = Inst->getValueType(0); |
11845 | 33.5k | if (SliceType != FinalType) |
11846 | 1 | LastInst = |
11847 | 1 | DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); |
11848 | 33.5k | return LastInst; |
11849 | 33.5k | } |
11850 | | |
11851 | | /// \brief Check if this slice can be merged with an expensive cross register |
11852 | | /// bank copy. E.g., |
11853 | | /// i = load i32 |
11854 | | /// f = bitcast i32 i to float |
11855 | 33.5k | bool canMergeExpensiveCrossRegisterBankCopy() const { |
11856 | 33.5k | if (!Inst || 33.5k !Inst->hasOneUse()33.5k ) |
11857 | 6.97k | return false; |
11858 | 26.6k | SDNode *Use = *Inst->use_begin(); |
11859 | 26.6k | if (Use->getOpcode() != ISD::BITCAST) |
11860 | 26.6k | return false; |
11861 | 26.6k | assert(DAG && "Missing context"); |
11862 | 4 | const TargetLowering &TLI = DAG->getTargetLoweringInfo(); |
11863 | 4 | EVT ResVT = Use->getValueType(0); |
11864 | 4 | const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); |
11865 | 4 | const TargetRegisterClass *ArgRC = |
11866 | 4 | TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); |
11867 | 4 | if (ArgRC == ResRC || 4 !TLI.isOperationLegal(ISD::LOAD, ResVT)4 ) |
11868 | 0 | return false; |
11869 | 4 | |
11870 | 4 | // At this point, we know that we perform a cross-register-bank copy. |
11871 | 4 | // Check if it is expensive. |
11872 | 4 | const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo(); |
11873 | 4 | // Assume bitcasts are cheap, unless both register classes do not |
11874 | 4 | // explicitly share a common sub class. |
11875 | 4 | if (!TRI || 4 TRI->getCommonSubClass(ArgRC, ResRC)4 ) |
11876 | 0 | return false; |
11877 | 4 | |
11878 | 4 | // Check if it will be merged with the load. |
11879 | 4 | // 1. Check the alignment constraint. |
11880 | 4 | unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( |
11881 | 4 | ResVT.getTypeForEVT(*DAG->getContext())); |
11882 | 4 | |
11883 | 4 | if (RequiredAlignment > getAlignment()) |
11884 | 0 | return false; |
11885 | 4 | |
11886 | 4 | // 2. Check that the load is a legal operation for that type. |
11887 | 4 | if (4 !TLI.isOperationLegal(ISD::LOAD, ResVT)4 ) |
11888 | 0 | return false; |
11889 | 4 | |
11890 | 4 | // 3. Check that we do not have a zext in the way. |
11891 | 4 | if (4 Inst->getValueType(0) != getLoadedType()4 ) |
11892 | 0 | return false; |
11893 | 4 | |
11894 | 4 | return true; |
11895 | 4 | } |
11896 | | }; |
11897 | | |
11898 | | } // end anonymous namespace |
11899 | | |
11900 | | /// \brief Check that all bits set in \p UsedBits form a dense region, i.e., |
11901 | | /// \p UsedBits looks like 0..0 1..1 0..0. |
11902 | 33.5k | static bool areUsedBitsDense(const APInt &UsedBits) { |
11903 | 33.5k | // If all the bits are one, this is dense! |
11904 | 33.5k | if (UsedBits.isAllOnesValue()) |
11905 | 33.5k | return true; |
11906 | 2 | |
11907 | 2 | // Get rid of the unused bits on the right. |
11908 | 2 | APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); |
11909 | 2 | // Get rid of the unused bits on the left. |
11910 | 2 | if (NarrowedUsedBits.countLeadingZeros()) |
11911 | 1 | NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); |
11912 | 33.5k | // Check that the chunk of bits is completely used. |
11913 | 33.5k | return NarrowedUsedBits.isAllOnesValue(); |
11914 | 33.5k | } |
11915 | | |
11916 | | /// \brief Check whether or not \p First and \p Second are next to each other |
11917 | | /// in memory. This means that there is no hole between the bits loaded |
11918 | | /// by \p First and the bits loaded by \p Second. |
11919 | | static bool areSlicesNextToEachOther(const LoadedSlice &First, |
11920 | 16.7k | const LoadedSlice &Second) { |
11921 | 16.7k | assert(First.Origin == Second.Origin && First.Origin && |
11922 | 16.7k | "Unable to match different memory origins."); |
11923 | 16.7k | APInt UsedBits = First.getUsedBits(); |
11924 | 16.7k | assert((UsedBits & Second.getUsedBits()) == 0 && |
11925 | 16.7k | "Slices are not supposed to overlap."); |
11926 | 16.7k | UsedBits |= Second.getUsedBits(); |
11927 | 16.7k | return areUsedBitsDense(UsedBits); |
11928 | 16.7k | } |
11929 | | |
11930 | | /// \brief Adjust the \p GlobalLSCost according to the target |
11931 | | /// paring capabilities and the layout of the slices. |
11932 | | /// \pre \p GlobalLSCost should account for at least as many loads as |
11933 | | /// there is in the slices in \p LoadedSlices. |
11934 | | static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, |
11935 | 16.7k | LoadedSlice::Cost &GlobalLSCost) { |
11936 | 16.7k | unsigned NumberOfSlices = LoadedSlices.size(); |
11937 | 16.7k | // If there is less than 2 elements, no pairing is possible. |
11938 | 16.7k | if (NumberOfSlices < 2) |
11939 | 0 | return; |
11940 | 16.7k | |
11941 | 16.7k | // Sort the slices so that elements that are likely to be next to each |
11942 | 16.7k | // other in memory are next to each other in the list. |
11943 | 16.7k | std::sort(LoadedSlices.begin(), LoadedSlices.end(), |
11944 | 16.7k | [](const LoadedSlice &LHS, const LoadedSlice &RHS) { |
11945 | 16.7k | assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); |
11946 | 16.7k | return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); |
11947 | 16.7k | }); |
11948 | 16.7k | const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); |
11949 | 16.7k | // First (resp. Second) is the first (resp. Second) potentially candidate |
11950 | 16.7k | // to be placed in a paired load. |
11951 | 16.7k | const LoadedSlice *First = nullptr; |
11952 | 16.7k | const LoadedSlice *Second = nullptr; |
11953 | 50.3k | for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices50.3k ; ++CurrSlice, |
11954 | 33.5k | // Set the beginning of the pair. |
11955 | 16.7k | First = Second) { |
11956 | 33.5k | Second = &LoadedSlices[CurrSlice]; |
11957 | 33.5k | |
11958 | 33.5k | // If First is NULL, it means we start a new pair. |
11959 | 33.5k | // Get to the next slice. |
11960 | 33.5k | if (!First) |
11961 | 16.7k | continue; |
11962 | 16.7k | |
11963 | 16.7k | EVT LoadedType = First->getLoadedType(); |
11964 | 16.7k | |
11965 | 16.7k | // If the types of the slices are different, we cannot pair them. |
11966 | 16.7k | if (LoadedType != Second->getLoadedType()) |
11967 | 0 | continue; |
11968 | 16.7k | |
11969 | 16.7k | // Check if the target supplies paired loads for this type. |
11970 | 16.7k | unsigned RequiredAlignment = 0; |
11971 | 16.7k | if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)16.7k ) { |
11972 | 40 | // move to the next pair, this type is hopeless. |
11973 | 40 | Second = nullptr; |
11974 | 40 | continue; |
11975 | 40 | } |
11976 | 16.7k | // Check if we meet the alignment requirement. |
11977 | 16.7k | if (16.7k RequiredAlignment > First->getAlignment()16.7k ) |
11978 | 0 | continue; |
11979 | 16.7k | |
11980 | 16.7k | // Check that both loads are next to each other in memory. |
11981 | 16.7k | if (16.7k !areSlicesNextToEachOther(*First, *Second)16.7k ) |
11982 | 0 | continue; |
11983 | 16.7k | |
11984 | 16.7k | assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); |
11985 | 16.7k | --GlobalLSCost.Loads; |
11986 | 16.7k | // Move to the next pair. |
11987 | 16.7k | Second = nullptr; |
11988 | 16.7k | } |
11989 | 16.7k | } |
11990 | | |
11991 | | /// \brief Check the profitability of all involved LoadedSlice. |
11992 | | /// Currently, it is considered profitable if there is exactly two |
11993 | | /// involved slices (1) which are (2) next to each other in memory, and |
11994 | | /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). |
11995 | | /// |
11996 | | /// Note: The order of the elements in \p LoadedSlices may be modified, but not |
11997 | | /// the elements themselves. |
11998 | | /// |
11999 | | /// FIXME: When the cost model will be mature enough, we can relax |
12000 | | /// constraints (1) and (2). |
12001 | | static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, |
12002 | 17.2k | const APInt &UsedBits, bool ForCodeSize) { |
12003 | 17.2k | unsigned NumberOfSlices = LoadedSlices.size(); |
12004 | 17.2k | if (StressLoadSlicing) |
12005 | 2 | return NumberOfSlices > 1; |
12006 | 17.2k | |
12007 | 17.2k | // Check (1). |
12008 | 17.2k | if (17.2k NumberOfSlices != 217.2k ) |
12009 | 401 | return false; |
12010 | 16.8k | |
12011 | 16.8k | // Check (2). |
12012 | 16.8k | if (16.8k !areUsedBitsDense(UsedBits)16.8k ) |
12013 | 2 | return false; |
12014 | 16.7k | |
12015 | 16.7k | // Check (3). |
12016 | 16.7k | LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); |
12017 | 16.7k | // The original code has one big load. |
12018 | 16.7k | OrigCost.Loads = 1; |
12019 | 50.3k | for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices50.3k ; ++CurrSlice33.5k ) { |
12020 | 33.5k | const LoadedSlice &LS = LoadedSlices[CurrSlice]; |
12021 | 33.5k | // Accumulate the cost of all the slices. |
12022 | 33.5k | LoadedSlice::Cost SliceCost(LS, ForCodeSize); |
12023 | 33.5k | GlobalSlicingCost += SliceCost; |
12024 | 33.5k | |
12025 | 33.5k | // Account as cost in the original configuration the gain obtained |
12026 | 33.5k | // with the current slices. |
12027 | 33.5k | OrigCost.addSliceGain(LS); |
12028 | 33.5k | } |
12029 | 17.2k | |
12030 | 17.2k | // If the target supports paired load, adjust the cost accordingly. |
12031 | 17.2k | adjustCostForPairing(LoadedSlices, GlobalSlicingCost); |
12032 | 17.2k | return OrigCost > GlobalSlicingCost; |
12033 | 17.2k | } |
12034 | | |
12035 | | /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) |
12036 | | /// operations, split it in the various pieces being extracted. |
12037 | | /// |
12038 | | /// This sort of thing is introduced by SROA. |
12039 | | /// This slicing takes care not to insert overlapping loads. |
12040 | | /// \pre LI is a simple load (i.e., not an atomic or volatile load). |
12041 | 5.69M | bool DAGCombiner::SliceUpLoad(SDNode *N) { |
12042 | 5.69M | if (Level < AfterLegalizeDAG) |
12043 | 3.46M | return false; |
12044 | 2.23M | |
12045 | 2.23M | LoadSDNode *LD = cast<LoadSDNode>(N); |
12046 | 2.23M | if (LD->isVolatile() || 2.23M !ISD::isNormalLoad(LD)2.21M || |
12047 | 1.73M | !LD->getValueType(0).isInteger()) |
12048 | 611k | return false; |
12049 | 1.62M | |
12050 | 1.62M | // Keep track of already used bits to detect overlapping values. |
12051 | 1.62M | // In that case, we will just abort the transformation. |
12052 | 1.62M | APInt UsedBits(LD->getValueSizeInBits(0), 0); |
12053 | 1.62M | |
12054 | 1.62M | SmallVector<LoadedSlice, 4> LoadedSlices; |
12055 | 1.62M | |
12056 | 1.62M | // Check if this load is used as several smaller chunks of bits. |
12057 | 1.62M | // Basically, look for uses in trunc or trunc(lshr) and record a new chain |
12058 | 1.62M | // of computation for each trunc. |
12059 | 1.62M | for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); |
12060 | 2.05M | UI != UIEnd2.05M ; ++UI436k ) { |
12061 | 2.04M | // Skip the uses of the chain. |
12062 | 2.04M | if (UI.getUse().getResNo() != 0) |
12063 | 399k | continue; |
12064 | 1.64M | |
12065 | 1.64M | SDNode *User = *UI; |
12066 | 1.64M | unsigned Shift = 0; |
12067 | 1.64M | |
12068 | 1.64M | // Check if this is a trunc(lshr). |
12069 | 1.64M | if (User->getOpcode() == ISD::SRL && 1.64M User->hasOneUse()23.2k && |
12070 | 1.64M | isa<ConstantSDNode>(User->getOperand(1))22.9k ) { |
12071 | 18.6k | Shift = User->getConstantOperandVal(1); |
12072 | 18.6k | User = *User->use_begin(); |
12073 | 18.6k | } |
12074 | 1.64M | |
12075 | 1.64M | // At this point, User is a Truncate, iff we encountered, trunc or |
12076 | 1.64M | // trunc(lshr). |
12077 | 1.64M | if (User->getOpcode() != ISD::TRUNCATE) |
12078 | 1.60M | return false; |
12079 | 37.7k | |
12080 | 37.7k | // The width of the type must be a power of 2 and greater than 8-bits. |
12081 | 37.7k | // Otherwise the load cannot be represented in LLVM IR. |
12082 | 37.7k | // Moreover, if we shifted with a non-8-bits multiple, the slice |
12083 | 37.7k | // will be across several bytes. We do not support that. |
12084 | 37.7k | unsigned Width = User->getValueSizeInBits(0); |
12085 | 37.7k | if (Width < 8 || 37.7k !isPowerOf2_32(Width)37.7k || (Shift & 0x7)37.7k ) |
12086 | 46 | return false; |
12087 | 37.7k | |
12088 | 37.7k | // Build the slice for this chain of computations. |
12089 | 37.7k | LoadedSlice LS(User, LD, Shift, &DAG); |
12090 | 37.7k | APInt CurrentUsedBits = LS.getUsedBits(); |
12091 | 37.7k | |
12092 | 37.7k | // Check if this slice overlaps with another. |
12093 | 37.7k | if ((CurrentUsedBits & UsedBits) != 0) |
12094 | 15 | return false; |
12095 | 37.7k | // Update the bits used globally. |
12096 | 37.7k | UsedBits |= CurrentUsedBits; |
12097 | 37.7k | |
12098 | 37.7k | // Check if the new slice would be legal. |
12099 | 37.7k | if (!LS.isLegal()) |
12100 | 352 | return false; |
12101 | 37.3k | |
12102 | 37.3k | // Record the slice. |
12103 | 37.3k | LoadedSlices.push_back(LS); |
12104 | 37.3k | } |
12105 | 1.62M | |
12106 | 1.62M | // Abort slicing if it does not seem to be profitable. |
12107 | 17.2k | if (17.2k !isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)17.2k ) |
12108 | 442 | return false; |
12109 | 16.7k | |
12110 | 16.7k | ++SlicedLoads; |
12111 | 16.7k | |
12112 | 16.7k | // Rewrite each chain to use an independent load. |
12113 | 16.7k | // By construction, each chain can be represented by a unique load. |
12114 | 16.7k | |
12115 | 16.7k | // Prepare the argument for the new token factor for all the slices. |
12116 | 16.7k | SmallVector<SDValue, 8> ArgChains; |
12117 | 16.7k | for (SmallVectorImpl<LoadedSlice>::const_iterator |
12118 | 16.7k | LSIt = LoadedSlices.begin(), |
12119 | 16.7k | LSItEnd = LoadedSlices.end(); |
12120 | 50.2k | LSIt != LSItEnd50.2k ; ++LSIt33.5k ) { |
12121 | 33.5k | SDValue SliceInst = LSIt->loadSlice(); |
12122 | 33.5k | CombineTo(LSIt->Inst, SliceInst, true); |
12123 | 33.5k | if (SliceInst.getOpcode() != ISD::LOAD) |
12124 | 1 | SliceInst = SliceInst.getOperand(0); |
12125 | 33.5k | assert(SliceInst->getOpcode() == ISD::LOAD && |
12126 | 33.5k | "It takes more than a zext to get to the loaded slice!!"); |
12127 | 33.5k | ArgChains.push_back(SliceInst.getValue(1)); |
12128 | 33.5k | } |
12129 | 5.69M | |
12130 | 5.69M | SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, |
12131 | 5.69M | ArgChains); |
12132 | 5.69M | DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); |
12133 | 5.69M | AddToWorklist(Chain.getNode()); |
12134 | 5.69M | return true; |
12135 | 5.69M | } |
12136 | | |
12137 | | /// Check to see if V is (and load (ptr), imm), where the load is having |
12138 | | /// specific bytes cleared out. If so, return the byte size being masked out |
12139 | | /// and the shift amount. |
12140 | | static std::pair<unsigned, unsigned> |
12141 | 30.3k | CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { |
12142 | 30.3k | std::pair<unsigned, unsigned> Result(0, 0); |
12143 | 30.3k | |
12144 | 30.3k | // Check for the structure we're looking for. |
12145 | 30.3k | if (V->getOpcode() != ISD::AND || |
12146 | 10.7k | !isa<ConstantSDNode>(V->getOperand(1)) || |
12147 | 9.42k | !ISD::isNormalLoad(V->getOperand(0).getNode())) |
12148 | 23.6k | return Result; |
12149 | 6.76k | |
12150 | 6.76k | // Check the chain and pointer. |
12151 | 6.76k | LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); |
12152 | 6.76k | if (LD->getBasePtr() != Ptr6.76k ) return Result2.98k ; // Not from same pointer. |
12153 | 3.78k | |
12154 | 3.78k | // The store should be chained directly to the load or be an operand of a |
12155 | 3.78k | // tokenfactor. |
12156 | 3.78k | if (3.78k LD == Chain.getNode()3.78k ) |
12157 | 1.92k | ; // ok. |
12158 | 1.86k | else if (1.86k Chain->getOpcode() != ISD::TokenFactor1.86k ) |
12159 | 6 | return Result; // Fail. |
12160 | 1.85k | else { |
12161 | 1.85k | bool isOk = false; |
12162 | 1.85k | for (const SDValue &ChainOp : Chain->op_values()) |
12163 | 4.21k | if (4.21k ChainOp.getNode() == LD4.21k ) { |
12164 | 1.85k | isOk = true; |
12165 | 1.85k | break; |
12166 | 1.85k | } |
12167 | 1.85k | if (!isOk1.85k ) return Result0 ; |
12168 | 3.77k | } |
12169 | 3.77k | |
12170 | 3.77k | // This only handles simple types. |
12171 | 3.77k | if (3.77k V.getValueType() != MVT::i16 && |
12172 | 3.19k | V.getValueType() != MVT::i32 && |
12173 | 325 | V.getValueType() != MVT::i64) |
12174 | 266 | return Result; |
12175 | 3.51k | |
12176 | 3.51k | // Check the constant mask. Invert it so that the bits being masked out are |
12177 | 3.51k | // 0 and the bits being kept are 1. Use getSExtValue so that leading bits |
12178 | 3.51k | // follow the sign bit for uniformity. |
12179 | 3.51k | uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); |
12180 | 3.51k | unsigned NotMaskLZ = countLeadingZeros(NotMask); |
12181 | 3.51k | if (NotMaskLZ & 73.51k ) return Result1.85k ; // Must be multiple of a byte. |
12182 | 1.66k | unsigned NotMaskTZ = countTrailingZeros(NotMask); |
12183 | 1.66k | if (NotMaskTZ & 71.66k ) return Result614 ; // Must be multiple of a byte. |
12184 | 1.04k | if (1.04k NotMaskLZ == 641.04k ) return Result0 ; // All zero mask. |
12185 | 1.04k | |
12186 | 1.04k | // See if we have a continuous run of bits. If so, we have 0*1+0* |
12187 | 1.04k | if (1.04k countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 641.04k ) |
12188 | 38 | return Result; |
12189 | 1.00k | |
12190 | 1.00k | // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. |
12191 | 1.00k | if (1.00k V.getValueType() != MVT::i64 && 1.00k NotMaskLZ994 ) |
12192 | 787 | NotMaskLZ -= 64-V.getValueSizeInBits(); |
12193 | 1.00k | |
12194 | 1.00k | unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; |
12195 | 1.00k | switch (MaskedBytes) { |
12196 | 1.00k | case 1: |
12197 | 1.00k | case 2: |
12198 | 1.00k | case 4: break; |
12199 | 2 | default: return Result; // All one mask, or 5-byte mask. |
12200 | 1.00k | } |
12201 | 1.00k | |
12202 | 1.00k | // Verify that the first bit starts at a multiple of mask so that the access |
12203 | 1.00k | // is aligned the same as the access width. |
12204 | 1.00k | if (1.00k NotMaskTZ && 1.00k NotMaskTZ/8 % MaskedBytes306 ) return Result0 ; |
12205 | 1.00k | |
12206 | 1.00k | Result.first = MaskedBytes; |
12207 | 1.00k | Result.second = NotMaskTZ/8; |
12208 | 1.00k | return Result; |
12209 | 1.00k | } |
12210 | | |
12211 | | /// Check to see if IVal is something that provides a value as specified by |
12212 | | /// MaskInfo. If so, replace the specified store with a narrower store of |
12213 | | /// truncated IVal. |
12214 | | static SDNode * |
12215 | | ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, |
12216 | | SDValue IVal, StoreSDNode *St, |
12217 | 1.00k | DAGCombiner *DC) { |
12218 | 1.00k | unsigned NumBytes = MaskInfo.first; |
12219 | 1.00k | unsigned ByteShift = MaskInfo.second; |
12220 | 1.00k | SelectionDAG &DAG = DC->getDAG(); |
12221 | 1.00k | |
12222 | 1.00k | // Check to see if IVal is all zeros in the part being masked in by the 'or' |
12223 | 1.00k | // that uses this. If not, this is not a replacement. |
12224 | 1.00k | APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), |
12225 | 1.00k | ByteShift*8, (ByteShift+NumBytes)*8); |
12226 | 1.00k | if (!DAG.MaskedValueIsZero(IVal, Mask)1.00k ) return nullptr2 ; |
12227 | 1.00k | |
12228 | 1.00k | // Check that it is legal on the target to do this. It is legal if the new |
12229 | 1.00k | // VT we're shrinking to (i8/i16/i32) is legal or we're still before type |
12230 | 1.00k | // legalization. |
12231 | 1.00k | MVT VT = MVT::getIntegerVT(NumBytes*8); |
12232 | 1.00k | if (!DC->isTypeLegal(VT)) |
12233 | 966 | return nullptr; |
12234 | 38 | |
12235 | 38 | // Okay, we can do this! Replace the 'St' store with a store of IVal that is |
12236 | 38 | // shifted by ByteShift and truncated down to NumBytes. |
12237 | 38 | if (38 ByteShift38 ) { |
12238 | 26 | SDLoc DL(IVal); |
12239 | 26 | IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, |
12240 | 26 | DAG.getConstant(ByteShift*8, DL, |
12241 | 26 | DC->getShiftAmountTy(IVal.getValueType()))); |
12242 | 26 | } |
12243 | 38 | |
12244 | 38 | // Figure out the offset for the store and the alignment of the access. |
12245 | 38 | unsigned StOffset; |
12246 | 38 | unsigned NewAlign = St->getAlignment(); |
12247 | 38 | |
12248 | 38 | if (DAG.getDataLayout().isLittleEndian()) |
12249 | 38 | StOffset = ByteShift; |
12250 | 38 | else |
12251 | 0 | StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; |
12252 | 38 | |
12253 | 38 | SDValue Ptr = St->getBasePtr(); |
12254 | 38 | if (StOffset38 ) { |
12255 | 26 | SDLoc DL(IVal); |
12256 | 26 | Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), |
12257 | 26 | Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); |
12258 | 26 | NewAlign = MinAlign(NewAlign, StOffset); |
12259 | 26 | } |
12260 | 1.00k | |
12261 | 1.00k | // Truncate down to the new size. |
12262 | 1.00k | IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); |
12263 | 1.00k | |
12264 | 1.00k | ++OpsNarrowed; |
12265 | 1.00k | return DAG |
12266 | 1.00k | .getStore(St->getChain(), SDLoc(St), IVal, Ptr, |
12267 | 1.00k | St->getPointerInfo().getWithOffset(StOffset), NewAlign) |
12268 | 1.00k | .getNode(); |
12269 | 1.00k | } |
12270 | | |
12271 | | /// Look for sequence of load / op / store where op is one of 'or', 'xor', and |
12272 | | /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try |
12273 | | /// narrowing the load and store if it would end up being a win for performance |
12274 | | /// or code size. |
12275 | 8.29M | SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { |
12276 | 8.29M | StoreSDNode *ST = cast<StoreSDNode>(N); |
12277 | 8.29M | if (ST->isVolatile()) |
12278 | 38.6k | return SDValue(); |
12279 | 8.25M | |
12280 | 8.25M | SDValue Chain = ST->getChain(); |
12281 | 8.25M | SDValue Value = ST->getValue(); |
12282 | 8.25M | SDValue Ptr = ST->getBasePtr(); |
12283 | 8.25M | EVT VT = Value.getValueType(); |
12284 | 8.25M | |
12285 | 8.25M | if (ST->isTruncatingStore() || 8.25M VT.isVector()7.32M || !Value.hasOneUse()4.35M ) |
12286 | 5.63M | return SDValue(); |
12287 | 2.61M | |
12288 | 2.61M | unsigned Opc = Value.getOpcode(); |
12289 | 2.61M | |
12290 | 2.61M | // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst |
12291 | 2.61M | // is a byte mask indicating a consecutive number of bytes, check to see if |
12292 | 2.61M | // Y is known to provide just those bytes. If so, we try to replace the |
12293 | 2.61M | // load + replace + store sequence with a single (narrower) store, which makes |
12294 | 2.61M | // the load dead. |
12295 | 2.61M | if (Opc == ISD::OR2.61M ) { |
12296 | 15.2k | std::pair<unsigned, unsigned> MaskedLoad; |
12297 | 15.2k | MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); |
12298 | 15.2k | if (MaskedLoad.first) |
12299 | 1.00k | if (SDNode *1.00k NewST1.00k = ShrinkLoadReplaceStoreWithStore(MaskedLoad, |
12300 | 1.00k | Value.getOperand(1), ST,this)) |
12301 | 33 | return SDValue(NewST, 0); |
12302 | 15.1k | |
12303 | 15.1k | // Or is commutative, so try swapping X and Y. |
12304 | 15.1k | MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); |
12305 | 15.1k | if (MaskedLoad.first) |
12306 | 5 | if (SDNode *5 NewST5 = ShrinkLoadReplaceStoreWithStore(MaskedLoad, |
12307 | 5 | Value.getOperand(0), ST,this)) |
12308 | 5 | return SDValue(NewST, 0); |
12309 | 2.61M | } |
12310 | 2.61M | |
12311 | 2.61M | if (2.61M (Opc != ISD::OR && 2.61M Opc != ISD::XOR2.60M && Opc != ISD::AND2.59M ) || |
12312 | 28.5k | Value.getOperand(1).getOpcode() != ISD::Constant) |
12313 | 2.60M | return SDValue(); |
12314 | 14.1k | |
12315 | 14.1k | SDValue N0 = Value.getOperand(0); |
12316 | 14.1k | if (ISD::isNormalLoad(N0.getNode()) && 14.1k N0.hasOneUse()4.36k && |
12317 | 14.1k | Chain == SDValue(N0.getNode(), 1)4.23k ) { |
12318 | 2.58k | LoadSDNode *LD = cast<LoadSDNode>(N0); |
12319 | 2.58k | if (LD->getBasePtr() != Ptr || |
12320 | 2.28k | LD->getPointerInfo().getAddrSpace() != |
12321 | 2.28k | ST->getPointerInfo().getAddrSpace()) |
12322 | 306 | return SDValue(); |
12323 | 2.28k | |
12324 | 2.28k | // Find the type to narrow it the load / op / store to. |
12325 | 2.28k | SDValue N1 = Value.getOperand(1); |
12326 | 2.28k | unsigned BitWidth = N1.getValueSizeInBits(); |
12327 | 2.28k | APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); |
12328 | 2.28k | if (Opc == ISD::AND) |
12329 | 680 | Imm ^= APInt::getAllOnesValue(BitWidth); |
12330 | 2.28k | if (Imm == 0 || 2.28k Imm.isAllOnesValue()2.28k ) |
12331 | 8 | return SDValue(); |
12332 | 2.27k | unsigned ShAmt = Imm.countTrailingZeros(); |
12333 | 2.27k | unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; |
12334 | 2.27k | unsigned NewBW = NextPowerOf2(MSB - ShAmt); |
12335 | 2.27k | EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); |
12336 | 2.27k | // The narrowing should be profitable, the load/store operation should be |
12337 | 2.27k | // legal (or custom) and the store size should be equal to the NewVT width. |
12338 | 10.4k | while (NewBW < BitWidth && |
12339 | 8.23k | (NewVT.getStoreSizeInBits() != NewBW || |
12340 | 2.73k | !TLI.isOperationLegalOrCustom(Opc, NewVT) || |
12341 | 10.4k | !TLI.isNarrowingProfitable(VT, NewVT)83 )) { |
12342 | 8.20k | NewBW = NextPowerOf2(NewBW); |
12343 | 8.20k | NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); |
12344 | 8.20k | } |
12345 | 2.27k | if (NewBW >= BitWidth) |
12346 | 2.24k | return SDValue(); |
12347 | 27 | |
12348 | 27 | // If the lsb changed does not start at the type bitwidth boundary, |
12349 | 27 | // start at the previous one. |
12350 | 27 | if (27 ShAmt % NewBW27 ) |
12351 | 18 | ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; |
12352 | 27 | APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, |
12353 | 27 | std::min(BitWidth, ShAmt + NewBW)); |
12354 | 27 | if ((Imm & Mask) == Imm27 ) { |
12355 | 20 | APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); |
12356 | 20 | if (Opc == ISD::AND) |
12357 | 12 | NewImm ^= APInt::getAllOnesValue(NewBW); |
12358 | 20 | uint64_t PtrOff = ShAmt / 8; |
12359 | 20 | // For big endian targets, we need to adjust the offset to the pointer to |
12360 | 20 | // load the correct bytes. |
12361 | 20 | if (DAG.getDataLayout().isBigEndian()) |
12362 | 0 | PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; |
12363 | 20 | |
12364 | 20 | unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); |
12365 | 20 | Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); |
12366 | 20 | if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) |
12367 | 0 | return SDValue(); |
12368 | 20 | |
12369 | 20 | SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), |
12370 | 20 | Ptr.getValueType(), Ptr, |
12371 | 20 | DAG.getConstant(PtrOff, SDLoc(LD), |
12372 | 20 | Ptr.getValueType())); |
12373 | 20 | SDValue NewLD = |
12374 | 20 | DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, |
12375 | 20 | LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, |
12376 | 20 | LD->getMemOperand()->getFlags(), LD->getAAInfo()); |
12377 | 20 | SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, |
12378 | 20 | DAG.getConstant(NewImm, SDLoc(Value), |
12379 | 20 | NewVT)); |
12380 | 20 | SDValue NewST = |
12381 | 20 | DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr, |
12382 | 20 | ST->getPointerInfo().getWithOffset(PtrOff), NewAlign); |
12383 | 20 | |
12384 | 20 | AddToWorklist(NewPtr.getNode()); |
12385 | 20 | AddToWorklist(NewLD.getNode()); |
12386 | 20 | AddToWorklist(NewVal.getNode()); |
12387 | 20 | WorklistRemover DeadNodes(*this); |
12388 | 20 | DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); |
12389 | 20 | ++OpsNarrowed; |
12390 | 20 | return NewST; |
12391 | 20 | } |
12392 | 2.58k | } |
12393 | 11.5k | |
12394 | 11.5k | return SDValue(); |
12395 | 11.5k | } |
12396 | | |
12397 | | /// For a given floating point load / store pair, if the load value isn't used |
12398 | | /// by any other operations, then consider transforming the pair to integer |
12399 | | /// load / store operations if the target deems the transformation profitable. |
12400 | 8.90M | SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { |
12401 | 8.90M | StoreSDNode *ST = cast<StoreSDNode>(N); |
12402 | 8.90M | SDValue Chain = ST->getChain(); |
12403 | 8.90M | SDValue Value = ST->getValue(); |
12404 | 8.90M | if (ISD::isNormalStore(ST) && 8.90M ISD::isNormalLoad(Value.getNode())7.92M && |
12405 | 697k | Value.hasOneUse() && |
12406 | 8.90M | Chain == SDValue(Value.getNode(), 1)503k ) { |
12407 | 180k | LoadSDNode *LD = cast<LoadSDNode>(Value); |
12408 | 180k | EVT VT = LD->getMemoryVT(); |
12409 | 180k | if (!VT.isFloatingPoint() || |
12410 | 1.00k | VT != ST->getMemoryVT() || |
12411 | 1.00k | LD->isNonTemporal() || |
12412 | 1.00k | ST->isNonTemporal() || |
12413 | 1.00k | LD->getPointerInfo().getAddrSpace() != 0 || |
12414 | 653 | ST->getPointerInfo().getAddrSpace() != 0) |
12415 | 179k | return SDValue(); |
12416 | 600 | |
12417 | 600 | EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); |
12418 | 600 | if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || |
12419 | 256 | !TLI.isOperationLegal(ISD::STORE, IntVT) || |
12420 | 256 | !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || |
12421 | 3 | !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) |
12422 | 597 | return SDValue(); |
12423 | 3 | |
12424 | 3 | unsigned LDAlign = LD->getAlignment(); |
12425 | 3 | unsigned STAlign = ST->getAlignment(); |
12426 | 3 | Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); |
12427 | 3 | unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); |
12428 | 3 | if (LDAlign < ABIAlign || 3 STAlign < ABIAlign3 ) |
12429 | 0 | return SDValue(); |
12430 | 3 | |
12431 | 3 | SDValue NewLD = |
12432 | 3 | DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), |
12433 | 3 | LD->getPointerInfo(), LDAlign); |
12434 | 3 | |
12435 | 3 | SDValue NewST = |
12436 | 3 | DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(), |
12437 | 3 | ST->getPointerInfo(), STAlign); |
12438 | 3 | |
12439 | 3 | AddToWorklist(NewLD.getNode()); |
12440 | 3 | AddToWorklist(NewST.getNode()); |
12441 | 3 | WorklistRemover DeadNodes(*this); |
12442 | 3 | DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); |
12443 | 3 | ++LdStFP2Int; |
12444 | 3 | return NewST; |
12445 | 3 | } |
12446 | 8.72M | |
12447 | 8.72M | return SDValue(); |
12448 | 8.72M | } |
12449 | | |
12450 | | // This is a helper function for visitMUL to check the profitability |
12451 | | // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). |
12452 | | // MulNode is the original multiply, AddNode is (add x, c1), |
12453 | | // and ConstNode is c2. |
12454 | | // |
12455 | | // If the (add x, c1) has multiple uses, we could increase |
12456 | | // the number of adds if we make this transformation. |
12457 | | // It would only be worth doing this if we can remove a |
12458 | | // multiply in the process. Check for that here. |
12459 | | // To illustrate: |
12460 | | // (A + c1) * c3 |
12461 | | // (A + c2) * c3 |
12462 | | // We're checking for cases where we have common "c3 * A" expressions. |
12463 | | bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, |
12464 | | SDValue &AddNode, |
12465 | 977 | SDValue &ConstNode) { |
12466 | 977 | APInt Val; |
12467 | 977 | |
12468 | 977 | // If the add only has one use, this would be OK to do. |
12469 | 977 | if (AddNode.getNode()->hasOneUse()) |
12470 | 509 | return true; |
12471 | 468 | |
12472 | 468 | // Walk all the users of the constant with which we're multiplying. |
12473 | 468 | for (SDNode *Use : ConstNode->uses()) 468 { |
12474 | 1.78k | if (Use == MulNode) // This use is the one we're on right now. Skip it. |
12475 | 452 | continue; |
12476 | 1.32k | |
12477 | 1.32k | if (1.32k Use->getOpcode() == ISD::MUL1.32k ) { // We have another multiply use. |
12478 | 1.20k | SDNode *OtherOp; |
12479 | 1.20k | SDNode *MulVar = AddNode.getOperand(0).getNode(); |
12480 | 1.20k | |
12481 | 1.20k | // OtherOp is what we're multiplying against the constant. |
12482 | 1.20k | if (Use->getOperand(0) == ConstNode) |
12483 | 0 | OtherOp = Use->getOperand(1).getNode(); |
12484 | 1.20k | else |
12485 | 1.20k | OtherOp = Use->getOperand(0).getNode(); |
12486 | 1.20k | |
12487 | 1.20k | // Check to see if multiply is with the same operand of our "add". |
12488 | 1.20k | // |
12489 | 1.20k | // ConstNode = CONST |
12490 | 1.20k | // Use = ConstNode * A <-- visiting Use. OtherOp is A. |
12491 | 1.20k | // ... |
12492 | 1.20k | // AddNode = (A + c1) <-- MulVar is A. |
12493 | 1.20k | // = AddNode * ConstNode <-- current visiting instruction. |
12494 | 1.20k | // |
12495 | 1.20k | // If we make this transformation, we will have a common |
12496 | 1.20k | // multiply (ConstNode * A) that we can save. |
12497 | 1.20k | if (OtherOp == MulVar) |
12498 | 30 | return true; |
12499 | 1.17k | |
12500 | 1.17k | // Now check to see if a future expansion will give us a common |
12501 | 1.17k | // multiply. |
12502 | 1.17k | // |
12503 | 1.17k | // ConstNode = CONST |
12504 | 1.17k | // AddNode = (A + c1) |
12505 | 1.17k | // ... = AddNode * ConstNode <-- current visiting instruction. |
12506 | 1.17k | // ... |
12507 | 1.17k | // OtherOp = (A + c2) |
12508 | 1.17k | // Use = OtherOp * ConstNode <-- visiting Use. |
12509 | 1.17k | // |
12510 | 1.17k | // If we make this transformation, we will have a common |
12511 | 1.17k | // multiply (CONST * A) after we also do the same transformation |
12512 | 1.17k | // to the "t2" instruction. |
12513 | 1.17k | if (1.17k OtherOp->getOpcode() == ISD::ADD && |
12514 | 1.01k | DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && |
12515 | 1.00k | OtherOp->getOperand(0).getNode() == MulVar) |
12516 | 2 | return true; |
12517 | 436 | } |
12518 | 1.78k | } |
12519 | 436 | |
12520 | 436 | // Didn't find a case where this would be profitable. |
12521 | 436 | return false; |
12522 | 436 | } |
12523 | | |
12524 | 59.3M | static SDValue peekThroughBitcast(SDValue V) { |
12525 | 59.4M | while (V.getOpcode() == ISD::BITCAST) |
12526 | 84.1k | V = V.getOperand(0); |
12527 | 59.3M | return V; |
12528 | 59.3M | } |
12529 | | |
12530 | | SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, |
12531 | 117k | unsigned NumStores) { |
12532 | 117k | SmallVector<SDValue, 8> Chains; |
12533 | 117k | SmallPtrSet<const SDNode *, 8> Visited; |
12534 | 117k | SDLoc StoreDL(StoreNodes[0].MemNode); |
12535 | 117k | |
12536 | 354k | for (unsigned i = 0; i < NumStores354k ; ++i236k ) { |
12537 | 236k | Visited.insert(StoreNodes[i].MemNode); |
12538 | 236k | } |
12539 | 117k | |
12540 | 117k | // don't include nodes that are children |
12541 | 354k | for (unsigned i = 0; i < NumStores354k ; ++i236k ) { |
12542 | 236k | if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0) |
12543 | 236k | Chains.push_back(StoreNodes[i].MemNode->getChain()); |
12544 | 236k | } |
12545 | 117k | |
12546 | 117k | assert(Chains.size() > 0 && "Chain should have generated a chain"); |
12547 | 117k | return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains); |
12548 | 117k | } |
12549 | | |
12550 | | bool DAGCombiner::MergeStoresOfConstantsOrVecElts( |
12551 | | SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, |
12552 | 114k | bool IsConstantSrc, bool UseVector, bool UseTrunc) { |
12553 | 114k | // Make sure we have something to merge. |
12554 | 114k | if (NumStores < 2) |
12555 | 0 | return false; |
12556 | 114k | |
12557 | 114k | // The latest Node in the DAG. |
12558 | 114k | SDLoc DL(StoreNodes[0].MemNode); |
12559 | 114k | |
12560 | 114k | int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; |
12561 | 114k | unsigned SizeInBits = NumStores * ElementSizeBytes * 8; |
12562 | 114k | unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements()11 : 1114k ; |
12563 | 114k | |
12564 | 114k | EVT StoreTy; |
12565 | 114k | if (UseVector114k ) { |
12566 | 28.7k | unsigned Elts = NumStores * NumMemElts; |
12567 | 28.7k | // Get the type for the merged vector store. |
12568 | 28.7k | StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); |
12569 | 28.7k | } else |
12570 | 86.0k | StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); |
12571 | 114k | |
12572 | 114k | SDValue StoredVal; |
12573 | 114k | if (UseVector114k ) { |
12574 | 28.7k | if (IsConstantSrc28.7k ) { |
12575 | 28.5k | SmallVector<SDValue, 8> BuildVector; |
12576 | 85.8k | for (unsigned I = 0; I != NumStores85.8k ; ++I57.2k ) { |
12577 | 57.2k | StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); |
12578 | 57.2k | SDValue Val = St->getValue(); |
12579 | 57.2k | // If constant is of the wrong type, convert it now. |
12580 | 57.2k | if (MemVT != Val.getValueType()57.2k ) { |
12581 | 60 | Val = peekThroughBitcast(Val); |
12582 | 60 | // Deal with constants of wrong size. |
12583 | 60 | if (ElementSizeBytes * 8 != Val.getValueSizeInBits()60 ) { |
12584 | 0 | EVT IntMemVT = |
12585 | 0 | EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); |
12586 | 0 | if (auto *CFP = dyn_cast<ConstantFPSDNode>(Val)) |
12587 | 0 | Val = DAG.getConstant( |
12588 | 0 | CFP->getValueAPF().bitcastToAPInt().zextOrTrunc( |
12589 | 0 | 8 * ElementSizeBytes), |
12590 | 0 | SDLoc(CFP), IntMemVT); |
12591 | 0 | else if (auto *0 C0 = dyn_cast<ConstantSDNode>(Val)) |
12592 | 0 | Val = DAG.getConstant( |
12593 | 0 | C->getAPIntValue().zextOrTrunc(8 * ElementSizeBytes), |
12594 | 0 | SDLoc(C), IntMemVT); |
12595 | 0 | } |
12596 | 60 | // Make sure correctly size type is the correct type. |
12597 | 60 | Val = DAG.getBitcast(MemVT, Val); |
12598 | 60 | } |
12599 | 57.2k | BuildVector.push_back(Val); |
12600 | 57.2k | } |
12601 | 0 | StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS |
12602 | 28.5k | : ISD::BUILD_VECTOR, |
12603 | 28.5k | DL, StoreTy, BuildVector); |
12604 | 28.7k | } else { |
12605 | 129 | SmallVector<SDValue, 8> Ops; |
12606 | 449 | for (unsigned i = 0; i < NumStores449 ; ++i320 ) { |
12607 | 320 | StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); |
12608 | 320 | SDValue Val = peekThroughBitcast(St->getValue()); |
12609 | 320 | // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of |
12610 | 320 | // type MemVT. If the underlying value is not the correct |
12611 | 320 | // type, but it is an extraction of an appropriate vector we |
12612 | 320 | // can recast Val to be of the correct type. This may require |
12613 | 320 | // converting between EXTRACT_VECTOR_ELT and |
12614 | 320 | // EXTRACT_SUBVECTOR. |
12615 | 320 | if ((MemVT != Val.getValueType()) && |
12616 | 2 | (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || |
12617 | 320 | Val.getOpcode() == ISD::EXTRACT_SUBVECTOR0 )) { |
12618 | 2 | SDValue Vec = Val.getOperand(0); |
12619 | 2 | EVT MemVTScalarTy = MemVT.getScalarType(); |
12620 | 2 | // We may need to add a bitcast here to get types to line up. |
12621 | 2 | if (MemVTScalarTy != Vec.getValueType()2 ) { |
12622 | 2 | unsigned Elts = Vec.getValueType().getSizeInBits() / |
12623 | 2 | MemVTScalarTy.getSizeInBits(); |
12624 | 2 | EVT NewVecTy = |
12625 | 2 | EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts); |
12626 | 2 | Vec = DAG.getBitcast(NewVecTy, Vec); |
12627 | 2 | } |
12628 | 0 | auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR |
12629 | 2 | : ISD::EXTRACT_VECTOR_ELT; |
12630 | 2 | Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1)); |
12631 | 2 | } |
12632 | 320 | Ops.push_back(Val); |
12633 | 320 | } |
12634 | 129 | |
12635 | 129 | // Build the extracted vector elements back into a vector. |
12636 | 11 | StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS |
12637 | 118 | : ISD::BUILD_VECTOR, |
12638 | 129 | DL, StoreTy, Ops); |
12639 | 129 | } |
12640 | 114k | } else { |
12641 | 86.0k | // We should always use a vector store when merging extracted vector |
12642 | 86.0k | // elements, so this path implies a store of constants. |
12643 | 86.0k | assert(IsConstantSrc && "Merged vector elements should use vector store"); |
12644 | 86.0k | |
12645 | 86.0k | APInt StoreInt(SizeInBits, 0); |
12646 | 86.0k | |
12647 | 86.0k | // Construct a single integer constant which is made of the smaller |
12648 | 86.0k | // constant inputs. |
12649 | 86.0k | bool IsLE = DAG.getDataLayout().isLittleEndian(); |
12650 | 258k | for (unsigned i = 0; i < NumStores258k ; ++i172k ) { |
12651 | 172k | unsigned Idx = IsLE ? (NumStores - 1 - i)172k : i24 ; |
12652 | 172k | StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); |
12653 | 172k | |
12654 | 172k | SDValue Val = St->getValue(); |
12655 | 172k | StoreInt <<= ElementSizeBytes * 8; |
12656 | 172k | if (ConstantSDNode *C172k = dyn_cast<ConstantSDNode>(Val)) { |
12657 | 172k | StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits); |
12658 | 172k | } else if (ConstantFPSDNode *146 C146 = dyn_cast<ConstantFPSDNode>(Val)) { |
12659 | 146 | StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits); |
12660 | 146 | } else { |
12661 | 0 | llvm_unreachable("Invalid constant element type"); |
12662 | 146 | } |
12663 | 172k | } |
12664 | 86.0k | |
12665 | 86.0k | // Create the new Load and Store operations. |
12666 | 86.0k | StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); |
12667 | 86.0k | } |
12668 | 114k | |
12669 | 114k | LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; |
12670 | 114k | SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores); |
12671 | 114k | |
12672 | 114k | // make sure we use trunc store if it's necessary to be legal. |
12673 | 114k | SDValue NewStore; |
12674 | 114k | if (!UseTrunc114k ) { |
12675 | 106k | NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), |
12676 | 106k | FirstInChain->getPointerInfo(), |
12677 | 106k | FirstInChain->getAlignment()); |
12678 | 114k | } else { // Must be realized as a trunc store |
12679 | 8.61k | EVT LegalizedStoredValueTy = |
12680 | 8.61k | TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); |
12681 | 8.61k | unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits(); |
12682 | 8.61k | ConstantSDNode *C = cast<ConstantSDNode>(StoredVal); |
12683 | 8.61k | SDValue ExtendedStoreVal = |
12684 | 8.61k | DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL, |
12685 | 8.61k | LegalizedStoredValueTy); |
12686 | 8.61k | NewStore = DAG.getTruncStore( |
12687 | 8.61k | NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), |
12688 | 8.61k | FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, |
12689 | 8.61k | FirstInChain->getAlignment(), |
12690 | 8.61k | FirstInChain->getMemOperand()->getFlags()); |
12691 | 8.61k | } |
12692 | 114k | |
12693 | 114k | // Replace all merged stores with the new store. |
12694 | 345k | for (unsigned i = 0; i < NumStores345k ; ++i230k ) |
12695 | 230k | CombineTo(StoreNodes[i].MemNode, NewStore); |
12696 | 114k | |
12697 | 114k | AddToWorklist(NewChain.getNode()); |
12698 | 114k | return true; |
12699 | 114k | } |
12700 | | |
12701 | | void DAGCombiner::getStoreMergeCandidates( |
12702 | 2.41M | StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) { |
12703 | 2.41M | // This holds the base pointer, index, and the offset in bytes from the base |
12704 | 2.41M | // pointer. |
12705 | 2.41M | BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); |
12706 | 2.41M | EVT MemVT = St->getMemoryVT(); |
12707 | 2.41M | |
12708 | 2.41M | SDValue Val = peekThroughBitcast(St->getValue()); |
12709 | 2.41M | // We must have a base and an offset. |
12710 | 2.41M | if (!BasePtr.getBase().getNode()) |
12711 | 0 | return; |
12712 | 2.41M | |
12713 | 2.41M | // Do not handle stores to undef base pointers. |
12714 | 2.41M | if (2.41M BasePtr.getBase().isUndef()2.41M ) |
12715 | 823 | return; |
12716 | 2.41M | |
12717 | 2.41M | bool IsConstantSrc = isa<ConstantSDNode>(Val) || 2.41M isa<ConstantFPSDNode>(Val)1.12M ; |
12718 | 2.41M | bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || |
12719 | 2.40M | Val.getOpcode() == ISD::EXTRACT_SUBVECTOR); |
12720 | 2.41M | bool IsLoadSrc = isa<LoadSDNode>(Val); |
12721 | 2.41M | BaseIndexOffset LBasePtr; |
12722 | 2.41M | // Match on loadbaseptr if relevant. |
12723 | 2.41M | EVT LoadVT; |
12724 | 2.41M | if (IsLoadSrc2.41M ) { |
12725 | 414k | auto *Ld = cast<LoadSDNode>(Val); |
12726 | 414k | LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); |
12727 | 414k | LoadVT = Ld->getMemoryVT(); |
12728 | 414k | // Load and store should be the same type. |
12729 | 414k | if (MemVT != LoadVT) |
12730 | 26.5k | return; |
12731 | 2.39M | } |
12732 | 2.39M | auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, |
12733 | 33.6M | int64_t &Offset) -> bool { |
12734 | 33.6M | if (Other->isVolatile() || 33.6M Other->isIndexed()33.5M ) |
12735 | 21.1k | return false; |
12736 | 33.5M | SDValue Val = peekThroughBitcast(Other->getValue()); |
12737 | 33.5M | // Allow merging constants of different types as integers. |
12738 | 13.9M | bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT()) |
12739 | 19.6M | : Other->getMemoryVT() != MemVT; |
12740 | 33.5M | if (IsLoadSrc33.5M ) { |
12741 | 1.78M | if (NoTypeMatch) |
12742 | 577k | return false; |
12743 | 1.20M | // The Load's Base Ptr must also match |
12744 | 1.20M | if (LoadSDNode *1.20M OtherLd1.20M = dyn_cast<LoadSDNode>(Val)) { |
12745 | 1.11M | auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); |
12746 | 1.11M | if (LoadVT != OtherLd->getMemoryVT()) |
12747 | 2.22k | return false; |
12748 | 1.11M | if (1.11M !(LBasePtr.equalBaseIndex(LPtr, DAG))1.11M ) |
12749 | 19.8k | return false; |
12750 | 1.20M | } else |
12751 | 88.1k | return false; |
12752 | 32.8M | } |
12753 | 32.8M | if (32.8M IsConstantSrc32.8M ) { |
12754 | 12.1M | if (NoTypeMatch) |
12755 | 6.78M | return false; |
12756 | 5.40M | if (5.40M !(isa<ConstantSDNode>(Val) || 5.40M isa<ConstantFPSDNode>(Val)1.60M )) |
12757 | 1.53M | return false; |
12758 | 24.5M | } |
12759 | 24.5M | if (24.5M IsExtractVecSrc24.5M ) { |
12760 | 19.6M | // Do not merge truncated stores here. |
12761 | 19.6M | if (Other->isTruncatingStore()) |
12762 | 16.4k | return false; |
12763 | 19.5M | if (19.5M !MemVT.bitsEq(Val.getValueType())19.5M ) |
12764 | 2.82M | return false; |
12765 | 16.7M | if (16.7M Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT && |
12766 | 16.7M | Val.getOpcode() != ISD::EXTRACT_SUBVECTOR) |
12767 | 4.91k | return false; |
12768 | 21.7M | } |
12769 | 21.7M | Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); |
12770 | 21.7M | return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); |
12771 | 21.7M | }; |
12772 | 2.39M | |
12773 | 2.39M | // We looking for a root node which is an ancestor to all mergable |
12774 | 2.39M | // stores. We search up through a load, to our root and then down |
12775 | 2.39M | // through all children. For instance we will find Store{1,2,3} if |
12776 | 2.39M | // St is Store1, Store2. or Store3 where the root is not a load |
12777 | 2.39M | // which always true for nonvolatile ops. TODO: Expand |
12778 | 2.39M | // the search to find all valid candidates through multiple layers of loads. |
12779 | 2.39M | // |
12780 | 2.39M | // Root |
12781 | 2.39M | // |-------|-------| |
12782 | 2.39M | // Load Load Store3 |
12783 | 2.39M | // | | |
12784 | 2.39M | // Store1 Store2 |
12785 | 2.39M | // |
12786 | 2.39M | // FIXME: We should be able to climb and |
12787 | 2.39M | // descend TokenFactors to find candidates as well. |
12788 | 2.39M | |
12789 | 2.39M | SDNode *RootNode = (St->getChain()).getNode(); |
12790 | 2.39M | |
12791 | 2.39M | if (LoadSDNode *Ldn2.39M = dyn_cast<LoadSDNode>(RootNode)) { |
12792 | 381k | RootNode = Ldn->getChain().getNode(); |
12793 | 1.27M | for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E1.27M ; ++I898k ) |
12794 | 898k | if (898k I.getOperandNo() == 0 && 898k isa<LoadSDNode>(*I)892k ) // walk down chain |
12795 | 2.57M | for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); 412k I2 != E22.57M ; ++I22.16M ) |
12796 | 2.16M | if (2.16M I2.getOperandNo() == 02.16M ) |
12797 | 1.66M | if (StoreSDNode *1.66M OtherST1.66M = dyn_cast<StoreSDNode>(*I2)) { |
12798 | 1.40M | BaseIndexOffset Ptr; |
12799 | 1.40M | int64_t PtrDiff; |
12800 | 1.40M | if (CandidateMatch(OtherST, Ptr, PtrDiff)) |
12801 | 591k | StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); |
12802 | 898k | } |
12803 | 381k | } else |
12804 | 38.2M | for (auto I = RootNode->use_begin(), E = RootNode->use_end(); 2.01M I != E38.2M ; ++I36.2M ) |
12805 | 36.2M | if (36.2M I.getOperandNo() == 036.2M ) |
12806 | 36.1M | if (StoreSDNode *36.1M OtherST36.1M = dyn_cast<StoreSDNode>(*I)) { |
12807 | 32.1M | BaseIndexOffset Ptr; |
12808 | 32.1M | int64_t PtrDiff; |
12809 | 32.1M | if (CandidateMatch(OtherST, Ptr, PtrDiff)) |
12810 | 21.0M | StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); |
12811 | 2.01M | } |
12812 | 2.41M | } |
12813 | | |
12814 | | // We need to check that merging these stores does not cause a loop in |
12815 | | // the DAG. Any store candidate may depend on another candidate |
12816 | | // indirectly through its operand (we already consider dependencies |
12817 | | // through the chain). Check in parallel by searching up from |
12818 | | // non-chain operands of candidates. |
12819 | | bool DAGCombiner::checkMergeStoreCandidatesForDependencies( |
12820 | 1.14M | SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { |
12821 | 1.14M | // FIXME: We should be able to truncate a full search of |
12822 | 1.14M | // predecessors by doing a BFS and keeping tabs the originating |
12823 | 1.14M | // stores from which worklist nodes come from in a similar way to |
12824 | 1.14M | // TokenFactor simplfication. |
12825 | 1.14M | |
12826 | 1.14M | SmallPtrSet<const SDNode *, 16> Visited; |
12827 | 1.14M | SmallVector<const SDNode *, 8> Worklist; |
12828 | 1.14M | unsigned int Max = 8192; |
12829 | 1.14M | // Search Ops of store candidates. |
12830 | 20.2M | for (unsigned i = 0; i < NumStores20.2M ; ++i19.1M ) { |
12831 | 19.1M | SDNode *n = StoreNodes[i].MemNode; |
12832 | 19.1M | // Potential loops may happen only through non-chain operands |
12833 | 76.5M | for (unsigned j = 1; j < n->getNumOperands()76.5M ; ++j57.4M ) |
12834 | 57.4M | Worklist.push_back(n->getOperand(j).getNode()); |
12835 | 19.1M | } |
12836 | 1.14M | // Search through DAG. We can stop early if we find a store node. |
12837 | 20.2M | for (unsigned i = 0; i < NumStores20.2M ; ++i19.1M ) { |
12838 | 19.1M | if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, |
12839 | 19.1M | Max)) |
12840 | 2.99k | return false; |
12841 | 19.1M | // Check if we ended early, failing conservatively if so. |
12842 | 19.1M | if (19.1M Visited.size() >= Max19.1M ) |
12843 | 0 | return false; |
12844 | 19.1M | } |
12845 | 1.14M | return true; |
12846 | 1.14M | } |
12847 | | |
// Try to merge St with other stores of the same element type at
// consecutive addresses into a single wider store. Three source kinds
// are handled, in priority order: constant values (fused into one wide
// constant store), extracted vector elements (fused into a vector
// store), and values coming from consecutive loads (fused into one wide
// load feeding one wide store). Returns true if any merge was made.
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  EVT MemVT = St->getMemoryVT();
  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  // A merged store would be at least twice this wide; bail if that can
  // never be legal for the target.
  if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
    return false;

  bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
      Attribute::NoImplicitFloat);

  // This function cannot currently deal with non-byte-sized memory sizes.
  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
    return false;

  if (!MemVT.isSimple())
    return false;

  // Perform an early exit check. Do not bother looking at stored values that
  // are not constants, loads, or extracted vector elements.
  SDValue StoredVal = peekThroughBitcast(St->getValue());
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
                       isa<ConstantFPSDNode>(StoredVal);
  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);

  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
    return false;

  SmallVector<MemOpLink, 8> StoreNodes;
  // Find potential store merge candidates by searching through chain sub-DAG
  getStoreMergeCandidates(St, StoreNodes);

  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;

  // Sort the memory operands according to their distance from the
  // base pointer.
  std::sort(StoreNodes.begin(), StoreNodes.end(),
            [](MemOpLink LHS, MemOpLink RHS) {
              return LHS.OffsetFromBase < RHS.OffsetFromBase;
            });

  // Store Merge attempts to merge the lowest stores. This generally
  // works out as if successful, as the remaining stores are checked
  // after the first collection of stores is merged. However, in the
  // case that a non-mergeable store is found first, e.g., {p[-2],
  // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
  // mergeable cases. To prevent this, we prune such stores from the
  // front of StoreNodes here.

  bool RV = false;
  while (StoreNodes.size() > 1) {
    // Skip leading candidates that have no consecutive successor.
    unsigned StartIdx = 0;
    while ((StartIdx + 1 < StoreNodes.size()) &&
           StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
               StoreNodes[StartIdx + 1].OffsetFromBase)
      ++StartIdx;

    // Bail if we don't have enough candidates to merge.
    if (StartIdx + 1 >= StoreNodes.size())
      return RV;

    if (StartIdx)
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);

    // Scan the memory operations on the chain and find the first
    // non-consecutive store memory address.
    unsigned NumConsecutiveStores = 1;
    int64_t StartAddress = StoreNodes[0].OffsetFromBase;
    // Check that the addresses are consecutive starting from the second
    // element in the list of stores.
    for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
      NumConsecutiveStores = i + 1;
    }

    if (NumConsecutiveStores < 2) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumConsecutiveStores);
      continue;
    }

    // Check that we can merge these candidates without causing a cycle
    if (!checkMergeStoreCandidatesForDependencies(StoreNodes,
                                                  NumConsecutiveStores)) {
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumConsecutiveStores);
      continue;
    }

    // The node with the lowest store address.
    LLVMContext &Context = *DAG.getContext();
    const DataLayout &DL = DAG.getDataLayout();

    // Store the constants into memory as one consecutive store.
    if (IsConstantSrc) {
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
      unsigned LastLegalType = 1;
      unsigned LastLegalVectorType = 1;
      bool LastIntegerTrunc = false;
      bool NonZero = false;
      // Index of the first zero element that follows a non-zero one;
      // used below to avoid dropping a zero run when pruning.
      unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
        StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue StoredVal = ST->getValue();
        bool IsElementZero = false;
        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
          IsElementZero = C->isNullValue();
        else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
          IsElementZero = C->getConstantFPValue()->isNullValue();
        if (IsElementZero) {
          if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
            FirstZeroAfterNonZero = i;
        }
        NonZero |= !IsElementZero;

        // Find a legal type for the constant store.
        unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
        EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
        bool IsFast = false;
        if (TLI.isTypeLegal(StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                   FirstStoreAlign, &IsFast) &&
            IsFast) {
          LastIntegerTrunc = false;
          LastLegalType = i + 1;
          // Or check whether a truncstore is legal.
        } else if (TLI.getTypeAction(Context, StoreTy) ==
                   TargetLowering::TypePromoteInteger) {
          EVT LegalizedStoredValueTy =
              TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
          if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
              TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
              TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                     FirstStoreAlign, &IsFast) &&
              IsFast) {
            LastIntegerTrunc = true;
            LastLegalType = i + 1;
          }
        }

        // We only use vectors if the constant is known to be zero or the target
        // allows it and the function is not marked with the noimplicitfloat
        // attribute.
        if ((!NonZero ||
             TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
            !NoVectors) {
          // Find a legal type for the vector store.
          unsigned Elts = (i + 1) * NumMemElts;
          EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
          if (TLI.isTypeLegal(Ty) &&
              TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
              TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                     FirstStoreAlign, &IsFast) &&
              IsFast)
            LastLegalVectorType = i + 1;
        }
      }

      bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
      unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;

      // Check if we found a legal integer type that creates a meaningful merge.
      if (NumElem < 2) {
        // We know that candidate stores are in order and of correct
        // shape. While there is no mergeable sequence from the
        // beginning one may start later in the sequence. The only
        // reason a merge of size N could have failed where another of
        // the same size would not have, is if the alignment has
        // improved or we've dropped a non-zero value. Drop as many
        // candidates as we can here.
        unsigned NumSkip = 1;
        while (
            (NumSkip < NumConsecutiveStores) &&
            (NumSkip < FirstZeroAfterNonZero) &&
            (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
          NumSkip++;
        }
        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
        continue;
      }

      bool Merged = MergeStoresOfConstantsOrVecElts(
          StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
      RV |= Merged;

      // Remove merged stores for next iteration.
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
      continue;
    }

    // When extracting multiple vector elements, try to store them
    // in one vector store rather than a sequence of scalar stores.
    if (IsExtractVecSrc) {
      LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
      unsigned FirstStoreAS = FirstInChain->getAddressSpace();
      unsigned FirstStoreAlign = FirstInChain->getAlignment();
      unsigned NumStoresToMerge = 1;
      for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue StVal = peekThroughBitcast(St->getValue());
        // This restriction could be loosened.
        // Bail out if any stored values are not elements extracted from a
        // vector. It should be possible to handle mixed sources, but load
        // sources need more careful handling (see the block of code below that
        // handles consecutive loads).
        if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
            StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
          return RV;

        // Find a legal type for the vector store.
        unsigned Elts = (i + 1) * NumMemElts;
        EVT Ty =
            EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
        bool IsFast;
        if (TLI.isTypeLegal(Ty) &&
            TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
            TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                   FirstStoreAlign, &IsFast) &&
            IsFast)
          NumStoresToMerge = i + 1;
      }

      // Check if we found a legal integer type that creates a meaningful merge.
      if (NumStoresToMerge < 2) {
        // We know that candidate stores are in order and of correct
        // shape. While there is no mergeable sequence from the
        // beginning one may start later in the sequence. The only
        // reason a merge of size N could have failed where another of
        // the same size would not have, is if the alignment has
        // improved. Drop as many candidates as we can here.
        unsigned NumSkip = 1;
        while ((NumSkip < NumConsecutiveStores) &&
               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
          NumSkip++;

        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
        continue;
      }

      bool Merged = MergeStoresOfConstantsOrVecElts(
          StoreNodes, MemVT, NumStoresToMerge, false, true, false);
      if (!Merged) {
        StoreNodes.erase(StoreNodes.begin(),
                         StoreNodes.begin() + NumStoresToMerge);
        continue;
      }
      // Remove merged stores for next iteration.
      StoreNodes.erase(StoreNodes.begin(),
                       StoreNodes.begin() + NumStoresToMerge);
      RV = true;
      continue;
    }

    // Below we handle the case of multiple consecutive stores that
    // come from multiple consecutive loads. We merge them into a single
    // wide load and a single wide store.

    // Look for load nodes which are used by the stored values.
    SmallVector<MemOpLink, 8> LoadNodes;

    // Find acceptable loads. Loads need to have the same chain (token factor),
    // must not be zext, volatile, indexed, and they must be consecutive.
    BaseIndexOffset LdBasePtr;
    for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue Val = peekThroughBitcast(St->getValue());
      LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
      if (!Ld)
        break;

      // Loads must only have one use.
      if (!Ld->hasNUsesOfValue(1, 0))
        break;

      // The memory operands must not be volatile.
      if (Ld->isVolatile() || Ld->isIndexed())
        break;

      // The stored memory type must be the same.
      if (Ld->getMemoryVT() != MemVT)
        break;

      BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
      // If this is not the first ptr that we check.
      int64_t LdOffset = 0;
      if (LdBasePtr.getBase().getNode()) {
        // The base ptr must be the same.
        if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
          break;
      } else {
        // Check that all other base pointers are the same as this one.
        LdBasePtr = LdPtr;
      }

      // We found a potential memory operand to merge.
      LoadNodes.push_back(MemOpLink(Ld, LdOffset));
    }

    if (LoadNodes.size() < 2) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
      continue;
    }

    // If we have load/store pair instructions and we only have two values,
    // don't bother merging.
    unsigned RequiredAlignment;
    if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
        StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
      continue;
    }
    LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
    unsigned FirstStoreAS = FirstInChain->getAddressSpace();
    unsigned FirstStoreAlign = FirstInChain->getAlignment();
    LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
    unsigned FirstLoadAS = FirstLoad->getAddressSpace();
    unsigned FirstLoadAlign = FirstLoad->getAlignment();

    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. These variables hold the index in
    // the store node array.
    unsigned LastConsecutiveLoad = 1;
    // This variable refers to the size and not index in the array.
    unsigned LastLegalVectorType = 1;
    unsigned LastLegalIntegerType = 1;
    bool isDereferenceable = true;
    bool DoIntegerTruncate = false;
    StartAddress = LoadNodes[0].OffsetFromBase;
    SDValue FirstChain = FirstLoad->getChain();
    for (unsigned i = 1; i < LoadNodes.size(); ++i) {
      // All loads must share the same chain.
      if (LoadNodes[i].MemNode->getChain() != FirstChain)
        break;

      int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
      LastConsecutiveLoad = i;

      if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
        isDereferenceable = false;

      // Find a legal type for the vector store.
      unsigned Elts = (i + 1) * NumMemElts;
      EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);

      bool IsFastSt, IsFastLd;
      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                 FirstStoreAlign, &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                 FirstLoadAlign, &IsFastLd) &&
          IsFastLd) {
        LastLegalVectorType = i + 1;
      }

      // Find a legal type for the integer store.
      unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
      StoreTy = EVT::getIntegerVT(Context, SizeInBits);
      if (TLI.isTypeLegal(StoreTy) &&
          TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                 FirstStoreAlign, &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                 FirstLoadAlign, &IsFastLd) &&
          IsFastLd) {
        LastLegalIntegerType = i + 1;
        DoIntegerTruncate = false;
        // Or check whether a truncstore and extload is legal.
      } else if (TLI.getTypeAction(Context, StoreTy) ==
                 TargetLowering::TypePromoteInteger) {
        EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy);
        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
            TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
            TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy,
                               StoreTy) &&
            TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
                               StoreTy) &&
            TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                   FirstStoreAlign, &IsFastSt) &&
            IsFastSt &&
            TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                                   FirstLoadAlign, &IsFastLd) &&
            IsFastLd) {
          LastLegalIntegerType = i + 1;
          DoIntegerTruncate = true;
        }
      }
    }

    // Only use vector types if the vector type is larger than the integer type.
    // If they are the same, use integers.
    bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
    unsigned LastLegalType =
        std::max(LastLegalVectorType, LastLegalIntegerType);

    // We add +1 here because the LastXXX variables refer to location while
    // the NumElem refers to array/index size.
    unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
    NumElem = std::min(LastLegalType, NumElem);

    if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have is if the alignment or either
      // the load or store has improved. Drop as many candidates as we
      // can here.
      unsigned NumSkip = 1;
      while ((NumSkip < LoadNodes.size()) &&
             (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
             (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
        NumSkip++;
      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
      continue;
    }

    // Find if it is better to use vectors or integers to load and store
    // to memory.
    EVT JointMemOpVT;
    if (UseVectorTy) {
      // Find a legal type for the vector store.
      unsigned Elts = NumElem * NumMemElts;
      JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
    } else {
      unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
      JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
    }

    SDLoc LoadDL(LoadNodes[0].MemNode);
    SDLoc StoreDL(StoreNodes[0].MemNode);

    // The merged loads are required to have the same incoming chain, so
    // using the first's chain is acceptable.

    SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
    AddToWorklist(NewStoreChain.getNode());

    // Only propagate dereferenceability if every merged load had it.
    MachineMemOperand::Flags MMOFlags = isDereferenceable ?
                                          MachineMemOperand::MODereferenceable:
                                          MachineMemOperand::MONone;

    SDValue NewLoad, NewStore;
    if (UseVectorTy || !DoIntegerTruncate) {
      NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
                            FirstLoad->getBasePtr(),
                            FirstLoad->getPointerInfo(), FirstLoadAlign,
                            MMOFlags);
      NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad,
                              FirstInChain->getBasePtr(),
                              FirstInChain->getPointerInfo(), FirstStoreAlign);
    } else { // This must be the truncstore/extload case
      EVT ExtendedTy =
          TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
      NewLoad =
          DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(),
                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
                         JointMemOpVT, FirstLoadAlign, MMOFlags);
      NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
                                   FirstInChain->getBasePtr(),
                                   FirstInChain->getPointerInfo(), JointMemOpVT,
                                   FirstInChain->getAlignment(),
                                   FirstInChain->getMemOperand()->getFlags());
    }

    // Transfer chain users from old loads to the new load.
    for (unsigned i = 0; i < NumElem; ++i) {
      LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
      DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
                                    SDValue(NewLoad.getNode(), 1));
    }

    // Replace the all stores with the new store. Recursively remove
    // corresponding value if its no longer used.
    for (unsigned i = 0; i < NumElem; ++i) {
      SDValue Val = StoreNodes[i].MemNode->getOperand(1);
      CombineTo(StoreNodes[i].MemNode, NewStore);
      if (Val.getNode()->use_empty())
        recursivelyDeleteUnusedNodes(Val.getNode());
    }

    RV = true;
    StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
  }
  return RV;
}
13351 | | |
13352 | 941k | SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { |
13353 | 941k | SDLoc SL(ST); |
13354 | 941k | SDValue ReplStore; |
13355 | 941k | |
13356 | 941k | // Replace the chain to avoid dependency. |
13357 | 941k | if (ST->isTruncatingStore()941k ) { |
13358 | 5.12k | ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), |
13359 | 5.12k | ST->getBasePtr(), ST->getMemoryVT(), |
13360 | 5.12k | ST->getMemOperand()); |
13361 | 941k | } else { |
13362 | 936k | ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), |
13363 | 936k | ST->getMemOperand()); |
13364 | 936k | } |
13365 | 941k | |
13366 | 941k | // Create token to keep both nodes around. |
13367 | 941k | SDValue Token = DAG.getNode(ISD::TokenFactor, SL, |
13368 | 941k | MVT::Other, ST->getChain(), ReplStore); |
13369 | 941k | |
13370 | 941k | // Make sure the new and old chains are cleaned up. |
13371 | 941k | AddToWorklist(Token.getNode()); |
13372 | 941k | |
13373 | 941k | // Don't add users to work list. |
13374 | 941k | return CombineTo(ST, Token, false); |
13375 | 941k | } |
13376 | | |
13377 | 8.46k | SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { |
13378 | 8.46k | SDValue Value = ST->getValue(); |
13379 | 8.46k | if (Value.getOpcode() == ISD::TargetConstantFP) |
13380 | 0 | return SDValue(); |
13381 | 8.46k | |
13382 | 8.46k | SDLoc DL(ST); |
13383 | 8.46k | |
13384 | 8.46k | SDValue Chain = ST->getChain(); |
13385 | 8.46k | SDValue Ptr = ST->getBasePtr(); |
13386 | 8.46k | |
13387 | 8.46k | const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value); |
13388 | 8.46k | |
13389 | 8.46k | // NOTE: If the original store is volatile, this transform must not increase |
13390 | 8.46k | // the number of stores. For example, on x86-32 an f64 can be stored in one |
13391 | 8.46k | // processor operation but an i64 (which is not legal) requires two. So the |
13392 | 8.46k | // transform should not be done in this case. |
13393 | 8.46k | |
13394 | 8.46k | SDValue Tmp; |
13395 | 8.46k | switch (CFP->getSimpleValueType(0).SimpleTy) { |
13396 | 0 | default: |
13397 | 0 | llvm_unreachable("Unknown FP type"); |
13398 | 250 | case MVT::f16: // We don't do this for these yet. |
13399 | 250 | case MVT::f80: |
13400 | 250 | case MVT::f128: |
13401 | 250 | case MVT::ppcf128: |
13402 | 250 | return SDValue(); |
13403 | 1.81k | case MVT::f32: |
13404 | 1.81k | if ((isTypeLegal(MVT::i32) && 1.81k !LegalOperations1.81k && !ST->isVolatile()1.81k ) || |
13405 | 1.81k | TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)14 ) { |
13406 | 1.81k | ; |
13407 | 1.81k | Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). |
13408 | 1.81k | bitcastToAPInt().getZExtValue(), SDLoc(CFP), |
13409 | 1.81k | MVT::i32); |
13410 | 1.81k | return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); |
13411 | 1.81k | } |
13412 | 0 |
|
13413 | 0 | return SDValue(); |
13414 | 6.40k | case MVT::f64: |
13415 | 6.40k | if ((TLI.isTypeLegal(MVT::i64) && 6.40k !LegalOperations6.06k && |
13416 | 6.06k | !ST->isVolatile()) || |
13417 | 6.40k | TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)351 ) { |
13418 | 6.06k | ; |
13419 | 6.06k | Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). |
13420 | 6.06k | getZExtValue(), SDLoc(CFP), MVT::i64); |
13421 | 6.06k | return DAG.getStore(Chain, DL, Tmp, |
13422 | 6.06k | Ptr, ST->getMemOperand()); |
13423 | 6.06k | } |
13424 | 340 | |
13425 | 340 | if (340 !ST->isVolatile() && |
13426 | 340 | TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)332 ) { |
13427 | 328 | // Many FP stores are not made apparent until after legalize, e.g. for |
13428 | 328 | // argument passing. Since this is so common, custom legalize the |
13429 | 328 | // 64-bit integer store into two 32-bit stores. |
13430 | 328 | uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); |
13431 | 328 | SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); |
13432 | 328 | SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); |
13433 | 328 | if (DAG.getDataLayout().isBigEndian()) |
13434 | 50 | std::swap(Lo, Hi); |
13435 | 328 | |
13436 | 328 | unsigned Alignment = ST->getAlignment(); |
13437 | 328 | MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); |
13438 | 328 | AAMDNodes AAInfo = ST->getAAInfo(); |
13439 | 328 | |
13440 | 328 | SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), |
13441 | 328 | ST->getAlignment(), MMOFlags, AAInfo); |
13442 | 328 | Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, |
13443 | 328 | DAG.getConstant(4, DL, Ptr.getValueType())); |
13444 | 328 | Alignment = MinAlign(Alignment, 4U); |
13445 | 328 | SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, |
13446 | 328 | ST->getPointerInfo().getWithOffset(4), |
13447 | 328 | Alignment, MMOFlags, AAInfo); |
13448 | 328 | return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, |
13449 | 328 | St0, St1); |
13450 | 328 | } |
13451 | 12 | |
13452 | 12 | return SDValue(); |
13453 | 8.46k | } |
13454 | 8.46k | } |
13455 | | |
13456 | 8.90M | SDValue DAGCombiner::visitSTORE(SDNode *N) { |
13457 | 8.90M | StoreSDNode *ST = cast<StoreSDNode>(N); |
13458 | 8.90M | SDValue Chain = ST->getChain(); |
13459 | 8.90M | SDValue Value = ST->getValue(); |
13460 | 8.90M | SDValue Ptr = ST->getBasePtr(); |
13461 | 8.90M | |
13462 | 8.90M | // If this is a store of a bit convert, store the input value if the |
13463 | 8.90M | // resultant store does not need a higher alignment than the original. |
13464 | 8.90M | if (Value.getOpcode() == ISD::BITCAST && 8.90M !ST->isTruncatingStore()171k && |
13465 | 8.90M | ST->isUnindexed()170k ) { |
13466 | 170k | EVT SVT = Value.getOperand(0).getValueType(); |
13467 | 170k | if (((!LegalOperations && 170k !ST->isVolatile()8.97k ) || |
13468 | 162k | TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) && |
13469 | 170k | TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)12.0k ) { |
13470 | 2.71k | unsigned OrigAlign = ST->getAlignment(); |
13471 | 2.71k | bool Fast = false; |
13472 | 2.71k | if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT, |
13473 | 2.71k | ST->getAddressSpace(), OrigAlign, &Fast) && |
13474 | 2.71k | Fast2.69k ) { |
13475 | 2.35k | return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, |
13476 | 2.35k | ST->getPointerInfo(), OrigAlign, |
13477 | 2.35k | ST->getMemOperand()->getFlags(), ST->getAAInfo()); |
13478 | 2.35k | } |
13479 | 8.90M | } |
13480 | 170k | } |
13481 | 8.90M | |
13482 | 8.90M | // Turn 'store undef, Ptr' -> nothing. |
13483 | 8.90M | if (8.90M Value.isUndef() && 8.90M ST->isUnindexed()2.09k ) |
13484 | 2.09k | return Chain; |
13485 | 8.90M | |
13486 | 8.90M | // Try to infer better alignment information than the store already has. |
13487 | 8.90M | if (8.90M OptLevel != CodeGenOpt::None && 8.90M ST->isUnindexed()8.88M ) { |
13488 | 8.87M | if (unsigned Align8.87M = DAG.InferPtrAlignment(Ptr)) { |
13489 | 2.16M | if (Align > ST->getAlignment()2.16M ) { |
13490 | 29.7k | SDValue NewStore = |
13491 | 29.7k | DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), |
13492 | 29.7k | ST->getMemoryVT(), Align, |
13493 | 29.7k | ST->getMemOperand()->getFlags(), ST->getAAInfo()); |
13494 | 29.7k | if (NewStore.getNode() != N) |
13495 | 0 | return CombineTo(ST, NewStore, true); |
13496 | 8.90M | } |
13497 | 2.16M | } |
13498 | 8.87M | } |
13499 | 8.90M | |
13500 | 8.90M | // Try transforming a pair floating point load / store ops to integer |
13501 | 8.90M | // load / store ops. |
13502 | 8.90M | if (SDValue 8.90M NewST8.90M = TransformFPLoadStorePair(N)) |
13503 | 3 | return NewST; |
13504 | 8.90M | |
13505 | 8.90M | if (8.90M ST->isUnindexed()8.90M ) { |
13506 | 8.88M | // Walk up chain skipping non-aliasing memory nodes, on this store and any |
13507 | 8.88M | // adjacent stores. |
13508 | 8.88M | if (findBetterNeighborChains(ST)8.88M ) { |
13509 | 445k | // replaceStoreChain uses CombineTo, which handled all of the worklist |
13510 | 445k | // manipulation. Return the original node to not do anything else. |
13511 | 445k | return SDValue(ST, 0); |
13512 | 445k | } |
13513 | 8.44M | Chain = ST->getChain(); |
13514 | 8.44M | } |
13515 | 8.90M | |
13516 | 8.90M | // FIXME: is there such a thing as a truncating indexed store? |
13517 | 8.45M | if (8.45M ST->isTruncatingStore() && 8.45M ST->isUnindexed()961k && |
13518 | 8.45M | Value.getValueType().isInteger()957k ) { |
13519 | 957k | // See if we can simplify the input to this truncstore with knowledge that |
13520 | 957k | // only the low bits are being used. For example: |
13521 | 957k | // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" |
13522 | 957k | SDValue Shorter = DAG.GetDemandedBits( |
13523 | 957k | Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), |
13524 | 957k | ST->getMemoryVT().getScalarSizeInBits())); |
13525 | 957k | AddToWorklist(Value.getNode()); |
13526 | 957k | if (Shorter.getNode()) |
13527 | 2.47k | return DAG.getTruncStore(Chain, SDLoc(N), Shorter, |
13528 | 2.47k | Ptr, ST->getMemoryVT(), ST->getMemOperand()); |
13529 | 954k | |
13530 | 954k | // Otherwise, see if we can simplify the operation with |
13531 | 954k | // SimplifyDemandedBits, which only works if the value has a single use. |
13532 | 954k | if (954k SimplifyDemandedBits( |
13533 | 954k | Value, |
13534 | 954k | APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), |
13535 | 954k | ST->getMemoryVT().getScalarSizeInBits()))) { |
13536 | 16.8k | // Re-visit the store if anything changed and the store hasn't been merged |
13537 | 16.8k | // with another node (N is deleted) SimplifyDemandedBits will add Value's |
13538 | 16.8k | // node back to the worklist if necessary, but we also need to re-visit |
13539 | 16.8k | // the Store node itself. |
13540 | 16.8k | if (N->getOpcode() != ISD::DELETED_NODE) |
13541 | 16.8k | AddToWorklist(N); |
13542 | 16.8k | return SDValue(N, 0); |
13543 | 16.8k | } |
13544 | 8.43M | } |
13545 | 8.43M | |
13546 | 8.43M | // If this is a load followed by a store to the same location, then the store |
13547 | 8.43M | // is dead/noop. |
13548 | 8.43M | if (LoadSDNode *8.43M Ld8.43M = dyn_cast<LoadSDNode>(Value)) { |
13549 | 1.09M | if (Ld->getBasePtr() == Ptr && 1.09M ST->getMemoryVT() == Ld->getMemoryVT()4.08k && |
13550 | 1.09M | ST->isUnindexed()4.04k && !ST->isVolatile()4.04k && |
13551 | 1.09M | // There can't be any side effects between the load and store, such as |
13552 | 1.09M | // a call or store. |
13553 | 1.09M | Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))1.45k ) { |
13554 | 239 | // The store is dead, remove it. |
13555 | 239 | return Chain; |
13556 | 239 | } |
13557 | 8.43M | } |
13558 | 8.43M | |
13559 | 8.43M | if (StoreSDNode *8.43M ST18.43M = dyn_cast<StoreSDNode>(Chain)) { |
13560 | 252k | if (ST->isUnindexed() && 252k !ST->isVolatile()251k && ST1->isUnindexed()239k && |
13561 | 252k | !ST1->isVolatile()239k && ST1->getBasePtr() == Ptr238k && |
13562 | 252k | ST->getMemoryVT() == ST1->getMemoryVT()2.10k ) { |
13563 | 1.70k | // If this is a store followed by a store with the same value to the same |
13564 | 1.70k | // location, then the store is dead/noop. |
13565 | 1.70k | if (ST1->getValue() == Value1.70k ) { |
13566 | 303 | // The store is dead, remove it. |
13567 | 303 | return Chain; |
13568 | 303 | } |
13569 | 1.40k | |
13570 | 1.40k | // If this is a store who's preceeding store to the same location |
13571 | 1.40k | // and no one other node is chained to that store we can effectively |
13572 | 1.40k | // drop the store. Do not remove stores to undef as they may be used as |
13573 | 1.40k | // data sinks. |
13574 | 1.40k | if (1.40k OptLevel != CodeGenOpt::None && 1.40k ST1->hasOneUse()1.31k && |
13575 | 1.40k | !ST1->getBasePtr().isUndef()502 ) { |
13576 | 424 | // ST1 is fully overwritten and can be elided. Combine with it's chain |
13577 | 424 | // value. |
13578 | 424 | CombineTo(ST1, ST1->getChain()); |
13579 | 424 | return SDValue(); |
13580 | 424 | } |
13581 | 8.43M | } |
13582 | 252k | } |
13583 | 8.43M | |
13584 | 8.43M | // If this is an FP_ROUND or TRUNC followed by a store, fold this into a |
13585 | 8.43M | // truncating store. We can do this even if this is already a truncstore. |
13586 | 8.43M | if (8.43M (Value.getOpcode() == ISD::FP_ROUND || 8.43M Value.getOpcode() == ISD::TRUNCATE8.43M ) |
13587 | 8.43M | && Value.getNode()->hasOneUse()99.8k && ST->isUnindexed()69.6k && |
13588 | 69.3k | TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), |
13589 | 8.43M | ST->getMemoryVT())) { |
13590 | 51.1k | return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), |
13591 | 51.1k | Ptr, ST->getMemoryVT(), ST->getMemOperand()); |
13592 | 51.1k | } |
13593 | 8.38M | |
13594 | 8.38M | // Always perform this optimization before types are legal. If the target |
13595 | 8.38M | // prefers, also try this after legalization to catch stores that were created |
13596 | 8.38M | // by intrinsics or other nodes. |
13597 | 8.38M | if (8.38M !LegalTypes || 8.38M (TLI.mergeStoresAfterLegalization())2.93M ) { |
13598 | 5.68M | while (true5.68M ) { |
13599 | 5.68M | // There can be multiple store sequences on the same chain. |
13600 | 5.68M | // Keep trying to merge store sequences until we are unable to do so |
13601 | 5.68M | // or until we merge the last store on the chain. |
13602 | 5.68M | bool Changed = MergeConsecutiveStores(ST); |
13603 | 5.68M | if (!Changed5.68M ) break5.61M ; |
13604 | 74.7k | // Return N as merge only uses CombineTo and no worklist clean |
13605 | 74.7k | // up is necessary. |
13606 | 74.7k | if (74.7k N->getOpcode() == ISD::DELETED_NODE || 74.7k !isa<StoreSDNode>(N)22.0k ) |
13607 | 61.0k | return SDValue(N, 0); |
13608 | 5.68M | } |
13609 | 5.67M | } |
13610 | 8.38M | |
13611 | 8.38M | // Try transforming N to an indexed store. |
13612 | 8.32M | if (8.32M CombineToPreIndexedLoadStore(N) || 8.32M CombineToPostIndexedLoadStore(N)8.31M ) |
13613 | 23.5k | return SDValue(N, 0); |
13614 | 8.30M | |
13615 | 8.30M | // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' |
13616 | 8.30M | // |
13617 | 8.30M | // Make sure to do this only after attempting to merge stores in order to |
13618 | 8.30M | // avoid changing the types of some subset of stores due to visit order, |
13619 | 8.30M | // preventing their merging. |
13620 | 8.30M | if (8.30M isa<ConstantFPSDNode>(ST->getValue())8.30M ) { |
13621 | 8.46k | if (SDValue NewSt = replaceStoreOfFPConstant(ST)) |
13622 | 8.20k | return NewSt; |
13623 | 8.29M | } |
13624 | 8.29M | |
13625 | 8.29M | if (SDValue 8.29M NewSt8.29M = splitMergedValStore(ST)) |
13626 | 0 | return NewSt; |
13627 | 8.29M | |
13628 | 8.29M | return ReduceLoadOpStoreWidth(N); |
13629 | 8.29M | } |
13630 | | |
13631 | | /// For the instruction sequence of store below, F and I values |
13632 | | /// are bundled together as an i64 value before being stored into memory. |
13633 | | /// Sometimes it is more efficent to generate separate stores for F and I, |
13634 | | /// which can remove the bitwise instructions or sink them to colder places. |
13635 | | /// |
13636 | | /// (store (or (zext (bitcast F to i32) to i64), |
13637 | | /// (shl (zext I to i64), 32)), addr) --> |
13638 | | /// (store F, addr) and (store I, addr+4) |
13639 | | /// |
13640 | | /// Similarly, splitting for other merged store can also be beneficial, like: |
13641 | | /// For pair of {i32, i32}, i64 store --> two i32 stores. |
13642 | | /// For pair of {i32, i16}, i64 store --> two i32 stores. |
13643 | | /// For pair of {i16, i16}, i32 store --> two i16 stores. |
13644 | | /// For pair of {i16, i8}, i32 store --> two i16 stores. |
13645 | | /// For pair of {i8, i8}, i16 store --> two i8 stores. |
13646 | | /// |
13647 | | /// We allow each target to determine specifically which kind of splitting is |
13648 | | /// supported. |
13649 | | /// |
13650 | | /// The store patterns are commonly seen from the simple code snippet below |
13651 | | /// if only std::make_pair(...) is sroa transformed before inlined into hoo. |
13652 | | /// void goo(const std::pair<int, float> &); |
13653 | | /// hoo() { |
13654 | | /// ... |
13655 | | /// goo(std::make_pair(tmp, ftmp)); |
13656 | | /// ... |
13657 | | /// } |
13658 | | /// |
13659 | 8.29M | SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { |
13660 | 8.29M | if (OptLevel == CodeGenOpt::None) |
13661 | 12.1k | return SDValue(); |
13662 | 8.27M | |
13663 | 8.27M | SDValue Val = ST->getValue(); |
13664 | 8.27M | SDLoc DL(ST); |
13665 | 8.27M | |
13666 | 8.27M | // Match OR operand. |
13667 | 8.27M | if (!Val.getValueType().isScalarInteger() || 8.27M Val.getOpcode() != ISD::OR5.10M ) |
13668 | 8.24M | return SDValue(); |
13669 | 32.4k | |
13670 | 32.4k | // Match SHL operand and get Lower and Higher parts of Val. |
13671 | 32.4k | SDValue Op1 = Val.getOperand(0); |
13672 | 32.4k | SDValue Op2 = Val.getOperand(1); |
13673 | 32.4k | SDValue Lo, Hi; |
13674 | 32.4k | if (Op1.getOpcode() != ISD::SHL32.4k ) { |
13675 | 28.6k | std::swap(Op1, Op2); |
13676 | 28.6k | if (Op1.getOpcode() != ISD::SHL) |
13677 | 24.4k | return SDValue(); |
13678 | 7.99k | } |
13679 | 7.99k | Lo = Op2; |
13680 | 7.99k | Hi = Op1.getOperand(0); |
13681 | 7.99k | if (!Op1.hasOneUse()) |
13682 | 250 | return SDValue(); |
13683 | 7.74k | |
13684 | 7.74k | // Match shift amount to HalfValBitSize. |
13685 | 7.74k | unsigned HalfValBitSize = Val.getValueSizeInBits() / 2; |
13686 | 7.74k | ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1)); |
13687 | 7.74k | if (!ShAmt || 7.74k ShAmt->getAPIntValue() != HalfValBitSize5.13k ) |
13688 | 5.95k | return SDValue(); |
13689 | 1.79k | |
13690 | 1.79k | // Lo and Hi are zero-extended from int with size less equal than 32 |
13691 | 1.79k | // to i64. |
13692 | 1.79k | if (1.79k Lo.getOpcode() != ISD::ZERO_EXTEND || 1.79k !Lo.hasOneUse()645 || |
13693 | 645 | !Lo.getOperand(0).getValueType().isScalarInteger() || |
13694 | 645 | Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize || |
13695 | 1.79k | Hi.getOpcode() != ISD::ZERO_EXTEND645 || !Hi.hasOneUse()145 || |
13696 | 145 | !Hi.getOperand(0).getValueType().isScalarInteger() || |
13697 | 145 | Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize) |
13698 | 1.65k | return SDValue(); |
13699 | 145 | |
13700 | 145 | // Use the EVT of low and high parts before bitcast as the input |
13701 | 145 | // of target query. |
13702 | 145 | EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST) |
13703 | 0 | ? Lo.getOperand(0).getValueType() |
13704 | 145 | : Lo.getValueType(); |
13705 | 145 | EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST) |
13706 | 0 | ? Hi.getOperand(0).getValueType() |
13707 | 145 | : Hi.getValueType(); |
13708 | 145 | if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) |
13709 | 145 | return SDValue(); |
13710 | 0 |
|
13711 | 0 | // Start to split store. |
13712 | 0 | unsigned Alignment = ST->getAlignment(); |
13713 | 0 | MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); |
13714 | 0 | AAMDNodes AAInfo = ST->getAAInfo(); |
13715 | 0 |
|
13716 | 0 | // Change the sizes of Lo and Hi's value types to HalfValBitSize. |
13717 | 0 | EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize); |
13718 | 0 | Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0)); |
13719 | 0 | Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0)); |
13720 | 0 |
|
13721 | 0 | SDValue Chain = ST->getChain(); |
13722 | 0 | SDValue Ptr = ST->getBasePtr(); |
13723 | 0 | // Lower value store. |
13724 | 0 | SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), |
13725 | 0 | ST->getAlignment(), MMOFlags, AAInfo); |
13726 | 0 | Ptr = |
13727 | 0 | DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, |
13728 | 0 | DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType())); |
13729 | 0 | // Higher value store. |
13730 | 0 | SDValue St1 = |
13731 | 0 | DAG.getStore(St0, DL, Hi, Ptr, |
13732 | 0 | ST->getPointerInfo().getWithOffset(HalfValBitSize / 8), |
13733 | 0 | Alignment / 2, MMOFlags, AAInfo); |
13734 | 0 | return St1; |
13735 | 0 | } |
13736 | | |
13737 | 60.5k | SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { |
13738 | 60.5k | SDValue InVec = N->getOperand(0); |
13739 | 60.5k | SDValue InVal = N->getOperand(1); |
13740 | 60.5k | SDValue EltNo = N->getOperand(2); |
13741 | 60.5k | SDLoc DL(N); |
13742 | 60.5k | |
13743 | 60.5k | // If the inserted element is an UNDEF, just use the input vector. |
13744 | 60.5k | if (InVal.isUndef()) |
13745 | 252 | return InVec; |
13746 | 60.2k | |
13747 | 60.2k | EVT VT = InVec.getValueType(); |
13748 | 60.2k | |
13749 | 60.2k | // Remove redundant insertions: |
13750 | 60.2k | // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x |
13751 | 60.2k | if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
13752 | 60.2k | InVec == InVal.getOperand(0)17.8k && EltNo == InVal.getOperand(1)27 ) |
13753 | 7 | return InVec; |
13754 | 60.2k | |
13755 | 60.2k | // Check that we know which element is being inserted |
13756 | 60.2k | if (60.2k !isa<ConstantSDNode>(EltNo)60.2k ) |
13757 | 570 | return SDValue(); |
13758 | 59.7k | unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); |
13759 | 59.7k | |
13760 | 59.7k | // Canonicalize insert_vector_elt dag nodes. |
13761 | 59.7k | // Example: |
13762 | 59.7k | // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) |
13763 | 59.7k | // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) |
13764 | 59.7k | // |
13765 | 59.7k | // Do this only if the child insert_vector node has one use; also |
13766 | 59.7k | // do this only if indices are both constants and Idx1 < Idx0. |
13767 | 59.7k | if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && 59.7k InVec.hasOneUse()13.2k |
13768 | 59.7k | && isa<ConstantSDNode>(InVec.getOperand(2))13.1k ) { |
13769 | 13.1k | unsigned OtherElt = InVec.getConstantOperandVal(2); |
13770 | 13.1k | if (Elt < OtherElt13.1k ) { |
13771 | 206 | // Swap nodes. |
13772 | 206 | SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, |
13773 | 206 | InVec.getOperand(0), InVal, EltNo); |
13774 | 206 | AddToWorklist(NewOp.getNode()); |
13775 | 206 | return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), |
13776 | 206 | VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); |
13777 | 206 | } |
13778 | 59.4k | } |
13779 | 59.4k | |
13780 | 59.4k | // If we can't generate a legal BUILD_VECTOR, exit |
13781 | 59.4k | if (59.4k LegalOperations && 59.4k !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)24.4k ) |
13782 | 24.3k | return SDValue(); |
13783 | 35.1k | |
13784 | 35.1k | // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially |
13785 | 35.1k | // be converted to a BUILD_VECTOR). Fill in the Ops vector with the |
13786 | 35.1k | // vector elements. |
13787 | 35.1k | SmallVector<SDValue, 8> Ops; |
13788 | 35.1k | // Do not combine these two vectors if the output vector will not replace |
13789 | 35.1k | // the input vector. |
13790 | 35.1k | if (InVec.getOpcode() == ISD::BUILD_VECTOR && 35.1k InVec.hasOneUse()12.3k ) { |
13791 | 12.0k | Ops.append(InVec.getNode()->op_begin(), |
13792 | 12.0k | InVec.getNode()->op_end()); |
13793 | 35.1k | } else if (23.0k InVec.isUndef()23.0k ) { |
13794 | 9.57k | unsigned NElts = VT.getVectorNumElements(); |
13795 | 9.57k | Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); |
13796 | 23.0k | } else { |
13797 | 13.5k | return SDValue(); |
13798 | 13.5k | } |
13799 | 21.6k | |
13800 | 21.6k | // Insert the element |
13801 | 21.6k | if (21.6k Elt < Ops.size()21.6k ) { |
13802 | 21.6k | // All the operands of BUILD_VECTOR must have the same type; |
13803 | 21.6k | // we enforce that here. |
13804 | 21.6k | EVT OpVT = Ops[0].getValueType(); |
13805 | 21.6k | Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT)12.2k : InVal9.33k ; |
13806 | 21.6k | } |
13807 | 60.5k | |
13808 | 60.5k | // Return the new vector |
13809 | 60.5k | return DAG.getBuildVector(VT, DL, Ops); |
13810 | 60.5k | } |
13811 | | |
13812 | | SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( |
13813 | 426 | SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { |
13814 | 426 | assert(!OriginalLoad->isVolatile()); |
13815 | 426 | |
13816 | 426 | EVT ResultVT = EVE->getValueType(0); |
13817 | 426 | EVT VecEltVT = InVecVT.getVectorElementType(); |
13818 | 426 | unsigned Align = OriginalLoad->getAlignment(); |
13819 | 426 | unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( |
13820 | 426 | VecEltVT.getTypeForEVT(*DAG.getContext())); |
13821 | 426 | |
13822 | 426 | if (NewAlign > Align || 426 !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)424 ) |
13823 | 18 | return SDValue(); |
13824 | 408 | |
13825 | 408 | ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? |
13826 | 408 | ISD::NON_EXTLOAD0 : ISD::EXTLOAD408 ; |
13827 | 408 | if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) |
13828 | 5 | return SDValue(); |
13829 | 403 | |
13830 | 403 | Align = NewAlign; |
13831 | 403 | |
13832 | 403 | SDValue NewPtr = OriginalLoad->getBasePtr(); |
13833 | 403 | SDValue Offset; |
13834 | 403 | EVT PtrType = NewPtr.getValueType(); |
13835 | 403 | MachinePointerInfo MPI; |
13836 | 403 | SDLoc DL(EVE); |
13837 | 403 | if (auto *ConstEltNo403 = dyn_cast<ConstantSDNode>(EltNo)) { |
13838 | 398 | int Elt = ConstEltNo->getZExtValue(); |
13839 | 398 | unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; |
13840 | 398 | Offset = DAG.getConstant(PtrOff, DL, PtrType); |
13841 | 398 | MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); |
13842 | 403 | } else { |
13843 | 5 | Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); |
13844 | 5 | Offset = DAG.getNode( |
13845 | 5 | ISD::MUL, DL, PtrType, Offset, |
13846 | 5 | DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); |
13847 | 5 | MPI = OriginalLoad->getPointerInfo(); |
13848 | 5 | } |
13849 | 403 | NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); |
13850 | 403 | |
13851 | 403 | // The replacement we need to do here is a little tricky: we need to |
13852 | 403 | // replace an extractelement of a load with a load. |
13853 | 403 | // Use ReplaceAllUsesOfValuesWith to do the replacement. |
13854 | 403 | // Note that this replacement assumes that the extractvalue is the only |
13855 | 403 | // use of the load; that's okay because we don't want to perform this |
13856 | 403 | // transformation in other cases anyway. |
13857 | 403 | SDValue Load; |
13858 | 403 | SDValue Chain; |
13859 | 403 | if (ResultVT.bitsGT(VecEltVT)403 ) { |
13860 | 0 | // If the result type of vextract is wider than the load, then issue an |
13861 | 0 | // extending load instead. |
13862 | 0 | ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, |
13863 | 0 | VecEltVT) |
13864 | 0 | ? ISD::ZEXTLOAD |
13865 | 0 | : ISD::EXTLOAD; |
13866 | 0 | Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, |
13867 | 0 | OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, |
13868 | 0 | Align, OriginalLoad->getMemOperand()->getFlags(), |
13869 | 0 | OriginalLoad->getAAInfo()); |
13870 | 0 | Chain = Load.getValue(1); |
13871 | 403 | } else { |
13872 | 403 | Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, |
13873 | 403 | MPI, Align, OriginalLoad->getMemOperand()->getFlags(), |
13874 | 403 | OriginalLoad->getAAInfo()); |
13875 | 403 | Chain = Load.getValue(1); |
13876 | 403 | if (ResultVT.bitsLT(VecEltVT)) |
13877 | 0 | Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); |
13878 | 403 | else |
13879 | 403 | Load = DAG.getBitcast(ResultVT, Load); |
13880 | 403 | } |
13881 | 426 | WorklistRemover DeadNodes(*this); |
13882 | 426 | SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; |
13883 | 426 | SDValue To[] = { Load, Chain }; |
13884 | 426 | DAG.ReplaceAllUsesOfValuesWith(From, To, 2); |
13885 | 426 | // Since we're explicitly calling ReplaceAllUses, add the new node to the |
13886 | 426 | // worklist explicitly as well. |
13887 | 426 | AddToWorklist(Load.getNode()); |
13888 | 426 | AddUsersToWorklist(Load.getNode()); // Add users too |
13889 | 426 | // Make sure to revisit this node to clean it up; it will usually be dead. |
13890 | 426 | AddToWorklist(EVE); |
13891 | 426 | ++OpsNarrowed; |
13892 | 426 | return SDValue(EVE, 0); |
13893 | 426 | } |
13894 | | |
13895 | 349k | SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { |
13896 | 349k | // (vextract (scalar_to_vector val, 0) -> val |
13897 | 349k | SDValue InVec = N->getOperand(0); |
13898 | 349k | EVT VT = InVec.getValueType(); |
13899 | 349k | EVT NVT = N->getValueType(0); |
13900 | 349k | |
13901 | 349k | if (InVec.isUndef()) |
13902 | 66 | return DAG.getUNDEF(NVT); |
13903 | 349k | |
13904 | 349k | if (349k InVec.getOpcode() == ISD::SCALAR_TO_VECTOR349k ) { |
13905 | 335 | // Check if the result type doesn't match the inserted element type. A |
13906 | 335 | // SCALAR_TO_VECTOR may truncate the inserted element and the |
13907 | 335 | // EXTRACT_VECTOR_ELT may widen the extracted vector. |
13908 | 335 | SDValue InOp = InVec.getOperand(0); |
13909 | 335 | if (InOp.getValueType() != NVT335 ) { |
13910 | 2 | assert(InOp.getValueType().isInteger() && NVT.isInteger()); |
13911 | 2 | return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); |
13912 | 2 | } |
13913 | 333 | return InOp; |
13914 | 333 | } |
13915 | 349k | |
13916 | 349k | SDValue EltNo = N->getOperand(1); |
13917 | 349k | ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); |
13918 | 349k | |
13919 | 349k | // extract_vector_elt (build_vector x, y), 1 -> y |
13920 | 349k | if (ConstEltNo && |
13921 | 347k | InVec.getOpcode() == ISD::BUILD_VECTOR && |
13922 | 31.0k | TLI.isTypeLegal(VT) && |
13923 | 30.7k | (InVec.hasOneUse() || |
13924 | 349k | TLI.aggressivelyPreferBuildVectorSources(VT)24.4k )) { |
13925 | 29.8k | SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue()); |
13926 | 29.8k | EVT InEltVT = Elt.getValueType(); |
13927 | 29.8k | |
13928 | 29.8k | // Sometimes build_vector's scalar input types do not match result type. |
13929 | 29.8k | if (NVT == InEltVT) |
13930 | 29.8k | return Elt; |
13931 | 319k | |
13932 | 319k | // TODO: It may be useful to truncate if free if the build_vector implicitly |
13933 | 319k | // converts. |
13934 | 319k | } |
13935 | 319k | |
13936 | 319k | // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x) |
13937 | 319k | bool isLE = DAG.getDataLayout().isLittleEndian(); |
13938 | 319k | unsigned EltTrunc = isLE ? 0311k : VT.getVectorNumElements() - 17.96k ; |
13939 | 319k | if (ConstEltNo && 319k InVec.getOpcode() == ISD::BITCAST317k && InVec.hasOneUse()62.2k && |
13940 | 319k | ConstEltNo->getZExtValue() == EltTrunc15.5k && VT.isInteger()8.05k ) { |
13941 | 6.00k | SDValue BCSrc = InVec.getOperand(0); |
13942 | 6.00k | if (BCSrc.getValueType().isScalarInteger()) |
13943 | 727 | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); |
13944 | 318k | } |
13945 | 318k | |
13946 | 318k | // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val |
13947 | 318k | // |
13948 | 318k | // This only really matters if the index is non-constant since other combines |
13949 | 318k | // on the constant elements already work. |
13950 | 318k | if (318k InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && |
13951 | 318k | EltNo == InVec.getOperand(2)1.25k ) { |
13952 | 74 | SDValue Elt = InVec.getOperand(1); |
13953 | 74 | return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT)72 : Elt2 ; |
13954 | 74 | } |
13955 | 318k | |
13956 | 318k | // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. |
13957 | 318k | // We only perform this optimization before the op legalization phase because |
13958 | 318k | // we may introduce new vector instructions which are not backed by TD |
13959 | 318k | // patterns. For example on AVX, extracting elements from a wide vector |
13960 | 318k | // without using extract_subvector. However, if we can find an underlying |
13961 | 318k | // scalar value, then we can always use that. |
13962 | 318k | if (318k ConstEltNo && 318k InVec.getOpcode() == ISD::VECTOR_SHUFFLE316k ) { |
13963 | 1.46k | int NumElem = VT.getVectorNumElements(); |
13964 | 1.46k | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); |
13965 | 1.46k | // Find the new index to extract from. |
13966 | 1.46k | int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue()); |
13967 | 1.46k | |
13968 | 1.46k | // Extracting an undef index is undef. |
13969 | 1.46k | if (OrigElt == -1) |
13970 | 49 | return DAG.getUNDEF(NVT); |
13971 | 1.41k | |
13972 | 1.41k | // Select the right vector half to extract from. |
13973 | 1.41k | SDValue SVInVec; |
13974 | 1.41k | if (OrigElt < NumElem1.41k ) { |
13975 | 776 | SVInVec = InVec->getOperand(0); |
13976 | 1.41k | } else { |
13977 | 636 | SVInVec = InVec->getOperand(1); |
13978 | 636 | OrigElt -= NumElem; |
13979 | 636 | } |
13980 | 1.41k | |
13981 | 1.41k | if (SVInVec.getOpcode() == ISD::BUILD_VECTOR1.41k ) { |
13982 | 131 | SDValue InOp = SVInVec.getOperand(OrigElt); |
13983 | 131 | if (InOp.getValueType() != NVT131 ) { |
13984 | 0 | assert(InOp.getValueType().isInteger() && NVT.isInteger()); |
13985 | 0 | InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); |
13986 | 0 | } |
13987 | 131 | |
13988 | 131 | return InOp; |
13989 | 131 | } |
13990 | 1.28k | |
13991 | 1.28k | // FIXME: We should handle recursing on other vector shuffles and |
13992 | 1.28k | // scalar_to_vector here as well. |
13993 | 1.28k | |
13994 | 1.28k | if (1.28k !LegalOperations || |
13995 | 1.28k | // FIXME: Should really be just isOperationLegalOrCustom. |
13996 | 992 | TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) || |
13997 | 1.28k | TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)992 ) { |
13998 | 301 | EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); |
13999 | 301 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, |
14000 | 301 | DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); |
14001 | 301 | } |
14002 | 318k | } |
14003 | 318k | |
14004 | 318k | bool BCNumEltsChanged = false; |
14005 | 318k | EVT ExtVT = VT.getVectorElementType(); |
14006 | 318k | EVT LVT = ExtVT; |
14007 | 318k | |
14008 | 318k | // If the result of load has to be truncated, then it's not necessarily |
14009 | 318k | // profitable. |
14010 | 318k | if (NVT.bitsLT(LVT) && 318k !TLI.isTruncateFree(LVT, NVT)0 ) |
14011 | 0 | return SDValue(); |
14012 | 318k | |
14013 | 318k | if (318k InVec.getOpcode() == ISD::BITCAST318k ) { |
14014 | 61.5k | // Don't duplicate a load with other uses. |
14015 | 61.5k | if (!InVec.hasOneUse()) |
14016 | 46.7k | return SDValue(); |
14017 | 14.8k | |
14018 | 14.8k | EVT BCVT = InVec.getOperand(0).getValueType(); |
14019 | 14.8k | if (!BCVT.isVector() || 14.8k ExtVT.bitsGT(BCVT.getVectorElementType())9.27k ) |
14020 | 10.3k | return SDValue(); |
14021 | 4.46k | if (4.46k VT.getVectorNumElements() != BCVT.getVectorNumElements()4.46k ) |
14022 | 3.61k | BCNumEltsChanged = true; |
14023 | 61.5k | InVec = InVec.getOperand(0); |
14024 | 61.5k | ExtVT = BCVT.getVectorElementType(); |
14025 | 61.5k | } |
14026 | 318k | |
14027 | 318k | // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) |
14028 | 261k | if (261k !LegalOperations && 261k !ConstEltNo88.9k && InVec.hasOneUse()1.48k && |
14029 | 679 | ISD::isNormalLoad(InVec.getNode()) && |
14030 | 261k | !N->getOperand(1)->hasPredecessor(InVec.getNode())41 ) { |
14031 | 41 | SDValue Index = N->getOperand(1); |
14032 | 41 | if (LoadSDNode *OrigLoad41 = dyn_cast<LoadSDNode>(InVec)) { |
14033 | 41 | if (!OrigLoad->isVolatile()41 ) { |
14034 | 23 | return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, |
14035 | 23 | OrigLoad); |
14036 | 23 | } |
14037 | 261k | } |
14038 | 41 | } |
14039 | 261k | |
14040 | 261k | // Perform only after legalization to ensure build_vector / vector_shuffle |
14041 | 261k | // optimizations have already been done. |
14042 | 261k | if (261k !LegalOperations261k ) return SDValue()88.9k ; |
14043 | 172k | |
14044 | 172k | // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) |
14045 | 172k | // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) |
14046 | 172k | // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) |
14047 | 172k | |
14048 | 172k | if (172k ConstEltNo172k ) { |
14049 | 171k | int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); |
14050 | 171k | |
14051 | 171k | LoadSDNode *LN0 = nullptr; |
14052 | 171k | const ShuffleVectorSDNode *SVN = nullptr; |
14053 | 171k | if (ISD::isNormalLoad(InVec.getNode())171k ) { |
14054 | 56.8k | LN0 = cast<LoadSDNode>(InVec); |
14055 | 171k | } else if (115k InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && |
14056 | 16 | InVec.getOperand(0).getValueType() == ExtVT && |
14057 | 115k | ISD::isNormalLoad(InVec.getOperand(0).getNode())16 ) { |
14058 | 6 | // Don't duplicate a load with other uses. |
14059 | 6 | if (!InVec.hasOneUse()) |
14060 | 5 | return SDValue(); |
14061 | 1 | |
14062 | 1 | LN0 = cast<LoadSDNode>(InVec.getOperand(0)); |
14063 | 115k | } else if (115k (SVN = dyn_cast<ShuffleVectorSDNode>(InVec))115k ) { |
14064 | 982 | // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) |
14065 | 982 | // => |
14066 | 982 | // (load $addr+1*size) |
14067 | 982 | |
14068 | 982 | // Don't duplicate a load with other uses. |
14069 | 982 | if (!InVec.hasOneUse()) |
14070 | 933 | return SDValue(); |
14071 | 49 | |
14072 | 49 | // If the bit convert changed the number of elements, it is unsafe |
14073 | 49 | // to examine the mask. |
14074 | 49 | if (49 BCNumEltsChanged49 ) |
14075 | 1 | return SDValue(); |
14076 | 48 | |
14077 | 48 | // Select the input vector, guarding against out of range extract vector. |
14078 | 48 | unsigned NumElems = VT.getVectorNumElements(); |
14079 | 48 | int Idx = (Elt > (int)NumElems) ? -10 : SVN->getMaskElt(Elt)48 ; |
14080 | 48 | InVec = (Idx < (int)NumElems) ? InVec.getOperand(0)43 : InVec.getOperand(1)5 ; |
14081 | 48 | |
14082 | 48 | if (InVec.getOpcode() == ISD::BITCAST48 ) { |
14083 | 38 | // Don't duplicate a load with other uses. |
14084 | 38 | if (!InVec.hasOneUse()) |
14085 | 1 | return SDValue(); |
14086 | 37 | |
14087 | 37 | InVec = InVec.getOperand(0); |
14088 | 37 | } |
14089 | 47 | if (47 ISD::isNormalLoad(InVec.getNode())47 ) { |
14090 | 11 | LN0 = cast<LoadSDNode>(InVec); |
14091 | 11 | Elt = (Idx < (int)NumElems) ? Idx8 : Idx - (int)NumElems3 ; |
14092 | 11 | EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); |
14093 | 11 | } |
14094 | 115k | } |
14095 | 171k | |
14096 | 171k | // Make sure we found a non-volatile load and the extractelement is |
14097 | 171k | // the only use. |
14098 | 171k | if (171k !LN0 || 171k !LN0->hasNUsesOfValue(1,0)56.8k || LN0->isVolatile()411 ) |
14099 | 170k | return SDValue(); |
14100 | 403 | |
14101 | 403 | // If Idx was -1 above, Elt is going to be -1, so just return undef. |
14102 | 403 | if (403 Elt == -1403 ) |
14103 | 0 | return DAG.getUNDEF(LVT); |
14104 | 403 | |
14105 | 403 | return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); |
14106 | 403 | } |
14107 | 188 | |
14108 | 188 | return SDValue(); |
14109 | 188 | } |
14110 | | |
14111 | | // Simplify (build_vec (ext )) to (bitcast (build_vec )) |
14112 | 366k | SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { |
14113 | 366k | // We perform this optimization post type-legalization because |
14114 | 366k | // the type-legalizer often scalarizes integer-promoted vectors. |
14115 | 366k | // Performing this optimization before may create bit-casts which |
14116 | 366k | // will be type-legalized to complex code sequences. |
14117 | 366k | // We perform this optimization only before the operation legalizer because we |
14118 | 366k | // may introduce illegal operations. |
14119 | 366k | if (Level != AfterLegalizeVectorOps && 366k Level != AfterLegalizeTypes313k ) |
14120 | 226k | return SDValue(); |
14121 | 140k | |
14122 | 140k | unsigned NumInScalars = N->getNumOperands(); |
14123 | 140k | SDLoc DL(N); |
14124 | 140k | EVT VT = N->getValueType(0); |
14125 | 140k | |
14126 | 140k | // Check to see if this is a BUILD_VECTOR of a bunch of values |
14127 | 140k | // which come from any_extend or zero_extend nodes. If so, we can create |
14128 | 140k | // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR |
14129 | 140k | // optimizations. We do not handle sign-extend because we can't fill the sign |
14130 | 140k | // using shuffles. |
14131 | 140k | EVT SourceType = MVT::Other; |
14132 | 140k | bool AllAnyExt = true; |
14133 | 140k | |
14134 | 146k | for (unsigned i = 0; i != NumInScalars146k ; ++i6.57k ) { |
14135 | 144k | SDValue In = N->getOperand(i); |
14136 | 144k | // Ignore undef inputs. |
14137 | 144k | if (In.isUndef()144k ) continue543 ; |
14138 | 144k | |
14139 | 144k | bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; |
14140 | 144k | bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; |
14141 | 144k | |
14142 | 144k | // Abort if the element is not an extension. |
14143 | 144k | if (!ZeroExt && 144k !AnyExt140k ) { |
14144 | 137k | SourceType = MVT::Other; |
14145 | 137k | break; |
14146 | 137k | } |
14147 | 6.19k | |
14148 | 6.19k | // The input is a ZeroExt or AnyExt. Check the original type. |
14149 | 6.19k | EVT InTy = In.getOperand(0).getValueType(); |
14150 | 6.19k | |
14151 | 6.19k | // Check that all of the widened source types are the same. |
14152 | 6.19k | if (SourceType == MVT::Other) |
14153 | 6.19k | // First time. |
14154 | 2.77k | SourceType = InTy; |
14155 | 3.42k | else if (3.42k InTy != SourceType3.42k ) { |
14156 | 168 | // Multiple income types. Abort. |
14157 | 168 | SourceType = MVT::Other; |
14158 | 168 | break; |
14159 | 168 | } |
14160 | 6.02k | |
14161 | 6.02k | // Check if all of the extends are ANY_EXTENDs. |
14162 | 6.02k | AllAnyExt &= AnyExt; |
14163 | 6.02k | } |
14164 | 140k | |
14165 | 140k | // In order to have valid types, all of the inputs must be extended from the |
14166 | 140k | // same source type and all of the inputs must be any or zero extend. |
14167 | 140k | // Scalar sizes must be a power of two. |
14168 | 140k | EVT OutScalarTy = VT.getScalarType(); |
14169 | 140k | bool ValidTypes = SourceType != MVT::Other && |
14170 | 1.94k | isPowerOf2_32(OutScalarTy.getSizeInBits()) && |
14171 | 1.94k | isPowerOf2_32(SourceType.getSizeInBits()); |
14172 | 140k | |
14173 | 140k | // Create a new simpler BUILD_VECTOR sequence which other optimizations can |
14174 | 140k | // turn into a single shuffle instruction. |
14175 | 140k | if (!ValidTypes) |
14176 | 138k | return SDValue(); |
14177 | 1.94k | |
14178 | 1.94k | bool isLE = DAG.getDataLayout().isLittleEndian(); |
14179 | 1.94k | unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); |
14180 | 1.94k | assert(ElemRatio > 1 && "Invalid element size ratio"); |
14181 | 722 | SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): |
14182 | 1.22k | DAG.getConstant(0, DL, SourceType); |
14183 | 1.94k | |
14184 | 1.94k | unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); |
14185 | 1.94k | SmallVector<SDValue, 8> Ops(NewBVElems, Filler); |
14186 | 1.94k | |
14187 | 1.94k | // Populate the new build_vector |
14188 | 7.17k | for (unsigned i = 0, e = N->getNumOperands(); i != e7.17k ; ++i5.23k ) { |
14189 | 5.23k | SDValue Cast = N->getOperand(i); |
14190 | 5.23k | assert((Cast.getOpcode() == ISD::ANY_EXTEND || |
14191 | 5.23k | Cast.getOpcode() == ISD::ZERO_EXTEND || |
14192 | 5.23k | Cast.isUndef()) && "Invalid cast opcode"); |
14193 | 5.23k | SDValue In; |
14194 | 5.23k | if (Cast.isUndef()) |
14195 | 107 | In = DAG.getUNDEF(SourceType); |
14196 | 5.23k | else |
14197 | 5.12k | In = Cast->getOperand(0); |
14198 | 5.21k | unsigned Index = isLE ? (i * ElemRatio) : |
14199 | 20 | (i * ElemRatio + (ElemRatio - 1)); |
14200 | 5.23k | |
14201 | 5.23k | assert(Index < Ops.size() && "Invalid index"); |
14202 | 5.23k | Ops[Index] = In; |
14203 | 5.23k | } |
14204 | 1.94k | |
14205 | 1.94k | // The type of the new BUILD_VECTOR node. |
14206 | 1.94k | EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); |
14207 | 1.94k | assert(VecVT.getSizeInBits() == VT.getSizeInBits() && |
14208 | 1.94k | "Invalid vector size"); |
14209 | 1.94k | // Check if the new vector type is legal. |
14210 | 1.94k | if (!isTypeLegal(VecVT)1.94k ) return SDValue()968 ; |
14211 | 979 | |
14212 | 979 | // Make the new BUILD_VECTOR. |
14213 | 979 | SDValue BV = DAG.getBuildVector(VecVT, DL, Ops); |
14214 | 979 | |
14215 | 979 | // The new BUILD_VECTOR node has the potential to be further optimized. |
14216 | 979 | AddToWorklist(BV.getNode()); |
14217 | 979 | // Bitcast to the desired type. |
14218 | 979 | return DAG.getBitcast(VT, BV); |
14219 | 979 | } |
14220 | | |
14221 | 365k | SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { |
14222 | 365k | EVT VT = N->getValueType(0); |
14223 | 365k | |
14224 | 365k | unsigned NumInScalars = N->getNumOperands(); |
14225 | 365k | SDLoc DL(N); |
14226 | 365k | |
14227 | 365k | EVT SrcVT = MVT::Other; |
14228 | 365k | unsigned Opcode = ISD::DELETED_NODE; |
14229 | 365k | unsigned NumDefs = 0; |
14230 | 365k | |
14231 | 371k | for (unsigned i = 0; i != NumInScalars371k ; ++i6.20k ) { |
14232 | 370k | SDValue In = N->getOperand(i); |
14233 | 370k | unsigned Opc = In.getOpcode(); |
14234 | 370k | |
14235 | 370k | if (Opc == ISD::UNDEF) |
14236 | 3.55k | continue; |
14237 | 367k | |
14238 | 367k | // If all scalar values are floats and converted from integers. |
14239 | 367k | if (367k Opcode == ISD::DELETED_NODE && |
14240 | 367k | (Opc == ISD::UINT_TO_FP || 365k Opc == ISD::SINT_TO_FP364k )) { |
14241 | 952 | Opcode = Opc; |
14242 | 952 | } |
14243 | 367k | |
14244 | 367k | if (Opc != Opcode) |
14245 | 364k | return SDValue(); |
14246 | 2.65k | |
14247 | 2.65k | EVT InVT = In.getOperand(0).getValueType(); |
14248 | 2.65k | |
14249 | 2.65k | // If all scalar values are typed differently, bail out. It's chosen to |
14250 | 2.65k | // simplify BUILD_VECTOR of integer types. |
14251 | 2.65k | if (SrcVT == MVT::Other) |
14252 | 952 | SrcVT = InVT; |
14253 | 2.65k | if (SrcVT != InVT) |
14254 | 0 | return SDValue(); |
14255 | 2.65k | NumDefs++; |
14256 | 2.65k | } |
14257 | 365k | |
14258 | 365k | // If the vector has just one element defined, it's not worth to fold it into |
14259 | 365k | // a vectorized one. |
14260 | 905 | if (905 NumDefs < 2905 ) |
14261 | 115 | return SDValue(); |
14262 | 790 | |
14263 | 905 | assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) |
14264 | 790 | && "Should only handle conversion from integer to float."); |
14265 | 790 | assert(SrcVT != MVT::Other && "Cannot determine source type!"); |
14266 | 790 | |
14267 | 790 | EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); |
14268 | 790 | |
14269 | 790 | if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) |
14270 | 574 | return SDValue(); |
14271 | 216 | |
14272 | 216 | // Just because the floating-point vector type is legal does not necessarily |
14273 | 216 | // mean that the corresponding integer vector type is. |
14274 | 216 | if (216 !isTypeLegal(NVT)216 ) |
14275 | 0 | return SDValue(); |
14276 | 216 | |
14277 | 216 | SmallVector<SDValue, 8> Opnds; |
14278 | 696 | for (unsigned i = 0; i != NumInScalars696 ; ++i480 ) { |
14279 | 480 | SDValue In = N->getOperand(i); |
14280 | 480 | |
14281 | 480 | if (In.isUndef()) |
14282 | 12 | Opnds.push_back(DAG.getUNDEF(SrcVT)); |
14283 | 480 | else |
14284 | 468 | Opnds.push_back(In.getOperand(0)); |
14285 | 480 | } |
14286 | 365k | SDValue BV = DAG.getBuildVector(NVT, DL, Opnds); |
14287 | 365k | AddToWorklist(BV.getNode()); |
14288 | 365k | |
14289 | 365k | return DAG.getNode(Opcode, DL, VT, BV); |
14290 | 365k | } |
14291 | | |
14292 | | SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, |
14293 | | ArrayRef<int> VectorMask, |
14294 | | SDValue VecIn1, SDValue VecIn2, |
14295 | 5.91k | unsigned LeftIdx) { |
14296 | 5.91k | MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); |
14297 | 5.91k | SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy); |
14298 | 5.91k | |
14299 | 5.91k | EVT VT = N->getValueType(0); |
14300 | 5.91k | EVT InVT1 = VecIn1.getValueType(); |
14301 | 5.91k | EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType()1.32k : InVT14.59k ; |
14302 | 5.91k | |
14303 | 5.91k | unsigned Vec2Offset = 0; |
14304 | 5.91k | unsigned NumElems = VT.getVectorNumElements(); |
14305 | 5.91k | unsigned ShuffleNumElems = NumElems; |
14306 | 5.91k | |
14307 | 5.91k | // In case both the input vectors are extracted from same base |
14308 | 5.91k | // vector we do not need extra addend (Vec2Offset) while |
14309 | 5.91k | // computing shuffle mask. |
14310 | 5.91k | if (!VecIn2 || 5.91k !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR)1.32k || |
14311 | 135 | !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) || |
14312 | 132 | !(VecIn1.getOperand(0) == VecIn2.getOperand(0))) |
14313 | 5.78k | Vec2Offset = InVT1.getVectorNumElements(); |
14314 | 5.91k | |
14315 | 5.91k | // We can't generate a shuffle node with mismatched input and output types. |
14316 | 5.91k | // Try to make the types match the type of the output. |
14317 | 5.91k | if (InVT1 != VT || 5.91k InVT2 != VT3.77k ) { |
14318 | 2.15k | if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && 2.15k InVT1 == InVT2996 ) { |
14319 | 985 | // If the output vector length is a multiple of both input lengths, |
14320 | 985 | // we can concatenate them and pad the rest with undefs. |
14321 | 985 | unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits(); |
14322 | 985 | assert(NumConcats >= 2 && "Concat needs at least two inputs!"); |
14323 | 985 | SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1)); |
14324 | 985 | ConcatOps[0] = VecIn1; |
14325 | 985 | ConcatOps[1] = VecIn2 ? VecIn2105 : DAG.getUNDEF(InVT1)880 ; |
14326 | 985 | VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); |
14327 | 985 | VecIn2 = SDValue(); |
14328 | 2.15k | } else if (1.17k InVT1.getSizeInBits() == VT.getSizeInBits() * 21.17k ) { |
14329 | 824 | if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems)) |
14330 | 317 | return SDValue(); |
14331 | 507 | |
14332 | 507 | if (507 !VecIn2.getNode()507 ) { |
14333 | 367 | // If we only have one input vector, and it's twice the size of the |
14334 | 367 | // output, split it in two. |
14335 | 367 | VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, |
14336 | 367 | DAG.getConstant(NumElems, DL, IdxTy)); |
14337 | 367 | VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx); |
14338 | 367 | // Since we now have shorter input vectors, adjust the offset of the |
14339 | 367 | // second vector's start. |
14340 | 367 | Vec2Offset = NumElems; |
14341 | 507 | } else if (140 InVT2.getSizeInBits() <= InVT1.getSizeInBits()140 ) { |
14342 | 138 | // VecIn1 is wider than the output, and we have another, possibly |
14343 | 138 | // smaller input. Pad the smaller input with undefs, shuffle at the |
14344 | 138 | // input vector width, and extract the output. |
14345 | 138 | // The shuffle type is different than VT, so check legality again. |
14346 | 138 | if (LegalOperations && |
14347 | 0 | !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) |
14348 | 0 | return SDValue(); |
14349 | 138 | |
14350 | 138 | // Legalizing INSERT_SUBVECTOR is tricky - you basically have to |
14351 | 138 | // lower it back into a BUILD_VECTOR. So if the inserted type is |
14352 | 138 | // illegal, don't even try. |
14353 | 138 | if (138 InVT1 != InVT2138 ) { |
14354 | 9 | if (!TLI.isTypeLegal(InVT2)) |
14355 | 1 | return SDValue(); |
14356 | 8 | VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, |
14357 | 8 | DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); |
14358 | 8 | } |
14359 | 137 | ShuffleNumElems = NumElems * 2; |
14360 | 140 | } else { |
14361 | 2 | // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider |
14362 | 2 | // than VecIn1. We can't handle this for now - this case will disappear |
14363 | 2 | // when we start sorting the vectors by type. |
14364 | 2 | return SDValue(); |
14365 | 2 | } |
14366 | 346 | } else if (346 InVT2.getSizeInBits() * 2 == VT.getSizeInBits() && |
14367 | 346 | InVT1.getSizeInBits() == VT.getSizeInBits()6 ) { |
14368 | 6 | SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2)); |
14369 | 6 | ConcatOps[0] = VecIn2; |
14370 | 6 | VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); |
14371 | 346 | } else { |
14372 | 340 | // TODO: Support cases where the length mismatch isn't exactly by a |
14373 | 340 | // factor of 2. |
14374 | 340 | // TODO: Move this check upwards, so that if we have bad type |
14375 | 340 | // mismatches, we don't create any DAG nodes. |
14376 | 340 | return SDValue(); |
14377 | 340 | } |
14378 | 5.25k | } |
14379 | 5.25k | |
14380 | 5.25k | // Initialize mask to undef. |
14381 | 5.25k | SmallVector<int, 8> Mask(ShuffleNumElems, -1); |
14382 | 5.25k | |
14383 | 5.25k | // Only need to run up to the number of elements actually used, not the |
14384 | 5.25k | // total number of elements in the shuffle - if we are shuffling a wider |
14385 | 5.25k | // vector, the high lanes should be set to undef. |
14386 | 56.1k | for (unsigned i = 0; i != NumElems56.1k ; ++i50.9k ) { |
14387 | 50.9k | if (VectorMask[i] <= 0) |
14388 | 26.8k | continue; |
14389 | 24.0k | |
14390 | 24.0k | unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1); |
14391 | 24.0k | if (VectorMask[i] == (int)LeftIdx24.0k ) { |
14392 | 18.1k | Mask[i] = ExtIndex; |
14393 | 24.0k | } else if (5.95k VectorMask[i] == (int)LeftIdx + 15.95k ) { |
14394 | 3.88k | Mask[i] = Vec2Offset + ExtIndex; |
14395 | 3.88k | } |
14396 | 50.9k | } |
14397 | 5.25k | |
14398 | 5.25k | // The type the input vectors may have changed above. |
14399 | 5.25k | InVT1 = VecIn1.getValueType(); |
14400 | 5.25k | |
14401 | 5.25k | // If we already have a VecIn2, it should have the same type as VecIn1. |
14402 | 5.25k | // If we don't, get an undef/zero vector of the appropriate type. |
14403 | 5.25k | VecIn2 = VecIn2.getNode() ? VecIn21.49k : DAG.getUNDEF(InVT1)3.75k ; |
14404 | 5.25k | assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type."); |
14405 | 5.25k | |
14406 | 5.25k | SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask); |
14407 | 5.25k | if (ShuffleNumElems > NumElems) |
14408 | 137 | Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx); |
14409 | 5.91k | |
14410 | 5.91k | return Shuffle; |
14411 | 5.91k | } |
14412 | | |
14413 | | // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT |
14414 | | // operations. If the types of the vectors we're extracting from allow it, |
14415 | | // turn this into a vector_shuffle node. |
14416 | 364k | SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { |
14417 | 364k | SDLoc DL(N); |
14418 | 364k | EVT VT = N->getValueType(0); |
14419 | 364k | |
14420 | 364k | // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. |
14421 | 364k | if (!isTypeLegal(VT)) |
14422 | 0 | return SDValue(); |
14423 | 364k | |
14424 | 364k | // May only combine to shuffle after legalize if shuffle is legal. |
14425 | 364k | if (364k LegalOperations && 364k !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)140k ) |
14426 | 140k | return SDValue(); |
14427 | 224k | |
14428 | 224k | bool UsesZeroVector = false; |
14429 | 224k | unsigned NumElems = N->getNumOperands(); |
14430 | 224k | |
14431 | 224k | // Record, for each element of the newly built vector, which input vector |
14432 | 224k | // that element comes from. -1 stands for undef, 0 for the zero vector, |
14433 | 224k | // and positive values for the input vectors. |
14434 | 224k | // VectorMask maps each element to its vector number, and VecIn maps vector |
14435 | 224k | // numbers to their initial SDValues. |
14436 | 224k | |
14437 | 224k | SmallVector<int, 8> VectorMask(NumElems, -1); |
14438 | 224k | SmallVector<SDValue, 8> VecIn; |
14439 | 224k | VecIn.push_back(SDValue()); |
14440 | 224k | |
14441 | 464k | for (unsigned i = 0; i != NumElems464k ; ++i239k ) { |
14442 | 432k | SDValue Op = N->getOperand(i); |
14443 | 432k | |
14444 | 432k | if (Op.isUndef()) |
14445 | 26.3k | continue; |
14446 | 405k | |
14447 | 405k | // See if we can use a blend with a zero vector. |
14448 | 405k | // TODO: Should we generalize this to a blend with an arbitrary constant |
14449 | 405k | // vector? |
14450 | 405k | if (405k isNullConstant(Op) || 405k isNullFPConstant(Op)235k ) { |
14451 | 187k | UsesZeroVector = true; |
14452 | 187k | VectorMask[i] = 0; |
14453 | 187k | continue; |
14454 | 187k | } |
14455 | 218k | |
14456 | 218k | // Not an undef or zero. If the input is something other than an |
14457 | 218k | // EXTRACT_VECTOR_ELT with a constant index, bail out. |
14458 | 218k | if (218k Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
14459 | 41.9k | !isa<ConstantSDNode>(Op.getOperand(1))) |
14460 | 176k | return SDValue(); |
14461 | 41.8k | SDValue ExtractedFromVec = Op.getOperand(0); |
14462 | 41.8k | |
14463 | 41.8k | // All inputs must have the same element type as the output. |
14464 | 41.8k | if (VT.getVectorElementType() != |
14465 | 41.8k | ExtractedFromVec.getValueType().getVectorElementType()) |
14466 | 16.4k | return SDValue(); |
14467 | 25.3k | |
14468 | 25.3k | // Have we seen this input vector before? |
14469 | 25.3k | // The vectors are expected to be tiny (usually 1 or 2 elements), so using |
14470 | 25.3k | // a map back from SDValues to numbers isn't worth it. |
14471 | 25.3k | unsigned Idx = std::distance( |
14472 | 25.3k | VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec)); |
14473 | 25.3k | if (Idx == VecIn.size()) |
14474 | 7.37k | VecIn.push_back(ExtractedFromVec); |
14475 | 432k | |
14476 | 432k | VectorMask[i] = Idx; |
14477 | 432k | } |
14478 | 224k | |
14479 | 224k | // If we didn't find at least one input vector, bail out. |
14480 | 31.9k | if (31.9k VecIn.size() < 231.9k ) |
14481 | 26.1k | return SDValue(); |
14482 | 5.77k | |
14483 | 5.77k | // If all the Operands of BUILD_VECTOR extract from same |
14484 | 5.77k | // vector, then split the vector efficiently based on the maximum |
14485 | 5.77k | // vector access index and adjust the VectorMask and |
14486 | 5.77k | // VecIn accordingly. |
14487 | 5.77k | if (5.77k VecIn.size() == 25.77k ) { |
14488 | 4.62k | unsigned MaxIndex = 0; |
14489 | 4.62k | unsigned NearestPow2 = 0; |
14490 | 4.62k | SDValue Vec = VecIn.back(); |
14491 | 4.62k | EVT InVT = Vec.getValueType(); |
14492 | 4.62k | MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); |
14493 | 4.62k | SmallVector<unsigned, 8> IndexVec(NumElems, 0); |
14494 | 4.62k | |
14495 | 48.3k | for (unsigned i = 0; i < NumElems48.3k ; i++43.7k ) { |
14496 | 43.7k | if (VectorMask[i] <= 0) |
14497 | 26.5k | continue; |
14498 | 17.1k | unsigned Index = N->getOperand(i).getConstantOperandVal(1); |
14499 | 17.1k | IndexVec[i] = Index; |
14500 | 17.1k | MaxIndex = std::max(MaxIndex, Index); |
14501 | 17.1k | } |
14502 | 4.62k | |
14503 | 4.62k | NearestPow2 = PowerOf2Ceil(MaxIndex); |
14504 | 4.62k | if (InVT.isSimple() && 4.62k NearestPow2 > 24.41k && MaxIndex < NearestPow22.55k && |
14505 | 4.62k | NumElems * 2 < NearestPow22.34k ) { |
14506 | 180 | unsigned SplitSize = NearestPow2 / 2; |
14507 | 180 | EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), |
14508 | 180 | InVT.getVectorElementType(), SplitSize); |
14509 | 180 | if (TLI.isTypeLegal(SplitVT)180 ) { |
14510 | 132 | SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, |
14511 | 132 | DAG.getConstant(SplitSize, DL, IdxTy)); |
14512 | 132 | SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, |
14513 | 132 | DAG.getConstant(0, DL, IdxTy)); |
14514 | 132 | VecIn.pop_back(); |
14515 | 132 | VecIn.push_back(VecIn1); |
14516 | 132 | VecIn.push_back(VecIn2); |
14517 | 132 | |
14518 | 1.08k | for (unsigned i = 0; i < NumElems1.08k ; i++948 ) { |
14519 | 948 | if (VectorMask[i] <= 0) |
14520 | 16 | continue; |
14521 | 932 | VectorMask[i] = (IndexVec[i] < SplitSize) ? 932 1466 : 2466 ; |
14522 | 948 | } |
14523 | 132 | } |
14524 | 180 | } |
14525 | 4.62k | } |
14526 | 5.77k | |
14527 | 5.77k | // TODO: We want to sort the vectors by descending length, so that adjacent |
14528 | 5.77k | // pairs have similar length, and the longer vector is always first in the |
14529 | 5.77k | // pair. |
14530 | 5.77k | |
14531 | 5.77k | // TODO: Should this fire if some of the input vectors has illegal type (like |
14532 | 5.77k | // it does now), or should we let legalization run its course first? |
14533 | 5.77k | |
14534 | 5.77k | // Shuffle phase: |
14535 | 5.77k | // Take pairs of vectors, and shuffle them so that the result has elements |
14536 | 5.77k | // from these vectors in the correct places. |
14537 | 5.77k | // For example, given: |
14538 | 5.77k | // t10: i32 = extract_vector_elt t1, Constant:i64<0> |
14539 | 5.77k | // t11: i32 = extract_vector_elt t2, Constant:i64<0> |
14540 | 5.77k | // t12: i32 = extract_vector_elt t3, Constant:i64<0> |
14541 | 5.77k | // t13: i32 = extract_vector_elt t1, Constant:i64<1> |
14542 | 5.77k | // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13 |
14543 | 5.77k | // We will generate: |
14544 | 5.77k | // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2 |
14545 | 5.77k | // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef |
14546 | 5.77k | SmallVector<SDValue, 4> Shuffles; |
14547 | 11.0k | for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len11.0k ; ++In5.25k ) { |
14548 | 5.91k | unsigned LeftIdx = 2 * In + 1; |
14549 | 5.91k | SDValue VecLeft = VecIn[LeftIdx]; |
14550 | 5.91k | SDValue VecRight = |
14551 | 5.91k | (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1]1.32k : SDValue()4.59k ; |
14552 | 5.91k | |
14553 | 5.91k | if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft, |
14554 | 5.91k | VecRight, LeftIdx)) |
14555 | 5.25k | Shuffles.push_back(Shuffle); |
14556 | 5.91k | else |
14557 | 660 | return SDValue(); |
14558 | 5.91k | } |
14559 | 5.77k | |
14560 | 5.77k | // If we need the zero vector as an "ingredient" in the blend tree, add it |
14561 | 5.77k | // to the list of shuffles. |
14562 | 5.11k | if (5.11k UsesZeroVector5.11k ) |
14563 | 592 | Shuffles.push_back(VT.isInteger() ? 592 DAG.getConstant(0, DL, VT)507 |
14564 | 592 | : DAG.getConstantFP(0.0, DL, VT)); |
14565 | 5.11k | |
14566 | 5.11k | // If we only have one shuffle, we're done. |
14567 | 5.11k | if (Shuffles.size() == 1) |
14568 | 4.38k | return Shuffles[0]; |
14569 | 729 | |
14570 | 729 | // Update the vector mask to point to the post-shuffle vectors. |
14571 | 729 | for (int &Vec : VectorMask) |
14572 | 10.4k | if (10.4k Vec == 010.4k ) |
14573 | 6.57k | Vec = Shuffles.size() - 1; |
14574 | 10.4k | else |
14575 | 3.88k | Vec = (Vec - 1) / 2; |
14576 | 729 | |
14577 | 729 | // More than one shuffle. Generate a binary tree of blends, e.g. if from |
14578 | 729 | // the previous step we got the set of shuffles t10, t11, t12, t13, we will |
14579 | 729 | // generate: |
14580 | 729 | // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2 |
14581 | 729 | // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4 |
14582 | 729 | // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6 |
14583 | 729 | // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8 |
14584 | 729 | // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11 |
14585 | 729 | // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13 |
14586 | 729 | // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21 |
14587 | 729 | |
14588 | 729 | // Make sure the initial size of the shuffle list is even. |
14589 | 729 | if (Shuffles.size() % 2) |
14590 | 2 | Shuffles.push_back(DAG.getUNDEF(VT)); |
14591 | 729 | |
14592 | 1.46k | for (unsigned CurSize = Shuffles.size(); CurSize > 11.46k ; CurSize /= 2731 ) { |
14593 | 731 | if (CurSize % 2731 ) { |
14594 | 0 | Shuffles[CurSize] = DAG.getUNDEF(VT); |
14595 | 0 | CurSize++; |
14596 | 0 | } |
14597 | 1.46k | for (unsigned In = 0, Len = CurSize / 2; In < Len1.46k ; ++In733 ) { |
14598 | 733 | int Left = 2 * In; |
14599 | 733 | int Right = 2 * In + 1; |
14600 | 733 | SmallVector<int, 8> Mask(NumElems, -1); |
14601 | 11.2k | for (unsigned i = 0; i != NumElems11.2k ; ++i10.5k ) { |
14602 | 10.5k | if (VectorMask[i] == Left10.5k ) { |
14603 | 3.06k | Mask[i] = i; |
14604 | 3.06k | VectorMask[i] = In; |
14605 | 10.5k | } else if (7.45k VectorMask[i] == Right7.45k ) { |
14606 | 7.33k | Mask[i] = i + NumElems; |
14607 | 7.33k | VectorMask[i] = In; |
14608 | 7.33k | } |
14609 | 10.5k | } |
14610 | 733 | |
14611 | 733 | Shuffles[In] = |
14612 | 733 | DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); |
14613 | 733 | } |
14614 | 731 | } |
14615 | 364k | return Shuffles[0]; |
14616 | 364k | } |
14617 | | |
14618 | | // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT |
14619 | | // operations which can be matched to a truncate or to a shuffle-truncate. |
14620 | 69.4k | SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) { |
14621 | 69.4k | // TODO: Add support for big-endian. |
14622 | 69.4k | if (DAG.getDataLayout().isBigEndian()) |
14623 | 0 | return SDValue(); |
14624 | 69.4k | if (69.4k N->getNumOperands() < 269.4k ) |
14625 | 983 | return SDValue(); |
14626 | 68.4k | SDLoc DL(N); |
14627 | 68.4k | EVT VT = N->getValueType(0); |
14628 | 68.4k | unsigned NumElems = N->getNumOperands(); |
14629 | 68.4k | |
14630 | 68.4k | if (!isTypeLegal(VT)) |
14631 | 0 | return SDValue(); |
14632 | 68.4k | |
14633 | 68.4k | // If the input is something other than an EXTRACT_VECTOR_ELT with a constant |
14634 | 68.4k | // index, bail out. |
14635 | 68.4k | // TODO: Allow undef elements in some cases? |
14636 | 68.4k | if (68.4k llvm::any_of(N->ops(), [VT](SDValue Op) 68.4k { |
14637 | 99.6k | return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
14638 | 33.4k | !isa<ConstantSDNode>(Op.getOperand(1)) || |
14639 | 33.4k | Op.getValueType() != VT.getVectorElementType(); |
14640 | 99.6k | })) |
14641 | 66.6k | return SDValue(); |
14642 | 1.81k | |
14643 | 1.81k | // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index |
14644 | 1.81k | auto GetExtractIdx = [](SDValue Extract) 1.81k { |
14645 | 6.48k | return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue(); |
14646 | 6.48k | }; |
14647 | 1.81k | |
14648 | 1.81k | // The offset is defined to be the BUILD_VECTOR's first operand (assuming no |
14649 | 1.81k | // undef and little-endian). |
14650 | 1.81k | int Offset = GetExtractIdx(N->getOperand(0)); |
14651 | 1.81k | |
14652 | 1.81k | // Compute the stride from the next operand. |
14653 | 1.81k | int Stride = GetExtractIdx(N->getOperand(1)) - Offset; |
14654 | 1.81k | SDValue ExtractedFromVec = N->getOperand(0).getOperand(0); |
14655 | 1.81k | |
14656 | 1.81k | // Proceed only if the stride and the types can be matched to a truncate. |
14657 | 1.81k | if ((Stride == 1 || 1.81k !isPowerOf2_32(Stride)1.35k ) || |
14658 | 937 | (ExtractedFromVec.getValueType().getVectorNumElements() != |
14659 | 937 | Stride * NumElems) || |
14660 | 504 | (VT.getScalarSizeInBits() * Stride > 64)) |
14661 | 1.32k | return SDValue(); |
14662 | 482 | |
14663 | 482 | // Check remaining operands are consistent with the computed stride. |
14664 | 3.33k | for (unsigned i = 1; 482 i != NumElems3.33k ; ++i2.85k ) { |
14665 | 2.87k | SDValue Op = N->getOperand(i); |
14666 | 2.87k | |
14667 | 2.87k | if ((Op.getOperand(0) != ExtractedFromVec) || |
14668 | 2.86k | (GetExtractIdx(Op) != Stride * i + Offset)) |
14669 | 18 | return SDValue(); |
14670 | 2.87k | } |
14671 | 482 | |
14672 | 464 | SDValue Res = ExtractedFromVec; |
14673 | 464 | EVT TruncVT = |
14674 | 464 | VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger()8 : VT456 ; |
14675 | 464 | if (Offset464 ) { |
14676 | 342 | // If the first index is non-zero, need to shuffle elements of interest to |
14677 | 342 | // lower parts of the vector's elements the truncate will act upon. |
14678 | 342 | // TODO: Generalize to compute the permute-shuffle that will prepare any |
14679 | 342 | // element permutation for the truncate, and let the target decide if |
14680 | 342 | // profitable. |
14681 | 342 | EVT ExtractedVT = ExtractedFromVec.getValueType(); |
14682 | 342 | SmallVector<int, 64> Mask; |
14683 | 2.65k | for (unsigned i = 0; i != NumElems2.65k ; ++i2.30k ) { |
14684 | 2.30k | Mask.push_back(Offset + i * Stride); |
14685 | 2.30k | // Pad the elements that will be lost after the truncate with undefs. |
14686 | 2.30k | Mask.append(Stride - 1, -1); |
14687 | 2.30k | } |
14688 | 342 | if (!TLI.isShuffleMaskLegal(Mask, ExtractedVT) || |
14689 | 290 | !TLI.isDesirableToCombineBuildVectorToShuffleTruncate(Mask, ExtractedVT, |
14690 | 290 | TruncVT)) |
14691 | 150 | return SDValue(); |
14692 | 192 | Res = DAG.getVectorShuffle(ExtractedVT, SDLoc(N), Res, |
14693 | 192 | DAG.getUNDEF(ExtractedVT), Mask); |
14694 | 192 | } |
14695 | 464 | // Construct the truncate. |
14696 | 314 | LLVMContext &Ctx = *DAG.getContext(); |
14697 | 314 | EVT NewVT = VT.getVectorVT( |
14698 | 314 | Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems); |
14699 | 314 | |
14700 | 314 | Res = DAG.getBitcast(NewVT, Res); |
14701 | 314 | Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res); |
14702 | 314 | return DAG.getBitcast(VT, Res); |
14703 | 69.4k | } |
14704 | | |
14705 | 366k | SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { |
14706 | 366k | EVT VT = N->getValueType(0); |
14707 | 366k | |
14708 | 366k | // A vector built entirely of undefs is undef. |
14709 | 366k | if (ISD::allOperandsUndef(N)) |
14710 | 73 | return DAG.getUNDEF(VT); |
14711 | 366k | |
14712 | 366k | // Check if we can express BUILD VECTOR via subvector extract. |
14713 | 366k | if (366k !LegalTypes && 366k (N->getNumOperands() > 1)138k ) { |
14714 | 136k | SDValue Op0 = N->getOperand(0); |
14715 | 275k | auto checkElem = [&](SDValue Op) -> uint64_t { |
14716 | 275k | if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) && |
14717 | 6.53k | (Op0.getOperand(0) == Op.getOperand(0))) |
14718 | 6.44k | if (auto 6.44k CNode6.44k = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
14719 | 6.29k | return CNode->getZExtValue(); |
14720 | 269k | return -1; |
14721 | 269k | }; |
14722 | 136k | |
14723 | 136k | int Offset = checkElem(Op0); |
14724 | 139k | for (unsigned i = 0; i < N->getNumOperands()139k ; ++i3.06k ) { |
14725 | 139k | if (Offset + i != checkElem(N->getOperand(i))139k ) { |
14726 | 136k | Offset = -1; |
14727 | 136k | break; |
14728 | 136k | } |
14729 | 139k | } |
14730 | 136k | |
14731 | 136k | if ((Offset == 0) && |
14732 | 103 | (Op0.getOperand(0).getValueType() == N->getValueType(0))) |
14733 | 90 | return Op0.getOperand(0); |
14734 | 136k | if (136k (Offset != -1) && |
14735 | 37 | ((Offset % N->getValueType(0).getVectorNumElements()) == |
14736 | 37 | 0)) // IDX must be multiple of output size. |
14737 | 33 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0), |
14738 | 33 | Op0.getOperand(0), Op0.getOperand(1)); |
14739 | 366k | } |
14740 | 366k | |
14741 | 366k | if (SDValue 366k V366k = reduceBuildVecExtToExtBuildVec(N)) |
14742 | 979 | return V; |
14743 | 365k | |
14744 | 365k | if (SDValue 365k V365k = reduceBuildVecConvertToConvertBuildVec(N)) |
14745 | 216 | return V; |
14746 | 365k | |
14747 | 365k | if (365k TLI.isDesirableToCombineBuildVectorToTruncate()365k ) |
14748 | 69.4k | if (SDValue 69.4k V69.4k = reduceBuildVecToTrunc(N)) |
14749 | 314 | return V; |
14750 | 364k | |
14751 | 364k | if (SDValue 364k V364k = reduceBuildVecToShuffle(N)) |
14752 | 5.11k | return V; |
14753 | 359k | |
14754 | 359k | return SDValue(); |
14755 | 359k | } |
14756 | | |
14757 | 27.9k | static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { |
14758 | 27.9k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
14759 | 27.9k | EVT OpVT = N->getOperand(0).getValueType(); |
14760 | 27.9k | |
14761 | 27.9k | // If the operands are legal vectors, leave them alone. |
14762 | 27.9k | if (TLI.isTypeLegal(OpVT)) |
14763 | 23.8k | return SDValue(); |
14764 | 4.06k | |
14765 | 4.06k | SDLoc DL(N); |
14766 | 4.06k | EVT VT = N->getValueType(0); |
14767 | 4.06k | SmallVector<SDValue, 8> Ops; |
14768 | 4.06k | |
14769 | 4.06k | EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); |
14770 | 4.06k | SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); |
14771 | 4.06k | |
14772 | 4.06k | // Keep track of what we encounter. |
14773 | 4.06k | bool AnyInteger = false; |
14774 | 4.06k | bool AnyFP = false; |
14775 | 4.09k | for (const SDValue &Op : N->ops()) { |
14776 | 4.09k | if (ISD::BITCAST == Op.getOpcode() && |
14777 | 97 | !Op.getOperand(0).getValueType().isVector()) |
14778 | 32 | Ops.push_back(Op.getOperand(0)); |
14779 | 4.06k | else if (4.06k ISD::UNDEF == Op.getOpcode()4.06k ) |
14780 | 13 | Ops.push_back(ScalarUndef); |
14781 | 4.06k | else |
14782 | 4.04k | return SDValue(); |
14783 | 45 | |
14784 | 45 | // Note whether we encounter an integer or floating point scalar. |
14785 | 45 | // If it's neither, bail out, it could be something weird like x86mmx. |
14786 | 45 | EVT LastOpVT = Ops.back().getValueType(); |
14787 | 45 | if (LastOpVT.isFloatingPoint()) |
14788 | 9 | AnyFP = true; |
14789 | 36 | else if (36 LastOpVT.isInteger()36 ) |
14790 | 33 | AnyInteger = true; |
14791 | 36 | else |
14792 | 3 | return SDValue(); |
14793 | 9 | } |
14794 | 9 | |
14795 | 9 | // If any of the operands is a floating point scalar bitcast to a vector, |
14796 | 9 | // use floating point types throughout, and bitcast everything. |
14797 | 9 | // Replace UNDEFs by another scalar UNDEF node, of the final desired type. |
14798 | 9 | if (9 AnyFP9 ) { |
14799 | 3 | SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); |
14800 | 3 | ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); |
14801 | 3 | if (AnyInteger3 ) { |
14802 | 8 | for (SDValue &Op : Ops) { |
14803 | 8 | if (Op.getValueType() == SVT) |
14804 | 5 | continue; |
14805 | 3 | if (3 Op.isUndef()3 ) |
14806 | 1 | Op = ScalarUndef; |
14807 | 3 | else |
14808 | 2 | Op = DAG.getBitcast(SVT, Op); |
14809 | 8 | } |
14810 | 2 | } |
14811 | 3 | } |
14812 | 27.9k | |
14813 | 27.9k | EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, |
14814 | 27.9k | VT.getSizeInBits() / SVT.getSizeInBits()); |
14815 | 27.9k | return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); |
14816 | 27.9k | } |
14817 | | |
14818 | | // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR |
14819 | | // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at |
14820 | | // most two distinct vectors the same size as the result, attempt to turn this |
14821 | | // into a legal shuffle. |
14822 | 10.3k | static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { |
14823 | 10.3k | EVT VT = N->getValueType(0); |
14824 | 10.3k | EVT OpVT = N->getOperand(0).getValueType(); |
14825 | 10.3k | int NumElts = VT.getVectorNumElements(); |
14826 | 10.3k | int NumOpElts = OpVT.getVectorNumElements(); |
14827 | 10.3k | |
14828 | 10.3k | SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); |
14829 | 10.3k | SmallVector<int, 8> Mask; |
14830 | 10.3k | |
14831 | 12.1k | for (SDValue Op : N->ops()) { |
14832 | 12.1k | // Peek through any bitcast. |
14833 | 12.1k | Op = peekThroughBitcast(Op); |
14834 | 12.1k | |
14835 | 12.1k | // UNDEF nodes convert to UNDEF shuffle mask values. |
14836 | 12.1k | if (Op.isUndef()12.1k ) { |
14837 | 1.71k | Mask.append((unsigned)NumOpElts, -1); |
14838 | 1.71k | continue; |
14839 | 1.71k | } |
14840 | 10.4k | |
14841 | 10.4k | if (10.4k Op.getOpcode() != ISD::EXTRACT_SUBVECTOR10.4k ) |
14842 | 9.05k | return SDValue(); |
14843 | 1.36k | |
14844 | 1.36k | // What vector are we extracting the subvector from and at what index? |
14845 | 1.36k | SDValue ExtVec = Op.getOperand(0); |
14846 | 1.36k | |
14847 | 1.36k | // We want the EVT of the original extraction to correctly scale the |
14848 | 1.36k | // extraction index. |
14849 | 1.36k | EVT ExtVT = ExtVec.getValueType(); |
14850 | 1.36k | |
14851 | 1.36k | // Peek through any bitcast. |
14852 | 1.36k | ExtVec = peekThroughBitcast(ExtVec); |
14853 | 1.36k | |
14854 | 1.36k | // UNDEF nodes convert to UNDEF shuffle mask values. |
14855 | 1.36k | if (ExtVec.isUndef()1.36k ) { |
14856 | 0 | Mask.append((unsigned)NumOpElts, -1); |
14857 | 0 | continue; |
14858 | 0 | } |
14859 | 1.36k | |
14860 | 1.36k | if (1.36k !isa<ConstantSDNode>(Op.getOperand(1))1.36k ) |
14861 | 0 | return SDValue(); |
14862 | 1.36k | int ExtIdx = Op.getConstantOperandVal(1); |
14863 | 1.36k | |
14864 | 1.36k | // Ensure that we are extracting a subvector from a vector the same |
14865 | 1.36k | // size as the result. |
14866 | 1.36k | if (ExtVT.getSizeInBits() != VT.getSizeInBits()) |
14867 | 429 | return SDValue(); |
14868 | 934 | |
14869 | 934 | // Scale the subvector index to account for any bitcast. |
14870 | 934 | int NumExtElts = ExtVT.getVectorNumElements(); |
14871 | 934 | if (0 == (NumExtElts % NumElts)) |
14872 | 916 | ExtIdx /= (NumExtElts / NumElts); |
14873 | 18 | else if (18 0 == (NumElts % NumExtElts)18 ) |
14874 | 18 | ExtIdx *= (NumElts / NumExtElts); |
14875 | 18 | else |
14876 | 0 | return SDValue(); |
14877 | 934 | |
14878 | 934 | // At most we can reference 2 inputs in the final shuffle. |
14879 | 934 | if (934 SV0.isUndef() || 934 SV0 == ExtVec46 ) { |
14880 | 904 | SV0 = ExtVec; |
14881 | 4.24k | for (int i = 0; i != NumOpElts4.24k ; ++i3.34k ) |
14882 | 3.34k | Mask.push_back(i + ExtIdx); |
14883 | 934 | } else if (30 SV1.isUndef() || 30 SV1 == ExtVec0 ) { |
14884 | 30 | SV1 = ExtVec; |
14885 | 161 | for (int i = 0; i != NumOpElts161 ; ++i131 ) |
14886 | 131 | Mask.push_back(i + ExtIdx + NumElts); |
14887 | 0 | } else { |
14888 | 0 | return SDValue(); |
14889 | 0 | } |
14890 | 874 | } |
14891 | 874 | |
14892 | 874 | if (874 !DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)874 ) |
14893 | 5 | return SDValue(); |
14894 | 869 | |
14895 | 869 | return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), |
14896 | 869 | DAG.getBitcast(VT, SV1), Mask); |
14897 | 869 | } |
14898 | | |
14899 | 31.3k | SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { |
14900 | 31.3k | // If we only have one input vector, we don't need to do any concatenation. |
14901 | 31.3k | if (N->getNumOperands() == 1) |
14902 | 0 | return N->getOperand(0); |
14903 | 31.3k | |
14904 | 31.3k | // Check if all of the operands are undefs. |
14905 | 31.3k | EVT VT = N->getValueType(0); |
14906 | 31.3k | if (ISD::allOperandsUndef(N)) |
14907 | 2 | return DAG.getUNDEF(VT); |
14908 | 31.3k | |
14909 | 31.3k | // Optimize concat_vectors where all but the first of the vectors are undef. |
14910 | 31.3k | if (31.3k std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) 31.3k { |
14911 | 38.3k | return Op.isUndef(); |
14912 | 31.3k | })) { |
14913 | 9.83k | SDValue In = N->getOperand(0); |
14914 | 9.83k | assert(In.getValueType().isVector() && "Must concat vectors"); |
14915 | 9.83k | |
14916 | 9.83k | // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). |
14917 | 9.83k | if (In->getOpcode() == ISD::BITCAST && |
14918 | 9.83k | !In->getOperand(0)->getValueType(0).isVector()861 ) { |
14919 | 119 | SDValue Scalar = In->getOperand(0); |
14920 | 119 | |
14921 | 119 | // If the bitcast type isn't legal, it might be a trunc of a legal type; |
14922 | 119 | // look through the trunc so we can still do the transform: |
14923 | 119 | // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) |
14924 | 119 | if (Scalar->getOpcode() == ISD::TRUNCATE && |
14925 | 2 | !TLI.isTypeLegal(Scalar.getValueType()) && |
14926 | 2 | TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) |
14927 | 2 | Scalar = Scalar->getOperand(0); |
14928 | 119 | |
14929 | 119 | EVT SclTy = Scalar->getValueType(0); |
14930 | 119 | |
14931 | 119 | if (!SclTy.isFloatingPoint() && 119 !SclTy.isInteger()74 ) |
14932 | 0 | return SDValue(); |
14933 | 119 | |
14934 | 119 | unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits(); |
14935 | 119 | if (VNTNumElms < 2) |
14936 | 1 | return SDValue(); |
14937 | 118 | |
14938 | 118 | EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms); |
14939 | 118 | if (!TLI.isTypeLegal(NVT) || 118 !TLI.isTypeLegal(Scalar.getValueType())116 ) |
14940 | 3 | return SDValue(); |
14941 | 115 | |
14942 | 115 | SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar); |
14943 | 115 | return DAG.getBitcast(VT, Res); |
14944 | 115 | } |
14945 | 9.83k | } |
14946 | 31.2k | |
14947 | 31.2k | // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. |
14948 | 31.2k | // We have already tested above for an UNDEF only concatenation. |
14949 | 31.2k | // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) |
14950 | 31.2k | // -> (BUILD_VECTOR A, B, ..., C, D, ...) |
14951 | 31.2k | auto IsBuildVectorOrUndef = [](const SDValue &Op) 31.2k { |
14952 | 34.5k | return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); |
14953 | 36.8k | }; |
14954 | 31.2k | if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)31.2k ) { |
14955 | 3.29k | SmallVector<SDValue, 8> Opnds; |
14956 | 3.29k | EVT SVT = VT.getScalarType(); |
14957 | 3.29k | |
14958 | 3.29k | EVT MinVT = SVT; |
14959 | 3.29k | if (!SVT.isFloatingPoint()3.29k ) { |
14960 | 2.97k | // If BUILD_VECTOR are from built from integer, they may have different |
14961 | 2.97k | // operand types. Get the smallest type and truncate all operands to it. |
14962 | 2.97k | bool FoundMinVT = false; |
14963 | 2.97k | for (const SDValue &Op : N->ops()) |
14964 | 7.60k | if (7.60k ISD::BUILD_VECTOR == Op.getOpcode()7.60k ) { |
14965 | 5.68k | EVT OpSVT = Op.getOperand(0)->getValueType(0); |
14966 | 5.68k | MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)2.70k ) ? OpSVT5.68k : MinVT0 ; |
14967 | 7.60k | FoundMinVT = true; |
14968 | 7.60k | } |
14969 | 2.97k | assert(FoundMinVT && "Concat vector type mismatch"); |
14970 | 2.97k | } |
14971 | 3.29k | |
14972 | 8.42k | for (const SDValue &Op : N->ops()) { |
14973 | 8.42k | EVT OpVT = Op.getValueType(); |
14974 | 8.42k | unsigned NumElts = OpVT.getVectorNumElements(); |
14975 | 8.42k | |
14976 | 8.42k | if (ISD::UNDEF == Op.getOpcode()) |
14977 | 2.20k | Opnds.append(NumElts, DAG.getUNDEF(MinVT)); |
14978 | 8.42k | |
14979 | 8.42k | if (ISD::BUILD_VECTOR == Op.getOpcode()8.42k ) { |
14980 | 6.22k | if (SVT.isFloatingPoint()6.22k ) { |
14981 | 538 | assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); |
14982 | 538 | Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); |
14983 | 6.22k | } else { |
14984 | 24.2k | for (unsigned i = 0; i != NumElts24.2k ; ++i18.5k ) |
14985 | 18.5k | Opnds.push_back( |
14986 | 18.5k | DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); |
14987 | 5.68k | } |
14988 | 6.22k | } |
14989 | 8.42k | } |
14990 | 3.29k | |
14991 | 3.29k | assert(VT.getVectorNumElements() == Opnds.size() && |
14992 | 3.29k | "Concat vector type mismatch"); |
14993 | 3.29k | return DAG.getBuildVector(VT, SDLoc(N), Opnds); |
14994 | 3.29k | } |
14995 | 27.9k | |
14996 | 27.9k | // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. |
14997 | 27.9k | if (SDValue 27.9k V27.9k = combineConcatVectorOfScalars(N, DAG)) |
14998 | 9 | return V; |
14999 | 27.9k | |
15000 | 27.9k | // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. |
15001 | 27.9k | if (27.9k Level < AfterLegalizeVectorOps && 27.9k TLI.isTypeLegal(VT)22.4k ) |
15002 | 10.3k | if (SDValue 10.3k V10.3k = combineConcatVectorOfExtracts(N, DAG)) |
15003 | 869 | return V; |
15004 | 27.0k | |
15005 | 27.0k | // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR |
15006 | 27.0k | // nodes often generate nop CONCAT_VECTOR nodes. |
15007 | 27.0k | // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that |
15008 | 27.0k | // place the incoming vectors at the exact same location. |
15009 | 27.0k | SDValue SingleSource = SDValue(); |
15010 | 27.0k | unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); |
15011 | 27.0k | |
15012 | 27.2k | for (unsigned i = 0, e = N->getNumOperands(); i != e27.2k ; ++i214 ) { |
15013 | 27.2k | SDValue Op = N->getOperand(i); |
15014 | 27.2k | |
15015 | 27.2k | if (Op.isUndef()) |
15016 | 105 | continue; |
15017 | 27.1k | |
15018 | 27.1k | // Check if this is the identity extract: |
15019 | 27.1k | if (27.1k Op.getOpcode() != ISD::EXTRACT_SUBVECTOR27.1k ) |
15020 | 26.4k | return SDValue(); |
15021 | 695 | |
15022 | 695 | // Find the single incoming vector for the extract_subvector. |
15023 | 695 | if (695 SingleSource.getNode()695 ) { |
15024 | 76 | if (Op.getOperand(0) != SingleSource) |
15025 | 69 | return SDValue(); |
15026 | 619 | } else { |
15027 | 619 | SingleSource = Op.getOperand(0); |
15028 | 619 | |
15029 | 619 | // Check the source type is the same as the type of the result. |
15030 | 619 | // If not, this concat may extend the vector, so we can not |
15031 | 619 | // optimize it away. |
15032 | 619 | if (SingleSource.getValueType() != N->getValueType(0)) |
15033 | 516 | return SDValue(); |
15034 | 110 | } |
15035 | 110 | |
15036 | 110 | unsigned IdentityIndex = i * PartNumElem; |
15037 | 110 | ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
15038 | 110 | // The extract index must be constant. |
15039 | 110 | if (!CS) |
15040 | 0 | return SDValue(); |
15041 | 110 | |
15042 | 110 | // Check that we are reading from the identity index. |
15043 | 110 | if (110 CS->getZExtValue() != IdentityIndex110 ) |
15044 | 1 | return SDValue(); |
15045 | 27.2k | } |
15046 | 27.0k | |
15047 | 11 | if (11 SingleSource.getNode()11 ) |
15048 | 11 | return SingleSource; |
15049 | 0 |
|
15050 | 0 | return SDValue(); |
15051 | 0 | } |
15052 | | |
15053 | | /// If we are extracting a subvector produced by a wide binary operator with at |
15054 | | /// at least one operand that was the result of a vector concatenation, then try |
15055 | | /// to use the narrow vector operands directly to avoid the concatenation and |
15056 | | /// extraction. |
15057 | 244k | static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { |
15058 | 244k | // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share |
15059 | 244k | // some of these bailouts with other transforms. |
15060 | 244k | |
15061 | 244k | // The extract index must be a constant, so we can map it to a concat operand. |
15062 | 244k | auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); |
15063 | 244k | if (!ExtractIndex) |
15064 | 0 | return SDValue(); |
15065 | 244k | |
15066 | 244k | // Only handle the case where we are doubling and then halving. A larger ratio |
15067 | 244k | // may require more than two narrow binops to replace the wide binop. |
15068 | 244k | EVT VT = Extract->getValueType(0); |
15069 | 244k | unsigned NumElems = VT.getVectorNumElements(); |
15070 | 244k | assert((ExtractIndex->getZExtValue() % NumElems) == 0 && |
15071 | 244k | "Extract index is not a multiple of the vector length."); |
15072 | 244k | if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2) |
15073 | 27.0k | return SDValue(); |
15074 | 217k | |
15075 | 217k | // We are looking for an optionally bitcasted wide vector binary operator |
15076 | 217k | // feeding an extract subvector. |
15077 | 217k | SDValue BinOp = peekThroughBitcast(Extract->getOperand(0)); |
15078 | 217k | |
15079 | 217k | // TODO: The motivating case for this transform is an x86 AVX1 target. That |
15080 | 217k | // target has temptingly almost legal versions of bitwise logic ops in 256-bit |
15081 | 217k | // flavors, but no other 256-bit integer support. This could be extended to |
15082 | 217k | // handle any binop, but that may require fixing/adding other folds to avoid |
15083 | 217k | // codegen regressions. |
15084 | 217k | unsigned BOpcode = BinOp.getOpcode(); |
15085 | 217k | if (BOpcode != ISD::AND && 217k BOpcode != ISD::OR216k && BOpcode != ISD::XOR214k ) |
15086 | 214k | return SDValue(); |
15087 | 3.28k | |
15088 | 3.28k | // The binop must be a vector type, so we can chop it in half. |
15089 | 3.28k | EVT WideBVT = BinOp.getValueType(); |
15090 | 3.28k | if (!WideBVT.isVector()) |
15091 | 0 | return SDValue(); |
15092 | 3.28k | |
15093 | 3.28k | // Bail out if the target does not support a narrower version of the binop. |
15094 | 3.28k | EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), |
15095 | 3.28k | WideBVT.getVectorNumElements() / 2); |
15096 | 3.28k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
15097 | 3.28k | if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) |
15098 | 12 | return SDValue(); |
15099 | 3.27k | |
15100 | 3.27k | // Peek through bitcasts of the binary operator operands if needed. |
15101 | 3.27k | SDValue LHS = peekThroughBitcast(BinOp.getOperand(0)); |
15102 | 3.27k | SDValue RHS = peekThroughBitcast(BinOp.getOperand(1)); |
15103 | 3.27k | |
15104 | 3.27k | // We need at least one concatenation operation of a binop operand to make |
15105 | 3.27k | // this transform worthwhile. The concat must double the input vector sizes. |
15106 | 3.27k | // TODO: Should we also handle INSERT_SUBVECTOR patterns? |
15107 | 3.27k | bool ConcatL = |
15108 | 83 | LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2; |
15109 | 3.27k | bool ConcatR = |
15110 | 162 | RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2; |
15111 | 3.27k | if (!ConcatL && 3.27k !ConcatR3.19k ) |
15112 | 3.07k | return SDValue(); |
15113 | 199 | |
15114 | 199 | // If one of the binop operands was not the result of a concat, we must |
15115 | 199 | // extract a half-sized operand for our new narrow binop. We can't just reuse |
15116 | 199 | // the original extract index operand because we may have bitcasted. |
15117 | 199 | unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems; |
15118 | 199 | unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); |
15119 | 199 | EVT ExtBOIdxVT = Extract->getOperand(1).getValueType(); |
15120 | 199 | SDLoc DL(Extract); |
15121 | 199 | |
15122 | 199 | // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN |
15123 | 199 | // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N) |
15124 | 199 | // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN |
15125 | 83 | SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum)) |
15126 | 116 | : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, |
15127 | 116 | BinOp.getOperand(0), |
15128 | 116 | DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); |
15129 | 199 | |
15130 | 162 | SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum)) |
15131 | 37 | : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, |
15132 | 37 | BinOp.getOperand(1), |
15133 | 37 | DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); |
15134 | 244k | |
15135 | 244k | SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y); |
15136 | 244k | return DAG.getBitcast(VT, NarrowBinOp); |
15137 | 244k | } |
15138 | | |
15139 | | /// If we are extracting a subvector from a wide vector load, convert to a |
15140 | | /// narrow load to eliminate the extraction: |
15141 | | /// (extract_subvector (load wide vector)) --> (load narrow vector) |
15142 | 244k | static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { |
15143 | 244k | // TODO: Add support for big-endian. The offset calculation must be adjusted. |
15144 | 244k | if (DAG.getDataLayout().isBigEndian()) |
15145 | 936 | return SDValue(); |
15146 | 243k | |
15147 | 243k | // TODO: The one-use check is overly conservative. Check the cost of the |
15148 | 243k | // extract instead or remove that condition entirely. |
15149 | 243k | auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); |
15150 | 243k | auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); |
15151 | 243k | if (!Ld || 243k !Ld->hasOneUse()12.7k || Ld->getExtensionType()141 || Ld->isVolatile()130 || |
15152 | 130 | !ExtIdx) |
15153 | 243k | return SDValue(); |
15154 | 130 | |
15155 | 130 | // The narrow load will be offset from the base address of the old load if |
15156 | 130 | // we are extracting from something besides index 0 (little-endian). |
15157 | 130 | EVT VT = Extract->getValueType(0); |
15158 | 130 | SDLoc DL(Extract); |
15159 | 130 | SDValue BaseAddr = Ld->getOperand(1); |
15160 | 130 | unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); |
15161 | 130 | |
15162 | 130 | // TODO: Use "BaseIndexOffset" to make this more effective. |
15163 | 130 | SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); |
15164 | 130 | MachineFunction &MF = DAG.getMachineFunction(); |
15165 | 130 | MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, |
15166 | 130 | VT.getStoreSize()); |
15167 | 130 | SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); |
15168 | 130 | DAG.makeEquivalentMemoryOrdering(Ld, NewLd); |
15169 | 130 | return NewLd; |
15170 | 130 | } |
15171 | | |
15172 | 248k | SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { |
15173 | 248k | EVT NVT = N->getValueType(0); |
15174 | 248k | SDValue V = N->getOperand(0); |
15175 | 248k | |
15176 | 248k | // Extract from UNDEF is UNDEF. |
15177 | 248k | if (V.isUndef()) |
15178 | 2 | return DAG.getUNDEF(NVT); |
15179 | 248k | |
15180 | 248k | if (248k TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT)248k ) |
15181 | 244k | if (SDValue 244k NarrowLoad244k = narrowExtractedVectorLoad(N, DAG)) |
15182 | 130 | return NarrowLoad; |
15183 | 248k | |
15184 | 248k | // Combine: |
15185 | 248k | // (extract_subvec (concat V1, V2, ...), i) |
15186 | 248k | // Into: |
15187 | 248k | // Vi if possible |
15188 | 248k | // Only operand 0 is checked as 'concat' assumes all inputs of the same |
15189 | 248k | // type. |
15190 | 248k | if (248k V->getOpcode() == ISD::CONCAT_VECTORS && |
15191 | 1.03k | isa<ConstantSDNode>(N->getOperand(1)) && |
15192 | 248k | V->getOperand(0).getValueType() == NVT1.03k ) { |
15193 | 670 | unsigned Idx = N->getConstantOperandVal(1); |
15194 | 670 | unsigned NumElems = NVT.getVectorNumElements(); |
15195 | 670 | assert((Idx % NumElems) == 0 && |
15196 | 670 | "IDX in concat is not a multiple of the result vector length."); |
15197 | 670 | return V->getOperand(Idx / NumElems); |
15198 | 670 | } |
15199 | 247k | |
15200 | 247k | // Skip bitcasting |
15201 | 247k | V = peekThroughBitcast(V); |
15202 | 247k | |
15203 | 247k | // If the input is a build vector. Try to make a smaller build vector. |
15204 | 247k | if (V->getOpcode() == ISD::BUILD_VECTOR247k ) { |
15205 | 2.96k | if (auto *Idx2.96k = dyn_cast<ConstantSDNode>(N->getOperand(1))) { |
15206 | 2.96k | EVT InVT = V->getValueType(0); |
15207 | 2.96k | unsigned ExtractSize = NVT.getSizeInBits(); |
15208 | 2.96k | unsigned EltSize = InVT.getScalarSizeInBits(); |
15209 | 2.96k | // Only do this if we won't split any elements. |
15210 | 2.96k | if (ExtractSize % EltSize == 02.96k ) { |
15211 | 2.96k | unsigned NumElems = ExtractSize / EltSize; |
15212 | 2.96k | EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), |
15213 | 2.96k | InVT.getVectorElementType(), NumElems); |
15214 | 2.96k | if ((!LegalOperations || |
15215 | 1.28k | TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) && |
15216 | 2.96k | (!LegalTypes || 1.80k TLI.isTypeLegal(ExtractVT)224 )) { |
15217 | 1.80k | unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) / |
15218 | 1.80k | EltSize; |
15219 | 1.80k | |
15220 | 1.80k | // Extract the pieces from the original build_vector. |
15221 | 1.80k | SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), |
15222 | 1.80k | makeArrayRef(V->op_begin() + IdxVal, |
15223 | 1.80k | NumElems)); |
15224 | 1.80k | return DAG.getBitcast(NVT, BuildVec); |
15225 | 1.80k | } |
15226 | 245k | } |
15227 | 2.96k | } |
15228 | 2.96k | } |
15229 | 245k | |
15230 | 245k | if (245k V->getOpcode() == ISD::INSERT_SUBVECTOR245k ) { |
15231 | 985 | // Handle only simple case where vector being inserted and vector |
15232 | 985 | // being extracted are of same size. |
15233 | 985 | EVT SmallVT = V->getOperand(1).getValueType(); |
15234 | 985 | if (!NVT.bitsEq(SmallVT)) |
15235 | 14 | return SDValue(); |
15236 | 971 | |
15237 | 971 | // Only handle cases where both indexes are constants. |
15238 | 971 | ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
15239 | 971 | ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); |
15240 | 971 | |
15241 | 971 | if (InsIdx && 971 ExtIdx971 ) { |
15242 | 971 | // Combine: |
15243 | 971 | // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) |
15244 | 971 | // Into: |
15245 | 971 | // indices are equal or bit offsets are equal => V1 |
15246 | 971 | // otherwise => (extract_subvec V1, ExtIdx) |
15247 | 971 | if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == |
15248 | 971 | ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) |
15249 | 778 | return DAG.getBitcast(NVT, V->getOperand(1)); |
15250 | 193 | return DAG.getNode( |
15251 | 193 | ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, |
15252 | 193 | DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)), |
15253 | 193 | N->getOperand(1)); |
15254 | 193 | } |
15255 | 985 | } |
15256 | 244k | |
15257 | 244k | if (SDValue 244k NarrowBOp244k = narrowExtractedVectorBinOp(N, DAG)) |
15258 | 199 | return NarrowBOp; |
15259 | 244k | |
15260 | 244k | return SDValue(); |
15261 | 244k | } |
15262 | | |
15263 | | static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, |
15264 | 101k | SDValue V, SelectionDAG &DAG) { |
15265 | 101k | SDLoc DL(V); |
15266 | 101k | EVT VT = V.getValueType(); |
15267 | 101k | |
15268 | 101k | switch (V.getOpcode()) { |
15269 | 95.6k | default: |
15270 | 95.6k | return V; |
15271 | 101k | |
15272 | 5.58k | case ISD::CONCAT_VECTORS: { |
15273 | 5.58k | EVT OpVT = V->getOperand(0).getValueType(); |
15274 | 5.58k | int OpSize = OpVT.getVectorNumElements(); |
15275 | 5.58k | SmallBitVector OpUsedElements(OpSize, false); |
15276 | 5.58k | bool FoundSimplification = false; |
15277 | 5.58k | SmallVector<SDValue, 4> NewOps; |
15278 | 5.58k | NewOps.reserve(V->getNumOperands()); |
15279 | 18.8k | for (int i = 0, NumOps = V->getNumOperands(); i < NumOps18.8k ; ++i13.2k ) { |
15280 | 13.2k | SDValue Op = V->getOperand(i); |
15281 | 13.2k | bool OpUsed = false; |
15282 | 110k | for (int j = 0; j < OpSize110k ; ++j97.4k ) |
15283 | 97.4k | if (97.4k UsedElements[i * OpSize + j]97.4k ) { |
15284 | 32.9k | OpUsedElements[j] = true; |
15285 | 32.9k | OpUsed = true; |
15286 | 32.9k | } |
15287 | 13.2k | NewOps.push_back( |
15288 | 6.17k | OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) |
15289 | 7.05k | : DAG.getUNDEF(OpVT)); |
15290 | 13.2k | FoundSimplification |= Op == NewOps.back(); |
15291 | 13.2k | OpUsedElements.reset(); |
15292 | 13.2k | } |
15293 | 5.58k | if (FoundSimplification) |
15294 | 5.58k | V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); |
15295 | 5.58k | return V; |
15296 | 101k | } |
15297 | 101k | |
15298 | 8 | case ISD::INSERT_SUBVECTOR: { |
15299 | 8 | SDValue BaseV = V->getOperand(0); |
15300 | 8 | SDValue SubV = V->getOperand(1); |
15301 | 8 | auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2)); |
15302 | 8 | if (!IdxN) |
15303 | 0 | return V; |
15304 | 8 | |
15305 | 8 | int SubSize = SubV.getValueType().getVectorNumElements(); |
15306 | 8 | int Idx = IdxN->getZExtValue(); |
15307 | 8 | bool SubVectorUsed = false; |
15308 | 8 | SmallBitVector SubUsedElements(SubSize, false); |
15309 | 64 | for (int i = 0; i < SubSize64 ; ++i56 ) |
15310 | 56 | if (56 UsedElements[i + Idx]56 ) { |
15311 | 18 | SubVectorUsed = true; |
15312 | 18 | SubUsedElements[i] = true; |
15313 | 18 | UsedElements[i + Idx] = false; |
15314 | 18 | } |
15315 | 8 | |
15316 | 8 | // Now recurse on both the base and sub vectors. |
15317 | 8 | SDValue SimplifiedSubV = |
15318 | 8 | SubVectorUsed |
15319 | 8 | ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) |
15320 | 0 | : DAG.getUNDEF(SubV.getValueType()); |
15321 | 8 | SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); |
15322 | 8 | if (SimplifiedSubV != SubV || 8 SimplifiedBaseV != BaseV8 ) |
15323 | 0 | V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
15324 | 0 | SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); |
15325 | 8 | return V; |
15326 | 8 | } |
15327 | 101k | } |
15328 | 101k | } |
15329 | | |
15330 | | static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, |
15331 | 47.5k | SDValue N1, SelectionDAG &DAG) { |
15332 | 47.5k | EVT VT = SVN->getValueType(0); |
15333 | 47.5k | int NumElts = VT.getVectorNumElements(); |
15334 | 47.5k | SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); |
15335 | 47.5k | for (int M : SVN->getMask()) |
15336 | 474k | if (474k M >= 0 && 474k M < NumElts382k ) |
15337 | 263k | N0UsedElements[M] = true; |
15338 | 211k | else if (211k M >= NumElts211k ) |
15339 | 119k | N1UsedElements[M - NumElts] = true; |
15340 | 47.5k | |
15341 | 47.5k | SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); |
15342 | 47.5k | SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); |
15343 | 47.5k | if (S0 == N0 && 47.5k S1 == N147.4k ) |
15344 | 47.4k | return SDValue(); |
15345 | 83 | |
15346 | 83 | return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); |
15347 | 83 | } |
15348 | | |
15349 | | static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0, |
15350 | 53.1k | SDValue N1, SelectionDAG &DAG) { |
15351 | 399k | auto isUndefElt = [](SDValue V, int Idx) { |
15352 | 399k | // TODO - handle more cases as required. |
15353 | 399k | if (V.getOpcode() == ISD::BUILD_VECTOR) |
15354 | 104k | return V.getOperand(Idx).isUndef(); |
15355 | 294k | return false; |
15356 | 294k | }; |
15357 | 53.1k | |
15358 | 53.1k | EVT VT = SVN->getValueType(0); |
15359 | 53.1k | unsigned NumElts = VT.getVectorNumElements(); |
15360 | 53.1k | |
15361 | 53.1k | bool Changed = false; |
15362 | 53.1k | SmallVector<int, 8> NewMask; |
15363 | 555k | for (unsigned i = 0; i != NumElts555k ; ++i502k ) { |
15364 | 502k | int Idx = SVN->getMaskElt(i); |
15365 | 502k | if ((0 <= Idx && 502k Idx < (int)NumElts409k && isUndefElt(N0, Idx)290k ) || |
15366 | 502k | ((int)NumElts < Idx && 502k isUndefElt(N1, Idx - NumElts)108k )) { |
15367 | 49 | Changed = true; |
15368 | 49 | Idx = -1; |
15369 | 49 | } |
15370 | 502k | NewMask.push_back(Idx); |
15371 | 502k | } |
15372 | 53.1k | if (Changed) |
15373 | 28 | return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask); |
15374 | 53.1k | |
15375 | 53.1k | return SDValue(); |
15376 | 53.1k | } |
15377 | | |
15378 | | // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, |
15379 | | // or turn a shuffle of a single concat into simpler shuffle then concat. |
15380 | 2.12k | static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { |
15381 | 2.12k | EVT VT = N->getValueType(0); |
15382 | 2.12k | unsigned NumElts = VT.getVectorNumElements(); |
15383 | 2.12k | |
15384 | 2.12k | SDValue N0 = N->getOperand(0); |
15385 | 2.12k | SDValue N1 = N->getOperand(1); |
15386 | 2.12k | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); |
15387 | 2.12k | |
15388 | 2.12k | SmallVector<SDValue, 4> Ops; |
15389 | 2.12k | EVT ConcatVT = N0.getOperand(0).getValueType(); |
15390 | 2.12k | unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); |
15391 | 2.12k | unsigned NumConcats = NumElts / NumElemsPerConcat; |
15392 | 2.12k | |
15393 | 2.12k | // Special case: shuffle(concat(A,B)) can be more efficiently represented |
15394 | 2.12k | // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high |
15395 | 2.12k | // half vector elements. |
15396 | 2.12k | if (NumElemsPerConcat * 2 == NumElts && 2.12k N1.isUndef()1.79k && |
15397 | 588 | std::all_of(SVN->getMask().begin() + NumElemsPerConcat, |
15398 | 2.12k | SVN->getMask().end(), [](int i) { return i == -1; }954 )) { |
15399 | 42 | N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), |
15400 | 42 | makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); |
15401 | 42 | N1 = DAG.getUNDEF(ConcatVT); |
15402 | 42 | return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); |
15403 | 42 | } |
15404 | 2.08k | |
15405 | 2.08k | // Look at every vector that's inserted. We're looking for exact |
15406 | 2.08k | // subvector-sized copies from a concatenated vector |
15407 | 3.18k | for (unsigned I = 0; 2.08k I != NumConcats3.18k ; ++I1.10k ) { |
15408 | 3.14k | // Make sure we're dealing with a copy. |
15409 | 3.14k | unsigned Begin = I * NumElemsPerConcat; |
15410 | 3.14k | bool AllUndef = true, NoUndef = true; |
15411 | 28.3k | for (unsigned J = Begin; J != Begin + NumElemsPerConcat28.3k ; ++J25.1k ) { |
15412 | 25.1k | if (SVN->getMaskElt(J) >= 0) |
15413 | 20.3k | AllUndef = false; |
15414 | 25.1k | else |
15415 | 4.79k | NoUndef = false; |
15416 | 25.1k | } |
15417 | 3.14k | |
15418 | 3.14k | if (NoUndef3.14k ) { |
15419 | 2.09k | if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) |
15420 | 188 | return SDValue(); |
15421 | 1.90k | |
15422 | 10.2k | for (unsigned J = 1; 1.90k J != NumElemsPerConcat10.2k ; ++J8.35k ) |
15423 | 9.21k | if (9.21k SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)9.21k ) |
15424 | 862 | return SDValue(); |
15425 | 1.90k | |
15426 | 1.04k | unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; |
15427 | 1.04k | if (FirstElt < N0.getNumOperands()) |
15428 | 1.02k | Ops.push_back(N0.getOperand(FirstElt)); |
15429 | 1.04k | else |
15430 | 20 | Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); |
15431 | 2.09k | |
15432 | 3.14k | } else if (1.05k AllUndef1.05k ) { |
15433 | 56 | Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); |
15434 | 1.05k | } else { // Mixed with general masks and undefs, can't do optimization. |
15435 | 995 | return SDValue(); |
15436 | 995 | } |
15437 | 3.14k | } |
15438 | 2.08k | |
15439 | 36 | return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); |
15440 | 2.12k | } |
15441 | | |
15442 | | // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - |
15443 | | // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. |
15444 | | // |
15445 | | // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always |
15446 | | // a simplification in some sense, but it isn't appropriate in general: some |
15447 | | // BUILD_VECTORs are substantially cheaper than others. The general case |
15448 | | // of a BUILD_VECTOR requires inserting each element individually (or |
15449 | | // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of |
15450 | | // all constants is a single constant pool load. A BUILD_VECTOR where each |
15451 | | // element is identical is a splat. A BUILD_VECTOR where most of the operands |
15452 | | // are undef lowers to a small number of element insertions. |
15453 | | // |
15454 | | // To deal with this, we currently use a bunch of mostly arbitrary heuristics. |
15455 | | // We don't fold shuffles where one side is a non-zero constant, and we don't |
15456 | | // fold shuffles if the resulting BUILD_VECTOR would have duplicate |
15457 | | // non-constant operands. This seems to work out reasonably well in practice. |
15458 | | static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, |
15459 | | SelectionDAG &DAG, |
15460 | 32.5k | const TargetLowering &TLI) { |
15461 | 32.5k | EVT VT = SVN->getValueType(0); |
15462 | 32.5k | unsigned NumElts = VT.getVectorNumElements(); |
15463 | 32.5k | SDValue N0 = SVN->getOperand(0); |
15464 | 32.5k | SDValue N1 = SVN->getOperand(1); |
15465 | 32.5k | |
15466 | 32.5k | if (!N0->hasOneUse() || 32.5k !N1->hasOneUse()26.7k ) |
15467 | 14.3k | return SDValue(); |
15468 | 18.1k | // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as |
15469 | 18.1k | // discussed above. |
15470 | 18.1k | if (18.1k !N1.isUndef()18.1k ) { |
15471 | 9.30k | bool N0AnyConst = isAnyConstantBuildVector(N0.getNode()); |
15472 | 9.30k | bool N1AnyConst = isAnyConstantBuildVector(N1.getNode()); |
15473 | 9.30k | if (N0AnyConst && 9.30k !N1AnyConst496 && !ISD::isBuildVectorAllZeros(N0.getNode())477 ) |
15474 | 64 | return SDValue(); |
15475 | 9.24k | if (9.24k !N0AnyConst && 9.24k N1AnyConst8.80k && !ISD::isBuildVectorAllZeros(N1.getNode())2.02k ) |
15476 | 51 | return SDValue(); |
15477 | 18.0k | } |
15478 | 18.0k | |
15479 | 18.0k | SmallVector<SDValue, 8> Ops; |
15480 | 18.0k | SmallSet<SDValue, 16> DuplicateOps; |
15481 | 23.6k | for (int M : SVN->getMask()) { |
15482 | 23.6k | SDValue Op = DAG.getUNDEF(VT.getScalarType()); |
15483 | 23.6k | if (M >= 023.6k ) { |
15484 | 20.9k | int Idx = M < (int)NumElts ? M18.3k : M - NumElts2.63k ; |
15485 | 20.9k | SDValue &S = (M < (int)NumElts ? N018.3k : N12.63k ); |
15486 | 20.9k | if (S.getOpcode() == ISD::BUILD_VECTOR20.9k ) { |
15487 | 3.20k | Op = S.getOperand(Idx); |
15488 | 20.9k | } else if (17.7k S.getOpcode() == ISD::SCALAR_TO_VECTOR17.7k ) { |
15489 | 152 | if (Idx == 0) |
15490 | 152 | Op = S.getOperand(0); |
15491 | 17.7k | } else { |
15492 | 17.5k | // Operand can't be combined - bail out. |
15493 | 17.5k | return SDValue(); |
15494 | 17.5k | } |
15495 | 6.02k | } |
15496 | 6.02k | |
15497 | 6.02k | // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is |
15498 | 6.02k | // fine, but it's likely to generate low-quality code if the target can't |
15499 | 6.02k | // reconstruct an appropriate shuffle. |
15500 | 6.02k | if (6.02k !Op.isUndef() && 6.02k !isa<ConstantSDNode>(Op)3.35k && !isa<ConstantFPSDNode>(Op)1.56k ) |
15501 | 1.24k | if (1.24k !DuplicateOps.insert(Op).second1.24k ) |
15502 | 97 | return SDValue(); |
15503 | 5.92k | |
15504 | 5.92k | Ops.push_back(Op); |
15505 | 5.92k | } |
15506 | 18.0k | // BUILD_VECTOR requires all inputs to be of the same type, find the |
15507 | 18.0k | // maximum type and extend them all. |
15508 | 354 | EVT SVT = VT.getScalarType(); |
15509 | 354 | if (SVT.isInteger()) |
15510 | 316 | for (SDValue &Op : Ops) |
15511 | 3.26k | SVT = (SVT.bitsLT(Op.getValueType()) ? 3.26k Op.getValueType()6 : SVT3.25k ); |
15512 | 354 | if (SVT != VT.getScalarType()) |
15513 | 6 | for (SDValue &Op : Ops) |
15514 | 64 | Op = TLI.isZExtFree(Op.getValueType(), SVT) |
15515 | 0 | ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) |
15516 | 64 | : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT); |
15517 | 354 | return DAG.getBuildVector(VT, SDLoc(SVN), Ops); |
15518 | 32.5k | } |
15519 | | |
15520 | | // Match shuffles that can be converted to any_vector_extend_in_reg. |
15521 | | // This is often generated during legalization. |
15522 | | // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) |
15523 | | // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. |
15524 | | static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, |
15525 | | SelectionDAG &DAG, |
15526 | | const TargetLowering &TLI, |
15527 | | bool LegalOperations, |
15528 | 47.4k | bool LegalTypes) { |
15529 | 47.4k | EVT VT = SVN->getValueType(0); |
15530 | 47.4k | bool IsBigEndian = DAG.getDataLayout().isBigEndian(); |
15531 | 47.4k | |
15532 | 47.4k | // TODO Add support for big-endian when we have a test case. |
15533 | 47.4k | if (!VT.isInteger() || 47.4k IsBigEndian35.9k ) |
15534 | 13.1k | return SDValue(); |
15535 | 34.2k | |
15536 | 34.2k | unsigned NumElts = VT.getVectorNumElements(); |
15537 | 34.2k | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
15538 | 34.2k | ArrayRef<int> Mask = SVN->getMask(); |
15539 | 34.2k | SDValue N0 = SVN->getOperand(0); |
15540 | 34.2k | |
15541 | 34.2k | // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32)) |
15542 | 72.4k | auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) { |
15543 | 143k | for (unsigned i = 0; i != NumElts143k ; ++i70.9k ) { |
15544 | 141k | if (Mask[i] < 0) |
15545 | 21.3k | continue; |
15546 | 120k | if (120k (i % Scale) == 0 && 120k Mask[i] == (int)(i / Scale)76.3k ) |
15547 | 49.5k | continue; |
15548 | 71.0k | return false; |
15549 | 71.0k | } |
15550 | 1.39k | return true; |
15551 | 72.4k | }; |
15552 | 34.2k | |
15553 | 34.2k | // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for |
15554 | 34.2k | // power-of-2 extensions as they are the most likely. |
15555 | 106k | for (unsigned Scale = 2; Scale < NumElts106k ; Scale *= 272.1k ) { |
15556 | 72.4k | if (!isAnyExtend(Scale)) |
15557 | 71.0k | continue; |
15558 | 1.39k | |
15559 | 1.39k | EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); |
15560 | 1.39k | EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); |
15561 | 1.39k | if (!LegalTypes || 1.39k TLI.isTypeLegal(OutVT)1.31k ) |
15562 | 1.38k | if (1.38k !LegalOperations || |
15563 | 1.16k | TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) |
15564 | 215 | return DAG.getBitcast(VT, |
15565 | 215 | DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT)); |
15566 | 72.4k | } |
15567 | 34.2k | |
15568 | 34.0k | return SDValue(); |
15569 | 47.4k | } |
15570 | | |
15571 | | // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of |
15572 | | // each source element of a large type into the lowest elements of a smaller |
15573 | | // destination type. This is often generated during legalization. |
15574 | | // If the source node itself was a '*_extend_vector_inreg' node then we should |
15575 | | // then be able to remove it. |
15576 | | static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, |
15577 | 47.2k | SelectionDAG &DAG) { |
15578 | 47.2k | EVT VT = SVN->getValueType(0); |
15579 | 47.2k | bool IsBigEndian = DAG.getDataLayout().isBigEndian(); |
15580 | 47.2k | |
15581 | 47.2k | // TODO Add support for big-endian when we have a test case. |
15582 | 47.2k | if (!VT.isInteger() || 47.2k IsBigEndian35.6k ) |
15583 | 13.1k | return SDValue(); |
15584 | 34.0k | |
15585 | 34.0k | SDValue N0 = peekThroughBitcast(SVN->getOperand(0)); |
15586 | 34.0k | |
15587 | 34.0k | unsigned Opcode = N0.getOpcode(); |
15588 | 34.0k | if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG && |
15589 | 34.0k | Opcode != ISD::SIGN_EXTEND_VECTOR_INREG && |
15590 | 34.0k | Opcode != ISD::ZERO_EXTEND_VECTOR_INREG) |
15591 | 33.9k | return SDValue(); |
15592 | 41 | |
15593 | 41 | SDValue N00 = N0.getOperand(0); |
15594 | 41 | ArrayRef<int> Mask = SVN->getMask(); |
15595 | 41 | unsigned NumElts = VT.getVectorNumElements(); |
15596 | 41 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
15597 | 41 | unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits(); |
15598 | 41 | unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits(); |
15599 | 41 | |
15600 | 41 | if (ExtDstSizeInBits % ExtSrcSizeInBits != 0) |
15601 | 0 | return SDValue(); |
15602 | 41 | unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits; |
15603 | 41 | |
15604 | 41 | // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1> |
15605 | 41 | // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1> |
15606 | 41 | // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1> |
15607 | 21 | auto isTruncate = [&Mask, &NumElts](unsigned Scale) { |
15608 | 93 | for (unsigned i = 0; i != NumElts93 ; ++i72 ) { |
15609 | 76 | if (Mask[i] < 0) |
15610 | 38 | continue; |
15611 | 38 | if (38 (i * Scale) < NumElts && 38 Mask[i] == (int)(i * Scale)38 ) |
15612 | 34 | continue; |
15613 | 4 | return false; |
15614 | 4 | } |
15615 | 17 | return true; |
15616 | 21 | }; |
15617 | 41 | |
15618 | 41 | // At the moment we just handle the case where we've truncated back to the |
15619 | 41 | // same size as before the extension. |
15620 | 41 | // TODO: handle more extension/truncation cases as cases arise. |
15621 | 41 | if (EltSizeInBits != ExtSrcSizeInBits) |
15622 | 20 | return SDValue(); |
15623 | 21 | |
15624 | 21 | // We can remove *extend_vector_inreg only if the truncation happens at |
15625 | 21 | // the same scale as the extension. |
15626 | 21 | if (21 isTruncate(ExtScale)21 ) |
15627 | 17 | return DAG.getBitcast(VT, N00); |
15628 | 4 | |
15629 | 4 | return SDValue(); |
15630 | 4 | } |
15631 | | |
15632 | | // Combine shuffles of splat-shuffles of the form: |
15633 | | // shuffle (shuffle V, undef, splat-mask), undef, M |
15634 | | // If splat-mask contains undef elements, we need to be careful about |
15635 | | // introducing undef's in the folded mask which are not the result of composing |
15636 | | // the masks of the shuffles. |
15637 | | static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask, |
15638 | | ShuffleVectorSDNode *Splat, |
15639 | 23 | SelectionDAG &DAG) { |
15640 | 23 | ArrayRef<int> SplatMask = Splat->getMask(); |
15641 | 23 | assert(UserMask.size() == SplatMask.size() && "Mask length mismatch"); |
15642 | 23 | |
15643 | 23 | // Prefer simplifying to the splat-shuffle, if possible. This is legal if |
15644 | 23 | // every undef mask element in the splat-shuffle has a corresponding undef |
15645 | 23 | // element in the user-shuffle's mask or if the composition of mask elements |
15646 | 23 | // would result in undef. |
15647 | 23 | // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask): |
15648 | 23 | // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u] |
15649 | 23 | // In this case it is not legal to simplify to the splat-shuffle because we |
15650 | 23 | // may be exposing the users of the shuffle an undef element at index 1 |
15651 | 23 | // which was not there before the combine. |
15652 | 23 | // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u] |
15653 | 23 | // In this case the composition of masks yields SplatMask, so it's ok to |
15654 | 23 | // simplify to the splat-shuffle. |
15655 | 23 | // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u] |
15656 | 23 | // In this case the composed mask includes all undef elements of SplatMask |
15657 | 23 | // and in addition sets element zero to undef. It is safe to simplify to |
15658 | 23 | // the splat-shuffle. |
15659 | 23 | auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask, |
15660 | 23 | ArrayRef<int> SplatMask) { |
15661 | 174 | for (unsigned i = 0, e = UserMask.size(); i != e174 ; ++i151 ) |
15662 | 158 | if (158 UserMask[i] != -1 && 158 SplatMask[i] == -153 && |
15663 | 13 | SplatMask[UserMask[i]] != -1) |
15664 | 7 | return false; |
15665 | 16 | return true; |
15666 | 23 | }; |
15667 | 23 | if (CanSimplifyToExistingSplat(UserMask, SplatMask)) |
15668 | 16 | return SDValue(Splat, 0); |
15669 | 7 | |
15670 | 7 | // Create a new shuffle with a mask that is composed of the two shuffles' |
15671 | 7 | // masks. |
15672 | 7 | SmallVector<int, 32> NewMask; |
15673 | 7 | for (int Idx : UserMask) |
15674 | 28 | NewMask.push_back(Idx == -1 ? 28 -19 : SplatMask[Idx]19 ); |
15675 | 23 | |
15676 | 23 | return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat), |
15677 | 23 | Splat->getOperand(0), Splat->getOperand(1), |
15678 | 23 | NewMask); |
15679 | 23 | } |
15680 | | |
15681 | 53.1k | SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { |
15682 | 53.1k | EVT VT = N->getValueType(0); |
15683 | 53.1k | unsigned NumElts = VT.getVectorNumElements(); |
15684 | 53.1k | |
15685 | 53.1k | SDValue N0 = N->getOperand(0); |
15686 | 53.1k | SDValue N1 = N->getOperand(1); |
15687 | 53.1k | |
15688 | 53.1k | assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); |
15689 | 53.1k | |
15690 | 53.1k | // Canonicalize shuffle undef, undef -> undef |
15691 | 53.1k | if (N0.isUndef() && 53.1k N1.isUndef()1 ) |
15692 | 0 | return DAG.getUNDEF(VT); |
15693 | 53.1k | |
15694 | 53.1k | ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); |
15695 | 53.1k | |
15696 | 53.1k | // Canonicalize shuffle v, v -> v, undef |
15697 | 53.1k | if (N0 == N153.1k ) { |
15698 | 23 | SmallVector<int, 8> NewMask; |
15699 | 161 | for (unsigned i = 0; i != NumElts161 ; ++i138 ) { |
15700 | 138 | int Idx = SVN->getMaskElt(i); |
15701 | 138 | if (Idx >= (int)NumElts138 ) Idx -= NumElts57 ; |
15702 | 138 | NewMask.push_back(Idx); |
15703 | 138 | } |
15704 | 23 | return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask); |
15705 | 23 | } |
15706 | 53.1k | |
15707 | 53.1k | // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. |
15708 | 53.1k | if (53.1k N0.isUndef()53.1k ) |
15709 | 1 | return DAG.getCommutedVectorShuffle(*SVN); |
15710 | 53.1k | |
15711 | 53.1k | // Remove references to rhs if it is undef |
15712 | 53.1k | if (53.1k N1.isUndef()53.1k ) { |
15713 | 31.5k | bool Changed = false; |
15714 | 31.5k | SmallVector<int, 8> NewMask; |
15715 | 301k | for (unsigned i = 0; i != NumElts301k ; ++i269k ) { |
15716 | 269k | int Idx = SVN->getMaskElt(i); |
15717 | 269k | if (Idx >= (int)NumElts269k ) { |
15718 | 7 | Idx = -1; |
15719 | 7 | Changed = true; |
15720 | 7 | } |
15721 | 269k | NewMask.push_back(Idx); |
15722 | 269k | } |
15723 | 31.5k | if (Changed) |
15724 | 4 | return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); |
15725 | 53.1k | } |
15726 | 53.1k | |
15727 | 53.1k | // Simplify shuffle mask if a referenced element is UNDEF. |
15728 | 53.1k | if (SDValue 53.1k V53.1k = simplifyShuffleMask(SVN, N0, N1, DAG)) |
15729 | 28 | return V; |
15730 | 53.1k | |
15731 | 53.1k | // A shuffle of a single vector that is a splat can always be folded. |
15732 | 53.1k | if (auto *53.1k N0Shuf53.1k = dyn_cast<ShuffleVectorSDNode>(N0)) |
15733 | 3.89k | if (3.89k N1->isUndef() && 3.89k N0Shuf->isSplat()913 ) |
15734 | 23 | return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG); |
15735 | 53.1k | |
15736 | 53.1k | // If it is a splat, check if the argument vector is another splat or a |
15737 | 53.1k | // build_vector. |
15738 | 53.1k | if (53.1k SVN->isSplat() && 53.1k SVN->getSplatIndex() < (int)NumElts14.9k ) { |
15739 | 14.9k | SDNode *V = N0.getNode(); |
15740 | 14.9k | |
15741 | 14.9k | // If this is a bit convert that changes the element type of the vector but |
15742 | 14.9k | // not the number of vector elements, look through it. Be careful not to |
15743 | 14.9k | // look though conversions that change things like v4f32 to v2f64. |
15744 | 14.9k | if (V->getOpcode() == ISD::BITCAST14.9k ) { |
15745 | 365 | SDValue ConvInput = V->getOperand(0); |
15746 | 365 | if (ConvInput.getValueType().isVector() && |
15747 | 315 | ConvInput.getValueType().getVectorNumElements() == NumElts) |
15748 | 50 | V = ConvInput.getNode(); |
15749 | 365 | } |
15750 | 14.9k | |
15751 | 14.9k | if (V->getOpcode() == ISD::BUILD_VECTOR14.9k ) { |
15752 | 5.60k | assert(V->getNumOperands() == NumElts && |
15753 | 5.60k | "BUILD_VECTOR has wrong number of operands"); |
15754 | 5.60k | SDValue Base; |
15755 | 5.60k | bool AllSame = true; |
15756 | 5.65k | for (unsigned i = 0; i != NumElts5.65k ; ++i43 ) { |
15757 | 5.65k | if (!V->getOperand(i).isUndef()5.65k ) { |
15758 | 5.60k | Base = V->getOperand(i); |
15759 | 5.60k | break; |
15760 | 5.60k | } |
15761 | 5.65k | } |
15762 | 5.60k | // Splat of <u, u, u, u>, return <u, u, u, u> |
15763 | 5.60k | if (!Base.getNode()) |
15764 | 0 | return N0; |
15765 | 11.2k | for (unsigned i = 0; 5.60k i != NumElts11.2k ; ++i5.60k ) { |
15766 | 11.2k | if (V->getOperand(i) != Base11.2k ) { |
15767 | 5.60k | AllSame = false; |
15768 | 5.60k | break; |
15769 | 5.60k | } |
15770 | 11.2k | } |
15771 | 5.60k | // Splat of <x, x, x, x>, return <x, x, x, x> |
15772 | 5.60k | if (AllSame) |
15773 | 5 | return N0; |
15774 | 5.60k | |
15775 | 5.60k | // Canonicalize any other splat as a build_vector. |
15776 | 5.60k | const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); |
15777 | 5.60k | SmallVector<SDValue, 8> Ops(NumElts, Splatted); |
15778 | 5.60k | SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops); |
15779 | 5.60k | |
15780 | 5.60k | // We may have jumped through bitcasts, so the type of the |
15781 | 5.60k | // BUILD_VECTOR may not match the type of the shuffle. |
15782 | 5.60k | if (V->getValueType(0) != VT) |
15783 | 0 | NewBV = DAG.getBitcast(VT, NewBV); |
15784 | 5.60k | return NewBV; |
15785 | 5.60k | } |
15786 | 14.9k | } |
15787 | 47.5k | |
15788 | 47.5k | // There are various patterns used to build up a vector from smaller vectors, |
15789 | 47.5k | // subvectors, or elements. Scan chains of these and replace unused insertions |
15790 | 47.5k | // or components with undef. |
15791 | 47.5k | if (SDValue 47.5k S47.5k = simplifyShuffleOperands(SVN, N0, N1, DAG)) |
15792 | 83 | return S; |
15793 | 47.4k | |
15794 | 47.4k | // Match shuffles that can be converted to any_vector_extend_in_reg. |
15795 | 47.4k | if (SDValue 47.4k V47.4k = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes)) |
15796 | 215 | return V; |
15797 | 47.2k | |
15798 | 47.2k | // Combine "truncate_vector_in_reg" style shuffles. |
15799 | 47.2k | if (SDValue 47.2k V47.2k = combineTruncationShuffle(SVN, DAG)) |
15800 | 17 | return V; |
15801 | 47.1k | |
15802 | 47.1k | if (47.1k N0.getOpcode() == ISD::CONCAT_VECTORS && |
15803 | 2.82k | Level < AfterLegalizeVectorOps && |
15804 | 2.61k | (N1.isUndef() || |
15805 | 1.70k | (N1.getOpcode() == ISD::CONCAT_VECTORS && |
15806 | 47.1k | N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()1.21k ))) { |
15807 | 2.12k | if (SDValue V = partitionShuffleOfConcats(N, DAG)) |
15808 | 78 | return V; |
15809 | 47.1k | } |
15810 | 47.1k | |
15811 | 47.1k | // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - |
15812 | 47.1k | // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. |
15813 | 47.1k | if (47.1k Level < AfterLegalizeVectorOps && 47.1k TLI.isTypeLegal(VT)34.4k ) |
15814 | 32.5k | if (SDValue 32.5k Res32.5k = combineShuffleOfScalars(SVN, DAG, TLI)) |
15815 | 354 | return Res; |
15816 | 46.7k | |
15817 | 46.7k | // If this shuffle only has a single input that is a bitcasted shuffle, |
15818 | 46.7k | // attempt to merge the 2 shuffles and suitably bitcast the inputs/output |
15819 | 46.7k | // back to their original types. |
15820 | 46.7k | if (46.7k N0.getOpcode() == ISD::BITCAST && 46.7k N0.hasOneUse()9.74k && |
15821 | 46.7k | N1.isUndef()9.17k && Level < AfterLegalizeVectorOps4.58k && |
15822 | 46.7k | TLI.isTypeLegal(VT)1.91k ) { |
15823 | 1.88k | |
15824 | 1.88k | // Peek through the bitcast only if there is one user. |
15825 | 1.88k | SDValue BC0 = N0; |
15826 | 3.76k | while (BC0.getOpcode() == ISD::BITCAST3.76k ) { |
15827 | 1.88k | if (!BC0.hasOneUse()) |
15828 | 0 | break; |
15829 | 1.88k | BC0 = BC0.getOperand(0); |
15830 | 1.88k | } |
15831 | 1.88k | |
15832 | 164 | auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { |
15833 | 164 | if (Scale == 1) |
15834 | 82 | return SmallVector<int, 8>(Mask.begin(), Mask.end()); |
15835 | 82 | |
15836 | 82 | SmallVector<int, 8> NewMask; |
15837 | 82 | for (int M : Mask) |
15838 | 1.29k | for (int s = 0; 350 s != Scale1.29k ; ++s944 ) |
15839 | 944 | NewMask.push_back(M < 0 ? 944 -1128 : Scale * M + s816 ); |
15840 | 164 | return NewMask; |
15841 | 164 | }; |
15842 | 1.88k | |
15843 | 1.88k | if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && 1.88k BC0.hasOneUse()86 ) { |
15844 | 82 | EVT SVT = VT.getScalarType(); |
15845 | 82 | EVT InnerVT = BC0->getValueType(0); |
15846 | 82 | EVT InnerSVT = InnerVT.getScalarType(); |
15847 | 82 | |
15848 | 82 | // Determine which shuffle works with the smaller scalar type. |
15849 | 82 | EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT57 : InnerVT25 ; |
15850 | 82 | EVT ScaleSVT = ScaleVT.getScalarType(); |
15851 | 82 | |
15852 | 82 | if (TLI.isTypeLegal(ScaleVT) && |
15853 | 82 | 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && |
15854 | 82 | 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())82 ) { |
15855 | 82 | int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); |
15856 | 82 | int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); |
15857 | 82 | |
15858 | 82 | // Scale the shuffle masks to the smaller scalar type. |
15859 | 82 | ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); |
15860 | 82 | SmallVector<int, 8> InnerMask = |
15861 | 82 | ScaleShuffleMask(InnerSVN->getMask(), InnerScale); |
15862 | 82 | SmallVector<int, 8> OuterMask = |
15863 | 82 | ScaleShuffleMask(SVN->getMask(), OuterScale); |
15864 | 82 | |
15865 | 82 | // Merge the shuffle masks. |
15866 | 82 | SmallVector<int, 8> NewMask; |
15867 | 82 | for (int M : OuterMask) |
15868 | 944 | NewMask.push_back(M < 0 ? 944 -1170 : InnerMask[M]774 ); |
15869 | 82 | |
15870 | 82 | // Test for shuffle mask legality over both commutations. |
15871 | 82 | SDValue SV0 = BC0->getOperand(0); |
15872 | 82 | SDValue SV1 = BC0->getOperand(1); |
15873 | 82 | bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); |
15874 | 82 | if (!LegalMask82 ) { |
15875 | 2 | std::swap(SV0, SV1); |
15876 | 2 | ShuffleVectorSDNode::commuteMask(NewMask); |
15877 | 2 | LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); |
15878 | 2 | } |
15879 | 82 | |
15880 | 82 | if (LegalMask82 ) { |
15881 | 80 | SV0 = DAG.getBitcast(ScaleVT, SV0); |
15882 | 80 | SV1 = DAG.getBitcast(ScaleVT, SV1); |
15883 | 80 | return DAG.getBitcast( |
15884 | 80 | VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); |
15885 | 80 | } |
15886 | 46.6k | } |
15887 | 82 | } |
15888 | 1.88k | } |
15889 | 46.6k | |
15890 | 46.6k | // Canonicalize shuffles according to rules: |
15891 | 46.6k | // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) |
15892 | 46.6k | // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) |
15893 | 46.6k | // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) |
15894 | 46.6k | if (46.6k N1.getOpcode() == ISD::VECTOR_SHUFFLE && |
15895 | 46.6k | N0.getOpcode() != ISD::VECTOR_SHUFFLE1.68k && Level < AfterLegalizeDAG782 && |
15896 | 46.6k | TLI.isTypeLegal(VT)766 ) { |
15897 | 765 | // The incoming shuffle must be of the same type as the result of the |
15898 | 765 | // current shuffle. |
15899 | 765 | assert(N1->getOperand(0).getValueType() == VT && |
15900 | 765 | "Shuffle types don't match"); |
15901 | 765 | |
15902 | 765 | SDValue SV0 = N1->getOperand(0); |
15903 | 765 | SDValue SV1 = N1->getOperand(1); |
15904 | 765 | bool HasSameOp0 = N0 == SV0; |
15905 | 765 | bool IsSV1Undef = SV1.isUndef(); |
15906 | 765 | if (HasSameOp0 || 765 IsSV1Undef715 || N0 == SV1163 ) |
15907 | 765 | // Commute the operands of this shuffle so that next rule |
15908 | 765 | // will trigger. |
15909 | 602 | return DAG.getCommutedVectorShuffle(*SVN); |
15910 | 46.0k | } |
15911 | 46.0k | |
15912 | 46.0k | // Try to fold according to rules: |
15913 | 46.0k | // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) |
15914 | 46.0k | // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) |
15915 | 46.0k | // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) |
15916 | 46.0k | // Don't try to fold shuffles with illegal type. |
15917 | 46.0k | // Only fold if this shuffle is the only user of the other shuffle. |
15918 | 46.0k | if (46.0k N0.getOpcode() == ISD::VECTOR_SHUFFLE && 46.0k N->isOnlyUserOf(N0.getNode())3.86k && |
15919 | 46.0k | Level < AfterLegalizeDAG3.07k && TLI.isTypeLegal(VT)3.02k ) { |
15920 | 2.94k | ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); |
15921 | 2.94k | |
15922 | 2.94k | // Don't try to fold splats; they're likely to simplify somehow, or they |
15923 | 2.94k | // might be free. |
15924 | 2.94k | if (OtherSV->isSplat()) |
15925 | 45 | return SDValue(); |
15926 | 2.90k | |
15927 | 2.90k | // The incoming shuffle must be of the same type as the result of the |
15928 | 2.90k | // current shuffle. |
15929 | 2.94k | assert(OtherSV->getOperand(0).getValueType() == VT && |
15930 | 2.90k | "Shuffle types don't match"); |
15931 | 2.90k | |
15932 | 2.90k | SDValue SV0, SV1; |
15933 | 2.90k | SmallVector<int, 4> Mask; |
15934 | 2.90k | // Compute the combined shuffle mask for a shuffle with SV0 as the first |
15935 | 2.90k | // operand, and SV1 as the second operand. |
15936 | 32.4k | for (unsigned i = 0; i != NumElts32.4k ; ++i29.5k ) { |
15937 | 30.1k | int Idx = SVN->getMaskElt(i); |
15938 | 30.1k | if (Idx < 030.1k ) { |
15939 | 4.06k | // Propagate Undef. |
15940 | 4.06k | Mask.push_back(Idx); |
15941 | 4.06k | continue; |
15942 | 4.06k | } |
15943 | 26.0k | |
15944 | 26.0k | SDValue CurrentVec; |
15945 | 26.0k | if (Idx < (int)NumElts26.0k ) { |
15946 | 12.0k | // This shuffle index refers to the inner shuffle N0. Lookup the inner |
15947 | 12.0k | // shuffle mask to identify which vector is actually referenced. |
15948 | 12.0k | Idx = OtherSV->getMaskElt(Idx); |
15949 | 12.0k | if (Idx < 012.0k ) { |
15950 | 183 | // Propagate Undef. |
15951 | 183 | Mask.push_back(Idx); |
15952 | 183 | continue; |
15953 | 183 | } |
15954 | 11.8k | |
15955 | 11.8k | CurrentVec = (Idx < (int) NumElts) ? 11.8k OtherSV->getOperand(0)9.74k |
15956 | 2.08k | : OtherSV->getOperand(1); |
15957 | 26.0k | } else { |
15958 | 14.0k | // This shuffle index references an element within N1. |
15959 | 14.0k | CurrentVec = N1; |
15960 | 14.0k | } |
15961 | 26.0k | |
15962 | 26.0k | // Simple case where 'CurrentVec' is UNDEF. |
15963 | 25.8k | if (25.8k CurrentVec.isUndef()25.8k ) { |
15964 | 0 | Mask.push_back(-1); |
15965 | 0 | continue; |
15966 | 0 | } |
15967 | 25.8k | |
15968 | 25.8k | // Canonicalize the shuffle index. We don't know yet if CurrentVec |
15969 | 25.8k | // will be the first or second operand of the combined shuffle. |
15970 | 25.8k | Idx = Idx % NumElts; |
15971 | 25.8k | if (!SV0.getNode() || 25.8k SV0 == CurrentVec22.9k ) { |
15972 | 10.5k | // Ok. CurrentVec is the left hand side. |
15973 | 10.5k | // Update the mask accordingly. |
15974 | 10.5k | SV0 = CurrentVec; |
15975 | 10.5k | Mask.push_back(Idx); |
15976 | 10.5k | continue; |
15977 | 10.5k | } |
15978 | 15.3k | |
15979 | 15.3k | // Bail out if we cannot convert the shuffle pair into a single shuffle. |
15980 | 15.3k | if (15.3k SV1.getNode() && 15.3k SV1 != CurrentVec13.1k ) |
15981 | 593 | return SDValue(); |
15982 | 14.7k | |
15983 | 14.7k | // Ok. CurrentVec is the right hand side. |
15984 | 14.7k | // Update the mask accordingly. |
15985 | 14.7k | SV1 = CurrentVec; |
15986 | 14.7k | Mask.push_back(Idx + NumElts); |
15987 | 14.7k | } |
15988 | 2.90k | |
15989 | 2.90k | // Check if all indices in Mask are Undef. In case, propagate Undef. |
15990 | 2.31k | bool isUndefMask = true; |
15991 | 4.73k | for (unsigned i = 0; i != NumElts && 4.73k isUndefMask4.72k ; ++i2.42k ) |
15992 | 2.42k | isUndefMask &= Mask[i] < 0; |
15993 | 2.31k | |
15994 | 2.31k | if (isUndefMask) |
15995 | 6 | return DAG.getUNDEF(VT); |
15996 | 2.30k | |
15997 | 2.30k | if (2.30k !SV0.getNode()2.30k ) |
15998 | 0 | SV0 = DAG.getUNDEF(VT); |
15999 | 2.30k | if (!SV1.getNode()) |
16000 | 724 | SV1 = DAG.getUNDEF(VT); |
16001 | 2.30k | |
16002 | 2.30k | // Avoid introducing shuffles with illegal mask. |
16003 | 2.30k | if (!TLI.isShuffleMaskLegal(Mask, VT)2.30k ) { |
16004 | 0 | ShuffleVectorSDNode::commuteMask(Mask); |
16005 | 0 |
|
16006 | 0 | if (!TLI.isShuffleMaskLegal(Mask, VT)) |
16007 | 0 | return SDValue(); |
16008 | 0 |
|
16009 | 0 | // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) |
16010 | 0 | // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) |
16011 | 0 | // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) |
16012 | 0 | std::swap(SV0, SV1); |
16013 | 0 | } |
16014 | 2.30k | |
16015 | 2.30k | // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) |
16016 | 2.30k | // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) |
16017 | 2.30k | // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) |
16018 | 2.30k | return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); |
16019 | 43.1k | } |
16020 | 43.1k | |
16021 | 43.1k | return SDValue(); |
16022 | 43.1k | } |
16023 | | |
16024 | 35.7k | SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { |
16025 | 35.7k | SDValue InVal = N->getOperand(0); |
16026 | 35.7k | EVT VT = N->getValueType(0); |
16027 | 35.7k | |
16028 | 35.7k | // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern |
16029 | 35.7k | // with a VECTOR_SHUFFLE and possible truncate. |
16030 | 35.7k | if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT35.7k ) { |
16031 | 15.5k | SDValue InVec = InVal->getOperand(0); |
16032 | 15.5k | SDValue EltNo = InVal->getOperand(1); |
16033 | 15.5k | auto InVecT = InVec.getValueType(); |
16034 | 15.5k | if (ConstantSDNode *C015.5k = dyn_cast<ConstantSDNode>(EltNo)) { |
16035 | 15.5k | SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1); |
16036 | 15.5k | int Elt = C0->getZExtValue(); |
16037 | 15.5k | NewMask[0] = Elt; |
16038 | 15.5k | SDValue Val; |
16039 | 15.5k | // If we have an implict truncate do truncate here as long as it's legal. |
16040 | 15.5k | // if it's not legal, this should |
16041 | 15.5k | if (VT.getScalarType() != InVal.getValueType() && |
16042 | 30 | InVal.getValueType().isScalarInteger() && |
16043 | 15.5k | isTypeLegal(VT.getScalarType())30 ) { |
16044 | 0 | Val = |
16045 | 0 | DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); |
16046 | 0 | return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); |
16047 | 0 | } |
16048 | 15.5k | if (15.5k VT.getScalarType() == InVecT.getScalarType() && |
16049 | 383 | VT.getVectorNumElements() <= InVecT.getVectorNumElements() && |
16050 | 15.5k | TLI.isShuffleMaskLegal(NewMask, VT)383 ) { |
16051 | 383 | Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec, |
16052 | 383 | DAG.getUNDEF(InVecT), NewMask); |
16053 | 383 | // If the initial vector is the correct size this shuffle is a |
16054 | 383 | // valid result. |
16055 | 383 | if (VT == InVecT) |
16056 | 248 | return Val; |
16057 | 135 | // If not we must truncate the vector. |
16058 | 135 | if (135 VT.getVectorNumElements() != InVecT.getVectorNumElements()135 ) { |
16059 | 135 | MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); |
16060 | 135 | SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); |
16061 | 135 | EVT SubVT = |
16062 | 135 | EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), |
16063 | 135 | VT.getVectorNumElements()); |
16064 | 135 | Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val, |
16065 | 135 | ZeroIdx); |
16066 | 135 | return Val; |
16067 | 135 | } |
16068 | 35.3k | } |
16069 | 15.5k | } |
16070 | 15.5k | } |
16071 | 35.3k | |
16072 | 35.3k | return SDValue(); |
16073 | 35.3k | } |
16074 | | |
16075 | 43.1k | SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { |
16076 | 43.1k | EVT VT = N->getValueType(0); |
16077 | 43.1k | SDValue N0 = N->getOperand(0); |
16078 | 43.1k | SDValue N1 = N->getOperand(1); |
16079 | 43.1k | SDValue N2 = N->getOperand(2); |
16080 | 43.1k | |
16081 | 43.1k | // If inserting an UNDEF, just return the original vector. |
16082 | 43.1k | if (N1.isUndef()) |
16083 | 28 | return N0; |
16084 | 43.0k | |
16085 | 43.0k | // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow |
16086 | 43.0k | // us to pull BITCASTs from input to output. |
16087 | 43.0k | if (43.0k N0.hasOneUse() && 43.0k N0->getOpcode() == ISD::INSERT_SUBVECTOR16.1k ) |
16088 | 4.32k | if (SDValue 4.32k NN04.32k = visitINSERT_SUBVECTOR(N0.getNode())) |
16089 | 373 | return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2); |
16090 | 42.7k | |
16091 | 42.7k | // If this is an insert of an extracted vector into an undef vector, we can |
16092 | 42.7k | // just use the input to the extract. |
16093 | 42.7k | if (42.7k N0.isUndef() && 42.7k N1.getOpcode() == ISD::EXTRACT_SUBVECTOR36.7k && |
16094 | 42.7k | N1.getOperand(1) == N24.56k && N1.getOperand(0).getValueType() == VT383 ) |
16095 | 376 | return N1.getOperand(0); |
16096 | 42.3k | |
16097 | 42.3k | // If we are inserting a bitcast value into an undef, with the same |
16098 | 42.3k | // number of elements, just use the bitcast input of the extract. |
16099 | 42.3k | // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 -> |
16100 | 42.3k | // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2) |
16101 | 42.3k | if (42.3k N0.isUndef() && 42.3k N1.getOpcode() == ISD::BITCAST36.3k && |
16102 | 4.03k | N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR && |
16103 | 145 | N1.getOperand(0).getOperand(1) == N2 && |
16104 | 142 | N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() == |
16105 | 42.3k | VT.getVectorNumElements()) { |
16106 | 3 | return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0)); |
16107 | 3 | } |
16108 | 42.3k | |
16109 | 42.3k | // If both N1 and N2 are bitcast values on which insert_subvector |
16110 | 42.3k | // would makes sense, pull the bitcast through. |
16111 | 42.3k | // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 -> |
16112 | 42.3k | // BITCAST (INSERT_SUBVECTOR N0 N1 N2) |
16113 | 42.3k | if (42.3k N0.getOpcode() == ISD::BITCAST && 42.3k N1.getOpcode() == ISD::BITCAST482 ) { |
16114 | 148 | SDValue CN0 = N0.getOperand(0); |
16115 | 148 | SDValue CN1 = N1.getOperand(0); |
16116 | 148 | if (CN0.getValueType().getVectorElementType() == |
16117 | 148 | CN1.getValueType().getVectorElementType() && |
16118 | 89 | CN0.getValueType().getVectorNumElements() == |
16119 | 148 | VT.getVectorNumElements()) { |
16120 | 3 | SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), |
16121 | 3 | CN0.getValueType(), CN0, CN1, N2); |
16122 | 3 | return DAG.getBitcast(VT, NewINSERT); |
16123 | 3 | } |
16124 | 42.3k | } |
16125 | 42.3k | |
16126 | 42.3k | // Combine INSERT_SUBVECTORs where we are inserting to the same index. |
16127 | 42.3k | // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) |
16128 | 42.3k | // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) |
16129 | 42.3k | if (42.3k N0.getOpcode() == ISD::INSERT_SUBVECTOR && |
16130 | 4.10k | N0.getOperand(1).getValueType() == N1.getValueType() && |
16131 | 4.10k | N0.getOperand(2) == N2) |
16132 | 42 | return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), |
16133 | 42 | N1, N2); |
16134 | 42.2k | |
16135 | 42.2k | if (42.2k !isa<ConstantSDNode>(N2)42.2k ) |
16136 | 0 | return SDValue(); |
16137 | 42.2k | |
16138 | 42.2k | unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue(); |
16139 | 42.2k | |
16140 | 42.2k | // Canonicalize insert_subvector dag nodes. |
16141 | 42.2k | // Example: |
16142 | 42.2k | // (insert_subvector (insert_subvector A, Idx0), Idx1) |
16143 | 42.2k | // -> (insert_subvector (insert_subvector A, Idx1), Idx0) |
16144 | 42.2k | if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && 42.2k N0.hasOneUse()4.06k && |
16145 | 3.94k | N1.getValueType() == N0.getOperand(1).getValueType() && |
16146 | 42.2k | isa<ConstantSDNode>(N0.getOperand(2))3.94k ) { |
16147 | 3.94k | unsigned OtherIdx = N0.getConstantOperandVal(2); |
16148 | 3.94k | if (InsIdx < OtherIdx3.94k ) { |
16149 | 4 | // Swap nodes. |
16150 | 4 | SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, |
16151 | 4 | N0.getOperand(0), N1, N2); |
16152 | 4 | AddToWorklist(NewOp.getNode()); |
16153 | 4 | return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()), |
16154 | 4 | VT, NewOp, N0.getOperand(1), N0.getOperand(2)); |
16155 | 4 | } |
16156 | 42.2k | } |
16157 | 42.2k | |
16158 | 42.2k | // If the input vector is a concatenation, and the insert replaces |
16159 | 42.2k | // one of the pieces, we can optimize into a single concat_vectors. |
16160 | 42.2k | if (42.2k N0.getOpcode() == ISD::CONCAT_VECTORS && 42.2k N0.hasOneUse()0 && |
16161 | 42.2k | N0.getOperand(0).getValueType() == N1.getValueType()0 ) { |
16162 | 0 | unsigned Factor = N1.getValueType().getVectorNumElements(); |
16163 | 0 |
|
16164 | 0 | SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end()); |
16165 | 0 | Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1; |
16166 | 0 |
|
16167 | 0 | return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); |
16168 | 0 | } |
16169 | 42.2k | |
16170 | 42.2k | return SDValue(); |
16171 | 42.2k | } |
16172 | | |
16173 | 3.38k | SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { |
16174 | 3.38k | SDValue N0 = N->getOperand(0); |
16175 | 3.38k | |
16176 | 3.38k | // fold (fp_to_fp16 (fp16_to_fp op)) -> op |
16177 | 3.38k | if (N0->getOpcode() == ISD::FP16_TO_FP) |
16178 | 880 | return N0->getOperand(0); |
16179 | 2.50k | |
16180 | 2.50k | return SDValue(); |
16181 | 2.50k | } |
16182 | | |
16183 | 4.21k | SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { |
16184 | 4.21k | SDValue N0 = N->getOperand(0); |
16185 | 4.21k | |
16186 | 4.21k | // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) |
16187 | 4.21k | if (N0->getOpcode() == ISD::AND4.21k ) { |
16188 | 446 | ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); |
16189 | 446 | if (AndConst && 446 AndConst->getAPIntValue() == 0xffff446 ) { |
16190 | 432 | return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), |
16191 | 432 | N0.getOperand(0)); |
16192 | 432 | } |
16193 | 3.78k | } |
16194 | 3.78k | |
16195 | 3.78k | return SDValue(); |
16196 | 3.78k | } |
16197 | | |
16198 | | /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle |
16199 | | /// with the destination vector and a zero vector. |
16200 | | /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> |
16201 | | /// vector_shuffle V, Zero, <0, 4, 2, 4> |
16202 | 339k | SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { |
16203 | 339k | EVT VT = N->getValueType(0); |
16204 | 339k | SDValue LHS = N->getOperand(0); |
16205 | 339k | SDValue RHS = peekThroughBitcast(N->getOperand(1)); |
16206 | 339k | SDLoc DL(N); |
16207 | 339k | |
16208 | 339k | // Make sure we're not running after operation legalization where it |
16209 | 339k | // may have custom lowered the vector shuffles. |
16210 | 339k | if (LegalOperations) |
16211 | 193k | return SDValue(); |
16212 | 145k | |
16213 | 145k | if (145k N->getOpcode() != ISD::AND145k ) |
16214 | 123k | return SDValue(); |
16215 | 21.9k | |
16216 | 21.9k | if (21.9k RHS.getOpcode() != ISD::BUILD_VECTOR21.9k ) |
16217 | 5.91k | return SDValue(); |
16218 | 16.0k | |
16219 | 16.0k | EVT RVT = RHS.getValueType(); |
16220 | 16.0k | unsigned NumElts = RHS.getNumOperands(); |
16221 | 16.0k | |
16222 | 16.0k | // Attempt to create a valid clear mask, splitting the mask into |
16223 | 16.0k | // sub elements and checking to see if each is |
16224 | 16.0k | // all zeros or all ones - suitable for shuffle masking. |
16225 | 46.0k | auto BuildClearMask = [&](int Split) { |
16226 | 46.0k | int NumSubElts = NumElts * Split; |
16227 | 46.0k | int NumSubBits = RVT.getScalarSizeInBits() / Split; |
16228 | 46.0k | |
16229 | 46.0k | SmallVector<int, 8> Indices; |
16230 | 183k | for (int i = 0; i != NumSubElts183k ; ++i137k ) { |
16231 | 165k | int EltIdx = i / Split; |
16232 | 165k | int SubIdx = i % Split; |
16233 | 165k | SDValue Elt = RHS.getOperand(EltIdx); |
16234 | 165k | if (Elt.isUndef()165k ) { |
16235 | 28 | Indices.push_back(-1); |
16236 | 28 | continue; |
16237 | 28 | } |
16238 | 165k | |
16239 | 165k | APInt Bits; |
16240 | 165k | if (isa<ConstantSDNode>(Elt)) |
16241 | 164k | Bits = cast<ConstantSDNode>(Elt)->getAPIntValue(); |
16242 | 822 | else if (822 isa<ConstantFPSDNode>(Elt)822 ) |
16243 | 154 | Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt(); |
16244 | 822 | else |
16245 | 668 | return SDValue(); |
16246 | 165k | |
16247 | 165k | // Extract the sub element from the constant bit mask. |
16248 | 165k | if (165k DAG.getDataLayout().isBigEndian()165k ) { |
16249 | 469 | Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits); |
16250 | 165k | } else { |
16251 | 164k | Bits.lshrInPlace(SubIdx * NumSubBits); |
16252 | 164k | } |
16253 | 165k | |
16254 | 165k | if (Split > 1) |
16255 | 147k | Bits = Bits.trunc(NumSubBits); |
16256 | 165k | |
16257 | 165k | if (Bits.isAllOnesValue()) |
16258 | 51.2k | Indices.push_back(i); |
16259 | 113k | else if (113k Bits == 0113k ) |
16260 | 86.2k | Indices.push_back(i + NumSubElts); |
16261 | 113k | else |
16262 | 27.5k | return SDValue(); |
16263 | 165k | } |
16264 | 46.0k | |
16265 | 46.0k | // Let's see if the target supports this vector_shuffle. |
16266 | 17.7k | EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); |
16267 | 17.7k | EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); |
16268 | 17.7k | if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) |
16269 | 16.8k | return SDValue(); |
16270 | 970 | |
16271 | 970 | SDValue Zero = DAG.getConstant(0, DL, ClearVT); |
16272 | 970 | return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL, |
16273 | 970 | DAG.getBitcast(ClearVT, LHS), |
16274 | 970 | Zero, Indices)); |
16275 | 970 | }; |
16276 | 16.0k | |
16277 | 16.0k | // Determine maximum split level (byte level masking). |
16278 | 16.0k | int MaxSplit = 1; |
16279 | 16.0k | if (RVT.getScalarSizeInBits() % 8 == 0) |
16280 | 15.9k | MaxSplit = RVT.getScalarSizeInBits() / 8; |
16281 | 16.0k | |
16282 | 77.3k | for (int Split = 1; Split <= MaxSplit77.3k ; ++Split61.3k ) |
16283 | 62.3k | if (62.3k RVT.getScalarSizeInBits() % Split == 062.3k ) |
16284 | 46.0k | if (SDValue 46.0k S46.0k = BuildClearMask(Split)) |
16285 | 970 | return S; |
16286 | 16.0k | |
16287 | 15.0k | return SDValue(); |
16288 | 339k | } |
16289 | | |
16290 | | /// Visit a binary vector operation, like ADD. |
16291 | 339k | SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { |
16292 | 339k | assert(N->getValueType(0).isVector() && |
16293 | 339k | "SimplifyVBinOp only works on vectors!"); |
16294 | 339k | |
16295 | 339k | SDValue LHS = N->getOperand(0); |
16296 | 339k | SDValue RHS = N->getOperand(1); |
16297 | 339k | SDValue Ops[] = {LHS, RHS}; |
16298 | 339k | |
16299 | 339k | // See if we can constant fold the vector operation. |
16300 | 339k | if (SDValue Fold = DAG.FoldConstantVectorArithmetic( |
16301 | 339k | N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) |
16302 | 281 | return Fold; |
16303 | 339k | |
16304 | 339k | // Try to convert a constant mask AND into a shuffle clear mask. |
16305 | 339k | if (SDValue 339k Shuffle339k = XformToShuffleWithZero(N)) |
16306 | 970 | return Shuffle; |
16307 | 338k | |
16308 | 338k | // Type legalization might introduce new shuffles in the DAG. |
16309 | 338k | // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) |
16310 | 338k | // -> (shuffle (VBinOp (A, B)), Undef, Mask). |
16311 | 338k | if (338k LegalTypes && 338k isa<ShuffleVectorSDNode>(LHS)229k && |
16312 | 338k | isa<ShuffleVectorSDNode>(RHS)842 && LHS.hasOneUse()374 && RHS.hasOneUse()215 && |
16313 | 167 | LHS.getOperand(1).isUndef() && |
16314 | 338k | RHS.getOperand(1).isUndef()30 ) { |
16315 | 30 | ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); |
16316 | 30 | ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); |
16317 | 30 | |
16318 | 30 | if (SVN0->getMask().equals(SVN1->getMask())30 ) { |
16319 | 20 | EVT VT = N->getValueType(0); |
16320 | 20 | SDValue UndefVector = LHS.getOperand(1); |
16321 | 20 | SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, |
16322 | 20 | LHS.getOperand(0), RHS.getOperand(0), |
16323 | 20 | N->getFlags()); |
16324 | 20 | AddUsersToWorklist(N); |
16325 | 20 | return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, |
16326 | 20 | SVN0->getMask()); |
16327 | 20 | } |
16328 | 338k | } |
16329 | 338k | |
16330 | 338k | return SDValue(); |
16331 | 338k | } |
16332 | | |
16333 | | SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, |
16334 | 38.3k | SDValue N2) { |
16335 | 38.3k | assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); |
16336 | 38.3k | |
16337 | 38.3k | SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, |
16338 | 38.3k | cast<CondCodeSDNode>(N0.getOperand(2))->get()); |
16339 | 38.3k | |
16340 | 38.3k | // If we got a simplified select_cc node back from SimplifySelectCC, then |
16341 | 38.3k | // break it down into a new SETCC node, and a new SELECT node, and then return |
16342 | 38.3k | // the SELECT node, since we were called with a SELECT node. |
16343 | 38.3k | if (SCC.getNode()38.3k ) { |
16344 | 403 | // Check to see if we got a select_cc back (to turn into setcc/select). |
16345 | 403 | // Otherwise, just return whatever node we got back, like fabs. |
16346 | 403 | if (SCC.getOpcode() == ISD::SELECT_CC403 ) { |
16347 | 3 | SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), |
16348 | 3 | N0.getValueType(), |
16349 | 3 | SCC.getOperand(0), SCC.getOperand(1), |
16350 | 3 | SCC.getOperand(4)); |
16351 | 3 | AddToWorklist(SETCC.getNode()); |
16352 | 3 | return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, |
16353 | 3 | SCC.getOperand(2), SCC.getOperand(3)); |
16354 | 3 | } |
16355 | 400 | |
16356 | 400 | return SCC; |
16357 | 400 | } |
16358 | 37.9k | return SDValue(); |
16359 | 37.9k | } |
16360 | | |
16361 | | /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values |
16362 | | /// being selected between, see if we can simplify the select. Callers of this |
16363 | | /// should assume that TheSelect is deleted if this returns true. As such, they |
16364 | | /// should return the appropriate thing (e.g. the node) back to the top-level of |
16365 | | /// the DAG combiner loop to avoid it being looked at. |
16366 | | bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, |
16367 | 400k | SDValue RHS) { |
16368 | 400k | // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) |
16369 | 400k | // The select + setcc is redundant, because fsqrt returns NaN for X < 0. |
16370 | 400k | if (const ConstantFPSDNode *NaN400k = isConstOrConstSplatFP(LHS)) { |
16371 | 4.99k | if (NaN->isNaN() && 4.99k RHS.getOpcode() == ISD::FSQRT133 ) { |
16372 | 15 | // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) |
16373 | 15 | SDValue Sqrt = RHS; |
16374 | 15 | ISD::CondCode CC; |
16375 | 15 | SDValue CmpLHS; |
16376 | 15 | const ConstantFPSDNode *Zero = nullptr; |
16377 | 15 | |
16378 | 15 | if (TheSelect->getOpcode() == ISD::SELECT_CC15 ) { |
16379 | 0 | CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); |
16380 | 0 | CmpLHS = TheSelect->getOperand(0); |
16381 | 0 | Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); |
16382 | 15 | } else { |
16383 | 15 | // SELECT or VSELECT |
16384 | 15 | SDValue Cmp = TheSelect->getOperand(0); |
16385 | 15 | if (Cmp.getOpcode() == ISD::SETCC15 ) { |
16386 | 15 | CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); |
16387 | 15 | CmpLHS = Cmp.getOperand(0); |
16388 | 15 | Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); |
16389 | 15 | } |
16390 | 15 | } |
16391 | 15 | if (Zero && 15 Zero->isZero()15 && |
16392 | 15 | Sqrt.getOperand(0) == CmpLHS15 && (CC == ISD::SETOLT || |
16393 | 15 | CC == ISD::SETULT6 || CC == ISD::SETLT0 )) { |
16394 | 15 | // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) |
16395 | 15 | CombineTo(TheSelect, Sqrt); |
16396 | 15 | return true; |
16397 | 15 | } |
16398 | 400k | } |
16399 | 4.99k | } |
16400 | 400k | // Cannot simplify select with vector condition |
16401 | 400k | if (400k TheSelect->getOperand(0).getValueType().isVector()400k ) return false29.9k ; |
16402 | 370k | |
16403 | 370k | // If this is a select from two identical things, try to pull the operation |
16404 | 370k | // through the select. |
16405 | 370k | if (370k LHS.getOpcode() != RHS.getOpcode() || |
16406 | 370k | !LHS.hasOneUse()121k || !RHS.hasOneUse()57.2k ) |
16407 | 320k | return false; |
16408 | 49.9k | |
16409 | 49.9k | // If this is a load and the token chain is identical, replace the select |
16410 | 49.9k | // of two loads with a load through a select of the address to load from. |
16411 | 49.9k | // This triggers in things like "select bool X, 10.0, 123.0" after the FP |
16412 | 49.9k | // constants have been dropped into the constant pool. |
16413 | 49.9k | if (49.9k LHS.getOpcode() == ISD::LOAD49.9k ) { |
16414 | 491 | LoadSDNode *LLD = cast<LoadSDNode>(LHS); |
16415 | 491 | LoadSDNode *RLD = cast<LoadSDNode>(RHS); |
16416 | 491 | |
16417 | 491 | // Token chains must be identical. |
16418 | 491 | if (LHS.getOperand(0) != RHS.getOperand(0) || |
16419 | 491 | // Do not let this transformation reduce the number of volatile loads. |
16420 | 491 | LLD->isVolatile()381 || RLD->isVolatile()381 || |
16421 | 491 | // FIXME: If either is a pre/post inc/dec load, |
16422 | 491 | // we'd need to split out the address adjustment. |
16423 | 491 | LLD->isIndexed()381 || RLD->isIndexed()381 || |
16424 | 491 | // If this is an EXTLOAD, the VT's must match. |
16425 | 381 | LLD->getMemoryVT() != RLD->getMemoryVT() || |
16426 | 491 | // If this is an EXTLOAD, the kind of extension must match. |
16427 | 379 | (LLD->getExtensionType() != RLD->getExtensionType() && |
16428 | 379 | // The only exception is if one of the extensions is anyext. |
16429 | 2 | LLD->getExtensionType() != ISD::EXTLOAD && |
16430 | 379 | RLD->getExtensionType() != ISD::EXTLOAD) || |
16431 | 491 | // FIXME: this discards src value information. This is |
16432 | 491 | // over-conservative. It would be beneficial to be able to remember |
16433 | 491 | // both potential memory locations. Since we are discarding |
16434 | 491 | // src value info, don't do the transformation if the memory |
16435 | 491 | // locations are not in the default address space. |
16436 | 377 | LLD->getPointerInfo().getAddrSpace() != 0 || |
16437 | 161 | RLD->getPointerInfo().getAddrSpace() != 0 || |
16438 | 161 | !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), |
16439 | 161 | LLD->getBasePtr().getValueType())) |
16440 | 330 | return false; |
16441 | 161 | |
16442 | 161 | // Check that the select condition doesn't reach either load. If so, |
16443 | 161 | // folding this will induce a cycle into the DAG. If not, this is safe to |
16444 | 161 | // xform, so create a select of the addresses. |
16445 | 161 | SDValue Addr; |
16446 | 161 | if (TheSelect->getOpcode() == ISD::SELECT161 ) { |
16447 | 161 | SDNode *CondNode = TheSelect->getOperand(0).getNode(); |
16448 | 161 | if ((LLD->hasAnyUseOfValue(1) && 161 LLD->isPredecessorOf(CondNode)86 ) || |
16449 | 159 | (RLD->hasAnyUseOfValue(1) && 159 RLD->isPredecessorOf(CondNode)81 )) |
16450 | 2 | return false; |
16451 | 159 | // The loads must not depend on one another. |
16452 | 159 | if (159 LLD->isPredecessorOf(RLD) || |
16453 | 159 | RLD->isPredecessorOf(LLD)) |
16454 | 0 | return false; |
16455 | 159 | Addr = DAG.getSelect(SDLoc(TheSelect), |
16456 | 159 | LLD->getBasePtr().getValueType(), |
16457 | 159 | TheSelect->getOperand(0), LLD->getBasePtr(), |
16458 | 159 | RLD->getBasePtr()); |
16459 | 161 | } else { // Otherwise SELECT_CC |
16460 | 0 | SDNode *CondLHS = TheSelect->getOperand(0).getNode(); |
16461 | 0 | SDNode *CondRHS = TheSelect->getOperand(1).getNode(); |
16462 | 0 |
|
16463 | 0 | if ((LLD->hasAnyUseOfValue(1) && |
16464 | 0 | (LLD->isPredecessorOf(CondLHS) || 0 LLD->isPredecessorOf(CondRHS)0 )) || |
16465 | 0 | (RLD->hasAnyUseOfValue(1) && |
16466 | 0 | (RLD->isPredecessorOf(CondLHS) || 0 RLD->isPredecessorOf(CondRHS)0 ))) |
16467 | 0 | return false; |
16468 | 0 |
|
16469 | 0 | Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), |
16470 | 0 | LLD->getBasePtr().getValueType(), |
16471 | 0 | TheSelect->getOperand(0), |
16472 | 0 | TheSelect->getOperand(1), |
16473 | 0 | LLD->getBasePtr(), RLD->getBasePtr(), |
16474 | 0 | TheSelect->getOperand(4)); |
16475 | 0 | } |
16476 | 161 | |
16477 | 159 | SDValue Load; |
16478 | 159 | // It is safe to replace the two loads if they have different alignments, |
16479 | 159 | // but the new load must be the minimum (most restrictive) alignment of the |
16480 | 159 | // inputs. |
16481 | 159 | unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); |
16482 | 159 | MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags(); |
16483 | 159 | if (!RLD->isInvariant()) |
16484 | 159 | MMOFlags &= ~MachineMemOperand::MOInvariant; |
16485 | 159 | if (!RLD->isDereferenceable()) |
16486 | 55 | MMOFlags &= ~MachineMemOperand::MODereferenceable; |
16487 | 159 | if (LLD->getExtensionType() == ISD::NON_EXTLOAD159 ) { |
16488 | 158 | // FIXME: Discards pointer and AA info. |
16489 | 158 | Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), |
16490 | 158 | LLD->getChain(), Addr, MachinePointerInfo(), Alignment, |
16491 | 158 | MMOFlags); |
16492 | 159 | } else { |
16493 | 1 | // FIXME: Discards pointer and AA info. |
16494 | 1 | Load = DAG.getExtLoad( |
16495 | 1 | LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() |
16496 | 0 | : LLD->getExtensionType(), |
16497 | 1 | SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr, |
16498 | 1 | MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags); |
16499 | 1 | } |
16500 | 159 | |
16501 | 159 | // Users of the select now use the result of the load. |
16502 | 159 | CombineTo(TheSelect, Load); |
16503 | 159 | |
16504 | 159 | // Users of the old loads now use the new load's chain. We know the |
16505 | 159 | // old-load value is dead now. |
16506 | 159 | CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); |
16507 | 159 | CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); |
16508 | 159 | return true; |
16509 | 49.4k | } |
16510 | 49.4k | |
16511 | 49.4k | return false; |
16512 | 49.4k | } |
16513 | | |
16514 | | /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and |
16515 | | /// bitwise 'and'. |
16516 | | SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, |
16517 | | SDValue N1, SDValue N2, SDValue N3, |
16518 | 277k | ISD::CondCode CC) { |
16519 | 277k | // If this is a select where the false operand is zero and the compare is a |
16520 | 277k | // check of the sign bit, see if we can perform the "gzip trick": |
16521 | 277k | // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A |
16522 | 277k | // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A |
16523 | 277k | EVT XType = N0.getValueType(); |
16524 | 277k | EVT AType = N2.getValueType(); |
16525 | 277k | if (!isNullConstant(N3) || 277k !XType.bitsGE(AType)59.1k ) |
16526 | 231k | return SDValue(); |
16527 | 45.9k | |
16528 | 45.9k | // If the comparison is testing for a positive value, we have to invert |
16529 | 45.9k | // the sign bit mask, so only do that transform if the target has a bitwise |
16530 | 45.9k | // 'and not' instruction (the invert is free). |
16531 | 45.9k | if (45.9k CC == ISD::SETGT && 45.9k TLI.hasAndNot(N2)4.70k ) { |
16532 | 3.91k | // (X > -1) ? A : 0 |
16533 | 3.91k | // (X > 0) ? X : 0 <-- This is canonical signed max. |
16534 | 3.91k | if (!(isAllOnesConstant(N1) || 3.91k (isNullConstant(N1) && 3.11k N0 == N22.46k ))) |
16535 | 942 | return SDValue(); |
16536 | 42.0k | } else if (42.0k CC == ISD::SETLT42.0k ) { |
16537 | 953 | // (X < 0) ? A : 0 |
16538 | 953 | // (X < 1) ? X : 0 <-- This is un-canonicalized signed min. |
16539 | 953 | if (!(isNullConstant(N1) || 953 (isOneConstant(N1) && 642 N0 == N298 ))) |
16540 | 640 | return SDValue(); |
16541 | 41.1k | } else { |
16542 | 41.1k | return SDValue(); |
16543 | 41.1k | } |
16544 | 3.28k | |
16545 | 3.28k | // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit |
16546 | 3.28k | // constant. |
16547 | 3.28k | EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType()); |
16548 | 3.28k | auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); |
16549 | 3.28k | if (N2C && 3.28k ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)248 ) { |
16550 | 79 | unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; |
16551 | 79 | SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); |
16552 | 79 | SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); |
16553 | 79 | AddToWorklist(Shift.getNode()); |
16554 | 79 | |
16555 | 79 | if (XType.bitsGT(AType)79 ) { |
16556 | 40 | Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); |
16557 | 40 | AddToWorklist(Shift.getNode()); |
16558 | 40 | } |
16559 | 79 | |
16560 | 79 | if (CC == ISD::SETGT) |
16561 | 21 | Shift = DAG.getNOT(DL, Shift, AType); |
16562 | 79 | |
16563 | 79 | return DAG.getNode(ISD::AND, DL, AType, Shift, N2); |
16564 | 79 | } |
16565 | 3.20k | |
16566 | 3.20k | SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy); |
16567 | 3.20k | SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); |
16568 | 3.20k | AddToWorklist(Shift.getNode()); |
16569 | 3.20k | |
16570 | 3.20k | if (XType.bitsGT(AType)3.20k ) { |
16571 | 77 | Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); |
16572 | 77 | AddToWorklist(Shift.getNode()); |
16573 | 77 | } |
16574 | 3.20k | |
16575 | 3.20k | if (CC == ISD::SETGT) |
16576 | 2.94k | Shift = DAG.getNOT(DL, Shift, AType); |
16577 | 277k | |
16578 | 277k | return DAG.getNode(ISD::AND, DL, AType, Shift, N2); |
16579 | 277k | } |
16580 | | |
16581 | | /// Simplify an expression of the form (N0 cond N1) ? N2 : N3 |
16582 | | /// where 'cond' is the comparison specified by CC. |
16583 | | SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, |
16584 | | SDValue N2, SDValue N3, ISD::CondCode CC, |
16585 | 277k | bool NotExtCompare) { |
16586 | 277k | // (x ? y : y) -> y. |
16587 | 277k | if (N2 == N3277k ) return N20 ; |
16588 | 277k | |
16589 | 277k | EVT VT = N2.getValueType(); |
16590 | 277k | ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); |
16591 | 277k | ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); |
16592 | 277k | |
16593 | 277k | // Determine if the condition we're dealing with is constant |
16594 | 277k | SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), |
16595 | 277k | N0, N1, CC, DL, false); |
16596 | 277k | if (SCC.getNode()277k ) AddToWorklist(SCC.getNode())2.33k ; |
16597 | 277k | |
16598 | 277k | if (ConstantSDNode *SCCC277k = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { |
16599 | 35 | // fold select_cc true, x, y -> x |
16600 | 35 | // fold select_cc false, x, y -> y |
16601 | 35 | return !SCCC->isNullValue() ? N213 : N322 ; |
16602 | 35 | } |
16603 | 277k | |
16604 | 277k | // Check to see if we can simplify the select into an fabs node |
16605 | 277k | if (ConstantFPSDNode *277k CFP277k = dyn_cast<ConstantFPSDNode>(N1)) { |
16606 | 4.31k | // Allow either -0.0 or 0.0 |
16607 | 4.31k | if (CFP->isZero()4.31k ) { |
16608 | 1.41k | // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs |
16609 | 1.41k | if ((CC == ISD::SETGE || 1.41k CC == ISD::SETGT1.39k ) && |
16610 | 1.41k | N0 == N235 && N3.getOpcode() == ISD::FNEG17 && |
16611 | 1 | N2 == N3.getOperand(0)) |
16612 | 1 | return DAG.getNode(ISD::FABS, DL, VT, N0); |
16613 | 1.41k | |
16614 | 1.41k | // select (setl[te] X, +/-0.0), fneg(X), X -> fabs |
16615 | 1.41k | if (1.41k (CC == ISD::SETLT || 1.41k CC == ISD::SETLE1.38k ) && |
16616 | 1.41k | N0 == N340 && N2.getOpcode() == ISD::FNEG16 && |
16617 | 0 | N2.getOperand(0) == N3) |
16618 | 0 | return DAG.getNode(ISD::FABS, DL, VT, N3); |
16619 | 277k | } |
16620 | 4.31k | } |
16621 | 277k | |
16622 | 277k | // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" |
16623 | 277k | // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 |
16624 | 277k | // in it. This is a win when the constant is not otherwise available because |
16625 | 277k | // it replaces two constant pool loads with one. We only do this if the FP |
16626 | 277k | // type is known to be legal, because if it isn't, then we are before legalize |
16627 | 277k | // types an we want the other legalization to happen first (e.g. to avoid |
16628 | 277k | // messing with soft float) and if the ConstantFP is not legal, because if |
16629 | 277k | // it is legal, we may not need to store the FP constant in a constant pool. |
16630 | 277k | if (ConstantFPSDNode *277k TV277k = dyn_cast<ConstantFPSDNode>(N2)) |
16631 | 2.45k | if (ConstantFPSDNode *2.45k FV2.45k = dyn_cast<ConstantFPSDNode>(N3)) { |
16632 | 1.76k | if (TLI.isTypeLegal(N2.getValueType()) && |
16633 | 1.72k | (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != |
16634 | 1.72k | TargetLowering::Legal && |
16635 | 1.52k | !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && |
16636 | 1.72k | !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && |
16637 | 1.76k | // If both constants have multiple uses, then we won't need to do an |
16638 | 1.76k | // extra load, they are likely around in registers for other users. |
16639 | 1.76k | (TV->hasOneUse() || 85 FV->hasOneUse()8 )) { |
16640 | 77 | Constant *Elts[] = { |
16641 | 77 | const_cast<ConstantFP*>(FV->getConstantFPValue()), |
16642 | 77 | const_cast<ConstantFP*>(TV->getConstantFPValue()) |
16643 | 77 | }; |
16644 | 77 | Type *FPTy = Elts[0]->getType(); |
16645 | 77 | const DataLayout &TD = DAG.getDataLayout(); |
16646 | 77 | |
16647 | 77 | // Create a ConstantArray of the two constants. |
16648 | 77 | Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); |
16649 | 77 | SDValue CPIdx = |
16650 | 77 | DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), |
16651 | 77 | TD.getPrefTypeAlignment(FPTy)); |
16652 | 77 | unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); |
16653 | 77 | |
16654 | 77 | // Get the offsets to the 0 and 1 element of the array so that we can |
16655 | 77 | // select between them. |
16656 | 77 | SDValue Zero = DAG.getIntPtrConstant(0, DL); |
16657 | 77 | unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); |
16658 | 77 | SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); |
16659 | 77 | |
16660 | 77 | SDValue Cond = DAG.getSetCC(DL, |
16661 | 77 | getSetCCResultType(N0.getValueType()), |
16662 | 77 | N0, N1, CC); |
16663 | 77 | AddToWorklist(Cond.getNode()); |
16664 | 77 | SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), |
16665 | 77 | Cond, One, Zero); |
16666 | 77 | AddToWorklist(CstOffset.getNode()); |
16667 | 77 | CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, |
16668 | 77 | CstOffset); |
16669 | 77 | AddToWorklist(CPIdx.getNode()); |
16670 | 77 | return DAG.getLoad( |
16671 | 77 | TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, |
16672 | 77 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), |
16673 | 77 | Alignment); |
16674 | 77 | } |
16675 | 277k | } |
16676 | 277k | |
16677 | 277k | if (SDValue 277k V277k = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) |
16678 | 3.28k | return V; |
16679 | 274k | |
16680 | 274k | // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) |
16681 | 274k | // where y is has a single bit set. |
16682 | 274k | // A plaintext description would be, we can turn the SELECT_CC into an AND |
16683 | 274k | // when the condition can be materialized as an all-ones register. Any |
16684 | 274k | // single bit-test can be materialized as an all-ones register with |
16685 | 274k | // shift-left and shift-right-arith. |
16686 | 274k | if (274k CC == ISD::SETEQ && 274k N0->getOpcode() == ISD::AND98.0k && |
16687 | 274k | N0->getValueType(0) == VT3.23k && isNullConstant(N1)2.01k && isNullConstant(N2)1.66k ) { |
16688 | 115 | SDValue AndLHS = N0->getOperand(0); |
16689 | 115 | ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); |
16690 | 115 | if (ConstAndRHS && 115 ConstAndRHS->getAPIntValue().countPopulation() == 179 ) { |
16691 | 28 | // Shift the tested bit over the sign bit. |
16692 | 28 | const APInt &AndMask = ConstAndRHS->getAPIntValue(); |
16693 | 28 | SDValue ShlAmt = |
16694 | 28 | DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), |
16695 | 28 | getShiftAmountTy(AndLHS.getValueType())); |
16696 | 28 | SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); |
16697 | 28 | |
16698 | 28 | // Now arithmetic right shift it all the way over, so the result is either |
16699 | 28 | // all-ones, or zero. |
16700 | 28 | SDValue ShrAmt = |
16701 | 28 | DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), |
16702 | 28 | getShiftAmountTy(Shl.getValueType())); |
16703 | 28 | SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); |
16704 | 28 | |
16705 | 28 | return DAG.getNode(ISD::AND, DL, VT, Shr, N3); |
16706 | 28 | } |
16707 | 274k | } |
16708 | 274k | |
16709 | 274k | // fold select C, 16, 0 -> shl C, 4 |
16710 | 274k | if (274k N2C && 274k isNullConstant(N3)100k && N2C->getAPIntValue().isPowerOf2()52.5k && |
16711 | 45.0k | TLI.getBooleanContents(N0.getValueType()) == |
16712 | 274k | TargetLowering::ZeroOrOneBooleanContent) { |
16713 | 43.4k | |
16714 | 43.4k | // If the caller doesn't want us to simplify this into a zext of a compare, |
16715 | 43.4k | // don't do it. |
16716 | 43.4k | if (NotExtCompare && 43.4k N2C->isOne()43.1k ) |
16717 | 43.1k | return SDValue(); |
16718 | 298 | |
16719 | 298 | // Get a SetCC of the condition |
16720 | 298 | // NOTE: Don't create a SETCC if it's not legal on this target. |
16721 | 298 | if (298 !LegalOperations || |
16722 | 298 | TLI.isOperationLegal(ISD::SETCC, N0.getValueType())16 ) { |
16723 | 282 | SDValue Temp, SCC; |
16724 | 282 | // cast from setcc result type to select result type |
16725 | 282 | if (LegalTypes282 ) { |
16726 | 2 | SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), |
16727 | 2 | N0, N1, CC); |
16728 | 2 | if (N2.getValueType().bitsLT(SCC.getValueType())) |
16729 | 0 | Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), |
16730 | 0 | N2.getValueType()); |
16731 | 2 | else |
16732 | 2 | Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), |
16733 | 2 | N2.getValueType(), SCC); |
16734 | 282 | } else { |
16735 | 280 | SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); |
16736 | 280 | Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), |
16737 | 280 | N2.getValueType(), SCC); |
16738 | 280 | } |
16739 | 282 | |
16740 | 282 | AddToWorklist(SCC.getNode()); |
16741 | 282 | AddToWorklist(Temp.getNode()); |
16742 | 282 | |
16743 | 282 | if (N2C->isOne()) |
16744 | 2 | return Temp; |
16745 | 280 | |
16746 | 280 | // shl setcc result by log2 n2c |
16747 | 280 | return DAG.getNode( |
16748 | 280 | ISD::SHL, DL, N2.getValueType(), Temp, |
16749 | 280 | DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), |
16750 | 280 | getShiftAmountTy(Temp.getValueType()))); |
16751 | 280 | } |
16752 | 43.4k | } |
16753 | 230k | |
16754 | 230k | // Check to see if this is an integer abs. |
16755 | 230k | // select_cc setg[te] X, 0, X, -X -> |
16756 | 230k | // select_cc setgt X, -1, X, -X -> |
16757 | 230k | // select_cc setl[te] X, 0, -X, X -> |
16758 | 230k | // select_cc setlt X, 1, -X, X -> |
16759 | 230k | // Y = sra (X, size(X)-1); xor (add (X, Y), Y) |
16760 | 230k | if (230k N1C230k ) { |
16761 | 158k | ConstantSDNode *SubC = nullptr; |
16762 | 158k | if (((N1C->isNullValue() && 158k (CC == ISD::SETGT || 54.1k CC == ISD::SETGE53.2k )) || |
16763 | 157k | (N1C->isAllOnesValue() && 157k CC == ISD::SETGT2.19k )) && |
16764 | 158k | N0 == N21.97k && N3.getOpcode() == ISD::SUB576 && N0 == N3.getOperand(1)458 ) |
16765 | 458 | SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); |
16766 | 157k | else if (157k ((N1C->isNullValue() && 157k (CC == ISD::SETLT || 54.1k CC == ISD::SETLE43.9k )) || |
16767 | 147k | (N1C->isOne() && 147k CC == ISD::SETLT46.0k )) && |
16768 | 157k | N0 == N310.9k && N2.getOpcode() == ISD::SUB7.12k && N0 == N2.getOperand(1)6.94k ) |
16769 | 6.92k | SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); |
16770 | 158k | |
16771 | 158k | EVT XType = N0.getValueType(); |
16772 | 158k | if (SubC && 158k SubC->isNullValue()7.37k && XType.isInteger()7.37k ) { |
16773 | 7.37k | SDLoc DL(N0); |
16774 | 7.37k | SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, |
16775 | 7.37k | N0, |
16776 | 7.37k | DAG.getConstant(XType.getSizeInBits() - 1, DL, |
16777 | 7.37k | getShiftAmountTy(N0.getValueType()))); |
16778 | 7.37k | SDValue Add = DAG.getNode(ISD::ADD, DL, |
16779 | 7.37k | XType, N0, Shift); |
16780 | 7.37k | AddToWorklist(Shift.getNode()); |
16781 | 7.37k | AddToWorklist(Add.getNode()); |
16782 | 7.37k | return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); |
16783 | 7.37k | } |
16784 | 223k | } |
16785 | 223k | |
16786 | 223k | // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X) |
16787 | 223k | // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X) |
16788 | 223k | // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X) |
16789 | 223k | // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X) |
16790 | 223k | // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X) |
16791 | 223k | // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) |
16792 | 223k | // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) |
16793 | 223k | // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) |
16794 | 223k | if (223k N1C && 223k N1C->isNullValue()151k && (CC == ISD::SETEQ || 47.2k CC == ISD::SETNE10.4k )) { |
16795 | 42.9k | SDValue ValueOnZero = N2; |
16796 | 42.9k | SDValue Count = N3; |
16797 | 42.9k | // If the condition is NE instead of E, swap the operands. |
16798 | 42.9k | if (CC == ISD::SETNE) |
16799 | 6.07k | std::swap(ValueOnZero, Count); |
16800 | 42.9k | // Check if the value on zero is a constant equal to the bits in the type. |
16801 | 42.9k | if (auto *ValueOnZeroC42.9k = dyn_cast<ConstantSDNode>(ValueOnZero)) { |
16802 | 20.7k | if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()20.7k ) { |
16803 | 112 | // If the other operand is cttz/cttz_zero_undef of N0, and cttz is |
16804 | 112 | // legal, combine to just cttz. |
16805 | 112 | if ((Count.getOpcode() == ISD::CTTZ || |
16806 | 112 | Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) && |
16807 | 15 | N0 == Count.getOperand(0) && |
16808 | 15 | (!LegalOperations || 15 TLI.isOperationLegal(ISD::CTTZ, VT)0 )) |
16809 | 15 | return DAG.getNode(ISD::CTTZ, DL, VT, N0); |
16810 | 97 | // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is |
16811 | 97 | // legal, combine to just ctlz. |
16812 | 97 | if (97 (Count.getOpcode() == ISD::CTLZ || |
16813 | 97 | Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) && |
16814 | 15 | N0 == Count.getOperand(0) && |
16815 | 15 | (!LegalOperations || 15 TLI.isOperationLegal(ISD::CTLZ, VT)0 )) |
16816 | 15 | return DAG.getNode(ISD::CTLZ, DL, VT, N0); |
16817 | 223k | } |
16818 | 20.7k | } |
16819 | 42.9k | } |
16820 | 223k | |
16821 | 223k | return SDValue(); |
16822 | 223k | } |
16823 | | |
16824 | | /// This is a stub for TargetLowering::SimplifySetCC. |
16825 | | SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, |
16826 | | ISD::CondCode Cond, const SDLoc &DL, |
16827 | 3.32M | bool foldBooleans) { |
16828 | 3.32M | TargetLowering::DAGCombinerInfo |
16829 | 3.32M | DagCombineInfo(DAG, Level, false, this); |
16830 | 3.32M | return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); |
16831 | 3.32M | } |
16832 | | |
16833 | | /// Given an ISD::SDIV node expressing a divide by constant, return |
16834 | | /// a DAG expression to select that will generate the same value by multiplying |
16835 | | /// by a magic number. |
16836 | | /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". |
16837 | 1.50k | SDValue DAGCombiner::BuildSDIV(SDNode *N) { |
16838 | 1.50k | // when optimising for minimum size, we don't want to expand a div to a mul |
16839 | 1.50k | // and a shift. |
16840 | 1.50k | if (DAG.getMachineFunction().getFunction()->optForMinSize()) |
16841 | 34 | return SDValue(); |
16842 | 1.47k | |
16843 | 1.47k | ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); |
16844 | 1.47k | if (!C) |
16845 | 0 | return SDValue(); |
16846 | 1.47k | |
16847 | 1.47k | // Avoid division by zero. |
16848 | 1.47k | if (1.47k C->isNullValue()1.47k ) |
16849 | 0 | return SDValue(); |
16850 | 1.47k | |
16851 | 1.47k | std::vector<SDNode *> Built; |
16852 | 1.47k | SDValue S = |
16853 | 1.47k | TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); |
16854 | 1.47k | |
16855 | 1.47k | for (SDNode *N : Built) |
16856 | 2.97k | AddToWorklist(N); |
16857 | 1.50k | return S; |
16858 | 1.50k | } |
16859 | | |
16860 | | /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a |
16861 | | /// DAG expression that will generate the same value by right shifting. |
16862 | 3.08k | SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { |
16863 | 3.08k | ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); |
16864 | 3.08k | if (!C) |
16865 | 0 | return SDValue(); |
16866 | 3.08k | |
16867 | 3.08k | // Avoid division by zero. |
16868 | 3.08k | if (3.08k C->isNullValue()3.08k ) |
16869 | 0 | return SDValue(); |
16870 | 3.08k | |
16871 | 3.08k | std::vector<SDNode *> Built; |
16872 | 3.08k | SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built); |
16873 | 3.08k | |
16874 | 3.08k | for (SDNode *N : Built) |
16875 | 8.58k | AddToWorklist(N); |
16876 | 3.08k | return S; |
16877 | 3.08k | } |
16878 | | |
16879 | | /// Given an ISD::UDIV node expressing a divide by constant, return a DAG |
16880 | | /// expression that will generate the same value by multiplying by a magic |
16881 | | /// number. |
16882 | | /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". |
16883 | 7.04k | SDValue DAGCombiner::BuildUDIV(SDNode *N) { |
16884 | 7.04k | // when optimising for minimum size, we don't want to expand a div to a mul |
16885 | 7.04k | // and a shift. |
16886 | 7.04k | if (DAG.getMachineFunction().getFunction()->optForMinSize()) |
16887 | 32 | return SDValue(); |
16888 | 7.01k | |
16889 | 7.01k | ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); |
16890 | 7.01k | if (!C) |
16891 | 0 | return SDValue(); |
16892 | 7.01k | |
16893 | 7.01k | // Avoid division by zero. |
16894 | 7.01k | if (7.01k C->isNullValue()7.01k ) |
16895 | 0 | return SDValue(); |
16896 | 7.01k | |
16897 | 7.01k | std::vector<SDNode *> Built; |
16898 | 7.01k | SDValue S = |
16899 | 7.01k | TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); |
16900 | 7.01k | |
16901 | 7.01k | for (SDNode *N : Built) |
16902 | 8.47k | AddToWorklist(N); |
16903 | 7.04k | return S; |
16904 | 7.04k | } |
16905 | | |
16906 | | /// Determines the LogBase2 value for a non-null input value using the |
16907 | | /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V). |
16908 | 23.0k | SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { |
16909 | 23.0k | EVT VT = V.getValueType(); |
16910 | 23.0k | unsigned EltBits = VT.getScalarSizeInBits(); |
16911 | 23.0k | SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V); |
16912 | 23.0k | SDValue Base = DAG.getConstant(EltBits - 1, DL, VT); |
16913 | 23.0k | SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz); |
16914 | 23.0k | return LogBase2; |
16915 | 23.0k | } |
16916 | | |
16917 | | /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) |
16918 | | /// For the reciprocal, we need to find the zero of the function: |
16919 | | /// F(X) = A X - 1 [which has a zero at X = 1/A] |
16920 | | /// => |
16921 | | /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form |
16922 | | /// does not require additional intermediate precision] |
16923 | 535 | SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { |
16924 | 535 | if (Level >= AfterLegalizeDAG) |
16925 | 130 | return SDValue(); |
16926 | 405 | |
16927 | 405 | // TODO: Handle half and/or extended types? |
16928 | 405 | EVT VT = Op.getValueType(); |
16929 | 405 | if (VT.getScalarType() != MVT::f32 && 405 VT.getScalarType() != MVT::f6468 ) |
16930 | 3 | return SDValue(); |
16931 | 402 | |
16932 | 402 | // If estimates are explicitly disabled for this function, we're done. |
16933 | 402 | MachineFunction &MF = DAG.getMachineFunction(); |
16934 | 402 | int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF); |
16935 | 402 | if (Enabled == TLI.ReciprocalEstimate::Disabled) |
16936 | 37 | return SDValue(); |
16937 | 365 | |
16938 | 365 | // Estimates may be explicitly enabled for this type with a custom number of |
16939 | 365 | // refinement steps. |
16940 | 365 | int Iterations = TLI.getDivRefinementSteps(VT, MF); |
16941 | 365 | if (SDValue Est365 = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { |
16942 | 229 | AddToWorklist(Est.getNode()); |
16943 | 229 | |
16944 | 229 | if (Iterations229 ) { |
16945 | 184 | EVT VT = Op.getValueType(); |
16946 | 184 | SDLoc DL(Op); |
16947 | 184 | SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); |
16948 | 184 | |
16949 | 184 | // Newton iterations: Est = Est + Est (1 - Arg * Est) |
16950 | 432 | for (int i = 0; i < Iterations432 ; ++i248 ) { |
16951 | 248 | SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); |
16952 | 248 | AddToWorklist(NewEst.getNode()); |
16953 | 248 | |
16954 | 248 | NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); |
16955 | 248 | AddToWorklist(NewEst.getNode()); |
16956 | 248 | |
16957 | 248 | NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); |
16958 | 248 | AddToWorklist(NewEst.getNode()); |
16959 | 248 | |
16960 | 248 | Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); |
16961 | 248 | AddToWorklist(Est.getNode()); |
16962 | 248 | } |
16963 | 184 | } |
16964 | 229 | return Est; |
16965 | 229 | } |
16966 | 136 | |
16967 | 136 | return SDValue(); |
16968 | 136 | } |
16969 | | |
16970 | | /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) |
16971 | | /// For the reciprocal sqrt, we need to find the zero of the function: |
16972 | | /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] |
16973 | | /// => |
16974 | | /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) |
16975 | | /// As a result, we precompute A/2 prior to the iteration loop. |
16976 | | SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, |
16977 | | unsigned Iterations, |
16978 | 21 | SDNodeFlags Flags, bool Reciprocal) { |
16979 | 21 | EVT VT = Arg.getValueType(); |
16980 | 21 | SDLoc DL(Arg); |
16981 | 21 | SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); |
16982 | 21 | |
16983 | 21 | // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that |
16984 | 21 | // this entire sequence requires only one FP constant. |
16985 | 21 | SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); |
16986 | 21 | AddToWorklist(HalfArg.getNode()); |
16987 | 21 | |
16988 | 21 | HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); |
16989 | 21 | AddToWorklist(HalfArg.getNode()); |
16990 | 21 | |
16991 | 21 | // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) |
16992 | 52 | for (unsigned i = 0; i < Iterations52 ; ++i31 ) { |
16993 | 31 | SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); |
16994 | 31 | AddToWorklist(NewEst.getNode()); |
16995 | 31 | |
16996 | 31 | NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); |
16997 | 31 | AddToWorklist(NewEst.getNode()); |
16998 | 31 | |
16999 | 31 | NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); |
17000 | 31 | AddToWorklist(NewEst.getNode()); |
17001 | 31 | |
17002 | 31 | Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); |
17003 | 31 | AddToWorklist(Est.getNode()); |
17004 | 31 | } |
17005 | 21 | |
17006 | 21 | // If non-reciprocal square root is requested, multiply the result by Arg. |
17007 | 21 | if (!Reciprocal21 ) { |
17008 | 6 | Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); |
17009 | 6 | AddToWorklist(Est.getNode()); |
17010 | 6 | } |
17011 | 21 | |
17012 | 21 | return Est; |
17013 | 21 | } |
17014 | | |
17015 | | /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) |
17016 | | /// For the reciprocal sqrt, we need to find the zero of the function: |
17017 | | /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] |
17018 | | /// => |
17019 | | /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) |
17020 | | SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, |
17021 | | unsigned Iterations, |
17022 | 33 | SDNodeFlags Flags, bool Reciprocal) { |
17023 | 33 | EVT VT = Arg.getValueType(); |
17024 | 33 | SDLoc DL(Arg); |
17025 | 33 | SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); |
17026 | 33 | SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); |
17027 | 33 | |
17028 | 33 | // This routine must enter the loop below to work correctly |
17029 | 33 | // when (Reciprocal == false). |
17030 | 33 | assert(Iterations > 0); |
17031 | 33 | |
17032 | 33 | // Newton iterations for reciprocal square root: |
17033 | 33 | // E = (E * -0.5) * ((A * E) * E + -3.0) |
17034 | 68 | for (unsigned i = 0; i < Iterations68 ; ++i35 ) { |
17035 | 35 | SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); |
17036 | 35 | AddToWorklist(AE.getNode()); |
17037 | 35 | |
17038 | 35 | SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); |
17039 | 35 | AddToWorklist(AEE.getNode()); |
17040 | 35 | |
17041 | 35 | SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); |
17042 | 35 | AddToWorklist(RHS.getNode()); |
17043 | 35 | |
17044 | 35 | // When calculating a square root at the last iteration build: |
17045 | 35 | // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) |
17046 | 35 | // (notice a common subexpression) |
17047 | 35 | SDValue LHS; |
17048 | 35 | if (Reciprocal || 35 (i + 1) < Iterations22 ) { |
17049 | 14 | // RSQRT: LHS = (E * -0.5) |
17050 | 14 | LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); |
17051 | 35 | } else { |
17052 | 21 | // SQRT: LHS = (A * E) * -0.5 |
17053 | 21 | LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); |
17054 | 21 | } |
17055 | 35 | AddToWorklist(LHS.getNode()); |
17056 | 35 | |
17057 | 35 | Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); |
17058 | 35 | AddToWorklist(Est.getNode()); |
17059 | 35 | } |
17060 | 33 | |
17061 | 33 | return Est; |
17062 | 33 | } |
17063 | | |
17064 | | /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case |
17065 | | /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if |
17066 | | /// Op can be zero. |
17067 | | SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, |
17068 | 217 | bool Reciprocal) { |
17069 | 217 | if (Level >= AfterLegalizeDAG) |
17070 | 51 | return SDValue(); |
17071 | 166 | |
17072 | 166 | // TODO: Handle half and/or extended types? |
17073 | 166 | EVT VT = Op.getValueType(); |
17074 | 166 | if (VT.getScalarType() != MVT::f32 && 166 VT.getScalarType() != MVT::f6457 ) |
17075 | 4 | return SDValue(); |
17076 | 162 | |
17077 | 162 | // If estimates are explicitly disabled for this function, we're done. |
17078 | 162 | MachineFunction &MF = DAG.getMachineFunction(); |
17079 | 162 | int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF); |
17080 | 162 | if (Enabled == TLI.ReciprocalEstimate::Disabled) |
17081 | 18 | return SDValue(); |
17082 | 144 | |
17083 | 144 | // Estimates may be explicitly enabled for this type with a custom number of |
17084 | 144 | // refinement steps. |
17085 | 144 | int Iterations = TLI.getSqrtRefinementSteps(VT, MF); |
17086 | 144 | |
17087 | 144 | bool UseOneConstNR = false; |
17088 | 144 | if (SDValue Est = |
17089 | 144 | TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR, |
17090 | 92 | Reciprocal)) { |
17091 | 92 | AddToWorklist(Est.getNode()); |
17092 | 92 | |
17093 | 92 | if (Iterations92 ) { |
17094 | 54 | Est = UseOneConstNR |
17095 | 21 | ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal) |
17096 | 33 | : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); |
17097 | 54 | |
17098 | 54 | if (!Reciprocal54 ) { |
17099 | 27 | // Unfortunately, Est is now NaN if the input was exactly 0.0. |
17100 | 27 | // Select out this case and force the answer to 0.0. |
17101 | 27 | EVT VT = Op.getValueType(); |
17102 | 27 | SDLoc DL(Op); |
17103 | 27 | |
17104 | 27 | SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); |
17105 | 27 | EVT CCVT = getSetCCResultType(VT); |
17106 | 27 | SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); |
17107 | 27 | AddToWorklist(ZeroCmp.getNode()); |
17108 | 27 | |
17109 | 27 | Est = DAG.getNode(VT.isVector() ? ISD::VSELECT15 : ISD::SELECT12 , DL, VT, |
17110 | 27 | ZeroCmp, FPZero, Est); |
17111 | 27 | AddToWorklist(Est.getNode()); |
17112 | 27 | } |
17113 | 54 | } |
17114 | 92 | return Est; |
17115 | 92 | } |
17116 | 52 | |
17117 | 52 | return SDValue(); |
17118 | 52 | } |
17119 | | |
17120 | 90 | SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) { |
17121 | 90 | return buildSqrtEstimateImpl(Op, Flags, true); |
17122 | 90 | } |
17123 | | |
17124 | 127 | SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { |
17125 | 127 | return buildSqrtEstimateImpl(Op, Flags, false); |
17126 | 127 | } |
17127 | | |
17128 | | /// Return true if base is a frame index, which is known not to alias with |
17129 | | /// anything but itself. Provides base object and offset as results. |
17130 | | static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, |
17131 | 12.3M | const GlobalValue *&GV, const void *&CV) { |
17132 | 12.3M | // Assume it is a primitive operation. |
17133 | 12.3M | Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; |
17134 | 12.3M | |
17135 | 12.3M | // If it's an adding a simple constant then integrate the offset. |
17136 | 12.3M | if (Base.getOpcode() == ISD::ADD12.3M ) { |
17137 | 10.0M | if (ConstantSDNode *C10.0M = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { |
17138 | 8.50M | Base = Base.getOperand(0); |
17139 | 8.50M | Offset += C->getSExtValue(); |
17140 | 8.50M | } |
17141 | 10.0M | } |
17142 | 12.3M | |
17143 | 12.3M | // Return the underlying GlobalValue, and update the Offset. Return false |
17144 | 12.3M | // for GlobalAddressSDNode since the same GlobalAddress may be represented |
17145 | 12.3M | // by multiple nodes with different offsets. |
17146 | 12.3M | if (GlobalAddressSDNode *G12.3M = dyn_cast<GlobalAddressSDNode>(Base)) { |
17147 | 1.00M | GV = G->getGlobal(); |
17148 | 1.00M | Offset += G->getOffset(); |
17149 | 1.00M | return false; |
17150 | 1.00M | } |
17151 | 11.2M | |
17152 | 11.2M | // Return the underlying Constant value, and update the Offset. Return false |
17153 | 11.2M | // for ConstantSDNodes since the same constant pool entry may be represented |
17154 | 11.2M | // by multiple nodes with different offsets. |
17155 | 11.2M | if (ConstantPoolSDNode *11.2M C11.2M = dyn_cast<ConstantPoolSDNode>(Base)) { |
17156 | 0 | CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() |
17157 | 0 | : (const void *)C->getConstVal(); |
17158 | 0 | Offset += C->getOffset(); |
17159 | 0 | return false; |
17160 | 0 | } |
17161 | 11.2M | // If it's any of the following then it can't alias with anything but itself. |
17162 | 11.2M | return isa<FrameIndexSDNode>(Base); |
17163 | 11.2M | } |
17164 | | |
17165 | | /// Return true if there is any possibility that the two addresses overlap. |
17166 | 35.4M | bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { |
17167 | 35.4M | // If they are the same then they must be aliases. |
17168 | 35.4M | if (Op0->getBasePtr() == Op1->getBasePtr()35.4M ) return true634k ; |
17169 | 34.8M | |
17170 | 34.8M | // If they are both volatile then they cannot be reordered. |
17171 | 34.8M | if (34.8M Op0->isVolatile() && 34.8M Op1->isVolatile()37.5k ) return true25.3k ; |
17172 | 34.7M | |
17173 | 34.7M | // If one operation reads from invariant memory, and the other may store, they |
17174 | 34.7M | // cannot alias. These should really be checking the equivalent of mayWrite, |
17175 | 34.7M | // but it only matters for memory nodes other than load /store. |
17176 | 34.7M | if (34.7M Op0->isInvariant() && 34.7M Op1->writeMem()121 ) |
17177 | 121 | return false; |
17178 | 34.7M | |
17179 | 34.7M | if (34.7M Op1->isInvariant() && 34.7M Op0->writeMem()28.9k ) |
17180 | 24.4k | return false; |
17181 | 34.7M | |
17182 | 34.7M | unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; |
17183 | 34.7M | unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; |
17184 | 34.7M | |
17185 | 34.7M | // Check for BaseIndexOffset matching. |
17186 | 34.7M | BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); |
17187 | 34.7M | BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG); |
17188 | 34.7M | int64_t PtrDiff; |
17189 | 34.7M | if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) |
17190 | 28.5M | return !((NumBytes0 <= PtrDiff) || 28.5M (PtrDiff + NumBytes1 <= 0)27.7M ); |
17191 | 6.21M | |
17192 | 6.21M | // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be |
17193 | 6.21M | // able to calculate their relative offset if at least one arises |
17194 | 6.21M | // from an alloca. However, these allocas cannot overlap and we |
17195 | 6.21M | // can infer there is no alias. |
17196 | 6.21M | if (auto *6.21M A6.21M = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase())) |
17197 | 1.04M | if (auto *1.04M B1.04M = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) { |
17198 | 72.5k | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
17199 | 72.5k | // If the base are the same frame index but the we couldn't find a |
17200 | 72.5k | // constant offset, (indices are different) be conservative. |
17201 | 72.5k | if (A != B && 72.5k (!MFI.isFixedObjectIndex(A->getIndex()) || |
17202 | 328 | !MFI.isFixedObjectIndex(B->getIndex()))) |
17203 | 68.0k | return false; |
17204 | 6.15M | } |
17205 | 6.15M | |
17206 | 6.15M | // FIXME: findBaseOffset and ConstantValue/GlobalValue/FrameIndex analysis |
17207 | 6.15M | // modified to use BaseIndexOffset. |
17208 | 6.15M | |
17209 | 6.15M | // Gather base node and offset information. |
17210 | 6.15M | SDValue Base0, Base1; |
17211 | 6.15M | int64_t Offset0, Offset1; |
17212 | 6.15M | const GlobalValue *GV0, *GV1; |
17213 | 6.15M | const void *CV0, *CV1; |
17214 | 6.15M | bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(), |
17215 | 6.15M | Base0, Offset0, GV0, CV0); |
17216 | 6.15M | bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(), |
17217 | 6.15M | Base1, Offset1, GV1, CV1); |
17218 | 6.15M | |
17219 | 6.15M | // If they have the same base address, then check to see if they overlap. |
17220 | 6.15M | if (Base0 == Base1 || 6.15M (GV0 && 6.15M (GV0 == GV1)567k ) || (CV0 && 6.15M (CV0 == CV1)0 )) |
17221 | 3 | return !((Offset0 + NumBytes0) <= Offset1 || |
17222 | 3 | (Offset1 + NumBytes1) <= Offset0); |
17223 | 6.15M | |
17224 | 6.15M | // It is possible for different frame indices to alias each other, mostly |
17225 | 6.15M | // when tail call optimization reuses return address slots for arguments. |
17226 | 6.15M | // To catch this case, look up the actual index of frame indices to compute |
17227 | 6.15M | // the real alias relationship. |
17228 | 6.15M | if (6.15M IsFrameIndex0 && 6.15M IsFrameIndex1823k ) { |
17229 | 0 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
17230 | 0 | Offset0 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base0)->getIndex()); |
17231 | 0 | Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); |
17232 | 0 | return !((Offset0 + NumBytes0) <= Offset1 || |
17233 | 0 | (Offset1 + NumBytes1) <= Offset0); |
17234 | 0 | } |
17235 | 6.15M | |
17236 | 6.15M | // Otherwise, if we know what the bases are, and they aren't identical, then |
17237 | 6.15M | // we know they cannot alias. |
17238 | 6.15M | if (6.15M (IsFrameIndex0 || 6.15M CV05.32M || GV05.32M ) && (IsFrameIndex1 || 1.39M CV11.31M || GV11.31M )) |
17239 | 256k | return false; |
17240 | 5.89M | |
17241 | 5.89M | // If we know required SrcValue1 and SrcValue2 have relatively large alignment |
17242 | 5.89M | // compared to the size and offset of the access, we may be able to prove they |
17243 | 5.89M | // do not alias. This check is conservative for now to catch cases created by |
17244 | 5.89M | // splitting vector types. |
17245 | 5.89M | int64_t SrcValOffset0 = Op0->getSrcValueOffset(); |
17246 | 5.89M | int64_t SrcValOffset1 = Op1->getSrcValueOffset(); |
17247 | 5.89M | unsigned OrigAlignment0 = Op0->getOriginalAlignment(); |
17248 | 5.89M | unsigned OrigAlignment1 = Op1->getOriginalAlignment(); |
17249 | 5.89M | if (OrigAlignment0 == OrigAlignment1 && 5.89M SrcValOffset0 != SrcValOffset13.08M && |
17250 | 5.89M | NumBytes0 == NumBytes1358k && OrigAlignment0 > NumBytes0310k ) { |
17251 | 19.4k | int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; |
17252 | 19.4k | int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; |
17253 | 19.4k | |
17254 | 19.4k | // There is no overlap between these relatively aligned accesses of similar |
17255 | 19.4k | // size. Return no alias. |
17256 | 19.4k | if ((OffAlign0 + NumBytes0) <= OffAlign1 || |
17257 | 17.3k | (OffAlign1 + NumBytes1) <= OffAlign0) |
17258 | 4.87k | return false; |
17259 | 5.89M | } |
17260 | 5.89M | |
17261 | 5.89M | bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 |
17262 | 0 | ? CombinerGlobalAA |
17263 | 5.89M | : DAG.getSubtarget().useAA(); |
17264 | | #ifndef NDEBUG |
17265 | | if (CombinerAAOnlyFunc.getNumOccurrences() && |
17266 | | CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) |
17267 | | UseAA = false; |
17268 | | #endif |
17269 | | |
17270 | 5.89M | if (UseAA && 5.89M AA6.38k && |
17271 | 5.89M | Op0->getMemOperand()->getValue()3.79k && Op1->getMemOperand()->getValue()3.59k ) { |
17272 | 3.55k | // Use alias analysis information. |
17273 | 3.55k | int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); |
17274 | 3.55k | int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset; |
17275 | 3.55k | int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset; |
17276 | 3.55k | AliasResult AAResult = |
17277 | 3.55k | AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, |
17278 | 3.55k | UseTBAA ? Op0->getAAInfo()3.55k : AAMDNodes()0 ), |
17279 | 3.55k | MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, |
17280 | 3.55k | UseTBAA ? Op1->getAAInfo()3.55k : AAMDNodes()0 ) ); |
17281 | 3.55k | if (AAResult == NoAlias) |
17282 | 1.36k | return false; |
17283 | 5.88M | } |
17284 | 5.88M | |
17285 | 5.88M | // Otherwise we have to assume they alias. |
17286 | 5.88M | return true; |
17287 | 5.88M | } |
17288 | | |
17289 | | /// Walk up chain skipping non-aliasing memory nodes, |
17290 | | /// looking for aliasing nodes and adding them to the Aliases vector. |
17291 | | void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, |
17292 | 17.9M | SmallVectorImpl<SDValue> &Aliases) { |
17293 | 17.9M | SmallVector<SDValue, 8> Chains; // List of chains to visit. |
17294 | 17.9M | SmallPtrSet<SDNode *, 16> Visited; // Visited node set. |
17295 | 17.9M | |
17296 | 17.9M | // Get alias information for node. |
17297 | 6.02M | bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile(); |
17298 | 17.9M | |
17299 | 17.9M | // Starting off. |
17300 | 17.9M | Chains.push_back(OriginalChain); |
17301 | 17.9M | unsigned Depth = 0; |
17302 | 17.9M | |
17303 | 17.9M | // Look at each chain and determine if it is an alias. If so, add it to the |
17304 | 17.9M | // aliases list. If not, then continue up the chain looking for the next |
17305 | 17.9M | // candidate. |
17306 | 73.3M | while (!Chains.empty()73.3M ) { |
17307 | 56.8M | SDValue Chain = Chains.pop_back_val(); |
17308 | 56.8M | |
17309 | 56.8M | // For TokenFactor nodes, look at each operand and only continue up the |
17310 | 56.8M | // chain until we reach the depth limit. |
17311 | 56.8M | // |
17312 | 56.8M | // FIXME: The depth check could be made to return the last non-aliasing |
17313 | 56.8M | // chain we found before we hit a tokenfactor rather than the original |
17314 | 56.8M | // chain. |
17315 | 56.8M | if (Depth > TLI.getGatherAllAliasesMaxDepth()56.8M ) { |
17316 | 1.45M | Aliases.clear(); |
17317 | 1.45M | Aliases.push_back(OriginalChain); |
17318 | 1.45M | return; |
17319 | 1.45M | } |
17320 | 55.4M | |
17321 | 55.4M | // Don't bother if we've been before. |
17322 | 55.4M | if (55.4M !Visited.insert(Chain.getNode()).second55.4M ) |
17323 | 2.18M | continue; |
17324 | 53.2M | |
17325 | 53.2M | switch (Chain.getOpcode()) { |
17326 | 6.38M | case ISD::EntryToken: |
17327 | 6.38M | // Entry token is ideal chain operand, but handled in FindBetterChain. |
17328 | 6.38M | break; |
17329 | 53.2M | |
17330 | 35.7M | case ISD::LOAD: |
17331 | 35.7M | case ISD::STORE: { |
17332 | 35.7M | // Get alias information for Chain. |
17333 | 35.7M | bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) && |
17334 | 5.44M | !cast<LSBaseSDNode>(Chain.getNode())->isVolatile(); |
17335 | 35.7M | |
17336 | 35.7M | // If chain is alias then stop here. |
17337 | 35.7M | if (!(IsLoad && 35.7M IsOpLoad1.96M ) && |
17338 | 35.7M | isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))35.4M ) { |
17339 | 6.62M | Aliases.push_back(Chain); |
17340 | 35.7M | } else { |
17341 | 29.1M | // Look further up the chain. |
17342 | 29.1M | Chains.push_back(Chain.getOperand(0)); |
17343 | 29.1M | ++Depth; |
17344 | 29.1M | } |
17345 | 35.7M | break; |
17346 | 35.7M | } |
17347 | 35.7M | |
17348 | 6.98M | case ISD::TokenFactor: |
17349 | 6.98M | // We have to check each of the operands of the token factor for "small" |
17350 | 6.98M | // token factors, so we queue them up. Adding the operands to the queue |
17351 | 6.98M | // (stack) in reverse order maintains the original order and increases the |
17352 | 6.98M | // likelihood that getNode will find a matching token factor (CSE.) |
17353 | 6.98M | if (Chain.getNumOperands() > 166.98M ) { |
17354 | 2.55M | Aliases.push_back(Chain); |
17355 | 2.55M | break; |
17356 | 2.55M | } |
17357 | 18.3M | for (unsigned n = Chain.getNumOperands(); 4.42M n18.3M ;) |
17358 | 13.9M | Chains.push_back(Chain.getOperand(--n)); |
17359 | 4.42M | ++Depth; |
17360 | 4.42M | break; |
17361 | 4.42M | |
17362 | 362k | case ISD::CopyFromReg: |
17363 | 362k | // Forward past CopyFromReg. |
17364 | 362k | Chains.push_back(Chain.getOperand(0)); |
17365 | 362k | ++Depth; |
17366 | 362k | break; |
17367 | 4.42M | |
17368 | 3.71M | default: |
17369 | 3.71M | // For all other instructions we will just have to take what we can get. |
17370 | 3.71M | Aliases.push_back(Chain); |
17371 | 3.71M | break; |
17372 | 56.8M | } |
17373 | 56.8M | } |
17374 | 17.9M | } |
17375 | | |
17376 | | /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain |
17377 | | /// (aliasing node.) |
17378 | 17.9M | SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { |
17379 | 17.9M | SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. |
17380 | 17.9M | |
17381 | 17.9M | // Accumulate all the aliases to this node. |
17382 | 17.9M | GatherAllAliases(N, OldChain, Aliases); |
17383 | 17.9M | |
17384 | 17.9M | // If no operands then chain to entry token. |
17385 | 17.9M | if (Aliases.size() == 0) |
17386 | 6.32M | return DAG.getEntryNode(); |
17387 | 11.5M | |
17388 | 11.5M | // If a single operand then chain to it. We don't need to revisit it. |
17389 | 11.5M | if (11.5M Aliases.size() == 111.5M ) |
17390 | 10.5M | return Aliases[0]; |
17391 | 1.08M | |
17392 | 1.08M | // Construct a custom tailored token factor. |
17393 | 1.08M | return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); |
17394 | 1.08M | } |
17395 | | |
17396 | | // This function tries to collect a bunch of potentially interesting |
17397 | | // nodes to improve the chains of, all at once. This might seem |
17398 | | // redundant, as this function gets called when visiting every store |
17399 | | // node, so why not let the work be done on each store as it's visited? |
17400 | | // |
17401 | | // I believe this is mainly important because MergeConsecutiveStores |
17402 | | // is unable to deal with merging stores of different sizes, so unless |
17403 | | // we improve the chains of all the potential candidates up-front |
17404 | | // before running MergeConsecutiveStores, it might only see some of |
17405 | | // the nodes that will eventually be candidates, and then not be able |
17406 | | // to go from a partially-merged state to the desired final |
17407 | | // fully-merged state. |
17408 | 8.88M | bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { |
17409 | 8.88M | // This holds the base pointer, index, and the offset in bytes from the base |
17410 | 8.88M | // pointer. |
17411 | 8.88M | BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); |
17412 | 8.88M | |
17413 | 8.88M | // We must have a base and an offset. |
17414 | 8.88M | if (!BasePtr.getBase().getNode()) |
17415 | 0 | return false; |
17416 | 8.88M | |
17417 | 8.88M | // Do not handle stores to undef base pointers. |
17418 | 8.88M | if (8.88M BasePtr.getBase().isUndef()8.88M ) |
17419 | 10.9k | return false; |
17420 | 8.87M | |
17421 | 8.87M | SmallVector<StoreSDNode *, 8> ChainedStores; |
17422 | 8.87M | ChainedStores.push_back(St); |
17423 | 8.87M | |
17424 | 8.87M | // Walk up the chain and look for nodes with offsets from the same |
17425 | 8.87M | // base pointer. Stop when reaching an instruction with a different kind |
17426 | 8.87M | // or instruction which has a different base pointer. |
17427 | 8.87M | StoreSDNode *Index = St; |
17428 | 20.3M | while (Index20.3M ) { |
17429 | 11.8M | // If the chain has more than one use, then we can't reorder the mem ops. |
17430 | 11.8M | if (Index != St && 11.8M !SDValue(Index, 0)->hasOneUse()3.00M ) |
17431 | 206k | break; |
17432 | 11.6M | |
17433 | 11.6M | if (11.6M Index->isVolatile() || 11.6M Index->isIndexed()11.6M ) |
17434 | 37.1k | break; |
17435 | 11.6M | |
17436 | 11.6M | // Find the base pointer and offset for this memory node. |
17437 | 11.6M | BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); |
17438 | 11.6M | |
17439 | 11.6M | // Check that the base pointer is the same as the original one. |
17440 | 11.6M | if (!BasePtr.equalBaseIndex(Ptr, DAG)) |
17441 | 205k | break; |
17442 | 11.4M | |
17443 | 11.4M | // Walk up the chain to find the next store node, ignoring any |
17444 | 11.4M | // intermediate loads. Any other kind of node will halt the loop. |
17445 | 11.4M | SDNode *NextInChain = Index->getChain().getNode(); |
17446 | 13.4M | while (true13.4M ) { |
17447 | 13.4M | if (StoreSDNode *STn13.4M = dyn_cast<StoreSDNode>(NextInChain)) { |
17448 | 3.00M | // We found a store node. Use it for the next iteration. |
17449 | 3.00M | if (STn->isVolatile() || 3.00M STn->isIndexed()3.00M ) { |
17450 | 1.68k | Index = nullptr; |
17451 | 1.68k | break; |
17452 | 1.68k | } |
17453 | 3.00M | ChainedStores.push_back(STn); |
17454 | 3.00M | Index = STn; |
17455 | 3.00M | break; |
17456 | 10.4M | } else if (LoadSDNode *10.4M Ldn10.4M = dyn_cast<LoadSDNode>(NextInChain)) { |
17457 | 2.04M | NextInChain = Ldn->getChain().getNode(); |
17458 | 2.04M | continue; |
17459 | 0 | } else { |
17460 | 8.42M | Index = nullptr; |
17461 | 8.42M | break; |
17462 | 8.42M | } |
17463 | 13.4M | } // end while |
17464 | 11.8M | } |
17465 | 8.87M | |
17466 | 8.87M | // At this point, ChainedStores lists all of the Store nodes |
17467 | 8.87M | // reachable by iterating up through chain nodes matching the above |
17468 | 8.87M | // conditions. For each such store identified, try to find an |
17469 | 8.87M | // earlier chain to attach the store to which won't violate the |
17470 | 8.87M | // required ordering. |
17471 | 8.87M | bool MadeChangeToSt = false; |
17472 | 8.87M | SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; |
17473 | 8.87M | |
17474 | 11.8M | for (StoreSDNode *ChainedStore : ChainedStores) { |
17475 | 11.8M | SDValue Chain = ChainedStore->getChain(); |
17476 | 11.8M | SDValue BetterChain = FindBetterChain(ChainedStore, Chain); |
17477 | 11.8M | |
17478 | 11.8M | if (Chain != BetterChain11.8M ) { |
17479 | 941k | if (ChainedStore == St) |
17480 | 445k | MadeChangeToSt = true; |
17481 | 941k | BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); |
17482 | 941k | } |
17483 | 11.8M | } |
17484 | 8.87M | |
17485 | 8.87M | // Do all replacements after finding the replacements to make to avoid making |
17486 | 8.87M | // the chains more complicated by introducing new TokenFactors. |
17487 | 8.87M | for (auto Replacement : BetterChains) |
17488 | 941k | replaceStoreChain(Replacement.first, Replacement.second); |
17489 | 8.88M | |
17490 | 8.88M | return MadeChangeToSt; |
17491 | 8.88M | } |
17492 | | |
17493 | | /// This is the entry point for the file. |
17494 | | void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, |
17495 | 7.42M | CodeGenOpt::Level OptLevel) { |
17496 | 7.42M | /// This is the main entry point to this class. |
17497 | 7.42M | DAGCombiner(*this, AA, OptLevel).Run(Level); |
17498 | 7.42M | } |