/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This implements the SelectionDAG class. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "llvm/CodeGen/SelectionDAG.h" |
14 | | #include "SDNodeDbgValue.h" |
15 | | #include "llvm/ADT/APFloat.h" |
16 | | #include "llvm/ADT/APInt.h" |
17 | | #include "llvm/ADT/APSInt.h" |
18 | | #include "llvm/ADT/ArrayRef.h" |
19 | | #include "llvm/ADT/BitVector.h" |
20 | | #include "llvm/ADT/FoldingSet.h" |
21 | | #include "llvm/ADT/None.h" |
22 | | #include "llvm/ADT/STLExtras.h" |
23 | | #include "llvm/ADT/SmallPtrSet.h" |
24 | | #include "llvm/ADT/SmallVector.h" |
25 | | #include "llvm/ADT/Triple.h" |
26 | | #include "llvm/ADT/Twine.h" |
27 | | #include "llvm/Analysis/ValueTracking.h" |
28 | | #include "llvm/CodeGen/ISDOpcodes.h" |
29 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
30 | | #include "llvm/CodeGen/MachineConstantPool.h" |
31 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
32 | | #include "llvm/CodeGen/MachineFunction.h" |
33 | | #include "llvm/CodeGen/MachineMemOperand.h" |
34 | | #include "llvm/CodeGen/RuntimeLibcalls.h" |
35 | | #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" |
36 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
37 | | #include "llvm/CodeGen/SelectionDAGTargetInfo.h" |
38 | | #include "llvm/CodeGen/TargetLowering.h" |
39 | | #include "llvm/CodeGen/TargetRegisterInfo.h" |
40 | | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
41 | | #include "llvm/CodeGen/ValueTypes.h" |
42 | | #include "llvm/IR/Constant.h" |
43 | | #include "llvm/IR/Constants.h" |
44 | | #include "llvm/IR/DataLayout.h" |
45 | | #include "llvm/IR/DebugInfoMetadata.h" |
46 | | #include "llvm/IR/DebugLoc.h" |
47 | | #include "llvm/IR/DerivedTypes.h" |
48 | | #include "llvm/IR/Function.h" |
49 | | #include "llvm/IR/GlobalValue.h" |
50 | | #include "llvm/IR/Metadata.h" |
51 | | #include "llvm/IR/Type.h" |
52 | | #include "llvm/IR/Value.h" |
53 | | #include "llvm/Support/Casting.h" |
54 | | #include "llvm/Support/CodeGen.h" |
55 | | #include "llvm/Support/Compiler.h" |
56 | | #include "llvm/Support/Debug.h" |
57 | | #include "llvm/Support/ErrorHandling.h" |
58 | | #include "llvm/Support/KnownBits.h" |
59 | | #include "llvm/Support/MachineValueType.h" |
60 | | #include "llvm/Support/ManagedStatic.h" |
61 | | #include "llvm/Support/MathExtras.h" |
62 | | #include "llvm/Support/Mutex.h" |
63 | | #include "llvm/Support/raw_ostream.h" |
64 | | #include "llvm/Target/TargetMachine.h" |
65 | | #include "llvm/Target/TargetOptions.h" |
66 | | #include <algorithm> |
67 | | #include <cassert> |
68 | | #include <cstdint> |
69 | | #include <cstdlib> |
70 | | #include <limits> |
71 | | #include <set> |
72 | | #include <string> |
73 | | #include <utility> |
74 | | #include <vector> |
75 | | |
76 | | using namespace llvm; |
77 | | |
78 | | /// makeVTList - Return an instance of the SDVTList struct initialized with the |
79 | | /// specified members. |
80 | 63.2M | static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { |
81 | 63.2M | SDVTList Res = {VTs, NumVTs}; |
82 | 63.2M | return Res; |
83 | 63.2M | } |
84 | | |
85 | | // Default null implementations of the callbacks. |
86 | 14.8k | void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} |
87 | 26.2M | void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} |
88 | 17.7M | void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {} |
89 | | |
90 | 0 | void SelectionDAG::DAGNodeDeletedListener::anchor() {} |
91 | | |
92 | | #define DEBUG_TYPE "selectiondag" |
93 | | |
94 | | static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt", |
95 | | cl::Hidden, cl::init(true), |
96 | | cl::desc("Gang up loads and stores generated by inlining of memcpy")); |
97 | | |
98 | | static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max", |
99 | | cl::desc("Number limit for gluing ld/st of memcpy."), |
100 | | cl::Hidden, cl::init(0)); |
101 | | |
102 | 35.1M | static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { |
103 | 35.1M | LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G);); |
104 | 35.1M | } |
105 | | |
106 | | //===----------------------------------------------------------------------===// |
107 | | // ConstantFPSDNode Class |
108 | | //===----------------------------------------------------------------------===// |
109 | | |
110 | | /// isExactlyValue - We don't rely on operator== working on double values, as |
111 | | /// it returns true for things that are clearly not equal, like -0.0 and 0.0. |
112 | | /// As such, this method can be used to do an exact bit-for-bit comparison of |
113 | | /// two floating point values. |
114 | 3 | bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { |
115 | 3 | return getValueAPF().bitwiseIsEqual(V); |
116 | 3 | } |
117 | | |
118 | | bool ConstantFPSDNode::isValueValidForType(EVT VT, |
119 | 9.38k | const APFloat& Val) { |
120 | 9.38k | assert(VT.isFloatingPoint() && "Can only convert between FP types"); |
121 | 9.38k | |
122 | 9.38k | // convert modifies in place, so make a copy. |
123 | 9.38k | APFloat Val2 = APFloat(Val); |
124 | 9.38k | bool losesInfo; |
125 | 9.38k | (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT), |
126 | 9.38k | APFloat::rmNearestTiesToEven, |
127 | 9.38k | &losesInfo); |
128 | 9.38k | return !losesInfo; |
129 | 9.38k | } |
130 | | |
131 | | //===----------------------------------------------------------------------===// |
132 | | // ISD Namespace |
133 | | //===----------------------------------------------------------------------===// |
134 | | |
135 | 168k | bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { |
136 | 168k | auto *BV = dyn_cast<BuildVectorSDNode>(N); |
137 | 168k | if (!BV) |
138 | 151k | return false; |
139 | 16.5k | |
140 | 16.5k | APInt SplatUndef; |
141 | 16.5k | unsigned SplatBitSize; |
142 | 16.5k | bool HasUndefs; |
143 | 16.5k | unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); |
144 | 16.5k | return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs, |
145 | 16.5k | EltSize) && |
146 | 16.5k | EltSize == SplatBitSize15.8k ; |
147 | 16.5k | } |
148 | | |
149 | | // FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be |
150 | | // specializations of the more general isConstantSplatVector()? |
151 | | |
152 | 626k | bool ISD::isBuildVectorAllOnes(const SDNode *N) { |
153 | 626k | // Look through a bit convert. |
154 | 689k | while (N->getOpcode() == ISD::BITCAST) |
155 | 63.6k | N = N->getOperand(0).getNode(); |
156 | 626k | |
157 | 626k | if (N->getOpcode() != ISD::BUILD_VECTOR) return false480k ; |
158 | 145k | |
159 | 145k | unsigned i = 0, e = N->getNumOperands(); |
160 | 145k | |
161 | 145k | // Skip over all of the undef values. |
162 | 148k | while (i != e && N->getOperand(i).isUndef()) |
163 | 3.09k | ++i; |
164 | 145k | |
165 | 145k | // Do not accept an all-undef vector. |
166 | 145k | if (i == e) return false0 ; |
167 | 145k | |
168 | 145k | // Do not accept build_vectors that aren't all constants or which have non-~0 |
169 | 145k | // elements. We have to be a bit careful here, as the type of the constant |
170 | 145k | // may not be the same as the type of the vector elements due to type |
171 | 145k | // legalization (the elements are promoted to a legal type for the target and |
172 | 145k | // a vector of a type may be legal when the base element type is not). |
173 | 145k | // We only want to check enough bits to cover the vector elements, because |
174 | 145k | // we care if the resultant vector is all ones, not whether the individual |
175 | 145k | // constants are. |
176 | 145k | SDValue NotZero = N->getOperand(i); |
177 | 145k | unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); |
178 | 145k | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) { |
179 | 115k | if (CN->getAPIntValue().countTrailingOnes() < EltSize) |
180 | 75.3k | return false; |
181 | 29.4k | } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) { |
182 | 4.55k | if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize) |
183 | 4.43k | return false; |
184 | 24.9k | } else |
185 | 24.9k | return false; |
186 | 40.7k | |
187 | 40.7k | // Okay, we have at least one ~0 value, check to see if the rest match or are |
188 | 40.7k | // undefs. Even with the above element type twiddling, this should be OK, as |
189 | 40.7k | // the same type legalization should have applied to all the elements. |
190 | 240k | for (++i; 40.7k i != e; ++i199k ) |
191 | 202k | if (N->getOperand(i) != NotZero && !N->getOperand(i).isUndef()3.02k ) |
192 | 2.74k | return false; |
193 | 40.7k | return true37.9k ; |
194 | 40.7k | } |
195 | | |
196 | 1.81M | bool ISD::isBuildVectorAllZeros(const SDNode *N) { |
197 | 1.81M | // Look through a bit convert. |
198 | 1.97M | while (N->getOpcode() == ISD::BITCAST) |
199 | 160k | N = N->getOperand(0).getNode(); |
200 | 1.81M | |
201 | 1.81M | if (N->getOpcode() != ISD::BUILD_VECTOR) return false1.39M ; |
202 | 426k | |
203 | 426k | bool IsAllUndef = true; |
204 | 2.34M | for (const SDValue &Op : N->op_values()) { |
205 | 2.34M | if (Op.isUndef()) |
206 | 17.6k | continue; |
207 | 2.32M | IsAllUndef = false; |
208 | 2.32M | // Do not accept build_vectors that aren't all constants or which have non-0 |
209 | 2.32M | // elements. We have to be a bit careful here, as the type of the constant |
210 | 2.32M | // may not be the same as the type of the vector elements due to type |
211 | 2.32M | // legalization (the elements are promoted to a legal type for the target |
212 | 2.32M | // and a vector of a type may be legal when the base element type is not). |
213 | 2.32M | // We only want to check enough bits to cover the vector elements, because |
214 | 2.32M | // we care if the resultant vector is all zeros, not whether the individual |
215 | 2.32M | // constants are. |
216 | 2.32M | unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); |
217 | 2.32M | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) { |
218 | 2.20M | if (CN->getAPIntValue().countTrailingZeros() < EltSize) |
219 | 186k | return false; |
220 | 121k | } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) { |
221 | 89.6k | if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize) |
222 | 4.36k | return false; |
223 | 31.7k | } else |
224 | 31.7k | return false; |
225 | 2.32M | } |
226 | 426k | |
227 | 426k | // Do not accept an all-undef vector. |
228 | 426k | if (203k IsAllUndef203k ) |
229 | 0 | return false; |
230 | 203k | return true; |
231 | 203k | } |
232 | | |
233 | 8.87M | bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { |
234 | 8.87M | if (N->getOpcode() != ISD::BUILD_VECTOR) |
235 | 8.72M | return false; |
236 | 158k | |
237 | 1.32M | for (const SDValue &Op : N->op_values())158k { |
238 | 1.32M | if (Op.isUndef()) |
239 | 44.7k | continue; |
240 | 1.28M | if (!isa<ConstantSDNode>(Op)) |
241 | 33.5k | return false; |
242 | 1.28M | } |
243 | 158k | return true124k ; |
244 | 158k | } |
245 | | |
246 | 1.52M | bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { |
247 | 1.52M | if (N->getOpcode() != ISD::BUILD_VECTOR) |
248 | 1.50M | return false; |
249 | 25.0k | |
250 | 63.1k | for (const SDValue &Op : N->op_values())25.0k { |
251 | 63.1k | if (Op.isUndef()) |
252 | 879 | continue; |
253 | 62.2k | if (!isa<ConstantFPSDNode>(Op)) |
254 | 5.32k | return false; |
255 | 62.2k | } |
256 | 25.0k | return true19.7k ; |
257 | 25.0k | } |
258 | | |
259 | 574k | bool ISD::allOperandsUndef(const SDNode *N) { |
260 | 574k | // Return false if the node has no operands. |
261 | 574k | // This is "logically inconsistent" with the definition of "all" but |
262 | 574k | // is probably the desired behavior. |
263 | 574k | if (N->getNumOperands() == 0) |
264 | 0 | return false; |
265 | 583k | return all_of(N->op_values(), [](SDValue Op) 574k { return Op.isUndef(); }); |
266 | 574k | } |
267 | | |
268 | | bool ISD::matchUnaryPredicate(SDValue Op, |
269 | | std::function<bool(ConstantSDNode *)> Match, |
270 | 1.25M | bool AllowUndefs) { |
271 | 1.25M | // FIXME: Add support for scalar UNDEF cases? |
272 | 1.25M | if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) |
273 | 1.05M | return Match(Cst); |
274 | 204k | |
275 | 204k | // FIXME: Add support for vector UNDEF cases? |
276 | 204k | if (ISD::BUILD_VECTOR != Op.getOpcode()) |
277 | 136k | return false; |
278 | 67.2k | |
279 | 67.2k | EVT SVT = Op.getValueType().getScalarType(); |
280 | 81.6k | for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i14.3k ) { |
281 | 79.9k | if (AllowUndefs && Op.getOperand(i).isUndef()66.9k ) { |
282 | 1.04k | if (!Match(nullptr)) |
283 | 0 | return false; |
284 | 1.04k | continue; |
285 | 1.04k | } |
286 | 78.9k | |
287 | 78.9k | auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); |
288 | 78.9k | if (!Cst || Cst->getValueType(0) != SVT77.2k || !Match(Cst)75.6k ) |
289 | 65.5k | return false; |
290 | 78.9k | } |
291 | 67.2k | return true1.71k ; |
292 | 67.2k | } |
293 | | |
294 | | bool ISD::matchBinaryPredicate( |
295 | | SDValue LHS, SDValue RHS, |
296 | | std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match, |
297 | 71.2k | bool AllowUndefs, bool AllowTypeMismatch) { |
298 | 71.2k | if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType()70.8k ) |
299 | 1.70k | return false; |
300 | 69.5k | |
301 | 69.5k | // TODO: Add support for scalar UNDEF cases? |
302 | 69.5k | if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) |
303 | 33.6k | if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) |
304 | 20.5k | return Match(LHSCst, RHSCst); |
305 | 49.0k | |
306 | 49.0k | // TODO: Add support for vector UNDEF cases? |
307 | 49.0k | if (ISD::BUILD_VECTOR != LHS.getOpcode() || |
308 | 49.0k | ISD::BUILD_VECTOR != RHS.getOpcode()2.59k ) |
309 | 48.0k | return false; |
310 | 953 | |
311 | 953 | EVT SVT = LHS.getValueType().getScalarType(); |
312 | 11.9k | for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i11.0k ) { |
313 | 11.2k | SDValue LHSOp = LHS.getOperand(i); |
314 | 11.2k | SDValue RHSOp = RHS.getOperand(i); |
315 | 11.2k | bool LHSUndef = AllowUndefs && LHSOp.isUndef()3.33k ; |
316 | 11.2k | bool RHSUndef = AllowUndefs && RHSOp.isUndef()3.33k ; |
317 | 11.2k | auto *LHSCst = dyn_cast<ConstantSDNode>(LHSOp); |
318 | 11.2k | auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp); |
319 | 11.2k | if ((!LHSCst && !LHSUndef88 ) || (!RHSCst && !RHSUndef84 )) |
320 | 0 | return false; |
321 | 11.2k | if (!AllowTypeMismatch && (11.0k LHSOp.getValueType() != SVT11.0k || |
322 | 11.0k | LHSOp.getValueType() != RHSOp.getValueType())) |
323 | 0 | return false; |
324 | 11.2k | if (!Match(LHSCst, RHSCst)) |
325 | 253 | return false; |
326 | 11.2k | } |
327 | 953 | return true700 ; |
328 | 953 | } |
329 | | |
330 | 455 | ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { |
331 | 455 | switch (ExtType) { |
332 | 455 | case ISD::EXTLOAD: |
333 | 33 | return IsFP ? ISD::FP_EXTEND0 : ISD::ANY_EXTEND; |
334 | 455 | case ISD::SEXTLOAD: |
335 | 172 | return ISD::SIGN_EXTEND; |
336 | 455 | case ISD::ZEXTLOAD: |
337 | 250 | return ISD::ZERO_EXTEND; |
338 | 455 | default: |
339 | 0 | break; |
340 | 0 | } |
341 | 0 | |
342 | 0 | llvm_unreachable("Invalid LoadExtType"); |
343 | 0 | } |
344 | | |
345 | 1.24M | ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { |
346 | 1.24M | // To perform this operation, we just need to swap the L and G bits of the |
347 | 1.24M | // operation. |
348 | 1.24M | unsigned OldL = (Operation >> 2) & 1; |
349 | 1.24M | unsigned OldG = (Operation >> 1) & 1; |
350 | 1.24M | return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits |
351 | 1.24M | (OldL << 1) | // New G bit |
352 | 1.24M | (OldG << 2)); // New L bit. |
353 | 1.24M | } |
354 | | |
355 | 263k | ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { |
356 | 263k | unsigned Operation = Op; |
357 | 263k | if (isInteger) |
358 | 258k | Operation ^= 7; // Flip L, G, E bits, but not U. |
359 | 4.71k | else |
360 | 4.71k | Operation ^= 15; // Flip all of the condition bits. |
361 | 263k | |
362 | 263k | if (Operation > ISD::SETTRUE2) |
363 | 470 | Operation &= ~8; // Don't let N and U bits get set. |
364 | 263k | |
365 | 263k | return ISD::CondCode(Operation); |
366 | 263k | } |
367 | | |
368 | | /// For an integer comparison, return 1 if the comparison is a signed operation |
369 | | /// and 2 if the result is an unsigned comparison. Return zero if the operation |
370 | | /// does not depend on the sign of the input (setne and seteq). |
371 | 50 | static int isSignedOp(ISD::CondCode Opcode) { |
372 | 50 | switch (Opcode) { |
373 | 50 | default: 0 llvm_unreachable0 ("Illegal integer setcc operation!"); |
374 | 50 | case ISD::SETEQ: |
375 | 24 | case ISD::SETNE: return 0; |
376 | 24 | case ISD::SETLT: |
377 | 12 | case ISD::SETLE: |
378 | 12 | case ISD::SETGT: |
379 | 12 | case ISD::SETGE: return 1; |
380 | 14 | case ISD::SETULT: |
381 | 14 | case ISD::SETULE: |
382 | 14 | case ISD::SETUGT: |
383 | 14 | case ISD::SETUGE: return 2; |
384 | 50 | } |
385 | 50 | } |
386 | | |
387 | | ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, |
388 | 52 | bool IsInteger) { |
389 | 52 | if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 320 ) |
390 | 10 | // Cannot fold a signed integer setcc with an unsigned integer setcc. |
391 | 10 | return ISD::SETCC_INVALID; |
392 | 42 | |
393 | 42 | unsigned Op = Op1 | Op2; // Combine all of the condition bits. |
394 | 42 | |
395 | 42 | // If the N and U bits get set, then the resultant comparison DOES suddenly |
396 | 42 | // care about orderedness, and it is true when ordered. |
397 | 42 | if (Op > ISD::SETTRUE2) |
398 | 31 | Op &= ~16; // Clear the U bit if the N bit is set. |
399 | 42 | |
400 | 42 | // Canonicalize illegal integer setcc's. |
401 | 42 | if (IsInteger && Op == ISD::SETUNE10 ) // e.g. SETUGT | SETULT |
402 | 0 | Op = ISD::SETNE; |
403 | 42 | |
404 | 42 | return ISD::CondCode(Op); |
405 | 42 | } |
406 | | |
407 | | ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, |
408 | 45 | bool IsInteger) { |
409 | 45 | if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 35 ) |
410 | 0 | // Cannot fold a signed setcc with an unsigned setcc. |
411 | 0 | return ISD::SETCC_INVALID; |
412 | 45 | |
413 | 45 | // Combine all of the condition bits. |
414 | 45 | ISD::CondCode Result = ISD::CondCode(Op1 & Op2); |
415 | 45 | |
416 | 45 | // Canonicalize illegal integer setcc's. |
417 | 45 | if (IsInteger) { |
418 | 5 | switch (Result) { |
419 | 5 | default: break2 ; |
420 | 5 | case ISD::SETUO : Result = ISD::SETFALSE; break0 ; // SETUGT & SETULT |
421 | 5 | case ISD::SETOEQ: // SETEQ & SETU[LG]E |
422 | 1 | case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE |
423 | 2 | case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE |
424 | 1 | case ISD::SETOGT: Result = ISD::SETUGT ; break0 ; // SETUGT & SETNE |
425 | 45 | } |
426 | 45 | } |
427 | 45 | |
428 | 45 | return Result; |
429 | 45 | } |
430 | | |
431 | | //===----------------------------------------------------------------------===// |
432 | | // SDNode Profile Support |
433 | | //===----------------------------------------------------------------------===// |
434 | | |
435 | | /// AddNodeIDOpcode - Add the node opcode to the NodeID data. |
436 | 127M | static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { |
437 | 127M | ID.AddInteger(OpC); |
438 | 127M | } |
439 | | |
440 | | /// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them |
441 | | /// solely with their pointer. |
442 | 127M | static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { |
443 | 127M | ID.AddPointer(VTList.VTs); |
444 | 127M | } |
445 | | |
446 | | /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. |
447 | | static void AddNodeIDOperands(FoldingSetNodeID &ID, |
448 | 70.7M | ArrayRef<SDValue> Ops) { |
449 | 87.6M | for (auto& Op : Ops) { |
450 | 87.6M | ID.AddPointer(Op.getNode()); |
451 | 87.6M | ID.AddInteger(Op.getResNo()); |
452 | 87.6M | } |
453 | 70.7M | } |
454 | | |
455 | | /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. |
456 | | static void AddNodeIDOperands(FoldingSetNodeID &ID, |
457 | 56.8M | ArrayRef<SDUse> Ops) { |
458 | 91.3M | for (auto& Op : Ops) { |
459 | 91.3M | ID.AddPointer(Op.getNode()); |
460 | 91.3M | ID.AddInteger(Op.getResNo()); |
461 | 91.3M | } |
462 | 56.8M | } |
463 | | |
464 | | static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, |
465 | 70.7M | SDVTList VTList, ArrayRef<SDValue> OpList) { |
466 | 70.7M | AddNodeIDOpcode(ID, OpC); |
467 | 70.7M | AddNodeIDValueTypes(ID, VTList); |
468 | 70.7M | AddNodeIDOperands(ID, OpList); |
469 | 70.7M | } |
470 | | |
471 | | /// If this is an SDNode with special info, add this info to the NodeID data. |
472 | 57.6M | static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { |
473 | 57.6M | switch (N->getOpcode()) { |
474 | 57.6M | case ISD::TargetExternalSymbol: |
475 | 0 | case ISD::ExternalSymbol: |
476 | 0 | case ISD::MCSymbol: |
477 | 0 | llvm_unreachable("Should only be used on nodes with operands"); |
478 | 28.2M | default: break; // Normal nodes don't need extra info. |
479 | 15.3M | case ISD::TargetConstant: |
480 | 15.3M | case ISD::Constant: { |
481 | 15.3M | const ConstantSDNode *C = cast<ConstantSDNode>(N); |
482 | 15.3M | ID.AddPointer(C->getConstantIntValue()); |
483 | 15.3M | ID.AddBoolean(C->isOpaque()); |
484 | 15.3M | break; |
485 | 15.3M | } |
486 | 15.3M | case ISD::TargetConstantFP: |
487 | 72.8k | case ISD::ConstantFP: |
488 | 72.8k | ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); |
489 | 72.8k | break; |
490 | 798k | case ISD::TargetGlobalAddress: |
491 | 798k | case ISD::GlobalAddress: |
492 | 798k | case ISD::TargetGlobalTLSAddress: |
493 | 798k | case ISD::GlobalTLSAddress: { |
494 | 798k | const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); |
495 | 798k | ID.AddPointer(GA->getGlobal()); |
496 | 798k | ID.AddInteger(GA->getOffset()); |
497 | 798k | ID.AddInteger(GA->getTargetFlags()); |
498 | 798k | break; |
499 | 798k | } |
500 | 798k | case ISD::BasicBlock: |
501 | 552k | ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock()); |
502 | 552k | break; |
503 | 6.74M | case ISD::Register: |
504 | 6.74M | ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); |
505 | 6.74M | break; |
506 | 798k | case ISD::RegisterMask: |
507 | 282k | ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); |
508 | 282k | break; |
509 | 798k | case ISD::SRCVALUE: |
510 | 1.63k | ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); |
511 | 1.63k | break; |
512 | 798k | case ISD::FrameIndex: |
513 | 538k | case ISD::TargetFrameIndex: |
514 | 538k | ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); |
515 | 538k | break; |
516 | 538k | case ISD::LIFETIME_START: |
517 | 109k | case ISD::LIFETIME_END: |
518 | 109k | if (cast<LifetimeSDNode>(N)->hasOffset()) { |
519 | 108k | ID.AddInteger(cast<LifetimeSDNode>(N)->getSize()); |
520 | 108k | ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset()); |
521 | 108k | } |
522 | 109k | break; |
523 | 109k | case ISD::JumpTable: |
524 | 1.17k | case ISD::TargetJumpTable: |
525 | 1.17k | ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); |
526 | 1.17k | ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags()); |
527 | 1.17k | break; |
528 | 67.8k | case ISD::ConstantPool: |
529 | 67.8k | case ISD::TargetConstantPool: { |
530 | 67.8k | const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); |
531 | 67.8k | ID.AddInteger(CP->getAlignment()); |
532 | 67.8k | ID.AddInteger(CP->getOffset()); |
533 | 67.8k | if (CP->isMachineConstantPoolEntry()) |
534 | 82 | CP->getMachineCPVal()->addSelectionDAGCSEId(ID); |
535 | 67.7k | else |
536 | 67.7k | ID.AddPointer(CP->getConstVal()); |
537 | 67.8k | ID.AddInteger(CP->getTargetFlags()); |
538 | 67.8k | break; |
539 | 67.8k | } |
540 | 67.8k | case ISD::TargetIndex: { |
541 | 0 | const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N); |
542 | 0 | ID.AddInteger(TI->getIndex()); |
543 | 0 | ID.AddInteger(TI->getOffset()); |
544 | 0 | ID.AddInteger(TI->getTargetFlags()); |
545 | 0 | break; |
546 | 67.8k | } |
547 | 1.88M | case ISD::LOAD: { |
548 | 1.88M | const LoadSDNode *LD = cast<LoadSDNode>(N); |
549 | 1.88M | ID.AddInteger(LD->getMemoryVT().getRawBits()); |
550 | 1.88M | ID.AddInteger(LD->getRawSubclassData()); |
551 | 1.88M | ID.AddInteger(LD->getPointerInfo().getAddrSpace()); |
552 | 1.88M | break; |
553 | 67.8k | } |
554 | 2.81M | case ISD::STORE: { |
555 | 2.81M | const StoreSDNode *ST = cast<StoreSDNode>(N); |
556 | 2.81M | ID.AddInteger(ST->getMemoryVT().getRawBits()); |
557 | 2.81M | ID.AddInteger(ST->getRawSubclassData()); |
558 | 2.81M | ID.AddInteger(ST->getPointerInfo().getAddrSpace()); |
559 | 2.81M | break; |
560 | 67.8k | } |
561 | 67.8k | case ISD::MLOAD: { |
562 | 872 | const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); |
563 | 872 | ID.AddInteger(MLD->getMemoryVT().getRawBits()); |
564 | 872 | ID.AddInteger(MLD->getRawSubclassData()); |
565 | 872 | ID.AddInteger(MLD->getPointerInfo().getAddrSpace()); |
566 | 872 | break; |
567 | 67.8k | } |
568 | 67.8k | case ISD::MSTORE: { |
569 | 828 | const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); |
570 | 828 | ID.AddInteger(MST->getMemoryVT().getRawBits()); |
571 | 828 | ID.AddInteger(MST->getRawSubclassData()); |
572 | 828 | ID.AddInteger(MST->getPointerInfo().getAddrSpace()); |
573 | 828 | break; |
574 | 67.8k | } |
575 | 67.8k | case ISD::MGATHER: { |
576 | 711 | const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N); |
577 | 711 | ID.AddInteger(MG->getMemoryVT().getRawBits()); |
578 | 711 | ID.AddInteger(MG->getRawSubclassData()); |
579 | 711 | ID.AddInteger(MG->getPointerInfo().getAddrSpace()); |
580 | 711 | break; |
581 | 67.8k | } |
582 | 67.8k | case ISD::MSCATTER: { |
583 | 143 | const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N); |
584 | 143 | ID.AddInteger(MS->getMemoryVT().getRawBits()); |
585 | 143 | ID.AddInteger(MS->getRawSubclassData()); |
586 | 143 | ID.AddInteger(MS->getPointerInfo().getAddrSpace()); |
587 | 143 | break; |
588 | 67.8k | } |
589 | 67.8k | case ISD::ATOMIC_CMP_SWAP: |
590 | 29.6k | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: |
591 | 29.6k | case ISD::ATOMIC_SWAP: |
592 | 29.6k | case ISD::ATOMIC_LOAD_ADD: |
593 | 29.6k | case ISD::ATOMIC_LOAD_SUB: |
594 | 29.6k | case ISD::ATOMIC_LOAD_AND: |
595 | 29.6k | case ISD::ATOMIC_LOAD_CLR: |
596 | 29.6k | case ISD::ATOMIC_LOAD_OR: |
597 | 29.6k | case ISD::ATOMIC_LOAD_XOR: |
598 | 29.6k | case ISD::ATOMIC_LOAD_NAND: |
599 | 29.6k | case ISD::ATOMIC_LOAD_MIN: |
600 | 29.6k | case ISD::ATOMIC_LOAD_MAX: |
601 | 29.6k | case ISD::ATOMIC_LOAD_UMIN: |
602 | 29.6k | case ISD::ATOMIC_LOAD_UMAX: |
603 | 29.6k | case ISD::ATOMIC_LOAD: |
604 | 29.6k | case ISD::ATOMIC_STORE: { |
605 | 29.6k | const AtomicSDNode *AT = cast<AtomicSDNode>(N); |
606 | 29.6k | ID.AddInteger(AT->getMemoryVT().getRawBits()); |
607 | 29.6k | ID.AddInteger(AT->getRawSubclassData()); |
608 | 29.6k | ID.AddInteger(AT->getPointerInfo().getAddrSpace()); |
609 | 29.6k | break; |
610 | 29.6k | } |
611 | 29.6k | case ISD::PREFETCH: { |
612 | 870 | const MemSDNode *PF = cast<MemSDNode>(N); |
613 | 870 | ID.AddInteger(PF->getPointerInfo().getAddrSpace()); |
614 | 870 | break; |
615 | 29.6k | } |
616 | 120k | case ISD::VECTOR_SHUFFLE: { |
617 | 120k | const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); |
618 | 120k | for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); |
619 | 1.69M | i != e; ++i1.57M ) |
620 | 1.57M | ID.AddInteger(SVN->getMaskElt(i)); |
621 | 120k | break; |
622 | 29.6k | } |
623 | 29.6k | case ISD::TargetBlockAddress: |
624 | 70 | case ISD::BlockAddress: { |
625 | 70 | const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N); |
626 | 70 | ID.AddPointer(BA->getBlockAddress()); |
627 | 70 | ID.AddInteger(BA->getOffset()); |
628 | 70 | ID.AddInteger(BA->getTargetFlags()); |
629 | 70 | break; |
630 | 57.6M | } |
631 | 57.6M | } // end switch (N->getOpcode()) |
632 | 57.6M | |
633 | 57.6M | // Target specific memory nodes could also have address spaces to check. |
634 | 57.6M | if (N->isTargetMemoryOpcode()) |
635 | 14.4k | ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); |
636 | 57.6M | } |
637 | | |
638 | | /// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID |
639 | | /// data. |
640 | 56.8M | static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { |
641 | 56.8M | AddNodeIDOpcode(ID, N->getOpcode()); |
642 | 56.8M | // Add the return value info. |
643 | 56.8M | AddNodeIDValueTypes(ID, N->getVTList()); |
644 | 56.8M | // Add the operand info. |
645 | 56.8M | AddNodeIDOperands(ID, N->ops()); |
646 | 56.8M | |
647 | 56.8M | // Handle SDNode leafs with special info. |
648 | 56.8M | AddNodeIDCustom(ID, N); |
649 | 56.8M | } |
650 | | |
651 | | //===----------------------------------------------------------------------===// |
652 | | // SelectionDAG Class |
653 | | //===----------------------------------------------------------------------===// |
654 | | |
655 | | /// doNotCSE - Return true if CSE should not be performed for this node. |
656 | 10.8M | static bool doNotCSE(SDNode *N) { |
657 | 10.8M | if (N->getValueType(0) == MVT::Glue) |
658 | 10.5k | return true; // Never CSE anything that produces a flag. |
659 | 10.8M | |
660 | 10.8M | switch (N->getOpcode()) { |
661 | 10.8M | default: break10.7M ; |
662 | 10.8M | case ISD::HANDLENODE: |
663 | 104k | case ISD::EH_LABEL: |
664 | 104k | return true; // Never CSE these nodes. |
665 | 10.7M | } |
666 | 10.7M | |
667 | 10.7M | // Check that remaining values produced are not flags. |
668 | 12.6M | for (unsigned i = 1, e = N->getNumValues(); 10.7M i != e; ++i1.91M ) |
669 | 4.04M | if (N->getValueType(i) == MVT::Glue) |
670 | 2.12M | return true; // Never CSE anything that produces a flag. |
671 | 10.7M | |
672 | 10.7M | return false8.63M ; |
673 | 10.7M | } |
674 | | |
675 | | /// RemoveDeadNodes - This method deletes all unreachable nodes in the |
676 | | /// SelectionDAG. |
677 | 6.02M | void SelectionDAG::RemoveDeadNodes() { |
678 | 6.02M | // Create a dummy node (which is not added to allnodes), that adds a reference |
679 | 6.02M | // to the root node, preventing it from being deleted. |
680 | 6.02M | HandleSDNode Dummy(getRoot()); |
681 | 6.02M | |
682 | 6.02M | SmallVector<SDNode*, 128> DeadNodes; |
683 | 6.02M | |
684 | 6.02M | // Add all obviously-dead nodes to the DeadNodes worklist. |
685 | 6.02M | for (SDNode &Node : allnodes()) |
686 | 130M | if (Node.use_empty()) |
687 | 1.25M | DeadNodes.push_back(&Node); |
688 | 6.02M | |
689 | 6.02M | RemoveDeadNodes(DeadNodes); |
690 | 6.02M | |
691 | 6.02M | // If the root changed (e.g. it was a dead load, update the root). |
692 | 6.02M | setRoot(Dummy.getValue()); |
693 | 6.02M | } |
694 | | |
695 | | /// RemoveDeadNodes - This method deletes the unreachable nodes in the |
696 | | /// given list, and any nodes that become unreachable as a result. |
697 | 13.1M | void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) { |
698 | 13.1M | |
699 | 13.1M | // Process the worklist, deleting the nodes and adding their uses to the |
700 | 13.1M | // worklist. |
701 | 21.2M | while (!DeadNodes.empty()) { |
702 | 8.13M | SDNode *N = DeadNodes.pop_back_val(); |
703 | 8.13M | // Skip to next node if we've already managed to delete the node. This could |
704 | 8.13M | // happen if replacing a node causes a node previously added to the node to |
705 | 8.13M | // be deleted. |
706 | 8.13M | if (N->getOpcode() == ISD::DELETED_NODE) |
707 | 7 | continue; |
708 | 8.13M | |
709 | 18.0M | for (DAGUpdateListener *DUL = UpdateListeners; 8.13M DUL; DUL = DUL->Next9.91M ) |
710 | 9.91M | DUL->NodeDeleted(N, nullptr); |
711 | 8.13M | |
712 | 8.13M | // Take the node out of the appropriate CSE map. |
713 | 8.13M | RemoveNodeFromCSEMaps(N); |
714 | 8.13M | |
715 | 8.13M | // Next, brutally remove the operand list. This is safe to do, as there are |
716 | 8.13M | // no cycles in the graph. |
717 | 18.1M | for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { |
718 | 9.97M | SDUse &Use = *I++; |
719 | 9.97M | SDNode *Operand = Use.getNode(); |
720 | 9.97M | Use.set(SDValue()); |
721 | 9.97M | |
722 | 9.97M | // Now that we removed this operand, see if there are no uses of it left. |
723 | 9.97M | if (Operand->use_empty()) |
724 | 2.84M | DeadNodes.push_back(Operand); |
725 | 9.97M | } |
726 | 8.13M | |
727 | 8.13M | DeallocateNode(N); |
728 | 8.13M | } |
729 | 13.1M | } |
730 | | |
731 | 755k | void SelectionDAG::RemoveDeadNode(SDNode *N){ |
732 | 755k | SmallVector<SDNode*, 16> DeadNodes(1, N); |
733 | 755k | |
734 | 755k | // Create a dummy node that adds a reference to the root node, preventing |
735 | 755k | // it from being deleted. (This matters if the root is an operand of the |
736 | 755k | // dead node.) |
737 | 755k | HandleSDNode Dummy(getRoot()); |
738 | 755k | |
739 | 755k | RemoveDeadNodes(DeadNodes); |
740 | 755k | } |
741 | | |
742 | 12.8M | void SelectionDAG::DeleteNode(SDNode *N) { |
743 | 12.8M | // First take this out of the appropriate CSE map. |
744 | 12.8M | RemoveNodeFromCSEMaps(N); |
745 | 12.8M | |
746 | 12.8M | // Finally, remove uses due to operands of this node, remove from the |
747 | 12.8M | // AllNodes list, and delete the node. |
748 | 12.8M | DeleteNodeNotInCSEMaps(N); |
749 | 12.8M | } |
750 | | |
751 | 12.8M | void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { |
752 | 12.8M | assert(N->getIterator() != AllNodes.begin() && |
753 | 12.8M | "Cannot delete the entry node!"); |
754 | 12.8M | assert(N->use_empty() && "Cannot delete a node that is not dead!"); |
755 | 12.8M | |
756 | 12.8M | // Drop all of the operands and decrement used node's use counts. |
757 | 12.8M | N->DropOperands(); |
758 | 12.8M | |
759 | 12.8M | DeallocateNode(N); |
760 | 12.8M | } |
761 | | |
762 | 46.6M | void SDDbgInfo::erase(const SDNode *Node) { |
763 | 46.6M | DbgValMapType::iterator I = DbgValMap.find(Node); |
764 | 46.6M | if (I == DbgValMap.end()) |
765 | 46.6M | return; |
766 | 493 | for (auto &Val: I->second) |
767 | 540 | Val->setIsInvalidated(); |
768 | 493 | DbgValMap.erase(I); |
769 | 493 | } |
770 | | |
771 | 46.6M | void SelectionDAG::DeallocateNode(SDNode *N) { |
772 | 46.6M | // If we have operands, deallocate them. |
773 | 46.6M | removeOperands(N); |
774 | 46.6M | |
775 | 46.6M | NodeAllocator.Deallocate(AllNodes.remove(N)); |
776 | 46.6M | |
777 | 46.6M | // Set the opcode to DELETED_NODE to help catch bugs when node |
778 | 46.6M | // memory is reallocated. |
779 | 46.6M | // FIXME: There are places in SDag that have grown a dependency on the opcode |
780 | 46.6M | // value in the released node. |
781 | 46.6M | __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType)); |
782 | 46.6M | N->NodeType = ISD::DELETED_NODE; |
783 | 46.6M | |
784 | 46.6M | // If any of the SDDbgValue nodes refer to this SDNode, invalidate |
785 | 46.6M | // them and forget about that node. |
786 | 46.6M | DbgInfo->erase(N); |
787 | 46.6M | } |
788 | | |
789 | | #ifndef NDEBUG |
790 | | /// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. |
791 | | static void VerifySDNode(SDNode *N) { |
792 | | switch (N->getOpcode()) { |
793 | | default: |
794 | | break; |
795 | | case ISD::BUILD_PAIR: { |
796 | | EVT VT = N->getValueType(0); |
797 | | assert(N->getNumValues() == 1 && "Too many results!"); |
798 | | assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && |
799 | | "Wrong return type!"); |
800 | | assert(N->getNumOperands() == 2 && "Wrong number of operands!"); |
801 | | assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && |
802 | | "Mismatched operand types!"); |
803 | | assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && |
804 | | "Wrong operand type!"); |
805 | | assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && |
806 | | "Wrong return type size"); |
807 | | break; |
808 | | } |
809 | | case ISD::BUILD_VECTOR: { |
810 | | assert(N->getNumValues() == 1 && "Too many results!"); |
811 | | assert(N->getValueType(0).isVector() && "Wrong return type!"); |
812 | | assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && |
813 | | "Wrong number of operands!"); |
814 | | EVT EltVT = N->getValueType(0).getVectorElementType(); |
815 | | for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { |
816 | | assert((I->getValueType() == EltVT || |
817 | | (EltVT.isInteger() && I->getValueType().isInteger() && |
818 | | EltVT.bitsLE(I->getValueType()))) && |
819 | | "Wrong operand type!"); |
820 | | assert(I->getValueType() == N->getOperand(0).getValueType() && |
821 | | "Operands must all have the same type"); |
822 | | } |
823 | | break; |
824 | | } |
825 | | } |
826 | | } |
827 | | #endif // NDEBUG |
828 | | |
829 | | /// Insert a newly allocated node into the DAG. |
830 | | /// |
831 | | /// Handles insertion into the all nodes list and CSE map, as well as |
832 | | /// verification and other common operations when a new node is allocated. |
833 | 47.9M | void SelectionDAG::InsertNode(SDNode *N) { |
834 | 47.9M | AllNodes.push_back(N); |
835 | | #ifndef NDEBUG |
836 | | N->PersistentId = NextPersistentId++; |
837 | | VerifySDNode(N); |
838 | | #endif |
839 | 70.5M | for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next22.6M ) |
840 | 22.6M | DUL->NodeInserted(N); |
841 | 47.9M | } |
842 | | |
843 | | /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that |
844 | | /// correspond to it. This is useful when we're about to delete or repurpose |
845 | | /// the node. We don't want future request for structurally identical nodes |
846 | | /// to return N anymore. |
847 | 39.4M | bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { |
848 | 39.4M | bool Erased = false; |
849 | 39.4M | switch (N->getOpcode()) { |
850 | 39.4M | case ISD::HANDLENODE: return false97.6k ; // noop. |
851 | 39.4M | case ISD::CONDCODE: |
852 | 952k | assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && |
853 | 952k | "Cond code doesn't exist!"); |
854 | 952k | Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr; |
855 | 952k | CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr; |
856 | 952k | break; |
857 | 39.4M | case ISD::ExternalSymbol: |
858 | 14.8k | Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); |
859 | 14.8k | break; |
860 | 39.4M | case ISD::TargetExternalSymbol: { |
861 | 103 | ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N); |
862 | 103 | Erased = TargetExternalSymbols.erase( |
863 | 103 | std::pair<std::string,unsigned char>(ESN->getSymbol(), |
864 | 103 | ESN->getTargetFlags())); |
865 | 103 | break; |
866 | 39.4M | } |
867 | 39.4M | case ISD::MCSymbol: { |
868 | 0 | auto *MCSN = cast<MCSymbolSDNode>(N); |
869 | 0 | Erased = MCSymbols.erase(MCSN->getMCSymbol()); |
870 | 0 | break; |
871 | 39.4M | } |
872 | 39.4M | case ISD::VALUETYPE: { |
873 | 213k | EVT VT = cast<VTSDNode>(N)->getVT(); |
874 | 213k | if (VT.isExtended()) { |
875 | 67.4k | Erased = ExtendedValueTypeNodes.erase(VT); |
876 | 146k | } else { |
877 | 146k | Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr; |
878 | 146k | ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr; |
879 | 146k | } |
880 | 213k | break; |
881 | 39.4M | } |
882 | 39.4M | default: |
883 | 38.1M | // Remove it from the CSE Map. |
884 | 38.1M | assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); |
885 | 38.1M | assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!"); |
886 | 38.1M | Erased = CSEMap.RemoveNode(N); |
887 | 38.1M | break; |
888 | 39.3M | } |
889 | | #ifndef NDEBUG |
890 | | // Verify that the node was actually in one of the CSE maps, unless it has a |
891 | | // flag result (which cannot be CSE'd) or is one of the special cases that are |
892 | | // not subject to CSE. |
893 | | if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue && |
894 | | !N->isMachineOpcode() && !doNotCSE(N)) { |
895 | | N->dump(this); |
896 | | dbgs() << "\n"; |
897 | | llvm_unreachable("Node is not in map!"); |
898 | | } |
899 | | #endif |
900 | 39.3M | return Erased; |
901 | 39.3M | } |
902 | | |
903 | | /// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE |
904 | | /// maps and modified in place. Add it back to the CSE maps, unless an identical |
905 | | /// node already exists, in which case transfer all its users to the existing |
906 | | /// node. This transfer can potentially trigger recursive merging. |
907 | | void |
908 | 10.0M | SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { |
909 | 10.0M | // For node types that aren't CSE'd, just act as if no identical node |
910 | 10.0M | // already exists. |
911 | 10.0M | if (!doNotCSE(N)) { |
912 | 7.86M | SDNode *Existing = CSEMap.GetOrInsertNode(N); |
913 | 7.86M | if (Existing != N) { |
914 | 18.5k | // If there was already an existing matching node, use ReplaceAllUsesWith |
915 | 18.5k | // to replace the dead one with the existing one. This can cause |
916 | 18.5k | // recursive merging of other unrelated nodes down the line. |
917 | 18.5k | ReplaceAllUsesWith(N, Existing); |
918 | 18.5k | |
919 | 18.5k | // N is now dead. Inform the listeners and delete it. |
920 | 87.2k | for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next68.7k ) |
921 | 68.7k | DUL->NodeDeleted(N, Existing); |
922 | 18.5k | DeleteNodeNotInCSEMaps(N); |
923 | 18.5k | return; |
924 | 18.5k | } |
925 | 10.0M | } |
926 | 10.0M | |
927 | 10.0M | // If the node doesn't already exist, we updated it. Inform listeners. |
928 | 37.3M | for (DAGUpdateListener *DUL = UpdateListeners; 10.0M DUL; DUL = DUL->Next27.2M ) |
929 | 27.2M | DUL->NodeUpdated(N); |
930 | 10.0M | } |
931 | | |
932 | | /// FindModifiedNodeSlot - Find a slot for the specified node if its operands |
933 | | /// were replaced with those specified. If this node is never memoized, |
934 | | /// return null, otherwise return a pointer to the slot it would take. If a |
935 | | /// node already exists with these operands, the slot will be non-null. |
936 | | SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, |
937 | 15.8k | void *&InsertPos) { |
938 | 15.8k | if (doNotCSE(N)) |
939 | 0 | return nullptr; |
940 | 15.8k | |
941 | 15.8k | SDValue Ops[] = { Op }; |
942 | 15.8k | FoldingSetNodeID ID; |
943 | 15.8k | AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); |
944 | 15.8k | AddNodeIDCustom(ID, N); |
945 | 15.8k | SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); |
946 | 15.8k | if (Node) |
947 | 0 | Node->intersectFlagsWith(N->getFlags()); |
948 | 15.8k | return Node; |
949 | 15.8k | } |
950 | | |
951 | | /// FindModifiedNodeSlot - Find a slot for the specified node if its operands |
952 | | /// were replaced with those specified. If this node is never memoized, |
953 | | /// return null, otherwise return a pointer to the slot it would take. If a |
954 | | /// node already exists with these operands, the slot will be non-null. |
955 | | SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, |
956 | | SDValue Op1, SDValue Op2, |
957 | 84.3k | void *&InsertPos) { |
958 | 84.3k | if (doNotCSE(N)) |
959 | 0 | return nullptr; |
960 | 84.3k | |
961 | 84.3k | SDValue Ops[] = { Op1, Op2 }; |
962 | 84.3k | FoldingSetNodeID ID; |
963 | 84.3k | AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); |
964 | 84.3k | AddNodeIDCustom(ID, N); |
965 | 84.3k | SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); |
966 | 84.3k | if (Node) |
967 | 1.09k | Node->intersectFlagsWith(N->getFlags()); |
968 | 84.3k | return Node; |
969 | 84.3k | } |
970 | | |
971 | | /// FindModifiedNodeSlot - Find a slot for the specified node if its operands |
972 | | /// were replaced with those specified. If this node is never memoized, |
973 | | /// return null, otherwise return a pointer to the slot it would take. If a |
974 | | /// node already exists with these operands, the slot will be non-null. |
975 | | SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, |
976 | 682k | void *&InsertPos) { |
977 | 682k | if (doNotCSE(N)) |
978 | 16.9k | return nullptr; |
979 | 665k | |
980 | 665k | FoldingSetNodeID ID; |
981 | 665k | AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); |
982 | 665k | AddNodeIDCustom(ID, N); |
983 | 665k | SDNode *Node = FindNodeOrInsertPos(ID, SDLoc(N), InsertPos); |
984 | 665k | if (Node) |
985 | 286 | Node->intersectFlagsWith(N->getFlags()); |
986 | 665k | return Node; |
987 | 665k | } |
988 | | |
989 | 372k | unsigned SelectionDAG::getEVTAlignment(EVT VT) const { |
990 | 372k | Type *Ty = VT == MVT::iPTR ? |
991 | 0 | PointerType::get(Type::getInt8Ty(*getContext()), 0) : |
992 | 372k | VT.getTypeForEVT(*getContext()); |
993 | 372k | |
994 | 372k | return getDataLayout().getABITypeAlignment(Ty); |
995 | 372k | } |
996 | | |
997 | | // EntryNode could meaningfully have debug info if we can find it... |
998 | | SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) |
999 | | : TM(tm), OptLevel(OL), |
1000 | | EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), |
1001 | 38.1k | Root(getEntryNode()) { |
1002 | 38.1k | InsertNode(&EntryNode); |
1003 | 38.1k | DbgInfo = new SDDbgInfo(); |
1004 | 38.1k | } |
1005 | | |
1006 | | void SelectionDAG::init(MachineFunction &NewMF, |
1007 | | OptimizationRemarkEmitter &NewORE, |
1008 | | Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, |
1009 | 278k | LegacyDivergenceAnalysis * Divergence) { |
1010 | 278k | MF = &NewMF; |
1011 | 278k | SDAGISelPass = PassPtr; |
1012 | 278k | ORE = &NewORE; |
1013 | 278k | TLI = getSubtarget().getTargetLowering(); |
1014 | 278k | TSI = getSubtarget().getSelectionDAGInfo(); |
1015 | 278k | LibInfo = LibraryInfo; |
1016 | 278k | Context = &MF->getFunction().getContext(); |
1017 | 278k | DA = Divergence; |
1018 | 278k | } |
1019 | | |
1020 | 37.9k | SelectionDAG::~SelectionDAG() { |
1021 | 37.9k | assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); |
1022 | 37.9k | allnodes_clear(); |
1023 | 37.9k | OperandRecycler.clear(OperandAllocator); |
1024 | 37.9k | delete DbgInfo; |
1025 | 37.9k | } |
1026 | | |
1027 | 1.28M | void SelectionDAG::allnodes_clear() { |
1028 | 1.28M | assert(&*AllNodes.begin() == &EntryNode); |
1029 | 1.28M | AllNodes.remove(AllNodes.begin()); |
1030 | 26.8M | while (!AllNodes.empty()) |
1031 | 25.5M | DeallocateNode(&AllNodes.front()); |
1032 | | #ifndef NDEBUG |
1033 | | NextPersistentId = 0; |
1034 | | #endif |
1035 | | } |
1036 | | |
1037 | | SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, |
1038 | 11.7M | void *&InsertPos) { |
1039 | 11.7M | SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); |
1040 | 11.7M | if (N) { |
1041 | 4.00M | switch (N->getOpcode()) { |
1042 | 4.00M | default: break; |
1043 | 4.00M | case ISD::Constant: |
1044 | 0 | case ISD::ConstantFP: |
1045 | 0 | llvm_unreachable("Querying for Constant and ConstantFP nodes requires " |
1046 | 11.7M | "debug location. Use another overload."); |
1047 | 11.7M | } |
1048 | 11.7M | } |
1049 | 11.7M | return N; |
1050 | 11.7M | } |
1051 | | |
1052 | | SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, |
1053 | 59.0M | const SDLoc &DL, void *&InsertPos) { |
1054 | 59.0M | SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); |
1055 | 59.0M | if (N) { |
1056 | 15.7M | switch (N->getOpcode()) { |
1057 | 15.7M | case ISD::Constant: |
1058 | 4.76M | case ISD::ConstantFP: |
1059 | 4.76M | // Erase debug location from the node if the node is used at several |
1060 | 4.76M | // different places. Do not propagate one location to all uses as it |
1061 | 4.76M | // will cause a worse single stepping debugging experience. |
1062 | 4.76M | if (N->getDebugLoc() != DL.getDebugLoc()) |
1063 | 768k | N->setDebugLoc(DebugLoc()); |
1064 | 4.76M | break; |
1065 | 11.0M | default: |
1066 | 11.0M | // When the node's point of use is located earlier in the instruction |
1067 | 11.0M | // sequence than its prior point of use, update its debug info to the |
1068 | 11.0M | // earlier location. |
1069 | 11.0M | if (DL.getIROrder() && DL.getIROrder() < N->getIROrder()6.53M ) |
1070 | 136k | N->setDebugLoc(DL.getDebugLoc()); |
1071 | 11.0M | break; |
1072 | 59.0M | } |
1073 | 59.0M | } |
1074 | 59.0M | return N; |
1075 | 59.0M | } |
1076 | | |
1077 | 1.24M | void SelectionDAG::clear() { |
1078 | 1.24M | allnodes_clear(); |
1079 | 1.24M | OperandRecycler.clear(OperandAllocator); |
1080 | 1.24M | OperandAllocator.Reset(); |
1081 | 1.24M | CSEMap.clear(); |
1082 | 1.24M | |
1083 | 1.24M | ExtendedValueTypeNodes.clear(); |
1084 | 1.24M | ExternalSymbols.clear(); |
1085 | 1.24M | TargetExternalSymbols.clear(); |
1086 | 1.24M | MCSymbols.clear(); |
1087 | 1.24M | std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), |
1088 | 1.24M | static_cast<CondCodeSDNode*>(nullptr)); |
1089 | 1.24M | std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), |
1090 | 1.24M | static_cast<SDNode*>(nullptr)); |
1091 | 1.24M | |
1092 | 1.24M | EntryNode.UseList = nullptr; |
1093 | 1.24M | InsertNode(&EntryNode); |
1094 | 1.24M | Root = getEntryNode(); |
1095 | 1.24M | DbgInfo->clear(); |
1096 | 1.24M | } |
1097 | | |
1098 | 91 | SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { |
1099 | 91 | return VT.bitsGT(Op.getValueType()) |
1100 | 91 | ? getNode(ISD::FP_EXTEND, DL, VT, Op)11 |
1101 | 91 | : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL))80 ; |
1102 | 91 | } |
1103 | | |
1104 | 139k | SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { |
1105 | 139k | return VT.bitsGT(Op.getValueType()) ? |
1106 | 75.2k | getNode(ISD::ANY_EXTEND, DL, VT, Op) : |
1107 | 139k | getNode(ISD::TRUNCATE, DL, VT, Op)64.2k ; |
1108 | 139k | } |
1109 | | |
1110 | 1.17M | SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { |
1111 | 1.17M | return VT.bitsGT(Op.getValueType()) ? |
1112 | 56.3k | getNode(ISD::SIGN_EXTEND, DL, VT, Op) : |
1113 | 1.17M | getNode(ISD::TRUNCATE, DL, VT, Op)1.12M ; |
1114 | 1.17M | } |
1115 | | |
1116 | 362k | SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { |
1117 | 362k | return VT.bitsGT(Op.getValueType()) ? |
1118 | 22.8k | getNode(ISD::ZERO_EXTEND, DL, VT, Op) : |
1119 | 362k | getNode(ISD::TRUNCATE, DL, VT, Op)339k ; |
1120 | 362k | } |
1121 | | |
1122 | | SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, |
1123 | 8.09k | EVT OpVT) { |
1124 | 8.09k | if (VT.bitsLE(Op.getValueType())) |
1125 | 7.58k | return getNode(ISD::TRUNCATE, SL, VT, Op); |
1126 | 517 | |
1127 | 517 | TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT); |
1128 | 517 | return getNode(TLI->getExtendForContent(BType), SL, VT, Op); |
1129 | 517 | } |
1130 | | |
1131 | 496k | SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { |
1132 | 496k | assert(!VT.isVector() && |
1133 | 496k | "getZeroExtendInReg should use the vector element type instead of " |
1134 | 496k | "the vector type!"); |
1135 | 496k | if (Op.getValueType().getScalarType() == VT) return Op1.00k ; |
1136 | 495k | unsigned BitWidth = Op.getScalarValueSizeInBits(); |
1137 | 495k | APInt Imm = APInt::getLowBitsSet(BitWidth, |
1138 | 495k | VT.getSizeInBits()); |
1139 | 495k | return getNode(ISD::AND, DL, Op.getValueType(), Op, |
1140 | 495k | getConstant(Imm, DL, Op.getValueType())); |
1141 | 495k | } |
1142 | | |
1143 | 104k | SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { |
1144 | 104k | // Only unsigned pointer semantics are supported right now. In the future this |
1145 | 104k | // might delegate to TLI to check pointer signedness. |
1146 | 104k | return getZExtOrTrunc(Op, DL, VT); |
1147 | 104k | } |
1148 | | |
1149 | 0 | SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { |
1150 | 0 | // Only unsigned pointer semantics are supported right now. In the future this |
1151 | 0 | // might delegate to TLI to check pointer signedness. |
1152 | 0 | return getZeroExtendInReg(Op, DL, VT); |
1153 | 0 | } |
1154 | | |
1155 | | /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). |
1156 | 8.22k | SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { |
1157 | 8.22k | EVT EltVT = VT.getScalarType(); |
1158 | 8.22k | SDValue NegOne = |
1159 | 8.22k | getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); |
1160 | 8.22k | return getNode(ISD::XOR, DL, VT, Val, NegOne); |
1161 | 8.22k | } |
1162 | | |
1163 | 226 | SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { |
1164 | 226 | SDValue TrueValue = getBoolConstant(true, DL, VT, VT); |
1165 | 226 | return getNode(ISD::XOR, DL, VT, Val, TrueValue); |
1166 | 226 | } |
1167 | | |
1168 | | SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, |
1169 | 20.4k | EVT OpVT) { |
1170 | 20.4k | if (!V) |
1171 | 17.8k | return getConstant(0, DL, VT); |
1172 | 2.56k | |
1173 | 2.56k | switch (TLI->getBooleanContents(OpVT)) { |
1174 | 2.56k | case TargetLowering::ZeroOrOneBooleanContent: |
1175 | 2.26k | case TargetLowering::UndefinedBooleanContent: |
1176 | 2.26k | return getConstant(1, DL, VT); |
1177 | 2.26k | case TargetLowering::ZeroOrNegativeOneBooleanContent: |
1178 | 298 | return getAllOnesConstant(DL, VT); |
1179 | 0 | } |
1180 | 0 | llvm_unreachable("Unexpected boolean content enum!"); |
1181 | 0 | } |
1182 | | |
1183 | | SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT, |
1184 | 13.8M | bool isT, bool isO) { |
1185 | 13.8M | EVT EltVT = VT.getScalarType(); |
1186 | 13.8M | assert((EltVT.getSizeInBits() >= 64 || |
1187 | 13.8M | (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && |
1188 | 13.8M | "getConstant with a uint64_t value that doesn't fit in the type!"); |
1189 | 13.8M | return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO); |
1190 | 13.8M | } |
1191 | | |
1192 | | SDValue SelectionDAG::getConstant(const APInt &Val, const SDLoc &DL, EVT VT, |
1193 | 17.4M | bool isT, bool isO) { |
1194 | 17.4M | return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO); |
1195 | 17.4M | } |
1196 | | |
1197 | | SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, |
1198 | 20.0M | EVT VT, bool isT, bool isO) { |
1199 | 20.0M | assert(VT.isInteger() && "Cannot create FP integer constant!"); |
1200 | 20.0M | |
1201 | 20.0M | EVT EltVT = VT.getScalarType(); |
1202 | 20.0M | const ConstantInt *Elt = &Val; |
1203 | 20.0M | |
1204 | 20.0M | // In some cases the vector type is legal but the element type is illegal and |
1205 | 20.0M | // needs to be promoted, for example v8i8 on ARM. In this case, promote the |
1206 | 20.0M | // inserted value (the type does not need to match the vector element type). |
1207 | 20.0M | // Any extra bits introduced will be truncated away. |
1208 | 20.0M | if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == |
1209 | 113k | TargetLowering::TypePromoteInteger) { |
1210 | 9.75k | EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); |
1211 | 9.75k | APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); |
1212 | 9.75k | Elt = ConstantInt::get(*getContext(), NewVal); |
1213 | 9.75k | } |
1214 | 20.0M | // In other cases the element type is illegal and needs to be expanded, for |
1215 | 20.0M | // example v2i64 on MIPS32. In this case, find the nearest legal type, split |
1216 | 20.0M | // the value into n parts and use a vector type with n-times the elements. |
1217 | 20.0M | // Then bitcast to the type requested. |
1218 | 20.0M | // Legalizing constants too early makes the DAGCombiner's job harder so we |
1219 | 20.0M | // only legalize if the DAG tells us we must produce legal types. |
1220 | 20.0M | else if (NewNodesMustHaveLegalTypes && VT.isVector()10.7M && |
1221 | 20.0M | TLI->getTypeAction(*getContext(), EltVT) == |
1222 | 75.7k | TargetLowering::TypeExpandInteger) { |
1223 | 579 | const APInt &NewVal = Elt->getValue(); |
1224 | 579 | EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); |
1225 | 579 | unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); |
1226 | 579 | unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; |
1227 | 579 | EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); |
1228 | 579 | |
1229 | 579 | // Check the temporary vector is the correct size. If this fails then |
1230 | 579 | // getTypeToTransformTo() probably returned a type whose size (in bits) |
1231 | 579 | // isn't a power-of-2 factor of the requested type size. |
1232 | 579 | assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); |
1233 | 579 | |
1234 | 579 | SmallVector<SDValue, 2> EltParts; |
1235 | 1.73k | for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i1.15k ) { |
1236 | 1.15k | EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) |
1237 | 1.15k | .zextOrTrunc(ViaEltSizeInBits), DL, |
1238 | 1.15k | ViaEltVT, isT, isO)); |
1239 | 1.15k | } |
1240 | 579 | |
1241 | 579 | // EltParts is currently in little endian order. If we actually want |
1242 | 579 | // big-endian order then reverse it now. |
1243 | 579 | if (getDataLayout().isBigEndian()) |
1244 | 57 | std::reverse(EltParts.begin(), EltParts.end()); |
1245 | 579 | |
1246 | 579 | // The elements must be reversed when the element order is different |
1247 | 579 | // to the endianness of the elements (because the BITCAST is itself a |
1248 | 579 | // vector shuffle in this situation). However, we do not need any code to |
1249 | 579 | // perform this reversal because getConstant() is producing a vector |
1250 | 579 | // splat. |
1251 | 579 | // This situation occurs in MIPS MSA. |
1252 | 579 | |
1253 | 579 | SmallVector<SDValue, 8> Ops; |
1254 | 1.83k | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i1.25k ) |
1255 | 1.25k | Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); |
1256 | 579 | |
1257 | 579 | SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); |
1258 | 579 | return V; |
1259 | 579 | } |
1260 | 20.0M | |
1261 | 20.0M | assert(Elt->getBitWidth() == EltVT.getSizeInBits() && |
1262 | 20.0M | "APInt size does not match type size!"); |
1263 | 20.0M | unsigned Opc = isT ? ISD::TargetConstant9.95M : ISD::Constant10.1M ; |
1264 | 20.0M | FoldingSetNodeID ID; |
1265 | 20.0M | AddNodeIDNode(ID, Opc, getVTList(EltVT), None); |
1266 | 20.0M | ID.AddPointer(Elt); |
1267 | 20.0M | ID.AddBoolean(isO); |
1268 | 20.0M | void *IP = nullptr; |
1269 | 20.0M | SDNode *N = nullptr; |
1270 | 20.0M | if ((N = FindNodeOrInsertPos(ID, DL, IP))) |
1271 | 10.3M | if (!VT.isVector()) |
1272 | 10.2M | return SDValue(N, 0); |
1273 | 9.78M | |
1274 | 9.78M | if (!N) { |
1275 | 9.72M | N = newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT); |
1276 | 9.72M | CSEMap.InsertNode(N, IP); |
1277 | 9.72M | InsertNode(N); |
1278 | 9.72M | NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); |
1279 | 9.72M | } |
1280 | 9.78M | |
1281 | 9.78M | SDValue Result(N, 0); |
1282 | 9.78M | if (VT.isVector()) |
1283 | 112k | Result = getSplatBuildVector(VT, DL, Result); |
1284 | 9.78M | |
1285 | 9.78M | return Result; |
1286 | 9.78M | } |
1287 | | |
1288 | | SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, |
1289 | 1.93M | bool isTarget) { |
1290 | 1.93M | return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); |
1291 | 1.93M | } |
1292 | | |
1293 | | SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, |
1294 | 210 | const SDLoc &DL, bool LegalTypes) { |
1295 | 210 | EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes); |
1296 | 210 | return getConstant(Val, DL, ShiftVT); |
1297 | 210 | } |
1298 | | |
1299 | | SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, |
1300 | 19.6k | bool isTarget) { |
1301 | 19.6k | return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); |
1302 | 19.6k | } |
1303 | | |
1304 | | SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, |
1305 | 78.8k | EVT VT, bool isTarget) { |
1306 | 78.8k | assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); |
1307 | 78.8k | |
1308 | 78.8k | EVT EltVT = VT.getScalarType(); |
1309 | 78.8k | |
1310 | 78.8k | // Do the map lookup using the actual bit pattern for the floating point |
1311 | 78.8k | // value, so that we don't have problems with 0.0 comparing equal to -0.0, and |
1312 | 78.8k | // we don't have issues with SNANs. |
1313 | 78.8k | unsigned Opc = isTarget ? ISD::TargetConstantFP3.41k : ISD::ConstantFP75.4k ; |
1314 | 78.8k | FoldingSetNodeID ID; |
1315 | 78.8k | AddNodeIDNode(ID, Opc, getVTList(EltVT), None); |
1316 | 78.8k | ID.AddPointer(&V); |
1317 | 78.8k | void *IP = nullptr; |
1318 | 78.8k | SDNode *N = nullptr; |
1319 | 78.8k | if ((N = FindNodeOrInsertPos(ID, DL, IP))) |
1320 | 8.12k | if (!VT.isVector()) |
1321 | 6.53k | return SDValue(N, 0); |
1322 | 72.3k | |
1323 | 72.3k | if (!N) { |
1324 | 70.7k | N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT); |
1325 | 70.7k | CSEMap.InsertNode(N, IP); |
1326 | 70.7k | InsertNode(N); |
1327 | 70.7k | } |
1328 | 72.3k | |
1329 | 72.3k | SDValue Result(N, 0); |
1330 | 72.3k | if (VT.isVector()) |
1331 | 3.34k | Result = getSplatBuildVector(VT, DL, Result); |
1332 | 72.3k | NewSDValueDbgMsg(Result, "Creating fp constant: ", this); |
1333 | 72.3k | return Result; |
1334 | 72.3k | } |
1335 | | |
1336 | | SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, |
1337 | 9.20k | bool isTarget) { |
1338 | 9.20k | EVT EltVT = VT.getScalarType(); |
1339 | 9.20k | if (EltVT == MVT::f32) |
1340 | 5.82k | return getConstantFP(APFloat((float)Val), DL, VT, isTarget); |
1341 | 3.38k | else if (EltVT == MVT::f64) |
1342 | 3.03k | return getConstantFP(APFloat(Val), DL, VT, isTarget); |
1343 | 349 | else if (EltVT == MVT::f80 || EltVT == MVT::f128201 || EltVT == MVT::ppcf128194 || |
1344 | 349 | EltVT == MVT::f16192 ) { |
1345 | 349 | bool Ignored; |
1346 | 349 | APFloat APF = APFloat(Val); |
1347 | 349 | APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, |
1348 | 349 | &Ignored); |
1349 | 349 | return getConstantFP(APF, DL, VT, isTarget); |
1350 | 349 | } else |
1351 | 349 | llvm_unreachable("Unsupported type in getConstantFP"); |
1352 | 9.20k | } |
1353 | | |
1354 | | SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, |
1355 | | EVT VT, int64_t Offset, bool isTargetGA, |
1356 | 1.84M | unsigned char TargetFlags) { |
1357 | 1.84M | assert((TargetFlags == 0 || isTargetGA) && |
1358 | 1.84M | "Cannot set target flags on target-independent globals"); |
1359 | 1.84M | |
1360 | 1.84M | // Truncate (with sign-extension) the offset value to the pointer size. |
1361 | 1.84M | unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); |
1362 | 1.84M | if (BitWidth < 64) |
1363 | 285k | Offset = SignExtend64(Offset, BitWidth); |
1364 | 1.84M | |
1365 | 1.84M | unsigned Opc; |
1366 | 1.84M | if (GV->isThreadLocal()) |
1367 | 3.38k | Opc = isTargetGA ? ISD::TargetGlobalTLSAddress1.94k : ISD::GlobalTLSAddress1.43k ; |
1368 | 1.83M | else |
1369 | 1.83M | Opc = isTargetGA ? ISD::TargetGlobalAddress1.06M : ISD::GlobalAddress772k ; |
1370 | 1.84M | |
1371 | 1.84M | FoldingSetNodeID ID; |
1372 | 1.84M | AddNodeIDNode(ID, Opc, getVTList(VT), None); |
1373 | 1.84M | ID.AddPointer(GV); |
1374 | 1.84M | ID.AddInteger(Offset); |
1375 | 1.84M | ID.AddInteger(TargetFlags); |
1376 | 1.84M | void *IP = nullptr; |
1377 | 1.84M | if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) |
1378 | 50.4k | return SDValue(E, 0); |
1379 | 1.79M | |
1380 | 1.79M | auto *N = newSDNode<GlobalAddressSDNode>( |
1381 | 1.79M | Opc, DL.getIROrder(), DL.getDebugLoc(), GV, VT, Offset, TargetFlags); |
1382 | 1.79M | CSEMap.InsertNode(N, IP); |
1383 | 1.79M | InsertNode(N); |
1384 | 1.79M | return SDValue(N, 0); |
1385 | 1.79M | } |
1386 | | |
1387 | 673k | SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { |
1388 | 673k | unsigned Opc = isTarget ? ISD::TargetFrameIndex422k : ISD::FrameIndex250k ; |
1389 | 673k | FoldingSetNodeID ID; |
1390 | 673k | AddNodeIDNode(ID, Opc, getVTList(VT), None); |
1391 | 673k | ID.AddInteger(FI); |
1392 | 673k | void *IP = nullptr; |
1393 | 673k | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1394 | 186k | return SDValue(E, 0); |
1395 | 487k | |
1396 | 487k | auto *N = newSDNode<FrameIndexSDNode>(FI, VT, isTarget); |
1397 | 487k | CSEMap.InsertNode(N, IP); |
1398 | 487k | InsertNode(N); |
1399 | 487k | return SDValue(N, 0); |
1400 | 487k | } |
1401 | | |
1402 | | SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, |
1403 | 8.21k | unsigned char TargetFlags) { |
1404 | 8.21k | assert((TargetFlags == 0 || isTarget) && |
1405 | 8.21k | "Cannot set target flags on target-independent jump tables"); |
1406 | 8.21k | unsigned Opc = isTarget ? ISD::TargetJumpTable5.94k : ISD::JumpTable2.26k ; |
1407 | 8.21k | FoldingSetNodeID ID; |
1408 | 8.21k | AddNodeIDNode(ID, Opc, getVTList(VT), None); |
1409 | 8.21k | ID.AddInteger(JTI); |
1410 | 8.21k | ID.AddInteger(TargetFlags); |
1411 | 8.21k | void *IP = nullptr; |
1412 | 8.21k | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1413 | 99 | return SDValue(E, 0); |
1414 | 8.11k | |
1415 | 8.11k | auto *N = newSDNode<JumpTableSDNode>(JTI, VT, isTarget, TargetFlags); |
1416 | 8.11k | CSEMap.InsertNode(N, IP); |
1417 | 8.11k | InsertNode(N); |
1418 | 8.11k | return SDValue(N, 0); |
1419 | 8.11k | } |
1420 | | |
1421 | | SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, |
1422 | | unsigned Alignment, int Offset, |
1423 | | bool isTarget, |
1424 | 207k | unsigned char TargetFlags) { |
1425 | 207k | assert((TargetFlags == 0 || isTarget) && |
1426 | 207k | "Cannot set target flags on target-independent globals"); |
1427 | 207k | if (Alignment == 0) |
1428 | 70.6k | Alignment = MF->getFunction().hasOptSize() |
1429 | 70.6k | ? getDataLayout().getABITypeAlignment(C->getType())1.98k |
1430 | 70.6k | : getDataLayout().getPrefTypeAlignment(C->getType())68.6k ; |
1431 | 207k | unsigned Opc = isTarget ? ISD::TargetConstantPool136k : ISD::ConstantPool71.1k ; |
1432 | 207k | FoldingSetNodeID ID; |
1433 | 207k | AddNodeIDNode(ID, Opc, getVTList(VT), None); |
1434 | 207k | ID.AddInteger(Alignment); |
1435 | 207k | ID.AddInteger(Offset); |
1436 | 207k | ID.AddPointer(C); |
1437 | 207k | ID.AddInteger(TargetFlags); |
1438 | 207k | void *IP = nullptr; |
1439 | 207k | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1440 | 4.88k | return SDValue(E, 0); |
1441 | 202k | |
1442 | 202k | auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, |
1443 | 202k | TargetFlags); |
1444 | 202k | CSEMap.InsertNode(N, IP); |
1445 | 202k | InsertNode(N); |
1446 | 202k | return SDValue(N, 0); |
1447 | 202k | } |
1448 | | |
1449 | | SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, |
1450 | | unsigned Alignment, int Offset, |
1451 | | bool isTarget, |
1452 | 265 | unsigned char TargetFlags) { |
1453 | 265 | assert((TargetFlags == 0 || isTarget) && |
1454 | 265 | "Cannot set target flags on target-independent globals"); |
1455 | 265 | if (Alignment == 0) |
1456 | 0 | Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); |
1457 | 265 | unsigned Opc = isTarget ? ISD::TargetConstantPool248 : ISD::ConstantPool17 ; |
1458 | 265 | FoldingSetNodeID ID; |
1459 | 265 | AddNodeIDNode(ID, Opc, getVTList(VT), None); |
1460 | 265 | ID.AddInteger(Alignment); |
1461 | 265 | ID.AddInteger(Offset); |
1462 | 265 | C->addSelectionDAGCSEId(ID); |
1463 | 265 | ID.AddInteger(TargetFlags); |
1464 | 265 | void *IP = nullptr; |
1465 | 265 | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1466 | 0 | return SDValue(E, 0); |
1467 | 265 | |
1468 | 265 | auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, |
1469 | 265 | TargetFlags); |
1470 | 265 | CSEMap.InsertNode(N, IP); |
1471 | 265 | InsertNode(N); |
1472 | 265 | return SDValue(N, 0); |
1473 | 265 | } |
1474 | | |
1475 | | SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, |
1476 | 0 | unsigned char TargetFlags) { |
1477 | 0 | FoldingSetNodeID ID; |
1478 | 0 | AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); |
1479 | 0 | ID.AddInteger(Index); |
1480 | 0 | ID.AddInteger(Offset); |
1481 | 0 | ID.AddInteger(TargetFlags); |
1482 | 0 | void *IP = nullptr; |
1483 | 0 | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1484 | 0 | return SDValue(E, 0); |
1485 | 0 | |
1486 | 0 | auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags); |
1487 | 0 | CSEMap.InsertNode(N, IP); |
1488 | 0 | InsertNode(N); |
1489 | 0 | return SDValue(N, 0); |
1490 | 0 | } |
1491 | | |
1492 | 1.23M | SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { |
1493 | 1.23M | FoldingSetNodeID ID; |
1494 | 1.23M | AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); |
1495 | 1.23M | ID.AddPointer(MBB); |
1496 | 1.23M | void *IP = nullptr; |
1497 | 1.23M | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1498 | 56 | return SDValue(E, 0); |
1499 | 1.23M | |
1500 | 1.23M | auto *N = newSDNode<BasicBlockSDNode>(MBB); |
1501 | 1.23M | CSEMap.InsertNode(N, IP); |
1502 | 1.23M | InsertNode(N); |
1503 | 1.23M | return SDValue(N, 0); |
1504 | 1.23M | } |
1505 | | |
1506 | 313k | SDValue SelectionDAG::getValueType(EVT VT) { |
1507 | 313k | if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= |
1508 | 229k | ValueTypeNodes.size()) |
1509 | 13.5k | ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); |
1510 | 313k | |
1511 | 313k | SDNode *&N = VT.isExtended() ? |
1512 | 229k | ExtendedValueTypeNodes[VT]83.3k : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; |
1513 | 313k | |
1514 | 313k | if (N) return SDValue(N, 0)99.8k ; |
1515 | 213k | N = newSDNode<VTSDNode>(VT); |
1516 | 213k | InsertNode(N); |
1517 | 213k | return SDValue(N, 0); |
1518 | 213k | } |
1519 | | |
1520 | 18.9k | SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { |
1521 | 18.9k | SDNode *&N = ExternalSymbols[Sym]; |
1522 | 18.9k | if (N) return SDValue(N, 0)4.11k ; |
1523 | 14.8k | N = newSDNode<ExternalSymbolSDNode>(false, Sym, 0, VT); |
1524 | 14.8k | InsertNode(N); |
1525 | 14.8k | return SDValue(N, 0); |
1526 | 14.8k | } |
1527 | | |
1528 | 110 | SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { |
1529 | 110 | SDNode *&N = MCSymbols[Sym]; |
1530 | 110 | if (N) |
1531 | 12 | return SDValue(N, 0); |
1532 | 98 | N = newSDNode<MCSymbolSDNode>(Sym, VT); |
1533 | 98 | InsertNode(N); |
1534 | 98 | return SDValue(N, 0); |
1535 | 98 | } |
1536 | | |
1537 | | SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, |
1538 | 42.2k | unsigned char TargetFlags) { |
1539 | 42.2k | SDNode *&N = |
1540 | 42.2k | TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, |
1541 | 42.2k | TargetFlags)]; |
1542 | 42.2k | if (N) return SDValue(N, 0)14.3k ; |
1543 | 27.8k | N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT); |
1544 | 27.8k | InsertNode(N); |
1545 | 27.8k | return SDValue(N, 0); |
1546 | 27.8k | } |
1547 | | |
1548 | 1.12M | SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { |
1549 | 1.12M | if ((unsigned)Cond >= CondCodeNodes.size()) |
1550 | 26.8k | CondCodeNodes.resize(Cond+1); |
1551 | 1.12M | |
1552 | 1.12M | if (!CondCodeNodes[Cond]) { |
1553 | 952k | auto *N = newSDNode<CondCodeSDNode>(Cond); |
1554 | 952k | CondCodeNodes[Cond] = N; |
1555 | 952k | InsertNode(N); |
1556 | 952k | } |
1557 | 1.12M | |
1558 | 1.12M | return SDValue(CondCodeNodes[Cond], 0); |
1559 | 1.12M | } |
1560 | | |
1561 | | /// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that |
1562 | | /// point at N1 to point at N2 and indices that point at N2 to point at N1. |
1563 | 2.16k | static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) { |
1564 | 2.16k | std::swap(N1, N2); |
1565 | 2.16k | ShuffleVectorSDNode::commuteMask(M); |
1566 | 2.16k | } |
1567 | | |
1568 | | SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, |
1569 | 195k | SDValue N2, ArrayRef<int> Mask) { |
1570 | 195k | assert(VT.getVectorNumElements() == Mask.size() && |
1571 | 195k | "Must have the same number of vector elements as mask elements!"); |
1572 | 195k | assert(VT == N1.getValueType() && VT == N2.getValueType() && |
1573 | 195k | "Invalid VECTOR_SHUFFLE"); |
1574 | 195k | |
1575 | 195k | // Canonicalize shuffle undef, undef -> undef |
1576 | 195k | if (N1.isUndef() && N2.isUndef()714 ) |
1577 | 164 | return getUNDEF(VT); |
1578 | 195k | |
1579 | 195k | // Validate that all indices in Mask are within the range of the elements |
1580 | 195k | // input to the shuffle. |
1581 | 195k | int NElts = Mask.size(); |
1582 | 195k | assert(llvm::all_of(Mask, |
1583 | 195k | [&](int M) { return M < (NElts * 2) && M >= -1; }) && |
1584 | 195k | "Index out of range"); |
1585 | 195k | |
1586 | 195k | // Copy the mask so we can do any needed cleanup. |
1587 | 195k | SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end()); |
1588 | 195k | |
1589 | 195k | // Canonicalize shuffle v, v -> v, undef |
1590 | 195k | if (N1 == N2) { |
1591 | 14.2k | N2 = getUNDEF(VT); |
1592 | 120k | for (int i = 0; i != NElts; ++i106k ) |
1593 | 106k | if (MaskVec[i] >= NElts) MaskVec[i] -= NElts7.31k ; |
1594 | 14.2k | } |
1595 | 195k | |
1596 | 195k | // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. |
1597 | 195k | if (N1.isUndef()) |
1598 | 550 | commuteShuffle(N1, N2, MaskVec); |
1599 | 195k | |
1600 | 195k | if (TLI->hasVectorBlend()) { |
1601 | 158k | // If shuffling a splat, try to blend the splat instead. We do this here so |
1602 | 158k | // that even when this arises during lowering we don't have to re-handle it. |
1603 | 158k | auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { |
1604 | 12.9k | BitVector UndefElements; |
1605 | 12.9k | SDValue Splat = BV->getSplatValue(&UndefElements); |
1606 | 12.9k | if (!Splat) |
1607 | 1.84k | return; |
1608 | 11.1k | |
1609 | 133k | for (int i = 0; 11.1k i < NElts; ++i122k ) { |
1610 | 122k | if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)78.4k ) |
1611 | 51.3k | continue; |
1612 | 70.6k | |
1613 | 70.6k | // If this input comes from undef, mark it as such. |
1614 | 70.6k | if (UndefElements[MaskVec[i] - Offset]) { |
1615 | 39 | MaskVec[i] = -1; |
1616 | 39 | continue; |
1617 | 39 | } |
1618 | 70.5k | |
1619 | 70.5k | // If we can blend a non-undef lane, use that instead. |
1620 | 70.5k | if (!UndefElements[i]) |
1621 | 67.8k | MaskVec[i] = i + Offset; |
1622 | 70.5k | } |
1623 | 11.1k | }; |
1624 | 158k | if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) |
1625 | 4.37k | BlendSplat(N1BV, 0); |
1626 | 158k | if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) |
1627 | 8.62k | BlendSplat(N2BV, NElts); |
1628 | 158k | } |
1629 | 195k | |
1630 | 195k | // Canonicalize all index into lhs, -> shuffle lhs, undef |
1631 | 195k | // Canonicalize all index into rhs, -> shuffle rhs, undef |
1632 | 195k | bool AllLHS = true, AllRHS = true; |
1633 | 195k | bool N2Undef = N2.isUndef(); |
1634 | 2.23M | for (int i = 0; i != NElts; ++i2.04M ) { |
1635 | 2.04M | if (MaskVec[i] >= NElts) { |
1636 | 253k | if (N2Undef) |
1637 | 37.8k | MaskVec[i] = -1; |
1638 | 215k | else |
1639 | 215k | AllLHS = false; |
1640 | 1.78M | } else if (MaskVec[i] >= 0) { |
1641 | 1.00M | AllRHS = false; |
1642 | 1.00M | } |
1643 | 2.04M | } |
1644 | 195k | if (AllLHS && AllRHS140k ) |
1645 | 216 | return getUNDEF(VT); |
1646 | 195k | if (AllLHS && !N2Undef140k ) |
1647 | 4.18k | N2 = getUNDEF(VT); |
1648 | 195k | if (AllRHS) { |
1649 | 1.61k | N1 = getUNDEF(VT); |
1650 | 1.61k | commuteShuffle(N1, N2, MaskVec); |
1651 | 1.61k | } |
1652 | 195k | // Reset our undef status after accounting for the mask. |
1653 | 195k | N2Undef = N2.isUndef(); |
1654 | 195k | // Re-check whether both sides ended up undef. |
1655 | 195k | if (N1.isUndef() && N2Undef0 ) |
1656 | 0 | return getUNDEF(VT); |
1657 | 195k | |
1658 | 195k | // If Identity shuffle return that node. |
1659 | 195k | bool Identity = true, AllSame = true; |
1660 | 2.23M | for (int i = 0; i != NElts; ++i2.04M ) { |
1661 | 2.04M | if (MaskVec[i] >= 0 && MaskVec[i] != i1.21M ) Identity = false876k ; |
1662 | 2.04M | if (MaskVec[i] != MaskVec[0]) AllSame = false1.66M ; |
1663 | 2.04M | } |
1664 | 195k | if (Identity && NElts33.0k ) |
1665 | 33.0k | return N1; |
1666 | 162k | |
1667 | 162k | // Shuffling a constant splat doesn't change the result. |
1668 | 162k | if (N2Undef) { |
1669 | 109k | SDValue V = N1; |
1670 | 109k | |
1671 | 109k | // Look through any bitcasts. We check that these don't change the number |
1672 | 109k | // (and size) of elements and just changes their types. |
1673 | 137k | while (V.getOpcode() == ISD::BITCAST) |
1674 | 27.8k | V = V->getOperand(0); |
1675 | 109k | |
1676 | 109k | // A splat should always show up as a build vector node. |
1677 | 109k | if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) { |
1678 | 2.70k | BitVector UndefElements; |
1679 | 2.70k | SDValue Splat = BV->getSplatValue(&UndefElements); |
1680 | 2.70k | // If this is a splat of an undef, shuffling it is also undef. |
1681 | 2.70k | if (Splat && Splat.isUndef()363 ) |
1682 | 0 | return getUNDEF(VT); |
1683 | 2.70k | |
1684 | 2.70k | bool SameNumElts = |
1685 | 2.70k | V.getValueType().getVectorNumElements() == VT.getVectorNumElements(); |
1686 | 2.70k | |
1687 | 2.70k | // We only have a splat which can skip shuffles if there is a splatted |
1688 | 2.70k | // value and no undef lanes rearranged by the shuffle. |
1689 | 2.70k | if (Splat && UndefElements.none()363 ) { |
1690 | 263 | // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the |
1691 | 263 | // number of elements match or the value splatted is a zero constant. |
1692 | 263 | if (SameNumElts) |
1693 | 240 | return N1; |
1694 | 23 | if (auto *C = dyn_cast<ConstantSDNode>(Splat)) |
1695 | 21 | if (C->isNullValue()) |
1696 | 14 | return N1; |
1697 | 2.44k | } |
1698 | 2.44k | |
1699 | 2.44k | // If the shuffle itself creates a splat, build the vector directly. |
1700 | 2.44k | if (AllSame && SameNumElts1.13k ) { |
1701 | 1.04k | EVT BuildVT = BV->getValueType(0); |
1702 | 1.04k | const SDValue &Splatted = BV->getOperand(MaskVec[0]); |
1703 | 1.04k | SDValue NewBV = getSplatBuildVector(BuildVT, dl, Splatted); |
1704 | 1.04k | |
1705 | 1.04k | // We may have jumped through bitcasts, so the type of the |
1706 | 1.04k | // BUILD_VECTOR may not match the type of the shuffle. |
1707 | 1.04k | if (BuildVT != VT) |
1708 | 0 | NewBV = getNode(ISD::BITCAST, dl, VT, NewBV); |
1709 | 1.04k | return NewBV; |
1710 | 1.04k | } |
1711 | 160k | } |
1712 | 109k | } |
1713 | 160k | |
1714 | 160k | FoldingSetNodeID ID; |
1715 | 160k | SDValue Ops[2] = { N1, N2 }; |
1716 | 160k | AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops); |
1717 | 2.01M | for (int i = 0; i != NElts; ++i1.85M ) |
1718 | 1.85M | ID.AddInteger(MaskVec[i]); |
1719 | 160k | |
1720 | 160k | void* IP = nullptr; |
1721 | 160k | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) |
1722 | 5.23k | return SDValue(E, 0); |
1723 | 155k | |
1724 | 155k | // Allocate the mask array for the node out of the BumpPtrAllocator, since |
1725 | 155k | // SDNode doesn't have access to it. This memory will be "leaked" when |
1726 | 155k | // the node is deallocated, but recovered when the NodeAllocator is released. |
1727 | 155k | int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); |
1728 | 155k | llvm::copy(MaskVec, MaskAlloc); |
1729 | 155k | |
1730 | 155k | auto *N = newSDNode<ShuffleVectorSDNode>(VT, dl.getIROrder(), |
1731 | 155k | dl.getDebugLoc(), MaskAlloc); |
1732 | 155k | createOperands(N, Ops); |
1733 | 155k | |
1734 | 155k | CSEMap.InsertNode(N, IP); |
1735 | 155k | InsertNode(N); |
1736 | 155k | SDValue V = SDValue(N, 0); |
1737 | 155k | NewSDValueDbgMsg(V, "Creating new node: ", this); |
1738 | 155k | return V; |
1739 | 155k | } |
1740 | | |
1741 | 923 | SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { |
1742 | 923 | EVT VT = SV.getValueType(0); |
1743 | 923 | SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); |
1744 | 923 | ShuffleVectorSDNode::commuteMask(MaskVec); |
1745 | 923 | |
1746 | 923 | SDValue Op0 = SV.getOperand(0); |
1747 | 923 | SDValue Op1 = SV.getOperand(1); |
1748 | 923 | return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, MaskVec); |
1749 | 923 | } |
1750 | | |
1751 | 9.14M | SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { |
1752 | 9.14M | FoldingSetNodeID ID; |
1753 | 9.14M | AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); |
1754 | 9.14M | ID.AddInteger(RegNo); |
1755 | 9.14M | void *IP = nullptr; |
1756 | 9.14M | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1757 | 3.68M | return SDValue(E, 0); |
1758 | 5.45M | |
1759 | 5.45M | auto *N = newSDNode<RegisterSDNode>(RegNo, VT); |
1760 | 5.45M | N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); |
1761 | 5.45M | CSEMap.InsertNode(N, IP); |
1762 | 5.45M | InsertNode(N); |
1763 | 5.45M | return SDValue(N, 0); |
1764 | 5.45M | } |
1765 | | |
1766 | 455k | SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { |
1767 | 455k | FoldingSetNodeID ID; |
1768 | 455k | AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); |
1769 | 455k | ID.AddPointer(RegMask); |
1770 | 455k | void *IP = nullptr; |
1771 | 455k | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1772 | 127k | return SDValue(E, 0); |
1773 | 327k | |
1774 | 327k | auto *N = newSDNode<RegisterMaskSDNode>(RegMask); |
1775 | 327k | CSEMap.InsertNode(N, IP); |
1776 | 327k | InsertNode(N); |
1777 | 327k | return SDValue(N, 0); |
1778 | 327k | } |
1779 | | |
1780 | | SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root, |
1781 | 12.7k | MCSymbol *Label) { |
1782 | 12.7k | return getLabelNode(ISD::EH_LABEL, dl, Root, Label); |
1783 | 12.7k | } |
1784 | | |
1785 | | SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, |
1786 | 12.7k | SDValue Root, MCSymbol *Label) { |
1787 | 12.7k | FoldingSetNodeID ID; |
1788 | 12.7k | SDValue Ops[] = { Root }; |
1789 | 12.7k | AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops); |
1790 | 12.7k | ID.AddPointer(Label); |
1791 | 12.7k | void *IP = nullptr; |
1792 | 12.7k | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1793 | 0 | return SDValue(E, 0); |
1794 | 12.7k | |
1795 | 12.7k | auto *N = |
1796 | 12.7k | newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label); |
1797 | 12.7k | createOperands(N, Ops); |
1798 | 12.7k | |
1799 | 12.7k | CSEMap.InsertNode(N, IP); |
1800 | 12.7k | InsertNode(N); |
1801 | 12.7k | return SDValue(N, 0); |
1802 | 12.7k | } |
1803 | | |
1804 | | SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, |
1805 | | int64_t Offset, |
1806 | | bool isTarget, |
1807 | 305 | unsigned char TargetFlags) { |
1808 | 305 | unsigned Opc = isTarget ? ISD::TargetBlockAddress170 : ISD::BlockAddress135 ; |
1809 | 305 | |
1810 | 305 | FoldingSetNodeID ID; |
1811 | 305 | AddNodeIDNode(ID, Opc, getVTList(VT), None); |
1812 | 305 | ID.AddPointer(BA); |
1813 | 305 | ID.AddInteger(Offset); |
1814 | 305 | ID.AddInteger(TargetFlags); |
1815 | 305 | void *IP = nullptr; |
1816 | 305 | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1817 | 2 | return SDValue(E, 0); |
1818 | 303 | |
1819 | 303 | auto *N = newSDNode<BlockAddressSDNode>(Opc, VT, BA, Offset, TargetFlags); |
1820 | 303 | CSEMap.InsertNode(N, IP); |
1821 | 303 | InsertNode(N); |
1822 | 303 | return SDValue(N, 0); |
1823 | 303 | } |
1824 | | |
1825 | 2.10k | SDValue SelectionDAG::getSrcValue(const Value *V) { |
1826 | 2.10k | assert((!V || V->getType()->isPointerTy()) && |
1827 | 2.10k | "SrcValue is not a pointer?"); |
1828 | 2.10k | |
1829 | 2.10k | FoldingSetNodeID ID; |
1830 | 2.10k | AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None); |
1831 | 2.10k | ID.AddPointer(V); |
1832 | 2.10k | |
1833 | 2.10k | void *IP = nullptr; |
1834 | 2.10k | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1835 | 455 | return SDValue(E, 0); |
1836 | 1.64k | |
1837 | 1.64k | auto *N = newSDNode<SrcValueSDNode>(V); |
1838 | 1.64k | CSEMap.InsertNode(N, IP); |
1839 | 1.64k | InsertNode(N); |
1840 | 1.64k | return SDValue(N, 0); |
1841 | 1.64k | } |
1842 | | |
1843 | 20.9k | SDValue SelectionDAG::getMDNode(const MDNode *MD) { |
1844 | 20.9k | FoldingSetNodeID ID; |
1845 | 20.9k | AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None); |
1846 | 20.9k | ID.AddPointer(MD); |
1847 | 20.9k | |
1848 | 20.9k | void *IP = nullptr; |
1849 | 20.9k | if (SDNode *E = FindNodeOrInsertPos(ID, IP)) |
1850 | 0 | return SDValue(E, 0); |
1851 | 20.9k | |
1852 | 20.9k | auto *N = newSDNode<MDNodeSDNode>(MD); |
1853 | 20.9k | CSEMap.InsertNode(N, IP); |
1854 | 20.9k | InsertNode(N); |
1855 | 20.9k | return SDValue(N, 0); |
1856 | 20.9k | } |
1857 | | |
1858 | 482k | SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) { |
1859 | 482k | if (VT == V.getValueType()) |
1860 | 189k | return V; |
1861 | 293k | |
1862 | 293k | return getNode(ISD::BITCAST, SDLoc(V), VT, V); |
1863 | 293k | } |
1864 | | |
1865 | | SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, |
1866 | 228 | unsigned SrcAS, unsigned DestAS) { |
1867 | 228 | SDValue Ops[] = {Ptr}; |
1868 | 228 | FoldingSetNodeID ID; |
1869 | 228 | AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops); |
1870 | 228 | ID.AddInteger(SrcAS); |
1871 | 228 | ID.AddInteger(DestAS); |
1872 | 228 | |
1873 | 228 | void *IP = nullptr; |
1874 | 228 | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) |
1875 | 0 | return SDValue(E, 0); |
1876 | 228 | |
1877 | 228 | auto *N = newSDNode<AddrSpaceCastSDNode>(dl.getIROrder(), dl.getDebugLoc(), |
1878 | 228 | VT, SrcAS, DestAS); |
1879 | 228 | createOperands(N, Ops); |
1880 | 228 | |
1881 | 228 | CSEMap.InsertNode(N, IP); |
1882 | 228 | InsertNode(N); |
1883 | 228 | return SDValue(N, 0); |
1884 | 228 | } |
1885 | | |
1886 | | /// getShiftAmountOperand - Return the specified value casted to |
1887 | | /// the target's desired shift amount type. |
1888 | 535k | SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { |
1889 | 535k | EVT OpTy = Op.getValueType(); |
1890 | 535k | EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); |
1891 | 535k | if (OpTy == ShTy || OpTy.isVector()28.6k ) return Op506k ; |
1892 | 28.6k | |
1893 | 28.6k | return getZExtOrTrunc(Op, SDLoc(Op), ShTy); |
1894 | 28.6k | } |
1895 | | |
1896 | 121 | SDValue SelectionDAG::expandVAArg(SDNode *Node) { |
1897 | 121 | SDLoc dl(Node); |
1898 | 121 | const TargetLowering &TLI = getTargetLoweringInfo(); |
1899 | 121 | const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); |
1900 | 121 | EVT VT = Node->getValueType(0); |
1901 | 121 | SDValue Tmp1 = Node->getOperand(0); |
1902 | 121 | SDValue Tmp2 = Node->getOperand(1); |
1903 | 121 | unsigned Align = Node->getConstantOperandVal(3); |
1904 | 121 | |
1905 | 121 | SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, |
1906 | 121 | Tmp2, MachinePointerInfo(V)); |
1907 | 121 | SDValue VAList = VAListLoad; |
1908 | 121 | |
1909 | 121 | if (Align > TLI.getMinStackArgumentAlignment()) { |
1910 | 98 | assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); |
1911 | 98 | |
1912 | 98 | VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, |
1913 | 98 | getConstant(Align - 1, dl, VAList.getValueType())); |
1914 | 98 | |
1915 | 98 | VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, |
1916 | 98 | getConstant(-(int64_t)Align, dl, VAList.getValueType())); |
1917 | 98 | } |
1918 | 121 | |
1919 | 121 | // Increment the pointer, VAList, to the next vaarg |
1920 | 121 | Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, |
1921 | 121 | getConstant(getDataLayout().getTypeAllocSize( |
1922 | 121 | VT.getTypeForEVT(*getContext())), |
1923 | 121 | dl, VAList.getValueType())); |
1924 | 121 | // Store the incremented VAList to the legalized pointer |
1925 | 121 | Tmp1 = |
1926 | 121 | getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, MachinePointerInfo(V)); |
1927 | 121 | // Load the actual argument out of the pointer VAList |
1928 | 121 | return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo()); |
1929 | 121 | } |
1930 | | |
1931 | 104 | SDValue SelectionDAG::expandVACopy(SDNode *Node) { |
1932 | 104 | SDLoc dl(Node); |
1933 | 104 | const TargetLowering &TLI = getTargetLoweringInfo(); |
1934 | 104 | // This defaults to loading a pointer from the input and storing it to the |
1935 | 104 | // output, returning the chain. |
1936 | 104 | const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); |
1937 | 104 | const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); |
1938 | 104 | SDValue Tmp1 = |
1939 | 104 | getLoad(TLI.getPointerTy(getDataLayout()), dl, Node->getOperand(0), |
1940 | 104 | Node->getOperand(2), MachinePointerInfo(VS)); |
1941 | 104 | return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), |
1942 | 104 | MachinePointerInfo(VD)); |
1943 | 104 | } |
1944 | | |
1945 | 3.90k | SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { |
1946 | 3.90k | MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); |
1947 | 3.90k | unsigned ByteSize = VT.getStoreSize(); |
1948 | 3.90k | Type *Ty = VT.getTypeForEVT(*getContext()); |
1949 | 3.90k | unsigned StackAlign = |
1950 | 3.90k | std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); |
1951 | 3.90k | |
1952 | 3.90k | int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); |
1953 | 3.90k | return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); |
1954 | 3.90k | } |
1955 | | |
1956 | 3.39k | SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { |
1957 | 3.39k | unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); |
1958 | 3.39k | Type *Ty1 = VT1.getTypeForEVT(*getContext()); |
1959 | 3.39k | Type *Ty2 = VT2.getTypeForEVT(*getContext()); |
1960 | 3.39k | const DataLayout &DL = getDataLayout(); |
1961 | 3.39k | unsigned Align = |
1962 | 3.39k | std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2)); |
1963 | 3.39k | |
1964 | 3.39k | MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); |
1965 | 3.39k | int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); |
1966 | 3.39k | return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); |
1967 | 3.39k | } |
1968 | | |
1969 | | SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, |
1970 | 2.76M | ISD::CondCode Cond, const SDLoc &dl) { |
1971 | 2.76M | EVT OpVT = N1.getValueType(); |
1972 | 2.76M | |
1973 | 2.76M | // These setcc operations always fold. |
1974 | 2.76M | switch (Cond) { |
1975 | 2.76M | default: break2.70M ; |
1976 | 2.76M | case ISD::SETFALSE: |
1977 | 53 | case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT); |
1978 | 58 | case ISD::SETTRUE: |
1979 | 58 | case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT); |
1980 | 58 | |
1981 | 59.1k | case ISD::SETOEQ: |
1982 | 59.1k | case ISD::SETOGT: |
1983 | 59.1k | case ISD::SETOGE: |
1984 | 59.1k | case ISD::SETOLT: |
1985 | 59.1k | case ISD::SETOLE: |
1986 | 59.1k | case ISD::SETONE: |
1987 | 59.1k | case ISD::SETO: |
1988 | 59.1k | case ISD::SETUO: |
1989 | 59.1k | case ISD::SETUEQ: |
1990 | 59.1k | case ISD::SETUNE: |
1991 | 59.1k | assert(!OpVT.isInteger() && "Illegal setcc for integer!"); |
1992 | 59.1k | break; |
1993 | 2.76M | } |
1994 | 2.76M | |
1995 | 2.76M | if (OpVT.isInteger()) { |
1996 | 2.68M | // For EQ and NE, we can always pick a value for the undef to make the |
1997 | 2.68M | // predicate pass or fail, so we can return undef. |
1998 | 2.68M | // Matches behavior in llvm::ConstantFoldCompareInstruction. |
1999 | 2.68M | // icmp eq/ne X, undef -> undef. |
2000 | 2.68M | if ((N1.isUndef() || N2.isUndef()2.68M ) && |
2001 | 2.68M | (755 Cond == ISD::SETEQ755 || Cond == ISD::SETNE384 )) |
2002 | 391 | return getUNDEF(VT); |
2003 | 2.68M | |
2004 | 2.68M | // If both operands are undef, we can return undef for int comparison. |
2005 | 2.68M | // icmp undef, undef -> undef. |
2006 | 2.68M | if (N1.isUndef() && N2.isUndef()268 ) |
2007 | 22 | return getUNDEF(VT); |
2008 | 2.68M | |
2009 | 2.68M | // icmp X, X -> true/false |
2010 | 2.68M | // icmp X, undef -> true/false because undef could be X. |
2011 | 2.68M | if (N1 == N2) |
2012 | 1.50k | return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); |
2013 | 2.76M | } |
2014 | 2.76M | |
2015 | 2.76M | if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { |
2016 | 2.06M | const APInt &C2 = N2C->getAPIntValue(); |
2017 | 2.06M | if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { |
2018 | 17.4k | const APInt &C1 = N1C->getAPIntValue(); |
2019 | 17.4k | |
2020 | 17.4k | switch (Cond) { |
2021 | 17.4k | default: 0 llvm_unreachable0 ("Unknown integer setcc!"); |
2022 | 17.4k | case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT)15.2k ; |
2023 | 17.4k | case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT)26 ; |
2024 | 17.4k | case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT)16 ; |
2025 | 17.4k | case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT)14 ; |
2026 | 17.4k | case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT)37 ; |
2027 | 17.4k | case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT)16 ; |
2028 | 17.4k | case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT)2.00k ; |
2029 | 17.4k | case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT)154 ; |
2030 | 17.4k | case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT)2 ; |
2031 | 17.4k | case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT)0 ; |
2032 | 2.74M | } |
2033 | 2.74M | } |
2034 | 2.06M | } |
2035 | 2.74M | |
2036 | 2.74M | auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1); |
2037 | 2.74M | auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2); |
2038 | 2.74M | |
2039 | 2.74M | if (N1CFP && N2CFP563 ) { |
2040 | 253 | APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF()); |
2041 | 253 | switch (Cond) { |
2042 | 253 | default: break0 ; |
2043 | 253 | case ISD::SETEQ: if (0 R==APFloat::cmpUnordered0 ) |
2044 | 0 | return getUNDEF(VT); |
2045 | 0 | LLVM_FALLTHROUGH; |
2046 | 97 | case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, |
2047 | 97 | OpVT); |
2048 | 0 | case ISD::SETNE: if (R==APFloat::cmpUnordered) |
2049 | 0 | return getUNDEF(VT); |
2050 | 0 | LLVM_FALLTHROUGH; |
2051 | 0 | case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || |
2052 | 0 | R==APFloat::cmpLessThan, dl, VT, |
2053 | 0 | OpVT); |
2054 | 3 | case ISD::SETLT: if (R==APFloat::cmpUnordered) |
2055 | 0 | return getUNDEF(VT); |
2056 | 3 | LLVM_FALLTHROUGH; |
2057 | 26 | case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, |
2058 | 26 | OpVT); |
2059 | 3 | case ISD::SETGT: if (0 R==APFloat::cmpUnordered0 ) |
2060 | 0 | return getUNDEF(VT); |
2061 | 0 | LLVM_FALLTHROUGH; |
2062 | 10 | case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, |
2063 | 10 | VT, OpVT); |
2064 | 0 | case ISD::SETLE: if (R==APFloat::cmpUnordered) |
2065 | 0 | return getUNDEF(VT); |
2066 | 0 | LLVM_FALLTHROUGH; |
2067 | 4 | case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || |
2068 | 4 | R==APFloat::cmpEqual, dl, VT, |
2069 | 4 | OpVT); |
2070 | 0 | case ISD::SETGE: if (R==APFloat::cmpUnordered) |
2071 | 0 | return getUNDEF(VT); |
2072 | 0 | LLVM_FALLTHROUGH; |
2073 | 6 | case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || |
2074 | 6 | R==APFloat::cmpEqual, dl, VT, OpVT); |
2075 | 8 | case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, |
2076 | 8 | OpVT); |
2077 | 31 | case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, |
2078 | 31 | OpVT); |
2079 | 9 | case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || |
2080 | 9 | R==APFloat::cmpEqual, dl, VT, |
2081 | 9 | OpVT); |
2082 | 16 | case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, |
2083 | 16 | OpVT); |
2084 | 7 | case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || |
2085 | 7 | R==APFloat::cmpLessThan6 , dl, VT, |
2086 | 7 | OpVT); |
2087 | 36 | case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || |
2088 | 36 | R==APFloat::cmpUnordered, dl, VT, |
2089 | 36 | OpVT); |
2090 | 1 | case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, |
2091 | 1 | VT, OpVT); |
2092 | 2 | case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, |
2093 | 2 | OpVT); |
2094 | 2.74M | } |
2095 | 2.74M | } else if (N1CFP && OpVT.isSimple()310 && !N2.isUndef()310 ) { |
2096 | 303 | // Ensure that the constant occurs on the RHS. |
2097 | 303 | ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); |
2098 | 303 | if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT())) |
2099 | 101 | return SDValue(); |
2100 | 202 | return getSetCC(dl, VT, N2, N1, SwappedCond); |
2101 | 2.74M | } else if ((N2CFP && N2CFP->getValueAPF().isNaN()33.0k ) || |
2102 | 2.74M | (2.74M OpVT.isFloatingPoint()2.74M && (79.0k N1.isUndef()79.0k || N2.isUndef()79.0k ))) { |
2103 | 280 | // If an operand is known to be a nan (or undef that could be a nan), we can |
2104 | 280 | // fold it. |
2105 | 280 | // Choosing NaN for the undef will always make unordered comparison succeed |
2106 | 280 | // and ordered comparison fails. |
2107 | 280 | // Matches behavior in llvm::ConstantFoldCompareInstruction. |
2108 | 280 | switch (ISD::getUnorderedFlavor(Cond)) { |
2109 | 280 | default: |
2110 | 0 | llvm_unreachable("Unknown flavor!"); |
2111 | 280 | case 0: // Known false. |
2112 | 18 | return getBoolConstant(false, dl, VT, OpVT); |
2113 | 280 | case 1: // Known true. |
2114 | 18 | return getBoolConstant(true, dl, VT, OpVT); |
2115 | 280 | case 2: // Undefined. |
2116 | 244 | return getUNDEF(VT); |
2117 | 2.74M | } |
2118 | 2.74M | } |
2119 | 2.74M | |
2120 | 2.74M | // Could not fold it. |
2121 | 2.74M | return SDValue(); |
2122 | 2.74M | } |
2123 | | |
2124 | | /// See if the specified operand can be simplified with the knowledge that only |
2125 | | /// the bits specified by DemandedBits are used. |
2126 | | /// TODO: really we should be making this into the DAG equivalent of |
2127 | | /// SimplifyMultipleUseDemandedBits and not generate any new nodes. |
2128 | 781k | SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { |
2129 | 781k | EVT VT = V.getValueType(); |
2130 | 781k | APInt DemandedElts = VT.isVector() |
2131 | 781k | ? APInt::getAllOnesValue(VT.getVectorNumElements())21.9k |
2132 | 781k | : APInt(1, 1)759k ; |
2133 | 781k | return GetDemandedBits(V, DemandedBits, DemandedElts); |
2134 | 781k | } |
2135 | | |
2136 | | /// See if the specified operand can be simplified with the knowledge that only |
2137 | | /// the bits specified by DemandedBits are used in the elements specified by |
2138 | | /// DemandedElts. |
2139 | | /// TODO: really we should be making this into the DAG equivalent of |
2140 | | /// SimplifyMultipleUseDemandedBits and not generate any new nodes. |
2141 | | SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, |
2142 | 781k | const APInt &DemandedElts) { |
2143 | 781k | switch (V.getOpcode()) { |
2144 | 781k | default: |
2145 | 608k | break; |
2146 | 781k | case ISD::Constant: { |
2147 | 53.8k | auto *CV = cast<ConstantSDNode>(V.getNode()); |
2148 | 53.8k | assert(CV && "Const value should be ConstSDNode."); |
2149 | 53.8k | const APInt &CVal = CV->getAPIntValue(); |
2150 | 53.8k | APInt NewVal = CVal & DemandedBits; |
2151 | 53.8k | if (NewVal != CVal) |
2152 | 388 | return getConstant(NewVal, SDLoc(V), V.getValueType()); |
2153 | 53.4k | break; |
2154 | 53.4k | } |
2155 | 53.4k | case ISD::OR: |
2156 | 11.8k | case ISD::XOR: |
2157 | 11.8k | // If the LHS or RHS don't contribute bits to the or, drop them. |
2158 | 11.8k | if (MaskedValueIsZero(V.getOperand(0), DemandedBits)) |
2159 | 851 | return V.getOperand(1); |
2160 | 10.9k | if (MaskedValueIsZero(V.getOperand(1), DemandedBits)) |
2161 | 1.07k | return V.getOperand(0); |
2162 | 9.92k | break; |
2163 | 75.4k | case ISD::SRL: |
2164 | 75.4k | // Only look at single-use SRLs. |
2165 | 75.4k | if (!V.getNode()->hasOneUse()) |
2166 | 7.55k | break; |
2167 | 67.9k | if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { |
2168 | 66.4k | // See if we can recursively simplify the LHS. |
2169 | 66.4k | unsigned Amt = RHSC->getZExtValue(); |
2170 | 66.4k | |
2171 | 66.4k | // Watch out for shift count overflow though. |
2172 | 66.4k | if (Amt >= DemandedBits.getBitWidth()) |
2173 | 0 | break; |
2174 | 66.4k | APInt SrcDemandedBits = DemandedBits << Amt; |
2175 | 66.4k | if (SDValue SimplifyLHS = |
2176 | 1.99k | GetDemandedBits(V.getOperand(0), SrcDemandedBits)) |
2177 | 1.99k | return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, |
2178 | 1.99k | V.getOperand(1)); |
2179 | 65.9k | } |
2180 | 65.9k | break; |
2181 | 65.9k | case ISD::AND: { |
2182 | 20.4k | // X & -1 -> X (ignoring bits which aren't demanded). |
2183 | 20.4k | // Also handle the case where masked out bits in X are known to be zero. |
2184 | 20.4k | if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) { |
2185 | 19.2k | const APInt &AndVal = RHSC->getAPIntValue(); |
2186 | 19.2k | if (DemandedBits.isSubsetOf(AndVal) || |
2187 | 19.2k | DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero | |
2188 | 16.7k | AndVal)) |
2189 | 4.19k | return V.getOperand(0); |
2190 | 16.2k | } |
2191 | 16.2k | break; |
2192 | 16.2k | } |
2193 | 16.2k | case ISD::ANY_EXTEND: { |
2194 | 3.15k | SDValue Src = V.getOperand(0); |
2195 | 3.15k | unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); |
2196 | 3.15k | // Being conservative here - only peek through if we only demand bits in the |
2197 | 3.15k | // non-extended source (even though the extended bits are technically |
2198 | 3.15k | // undef). |
2199 | 3.15k | if (DemandedBits.getActiveBits() > SrcBitWidth) |
2200 | 101 | break; |
2201 | 3.05k | APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth); |
2202 | 3.05k | if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits)) |
2203 | 195 | return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); |
2204 | 2.86k | break; |
2205 | 2.86k | } |
2206 | 8.16k | case ISD::SIGN_EXTEND_INREG: |
2207 | 8.16k | EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT(); |
2208 | 8.16k | unsigned ExVTBits = ExVT.getScalarSizeInBits(); |
2209 | 8.16k | |
2210 | 8.16k | // If none of the extended bits are demanded, eliminate the sextinreg. |
2211 | 8.16k | if (DemandedBits.getActiveBits() <= ExVTBits) |
2212 | 64 | return V.getOperand(0); |
2213 | 8.10k | |
2214 | 8.10k | break; |
2215 | 773k | } |
2216 | 773k | return SDValue(); |
2217 | 773k | } |
2218 | | |
2219 | | /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We |
2220 | | /// use this predicate to simplify operations downstream. |
2221 | 188k | bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { |
2222 | 188k | unsigned BitWidth = Op.getScalarValueSizeInBits(); |
2223 | 188k | return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); |
2224 | 188k | } |
2225 | | |
2226 | | /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use |
2227 | | /// this predicate to simplify operations downstream. Mask is known to be zero |
2228 | | /// for bits that V cannot have. |
2229 | | bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, |
2230 | 3.28M | unsigned Depth) const { |
2231 | 3.28M | EVT VT = V.getValueType(); |
2232 | 3.28M | APInt DemandedElts = VT.isVector() |
2233 | 3.28M | ? APInt::getAllOnesValue(VT.getVectorNumElements())181k |
2234 | 3.28M | : APInt(1, 1)3.10M ; |
2235 | 3.28M | return MaskedValueIsZero(V, Mask, DemandedElts, Depth); |
2236 | 3.28M | } |
2237 | | |
2238 | | /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in |
2239 | | /// DemandedElts. We use this predicate to simplify operations downstream. |
2240 | | /// Mask is known to be zero for bits that V cannot have. |
2241 | | bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, |
2242 | | const APInt &DemandedElts, |
2243 | 3.31M | unsigned Depth) const { |
2244 | 3.31M | return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero); |
2245 | 3.31M | } |
2246 | | |
2247 | | /// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'. |
2248 | | bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, |
2249 | 1.35k | unsigned Depth) const { |
2250 | 1.35k | return Mask.isSubsetOf(computeKnownBits(V, Depth).One); |
2251 | 1.35k | } |
2252 | | |
2253 | | /// isSplatValue - Return true if the vector V has the same value |
2254 | | /// across all DemandedElts. |
2255 | | bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, |
2256 | 1.27M | APInt &UndefElts) { |
2257 | 1.27M | if (!DemandedElts) |
2258 | 0 | return false; // No demanded elts, better to assume we don't know anything. |
2259 | 1.27M | |
2260 | 1.27M | EVT VT = V.getValueType(); |
2261 | 1.27M | assert(VT.isVector() && "Vector type expected"); |
2262 | 1.27M | |
2263 | 1.27M | unsigned NumElts = VT.getVectorNumElements(); |
2264 | 1.27M | assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); |
2265 | 1.27M | UndefElts = APInt::getNullValue(NumElts); |
2266 | 1.27M | |
2267 | 1.27M | switch (V.getOpcode()) { |
2268 | 1.27M | case ISD::BUILD_VECTOR: { |
2269 | 245k | SDValue Scl; |
2270 | 1.34M | for (unsigned i = 0; i != NumElts; ++i1.09M ) { |
2271 | 1.13M | SDValue Op = V.getOperand(i); |
2272 | 1.13M | if (Op.isUndef()) { |
2273 | 6.93k | UndefElts.setBit(i); |
2274 | 6.93k | continue; |
2275 | 6.93k | } |
2276 | 1.13M | if (!DemandedElts[i]) |
2277 | 1.30k | continue; |
2278 | 1.12M | if (Scl && Scl != Op884k ) |
2279 | 41.0k | return false; |
2280 | 1.08M | Scl = Op; |
2281 | 1.08M | } |
2282 | 245k | return true204k ; |
2283 | 245k | } |
2284 | 245k | case ISD::VECTOR_SHUFFLE: { |
2285 | 5.68k | // Check if this is a shuffle node doing a splat. |
2286 | 5.68k | // TODO: Do we need to handle shuffle(splat, undef, mask)? |
2287 | 5.68k | int SplatIndex = -1; |
2288 | 5.68k | ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask(); |
2289 | 58.0k | for (int i = 0; i != (int)NumElts; ++i52.3k ) { |
2290 | 54.7k | int M = Mask[i]; |
2291 | 54.7k | if (M < 0) { |
2292 | 953 | UndefElts.setBit(i); |
2293 | 953 | continue; |
2294 | 953 | } |
2295 | 53.7k | if (!DemandedElts[i]) |
2296 | 2.69k | continue; |
2297 | 51.0k | if (0 <= SplatIndex && SplatIndex != M45.3k ) |
2298 | 2.37k | return false; |
2299 | 48.6k | SplatIndex = M; |
2300 | 48.6k | } |
2301 | 5.68k | return true3.31k ; |
2302 | 5.68k | } |
2303 | 5.68k | case ISD::EXTRACT_SUBVECTOR: { |
2304 | 3.11k | SDValue Src = V.getOperand(0); |
2305 | 3.11k | ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1)); |
2306 | 3.11k | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
2307 | 3.11k | if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { |
2308 | 3.11k | // Offset the demanded elts by the subvector index. |
2309 | 3.11k | uint64_t Idx = SubIdx->getZExtValue(); |
2310 | 3.11k | APInt UndefSrcElts; |
2311 | 3.11k | APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); |
2312 | 3.11k | if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) { |
2313 | 291 | UndefElts = UndefSrcElts.extractBits(NumElts, Idx); |
2314 | 291 | return true; |
2315 | 291 | } |
2316 | 2.82k | } |
2317 | 2.82k | break; |
2318 | 2.82k | } |
2319 | 139k | case ISD::ADD: |
2320 | 139k | case ISD::SUB: |
2321 | 139k | case ISD::AND: { |
2322 | 139k | APInt UndefLHS, UndefRHS; |
2323 | 139k | SDValue LHS = V.getOperand(0); |
2324 | 139k | SDValue RHS = V.getOperand(1); |
2325 | 139k | if (isSplatValue(LHS, DemandedElts, UndefLHS) && |
2326 | 139k | isSplatValue(RHS, DemandedElts, UndefRHS)9.11k ) { |
2327 | 3.63k | UndefElts = UndefLHS | UndefRHS; |
2328 | 3.63k | return true; |
2329 | 3.63k | } |
2330 | 135k | break; |
2331 | 135k | } |
2332 | 1.01M | } |
2333 | 1.01M | |
2334 | 1.01M | return false; |
2335 | 1.01M | } |
2336 | | |
2337 | | /// Helper wrapper to main isSplatValue function. |
2338 | 1.20k | bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { |
2339 | 1.20k | EVT VT = V.getValueType(); |
2340 | 1.20k | assert(VT.isVector() && "Vector type expected"); |
2341 | 1.20k | unsigned NumElts = VT.getVectorNumElements(); |
2342 | 1.20k | |
2343 | 1.20k | APInt UndefElts; |
2344 | 1.20k | APInt DemandedElts = APInt::getAllOnesValue(NumElts); |
2345 | 1.20k | return isSplatValue(V, DemandedElts, UndefElts) && |
2346 | 1.20k | (232 AllowUndefs232 || !UndefElts232 ); |
2347 | 1.20k | } |
2348 | | |
2349 | 1.15M | SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { |
2350 | 1.15M | V = peekThroughExtractSubvectors(V); |
2351 | 1.15M | |
2352 | 1.15M | EVT VT = V.getValueType(); |
2353 | 1.15M | unsigned Opcode = V.getOpcode(); |
2354 | 1.15M | switch (Opcode) { |
2355 | 1.15M | default: { |
2356 | 1.12M | APInt UndefElts; |
2357 | 1.12M | APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); |
2358 | 1.12M | if (isSplatValue(V, DemandedElts, UndefElts)) { |
2359 | 198k | // Handle case where all demanded elements are UNDEF. |
2360 | 198k | if (DemandedElts.isSubsetOf(UndefElts)) { |
2361 | 0 | SplatIdx = 0; |
2362 | 0 | return getUNDEF(VT); |
2363 | 0 | } |
2364 | 198k | SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); |
2365 | 198k | return V; |
2366 | 198k | } |
2367 | 922k | break; |
2368 | 922k | } |
2369 | 922k | case ISD::VECTOR_SHUFFLE: { |
2370 | 35.6k | // Check if this is a shuffle node doing a splat. |
2371 | 35.6k | // TODO - remove this and rely purely on SelectionDAG::isSplatValue, |
2372 | 35.6k | // getTargetVShiftNode currently struggles without the splat source. |
2373 | 35.6k | auto *SVN = cast<ShuffleVectorSDNode>(V); |
2374 | 35.6k | if (!SVN->isSplat()) |
2375 | 23.0k | break; |
2376 | 12.5k | int Idx = SVN->getSplatIndex(); |
2377 | 12.5k | int NumElts = V.getValueType().getVectorNumElements(); |
2378 | 12.5k | SplatIdx = Idx % NumElts; |
2379 | 12.5k | return V.getOperand(Idx / NumElts); |
2380 | 12.5k | } |
2381 | 945k | } |
2382 | 945k | |
2383 | 945k | return SDValue(); |
2384 | 945k | } |
2385 | | |
2386 | 14.0k | SDValue SelectionDAG::getSplatValue(SDValue V) { |
2387 | 14.0k | int SplatIdx; |
2388 | 14.0k | if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) |
2389 | 3.04k | return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), |
2390 | 3.04k | SrcVector.getValueType().getScalarType(), SrcVector, |
2391 | 3.04k | getIntPtrConstant(SplatIdx, SDLoc(V))); |
2392 | 11.0k | return SDValue(); |
2393 | 11.0k | } |
2394 | | |
2395 | | /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that |
2396 | | /// is less than the element bit-width of the shift node, return it. |
2397 | 3.24M | static const APInt *getValidShiftAmountConstant(SDValue V) { |
2398 | 3.24M | if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { |
2399 | 3.05M | // Shifting more than the bitwidth is not valid. |
2400 | 3.05M | const APInt &ShAmt = SA->getAPIntValue(); |
2401 | 3.05M | if (ShAmt.ult(V.getScalarValueSizeInBits())) |
2402 | 3.05M | return &ShAmt; |
2403 | 192k | } |
2404 | 192k | return nullptr; |
2405 | 192k | } |
2406 | | |
2407 | | /// Determine which bits of Op are known to be either zero or one and return |
2408 | | /// them in Known. For vectors, the known bits are those that are shared by |
2409 | | /// every vector element. |
2410 | 14.3M | KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { |
2411 | 14.3M | EVT VT = Op.getValueType(); |
2412 | 14.3M | APInt DemandedElts = VT.isVector() |
2413 | 14.3M | ? APInt::getAllOnesValue(VT.getVectorNumElements())340k |
2414 | 14.3M | : APInt(1, 1)13.9M ; |
2415 | 14.3M | return computeKnownBits(Op, DemandedElts, Depth); |
2416 | 14.3M | } |
2417 | | |
2418 | | /// Determine which bits of Op are known to be either zero or one and return |
2419 | | /// them in Known. The DemandedElts argument allows us to only collect the known |
2420 | | /// bits that are shared by the requested vector elements. |
2421 | | KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, |
2422 | 65.5M | unsigned Depth) const { |
2423 | 65.5M | unsigned BitWidth = Op.getScalarValueSizeInBits(); |
2424 | 65.5M | |
2425 | 65.5M | KnownBits Known(BitWidth); // Don't know anything. |
2426 | 65.5M | |
2427 | 65.5M | if (auto *C = dyn_cast<ConstantSDNode>(Op)) { |
2428 | 12.9M | // We know all of the bits for a constant! |
2429 | 12.9M | Known.One = C->getAPIntValue(); |
2430 | 12.9M | Known.Zero = ~Known.One; |
2431 | 12.9M | return Known; |
2432 | 12.9M | } |
2433 | 52.5M | if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) { |
2434 | 16.0k | // We know all of the bits for a constant fp! |
2435 | 16.0k | Known.One = C->getValueAPF().bitcastToAPInt(); |
2436 | 16.0k | Known.Zero = ~Known.One; |
2437 | 16.0k | return Known; |
2438 | 16.0k | } |
2439 | 52.5M | |
2440 | 52.5M | if (Depth == 6) |
2441 | 3.16M | return Known; // Limit search depth. |
2442 | 49.3M | |
2443 | 49.3M | KnownBits Known2; |
2444 | 49.3M | unsigned NumElts = DemandedElts.getBitWidth(); |
2445 | 49.3M | assert((!Op.getValueType().isVector() || |
2446 | 49.3M | NumElts == Op.getValueType().getVectorNumElements()) && |
2447 | 49.3M | "Unexpected vector size"); |
2448 | 49.3M | |
2449 | 49.3M | if (!DemandedElts) |
2450 | 4.54k | return Known; // No demanded elts, better to assume we don't know anything. |
2451 | 49.3M | |
2452 | 49.3M | unsigned Opcode = Op.getOpcode(); |
2453 | 49.3M | switch (Opcode) { |
2454 | 49.3M | case ISD::BUILD_VECTOR: |
2455 | 1.24M | // Collect the known bits that are shared by every demanded vector element. |
2456 | 1.24M | Known.Zero.setAllBits(); Known.One.setAllBits(); |
2457 | 9.24M | for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i7.99M ) { |
2458 | 8.38M | if (!DemandedElts[i]) |
2459 | 4.26M | continue; |
2460 | 4.11M | |
2461 | 4.11M | SDValue SrcOp = Op.getOperand(i); |
2462 | 4.11M | Known2 = computeKnownBits(SrcOp, Depth + 1); |
2463 | 4.11M | |
2464 | 4.11M | // BUILD_VECTOR can implicitly truncate sources, we must handle this. |
2465 | 4.11M | if (SrcOp.getValueSizeInBits() != BitWidth) { |
2466 | 352k | assert(SrcOp.getValueSizeInBits() > BitWidth && |
2467 | 352k | "Expected BUILD_VECTOR implicit truncation"); |
2468 | 352k | Known2 = Known2.trunc(BitWidth); |
2469 | 352k | } |
2470 | 4.11M | |
2471 | 4.11M | // Known bits are the values that are shared by every demanded element. |
2472 | 4.11M | Known.One &= Known2.One; |
2473 | 4.11M | Known.Zero &= Known2.Zero; |
2474 | 4.11M | |
2475 | 4.11M | // If we don't know any bits, early out. |
2476 | 4.11M | if (Known.isUnknown()) |
2477 | 387k | break; |
2478 | 4.11M | } |
2479 | 1.24M | break; |
2480 | 49.3M | case ISD::VECTOR_SHUFFLE: { |
2481 | 492k | // Collect the known bits that are shared by every vector element referenced |
2482 | 492k | // by the shuffle. |
2483 | 492k | APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); |
2484 | 492k | Known.Zero.setAllBits(); Known.One.setAllBits(); |
2485 | 492k | const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); |
2486 | 492k | assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); |
2487 | 5.74M | for (unsigned i = 0; i != NumElts; ++i5.24M ) { |
2488 | 5.37M | if (!DemandedElts[i]) |
2489 | 3.31M | continue; |
2490 | 2.06M | |
2491 | 2.06M | int M = SVN->getMaskElt(i); |
2492 | 2.06M | if (M < 0) { |
2493 | 126k | // For UNDEF elements, we don't know anything about the common state of |
2494 | 126k | // the shuffle result. |
2495 | 126k | Known.resetAll(); |
2496 | 126k | DemandedLHS.clearAllBits(); |
2497 | 126k | DemandedRHS.clearAllBits(); |
2498 | 126k | break; |
2499 | 126k | } |
2500 | 1.93M | |
2501 | 1.93M | if ((unsigned)M < NumElts) |
2502 | 1.70M | DemandedLHS.setBit((unsigned)M % NumElts); |
2503 | 226k | else |
2504 | 226k | DemandedRHS.setBit((unsigned)M % NumElts); |
2505 | 1.93M | } |
2506 | 492k | // Known bits are the values that are shared by every demanded element. |
2507 | 492k | if (!!DemandedLHS) { |
2508 | 315k | SDValue LHS = Op.getOperand(0); |
2509 | 315k | Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1); |
2510 | 315k | Known.One &= Known2.One; |
2511 | 315k | Known.Zero &= Known2.Zero; |
2512 | 315k | } |
2513 | 492k | // If we don't know any bits, early out. |
2514 | 492k | if (Known.isUnknown()) |
2515 | 432k | break; |
2516 | 60.5k | if (!!DemandedRHS) { |
2517 | 51.7k | SDValue RHS = Op.getOperand(1); |
2518 | 51.7k | Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1); |
2519 | 51.7k | Known.One &= Known2.One; |
2520 | 51.7k | Known.Zero &= Known2.Zero; |
2521 | 51.7k | } |
2522 | 60.5k | break; |
2523 | 60.5k | } |
2524 | 138k | case ISD::CONCAT_VECTORS: { |
2525 | 138k | // Split DemandedElts and test each of the demanded subvectors. |
2526 | 138k | Known.Zero.setAllBits(); Known.One.setAllBits(); |
2527 | 138k | EVT SubVectorVT = Op.getOperand(0).getValueType(); |
2528 | 138k | unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); |
2529 | 138k | unsigned NumSubVectors = Op.getNumOperands(); |
2530 | 185k | for (unsigned i = 0; i != NumSubVectors; ++i46.9k ) { |
2531 | 178k | APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts); |
2532 | 178k | DemandedSub = DemandedSub.trunc(NumSubVectorElts); |
2533 | 178k | if (!!DemandedSub) { |
2534 | 138k | SDValue Sub = Op.getOperand(i); |
2535 | 138k | Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1); |
2536 | 138k | Known.One &= Known2.One; |
2537 | 138k | Known.Zero &= Known2.Zero; |
2538 | 138k | } |
2539 | 178k | // If we don't know any bits, early out. |
2540 | 178k | if (Known.isUnknown()) |
2541 | 131k | break; |
2542 | 178k | } |
2543 | 138k | break; |
2544 | 60.5k | } |
2545 | 76.8k | case ISD::INSERT_SUBVECTOR: { |
2546 | 76.8k | // If we know the element index, demand any elements from the subvector and |
2547 | 76.8k | // the remainder from the src its inserted into, otherwise demand them all. |
2548 | 76.8k | SDValue Src = Op.getOperand(0); |
2549 | 76.8k | SDValue Sub = Op.getOperand(1); |
2550 | 76.8k | ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); |
2551 | 76.8k | unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); |
2552 | 76.8k | if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { |
2553 | 76.8k | Known.One.setAllBits(); |
2554 | 76.8k | Known.Zero.setAllBits(); |
2555 | 76.8k | uint64_t Idx = SubIdx->getZExtValue(); |
2556 | 76.8k | APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); |
2557 | 76.8k | if (!!DemandedSubElts) { |
2558 | 62.7k | Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1); |
2559 | 62.7k | if (Known.isUnknown()) |
2560 | 61.4k | break; // early-out. |
2561 | 15.3k | } |
2562 | 15.3k | APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); |
2563 | 15.3k | APInt DemandedSrcElts = DemandedElts & ~SubMask; |
2564 | 15.3k | if (!!DemandedSrcElts) { |
2565 | 14.3k | Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1); |
2566 | 14.3k | Known.One &= Known2.One; |
2567 | 14.3k | Known.Zero &= Known2.Zero; |
2568 | 14.3k | } |
2569 | 15.3k | } else { |
2570 | 0 | Known = computeKnownBits(Sub, Depth + 1); |
2571 | 0 | if (Known.isUnknown()) |
2572 | 0 | break; // early-out. |
2573 | 0 | Known2 = computeKnownBits(Src, Depth + 1); |
2574 | 0 | Known.One &= Known2.One; |
2575 | 0 | Known.Zero &= Known2.Zero; |
2576 | 0 | } |
2577 | 76.8k | break15.3k ; |
2578 | 76.8k | } |
2579 | 436k | case ISD::EXTRACT_SUBVECTOR: { |
2580 | 436k | // If we know the element index, just demand that subvector elements, |
2581 | 436k | // otherwise demand them all. |
2582 | 436k | SDValue Src = Op.getOperand(0); |
2583 | 436k | ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
2584 | 436k | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
2585 | 436k | if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { |
2586 | 436k | // Offset the demanded elts by the subvector index. |
2587 | 436k | uint64_t Idx = SubIdx->getZExtValue(); |
2588 | 436k | APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); |
2589 | 436k | Known = computeKnownBits(Src, DemandedSrc, Depth + 1); |
2590 | 436k | } else { |
2591 | 0 | Known = computeKnownBits(Src, Depth + 1); |
2592 | 0 | } |
2593 | 436k | break; |
2594 | 76.8k | } |
2595 | 76.8k | case ISD::SCALAR_TO_VECTOR: { |
2596 | 28.8k | // We know about scalar_to_vector as much as we know about it source, |
2597 | 28.8k | // which becomes the first element of otherwise unknown vector. |
2598 | 28.8k | if (DemandedElts != 1) |
2599 | 12.8k | break; |
2600 | 15.9k | |
2601 | 15.9k | SDValue N0 = Op.getOperand(0); |
2602 | 15.9k | Known = computeKnownBits(N0, Depth + 1); |
2603 | 15.9k | if (N0.getValueSizeInBits() != BitWidth) |
2604 | 326 | Known = Known.trunc(BitWidth); |
2605 | 15.9k | |
2606 | 15.9k | break; |
2607 | 15.9k | } |
2608 | 1.49M | case ISD::BITCAST: { |
2609 | 1.49M | SDValue N0 = Op.getOperand(0); |
2610 | 1.49M | EVT SubVT = N0.getValueType(); |
2611 | 1.49M | unsigned SubBitWidth = SubVT.getScalarSizeInBits(); |
2612 | 1.49M | |
2613 | 1.49M | // Ignore bitcasts from unsupported types. |
2614 | 1.49M | if (!(SubVT.isInteger() || SubVT.isFloatingPoint()111k )) |
2615 | 292 | break; |
2616 | 1.48M | |
2617 | 1.48M | // Fast handling of 'identity' bitcasts. |
2618 | 1.48M | if (BitWidth == SubBitWidth) { |
2619 | 83.4k | Known = computeKnownBits(N0, DemandedElts, Depth + 1); |
2620 | 83.4k | break; |
2621 | 83.4k | } |
2622 | 1.40M | |
2623 | 1.40M | bool IsLE = getDataLayout().isLittleEndian(); |
2624 | 1.40M | |
2625 | 1.40M | // Bitcast 'small element' vector to 'large element' scalar/vector. |
2626 | 1.40M | if ((BitWidth % SubBitWidth) == 0) { |
2627 | 743k | assert(N0.getValueType().isVector() && "Expected bitcast from vector"); |
2628 | 743k | |
2629 | 743k | // Collect known bits for the (larger) output by collecting the known |
2630 | 743k | // bits from each set of sub elements and shift these into place. |
2631 | 743k | // We need to separately call computeKnownBits for each set of |
2632 | 743k | // sub elements as the knownbits for each is likely to be different. |
2633 | 743k | unsigned SubScale = BitWidth / SubBitWidth; |
2634 | 743k | APInt SubDemandedElts(NumElts * SubScale, 0); |
2635 | 4.18M | for (unsigned i = 0; i != NumElts; ++i3.44M ) |
2636 | 3.44M | if (DemandedElts[i]) |
2637 | 3.09M | SubDemandedElts.setBit(i * SubScale); |
2638 | 743k | |
2639 | 2.82M | for (unsigned i = 0; i != SubScale; ++i2.08M ) { |
2640 | 2.08M | Known2 = computeKnownBits(N0, SubDemandedElts.shl(i), |
2641 | 2.08M | Depth + 1); |
2642 | 2.08M | unsigned Shifts = IsLE ? i2.07M : SubScale - 1 - i8.49k ; |
2643 | 2.08M | Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts); |
2644 | 2.08M | Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts); |
2645 | 2.08M | } |
2646 | 743k | } |
2647 | 1.40M | |
2648 | 1.40M | // Bitcast 'large element' scalar/vector to 'small element' vector. |
2649 | 1.40M | if ((SubBitWidth % BitWidth) == 0) { |
2650 | 663k | assert(Op.getValueType().isVector() && "Expected bitcast to vector"); |
2651 | 663k | |
2652 | 663k | // Collect known bits for the (smaller) output by collecting the known |
2653 | 663k | // bits from the overlapping larger input elements and extracting the |
2654 | 663k | // sub sections we actually care about. |
2655 | 663k | unsigned SubScale = SubBitWidth / BitWidth; |
2656 | 663k | APInt SubDemandedElts(NumElts / SubScale, 0); |
2657 | 11.4M | for (unsigned i = 0; i != NumElts; ++i10.7M ) |
2658 | 10.7M | if (DemandedElts[i]) |
2659 | 3.36M | SubDemandedElts.setBit(i / SubScale); |
2660 | 663k | |
2661 | 663k | Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1); |
2662 | 663k | |
2663 | 663k | Known.Zero.setAllBits(); Known.One.setAllBits(); |
2664 | 4.12M | for (unsigned i = 0; i != NumElts; ++i3.46M ) |
2665 | 4.06M | if (DemandedElts[i]) { |
2666 | 1.05M | unsigned Shifts = IsLE ? i1.04M : NumElts - 1 - i10.7k ; |
2667 | 1.05M | unsigned Offset = (Shifts % SubScale) * BitWidth; |
2668 | 1.05M | Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); |
2669 | 1.05M | Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); |
2670 | 1.05M | // If we don't know any bits, early out. |
2671 | 1.05M | if (Known.isUnknown()) |
2672 | 599k | break; |
2673 | 1.05M | } |
2674 | 663k | } |
2675 | 1.40M | break; |
2676 | 1.40M | } |
2677 | 2.28M | case ISD::AND: |
2678 | 2.28M | // If either the LHS or the RHS are Zero, the result is zero. |
2679 | 2.28M | Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
2680 | 2.28M | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2681 | 2.28M | |
2682 | 2.28M | // Output known-1 bits are only known if set in both the LHS & RHS. |
2683 | 2.28M | Known.One &= Known2.One; |
2684 | 2.28M | // Output known-0 are known to be clear if zero in either the LHS | RHS. |
2685 | 2.28M | Known.Zero |= Known2.Zero; |
2686 | 2.28M | break; |
2687 | 1.40M | case ISD::OR: |
2688 | 704k | Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
2689 | 704k | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2690 | 704k | |
2691 | 704k | // Output known-0 bits are only known if clear in both the LHS & RHS. |
2692 | 704k | Known.Zero &= Known2.Zero; |
2693 | 704k | // Output known-1 are known to be set if set in either the LHS | RHS. |
2694 | 704k | Known.One |= Known2.One; |
2695 | 704k | break; |
2696 | 1.40M | case ISD::XOR: { |
2697 | 346k | Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
2698 | 346k | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2699 | 346k | |
2700 | 346k | // Output known-0 bits are known if clear or set in both the LHS & RHS. |
2701 | 346k | APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); |
2702 | 346k | // Output known-1 are known to be set if set in only one of the LHS, RHS. |
2703 | 346k | Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); |
2704 | 346k | Known.Zero = KnownZeroOut; |
2705 | 346k | break; |
2706 | 1.40M | } |
2707 | 1.40M | case ISD::MUL: { |
2708 | 744k | Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
2709 | 744k | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2710 | 744k | |
2711 | 744k | // If low bits are zero in either operand, output low known-0 bits. |
2712 | 744k | // Also compute a conservative estimate for high known-0 bits. |
2713 | 744k | // More trickiness is possible, but this is sufficient for the |
2714 | 744k | // interesting case of alignment computation. |
2715 | 744k | unsigned TrailZ = Known.countMinTrailingZeros() + |
2716 | 744k | Known2.countMinTrailingZeros(); |
2717 | 744k | unsigned LeadZ = std::max(Known.countMinLeadingZeros() + |
2718 | 744k | Known2.countMinLeadingZeros(), |
2719 | 744k | BitWidth) - BitWidth; |
2720 | 744k | |
2721 | 744k | Known.resetAll(); |
2722 | 744k | Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); |
2723 | 744k | Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); |
2724 | 744k | break; |
2725 | 1.40M | } |
2726 | 1.40M | case ISD::UDIV: { |
2727 | 11.4k | // For the purposes of computing leading zeros we can conservatively |
2728 | 11.4k | // treat a udiv as a logical right shift by the power of 2 known to |
2729 | 11.4k | // be less than the denominator. |
2730 | 11.4k | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2731 | 11.4k | unsigned LeadZ = Known2.countMinLeadingZeros(); |
2732 | 11.4k | |
2733 | 11.4k | Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
2734 | 11.4k | unsigned RHSMaxLeadingZeros = Known2.countMaxLeadingZeros(); |
2735 | 11.4k | if (RHSMaxLeadingZeros != BitWidth) |
2736 | 6.18k | LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1); |
2737 | 11.4k | |
2738 | 11.4k | Known.Zero.setHighBits(LeadZ); |
2739 | 11.4k | break; |
2740 | 1.40M | } |
2741 | 1.40M | case ISD::SELECT: |
2742 | 272k | case ISD::VSELECT: |
2743 | 272k | Known = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); |
2744 | 272k | // If we don't know any bits, early out. |
2745 | 272k | if (Known.isUnknown()) |
2746 | 106k | break; |
2747 | 166k | Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1); |
2748 | 166k | |
2749 | 166k | // Only known if known in both the LHS and RHS. |
2750 | 166k | Known.One &= Known2.One; |
2751 | 166k | Known.Zero &= Known2.Zero; |
2752 | 166k | break; |
2753 | 166k | case ISD::SELECT_CC: |
2754 | 119k | Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1); |
2755 | 119k | // If we don't know any bits, early out. |
2756 | 119k | if (Known.isUnknown()) |
2757 | 47.7k | break; |
2758 | 71.6k | Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1); |
2759 | 71.6k | |
2760 | 71.6k | // Only known if known in both the LHS and RHS. |
2761 | 71.6k | Known.One &= Known2.One; |
2762 | 71.6k | Known.Zero &= Known2.Zero; |
2763 | 71.6k | break; |
2764 | 71.6k | case ISD::SMULO: |
2765 | 7.58k | case ISD::UMULO: |
2766 | 7.58k | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: |
2767 | 7.58k | if (Op.getResNo() != 1) |
2768 | 1.60k | break; |
2769 | 5.98k | // The boolean result conforms to getBooleanContents. |
2770 | 5.98k | // If we know the result of a setcc has the top bits zero, use this info. |
2771 | 5.98k | // We know that we have an integer-based boolean since these operations |
2772 | 5.98k | // are only available for integer. |
2773 | 5.98k | if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == |
2774 | 5.98k | TargetLowering::ZeroOrOneBooleanContent && |
2775 | 5.98k | BitWidth > 14.84k ) |
2776 | 4.14k | Known.Zero.setBitsFrom(1); |
2777 | 5.98k | break; |
2778 | 894k | case ISD::SETCC: |
2779 | 894k | // If we know the result of a setcc has the top bits zero, use this info. |
2780 | 894k | if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == |
2781 | 894k | TargetLowering::ZeroOrOneBooleanContent && |
2782 | 894k | BitWidth > 1559k ) |
2783 | 513k | Known.Zero.setBitsFrom(1); |
2784 | 894k | break; |
2785 | 1.67M | case ISD::SHL: |
2786 | 1.67M | if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { |
2787 | 1.54M | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2788 | 1.54M | unsigned Shift = ShAmt->getZExtValue(); |
2789 | 1.54M | Known.Zero <<= Shift; |
2790 | 1.54M | Known.One <<= Shift; |
2791 | 1.54M | // Low bits are known zero. |
2792 | 1.54M | Known.Zero.setLowBits(Shift); |
2793 | 1.54M | } |
2794 | 1.67M | break; |
2795 | 1.42M | case ISD::SRL: |
2796 | 1.42M | if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { |
2797 | 1.37M | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2798 | 1.37M | unsigned Shift = ShAmt->getZExtValue(); |
2799 | 1.37M | Known.Zero.lshrInPlace(Shift); |
2800 | 1.37M | Known.One.lshrInPlace(Shift); |
2801 | 1.37M | // High bits are known zero. |
2802 | 1.37M | Known.Zero.setHighBits(Shift); |
2803 | 1.37M | } else if (auto *52.0k BV52.0k = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) { |
2804 | 5.23k | // If the shift amount is a vector of constants see if we can bound |
2805 | 5.23k | // the number of upper zero bits. |
2806 | 5.23k | unsigned ShiftAmountMin = BitWidth; |
2807 | 52.2k | for (unsigned i = 0; i != BV->getNumOperands(); ++i47.0k ) { |
2808 | 47.8k | if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) { |
2809 | 47.0k | const APInt &ShAmt = C->getAPIntValue(); |
2810 | 47.0k | if (ShAmt.ult(BitWidth)) { |
2811 | 47.0k | ShiftAmountMin = std::min<unsigned>(ShiftAmountMin, |
2812 | 47.0k | ShAmt.getZExtValue()); |
2813 | 47.0k | continue; |
2814 | 47.0k | } |
2815 | 811 | } |
2816 | 811 | // Don't know anything. |
2817 | 811 | ShiftAmountMin = 0; |
2818 | 811 | break; |
2819 | 811 | } |
2820 | 5.23k | |
2821 | 5.23k | Known.Zero.setHighBits(ShiftAmountMin); |
2822 | 5.23k | } |
2823 | 1.42M | break; |
2824 | 141k | case ISD::SRA: |
2825 | 141k | if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { |
2826 | 130k | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2827 | 130k | unsigned Shift = ShAmt->getZExtValue(); |
2828 | 130k | // Sign extend known zero/one bit (else is unknown). |
2829 | 130k | Known.Zero.ashrInPlace(Shift); |
2830 | 130k | Known.One.ashrInPlace(Shift); |
2831 | 130k | } |
2832 | 141k | break; |
2833 | 5.98k | case ISD::FSHL: |
2834 | 1.47k | case ISD::FSHR: |
2835 | 1.47k | if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) { |
2836 | 576 | unsigned Amt = C->getAPIntValue().urem(BitWidth); |
2837 | 576 | |
2838 | 576 | // For fshl, 0-shift returns the 1st arg. |
2839 | 576 | // For fshr, 0-shift returns the 2nd arg. |
2840 | 576 | if (Amt == 0) { |
2841 | 0 | Known = computeKnownBits(Op.getOperand(Opcode == ISD::FSHL ? 0 : 1), |
2842 | 0 | DemandedElts, Depth + 1); |
2843 | 0 | break; |
2844 | 0 | } |
2845 | 576 | |
2846 | 576 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) |
2847 | 576 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) |
2848 | 576 | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2849 | 576 | Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
2850 | 576 | if (Opcode == ISD::FSHL) { |
2851 | 364 | Known.One <<= Amt; |
2852 | 364 | Known.Zero <<= Amt; |
2853 | 364 | Known2.One.lshrInPlace(BitWidth - Amt); |
2854 | 364 | Known2.Zero.lshrInPlace(BitWidth - Amt); |
2855 | 364 | } else { |
2856 | 212 | Known.One <<= BitWidth - Amt; |
2857 | 212 | Known.Zero <<= BitWidth - Amt; |
2858 | 212 | Known2.One.lshrInPlace(Amt); |
2859 | 212 | Known2.Zero.lshrInPlace(Amt); |
2860 | 212 | } |
2861 | 576 | Known.One |= Known2.One; |
2862 | 576 | Known.Zero |= Known2.Zero; |
2863 | 576 | } |
2864 | 1.47k | break; |
2865 | 78.0k | case ISD::SIGN_EXTEND_INREG: { |
2866 | 78.0k | EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); |
2867 | 78.0k | unsigned EBits = EVT.getScalarSizeInBits(); |
2868 | 78.0k | |
2869 | 78.0k | // Sign extension. Compute the demanded bits in the result that are not |
2870 | 78.0k | // present in the input. |
2871 | 78.0k | APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); |
2872 | 78.0k | |
2873 | 78.0k | APInt InSignMask = APInt::getSignMask(EBits); |
2874 | 78.0k | APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); |
2875 | 78.0k | |
2876 | 78.0k | // If the sign extended bits are demanded, we know that the sign |
2877 | 78.0k | // bit is demanded. |
2878 | 78.0k | InSignMask = InSignMask.zext(BitWidth); |
2879 | 78.0k | if (NewBits.getBoolValue()) |
2880 | 78.0k | InputDemandedBits |= InSignMask; |
2881 | 78.0k | |
2882 | 78.0k | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2883 | 78.0k | Known.One &= InputDemandedBits; |
2884 | 78.0k | Known.Zero &= InputDemandedBits; |
2885 | 78.0k | |
2886 | 78.0k | // If the sign bit of the input is known set or clear, then we know the |
2887 | 78.0k | // top bits of the result. |
2888 | 78.0k | if (Known.Zero.intersects(InSignMask)) { // Input sign bit known clear |
2889 | 4 | Known.Zero |= NewBits; |
2890 | 4 | Known.One &= ~NewBits; |
2891 | 78.0k | } else if (Known.One.intersects(InSignMask)) { // Input sign bit known set |
2892 | 4 | Known.One |= NewBits; |
2893 | 4 | Known.Zero &= ~NewBits; |
2894 | 78.0k | } else { // Input sign bit unknown |
2895 | 78.0k | Known.Zero &= ~NewBits; |
2896 | 78.0k | Known.One &= ~NewBits; |
2897 | 78.0k | } |
2898 | 78.0k | break; |
2899 | 1.47k | } |
2900 | 6.97k | case ISD::CTTZ: |
2901 | 6.97k | case ISD::CTTZ_ZERO_UNDEF: { |
2902 | 6.97k | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2903 | 6.97k | // If we have a known 1, its position is our upper bound. |
2904 | 6.97k | unsigned PossibleTZ = Known2.countMaxTrailingZeros(); |
2905 | 6.97k | unsigned LowBits = Log2_32(PossibleTZ) + 1; |
2906 | 6.97k | Known.Zero.setBitsFrom(LowBits); |
2907 | 6.97k | break; |
2908 | 6.97k | } |
2909 | 46.3k | case ISD::CTLZ: |
2910 | 46.3k | case ISD::CTLZ_ZERO_UNDEF: { |
2911 | 46.3k | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2912 | 46.3k | // If we have a known 1, its position is our upper bound. |
2913 | 46.3k | unsigned PossibleLZ = Known2.countMaxLeadingZeros(); |
2914 | 46.3k | unsigned LowBits = Log2_32(PossibleLZ) + 1; |
2915 | 46.3k | Known.Zero.setBitsFrom(LowBits); |
2916 | 46.3k | break; |
2917 | 46.3k | } |
2918 | 46.3k | case ISD::CTPOP: { |
2919 | 4.73k | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2920 | 4.73k | // If we know some of the bits are zero, they can't be one. |
2921 | 4.73k | unsigned PossibleOnes = Known2.countMaxPopulation(); |
2922 | 4.73k | Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1); |
2923 | 4.73k | break; |
2924 | 46.3k | } |
2925 | 4.21M | case ISD::LOAD: { |
2926 | 4.21M | LoadSDNode *LD = cast<LoadSDNode>(Op); |
2927 | 4.21M | const Constant *Cst = TLI->getTargetConstantFromLoad(LD); |
2928 | 4.21M | if (ISD::isNON_EXTLoad(LD) && Cst3.39M ) { |
2929 | 179k | // Determine any common known bits from the loaded constant pool value. |
2930 | 179k | Type *CstTy = Cst->getType(); |
2931 | 179k | if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) { |
2932 | 179k | // If its a vector splat, then we can (quickly) reuse the scalar path. |
2933 | 179k | // NOTE: We assume all elements match and none are UNDEF. |
2934 | 179k | if (CstTy->isVectorTy()) { |
2935 | 179k | if (const Constant *Splat = Cst->getSplatValue()) { |
2936 | 91.2k | Cst = Splat; |
2937 | 91.2k | CstTy = Cst->getType(); |
2938 | 91.2k | } |
2939 | 179k | } |
2940 | 179k | // TODO - do we need to handle different bitwidths? |
2941 | 179k | if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()88.0k ) { |
2942 | 87.0k | // Iterate across all vector elements finding common known bits. |
2943 | 87.0k | Known.One.setAllBits(); |
2944 | 87.0k | Known.Zero.setAllBits(); |
2945 | 1.12M | for (unsigned i = 0; i != NumElts; ++i1.04M ) { |
2946 | 1.04M | if (!DemandedElts[i]) |
2947 | 372k | continue; |
2948 | 676k | if (Constant *Elt = Cst->getAggregateElement(i)) { |
2949 | 676k | if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { |
2950 | 667k | const APInt &Value = CInt->getValue(); |
2951 | 667k | Known.One &= Value; |
2952 | 667k | Known.Zero &= ~Value; |
2953 | 667k | continue; |
2954 | 667k | } |
2955 | 9.19k | if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { |
2956 | 844 | APInt Value = CFP->getValueAPF().bitcastToAPInt(); |
2957 | 844 | Known.One &= Value; |
2958 | 844 | Known.Zero &= ~Value; |
2959 | 844 | continue; |
2960 | 844 | } |
2961 | 8.34k | } |
2962 | 8.34k | Known.One.clearAllBits(); |
2963 | 8.34k | Known.Zero.clearAllBits(); |
2964 | 8.34k | break; |
2965 | 8.34k | } |
2966 | 92.4k | } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) { |
2967 | 91.0k | if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { |
2968 | 85.6k | const APInt &Value = CInt->getValue(); |
2969 | 85.6k | Known.One = Value; |
2970 | 85.6k | Known.Zero = ~Value; |
2971 | 85.6k | } else if (auto *5.38k CFP5.38k = dyn_cast<ConstantFP>(Cst)) { |
2972 | 5.38k | APInt Value = CFP->getValueAPF().bitcastToAPInt(); |
2973 | 5.38k | Known.One = Value; |
2974 | 5.38k | Known.Zero = ~Value; |
2975 | 5.38k | } |
2976 | 91.0k | } |
2977 | 179k | } |
2978 | 4.03M | } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0336k ) { |
2979 | 333k | // If this is a ZEXTLoad and we are looking at the loaded value. |
2980 | 333k | EVT VT = LD->getMemoryVT(); |
2981 | 333k | unsigned MemBits = VT.getScalarSizeInBits(); |
2982 | 333k | Known.Zero.setBitsFrom(MemBits); |
2983 | 3.70M | } else if (const MDNode *Ranges = LD->getRanges()) { |
2984 | 36.2k | if (LD->getExtensionType() == ISD::NON_EXTLOAD) |
2985 | 10.4k | computeKnownBitsFromRangeMetadata(*Ranges, Known); |
2986 | 36.2k | } |
2987 | 4.21M | break; |
2988 | 46.3k | } |
2989 | 46.3k | case ISD::ZERO_EXTEND_VECTOR_INREG: { |
2990 | 22.7k | EVT InVT = Op.getOperand(0).getValueType(); |
2991 | 22.7k | APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); |
2992 | 22.7k | Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); |
2993 | 22.7k | Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); |
2994 | 22.7k | break; |
2995 | 46.3k | } |
2996 | 499k | case ISD::ZERO_EXTEND: { |
2997 | 499k | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
2998 | 499k | Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); |
2999 | 499k | break; |
3000 | 46.3k | } |
3001 | 46.3k | case ISD::SIGN_EXTEND_VECTOR_INREG: { |
3002 | 2.49k | EVT InVT = Op.getOperand(0).getValueType(); |
3003 | 2.49k | APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); |
3004 | 2.49k | Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); |
3005 | 2.49k | // If the sign bit is known to be zero or one, then sext will extend |
3006 | 2.49k | // it to the top bits, else it will just zext. |
3007 | 2.49k | Known = Known.sext(BitWidth); |
3008 | 2.49k | break; |
3009 | 46.3k | } |
3010 | 312k | case ISD::SIGN_EXTEND: { |
3011 | 312k | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3012 | 312k | // If the sign bit is known to be zero or one, then sext will extend |
3013 | 312k | // it to the top bits, else it will just zext. |
3014 | 312k | Known = Known.sext(BitWidth); |
3015 | 312k | break; |
3016 | 46.3k | } |
3017 | 218k | case ISD::ANY_EXTEND: { |
3018 | 218k | Known = computeKnownBits(Op.getOperand(0), Depth+1); |
3019 | 218k | Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */); |
3020 | 218k | break; |
3021 | 46.3k | } |
3022 | 1.06M | case ISD::TRUNCATE: { |
3023 | 1.06M | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3024 | 1.06M | Known = Known.trunc(BitWidth); |
3025 | 1.06M | break; |
3026 | 46.3k | } |
3027 | 880k | case ISD::AssertZext: { |
3028 | 880k | EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); |
3029 | 880k | APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); |
3030 | 880k | Known = computeKnownBits(Op.getOperand(0), Depth+1); |
3031 | 880k | Known.Zero |= (~InMask); |
3032 | 880k | Known.One &= (~Known.Zero); |
3033 | 880k | break; |
3034 | 46.3k | } |
3035 | 46.3k | case ISD::FGETSIGN: |
3036 | 34 | // All bits are zero except the low bit. |
3037 | 34 | Known.Zero.setBitsFrom(1); |
3038 | 34 | break; |
3039 | 46.3k | case ISD::USUBO: |
3040 | 20.7k | case ISD::SSUBO: |
3041 | 20.7k | if (Op.getResNo() == 1) { |
3042 | 11.6k | // If we know the result of a setcc has the top bits zero, use this info. |
3043 | 11.6k | if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == |
3044 | 11.6k | TargetLowering::ZeroOrOneBooleanContent && |
3045 | 11.6k | BitWidth > 14.96k ) |
3046 | 2.75k | Known.Zero.setBitsFrom(1); |
3047 | 11.6k | break; |
3048 | 11.6k | } |
3049 | 9.07k | LLVM_FALLTHROUGH; |
3050 | 519k | case ISD::SUB: |
3051 | 519k | case ISD::SUBC: { |
3052 | 519k | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3053 | 519k | Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
3054 | 519k | Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false, |
3055 | 519k | Known, Known2); |
3056 | 519k | break; |
3057 | 519k | } |
3058 | 519k | case ISD::UADDO: |
3059 | 48.1k | case ISD::SADDO: |
3060 | 48.1k | case ISD::ADDCARRY: |
3061 | 48.1k | if (Op.getResNo() == 1) { |
3062 | 16.1k | // If we know the result of a setcc has the top bits zero, use this info. |
3063 | 16.1k | if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == |
3064 | 16.1k | TargetLowering::ZeroOrOneBooleanContent && |
3065 | 16.1k | BitWidth > 113.7k ) |
3066 | 10.7k | Known.Zero.setBitsFrom(1); |
3067 | 16.1k | break; |
3068 | 16.1k | } |
3069 | 32.0k | LLVM_FALLTHROUGH; |
3070 | 6.63M | case ISD::ADD: |
3071 | 6.63M | case ISD::ADDC: |
3072 | 6.63M | case ISD::ADDE: { |
3073 | 6.63M | assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here."); |
3074 | 6.63M | |
3075 | 6.63M | // With ADDE and ADDCARRY, a carry bit may be added in. |
3076 | 6.63M | KnownBits Carry(1); |
3077 | 6.63M | if (Opcode == ISD::ADDE) |
3078 | 3.78k | // Can't track carry from glue, set carry to unknown. |
3079 | 3.78k | Carry.resetAll(); |
3080 | 6.62M | else if (Opcode == ISD::ADDCARRY) |
3081 | 17.0k | // TODO: Compute known bits for the carry operand. Not sure if it is worth |
3082 | 17.0k | // the trouble (how often will we find a known carry bit). And I haven't |
3083 | 17.0k | // tested this very much yet, but something like this might work: |
3084 | 17.0k | // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); |
3085 | 17.0k | // Carry = Carry.zextOrTrunc(1, false); |
3086 | 17.0k | Carry.resetAll(); |
3087 | 6.61M | else |
3088 | 6.61M | Carry.setAllZero(); |
3089 | 6.63M | |
3090 | 6.63M | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3091 | 6.63M | Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
3092 | 6.63M | Known = KnownBits::computeForAddCarry(Known, Known2, Carry); |
3093 | 6.63M | break; |
3094 | 6.63M | } |
3095 | 6.63M | case ISD::SREM: |
3096 | 2.16k | if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) { |
3097 | 929 | const APInt &RA = Rem->getAPIntValue().abs(); |
3098 | 929 | if (RA.isPowerOf2()) { |
3099 | 52 | APInt LowBits = RA - 1; |
3100 | 52 | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3101 | 52 | |
3102 | 52 | // The low bits of the first operand are unchanged by the srem. |
3103 | 52 | Known.Zero = Known2.Zero & LowBits; |
3104 | 52 | Known.One = Known2.One & LowBits; |
3105 | 52 | |
3106 | 52 | // If the first operand is non-negative or has all low bits zero, then |
3107 | 52 | // the upper bits are all zero. |
3108 | 52 | if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits)) |
3109 | 8 | Known.Zero |= ~LowBits; |
3110 | 52 | |
3111 | 52 | // If the first operand is negative and not all low bits are zero, then |
3112 | 52 | // the upper bits are all one. |
3113 | 52 | if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0)3 ) |
3114 | 3 | Known.One |= ~LowBits; |
3115 | 52 | assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); |
3116 | 52 | } |
3117 | 929 | } |
3118 | 2.16k | break; |
3119 | 6.63M | case ISD::UREM: { |
3120 | 5.60k | if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) { |
3121 | 3.97k | const APInt &RA = Rem->getAPIntValue(); |
3122 | 3.97k | if (RA.isPowerOf2()) { |
3123 | 210 | APInt LowBits = (RA - 1); |
3124 | 210 | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3125 | 210 | |
3126 | 210 | // The upper bits are all zero, the lower ones are unchanged. |
3127 | 210 | Known.Zero = Known2.Zero | ~LowBits; |
3128 | 210 | Known.One = Known2.One & LowBits; |
3129 | 210 | break; |
3130 | 210 | } |
3131 | 5.39k | } |
3132 | 5.39k | |
3133 | 5.39k | // Since the result is less than or equal to either operand, any leading |
3134 | 5.39k | // zero bits in either operand must also exist in the result. |
3135 | 5.39k | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3136 | 5.39k | Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
3137 | 5.39k | |
3138 | 5.39k | uint32_t Leaders = |
3139 | 5.39k | std::max(Known.countMinLeadingZeros(), Known2.countMinLeadingZeros()); |
3140 | 5.39k | Known.resetAll(); |
3141 | 5.39k | Known.Zero.setHighBits(Leaders); |
3142 | 5.39k | break; |
3143 | 5.39k | } |
3144 | 5.39k | case ISD::EXTRACT_ELEMENT: { |
3145 | 144 | Known = computeKnownBits(Op.getOperand(0), Depth+1); |
3146 | 144 | const unsigned Index = Op.getConstantOperandVal(1); |
3147 | 144 | const unsigned EltBitWidth = Op.getValueSizeInBits(); |
3148 | 144 | |
3149 | 144 | // Remove low part of known bits mask |
3150 | 144 | Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth); |
3151 | 144 | Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth); |
3152 | 144 | |
3153 | 144 | // Remove high part of known bit mask |
3154 | 144 | Known = Known.trunc(EltBitWidth); |
3155 | 144 | break; |
3156 | 5.39k | } |
3157 | 954k | case ISD::EXTRACT_VECTOR_ELT: { |
3158 | 954k | SDValue InVec = Op.getOperand(0); |
3159 | 954k | SDValue EltNo = Op.getOperand(1); |
3160 | 954k | EVT VecVT = InVec.getValueType(); |
3161 | 954k | const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); |
3162 | 954k | const unsigned NumSrcElts = VecVT.getVectorNumElements(); |
3163 | 954k | // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know |
3164 | 954k | // anything about the extended bits. |
3165 | 954k | if (BitWidth > EltBitWidth) |
3166 | 253k | Known = Known.trunc(EltBitWidth); |
3167 | 954k | ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); |
3168 | 954k | if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)953k ) { |
3169 | 953k | // If we know the element index, just demand that vector element. |
3170 | 953k | unsigned Idx = ConstEltNo->getZExtValue(); |
3171 | 953k | APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); |
3172 | 953k | Known = computeKnownBits(InVec, DemandedElt, Depth + 1); |
3173 | 953k | } else { |
3174 | 615 | // Unknown element index, so ignore DemandedElts and demand them all. |
3175 | 615 | Known = computeKnownBits(InVec, Depth + 1); |
3176 | 615 | } |
3177 | 954k | if (BitWidth > EltBitWidth) |
3178 | 253k | Known = Known.zext(BitWidth, false /* => any extend */); |
3179 | 954k | break; |
3180 | 5.39k | } |
3181 | 15.9k | case ISD::INSERT_VECTOR_ELT: { |
3182 | 15.9k | SDValue InVec = Op.getOperand(0); |
3183 | 15.9k | SDValue InVal = Op.getOperand(1); |
3184 | 15.9k | SDValue EltNo = Op.getOperand(2); |
3185 | 15.9k | |
3186 | 15.9k | ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); |
3187 | 15.9k | if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)15.9k ) { |
3188 | 15.9k | // If we know the element index, split the demand between the |
3189 | 15.9k | // source vector and the inserted element. |
3190 | 15.9k | Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth); |
3191 | 15.9k | unsigned EltIdx = CEltNo->getZExtValue(); |
3192 | 15.9k | |
3193 | 15.9k | // If we demand the inserted element then add its common known bits. |
3194 | 15.9k | if (DemandedElts[EltIdx]) { |
3195 | 13.9k | Known2 = computeKnownBits(InVal, Depth + 1); |
3196 | 13.9k | Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); |
3197 | 13.9k | Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); |
3198 | 13.9k | } |
3199 | 15.9k | |
3200 | 15.9k | // If we demand the source vector then add its common known bits, ensuring |
3201 | 15.9k | // that we don't demand the inserted element. |
3202 | 15.9k | APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx)); |
3203 | 15.9k | if (!!VectorElts) { |
3204 | 11.0k | Known2 = computeKnownBits(InVec, VectorElts, Depth + 1); |
3205 | 11.0k | Known.One &= Known2.One; |
3206 | 11.0k | Known.Zero &= Known2.Zero; |
3207 | 11.0k | } |
3208 | 15.9k | } else { |
3209 | 6 | // Unknown element index, so ignore DemandedElts and demand them all. |
3210 | 6 | Known = computeKnownBits(InVec, Depth + 1); |
3211 | 6 | Known2 = computeKnownBits(InVal, Depth + 1); |
3212 | 6 | Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); |
3213 | 6 | Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); |
3214 | 6 | } |
3215 | 15.9k | break; |
3216 | 5.39k | } |
3217 | 5.39k | case ISD::BITREVERSE: { |
3218 | 936 | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3219 | 936 | Known.Zero = Known2.Zero.reverseBits(); |
3220 | 936 | Known.One = Known2.One.reverseBits(); |
3221 | 936 | break; |
3222 | 5.39k | } |
3223 | 6.98k | case ISD::BSWAP: { |
3224 | 6.98k | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3225 | 6.98k | Known.Zero = Known2.Zero.byteSwap(); |
3226 | 6.98k | Known.One = Known2.One.byteSwap(); |
3227 | 6.98k | break; |
3228 | 5.39k | } |
3229 | 6.11k | case ISD::ABS: { |
3230 | 6.11k | Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3231 | 6.11k | |
3232 | 6.11k | // If the source's MSB is zero then we know the rest of the bits already. |
3233 | 6.11k | if (Known2.isNonNegative()) { |
3234 | 0 | Known.Zero = Known2.Zero; |
3235 | 0 | Known.One = Known2.One; |
3236 | 0 | break; |
3237 | 0 | } |
3238 | 6.11k | |
3239 | 6.11k | // We only know that the absolute values's MSB will be zero iff there is |
3240 | 6.11k | // a set bit that isn't the sign bit (otherwise it could be INT_MIN). |
3241 | 6.11k | Known2.One.clearSignBit(); |
3242 | 6.11k | if (Known2.One.getBoolValue()) { |
3243 | 2 | Known.Zero = APInt::getSignMask(BitWidth); |
3244 | 2 | break; |
3245 | 2 | } |
3246 | 6.10k | break; |
3247 | 6.10k | } |
3248 | 35.6k | case ISD::UMIN: { |
3249 | 35.6k | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3250 | 35.6k | Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
3251 | 35.6k | |
3252 | 35.6k | // UMIN - we know that the result will have the maximum of the |
3253 | 35.6k | // known zero leading bits of the inputs. |
3254 | 35.6k | unsigned LeadZero = Known.countMinLeadingZeros(); |
3255 | 35.6k | LeadZero = std::max(LeadZero, Known2.countMinLeadingZeros()); |
3256 | 35.6k | |
3257 | 35.6k | Known.Zero &= Known2.Zero; |
3258 | 35.6k | Known.One &= Known2.One; |
3259 | 35.6k | Known.Zero.setHighBits(LeadZero); |
3260 | 35.6k | break; |
3261 | 6.10k | } |
3262 | 26.5k | case ISD::UMAX: { |
3263 | 26.5k | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3264 | 26.5k | Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
3265 | 26.5k | |
3266 | 26.5k | // UMAX - we know that the result will have the maximum of the |
3267 | 26.5k | // known one leading bits of the inputs. |
3268 | 26.5k | unsigned LeadOne = Known.countMinLeadingOnes(); |
3269 | 26.5k | LeadOne = std::max(LeadOne, Known2.countMinLeadingOnes()); |
3270 | 26.5k | |
3271 | 26.5k | Known.Zero &= Known2.Zero; |
3272 | 26.5k | Known.One &= Known2.One; |
3273 | 26.5k | Known.One.setHighBits(LeadOne); |
3274 | 26.5k | break; |
3275 | 6.10k | } |
3276 | 80.2k | case ISD::SMIN: |
3277 | 80.2k | case ISD::SMAX: { |
3278 | 80.2k | // If we have a clamp pattern, we know that the number of sign bits will be |
3279 | 80.2k | // the minimum of the clamp min/max range. |
3280 | 80.2k | bool IsMax = (Opcode == ISD::SMAX); |
3281 | 80.2k | ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; |
3282 | 80.2k | if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) |
3283 | 13.1k | if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN9.78k : ISD::SMAX3.39k )) |
3284 | 8.12k | CstHigh = |
3285 | 8.12k | isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); |
3286 | 80.2k | if (CstLow && CstHigh13.1k ) { |
3287 | 8.10k | if (!IsMax) |
3288 | 370 | std::swap(CstLow, CstHigh); |
3289 | 8.10k | |
3290 | 8.10k | const APInt &ValueLow = CstLow->getAPIntValue(); |
3291 | 8.10k | const APInt &ValueHigh = CstHigh->getAPIntValue(); |
3292 | 8.10k | if (ValueLow.sle(ValueHigh)) { |
3293 | 8.10k | unsigned LowSignBits = ValueLow.getNumSignBits(); |
3294 | 8.10k | unsigned HighSignBits = ValueHigh.getNumSignBits(); |
3295 | 8.10k | unsigned MinSignBits = std::min(LowSignBits, HighSignBits); |
3296 | 8.10k | if (ValueLow.isNegative() && ValueHigh.isNegative()2.65k ) { |
3297 | 2 | Known.One.setHighBits(MinSignBits); |
3298 | 2 | break; |
3299 | 2 | } |
3300 | 8.10k | if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()5.44k ) { |
3301 | 5.44k | Known.Zero.setHighBits(MinSignBits); |
3302 | 5.44k | break; |
3303 | 5.44k | } |
3304 | 74.7k | } |
3305 | 8.10k | } |
3306 | 74.7k | |
3307 | 74.7k | // Fallback - just get the shared known bits of the operands. |
3308 | 74.7k | Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
3309 | 74.7k | if (Known.isUnknown()) break74.6k ; // Early-out |
3310 | 106 | Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
3311 | 106 | Known.Zero &= Known2.Zero; |
3312 | 106 | Known.One &= Known2.One; |
3313 | 106 | break; |
3314 | 106 | } |
3315 | 2.70M | case ISD::FrameIndex: |
3316 | 2.70M | case ISD::TargetFrameIndex: |
3317 | 2.70M | TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth); |
3318 | 2.70M | break; |
3319 | 2.70M | |
3320 | 17.9M | default: |
3321 | 17.9M | if (Opcode < ISD::BUILTIN_OP_END) |
3322 | 15.6M | break; |
3323 | 2.29M | LLVM_FALLTHROUGH; |
3324 | 2.75M | case ISD::INTRINSIC_WO_CHAIN: |
3325 | 2.75M | case ISD::INTRINSIC_W_CHAIN: |
3326 | 2.75M | case ISD::INTRINSIC_VOID: |
3327 | 2.75M | // Allow the target to implement this method for its nodes. |
3328 | 2.75M | TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth); |
3329 | 2.75M | break; |
3330 | 49.3M | } |
3331 | 49.3M | |
3332 | 49.3M | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
3333 | 49.3M | return Known; |
3334 | 49.3M | } |
3335 | | |
3336 | | SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, |
3337 | 14.7k | SDValue N1) const { |
3338 | 14.7k | // X + 0 never overflow |
3339 | 14.7k | if (isNullConstant(N1)) |
3340 | 0 | return OFK_Never; |
3341 | 14.7k | |
3342 | 14.7k | KnownBits N1Known = computeKnownBits(N1); |
3343 | 14.7k | if (N1Known.Zero.getBoolValue()) { |
3344 | 7.41k | KnownBits N0Known = computeKnownBits(N0); |
3345 | 7.41k | |
3346 | 7.41k | bool overflow; |
3347 | 7.41k | (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow); |
3348 | 7.41k | if (!overflow) |
3349 | 273 | return OFK_Never; |
3350 | 14.4k | } |
3351 | 14.4k | |
3352 | 14.4k | // mulhi + 1 never overflow |
3353 | 14.4k | if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 12.66k && |
3354 | 14.4k | (~N1Known.Zero & 0x01) == ~N1Known.Zero410 ) |
3355 | 237 | return OFK_Never; |
3356 | 14.2k | |
3357 | 14.2k | if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 11.46k ) { |
3358 | 339 | KnownBits N0Known = computeKnownBits(N0); |
3359 | 339 | |
3360 | 339 | if ((~N0Known.Zero & 0x01) == ~N0Known.Zero) |
3361 | 0 | return OFK_Never; |
3362 | 14.2k | } |
3363 | 14.2k | |
3364 | 14.2k | return OFK_Sometime; |
3365 | 14.2k | } |
3366 | | |
3367 | 72.5k | bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { |
3368 | 72.5k | EVT OpVT = Val.getValueType(); |
3369 | 72.5k | unsigned BitWidth = OpVT.getScalarSizeInBits(); |
3370 | 72.5k | |
3371 | 72.5k | // Is the constant a known power of 2? |
3372 | 72.5k | if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val)) |
3373 | 62.7k | return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); |
3374 | 9.78k | |
3375 | 9.78k | // A left-shift of a constant one will have exactly one bit set because |
3376 | 9.78k | // shifting the bit off the end is undefined. |
3377 | 9.78k | if (Val.getOpcode() == ISD::SHL) { |
3378 | 45 | auto *C = isConstOrConstSplat(Val.getOperand(0)); |
3379 | 45 | if (C && C->getAPIntValue() == 142 ) |
3380 | 39 | return true; |
3381 | 9.74k | } |
3382 | 9.74k | |
3383 | 9.74k | // Similarly, a logical right-shift of a constant sign-bit will have exactly |
3384 | 9.74k | // one bit set. |
3385 | 9.74k | if (Val.getOpcode() == ISD::SRL) { |
3386 | 23 | auto *C = isConstOrConstSplat(Val.getOperand(0)); |
3387 | 23 | if (C && C->getAPIntValue().isSignMask()5 ) |
3388 | 5 | return true; |
3389 | 9.73k | } |
3390 | 9.73k | |
3391 | 9.73k | // Are all operands of a build vector constant powers of two? |
3392 | 9.73k | if (Val.getOpcode() == ISD::BUILD_VECTOR) |
3393 | 30.6k | if (7.32k llvm::all_of(Val->ops(), [BitWidth](SDValue E) 7.32k { |
3394 | 30.6k | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E)) |
3395 | 30.4k | return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); |
3396 | 207 | return false; |
3397 | 207 | })) |
3398 | 1.81k | return true; |
3399 | 7.92k | |
3400 | 7.92k | // More could be done here, though the above checks are enough |
3401 | 7.92k | // to handle some common cases. |
3402 | 7.92k | |
3403 | 7.92k | // Fall back to computeKnownBits to catch other known cases. |
3404 | 7.92k | KnownBits Known = computeKnownBits(Val); |
3405 | 7.92k | return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1)311 ; |
3406 | 7.92k | } |
3407 | | |
3408 | 1.62M | unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { |
3409 | 1.62M | EVT VT = Op.getValueType(); |
3410 | 1.62M | APInt DemandedElts = VT.isVector() |
3411 | 1.62M | ? APInt::getAllOnesValue(VT.getVectorNumElements())147k |
3412 | 1.62M | : APInt(1, 1)1.47M ; |
3413 | 1.62M | return ComputeNumSignBits(Op, DemandedElts, Depth); |
3414 | 1.62M | } |
3415 | | |
3416 | | unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, |
3417 | 1.96M | unsigned Depth) const { |
3418 | 1.96M | EVT VT = Op.getValueType(); |
3419 | 1.96M | assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!"); |
3420 | 1.96M | unsigned VTBits = VT.getScalarSizeInBits(); |
3421 | 1.96M | unsigned NumElts = DemandedElts.getBitWidth(); |
3422 | 1.96M | unsigned Tmp, Tmp2; |
3423 | 1.96M | unsigned FirstAnswer = 1; |
3424 | 1.96M | |
3425 | 1.96M | if (auto *C = dyn_cast<ConstantSDNode>(Op)) { |
3426 | 303k | const APInt &Val = C->getAPIntValue(); |
3427 | 303k | return Val.getNumSignBits(); |
3428 | 303k | } |
3429 | 1.65M | |
3430 | 1.65M | if (Depth == 6) |
3431 | 1.91k | return 1; // Limit search depth. |
3432 | 1.65M | |
3433 | 1.65M | if (!DemandedElts) |
3434 | 0 | return 1; // No demanded elts, better to assume we don't know anything. |
3435 | 1.65M | |
3436 | 1.65M | unsigned Opcode = Op.getOpcode(); |
3437 | 1.65M | switch (Opcode) { |
3438 | 1.65M | default: break938k ; |
3439 | 1.65M | case ISD::AssertSext: |
3440 | 22.8k | Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); |
3441 | 22.8k | return VTBits-Tmp+1; |
3442 | 1.65M | case ISD::AssertZext: |
3443 | 30.4k | Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); |
3444 | 30.4k | return VTBits-Tmp; |
3445 | 1.65M | |
3446 | 1.65M | case ISD::BUILD_VECTOR: |
3447 | 15.5k | Tmp = VTBits; |
3448 | 76.3k | for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1)68.2k ; ++i60.7k ) { |
3449 | 60.7k | if (!DemandedElts[i]) |
3450 | 15.2k | continue; |
3451 | 45.4k | |
3452 | 45.4k | SDValue SrcOp = Op.getOperand(i); |
3453 | 45.4k | Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1); |
3454 | 45.4k | |
3455 | 45.4k | // BUILD_VECTOR can implicitly truncate sources, we must handle this. |
3456 | 45.4k | if (SrcOp.getValueSizeInBits() != VTBits) { |
3457 | 1.83k | assert(SrcOp.getValueSizeInBits() > VTBits && |
3458 | 1.83k | "Expected BUILD_VECTOR implicit truncation"); |
3459 | 1.83k | unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits; |
3460 | 1.83k | Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits1.79k : 143 ); |
3461 | 1.83k | } |
3462 | 45.4k | Tmp = std::min(Tmp, Tmp2); |
3463 | 45.4k | } |
3464 | 15.5k | return Tmp; |
3465 | 1.65M | |
3466 | 1.65M | case ISD::VECTOR_SHUFFLE: { |
3467 | 5.81k | // Collect the minimum number of sign bits that are shared by every vector |
3468 | 5.81k | // element referenced by the shuffle. |
3469 | 5.81k | APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); |
3470 | 5.81k | const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); |
3471 | 5.81k | assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); |
3472 | 36.7k | for (unsigned i = 0; i != NumElts; ++i30.8k ) { |
3473 | 31.3k | int M = SVN->getMaskElt(i); |
3474 | 31.3k | if (!DemandedElts[i]) |
3475 | 4.85k | continue; |
3476 | 26.4k | // For UNDEF elements, we don't know anything about the common state of |
3477 | 26.4k | // the shuffle result. |
3478 | 26.4k | if (M < 0) |
3479 | 455 | return 1; |
3480 | 26.0k | if ((unsigned)M < NumElts) |
3481 | 23.9k | DemandedLHS.setBit((unsigned)M % NumElts); |
3482 | 2.12k | else |
3483 | 2.12k | DemandedRHS.setBit((unsigned)M % NumElts); |
3484 | 26.0k | } |
3485 | 5.81k | Tmp = std::numeric_limits<unsigned>::max(); |
3486 | 5.36k | if (!!DemandedLHS) |
3487 | 5.35k | Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); |
3488 | 5.36k | if (!!DemandedRHS) { |
3489 | 699 | Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1); |
3490 | 699 | Tmp = std::min(Tmp, Tmp2); |
3491 | 699 | } |
3492 | 5.36k | // If we don't know anything, early out and try computeKnownBits fall-back. |
3493 | 5.36k | if (Tmp == 1) |
3494 | 1.41k | break; |
3495 | 3.94k | assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); |
3496 | 3.94k | return Tmp; |
3497 | 3.94k | } |
3498 | 3.94k | |
3499 | 37.2k | case ISD::BITCAST: { |
3500 | 37.2k | SDValue N0 = Op.getOperand(0); |
3501 | 37.2k | EVT SrcVT = N0.getValueType(); |
3502 | 37.2k | unsigned SrcBits = SrcVT.getScalarSizeInBits(); |
3503 | 37.2k | |
3504 | 37.2k | // Ignore bitcasts from unsupported types.. |
3505 | 37.2k | if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint()5.62k )) |
3506 | 9 | break; |
3507 | 37.2k | |
3508 | 37.2k | // Fast handling of 'identity' bitcasts. |
3509 | 37.2k | if (VTBits == SrcBits) |
3510 | 4.46k | return ComputeNumSignBits(N0, DemandedElts, Depth + 1); |
3511 | 32.7k | |
3512 | 32.7k | bool IsLE = getDataLayout().isLittleEndian(); |
3513 | 32.7k | |
3514 | 32.7k | // Bitcast 'large element' scalar/vector to 'small element' vector. |
3515 | 32.7k | if ((SrcBits % VTBits) == 0) { |
3516 | 15.2k | assert(VT.isVector() && "Expected bitcast to vector"); |
3517 | 15.2k | |
3518 | 15.2k | unsigned Scale = SrcBits / VTBits; |
3519 | 15.2k | APInt SrcDemandedElts(NumElts / Scale, 0); |
3520 | 137k | for (unsigned i = 0; i != NumElts; ++i122k ) |
3521 | 122k | if (DemandedElts[i]) |
3522 | 108k | SrcDemandedElts.setBit(i / Scale); |
3523 | 15.2k | |
3524 | 15.2k | // Fast case - sign splat can be simply split across the small elements. |
3525 | 15.2k | Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1); |
3526 | 15.2k | if (Tmp == SrcBits) |
3527 | 6.68k | return VTBits; |
3528 | 8.57k | |
3529 | 8.57k | // Slow case - determine how far the sign extends into each sub-element. |
3530 | 8.57k | Tmp2 = VTBits; |
3531 | 18.7k | for (unsigned i = 0; i != NumElts; ++i10.1k ) |
3532 | 15.4k | if (DemandedElts[i]) { |
3533 | 11.7k | unsigned SubOffset = i % Scale; |
3534 | 11.7k | SubOffset = (IsLE ? ((Scale - 1) - SubOffset)11.5k : SubOffset160 ); |
3535 | 11.7k | SubOffset = SubOffset * VTBits; |
3536 | 11.7k | if (Tmp <= SubOffset) |
3537 | 5.27k | return 1; |
3538 | 6.47k | Tmp2 = std::min(Tmp2, Tmp - SubOffset); |
3539 | 6.47k | } |
3540 | 8.57k | return Tmp23.30k ; |
3541 | 17.4k | } |
3542 | 17.4k | break; |
3543 | 17.4k | } |
3544 | 17.4k | |
3545 | 17.4k | case ISD::SIGN_EXTEND: |
3546 | 16.1k | Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); |
3547 | 16.1k | return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp; |
3548 | 17.4k | case ISD::SIGN_EXTEND_INREG: |
3549 | 6.65k | // Max of the input and what this extends. |
3550 | 6.65k | Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); |
3551 | 6.65k | Tmp = VTBits-Tmp+1; |
3552 | 6.65k | Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); |
3553 | 6.65k | return std::max(Tmp, Tmp2); |
3554 | 17.4k | case ISD::SIGN_EXTEND_VECTOR_INREG: { |
3555 | 160 | SDValue Src = Op.getOperand(0); |
3556 | 160 | EVT SrcVT = Src.getValueType(); |
3557 | 160 | APInt DemandedSrcElts = DemandedElts.zextOrSelf(SrcVT.getVectorNumElements()); |
3558 | 160 | Tmp = VTBits - SrcVT.getScalarSizeInBits(); |
3559 | 160 | return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; |
3560 | 17.4k | } |
3561 | 17.4k | |
3562 | 17.4k | case ISD::SRA: |
3563 | 6.92k | Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); |
3564 | 6.92k | // SRA X, C -> adds C sign bits. |
3565 | 6.92k | if (ConstantSDNode *C = |
3566 | 6.17k | isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { |
3567 | 6.17k | APInt ShiftVal = C->getAPIntValue(); |
3568 | 6.17k | ShiftVal += Tmp; |
3569 | 6.17k | Tmp = ShiftVal.uge(VTBits) ? VTBits981 : ShiftVal.getZExtValue()5.19k ; |
3570 | 6.17k | } |
3571 | 6.92k | return Tmp; |
3572 | 38.0k | case ISD::SHL: |
3573 | 38.0k | if (ConstantSDNode *C = |
3574 | 32.9k | isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { |
3575 | 32.9k | // shl destroys sign bits. |
3576 | 32.9k | Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); |
3577 | 32.9k | if (C->getAPIntValue().uge(VTBits) || // Bad shift. |
3578 | 32.9k | C->getAPIntValue().uge(Tmp)) break18.8k ; // Shifted all sign bits out. |
3579 | 14.0k | return Tmp - C->getZExtValue(); |
3580 | 14.0k | } |
3581 | 5.10k | break; |
3582 | 78.1k | case ISD::AND: |
3583 | 78.1k | case ISD::OR: |
3584 | 78.1k | case ISD::XOR: // NOT is handled here. |
3585 | 78.1k | // Logical binary ops preserve the number of sign bits at the worst. |
3586 | 78.1k | Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); |
3587 | 78.1k | if (Tmp != 1) { |
3588 | 34.5k | Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); |
3589 | 34.5k | FirstAnswer = std::min(Tmp, Tmp2); |
3590 | 34.5k | // We computed what we know about the sign bits as our first |
3591 | 34.5k | // answer. Now proceed to the generic code that uses |
3592 | 34.5k | // computeKnownBits, and pick whichever answer is better. |
3593 | 34.5k | } |
3594 | 78.1k | break; |
3595 | 78.1k | |
3596 | 78.1k | case ISD::SELECT: |
3597 | 2.31k | case ISD::VSELECT: |
3598 | 2.31k | Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); |
3599 | 2.31k | if (Tmp == 1) return 11.90k ; // Early out. |
3600 | 407 | Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); |
3601 | 407 | return std::min(Tmp, Tmp2); |
3602 | 407 | case ISD::SELECT_CC: |
3603 | 254 | Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); |
3604 | 254 | if (Tmp == 1) return 1142 ; // Early out. |
3605 | 112 | Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1); |
3606 | 112 | return std::min(Tmp, Tmp2); |
3607 | 112 | |
3608 | 1.62k | case ISD::SMIN: |
3609 | 1.62k | case ISD::SMAX: { |
3610 | 1.62k | // If we have a clamp pattern, we know that the number of sign bits will be |
3611 | 1.62k | // the minimum of the clamp min/max range. |
3612 | 1.62k | bool IsMax = (Opcode == ISD::SMAX); |
3613 | 1.62k | ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; |
3614 | 1.62k | if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts))) |
3615 | 1.17k | if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN1.15k : ISD::SMAX16 )) |
3616 | 1.14k | CstHigh = |
3617 | 1.14k | isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts); |
3618 | 1.62k | if (CstLow && CstHigh1.17k ) { |
3619 | 1.14k | if (!IsMax) |
3620 | 4 | std::swap(CstLow, CstHigh); |
3621 | 1.14k | if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) { |
3622 | 1.14k | Tmp = CstLow->getAPIntValue().getNumSignBits(); |
3623 | 1.14k | Tmp2 = CstHigh->getAPIntValue().getNumSignBits(); |
3624 | 1.14k | return std::min(Tmp, Tmp2); |
3625 | 1.14k | } |
3626 | 484 | } |
3627 | 484 | |
3628 | 484 | // Fallback - just get the minimum number of sign bits of the operands. |
3629 | 484 | Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); |
3630 | 484 | if (Tmp == 1) |
3631 | 298 | return 1; // Early out. |
3632 | 186 | Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); |
3633 | 186 | return std::min(Tmp, Tmp2); |
3634 | 186 | } |
3635 | 383 | case ISD::UMIN: |
3636 | 383 | case ISD::UMAX: |
3637 | 383 | Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); |
3638 | 383 | if (Tmp == 1) |
3639 | 366 | return 1; // Early out. |
3640 | 17 | Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); |
3641 | 17 | return std::min(Tmp, Tmp2); |
3642 | 2.15k | case ISD::SADDO: |
3643 | 2.15k | case ISD::UADDO: |
3644 | 2.15k | case ISD::SSUBO: |
3645 | 2.15k | case ISD::USUBO: |
3646 | 2.15k | case ISD::SMULO: |
3647 | 2.15k | case ISD::UMULO: |
3648 | 2.15k | if (Op.getResNo() != 1) |
3649 | 93 | break; |
3650 | 2.05k | // The boolean result conforms to getBooleanContents. Fall through. |
3651 | 2.05k | // If setcc returns 0/-1, all bits are sign bits. |
3652 | 2.05k | // We know that we have an integer-based boolean since these operations |
3653 | 2.05k | // are only available for integer. |
3654 | 2.05k | if (TLI->getBooleanContents(VT.isVector(), false) == |
3655 | 2.05k | TargetLowering::ZeroOrNegativeOneBooleanContent) |
3656 | 692 | return VTBits; |
3657 | 1.36k | break; |
3658 | 41.4k | case ISD::SETCC: |
3659 | 41.4k | // If setcc returns 0/-1, all bits are sign bits. |
3660 | 41.4k | if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == |
3661 | 41.4k | TargetLowering::ZeroOrNegativeOneBooleanContent) |
3662 | 39.7k | return VTBits; |
3663 | 1.72k | break; |
3664 | 1.72k | case ISD::ROTL: |
3665 | 215 | case ISD::ROTR: |
3666 | 215 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { |
3667 | 123 | unsigned RotAmt = C->getAPIntValue().urem(VTBits); |
3668 | 123 | |
3669 | 123 | // Handle rotate right by N like a rotate left by 32-N. |
3670 | 123 | if (Opcode == ISD::ROTR) |
3671 | 39 | RotAmt = (VTBits - RotAmt) % VTBits; |
3672 | 123 | |
3673 | 123 | // If we aren't rotating out all of the known-in sign bits, return the |
3674 | 123 | // number that are left. This handles rotl(sext(x), 1) for example. |
3675 | 123 | Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); |
3676 | 123 | if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt)0 ; |
3677 | 215 | } |
3678 | 215 | break; |
3679 | 231k | case ISD::ADD: |
3680 | 231k | case ISD::ADDC: |
3681 | 231k | // Add can have at most one carry bit. Thus we know that the output |
3682 | 231k | // is, at worst, one more bit than the inputs. |
3683 | 231k | Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); |
3684 | 231k | if (Tmp == 1) return 1209k ; // Early out. |
3685 | 21.7k | |
3686 | 21.7k | // Special case decrementing a value (ADD X, -1): |
3687 | 21.7k | if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) |
3688 | 12.0k | if (CRHS->isAllOnesValue()) { |
3689 | 946 | KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1); |
3690 | 946 | |
3691 | 946 | // If the input is known to be 0 or 1, the output is 0/-1, which is all |
3692 | 946 | // sign bits set. |
3693 | 946 | if ((Known.Zero | 1).isAllOnesValue()) |
3694 | 16 | return VTBits; |
3695 | 930 | |
3696 | 930 | // If we are subtracting one from a positive number, there is no carry |
3697 | 930 | // out of the result. |
3698 | 930 | if (Known.isNonNegative()) |
3699 | 631 | return Tmp; |
3700 | 21.0k | } |
3701 | 21.0k | |
3702 | 21.0k | Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); |
3703 | 21.0k | if (Tmp2 == 1) return 13.71k ; |
3704 | 17.3k | return std::min(Tmp, Tmp2)-1; |
3705 | 17.3k | |
3706 | 35.8k | case ISD::SUB: |
3707 | 35.8k | Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); |
3708 | 35.8k | if (Tmp2 == 1) return 123.7k ; |
3709 | 12.1k | |
3710 | 12.1k | // Handle NEG. |
3711 | 12.1k | if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) |
3712 | 6.47k | if (CLHS->isNullValue()) { |
3713 | 555 | KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1); |
3714 | 555 | // If the input is known to be 0 or 1, the output is 0/-1, which is all |
3715 | 555 | // sign bits set. |
3716 | 555 | if ((Known.Zero | 1).isAllOnesValue()) |
3717 | 29 | return VTBits; |
3718 | 526 | |
3719 | 526 | // If the input is known to be positive (the sign bit is known clear), |
3720 | 526 | // the output of the NEG has the same number of sign bits as the input. |
3721 | 526 | if (Known.isNonNegative()) |
3722 | 395 | return Tmp2; |
3723 | 11.7k | |
3724 | 11.7k | // Otherwise, we treat this like a SUB. |
3725 | 11.7k | } |
3726 | 11.7k | |
3727 | 11.7k | // Sub can have at most one carry bit. Thus we know that the output |
3728 | 11.7k | // is, at worst, one more bit than the inputs. |
3729 | 11.7k | Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); |
3730 | 11.7k | if (Tmp == 1) return 13.00k ; // Early out. |
3731 | 8.70k | return std::min(Tmp, Tmp2)-1; |
3732 | 58.3k | case ISD::TRUNCATE: { |
3733 | 58.3k | // Check if the sign bits of source go down as far as the truncated value. |
3734 | 58.3k | unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); |
3735 | 58.3k | unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); |
3736 | 58.3k | if (NumSrcSignBits > (NumSrcBits - VTBits)) |
3737 | 25.9k | return NumSrcSignBits - (NumSrcBits - VTBits); |
3738 | 32.3k | break; |
3739 | 32.3k | } |
3740 | 32.3k | case ISD::EXTRACT_ELEMENT: { |
3741 | 30 | const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); |
3742 | 30 | const int BitWidth = Op.getValueSizeInBits(); |
3743 | 30 | const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth; |
3744 | 30 | |
3745 | 30 | // Get reverse index (starting from 1), Op1 value indexes elements from |
3746 | 30 | // little end. Sign starts at big end. |
3747 | 30 | const int rIndex = Items - 1 - Op.getConstantOperandVal(1); |
3748 | 30 | |
3749 | 30 | // If the sign portion ends in our element the subtraction gives correct |
3750 | 30 | // result. Otherwise it gives either negative or > bitwidth result |
3751 | 30 | return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); |
3752 | 32.3k | } |
3753 | 32.3k | case ISD::INSERT_VECTOR_ELT: { |
3754 | 798 | SDValue InVec = Op.getOperand(0); |
3755 | 798 | SDValue InVal = Op.getOperand(1); |
3756 | 798 | SDValue EltNo = Op.getOperand(2); |
3757 | 798 | |
3758 | 798 | ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); |
3759 | 798 | if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)790 ) { |
3760 | 790 | // If we know the element index, split the demand between the |
3761 | 790 | // source vector and the inserted element. |
3762 | 790 | unsigned EltIdx = CEltNo->getZExtValue(); |
3763 | 790 | |
3764 | 790 | // If we demand the inserted element then get its sign bits. |
3765 | 790 | Tmp = std::numeric_limits<unsigned>::max(); |
3766 | 790 | if (DemandedElts[EltIdx]) { |
3767 | 787 | // TODO - handle implicit truncation of inserted elements. |
3768 | 787 | if (InVal.getScalarValueSizeInBits() != VTBits) |
3769 | 28 | break; |
3770 | 759 | Tmp = ComputeNumSignBits(InVal, Depth + 1); |
3771 | 759 | } |
3772 | 790 | |
3773 | 790 | // If we demand the source vector then get its sign bits, and determine |
3774 | 790 | // the minimum. |
3775 | 790 | APInt VectorElts = DemandedElts; |
3776 | 762 | VectorElts.clearBit(EltIdx); |
3777 | 762 | if (!!VectorElts) { |
3778 | 678 | Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1); |
3779 | 678 | Tmp = std::min(Tmp, Tmp2); |
3780 | 678 | } |
3781 | 762 | } else { |
3782 | 8 | // Unknown element index, so ignore DemandedElts and demand them all. |
3783 | 8 | Tmp = ComputeNumSignBits(InVec, Depth + 1); |
3784 | 8 | Tmp2 = ComputeNumSignBits(InVal, Depth + 1); |
3785 | 8 | Tmp = std::min(Tmp, Tmp2); |
3786 | 8 | } |
3787 | 798 | assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); |
3788 | 770 | return Tmp; |
3789 | 798 | } |
3790 | 35.2k | case ISD::EXTRACT_VECTOR_ELT: { |
3791 | 35.2k | SDValue InVec = Op.getOperand(0); |
3792 | 35.2k | SDValue EltNo = Op.getOperand(1); |
3793 | 35.2k | EVT VecVT = InVec.getValueType(); |
3794 | 35.2k | const unsigned BitWidth = Op.getValueSizeInBits(); |
3795 | 35.2k | const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); |
3796 | 35.2k | const unsigned NumSrcElts = VecVT.getVectorNumElements(); |
3797 | 35.2k | |
3798 | 35.2k | // If BitWidth > EltBitWidth the value is anyext:ed, and we do not know |
3799 | 35.2k | // anything about sign bits. But if the sizes match we can derive knowledge |
3800 | 35.2k | // about sign bits from the vector operand. |
3801 | 35.2k | if (BitWidth != EltBitWidth) |
3802 | 10.0k | break; |
3803 | 25.2k | |
3804 | 25.2k | // If we know the element index, just demand that vector element, else for |
3805 | 25.2k | // an unknown element index, ignore DemandedElts and demand them all. |
3806 | 25.2k | APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); |
3807 | 25.2k | ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); |
3808 | 25.2k | if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) |
3809 | 25.2k | DemandedSrcElts = |
3810 | 25.2k | APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); |
3811 | 25.2k | |
3812 | 25.2k | return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1); |
3813 | 25.2k | } |
3814 | 42.3k | case ISD::EXTRACT_SUBVECTOR: { |
3815 | 42.3k | // If we know the element index, just demand that subvector elements, |
3816 | 42.3k | // otherwise demand them all. |
3817 | 42.3k | SDValue Src = Op.getOperand(0); |
3818 | 42.3k | ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
3819 | 42.3k | unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); |
3820 | 42.3k | if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { |
3821 | 42.3k | // Offset the demanded elts by the subvector index. |
3822 | 42.3k | uint64_t Idx = SubIdx->getZExtValue(); |
3823 | 42.3k | APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); |
3824 | 42.3k | return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); |
3825 | 42.3k | } |
3826 | 0 | return ComputeNumSignBits(Src, Depth + 1); |
3827 | 0 | } |
3828 | 6.80k | case ISD::CONCAT_VECTORS: { |
3829 | 6.80k | // Determine the minimum number of sign bits across all demanded |
3830 | 6.80k | // elts of the input vectors. Early out if the result is already 1. |
3831 | 6.80k | Tmp = std::numeric_limits<unsigned>::max(); |
3832 | 6.80k | EVT SubVectorVT = Op.getOperand(0).getValueType(); |
3833 | 6.80k | unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements(); |
3834 | 6.80k | unsigned NumSubVectors = Op.getNumOperands(); |
3835 | 18.0k | for (unsigned i = 0; (i < NumSubVectors) && (Tmp > 1)14.0k ; ++i11.2k ) { |
3836 | 11.2k | APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts); |
3837 | 11.2k | DemandedSub = DemandedSub.trunc(NumSubVectorElts); |
3838 | 11.2k | if (!DemandedSub) |
3839 | 2.95k | continue; |
3840 | 8.25k | Tmp2 = ComputeNumSignBits(Op.getOperand(i), DemandedSub, Depth + 1); |
3841 | 8.25k | Tmp = std::min(Tmp, Tmp2); |
3842 | 8.25k | } |
3843 | 6.80k | assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); |
3844 | 6.80k | return Tmp; |
3845 | 0 | } |
3846 | 1.89k | case ISD::INSERT_SUBVECTOR: { |
3847 | 1.89k | // If we know the element index, demand any elements from the subvector and |
3848 | 1.89k | // the remainder from the src its inserted into, otherwise demand them all. |
3849 | 1.89k | SDValue Src = Op.getOperand(0); |
3850 | 1.89k | SDValue Sub = Op.getOperand(1); |
3851 | 1.89k | auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); |
3852 | 1.89k | unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); |
3853 | 1.89k | if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { |
3854 | 1.89k | Tmp = std::numeric_limits<unsigned>::max(); |
3855 | 1.89k | uint64_t Idx = SubIdx->getZExtValue(); |
3856 | 1.89k | APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); |
3857 | 1.89k | if (!!DemandedSubElts) { |
3858 | 1.68k | Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1); |
3859 | 1.68k | if (Tmp == 1) return 11.17k ; // early-out |
3860 | 720 | } |
3861 | 720 | APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); |
3862 | 720 | APInt DemandedSrcElts = DemandedElts & ~SubMask; |
3863 | 720 | if (!!DemandedSrcElts) { |
3864 | 364 | Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); |
3865 | 364 | Tmp = std::min(Tmp, Tmp2); |
3866 | 364 | } |
3867 | 720 | assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); |
3868 | 720 | return Tmp; |
3869 | 720 | } |
3870 | 0 | |
3871 | 0 | // Not able to determine the index so just assume worst case. |
3872 | 0 | Tmp = ComputeNumSignBits(Sub, Depth + 1); |
3873 | 0 | if (Tmp == 1) return 1; // early-out |
3874 | 0 | Tmp2 = ComputeNumSignBits(Src, Depth + 1); |
3875 | 0 | Tmp = std::min(Tmp, Tmp2); |
3876 | 0 | assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); |
3877 | 0 | return Tmp; |
3878 | 0 | } |
3879 | 1.10M | } |
3880 | 1.10M | |
3881 | 1.10M | // If we are looking at the loaded value of the SDNode. |
3882 | 1.10M | if (Op.getResNo() == 0) { |
3883 | 1.08M | // Handle LOADX separately here. EXTLOAD case will fallthrough. |
3884 | 1.08M | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { |
3885 | 313k | unsigned ExtType = LD->getExtensionType(); |
3886 | 313k | switch (ExtType) { |
3887 | 313k | default: break80.9k ; |
3888 | 313k | case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known. |
3889 | 18.1k | Tmp = LD->getMemoryVT().getScalarSizeInBits(); |
3890 | 18.1k | return VTBits - Tmp + 1; |
3891 | 313k | case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known. |
3892 | 27.1k | Tmp = LD->getMemoryVT().getScalarSizeInBits(); |
3893 | 27.1k | return VTBits - Tmp; |
3894 | 313k | case ISD::NON_EXTLOAD: |
3895 | 187k | if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) { |
3896 | 592 | // We only need to handle vectors - computeKnownBits should handle |
3897 | 592 | // scalar cases. |
3898 | 592 | Type *CstTy = Cst->getType(); |
3899 | 592 | if (CstTy->isVectorTy() && |
3900 | 592 | (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) { |
3901 | 592 | Tmp = VTBits; |
3902 | 9.70k | for (unsigned i = 0; i != NumElts; ++i9.11k ) { |
3903 | 9.13k | if (!DemandedElts[i]) |
3904 | 1 | continue; |
3905 | 9.13k | if (Constant *Elt = Cst->getAggregateElement(i)) { |
3906 | 9.13k | if (auto *CInt = dyn_cast<ConstantInt>(Elt)) { |
3907 | 9.10k | const APInt &Value = CInt->getValue(); |
3908 | 9.10k | Tmp = std::min(Tmp, Value.getNumSignBits()); |
3909 | 9.10k | continue; |
3910 | 9.10k | } |
3911 | 22 | if (auto *CFP = dyn_cast<ConstantFP>(Elt)) { |
3912 | 2 | APInt Value = CFP->getValueAPF().bitcastToAPInt(); |
3913 | 2 | Tmp = std::min(Tmp, Value.getNumSignBits()); |
3914 | 2 | continue; |
3915 | 2 | } |
3916 | 20 | } |
3917 | 20 | // Unknown type. Conservatively assume no bits match sign bit. |
3918 | 20 | return 1; |
3919 | 20 | } |
3920 | 592 | return Tmp572 ; |
3921 | 187k | } |
3922 | 592 | } |
3923 | 187k | break; |
3924 | 313k | } |
3925 | 313k | } |
3926 | 1.08M | } |
3927 | 1.05M | |
3928 | 1.05M | // Allow the target to implement this method for its nodes. |
3929 | 1.05M | if (Opcode >= ISD::BUILTIN_OP_END || |
3930 | 1.05M | Opcode == ISD::INTRINSIC_WO_CHAIN948k || |
3931 | 1.05M | Opcode == ISD::INTRINSIC_W_CHAIN946k || |
3932 | 1.05M | Opcode == ISD::INTRINSIC_VOID934k ) { |
3933 | 124k | unsigned NumBits = |
3934 | 124k | TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth); |
3935 | 124k | if (NumBits > 1) |
3936 | 46.2k | FirstAnswer = std::max(FirstAnswer, NumBits); |
3937 | 124k | } |
3938 | 1.05M | |
3939 | 1.05M | // Finally, if we can prove that the top bits of the result are 0's or 1's, |
3940 | 1.05M | // use this information. |
3941 | 1.05M | KnownBits Known = computeKnownBits(Op, DemandedElts, Depth); |
3942 | 1.05M | |
3943 | 1.05M | APInt Mask; |
3944 | 1.05M | if (Known.isNonNegative()) { // sign bit is 0 |
3945 | 127k | Mask = Known.Zero; |
3946 | 932k | } else if (Known.isNegative()) { // sign bit is 1; |
3947 | 293 | Mask = Known.One; |
3948 | 932k | } else { |
3949 | 932k | // Nothing known. |
3950 | 932k | return FirstAnswer; |
3951 | 932k | } |
3952 | 127k | |
3953 | 127k | // Okay, we know that the sign bit in Mask is set. Use CLZ to determine |
3954 | 127k | // the number of identical bits in the top of the input value. |
3955 | 127k | Mask = ~Mask; |
3956 | 127k | Mask <<= Mask.getBitWidth()-VTBits; |
3957 | 127k | // Return # leading zeros. We use 'min' here in case Val was zero before |
3958 | 127k | // shifting. We don't want to return '64' as for an i32 "0". |
3959 | 127k | return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); |
3960 | 127k | } |
3961 | | |
3962 | 6.43M | bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { |
3963 | 6.43M | if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR1.83M ) || |
3964 | 6.43M | !isa<ConstantSDNode>(Op.getOperand(1))4.91M ) |
3965 | 1.82M | return false; |
3966 | 4.61M | |
3967 | 4.61M | if (Op.getOpcode() == ISD::OR && |
3968 | 4.61M | !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1))311k ) |
3969 | 243 | return false; |
3970 | 4.61M | |
3971 | 4.61M | return true; |
3972 | 4.61M | } |
3973 | | |
3974 | 4.57k | bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const { |
3975 | 4.57k | // If we're told that NaNs won't happen, assume they won't. |
3976 | 4.57k | if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()3.56k ) |
3977 | 1.19k | return true; |
3978 | 3.37k | |
3979 | 3.37k | if (Depth == 6) |
3980 | 0 | return false; // Limit search depth. |
3981 | 3.37k | |
3982 | 3.37k | // TODO: Handle vectors. |
3983 | 3.37k | // If the value is a constant, we can obviously see if it is a NaN or not. |
3984 | 3.37k | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) { |
3985 | 374 | return !C->getValueAPF().isNaN() || |
3986 | 374 | (4 SNaN4 && !C->getValueAPF().isSignaling()0 ); |
3987 | 374 | } |
3988 | 3.00k | |
3989 | 3.00k | unsigned Opcode = Op.getOpcode(); |
3990 | 3.00k | switch (Opcode) { |
3991 | 3.00k | case ISD::FADD: |
3992 | 89 | case ISD::FSUB: |
3993 | 89 | case ISD::FMUL: |
3994 | 89 | case ISD::FDIV: |
3995 | 89 | case ISD::FREM: |
3996 | 89 | case ISD::FSIN: |
3997 | 89 | case ISD::FCOS: { |
3998 | 89 | if (SNaN) |
3999 | 58 | return true; |
4000 | 31 | // TODO: Need isKnownNeverInfinity |
4001 | 31 | return false; |
4002 | 31 | } |
4003 | 37 | case ISD::FCANONICALIZE: |
4004 | 37 | case ISD::FEXP: |
4005 | 37 | case ISD::FEXP2: |
4006 | 37 | case ISD::FTRUNC: |
4007 | 37 | case ISD::FFLOOR: |
4008 | 37 | case ISD::FCEIL: |
4009 | 37 | case ISD::FROUND: |
4010 | 37 | case ISD::FRINT: |
4011 | 37 | case ISD::FNEARBYINT: { |
4012 | 37 | if (SNaN) |
4013 | 37 | return true; |
4014 | 0 | return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); |
4015 | 0 | } |
4016 | 259 | case ISD::FABS: |
4017 | 259 | case ISD::FNEG: |
4018 | 259 | case ISD::FCOPYSIGN: { |
4019 | 259 | return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); |
4020 | 259 | } |
4021 | 259 | case ISD::SELECT: |
4022 | 7 | return isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && |
4023 | 7 | isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1)4 ; |
4024 | 259 | case ISD::FP_EXTEND: |
4025 | 2 | case ISD::FP_ROUND: { |
4026 | 2 | if (SNaN) |
4027 | 2 | return true; |
4028 | 0 | return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); |
4029 | 0 | } |
4030 | 3 | case ISD::SINT_TO_FP: |
4031 | 3 | case ISD::UINT_TO_FP: |
4032 | 3 | return true; |
4033 | 9 | case ISD::FMA: |
4034 | 9 | case ISD::FMAD: { |
4035 | 9 | if (SNaN) |
4036 | 1 | return true; |
4037 | 8 | return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && |
4038 | 8 | isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1)0 && |
4039 | 8 | isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1)0 ; |
4040 | 8 | } |
4041 | 8 | case ISD::FSQRT: // Need is known positive |
4042 | 0 | case ISD::FLOG: |
4043 | 0 | case ISD::FLOG2: |
4044 | 0 | case ISD::FLOG10: |
4045 | 0 | case ISD::FPOWI: |
4046 | 0 | case ISD::FPOW: { |
4047 | 0 | if (SNaN) |
4048 | 0 | return true; |
4049 | 0 | // TODO: Refine on operand |
4050 | 0 | return false; |
4051 | 0 | } |
4052 | 183 | case ISD::FMINNUM: |
4053 | 183 | case ISD::FMAXNUM: { |
4054 | 183 | // Only one needs to be known not-nan, since it will be returned if the |
4055 | 183 | // other ends up being one. |
4056 | 183 | return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) || |
4057 | 183 | isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1)98 ; |
4058 | 183 | } |
4059 | 183 | case ISD::FMINNUM_IEEE: |
4060 | 10 | case ISD::FMAXNUM_IEEE: { |
4061 | 10 | if (SNaN) |
4062 | 10 | return true; |
4063 | 0 | // This can return a NaN if either operand is an sNaN, or if both operands |
4064 | 0 | // are NaN. |
4065 | 0 | return (isKnownNeverNaN(Op.getOperand(0), false, Depth + 1) && |
4066 | 0 | isKnownNeverSNaN(Op.getOperand(1), Depth + 1)) || |
4067 | 0 | (isKnownNeverNaN(Op.getOperand(1), false, Depth + 1) && |
4068 | 0 | isKnownNeverSNaN(Op.getOperand(0), Depth + 1)); |
4069 | 0 | } |
4070 | 0 | case ISD::FMINIMUM: |
4071 | 0 | case ISD::FMAXIMUM: { |
4072 | 0 | // TODO: Does this quiet or return the origina NaN as-is? |
4073 | 0 | return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && |
4074 | 0 | isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); |
4075 | 0 | } |
4076 | 332 | case ISD::EXTRACT_VECTOR_ELT: { |
4077 | 332 | return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); |
4078 | 0 | } |
4079 | 2.07k | default: |
4080 | 2.07k | if (Opcode >= ISD::BUILTIN_OP_END || |
4081 | 2.07k | Opcode == ISD::INTRINSIC_WO_CHAIN2.05k || |
4082 | 2.07k | Opcode == ISD::INTRINSIC_W_CHAIN2.02k || |
4083 | 2.07k | Opcode == ISD::INTRINSIC_VOID2.02k ) { |
4084 | 45 | return TLI->isKnownNeverNaNForTargetNode(Op, *this, SNaN, Depth); |
4085 | 45 | } |
4086 | 2.02k | |
4087 | 2.02k | return false; |
4088 | 3.00k | } |
4089 | 3.00k | } |
4090 | | |
4091 | 164 | bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { |
4092 | 164 | assert(Op.getValueType().isFloatingPoint() && |
4093 | 164 | "Floating point type expected"); |
4094 | 164 | |
4095 | 164 | // If the value is a constant, we can obviously see if it is a zero or not. |
4096 | 164 | // TODO: Add BuildVector support. |
4097 | 164 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) |
4098 | 26 | return !C->isZero(); |
4099 | 138 | return false; |
4100 | 138 | } |
4101 | | |
4102 | 8.54k | bool SelectionDAG::isKnownNeverZero(SDValue Op) const { |
4103 | 8.54k | assert(!Op.getValueType().isFloatingPoint() && |
4104 | 8.54k | "Floating point types unsupported - use isKnownNeverZeroFloat"); |
4105 | 8.54k | |
4106 | 8.54k | // If the value is a constant, we can obviously see if it is a zero or not. |
4107 | 8.54k | if (ISD::matchUnaryPredicate( |
4108 | 8.54k | Op, [](ConstantSDNode *C) { return !C->isNullValue(); }4.55k )) |
4109 | 1.37k | return true; |
4110 | 7.17k | |
4111 | 7.17k | // TODO: Recognize more cases here. |
4112 | 7.17k | switch (Op.getOpcode()) { |
4113 | 7.17k | default: break7.12k ; |
4114 | 7.17k | case ISD::OR: |
4115 | 45 | if (isKnownNeverZero(Op.getOperand(1)) || |
4116 | 45 | isKnownNeverZero(Op.getOperand(0))15 ) |
4117 | 30 | return true; |
4118 | 15 | break; |
4119 | 7.14k | } |
4120 | 7.14k | |
4121 | 7.14k | return false; |
4122 | 7.14k | } |
4123 | | |
4124 | 35.3k | bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { |
4125 | 35.3k | // Check the obvious case. |
4126 | 35.3k | if (A == B) return true9.60k ; |
4127 | 25.7k | |
4128 | 25.7k | // For for negative and positive zero. |
4129 | 25.7k | if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A)) |
4130 | 1.31k | if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B)) |
4131 | 89 | if (CA->isZero() && CB->isZero()9 ) return true4 ; |
4132 | 25.7k | |
4133 | 25.7k | // Otherwise they may not be equal. |
4134 | 25.7k | return false; |
4135 | 25.7k | } |
4136 | | |
4137 | | // FIXME: unify with llvm::haveNoCommonBitsSet. |
4138 | | // FIXME: could also handle masked merge pattern (X & ~M) op (Y & M) |
4139 | 3.71M | bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { |
4140 | 3.71M | assert(A.getValueType() == B.getValueType() && |
4141 | 3.71M | "Values must have the same type"); |
4142 | 3.71M | return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue(); |
4143 | 3.71M | } |
4144 | | |
4145 | | static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, |
4146 | | ArrayRef<SDValue> Ops, |
4147 | 563k | SelectionDAG &DAG) { |
4148 | 563k | int NumOps = Ops.size(); |
4149 | 563k | assert(NumOps != 0 && "Can't build an empty vector!"); |
4150 | 563k | assert(VT.getVectorNumElements() == (unsigned)NumOps && |
4151 | 563k | "Incorrect element count in BUILD_VECTOR!"); |
4152 | 563k | |
4153 | 563k | // BUILD_VECTOR of UNDEFs is UNDEF. |
4154 | 604k | if (llvm::all_of(Ops, [](SDValue Op) 563k { return Op.isUndef(); })) |
4155 | 1.55k | return DAG.getUNDEF(VT); |
4156 | 562k | |
4157 | 562k | // BUILD_VECTOR of seq extract/insert from the same vector + type is Identity. |
4158 | 562k | SDValue IdentitySrc; |
4159 | 562k | bool IsIdentity = true; |
4160 | 584k | for (int i = 0; i != NumOps; ++i22.3k ) { |
4161 | 580k | if (Ops[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
4162 | 580k | Ops[i].getOperand(0).getValueType() != VT68.7k || |
4163 | 580k | (36.5k IdentitySrc36.5k && Ops[i].getOperand(0) != IdentitySrc14.9k ) || |
4164 | 580k | !isa<ConstantSDNode>(Ops[i].getOperand(1))36.0k || |
4165 | 580k | cast<ConstantSDNode>(Ops[i].getOperand(1))->getAPIntValue() != i33.7k ) { |
4166 | 558k | IsIdentity = false; |
4167 | 558k | break; |
4168 | 558k | } |
4169 | 22.3k | IdentitySrc = Ops[i].getOperand(0); |
4170 | 22.3k | } |
4171 | 562k | if (IsIdentity) |
4172 | 3.89k | return IdentitySrc; |
4173 | 558k | |
4174 | 558k | return SDValue(); |
4175 | 558k | } |
4176 | | |
4177 | | /// Try to simplify vector concatenation to an input value, undef, or build |
4178 | | /// vector. |
4179 | | static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, |
4180 | | ArrayRef<SDValue> Ops, |
4181 | 94.4k | SelectionDAG &DAG) { |
4182 | 94.4k | assert(!Ops.empty() && "Can't concatenate an empty list of vectors!"); |
4183 | 94.4k | assert(llvm::all_of(Ops, |
4184 | 94.4k | [Ops](SDValue Op) { |
4185 | 94.4k | return Ops[0].getValueType() == Op.getValueType(); |
4186 | 94.4k | }) && |
4187 | 94.4k | "Concatenation of vectors with inconsistent value types!"); |
4188 | 94.4k | assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) == |
4189 | 94.4k | VT.getVectorNumElements() && |
4190 | 94.4k | "Incorrect element count in vector concatenation!"); |
4191 | 94.4k | |
4192 | 94.4k | if (Ops.size() == 1) |
4193 | 0 | return Ops[0]; |
4194 | 94.4k | |
4195 | 94.4k | // Concat of UNDEFs is UNDEF. |
4196 | 98.9k | if (94.4k llvm::all_of(Ops, [](SDValue Op) 94.4k { return Op.isUndef(); })) |
4197 | 1.90k | return DAG.getUNDEF(VT); |
4198 | 92.4k | |
4199 | 92.4k | // Scan the operands and look for extract operations from a single source |
4200 | 92.4k | // that correspond to insertion at the same location via this concatenation: |
4201 | 92.4k | // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ... |
4202 | 92.4k | SDValue IdentitySrc; |
4203 | 92.4k | bool IsIdentity = true; |
4204 | 94.2k | for (unsigned i = 0, e = Ops.size(); i != e; ++i1.73k ) { |
4205 | 94.2k | SDValue Op = Ops[i]; |
4206 | 94.2k | unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements(); |
4207 | 94.2k | if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR || |
4208 | 94.2k | Op.getOperand(0).getValueType() != VT3.37k || |
4209 | 94.2k | (2.60k IdentitySrc2.60k && Op.getOperand(0) != IdentitySrc155 ) || |
4210 | 94.2k | !isa<ConstantSDNode>(Op.getOperand(1))2.50k || |
4211 | 94.2k | Op.getConstantOperandVal(1) != IdentityIndex2.50k ) { |
4212 | 92.4k | IsIdentity = false; |
4213 | 92.4k | break; |
4214 | 92.4k | } |
4215 | 1.73k | assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) && |
4216 | 1.73k | "Unexpected identity source vector for concat of extracts"); |
4217 | 1.73k | IdentitySrc = Op.getOperand(0); |
4218 | 1.73k | } |
4219 | 92.4k | if (IsIdentity) { |
4220 | 17 | assert(IdentitySrc && "Failed to set source vector of extracts"); |
4221 | 17 | return IdentitySrc; |
4222 | 17 | } |
4223 | 92.4k | |
4224 | 92.4k | // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be |
4225 | 92.4k | // simplified to one big BUILD_VECTOR. |
4226 | 92.4k | // FIXME: Add support for SCALAR_TO_VECTOR as well. |
4227 | 92.4k | EVT SVT = VT.getScalarType(); |
4228 | 92.4k | SmallVector<SDValue, 16> Elts; |
4229 | 96.2k | for (SDValue Op : Ops) { |
4230 | 96.2k | EVT OpVT = Op.getValueType(); |
4231 | 96.2k | if (Op.isUndef()) |
4232 | 1.08k | Elts.append(OpVT.getVectorNumElements(), DAG.getUNDEF(SVT)); |
4233 | 95.1k | else if (Op.getOpcode() == ISD::BUILD_VECTOR) |
4234 | 4.10k | Elts.append(Op->op_begin(), Op->op_end()); |
4235 | 91.0k | else |
4236 | 91.0k | return SDValue(); |
4237 | 96.2k | } |
4238 | 92.4k | |
4239 | 92.4k | // BUILD_VECTOR requires all inputs to be of the same type, find the |
4240 | 92.4k | // maximum type and extend them all. |
4241 | 92.4k | for (SDValue Op : Elts)1.46k |
4242 | 22.1k | SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType()703 : SVT21.4k ); |
4243 | 1.46k | |
4244 | 1.46k | if (SVT.bitsGT(VT.getScalarType())) |
4245 | 703 | for (SDValue &Op : Elts) |
4246 | 13.7k | Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT) |
4247 | 13.7k | ? DAG.getZExtOrTrunc(Op, DL, SVT)0 |
4248 | 13.7k | : DAG.getSExtOrTrunc(Op, DL, SVT); |
4249 | 1.46k | |
4250 | 1.46k | SDValue V = DAG.getBuildVector(VT, DL, Elts); |
4251 | 1.46k | NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG); |
4252 | 1.46k | return V; |
4253 | 92.4k | } |
4254 | | |
4255 | | /// Gets or creates the specified node. |
4256 | 4.49M | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { |
4257 | 4.49M | FoldingSetNodeID ID; |
4258 | 4.49M | AddNodeIDNode(ID, Opcode, getVTList(VT), None); |
4259 | 4.49M | void *IP = nullptr; |
4260 | 4.49M | if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) |
4261 | 3.56M | return SDValue(E, 0); |
4262 | 927k | |
4263 | 927k | auto *N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), |
4264 | 927k | getVTList(VT)); |
4265 | 927k | CSEMap.InsertNode(N, IP); |
4266 | 927k | |
4267 | 927k | InsertNode(N); |
4268 | 927k | SDValue V = SDValue(N, 0); |
4269 | 927k | NewSDValueDbgMsg(V, "Creating new node: ", this); |
4270 | 927k | return V; |
4271 | 927k | } |
4272 | | |
4273 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
4274 | 9.96M | SDValue Operand, const SDNodeFlags Flags) { |
4275 | 9.96M | // Constant fold unary operations with an integer constant operand. Even |
4276 | 9.96M | // opaque constant will be folded, because the folding of unary operations |
4277 | 9.96M | // doesn't create new constants with different values. Nevertheless, the |
4278 | 9.96M | // opaque flag is preserved during folding to prevent future folding with |
4279 | 9.96M | // other constants. |
4280 | 9.96M | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) { |
4281 | 1.29M | const APInt &Val = C->getAPIntValue(); |
4282 | 1.29M | switch (Opcode) { |
4283 | 1.29M | default: break98.0k ; |
4284 | 1.29M | case ISD::SIGN_EXTEND: |
4285 | 195k | return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, |
4286 | 195k | C->isTargetOpcode(), C->isOpaque()); |
4287 | 1.29M | case ISD::TRUNCATE: |
4288 | 841k | if (C->isOpaque()) |
4289 | 754 | break; |
4290 | 840k | LLVM_FALLTHROUGH; |
4291 | 954k | case ISD::ANY_EXTEND: |
4292 | 954k | case ISD::ZERO_EXTEND: |
4293 | 954k | return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, |
4294 | 954k | C->isTargetOpcode(), C->isOpaque()); |
4295 | 954k | case ISD::UINT_TO_FP: |
4296 | 615 | case ISD::SINT_TO_FP: { |
4297 | 615 | APFloat apf(EVTToAPFloatSemantics(VT), |
4298 | 615 | APInt::getNullValue(VT.getSizeInBits())); |
4299 | 615 | (void)apf.convertFromAPInt(Val, |
4300 | 615 | Opcode==ISD::SINT_TO_FP, |
4301 | 615 | APFloat::rmNearestTiesToEven); |
4302 | 615 | return getConstantFP(apf, DL, VT); |
4303 | 615 | } |
4304 | 21.9k | case ISD::BITCAST: |
4305 | 21.9k | if (VT == MVT::f16 && C->getValueType(0) == MVT::i1615 ) |
4306 | 15 | return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT); |
4307 | 21.9k | if (VT == MVT::f32 && C->getValueType(0) == MVT::i32427 ) |
4308 | 427 | return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT); |
4309 | 21.5k | if (VT == MVT::f64 && C->getValueType(0) == MVT::i64343 ) |
4310 | 343 | return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT); |
4311 | 21.1k | if (VT == MVT::f128 && C->getValueType(0) == MVT::i1280 ) |
4312 | 0 | return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT); |
4313 | 21.1k | break; |
4314 | 21.1k | case ISD::ABS: |
4315 | 102 | return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(), |
4316 | 102 | C->isOpaque()); |
4317 | 21.1k | case ISD::BITREVERSE: |
4318 | 452 | return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(), |
4319 | 452 | C->isOpaque()); |
4320 | 21.1k | case ISD::BSWAP: |
4321 | 214 | return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), |
4322 | 214 | C->isOpaque()); |
4323 | 21.1k | case ISD::CTPOP: |
4324 | 660 | return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(), |
4325 | 660 | C->isOpaque()); |
4326 | 21.1k | case ISD::CTLZ: |
4327 | 16.5k | case ISD::CTLZ_ZERO_UNDEF: |
4328 | 16.5k | return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(), |
4329 | 16.5k | C->isOpaque()); |
4330 | 16.5k | case ISD::CTTZ: |
4331 | 3.83k | case ISD::CTTZ_ZERO_UNDEF: |
4332 | 3.83k | return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), |
4333 | 3.83k | C->isOpaque()); |
4334 | 3.83k | case ISD::FP16_TO_FP: { |
4335 | 836 | bool Ignored; |
4336 | 836 | APFloat FPV(APFloat::IEEEhalf(), |
4337 | 836 | (Val.getBitWidth() == 16) ? Val : Val.trunc(16)0 ); |
4338 | 836 | |
4339 | 836 | // This can return overflow, underflow, or inexact; we don't care. |
4340 | 836 | // FIXME need to be more flexible about rounding mode. |
4341 | 836 | (void)FPV.convert(EVTToAPFloatSemantics(VT), |
4342 | 836 | APFloat::rmNearestTiesToEven, &Ignored); |
4343 | 836 | return getConstantFP(FPV, DL, VT); |
4344 | 8.78M | } |
4345 | 1.29M | } |
4346 | 1.29M | } |
4347 | 8.78M | |
4348 | 8.78M | // Constant fold unary operations with a floating point constant operand. |
4349 | 8.78M | if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) { |
4350 | 16.9k | APFloat V = C->getValueAPF(); // make copy |
4351 | 16.9k | switch (Opcode) { |
4352 | 16.9k | case ISD::FNEG: |
4353 | 418 | V.changeSign(); |
4354 | 418 | return getConstantFP(V, DL, VT); |
4355 | 16.9k | case ISD::FABS: |
4356 | 157 | V.clearSign(); |
4357 | 157 | return getConstantFP(V, DL, VT); |
4358 | 16.9k | case ISD::FCEIL: { |
4359 | 28 | APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); |
4360 | 28 | if (fs == APFloat::opOK || fs == APFloat::opInexact0 ) |
4361 | 28 | return getConstantFP(V, DL, VT); |
4362 | 0 | break; |
4363 | 0 | } |
4364 | 25 | case ISD::FTRUNC: { |
4365 | 25 | APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); |
4366 | 25 | if (fs == APFloat::opOK || fs == APFloat::opInexact0 ) |
4367 | 25 | return getConstantFP(V, DL, VT); |
4368 | 0 | break; |
4369 | 0 | } |
4370 | 25 | case ISD::FFLOOR: { |
4371 | 25 | APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); |
4372 | 25 | if (fs == APFloat::opOK || fs == APFloat::opInexact0 ) |
4373 | 25 | return getConstantFP(V, DL, VT); |
4374 | 0 | break; |
4375 | 0 | } |
4376 | 158 | case ISD::FP_EXTEND: { |
4377 | 158 | bool ignored; |
4378 | 158 | // This can return overflow, underflow, or inexact; we don't care. |
4379 | 158 | // FIXME need to be more flexible about rounding mode. |
4380 | 158 | (void)V.convert(EVTToAPFloatSemantics(VT), |
4381 | 158 | APFloat::rmNearestTiesToEven, &ignored); |
4382 | 158 | return getConstantFP(V, DL, VT); |
4383 | 0 | } |
4384 | 1.21k | case ISD::FP_TO_SINT: |
4385 | 1.21k | case ISD::FP_TO_UINT: { |
4386 | 1.21k | bool ignored; |
4387 | 1.21k | APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT); |
4388 | 1.21k | // FIXME need to be more flexible about rounding mode. |
4389 | 1.21k | APFloat::opStatus s = |
4390 | 1.21k | V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored); |
4391 | 1.21k | if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual |
4392 | 39 | break; |
4393 | 1.17k | return getConstant(IntVal, DL, VT); |
4394 | 1.17k | } |
4395 | 13.0k | case ISD::BITCAST: |
4396 | 13.0k | if (VT == MVT::i16 && C->getValueType(0) == MVT::f16314 ) |
4397 | 314 | return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT); |
4398 | 12.7k | else if (VT == MVT::i32 && C->getValueType(0) == MVT::f324.30k ) |
4399 | 4.30k | return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT); |
4400 | 8.44k | else if (VT == MVT::i64 && C->getValueType(0) == MVT::f648.18k ) |
4401 | 8.18k | return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); |
4402 | 267 | break; |
4403 | 267 | case ISD::FP_TO_FP16: { |
4404 | 71 | bool Ignored; |
4405 | 71 | // This can return overflow, underflow, or inexact; we don't care. |
4406 | 71 | // FIXME need to be more flexible about rounding mode. |
4407 | 71 | (void)V.convert(APFloat::IEEEhalf(), |
4408 | 71 | APFloat::rmNearestTiesToEven, &Ignored); |
4409 | 71 | return getConstant(V.bitcastToAPInt(), DL, VT); |
4410 | 8.77M | } |
4411 | 16.9k | } |
4412 | 16.9k | } |
4413 | 8.77M | |
4414 | 8.77M | // Constant fold unary operations with a vector integer or float operand. |
4415 | 8.77M | if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) { |
4416 | 195k | if (BV->isConstant()) { |
4417 | 45.9k | switch (Opcode) { |
4418 | 45.9k | default: |
4419 | 41.0k | // FIXME: Entirely reasonable to perform folding of other unary |
4420 | 41.0k | // operations here as the need arises. |
4421 | 41.0k | break; |
4422 | 45.9k | case ISD::FNEG: |
4423 | 4.94k | case ISD::FABS: |
4424 | 4.94k | case ISD::FCEIL: |
4425 | 4.94k | case ISD::FTRUNC: |
4426 | 4.94k | case ISD::FFLOOR: |
4427 | 4.94k | case ISD::FP_EXTEND: |
4428 | 4.94k | case ISD::FP_TO_SINT: |
4429 | 4.94k | case ISD::FP_TO_UINT: |
4430 | 4.94k | case ISD::TRUNCATE: |
4431 | 4.94k | case ISD::ANY_EXTEND: |
4432 | 4.94k | case ISD::ZERO_EXTEND: |
4433 | 4.94k | case ISD::SIGN_EXTEND: |
4434 | 4.94k | case ISD::UINT_TO_FP: |
4435 | 4.94k | case ISD::SINT_TO_FP: |
4436 | 4.94k | case ISD::ABS: |
4437 | 4.94k | case ISD::BITREVERSE: |
4438 | 4.94k | case ISD::BSWAP: |
4439 | 4.94k | case ISD::CTLZ: |
4440 | 4.94k | case ISD::CTLZ_ZERO_UNDEF: |
4441 | 4.94k | case ISD::CTTZ: |
4442 | 4.94k | case ISD::CTTZ_ZERO_UNDEF: |
4443 | 4.94k | case ISD::CTPOP: { |
4444 | 4.94k | SDValue Ops = { Operand }; |
4445 | 4.94k | if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) |
4446 | 4.94k | return Fold; |
4447 | 8.76M | } |
4448 | 45.9k | } |
4449 | 45.9k | } |
4450 | 195k | } |
4451 | 8.76M | |
4452 | 8.76M | unsigned OpOpcode = Operand.getNode()->getOpcode(); |
4453 | 8.76M | switch (Opcode) { |
4454 | 8.76M | case ISD::TokenFactor: |
4455 | 4.06M | case ISD::MERGE_VALUES: |
4456 | 4.06M | case ISD::CONCAT_VECTORS: |
4457 | 4.06M | return Operand; // Factor, merge or concat of one node? No need. |
4458 | 4.06M | case ISD::BUILD_VECTOR: { |
4459 | 17.1k | // Attempt to simplify BUILD_VECTOR. |
4460 | 17.1k | SDValue Ops[] = {Operand}; |
4461 | 17.1k | if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) |
4462 | 295 | return V; |
4463 | 16.8k | break; |
4464 | 16.8k | } |
4465 | 16.8k | case ISD::FP_ROUND: 0 llvm_unreachable0 ("Invalid method to make FP_ROUND node"); |
4466 | 16.8k | case ISD::FP_EXTEND: |
4467 | 15.4k | assert(VT.isFloatingPoint() && |
4468 | 15.4k | Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); |
4469 | 15.4k | if (Operand.getValueType() == VT) return Operand2.67k ; // noop conversion. |
4470 | 12.7k | assert((!VT.isVector() || |
4471 | 12.7k | VT.getVectorNumElements() == |
4472 | 12.7k | Operand.getValueType().getVectorNumElements()) && |
4473 | 12.7k | "Vector element count mismatch!"); |
4474 | 12.7k | assert(Operand.getValueType().bitsLT(VT) && |
4475 | 12.7k | "Invalid fpext node, dst < src!"); |
4476 | 12.7k | if (Operand.isUndef()) |
4477 | 29 | return getUNDEF(VT); |
4478 | 12.7k | break; |
4479 | 15.1k | case ISD::FP_TO_SINT: |
4480 | 15.1k | case ISD::FP_TO_UINT: |
4481 | 15.1k | if (Operand.isUndef()) |
4482 | 16 | return getUNDEF(VT); |
4483 | 15.1k | break; |
4484 | 94.8k | case ISD::SINT_TO_FP: |
4485 | 94.8k | case ISD::UINT_TO_FP: |
4486 | 94.8k | // [us]itofp(undef) = 0, because the result value is bounded. |
4487 | 94.8k | if (Operand.isUndef()) |
4488 | 256 | return getConstantFP(0.0, DL, VT); |
4489 | 94.5k | break; |
4490 | 135k | case ISD::SIGN_EXTEND: |
4491 | 135k | assert(VT.isInteger() && Operand.getValueType().isInteger() && |
4492 | 135k | "Invalid SIGN_EXTEND!"); |
4493 | 135k | assert(VT.isVector() == Operand.getValueType().isVector() && |
4494 | 135k | "SIGN_EXTEND result type type should be vector iff the operand " |
4495 | 135k | "type is vector!"); |
4496 | 135k | if (Operand.getValueType() == VT) return Operand4.19k ; // noop extension |
4497 | 131k | assert((!VT.isVector() || |
4498 | 131k | VT.getVectorNumElements() == |
4499 | 131k | Operand.getValueType().getVectorNumElements()) && |
4500 | 131k | "Vector element count mismatch!"); |
4501 | 131k | assert(Operand.getValueType().bitsLT(VT) && |
4502 | 131k | "Invalid sext node, dst < src!"); |
4503 | 131k | if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND131k ) |
4504 | 495 | return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); |
4505 | 131k | else if (OpOpcode == ISD::UNDEF) |
4506 | 1.12k | // sext(undef) = 0, because the top bits will all be the same. |
4507 | 1.12k | return getConstant(0, DL, VT); |
4508 | 130k | break; |
4509 | 481k | case ISD::ZERO_EXTEND: |
4510 | 481k | assert(VT.isInteger() && Operand.getValueType().isInteger() && |
4511 | 481k | "Invalid ZERO_EXTEND!"); |
4512 | 481k | assert(VT.isVector() == Operand.getValueType().isVector() && |
4513 | 481k | "ZERO_EXTEND result type type should be vector iff the operand " |
4514 | 481k | "type is vector!"); |
4515 | 481k | if (Operand.getValueType() == VT) return Operand13.1k ; // noop extension |
4516 | 468k | assert((!VT.isVector() || |
4517 | 468k | VT.getVectorNumElements() == |
4518 | 468k | Operand.getValueType().getVectorNumElements()) && |
4519 | 468k | "Vector element count mismatch!"); |
4520 | 468k | assert(Operand.getValueType().bitsLT(VT) && |
4521 | 468k | "Invalid zext node, dst < src!"); |
4522 | 468k | if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) |
4523 | 6.29k | return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0)); |
4524 | 462k | else if (OpOpcode == ISD::UNDEF) |
4525 | 2.15k | // zext(undef) = 0, because the top bits will be zero. |
4526 | 2.15k | return getConstant(0, DL, VT); |
4527 | 460k | break; |
4528 | 524k | case ISD::ANY_EXTEND: |
4529 | 524k | assert(VT.isInteger() && Operand.getValueType().isInteger() && |
4530 | 524k | "Invalid ANY_EXTEND!"); |
4531 | 524k | assert(VT.isVector() == Operand.getValueType().isVector() && |
4532 | 524k | "ANY_EXTEND result type type should be vector iff the operand " |
4533 | 524k | "type is vector!"); |
4534 | 524k | if (Operand.getValueType() == VT) return Operand338k ; // noop extension |
4535 | 186k | assert((!VT.isVector() || |
4536 | 186k | VT.getVectorNumElements() == |
4537 | 186k | Operand.getValueType().getVectorNumElements()) && |
4538 | 186k | "Vector element count mismatch!"); |
4539 | 186k | assert(Operand.getValueType().bitsLT(VT) && |
4540 | 186k | "Invalid anyext node, dst < src!"); |
4541 | 186k | |
4542 | 186k | if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND185k || |
4543 | 186k | OpOpcode == ISD::ANY_EXTEND184k ) |
4544 | 20.3k | // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) |
4545 | 20.3k | return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); |
4546 | 165k | else if (OpOpcode == ISD::UNDEF) |
4547 | 603 | return getUNDEF(VT); |
4548 | 165k | |
4549 | 165k | // (ext (trunc x)) -> x |
4550 | 165k | if (OpOpcode == ISD::TRUNCATE) { |
4551 | 17.0k | SDValue OpOp = Operand.getOperand(0); |
4552 | 17.0k | if (OpOp.getValueType() == VT) { |
4553 | 14.2k | transferDbgValues(Operand, OpOp); |
4554 | 14.2k | return OpOp; |
4555 | 14.2k | } |
4556 | 151k | } |
4557 | 151k | break; |
4558 | 1.60M | case ISD::TRUNCATE: |
4559 | 1.60M | assert(VT.isInteger() && Operand.getValueType().isInteger() && |
4560 | 1.60M | "Invalid TRUNCATE!"); |
4561 | 1.60M | assert(VT.isVector() == Operand.getValueType().isVector() && |
4562 | 1.60M | "TRUNCATE result type type should be vector iff the operand " |
4563 | 1.60M | "type is vector!"); |
4564 | 1.60M | if (Operand.getValueType() == VT) return Operand902k ; // noop truncate |
4565 | 697k | assert((!VT.isVector() || |
4566 | 697k | VT.getVectorNumElements() == |
4567 | 697k | Operand.getValueType().getVectorNumElements()) && |
4568 | 697k | "Vector element count mismatch!"); |
4569 | 697k | assert(Operand.getValueType().bitsGT(VT) && |
4570 | 697k | "Invalid truncate node, src < dst!"); |
4571 | 697k | if (OpOpcode == ISD::TRUNCATE) |
4572 | 56.8k | return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0)); |
4573 | 640k | if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND634k || |
4574 | 640k | OpOpcode == ISD::ANY_EXTEND632k ) { |
4575 | 16.3k | // If the source is smaller than the dest, we still need an extend. |
4576 | 16.3k | if (Operand.getOperand(0).getValueType().getScalarType() |
4577 | 16.3k | .bitsLT(VT.getScalarType())) |
4578 | 4.39k | return getNode(OpOpcode, DL, VT, Operand.getOperand(0)); |
4579 | 11.9k | if (Operand.getOperand(0).getValueType().bitsGT(VT)) |
4580 | 1.14k | return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0)); |
4581 | 10.8k | return Operand.getOperand(0); |
4582 | 10.8k | } |
4583 | 624k | if (OpOpcode == ISD::UNDEF) |
4584 | 20.4k | return getUNDEF(VT); |
4585 | 604k | break; |
4586 | 604k | case ISD::ANY_EXTEND_VECTOR_INREG: |
4587 | 10.6k | case ISD::ZERO_EXTEND_VECTOR_INREG: |
4588 | 10.6k | case ISD::SIGN_EXTEND_VECTOR_INREG: |
4589 | 10.6k | assert(VT.isVector() && "This DAG node is restricted to vector types."); |
4590 | 10.6k | assert(Operand.getValueType().bitsLE(VT) && |
4591 | 10.6k | "The input must be the same size or smaller than the result."); |
4592 | 10.6k | assert(VT.getVectorNumElements() < |
4593 | 10.6k | Operand.getValueType().getVectorNumElements() && |
4594 | 10.6k | "The destination vector type must have fewer lanes than the input."); |
4595 | 10.6k | break; |
4596 | 10.6k | case ISD::ABS: |
4597 | 1.69k | assert(VT.isInteger() && VT == Operand.getValueType() && |
4598 | 1.69k | "Invalid ABS!"); |
4599 | 1.69k | if (OpOpcode == ISD::UNDEF) |
4600 | 1 | return getUNDEF(VT); |
4601 | 1.69k | break; |
4602 | 1.69k | case ISD::BSWAP: |
4603 | 1.65k | assert(VT.isInteger() && VT == Operand.getValueType() && |
4604 | 1.65k | "Invalid BSWAP!"); |
4605 | 1.65k | assert((VT.getScalarSizeInBits() % 16 == 0) && |
4606 | 1.65k | "BSWAP types must be a multiple of 16 bits!"); |
4607 | 1.65k | if (OpOpcode == ISD::UNDEF) |
4608 | 49 | return getUNDEF(VT); |
4609 | 1.60k | break; |
4610 | 1.60k | case ISD::BITREVERSE: |
4611 | 981 | assert(VT.isInteger() && VT == Operand.getValueType() && |
4612 | 981 | "Invalid BITREVERSE!"); |
4613 | 981 | if (OpOpcode == ISD::UNDEF) |
4614 | 13 | return getUNDEF(VT); |
4615 | 968 | break; |
4616 | 1.04M | case ISD::BITCAST: |
4617 | 1.04M | // Basic sanity checking. |
4618 | 1.04M | assert(VT.getSizeInBits() == Operand.getValueSizeInBits() && |
4619 | 1.04M | "Cannot BITCAST between types of different sizes!"); |
4620 | 1.04M | if (VT == Operand.getValueType()) return Operand267k ; // noop conversion. |
4621 | 773k | if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x) |
4622 | 48.5k | return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0)); |
4623 | 724k | if (OpOpcode == ISD::UNDEF) |
4624 | 22.6k | return getUNDEF(VT); |
4625 | 702k | break; |
4626 | 702k | case ISD::SCALAR_TO_VECTOR: |
4627 | 50.0k | assert(VT.isVector() && !Operand.getValueType().isVector() && |
4628 | 50.0k | (VT.getVectorElementType() == Operand.getValueType() || |
4629 | 50.0k | (VT.getVectorElementType().isInteger() && |
4630 | 50.0k | Operand.getValueType().isInteger() && |
4631 | 50.0k | VT.getVectorElementType().bitsLE(Operand.getValueType()))) && |
4632 | 50.0k | "Illegal SCALAR_TO_VECTOR node!"); |
4633 | 50.0k | if (OpOpcode == ISD::UNDEF) |
4634 | 9 | return getUNDEF(VT); |
4635 | 50.0k | // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined. |
4636 | 50.0k | if (OpOpcode == ISD::EXTRACT_VECTOR_ELT && |
4637 | 50.0k | isa<ConstantSDNode>(Operand.getOperand(1))15.4k && |
4638 | 50.0k | Operand.getConstantOperandVal(1) == 015.1k && |
4639 | 50.0k | Operand.getOperand(0).getValueType() == VT6.59k ) |
4640 | 2.40k | return Operand.getOperand(0); |
4641 | 47.6k | break; |
4642 | 47.6k | case ISD::FNEG: |
4643 | 8.43k | // Negation of an unknown bag of bits is still completely undefined. |
4644 | 8.43k | if (OpOpcode == ISD::UNDEF) |
4645 | 19 | return getUNDEF(VT); |
4646 | 8.41k | |
4647 | 8.41k | // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 |
4648 | 8.41k | if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()7.73k ) && |
4649 | 8.41k | OpOpcode == ISD::FSUB731 ) |
4650 | 42 | return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), |
4651 | 42 | Operand.getOperand(0), Flags); |
4652 | 8.37k | if (OpOpcode == ISD::FNEG) // --X -> X |
4653 | 9 | return Operand.getOperand(0); |
4654 | 8.36k | break; |
4655 | 8.36k | case ISD::FABS: |
4656 | 3.14k | if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) |
4657 | 18 | return getNode(ISD::FABS, DL, VT, Operand.getOperand(0)); |
4658 | 3.12k | break; |
4659 | 2.96M | } |
4660 | 2.96M | |
4661 | 2.96M | SDNode *N; |
4662 | 2.96M | SDVTList VTs = getVTList(VT); |
4663 | 2.96M | SDValue Ops[] = {Operand}; |
4664 | 2.96M | if (VT != MVT::Glue) { // Don't CSE flag producing nodes |
4665 | 2.95M | FoldingSetNodeID ID; |
4666 | 2.95M | AddNodeIDNode(ID, Opcode, VTs, Ops); |
4667 | 2.95M | void *IP = nullptr; |
4668 | 2.95M | if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { |
4669 | 241k | E->intersectFlagsWith(Flags); |
4670 | 241k | return SDValue(E, 0); |
4671 | 241k | } |
4672 | 2.71M | |
4673 | 2.71M | N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); |
4674 | 2.71M | N->setFlags(Flags); |
4675 | 2.71M | createOperands(N, Ops); |
4676 | 2.71M | CSEMap.InsertNode(N, IP); |
4677 | 2.71M | } else { |
4678 | 1.40k | N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); |
4679 | 1.40k | createOperands(N, Ops); |
4680 | 1.40k | } |
4681 | 2.96M | |
4682 | 2.96M | InsertNode(N); |
4683 | 2.71M | SDValue V = SDValue(N, 0); |
4684 | 2.71M | NewSDValueDbgMsg(V, "Creating new node: ", this); |
4685 | 2.71M | return V; |
4686 | 2.96M | } |
4687 | | |
4688 | | static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, |
4689 | 937k | const APInt &C2) { |
4690 | 937k | switch (Opcode) { |
4691 | 937k | case ISD::ADD: return std::make_pair(C1 + C2, true)593k ; |
4692 | 937k | case ISD::SUB: return std::make_pair(C1 - C2, true)47.1k ; |
4693 | 937k | case ISD::MUL: return std::make_pair(C1 * C2, true)13.6k ; |
4694 | 937k | case ISD::AND: return std::make_pair(C1 & C2, true)120k ; |
4695 | 937k | case ISD::OR: return std::make_pair(C1 | C2, true)3.04k ; |
4696 | 937k | case ISD::XOR: return std::make_pair(C1 ^ C2, true)6.53k ; |
4697 | 937k | case ISD::SHL: return std::make_pair(C1 << C2, true)23.0k ; |
4698 | 937k | case ISD::SRL: return std::make_pair(C1.lshr(C2), true)8.02k ; |
4699 | 937k | case ISD::SRA: return std::make_pair(C1.ashr(C2), true)153 ; |
4700 | 937k | case ISD::ROTL: return std::make_pair(C1.rotl(C2), true)16 ; |
4701 | 937k | case ISD::ROTR: return std::make_pair(C1.rotr(C2), true)0 ; |
4702 | 937k | case ISD::SMIN: return std::make_pair(C1.sle(C2) 896 ? C1476 : C2420 , true); |
4703 | 937k | case ISD::SMAX: return std::make_pair(C1.sge(C2) 900 ? C1516 : C2384 , true); |
4704 | 937k | case ISD::UMIN: return std::make_pair(C1.ule(C2) 896 ? C1467 : C2429 , true); |
4705 | 937k | case ISD::UMAX: return std::make_pair(C1.uge(C2) 897 ? C1513 : C2384 , true); |
4706 | 937k | case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true)114 ; |
4707 | 937k | case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true)98 ; |
4708 | 937k | case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true)114 ; |
4709 | 937k | case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true)98 ; |
4710 | 937k | case ISD::UDIV: |
4711 | 38 | if (!C2.getBoolValue()) |
4712 | 0 | break; |
4713 | 38 | return std::make_pair(C1.udiv(C2), true); |
4714 | 20.3k | case ISD::UREM: |
4715 | 20.3k | if (!C2.getBoolValue()) |
4716 | 0 | break; |
4717 | 20.3k | return std::make_pair(C1.urem(C2), true); |
4718 | 20.3k | case ISD::SDIV: |
4719 | 110 | if (!C2.getBoolValue()) |
4720 | 0 | break; |
4721 | 110 | return std::make_pair(C1.sdiv(C2), true); |
4722 | 110 | case ISD::SREM: |
4723 | 91 | if (!C2.getBoolValue()) |
4724 | 0 | break; |
4725 | 91 | return std::make_pair(C1.srem(C2), true); |
4726 | 97.4k | } |
4727 | 97.4k | return std::make_pair(APInt(1, 0), false); |
4728 | 97.4k | } |
4729 | | |
4730 | | SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, |
4731 | | EVT VT, const ConstantSDNode *C1, |
4732 | 943k | const ConstantSDNode *C2) { |
4733 | 943k | if (C1->isOpaque() || C2->isOpaque()937k ) |
4734 | 5.54k | return SDValue(); |
4735 | 937k | |
4736 | 937k | std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(), |
4737 | 937k | C2->getAPIntValue()); |
4738 | 937k | if (!Folded.second) |
4739 | 97.4k | return SDValue(); |
4740 | 840k | return getConstant(Folded.first, DL, VT); |
4741 | 840k | } |
4742 | | |
4743 | | SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, |
4744 | | const GlobalAddressSDNode *GA, |
4745 | 91.5k | const SDNode *N2) { |
4746 | 91.5k | if (GA->getOpcode() != ISD::GlobalAddress) |
4747 | 574 | return SDValue(); |
4748 | 90.9k | if (!TLI->isOffsetFoldingLegal(GA)) |
4749 | 84.7k | return SDValue(); |
4750 | 6.18k | auto *C2 = dyn_cast<ConstantSDNode>(N2); |
4751 | 6.18k | if (!C2) |
4752 | 1.60k | return SDValue(); |
4753 | 4.57k | int64_t Offset = C2->getSExtValue(); |
4754 | 4.57k | switch (Opcode) { |
4755 | 4.57k | case ISD::ADD: break4.56k ; |
4756 | 4.57k | case ISD::SUB: Offset = -uint64_t(Offset); break1 ; |
4757 | 4.57k | default: return SDValue()14 ; |
4758 | 4.56k | } |
4759 | 4.56k | return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT, |
4760 | 4.56k | GA->getOffset() + uint64_t(Offset)); |
4761 | 4.56k | } |
4762 | | |
4763 | 10.1M | bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { |
4764 | 10.1M | switch (Opcode) { |
4765 | 10.1M | case ISD::SDIV: |
4766 | 58.6k | case ISD::UDIV: |
4767 | 58.6k | case ISD::SREM: |
4768 | 58.6k | case ISD::UREM: { |
4769 | 58.6k | // If a divisor is zero/undef or any element of a divisor vector is |
4770 | 58.6k | // zero/undef, the whole op is undef. |
4771 | 58.6k | assert(Ops.size() == 2 && "Div/rem should have 2 operands"); |
4772 | 58.6k | SDValue Divisor = Ops[1]; |
4773 | 58.6k | if (Divisor.isUndef() || isNullConstant(Divisor)58.6k ) |
4774 | 37 | return true; |
4775 | 58.6k | |
4776 | 58.6k | return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && |
4777 | 58.6k | llvm::any_of(Divisor->op_values(), |
4778 | 74.7k | [](SDValue V) { return V.isUndef() || |
4779 | 74.7k | isNullConstant(V)74.7k ; }); |
4780 | 58.6k | // TODO: Handle signed overflow. |
4781 | 58.6k | } |
4782 | 58.6k | // TODO: Handle oversized shifts. |
4783 | 10.0M | default: |
4784 | 10.0M | return false; |
4785 | 10.1M | } |
4786 | 10.1M | } |
4787 | | |
4788 | | SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, |
4789 | 8.94M | EVT VT, SDNode *N1, SDNode *N2) { |
4790 | 8.94M | // If the opcode is a target-specific ISD node, there's nothing we can |
4791 | 8.94M | // do here and the operand rules may not line up with the below, so |
4792 | 8.94M | // bail early. |
4793 | 8.94M | if (Opcode >= ISD::BUILTIN_OP_END) |
4794 | 780k | return SDValue(); |
4795 | 8.16M | |
4796 | 8.16M | if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)})) |
4797 | 102 | return getUNDEF(VT); |
4798 | 8.16M | |
4799 | 8.16M | // Handle the case of two scalars. |
4800 | 8.16M | if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { |
4801 | 995k | if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) { |
4802 | 936k | SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2); |
4803 | 936k | assert((!Folded || !VT.isVector()) && |
4804 | 936k | "Can't fold vectors ops with scalar operands"); |
4805 | 936k | return Folded; |
4806 | 936k | } |
4807 | 7.22M | } |
4808 | 7.22M | |
4809 | 7.22M | // fold (add Sym, c) -> Sym+c |
4810 | 7.22M | if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1)) |
4811 | 90.1k | return FoldSymbolOffset(Opcode, VT, GA, N2); |
4812 | 7.13M | if (TLI->isCommutativeBinOp(Opcode)) |
4813 | 3.52M | if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2)) |
4814 | 1.38k | return FoldSymbolOffset(Opcode, VT, GA, N1); |
4815 | 7.13M | |
4816 | 7.13M | // For vectors, extract each constant element and fold them individually. |
4817 | 7.13M | // Either input may be an undef value. |
4818 | 7.13M | auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); |
4819 | 7.13M | if (!BV1 && !N1->isUndef()7.06M ) |
4820 | 7.01M | return SDValue(); |
4821 | 120k | auto *BV2 = dyn_cast<BuildVectorSDNode>(N2); |
4822 | 120k | if (!BV2 && !N2->isUndef()93.3k ) |
4823 | 84.7k | return SDValue(); |
4824 | 35.9k | // If both operands are undef, that's handled the same way as scalars. |
4825 | 35.9k | if (!BV1 && !BV26.12k ) |
4826 | 6.05k | return SDValue(); |
4827 | 29.9k | |
4828 | 29.9k | assert((!BV1 || !BV2 || BV1->getNumOperands() == BV2->getNumOperands()) && |
4829 | 29.9k | "Vector binop with different number of elements in operands?"); |
4830 | 29.9k | |
4831 | 29.9k | EVT SVT = VT.getScalarType(); |
4832 | 29.9k | EVT LegalSVT = SVT; |
4833 | 29.9k | if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()9.91k ) { |
4834 | 9.91k | LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); |
4835 | 9.91k | if (LegalSVT.bitsLT(SVT)) |
4836 | 0 | return SDValue(); |
4837 | 29.9k | } |
4838 | 29.9k | SmallVector<SDValue, 4> Outputs; |
4839 | 29.9k | unsigned NumOps = BV1 ? BV1->getNumOperands()29.8k : BV2->getNumOperands()73 ; |
4840 | 190k | for (unsigned I = 0; I != NumOps; ++I160k ) { |
4841 | 177k | SDValue V1 = BV1 ? BV1->getOperand(I)177k : getUNDEF(SVT)311 ; |
4842 | 177k | SDValue V2 = BV2 ? BV2->getOperand(I)121k : getUNDEF(SVT)56.0k ; |
4843 | 177k | if (SVT.isInteger()) { |
4844 | 174k | if (V1->getValueType(0).bitsGT(SVT)) |
4845 | 3.30k | V1 = getNode(ISD::TRUNCATE, DL, SVT, V1); |
4846 | 174k | if (V2->getValueType(0).bitsGT(SVT)) |
4847 | 1.40k | V2 = getNode(ISD::TRUNCATE, DL, SVT, V2); |
4848 | 174k | } |
4849 | 177k | |
4850 | 177k | if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) |
4851 | 0 | return SDValue(); |
4852 | 177k | |
4853 | 177k | // Fold one vector element. |
4854 | 177k | SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2); |
4855 | 177k | if (LegalSVT != SVT) |
4856 | 790 | ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); |
4857 | 177k | |
4858 | 177k | // Scalar folding only succeeded if the result is a constant or UNDEF. |
4859 | 177k | if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant170k && |
4860 | 177k | ScalarResult.getOpcode() != ISD::ConstantFP19.4k ) |
4861 | 17.2k | return SDValue(); |
4862 | 160k | Outputs.push_back(ScalarResult); |
4863 | 160k | } |
4864 | 29.9k | |
4865 | 29.9k | assert(VT.getVectorNumElements() == Outputs.size() && |
4866 | 12.7k | "Vector size mismatch!"); |
4867 | 12.7k | |
4868 | 12.7k | // We may have a vector type but a scalar result. Create a splat. |
4869 | 12.7k | Outputs.resize(VT.getVectorNumElements(), Outputs.back()); |
4870 | 12.7k | |
4871 | 12.7k | // Build a big vector out of the scalar elements we generated. |
4872 | 12.7k | return getBuildVector(VT, SDLoc(), Outputs); |
4873 | 29.9k | } |
4874 | | |
4875 | | // TODO: Merge with FoldConstantArithmetic |
4876 | | SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, |
4877 | | const SDLoc &DL, EVT VT, |
4878 | | ArrayRef<SDValue> Ops, |
4879 | 1.95M | const SDNodeFlags Flags) { |
4880 | 1.95M | // If the opcode is a target-specific ISD node, there's nothing we can |
4881 | 1.95M | // do here and the operand rules may not line up with the below, so |
4882 | 1.95M | // bail early. |
4883 | 1.95M | if (Opcode >= ISD::BUILTIN_OP_END) |
4884 | 0 | return SDValue(); |
4885 | 1.95M | |
4886 | 1.95M | if (isUndef(Opcode, Ops)) |
4887 | 2 | return getUNDEF(VT); |
4888 | 1.95M | |
4889 | 1.95M | // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? |
4890 | 1.95M | if (!VT.isVector()) |
4891 | 1.27M | return SDValue(); |
4892 | 680k | |
4893 | 680k | unsigned NumElts = VT.getVectorNumElements(); |
4894 | 680k | |
4895 | 680k | auto IsScalarOrSameVectorSize = [&](const SDValue &Op) { |
4896 | 11.1k | return !Op.getValueType().isVector() || |
4897 | 11.1k | Op.getValueType().getVectorNumElements() == NumElts9.54k ; |
4898 | 11.1k | }; |
4899 | 680k | |
4900 | 722k | auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) { |
4901 | 722k | BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); |
4902 | 722k | return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE)722k || |
4903 | 722k | (721k BV721k && BV->isConstant()68.5k ); |
4904 | 722k | }; |
4905 | 680k | |
4906 | 680k | // All operands must be vector types with the same number of elements as |
4907 | 680k | // the result type and must be either UNDEF or a build vector of constant |
4908 | 680k | // or UNDEF scalars. |
4909 | 680k | if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) || |
4910 | 680k | !llvm::all_of(Ops, IsScalarOrSameVectorSize)7.24k ) |
4911 | 673k | return SDValue(); |
4912 | 7.24k | |
4913 | 7.24k | // If we are comparing vectors, then the result needs to be a i1 boolean |
4914 | 7.24k | // that is then sign-extended back to the legal result type. |
4915 | 7.24k | EVT SVT = (Opcode == ISD::SETCC ? MVT::i11.60k : VT.getScalarType()5.64k ); |
4916 | 7.24k | |
4917 | 7.24k | // Find legal integer scalar type for constant promotion and |
4918 | 7.24k | // ensure that its scalar size is at least as large as source. |
4919 | 7.24k | EVT LegalSVT = VT.getScalarType(); |
4920 | 7.24k | if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()2.46k ) { |
4921 | 2.46k | LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); |
4922 | 2.46k | if (LegalSVT.bitsLT(VT.getScalarType())) |
4923 | 0 | return SDValue(); |
4924 | 7.24k | } |
4925 | 7.24k | |
4926 | 7.24k | // Constant fold each scalar lane separately. |
4927 | 7.24k | SmallVector<SDValue, 4> ScalarResults; |
4928 | 81.4k | for (unsigned i = 0; i != NumElts; i++74.1k ) { |
4929 | 74.2k | SmallVector<SDValue, 4> ScalarOps; |
4930 | 113k | for (SDValue Op : Ops) { |
4931 | 113k | EVT InSVT = Op.getValueType().getScalarType(); |
4932 | 113k | BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op); |
4933 | 113k | if (!InBV) { |
4934 | 16.4k | // We've checked that this is UNDEF or a constant of some kind. |
4935 | 16.4k | if (Op.isUndef()) |
4936 | 68 | ScalarOps.push_back(getUNDEF(InSVT)); |
4937 | 16.3k | else |
4938 | 16.3k | ScalarOps.push_back(Op); |
4939 | 16.4k | continue; |
4940 | 16.4k | } |
4941 | 97.1k | |
4942 | 97.1k | SDValue ScalarOp = InBV->getOperand(i); |
4943 | 97.1k | EVT ScalarVT = ScalarOp.getValueType(); |
4944 | 97.1k | |
4945 | 97.1k | // Build vector (integer) scalar operands may need implicit |
4946 | 97.1k | // truncation - do this before constant folding. |
4947 | 97.1k | if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)95.3k ) |
4948 | 1.24k | ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp); |
4949 | 97.1k | |
4950 | 97.1k | ScalarOps.push_back(ScalarOp); |
4951 | 97.1k | } |
4952 | 74.2k | |
4953 | 74.2k | // Constant fold the scalar operands. |
4954 | 74.2k | SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); |
4955 | 74.2k | |
4956 | 74.2k | // Legalize the (integer) scalar constant if necessary. |
4957 | 74.2k | if (LegalSVT != SVT) |
4958 | 9.48k | ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); |
4959 | 74.2k | |
4960 | 74.2k | // Scalar folding only succeeded if the result is a constant or UNDEF. |
4961 | 74.2k | if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant74.0k && |
4962 | 74.2k | ScalarResult.getOpcode() != ISD::ConstantFP782 ) |
4963 | 10 | return SDValue(); |
4964 | 74.1k | ScalarResults.push_back(ScalarResult); |
4965 | 74.1k | } |
4966 | 7.24k | |
4967 | 7.24k | SDValue V = getBuildVector(VT, DL, ScalarResults); |
4968 | 7.23k | NewSDValueDbgMsg(V, "New node fold constant vector: ", this); |
4969 | 7.23k | return V; |
4970 | 7.24k | } |
4971 | | |
4972 | | SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, |
4973 | 8.09M | EVT VT, SDValue N1, SDValue N2) { |
4974 | 8.09M | // TODO: We don't do any constant folding for strict FP opcodes here, but we |
4975 | 8.09M | // should. That will require dealing with a potentially non-default |
4976 | 8.09M | // rounding mode, checking the "opStatus" return value from the APFloat |
4977 | 8.09M | // math calculations, and possibly other variations. |
4978 | 8.09M | auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); |
4979 | 8.09M | auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); |
4980 | 8.09M | if (N1CFP && N2CFP18.2k ) { |
4981 | 13.2k | APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF(); |
4982 | 13.2k | switch (Opcode) { |
4983 | 13.2k | case ISD::FADD: |
4984 | 401 | C1.add(C2, APFloat::rmNearestTiesToEven); |
4985 | 401 | return getConstantFP(C1, DL, VT); |
4986 | 13.2k | case ISD::FSUB: |
4987 | 63 | C1.subtract(C2, APFloat::rmNearestTiesToEven); |
4988 | 63 | return getConstantFP(C1, DL, VT); |
4989 | 13.2k | case ISD::FMUL: |
4990 | 347 | C1.multiply(C2, APFloat::rmNearestTiesToEven); |
4991 | 347 | return getConstantFP(C1, DL, VT); |
4992 | 13.2k | case ISD::FDIV: |
4993 | 76 | C1.divide(C2, APFloat::rmNearestTiesToEven); |
4994 | 76 | return getConstantFP(C1, DL, VT); |
4995 | 13.2k | case ISD::FREM: |
4996 | 30 | C1.mod(C2); |
4997 | 30 | return getConstantFP(C1, DL, VT); |
4998 | 13.2k | case ISD::FCOPYSIGN: |
4999 | 4 | C1.copySign(C2); |
5000 | 4 | return getConstantFP(C1, DL, VT); |
5001 | 13.2k | default: break12.3k ; |
5002 | 8.09M | } |
5003 | 8.09M | } |
5004 | 8.09M | if (N1CFP && Opcode == ISD::FP_ROUND17.2k ) { |
5005 | 8 | APFloat C1 = N1CFP->getValueAPF(); // make copy |
5006 | 8 | bool Unused; |
5007 | 8 | // This can return overflow, underflow, or inexact; we don't care. |
5008 | 8 | // FIXME need to be more flexible about rounding mode. |
5009 | 8 | (void) C1.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, |
5010 | 8 | &Unused); |
5011 | 8 | return getConstantFP(C1, DL, VT); |
5012 | 8 | } |
5013 | 8.09M | |
5014 | 8.09M | switch (Opcode) { |
5015 | 8.09M | case ISD::FADD: |
5016 | 108k | case ISD::FSUB: |
5017 | 108k | case ISD::FMUL: |
5018 | 108k | case ISD::FDIV: |
5019 | 108k | case ISD::FREM: |
5020 | 108k | // If both operands are undef, the result is undef. If 1 operand is undef, |
5021 | 108k | // the result is NaN. This should match the behavior of the IR optimizer. |
5022 | 108k | if (N1.isUndef() && N2.isUndef()3.40k ) |
5023 | 1.43k | return getUNDEF(VT); |
5024 | 106k | if (N1.isUndef() || N2.isUndef()104k ) |
5025 | 2.36k | return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); |
5026 | 8.08M | } |
5027 | 8.08M | return SDValue(); |
5028 | 8.08M | } |
5029 | | |
5030 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
5031 | 10.3M | SDValue N1, SDValue N2, const SDNodeFlags Flags) { |
5032 | 10.3M | ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); |
5033 | 10.3M | ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); |
5034 | 10.3M | ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); |
5035 | 10.3M | ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); |
5036 | 10.3M | |
5037 | 10.3M | // Canonicalize constant to RHS if commutative. |
5038 | 10.3M | if (TLI->isCommutativeBinOp(Opcode)) { |
5039 | 5.25M | if (N1C && !N2C375k ) { |
5040 | 75.0k | std::swap(N1C, N2C); |
5041 | 75.0k | std::swap(N1, N2); |
5042 | 5.18M | } else if (N1CFP && !N2CFP3.94k ) { |
5043 | 3.12k | std::swap(N1CFP, N2CFP); |
5044 | 3.12k | std::swap(N1, N2); |
5045 | 3.12k | } |
5046 | 5.25M | } |
5047 | 10.3M | |
5048 | 10.3M | switch (Opcode) { |
5049 | 10.3M | default: break1.51M ; |
5050 | 10.3M | case ISD::TokenFactor: |
5051 | 850k | assert(VT == MVT::Other && N1.getValueType() == MVT::Other && |
5052 | 850k | N2.getValueType() == MVT::Other && "Invalid token factor!"); |
5053 | 850k | // Fold trivial token factors. |
5054 | 850k | if (N1.getOpcode() == ISD::EntryToken) return N296 ; |
5055 | 850k | if (N2.getOpcode() == ISD::EntryToken) return N11.98k ; |
5056 | 848k | if (N1 == N2) return N1347 ; |
5057 | 848k | break; |
5058 | 848k | case ISD::BUILD_VECTOR: { |
5059 | 206k | // Attempt to simplify BUILD_VECTOR. |
5060 | 206k | SDValue Ops[] = {N1, N2}; |
5061 | 206k | if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) |
5062 | 1.54k | return V; |
5063 | 205k | break; |
5064 | 205k | } |
5065 | 205k | case ISD::CONCAT_VECTORS: { |
5066 | 80.8k | SDValue Ops[] = {N1, N2}; |
5067 | 80.8k | if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) |
5068 | 2.27k | return V; |
5069 | 78.5k | break; |
5070 | 78.5k | } |
5071 | 794k | case ISD::AND: |
5072 | 794k | assert(VT.isInteger() && "This operator does not apply to FP types!"); |
5073 | 794k | assert(N1.getValueType() == N2.getValueType() && |
5074 | 794k | N1.getValueType() == VT && "Binary operator types must match!"); |
5075 | 794k | // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's |
5076 | 794k | // worth handling here. |
5077 | 794k | if (N2C && N2C->isNullValue()679k ) |
5078 | 20.4k | return N2; |
5079 | 773k | if (N2C && N2C->isAllOnesValue()659k ) // X & -1 -> X |
5080 | 4.62k | return N1; |
5081 | 769k | break; |
5082 | 4.35M | case ISD::OR: |
5083 | 4.35M | case ISD::XOR: |
5084 | 4.35M | case ISD::ADD: |
5085 | 4.35M | case ISD::SUB: |
5086 | 4.35M | assert(VT.isInteger() && "This operator does not apply to FP types!"); |
5087 | 4.35M | assert(N1.getValueType() == N2.getValueType() && |
5088 | 4.35M | N1.getValueType() == VT && "Binary operator types must match!"); |
5089 | 4.35M | // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so |
5090 | 4.35M | // it's worth handling here. |
5091 | 4.35M | if (N2C && N2C->isNullValue()3.71M ) |
5092 | 1.34M | return N1; |
5093 | 3.00M | break; |
5094 | 3.00M | case ISD::UDIV: |
5095 | 200k | case ISD::UREM: |
5096 | 200k | case ISD::MULHU: |
5097 | 200k | case ISD::MULHS: |
5098 | 200k | case ISD::MUL: |
5099 | 200k | case ISD::SDIV: |
5100 | 200k | case ISD::SREM: |
5101 | 200k | case ISD::SMIN: |
5102 | 200k | case ISD::SMAX: |
5103 | 200k | case ISD::UMIN: |
5104 | 200k | case ISD::UMAX: |
5105 | 200k | case ISD::SADDSAT: |
5106 | 200k | case ISD::SSUBSAT: |
5107 | 200k | case ISD::UADDSAT: |
5108 | 200k | case ISD::USUBSAT: |
5109 | 200k | assert(VT.isInteger() && "This operator does not apply to FP types!"); |
5110 | 200k | assert(N1.getValueType() == N2.getValueType() && |
5111 | 200k | N1.getValueType() == VT && "Binary operator types must match!"); |
5112 | 200k | break; |
5113 | 200k | case ISD::FADD: |
5114 | 110k | case ISD::FSUB: |
5115 | 110k | case ISD::FMUL: |
5116 | 110k | case ISD::FDIV: |
5117 | 110k | case ISD::FREM: |
5118 | 110k | assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); |
5119 | 110k | assert(N1.getValueType() == N2.getValueType() && |
5120 | 110k | N1.getValueType() == VT && "Binary operator types must match!"); |
5121 | 110k | if (SDValue V = simplifyFPBinop(Opcode, N1, N2)) |
5122 | 715 | return V; |
5123 | 109k | break; |
5124 | 109k | case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match. |
5125 | 2.07k | assert(N1.getValueType() == VT && |
5126 | 2.07k | N1.getValueType().isFloatingPoint() && |
5127 | 2.07k | N2.getValueType().isFloatingPoint() && |
5128 | 2.07k | "Invalid FCOPYSIGN!"); |
5129 | 2.07k | break; |
5130 | 627k | case ISD::SHL: |
5131 | 627k | case ISD::SRA: |
5132 | 627k | case ISD::SRL: |
5133 | 627k | if (SDValue V = simplifyShift(N1, N2)) |
5134 | 48.7k | return V; |
5135 | 578k | LLVM_FALLTHROUGH; |
5136 | 582k | case ISD::ROTL: |
5137 | 582k | case ISD::ROTR: |
5138 | 582k | assert(VT == N1.getValueType() && |
5139 | 582k | "Shift operators return type must be the same as their first arg"); |
5140 | 582k | assert(VT.isInteger() && N2.getValueType().isInteger() && |
5141 | 582k | "Shifts only work on integers"); |
5142 | 582k | assert((!VT.isVector() || VT == N2.getValueType()) && |
5143 | 582k | "Vector shift amounts must be in the same as their first arg"); |
5144 | 582k | // Verify that the shift amount VT is big enough to hold valid shift |
5145 | 582k | // amounts. This catches things like trying to shift an i1024 value by an |
5146 | 582k | // i8, which is easy to fall into in generic code that uses |
5147 | 582k | // TLI.getShiftAmount(). |
5148 | 582k | assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) && |
5149 | 582k | "Invalid use of small shift amount with oversized value!"); |
5150 | 582k | |
5151 | 582k | // Always fold shifts of i1 values so the code generator doesn't need to |
5152 | 582k | // handle them. Since we know the size of the shift has to be less than the |
5153 | 582k | // size of the value, the shift/rotate count is guaranteed to be zero. |
5154 | 582k | if (VT == MVT::i1) |
5155 | 45 | return N1; |
5156 | 582k | if (N2C && N2C->isNullValue()489k ) |
5157 | 2 | return N1; |
5158 | 582k | break; |
5159 | 582k | case ISD::FP_ROUND_INREG: { |
5160 | 0 | EVT EVT = cast<VTSDNode>(N2)->getVT(); |
5161 | 0 | assert(VT == N1.getValueType() && "Not an inreg round!"); |
5162 | 0 | assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && |
5163 | 0 | "Cannot FP_ROUND_INREG integer types"); |
5164 | 0 | assert(EVT.isVector() == VT.isVector() && |
5165 | 0 | "FP_ROUND_INREG type should be vector iff the operand " |
5166 | 0 | "type is vector!"); |
5167 | 0 | assert((!EVT.isVector() || |
5168 | 0 | EVT.getVectorNumElements() == VT.getVectorNumElements()) && |
5169 | 0 | "Vector element counts must match in FP_ROUND_INREG"); |
5170 | 0 | assert(EVT.bitsLE(VT) && "Not rounding down!"); |
5171 | 0 | (void)EVT; |
5172 | 0 | if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. |
5173 | 0 | break; |
5174 | 0 | } |
5175 | 8.11k | case ISD::FP_ROUND: |
5176 | 8.11k | assert(VT.isFloatingPoint() && |
5177 | 8.11k | N1.getValueType().isFloatingPoint() && |
5178 | 8.11k | VT.bitsLE(N1.getValueType()) && |
5179 | 8.11k | N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) && |
5180 | 8.11k | "Invalid FP_ROUND!"); |
5181 | 8.11k | if (N1.getValueType() == VT) return N1124 ; // noop conversion. |
5182 | 7.99k | break; |
5183 | 269k | case ISD::AssertSext: |
5184 | 269k | case ISD::AssertZext: { |
5185 | 269k | EVT EVT = cast<VTSDNode>(N2)->getVT(); |
5186 | 269k | assert(VT == N1.getValueType() && "Not an inreg extend!"); |
5187 | 269k | assert(VT.isInteger() && EVT.isInteger() && |
5188 | 269k | "Cannot *_EXTEND_INREG FP types"); |
5189 | 269k | assert(!EVT.isVector() && |
5190 | 269k | "AssertSExt/AssertZExt type should be the vector element type " |
5191 | 269k | "rather than the vector type!"); |
5192 | 269k | assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!"); |
5193 | 269k | if (VT.getScalarType() == EVT) return N11.01k ; // noop assertion. |
5194 | 268k | break; |
5195 | 268k | } |
5196 | 268k | case ISD::SIGN_EXTEND_INREG: { |
5197 | 43.4k | EVT EVT = cast<VTSDNode>(N2)->getVT(); |
5198 | 43.4k | assert(VT == N1.getValueType() && "Not an inreg extend!"); |
5199 | 43.4k | assert(VT.isInteger() && EVT.isInteger() && |
5200 | 43.4k | "Cannot *_EXTEND_INREG FP types"); |
5201 | 43.4k | assert(EVT.isVector() == VT.isVector() && |
5202 | 43.4k | "SIGN_EXTEND_INREG type should be vector iff the operand " |
5203 | 43.4k | "type is vector!"); |
5204 | 43.4k | assert((!EVT.isVector() || |
5205 | 43.4k | EVT.getVectorNumElements() == VT.getVectorNumElements()) && |
5206 | 43.4k | "Vector element counts must match in SIGN_EXTEND_INREG"); |
5207 | 43.4k | assert(EVT.bitsLE(VT) && "Not extending!"); |
5208 | 43.4k | if (EVT == VT) return N1140 ; // Not actually extending |
5209 | 43.3k | |
5210 | 43.3k | auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) { |
5211 | 4.88k | unsigned FromBits = EVT.getScalarSizeInBits(); |
5212 | 4.88k | Val <<= Val.getBitWidth() - FromBits; |
5213 | 4.88k | Val.ashrInPlace(Val.getBitWidth() - FromBits); |
5214 | 4.88k | return getConstant(Val, DL, ConstantVT); |
5215 | 4.88k | }; |
5216 | 43.3k | |
5217 | 43.3k | if (N1C) { |
5218 | 4.76k | const APInt &Val = N1C->getAPIntValue(); |
5219 | 4.76k | return SignExtendInReg(Val, VT); |
5220 | 4.76k | } |
5221 | 38.5k | if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { |
5222 | 46 | SmallVector<SDValue, 8> Ops; |
5223 | 46 | llvm::EVT OpVT = N1.getOperand(0).getValueType(); |
5224 | 174 | for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i128 ) { |
5225 | 128 | SDValue Op = N1.getOperand(i); |
5226 | 128 | if (Op.isUndef()) { |
5227 | 0 | Ops.push_back(getUNDEF(OpVT)); |
5228 | 0 | continue; |
5229 | 0 | } |
5230 | 128 | ConstantSDNode *C = cast<ConstantSDNode>(Op); |
5231 | 128 | APInt Val = C->getAPIntValue(); |
5232 | 128 | Ops.push_back(SignExtendInReg(Val, OpVT)); |
5233 | 128 | } |
5234 | 46 | return getBuildVector(VT, DL, Ops); |
5235 | 46 | } |
5236 | 38.5k | break; |
5237 | 38.5k | } |
5238 | 884k | case ISD::EXTRACT_VECTOR_ELT: |
5239 | 884k | assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() && |
5240 | 884k | "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ |
5241 | 884k | element type of the vector."); |
5242 | 884k | |
5243 | 884k | // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. |
5244 | 884k | if (N1.isUndef()) |
5245 | 23.8k | return getUNDEF(VT); |
5246 | 860k | |
5247 | 860k | // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF |
5248 | 860k | if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())856k ) |
5249 | 116 | return getUNDEF(VT); |
5250 | 860k | |
5251 | 860k | // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is |
5252 | 860k | // expanding copies of large vectors from registers. |
5253 | 860k | if (N2C && |
5254 | 860k | N1.getOpcode() == ISD::CONCAT_VECTORS855k && |
5255 | 860k | N1.getNumOperands() > 022.4k ) { |
5256 | 22.4k | unsigned Factor = |
5257 | 22.4k | N1.getOperand(0).getValueType().getVectorNumElements(); |
5258 | 22.4k | return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, |
5259 | 22.4k | N1.getOperand(N2C->getZExtValue() / Factor), |
5260 | 22.4k | getConstant(N2C->getZExtValue() % Factor, DL, |
5261 | 22.4k | N2.getValueType())); |
5262 | 22.4k | } |
5263 | 837k | |
5264 | 837k | // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is |
5265 | 837k | // expanding large vector constants. |
5266 | 837k | if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR833k ) { |
5267 | 164k | SDValue Elt = N1.getOperand(N2C->getZExtValue()); |
5268 | 164k | |
5269 | 164k | if (VT != Elt.getValueType()) |
5270 | 937 | // If the vector element type is not legal, the BUILD_VECTOR operands |
5271 | 937 | // are promoted and implicitly truncated, and the result implicitly |
5272 | 937 | // extended. Make that explicit here. |
5273 | 937 | Elt = getAnyExtOrTrunc(Elt, DL, VT); |
5274 | 164k | |
5275 | 164k | return Elt; |
5276 | 164k | } |
5277 | 673k | |
5278 | 673k | // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector |
5279 | 673k | // operations are lowered to scalars. |
5280 | 673k | if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) { |
5281 | 133k | // If the indices are the same, return the inserted element else |
5282 | 133k | // if the indices are known different, extract the element from |
5283 | 133k | // the original vector. |
5284 | 133k | SDValue N1Op2 = N1.getOperand(2); |
5285 | 133k | ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2); |
5286 | 133k | |
5287 | 133k | if (N1Op2C && N2C132k ) { |
5288 | 132k | if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { |
5289 | 2.57k | if (VT == N1.getOperand(1).getValueType()) |
5290 | 2.55k | return N1.getOperand(1); |
5291 | 19 | else |
5292 | 19 | return getSExtOrTrunc(N1.getOperand(1), DL, VT); |
5293 | 129k | } |
5294 | 129k | |
5295 | 129k | return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); |
5296 | 129k | } |
5297 | 133k | } |
5298 | 541k | |
5299 | 541k | // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed |
5300 | 541k | // when vector types are scalarized and v1iX is legal. |
5301 | 541k | // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx) |
5302 | 541k | if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
5303 | 541k | N1.getValueType().getVectorNumElements() == 196.7k ) { |
5304 | 236 | return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), |
5305 | 236 | N1.getOperand(1)); |
5306 | 236 | } |
5307 | 540k | break; |
5308 | 540k | case ISD::EXTRACT_ELEMENT: |
5309 | 76.7k | assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!"); |
5310 | 76.7k | assert(!N1.getValueType().isVector() && !VT.isVector() && |
5311 | 76.7k | (N1.getValueType().isInteger() == VT.isInteger()) && |
5312 | 76.7k | N1.getValueType() != VT && |
5313 | 76.7k | "Wrong types for EXTRACT_ELEMENT!"); |
5314 | 76.7k | |
5315 | 76.7k | // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding |
5316 | 76.7k | // 64-bit integers into 32-bit parts. Instead of building the extract of |
5317 | 76.7k | // the BUILD_PAIR, only to have legalize rip it apart, just do it now. |
5318 | 76.7k | if (N1.getOpcode() == ISD::BUILD_PAIR) |
5319 | 14.5k | return N1.getOperand(N2C->getZExtValue()); |
5320 | 62.1k | |
5321 | 62.1k | // EXTRACT_ELEMENT of a constant int is also very common. |
5322 | 62.1k | if (N1C) { |
5323 | 19.4k | unsigned ElementSize = VT.getSizeInBits(); |
5324 | 19.4k | unsigned Shift = ElementSize * N2C->getZExtValue(); |
5325 | 19.4k | APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift); |
5326 | 19.4k | return getConstant(ShiftedVal.trunc(ElementSize), DL, VT); |
5327 | 19.4k | } |
5328 | 42.7k | break; |
5329 | 300k | case ISD::EXTRACT_SUBVECTOR: |
5330 | 300k | if (VT.isSimple() && N1.getValueType().isSimple()298k ) { |
5331 | 272k | assert(VT.isVector() && N1.getValueType().isVector() && |
5332 | 272k | "Extract subvector VTs must be a vectors!"); |
5333 | 272k | assert(VT.getVectorElementType() == |
5334 | 272k | N1.getValueType().getVectorElementType() && |
5335 | 272k | "Extract subvector VTs must have the same element type!"); |
5336 | 272k | assert(VT.getSimpleVT() <= N1.getSimpleValueType() && |
5337 | 272k | "Extract subvector must be from larger vector to smaller vector!"); |
5338 | 272k | |
5339 | 272k | if (N2C) { |
5340 | 272k | assert((VT.getVectorNumElements() + N2C->getZExtValue() |
5341 | 272k | <= N1.getValueType().getVectorNumElements()) |
5342 | 272k | && "Extract subvector overflow!"); |
5343 | 272k | } |
5344 | 272k | |
5345 | 272k | // Trivial extraction. |
5346 | 272k | if (VT.getSimpleVT() == N1.getSimpleValueType()) |
5347 | 43.3k | return N1; |
5348 | 229k | |
5349 | 229k | // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. |
5350 | 229k | if (N1.isUndef()) |
5351 | 1.40k | return getUNDEF(VT); |
5352 | 228k | |
5353 | 228k | // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of |
5354 | 228k | // the concat have the same type as the extract. |
5355 | 228k | if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && |
5356 | 228k | N1.getNumOperands() > 015.0k && |
5357 | 228k | VT == N1.getOperand(0).getValueType()15.0k ) { |
5358 | 13.8k | unsigned Factor = VT.getVectorNumElements(); |
5359 | 13.8k | return N1.getOperand(N2C->getZExtValue() / Factor); |
5360 | 13.8k | } |
5361 | 214k | |
5362 | 214k | // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created |
5363 | 214k | // during shuffle legalization. |
5364 | 214k | if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2)6.52k && |
5365 | 214k | VT == N1.getOperand(1).getValueType()5.91k ) |
5366 | 4.93k | return N1.getOperand(1); |
5367 | 236k | } |
5368 | 236k | break; |
5369 | 8.45M | } |
5370 | 8.45M | |
5371 | 8.45M | // Perform trivial constant folding. |
5372 | 8.45M | if (SDValue SV = |
5373 | 362k | FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) |
5374 | 362k | return SV; |
5375 | 8.09M | |
5376 | 8.09M | if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2)) |
5377 | 4.73k | return V; |
5378 | 8.08M | |
5379 | 8.08M | // Canonicalize an UNDEF to the RHS, even over a constant. |
5380 | 8.08M | if (N1.isUndef()) { |
5381 | 42.9k | if (TLI->isCommutativeBinOp(Opcode)) { |
5382 | 39.2k | std::swap(N1, N2); |
5383 | 39.2k | } else { |
5384 | 3.66k | switch (Opcode) { |
5385 | 3.66k | case ISD::FP_ROUND_INREG: |
5386 | 439 | case ISD::SIGN_EXTEND_INREG: |
5387 | 439 | case ISD::SUB: |
5388 | 439 | return getUNDEF(VT); // fold op(undef, arg2) -> undef |
5389 | 439 | case ISD::UDIV: |
5390 | 56 | case ISD::SDIV: |
5391 | 56 | case ISD::UREM: |
5392 | 56 | case ISD::SREM: |
5393 | 56 | case ISD::SSUBSAT: |
5394 | 56 | case ISD::USUBSAT: |
5395 | 56 | return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 |
5396 | 8.08M | } |
5397 | 8.08M | } |
5398 | 42.9k | } |
5399 | 8.08M | |
5400 | 8.08M | // Fold a bunch of operators when the RHS is undef. |
5401 | 8.08M | if (N2.isUndef()) { |
5402 | 70.0k | switch (Opcode) { |
5403 | 70.0k | case ISD::XOR: |
5404 | 1.39k | if (N1.isUndef()) |
5405 | 584 | // Handle undef ^ undef -> 0 special case. This is a common |
5406 | 584 | // idiom (misuse). |
5407 | 584 | return getConstant(0, DL, VT); |
5408 | 814 | LLVM_FALLTHROUGH; |
5409 | 6.02k | case ISD::ADD: |
5410 | 6.02k | case ISD::SUB: |
5411 | 6.02k | case ISD::UDIV: |
5412 | 6.02k | case ISD::SDIV: |
5413 | 6.02k | case ISD::UREM: |
5414 | 6.02k | case ISD::SREM: |
5415 | 6.02k | return getUNDEF(VT); // fold op(arg1, undef) -> undef |
5416 | 17.8k | case ISD::MUL: |
5417 | 17.8k | case ISD::AND: |
5418 | 17.8k | case ISD::SSUBSAT: |
5419 | 17.8k | case ISD::USUBSAT: |
5420 | 17.8k | return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 |
5421 | 17.8k | case ISD::OR: |
5422 | 16.6k | case ISD::SADDSAT: |
5423 | 16.6k | case ISD::UADDSAT: |
5424 | 16.6k | return getAllOnesConstant(DL, VT); |
5425 | 8.04M | } |
5426 | 8.04M | } |
5427 | 8.04M | |
5428 | 8.04M | // Memoize this node if possible. |
5429 | 8.04M | SDNode *N; |
5430 | 8.04M | SDVTList VTs = getVTList(VT); |
5431 | 8.04M | SDValue Ops[] = {N1, N2}; |
5432 | 8.04M | if (VT != MVT::Glue) { |
5433 | 7.98M | FoldingSetNodeID ID; |
5434 | 7.98M | AddNodeIDNode(ID, Opcode, VTs, Ops); |
5435 | 7.98M | void *IP = nullptr; |
5436 | 7.98M | if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { |
5437 | 472k | E->intersectFlagsWith(Flags); |
5438 | 472k | return SDValue(E, 0); |
5439 | 472k | } |
5440 | 7.51M | |
5441 | 7.51M | N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); |
5442 | 7.51M | N->setFlags(Flags); |
5443 | 7.51M | createOperands(N, Ops); |
5444 | 7.51M | CSEMap.InsertNode(N, IP); |
5445 | 7.51M | } else { |
5446 | 56.1k | N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); |
5447 | 56.1k | createOperands(N, Ops); |
5448 | 56.1k | } |
5449 | 8.04M | |
5450 | 8.04M | InsertNode(N); |
5451 | 7.57M | SDValue V = SDValue(N, 0); |
5452 | 7.57M | NewSDValueDbgMsg(V, "Creating new node: ", this); |
5453 | 7.57M | return V; |
5454 | 8.04M | } |
5455 | | |
5456 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
5457 | | SDValue N1, SDValue N2, SDValue N3, |
5458 | 3.93M | const SDNodeFlags Flags) { |
5459 | 3.93M | // Perform various simplifications. |
5460 | 3.93M | switch (Opcode) { |
5461 | 3.93M | case ISD::FMA: { |
5462 | 6.64k | assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); |
5463 | 6.64k | assert(N1.getValueType() == VT && N2.getValueType() == VT && |
5464 | 6.64k | N3.getValueType() == VT && "FMA types must match!"); |
5465 | 6.64k | ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); |
5466 | 6.64k | ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); |
5467 | 6.64k | ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); |
5468 | 6.64k | if (N1CFP && N2CFP76 && N3CFP16 ) { |
5469 | 10 | APFloat V1 = N1CFP->getValueAPF(); |
5470 | 10 | const APFloat &V2 = N2CFP->getValueAPF(); |
5471 | 10 | const APFloat &V3 = N3CFP->getValueAPF(); |
5472 | 10 | V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); |
5473 | 10 | return getConstantFP(V1, DL, VT); |
5474 | 10 | } |
5475 | 6.63k | break; |
5476 | 6.63k | } |
5477 | 6.63k | case ISD::BUILD_VECTOR: { |
5478 | 3.33k | // Attempt to simplify BUILD_VECTOR. |
5479 | 3.33k | SDValue Ops[] = {N1, N2, N3}; |
5480 | 3.33k | if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) |
5481 | 0 | return V; |
5482 | 3.33k | break; |
5483 | 3.33k | } |
5484 | 3.33k | case ISD::CONCAT_VECTORS: { |
5485 | 16 | SDValue Ops[] = {N1, N2, N3}; |
5486 | 16 | if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) |
5487 | 5 | return V; |
5488 | 11 | break; |
5489 | 11 | } |
5490 | 1.39M | case ISD::SETCC: { |
5491 | 1.39M | assert(VT.isInteger() && "SETCC result type must be an integer!"); |
5492 | 1.39M | assert(N1.getValueType() == N2.getValueType() && |
5493 | 1.39M | "SETCC operands must have the same type!"); |
5494 | 1.39M | assert(VT.isVector() == N1.getValueType().isVector() && |
5495 | 1.39M | "SETCC type should be vector iff the operand type is vector!"); |
5496 | 1.39M | assert((!VT.isVector() || |
5497 | 1.39M | VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) && |
5498 | 1.39M | "SETCC vector element counts must match!"); |
5499 | 1.39M | // Use FoldSetCC to simplify SETCC's. |
5500 | 1.39M | if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) |
5501 | 20.0k | return V; |
5502 | 1.37M | // Vector constant folding. |
5503 | 1.37M | SDValue Ops[] = {N1, N2, N3}; |
5504 | 1.37M | if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) { |
5505 | 1.59k | NewSDValueDbgMsg(V, "New node vector constant folding: ", this); |
5506 | 1.59k | return V; |
5507 | 1.59k | } |
5508 | 1.37M | break; |
5509 | 1.37M | } |
5510 | 1.37M | case ISD::SELECT: |
5511 | 100k | case ISD::VSELECT: |
5512 | 100k | if (SDValue V = simplifySelect(N1, N2, N3)) |
5513 | 3.22k | return V; |
5514 | 96.8k | break; |
5515 | 96.8k | case ISD::VECTOR_SHUFFLE: |
5516 | 0 | llvm_unreachable("should use getVectorShuffle constructor!"); |
5517 | 96.8k | case ISD::INSERT_VECTOR_ELT: { |
5518 | 93.9k | ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3); |
5519 | 93.9k | // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF |
5520 | 93.9k | if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()93.5k ) |
5521 | 8 | return getUNDEF(VT); |
5522 | 93.9k | break; |
5523 | 93.9k | } |
5524 | 93.9k | case ISD::INSERT_SUBVECTOR: { |
5525 | 88.3k | // Inserting undef into undef is still undef. |
5526 | 88.3k | if (N1.isUndef() && N2.isUndef()75.1k ) |
5527 | 0 | return getUNDEF(VT); |
5528 | 88.3k | SDValue Index = N3; |
5529 | 88.3k | if (VT.isSimple() && N1.getValueType().isSimple() |
5530 | 88.3k | && N2.getValueType().isSimple()) { |
5531 | 88.3k | assert(VT.isVector() && N1.getValueType().isVector() && |
5532 | 88.3k | N2.getValueType().isVector() && |
5533 | 88.3k | "Insert subvector VTs must be a vectors"); |
5534 | 88.3k | assert(VT == N1.getValueType() && |
5535 | 88.3k | "Dest and insert subvector source types must match!"); |
5536 | 88.3k | assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && |
5537 | 88.3k | "Insert subvector must be from smaller vector to larger vector!"); |
5538 | 88.3k | if (isa<ConstantSDNode>(Index)) { |
5539 | 88.3k | assert((N2.getValueType().getVectorNumElements() + |
5540 | 88.3k | cast<ConstantSDNode>(Index)->getZExtValue() |
5541 | 88.3k | <= VT.getVectorNumElements()) |
5542 | 88.3k | && "Insert subvector overflow!"); |
5543 | 88.3k | } |
5544 | 88.3k | |
5545 | 88.3k | // Trivial insertion. |
5546 | 88.3k | if (VT.getSimpleVT() == N2.getSimpleValueType()) |
5547 | 2.86k | return N2; |
5548 | 85.5k | |
5549 | 85.5k | // If this is an insert of an extracted vector into an undef vector, we |
5550 | 85.5k | // can just use the input to the extract. |
5551 | 85.5k | if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR72.2k && |
5552 | 85.5k | N2.getOperand(1) == N322.9k && N2.getOperand(0).getValueType() == VT1.08k ) |
5553 | 1.06k | return N2.getOperand(0); |
5554 | 84.4k | } |
5555 | 84.4k | break; |
5556 | 84.4k | } |
5557 | 84.4k | case ISD::BITCAST: |
5558 | 0 | // Fold bit_convert nodes from a type to themselves. |
5559 | 0 | if (N1.getValueType() == VT) |
5560 | 0 | return N1; |
5561 | 0 | break; |
5562 | 3.90M | } |
5563 | 3.90M | |
5564 | 3.90M | // Memoize node if it doesn't produce a flag. |
5565 | 3.90M | SDNode *N; |
5566 | 3.90M | SDVTList VTs = getVTList(VT); |
5567 | 3.90M | SDValue Ops[] = {N1, N2, N3}; |
5568 | 3.90M | if (VT != MVT::Glue) { |
5569 | 3.89M | FoldingSetNodeID ID; |
5570 | 3.89M | AddNodeIDNode(ID, Opcode, VTs, Ops); |
5571 | 3.89M | void *IP = nullptr; |
5572 | 3.89M | if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { |
5573 | 170k | E->intersectFlagsWith(Flags); |
5574 | 170k | return SDValue(E, 0); |
5575 | 170k | } |
5576 | 3.72M | |
5577 | 3.72M | N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); |
5578 | 3.72M | N->setFlags(Flags); |
5579 | 3.72M | createOperands(N, Ops); |
5580 | 3.72M | CSEMap.InsertNode(N, IP); |
5581 | 3.72M | } else { |
5582 | 1.75k | N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); |
5583 | 1.75k | createOperands(N, Ops); |
5584 | 1.75k | } |
5585 | 3.90M | |
5586 | 3.90M | InsertNode(N); |
5587 | 3.73M | SDValue V = SDValue(N, 0); |
5588 | 3.73M | NewSDValueDbgMsg(V, "Creating new node: ", this); |
5589 | 3.73M | return V; |
5590 | 3.90M | } |
5591 | | |
5592 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
5593 | 345k | SDValue N1, SDValue N2, SDValue N3, SDValue N4) { |
5594 | 345k | SDValue Ops[] = { N1, N2, N3, N4 }; |
5595 | 345k | return getNode(Opcode, DL, VT, Ops); |
5596 | 345k | } |
5597 | | |
5598 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
5599 | | SDValue N1, SDValue N2, SDValue N3, SDValue N4, |
5600 | 570k | SDValue N5) { |
5601 | 570k | SDValue Ops[] = { N1, N2, N3, N4, N5 }; |
5602 | 570k | return getNode(Opcode, DL, VT, Ops); |
5603 | 570k | } |
5604 | | |
5605 | | /// getStackArgumentTokenFactor - Compute a TokenFactor to force all |
5606 | | /// the incoming stack arguments to be loaded from the stack. |
5607 | 157 | SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { |
5608 | 157 | SmallVector<SDValue, 8> ArgChains; |
5609 | 157 | |
5610 | 157 | // Include the original chain at the beginning of the list. When this is |
5611 | 157 | // used by target LowerCall hooks, this helps legalize find the |
5612 | 157 | // CALLSEQ_BEGIN node. |
5613 | 157 | ArgChains.push_back(Chain); |
5614 | 157 | |
5615 | 157 | // Add a chain value for each stack argument. |
5616 | 157 | for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), |
5617 | 743 | UE = getEntryNode().getNode()->use_end(); U != UE; ++U586 ) |
5618 | 586 | if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) |
5619 | 153 | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) |
5620 | 102 | if (FI->getIndex() < 0) |
5621 | 102 | ArgChains.push_back(SDValue(L, 1)); |
5622 | 157 | |
5623 | 157 | // Build a tokenfactor for all the chains. |
5624 | 157 | return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); |
5625 | 157 | } |
5626 | | |
5627 | | /// getMemsetValue - Vectorized representation of the memset value |
5628 | | /// operand. |
5629 | | static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, |
5630 | 3.01k | const SDLoc &dl) { |
5631 | 3.01k | assert(!Value.isUndef()); |
5632 | 3.01k | |
5633 | 3.01k | unsigned NumBits = VT.getScalarSizeInBits(); |
5634 | 3.01k | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { |
5635 | 2.97k | assert(C->getAPIntValue().getBitWidth() == 8); |
5636 | 2.97k | APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); |
5637 | 2.97k | if (VT.isInteger()) { |
5638 | 2.91k | bool IsOpaque = VT.getSizeInBits() > 64 || |
5639 | 2.91k | !DAG.getTargetLoweringInfo().isLegalStoreImmediate(C->getSExtValue())2.10k ; |
5640 | 2.91k | return DAG.getConstant(Val, dl, VT, false, IsOpaque); |
5641 | 2.91k | } |
5642 | 61 | return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl, |
5643 | 61 | VT); |
5644 | 61 | } |
5645 | 38 | |
5646 | 38 | assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); |
5647 | 38 | EVT IntVT = VT.getScalarType(); |
5648 | 38 | if (!IntVT.isInteger()) |
5649 | 1 | IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); |
5650 | 38 | |
5651 | 38 | Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value); |
5652 | 38 | if (NumBits > 8) { |
5653 | 20 | // Use a multiplication with 0x010101... to extend the input to the |
5654 | 20 | // required length. |
5655 | 20 | APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); |
5656 | 20 | Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, |
5657 | 20 | DAG.getConstant(Magic, dl, IntVT)); |
5658 | 20 | } |
5659 | 38 | |
5660 | 38 | if (VT != Value.getValueType() && !VT.isInteger()18 ) |
5661 | 1 | Value = DAG.getBitcast(VT.getScalarType(), Value); |
5662 | 38 | if (VT != Value.getValueType()) |
5663 | 18 | Value = DAG.getSplatBuildVector(VT, dl, Value); |
5664 | 38 | |
5665 | 38 | return Value; |
5666 | 38 | } |
5667 | | |
5668 | | /// getMemsetStringVal - Similar to getMemsetValue. Except this is only |
5669 | | /// used when a memcpy is turned into a memset when the source is a constant |
5670 | | /// string ptr. |
5671 | | static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, |
5672 | | const TargetLowering &TLI, |
5673 | 946 | const ConstantDataArraySlice &Slice) { |
5674 | 946 | // Handle vector with all elements zero. |
5675 | 946 | if (Slice.Array == nullptr) { |
5676 | 54 | if (VT.isInteger()) |
5677 | 48 | return DAG.getConstant(0, dl, VT); |
5678 | 6 | else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f1283 ) |
5679 | 6 | return DAG.getConstantFP(0.0, dl, VT); |
5680 | 0 | else if (VT.isVector()) { |
5681 | 0 | unsigned NumElts = VT.getVectorNumElements(); |
5682 | 0 | MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; |
5683 | 0 | return DAG.getNode(ISD::BITCAST, dl, VT, |
5684 | 0 | DAG.getConstant(0, dl, |
5685 | 0 | EVT::getVectorVT(*DAG.getContext(), |
5686 | 0 | EltVT, NumElts))); |
5687 | 0 | } else |
5688 | 0 | llvm_unreachable("Expected type!"); |
5689 | 54 | } |
5690 | 946 | |
5691 | 946 | assert(!VT.isVector() && "Can't handle vector type here!"); |
5692 | 892 | unsigned NumVTBits = VT.getSizeInBits(); |
5693 | 892 | unsigned NumVTBytes = NumVTBits / 8; |
5694 | 892 | unsigned NumBytes = std::min(NumVTBytes, unsigned(Slice.Length)); |
5695 | 892 | |
5696 | 892 | APInt Val(NumVTBits, 0); |
5697 | 892 | if (DAG.getDataLayout().isLittleEndian()) { |
5698 | 5.11k | for (unsigned i = 0; i != NumBytes; ++i4.21k ) |
5699 | 4.21k | Val |= (uint64_t)(unsigned char)Slice[i] << i*8; |
5700 | 892 | } else { |
5701 | 0 | for (unsigned i = 0; i != NumBytes; ++i) |
5702 | 0 | Val |= (uint64_t)(unsigned char)Slice[i] << (NumVTBytes-i-1)*8; |
5703 | 0 | } |
5704 | 892 | |
5705 | 892 | // If the "cost" of materializing the integer immediate is less than the cost |
5706 | 892 | // of a load, then it is cost effective to turn the load into the immediate. |
5707 | 892 | Type *Ty = VT.getTypeForEVT(*DAG.getContext()); |
5708 | 892 | if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) |
5709 | 590 | return DAG.getConstant(Val, dl, VT); |
5710 | 302 | return SDValue(nullptr, 0); |
5711 | 302 | } |
5712 | | |
5713 | | SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset, |
5714 | 45.2k | const SDLoc &DL) { |
5715 | 45.2k | EVT VT = Base.getValueType(); |
5716 | 45.2k | return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT)); |
5717 | 45.2k | } |
5718 | | |
5719 | | /// Returns true if memcpy source is constant data. |
5720 | 6.54k | static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { |
5721 | 6.54k | uint64_t SrcDelta = 0; |
5722 | 6.54k | GlobalAddressSDNode *G = nullptr; |
5723 | 6.54k | if (Src.getOpcode() == ISD::GlobalAddress) |
5724 | 964 | G = cast<GlobalAddressSDNode>(Src); |
5725 | 5.58k | else if (Src.getOpcode() == ISD::ADD && |
5726 | 5.58k | Src.getOperand(0).getOpcode() == ISD::GlobalAddress928 && |
5727 | 5.58k | Src.getOperand(1).getOpcode() == ISD::Constant67 ) { |
5728 | 52 | G = cast<GlobalAddressSDNode>(Src.getOperand(0)); |
5729 | 52 | SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue(); |
5730 | 52 | } |
5731 | 6.54k | if (!G) |
5732 | 5.53k | return false; |
5733 | 1.01k | |
5734 | 1.01k | return getConstantDataArrayInfo(G->getGlobal(), Slice, 8, |
5735 | 1.01k | SrcDelta + G->getOffset()); |
5736 | 1.01k | } |
5737 | | |
5738 | 11.2k | static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { |
5739 | 11.2k | // On Darwin, -Os means optimize for size without hurting performance, so |
5740 | 11.2k | // only really optimize for size when -Oz (MinSize) is used. |
5741 | 11.2k | if (MF.getTarget().getTargetTriple().isOSDarwin()) |
5742 | 9.81k | return MF.getFunction().hasMinSize(); |
5743 | 1.40k | return MF.getFunction().hasOptSize(); |
5744 | 1.40k | } |
5745 | | |
5746 | | static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, |
5747 | | SmallVector<SDValue, 32> &OutChains, unsigned From, |
5748 | | unsigned To, SmallVector<SDValue, 16> &OutLoadChains, |
5749 | 2.01k | SmallVector<SDValue, 16> &OutStoreChains) { |
5750 | 2.01k | assert(OutLoadChains.size() && "Missing loads in memcpy inlining"); |
5751 | 2.01k | assert(OutStoreChains.size() && "Missing stores in memcpy inlining"); |
5752 | 2.01k | SmallVector<SDValue, 16> GluedLoadChains; |
5753 | 6.25k | for (unsigned i = From; i < To; ++i4.24k ) { |
5754 | 4.24k | OutChains.push_back(OutLoadChains[i]); |
5755 | 4.24k | GluedLoadChains.push_back(OutLoadChains[i]); |
5756 | 4.24k | } |
5757 | 2.01k | |
5758 | 2.01k | // Chain for all loads. |
5759 | 2.01k | SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
5760 | 2.01k | GluedLoadChains); |
5761 | 2.01k | |
5762 | 6.25k | for (unsigned i = From; i < To; ++i4.24k ) { |
5763 | 4.24k | StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]); |
5764 | 4.24k | SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(), |
5765 | 4.24k | ST->getBasePtr(), ST->getMemoryVT(), |
5766 | 4.24k | ST->getMemOperand()); |
5767 | 4.24k | OutChains.push_back(NewStore); |
5768 | 4.24k | } |
5769 | 2.01k | } |
5770 | | |
5771 | | static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, |
5772 | | SDValue Chain, SDValue Dst, SDValue Src, |
5773 | | uint64_t Size, unsigned Align, |
5774 | | bool isVol, bool AlwaysInline, |
5775 | | MachinePointerInfo DstPtrInfo, |
5776 | 6.55k | MachinePointerInfo SrcPtrInfo) { |
5777 | 6.55k | // Turn a memcpy of undef to nop. |
5778 | 6.55k | // FIXME: We need to honor volatile even is Src is undef. |
5779 | 6.55k | if (Src.isUndef()) |
5780 | 5 | return Chain; |
5781 | 6.54k | |
5782 | 6.54k | // Expand memcpy to a series of load and store ops if the size operand falls |
5783 | 6.54k | // below a certain threshold. |
5784 | 6.54k | // TODO: In the AlwaysInline case, if the size is big then generate a loop |
5785 | 6.54k | // rather than maybe a humongous number of loads and stores. |
5786 | 6.54k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
5787 | 6.54k | const DataLayout &DL = DAG.getDataLayout(); |
5788 | 6.54k | LLVMContext &C = *DAG.getContext(); |
5789 | 6.54k | std::vector<EVT> MemOps; |
5790 | 6.54k | bool DstAlignCanChange = false; |
5791 | 6.54k | MachineFunction &MF = DAG.getMachineFunction(); |
5792 | 6.54k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
5793 | 6.54k | bool OptSize = shouldLowerMemFuncForSize(MF); |
5794 | 6.54k | FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); |
5795 | 6.54k | if (FI && !MFI.isFixedObjectIndex(FI->getIndex())2.23k ) |
5796 | 2.22k | DstAlignCanChange = true; |
5797 | 6.54k | unsigned SrcAlign = DAG.InferPtrAlignment(Src); |
5798 | 6.54k | if (Align > SrcAlign) |
5799 | 3.14k | SrcAlign = Align; |
5800 | 6.54k | ConstantDataArraySlice Slice; |
5801 | 6.54k | bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); |
5802 | 6.54k | bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr701 ; |
5803 | 6.54k | unsigned Limit = AlwaysInline ? ~0U3 : TLI.getMaxStoresPerMemcpy(OptSize)6.54k ; |
5804 | 6.54k | |
5805 | 6.54k | if (!TLI.findOptimalMemOpLowering( |
5806 | 6.54k | MemOps, Limit, Size, (DstAlignCanChange ? 02.22k : Align4.32k ), |
5807 | 6.54k | (isZeroConstant ? 09 : SrcAlign6.53k ), /*IsMemset=*/false, |
5808 | 6.54k | /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, |
5809 | 6.54k | /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), |
5810 | 6.54k | SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) |
5811 | 560 | return SDValue(); |
5812 | 5.98k | |
5813 | 5.98k | if (DstAlignCanChange) { |
5814 | 2.07k | Type *Ty = MemOps[0].getTypeForEVT(C); |
5815 | 2.07k | unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); |
5816 | 2.07k | |
5817 | 2.07k | // Don't promote to an alignment that would require dynamic stack |
5818 | 2.07k | // realignment. |
5819 | 2.07k | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
5820 | 2.07k | if (!TRI->needsStackRealignment(MF)) |
5821 | 2.10k | while (2.03k NewAlign > Align && |
5822 | 2.10k | DL.exceedsNaturalStackAlignment(NewAlign)1.36k ) |
5823 | 67 | NewAlign /= 2; |
5824 | 2.07k | |
5825 | 2.07k | if (NewAlign > Align) { |
5826 | 1.29k | // Give the stack frame object a larger alignment if needed. |
5827 | 1.29k | if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) |
5828 | 635 | MFI.setObjectAlignment(FI->getIndex(), NewAlign); |
5829 | 1.29k | Align = NewAlign; |
5830 | 1.29k | } |
5831 | 2.07k | } |
5832 | 5.98k | |
5833 | 5.98k | MachineMemOperand::Flags MMOFlags = |
5834 | 5.98k | isVol ? MachineMemOperand::MOVolatile28 : MachineMemOperand::MONone5.95k ; |
5835 | 5.98k | SmallVector<SDValue, 16> OutLoadChains; |
5836 | 5.98k | SmallVector<SDValue, 16> OutStoreChains; |
5837 | 5.98k | SmallVector<SDValue, 32> OutChains; |
5838 | 5.98k | unsigned NumMemOps = MemOps.size(); |
5839 | 5.98k | uint64_t SrcOff = 0, DstOff = 0; |
5840 | 20.0k | for (unsigned i = 0; i != NumMemOps; ++i14.1k ) { |
5841 | 14.1k | EVT VT = MemOps[i]; |
5842 | 14.1k | unsigned VTSize = VT.getSizeInBits() / 8; |
5843 | 14.1k | SDValue Value, Store; |
5844 | 14.1k | |
5845 | 14.1k | if (VTSize > Size) { |
5846 | 719 | // Issuing an unaligned load / store pair that overlaps with the previous |
5847 | 719 | // pair. Adjust the offset accordingly. |
5848 | 719 | assert(i == NumMemOps-1 && i != 0); |
5849 | 719 | SrcOff -= VTSize - Size; |
5850 | 719 | DstOff -= VTSize - Size; |
5851 | 719 | } |
5852 | 14.1k | |
5853 | 14.1k | if (CopyFromConstant && |
5854 | 14.1k | (1.84k isZeroConstant1.84k || (1.83k VT.isInteger()1.83k && !VT.isVector()952 ))) { |
5855 | 946 | // It's unlikely a store of a vector immediate can be done in a single |
5856 | 946 | // instruction. It would require a load from a constantpool first. |
5857 | 946 | // We only handle zero vectors here. |
5858 | 946 | // FIXME: Handle other cases where store of vector immediate is done in |
5859 | 946 | // a single instruction. |
5860 | 946 | ConstantDataArraySlice SubSlice; |
5861 | 946 | if (SrcOff < Slice.Length) { |
5862 | 902 | SubSlice = Slice; |
5863 | 902 | SubSlice.move(SrcOff); |
5864 | 902 | } else { |
5865 | 44 | // This is an out-of-bounds access and hence UB. Pretend we read zero. |
5866 | 44 | SubSlice.Array = nullptr; |
5867 | 44 | SubSlice.Offset = 0; |
5868 | 44 | SubSlice.Length = VTSize; |
5869 | 44 | } |
5870 | 946 | Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); |
5871 | 946 | if (Value.getNode()) { |
5872 | 644 | Store = DAG.getStore(Chain, dl, Value, |
5873 | 644 | DAG.getMemBasePlusOffset(Dst, DstOff, dl), |
5874 | 644 | DstPtrInfo.getWithOffset(DstOff), Align, |
5875 | 644 | MMOFlags); |
5876 | 644 | OutChains.push_back(Store); |
5877 | 644 | } |
5878 | 946 | } |
5879 | 14.1k | |
5880 | 14.1k | if (!Store.getNode()) { |
5881 | 13.4k | // The type might not be legal for the target. This should only happen |
5882 | 13.4k | // if the type is smaller than a legal type, as on PPC, so the right |
5883 | 13.4k | // thing to do is generate a LoadExt/StoreTrunc pair. These simplify |
5884 | 13.4k | // to Load/Store if NVT==VT. |
5885 | 13.4k | // FIXME does the case above also need this? |
5886 | 13.4k | EVT NVT = TLI.getTypeToTransformTo(C, VT); |
5887 | 13.4k | assert(NVT.bitsGE(VT)); |
5888 | 13.4k | |
5889 | 13.4k | bool isDereferenceable = |
5890 | 13.4k | SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); |
5891 | 13.4k | MachineMemOperand::Flags SrcMMOFlags = MMOFlags; |
5892 | 13.4k | if (isDereferenceable) |
5893 | 7.34k | SrcMMOFlags |= MachineMemOperand::MODereferenceable; |
5894 | 13.4k | |
5895 | 13.4k | Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, |
5896 | 13.4k | DAG.getMemBasePlusOffset(Src, SrcOff, dl), |
5897 | 13.4k | SrcPtrInfo.getWithOffset(SrcOff), VT, |
5898 | 13.4k | MinAlign(SrcAlign, SrcOff), SrcMMOFlags); |
5899 | 13.4k | OutLoadChains.push_back(Value.getValue(1)); |
5900 | 13.4k | |
5901 | 13.4k | Store = DAG.getTruncStore( |
5902 | 13.4k | Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), |
5903 | 13.4k | DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); |
5904 | 13.4k | OutStoreChains.push_back(Store); |
5905 | 13.4k | } |
5906 | 14.1k | SrcOff += VTSize; |
5907 | 14.1k | DstOff += VTSize; |
5908 | 14.1k | Size -= VTSize; |
5909 | 14.1k | } |
5910 | 5.98k | |
5911 | 5.98k | unsigned GluedLdStLimit = MaxLdStGlue == 0 ? |
5912 | 5.98k | TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue0 ; |
5913 | 5.98k | unsigned NumLdStInMemcpy = OutStoreChains.size(); |
5914 | 5.98k | |
5915 | 5.98k | if (NumLdStInMemcpy) { |
5916 | 5.80k | // It may be that memcpy might be converted to memset if it's memcpy |
5917 | 5.80k | // of constants. In such a case, we won't have loads and stores, but |
5918 | 5.80k | // just stores. In the absence of loads, there is nothing to gang up. |
5919 | 5.80k | if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt1.79k ) { |
5920 | 4.01k | // If target does not care, just leave as it. |
5921 | 13.2k | for (unsigned i = 0; i < NumLdStInMemcpy; ++i9.22k ) { |
5922 | 9.22k | OutChains.push_back(OutLoadChains[i]); |
5923 | 9.22k | OutChains.push_back(OutStoreChains[i]); |
5924 | 9.22k | } |
5925 | 4.01k | } else { |
5926 | 1.79k | // Ld/St less than/equal limit set by target. |
5927 | 1.79k | if (NumLdStInMemcpy <= GluedLdStLimit) { |
5928 | 1.61k | chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, |
5929 | 1.61k | NumLdStInMemcpy, OutLoadChains, |
5930 | 1.61k | OutStoreChains); |
5931 | 1.61k | } else { |
5932 | 180 | unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit; |
5933 | 180 | unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit; |
5934 | 180 | unsigned GlueIter = 0; |
5935 | 180 | |
5936 | 460 | for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt280 ) { |
5937 | 280 | unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit; |
5938 | 280 | unsigned IndexTo = NumLdStInMemcpy - GlueIter; |
5939 | 280 | |
5940 | 280 | chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo, |
5941 | 280 | OutLoadChains, OutStoreChains); |
5942 | 280 | GlueIter += GluedLdStLimit; |
5943 | 280 | } |
5944 | 180 | |
5945 | 180 | // Residual ld/st. |
5946 | 180 | if (RemainingLdStInMemcpy) { |
5947 | 124 | chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, |
5948 | 124 | RemainingLdStInMemcpy, OutLoadChains, |
5949 | 124 | OutStoreChains); |
5950 | 124 | } |
5951 | 180 | } |
5952 | 1.79k | } |
5953 | 5.80k | } |
5954 | 5.98k | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); |
5955 | 5.98k | } |
5956 | | |
5957 | | static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, |
5958 | | SDValue Chain, SDValue Dst, SDValue Src, |
5959 | | uint64_t Size, unsigned Align, |
5960 | | bool isVol, bool AlwaysInline, |
5961 | | MachinePointerInfo DstPtrInfo, |
5962 | 95 | MachinePointerInfo SrcPtrInfo) { |
5963 | 95 | // Turn a memmove of undef to nop. |
5964 | 95 | // FIXME: We need to honor volatile even is Src is undef. |
5965 | 95 | if (Src.isUndef()) |
5966 | 0 | return Chain; |
5967 | 95 | |
5968 | 95 | // Expand memmove to a series of load and store ops if the size operand falls |
5969 | 95 | // below a certain threshold. |
5970 | 95 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
5971 | 95 | const DataLayout &DL = DAG.getDataLayout(); |
5972 | 95 | LLVMContext &C = *DAG.getContext(); |
5973 | 95 | std::vector<EVT> MemOps; |
5974 | 95 | bool DstAlignCanChange = false; |
5975 | 95 | MachineFunction &MF = DAG.getMachineFunction(); |
5976 | 95 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
5977 | 95 | bool OptSize = shouldLowerMemFuncForSize(MF); |
5978 | 95 | FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); |
5979 | 95 | if (FI && !MFI.isFixedObjectIndex(FI->getIndex())1 ) |
5980 | 1 | DstAlignCanChange = true; |
5981 | 95 | unsigned SrcAlign = DAG.InferPtrAlignment(Src); |
5982 | 95 | if (Align > SrcAlign) |
5983 | 82 | SrcAlign = Align; |
5984 | 95 | unsigned Limit = AlwaysInline ? ~0U0 : TLI.getMaxStoresPerMemmove(OptSize); |
5985 | 95 | // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in |
5986 | 95 | // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the |
5987 | 95 | // correct code. |
5988 | 95 | bool AllowOverlap = false; |
5989 | 95 | if (!TLI.findOptimalMemOpLowering( |
5990 | 95 | MemOps, Limit, Size, (DstAlignCanChange ? 01 : Align94 ), SrcAlign, |
5991 | 95 | /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, |
5992 | 95 | AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), |
5993 | 95 | MF.getFunction().getAttributes())) |
5994 | 59 | return SDValue(); |
5995 | 36 | |
5996 | 36 | if (DstAlignCanChange) { |
5997 | 1 | Type *Ty = MemOps[0].getTypeForEVT(C); |
5998 | 1 | unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); |
5999 | 1 | if (NewAlign > Align) { |
6000 | 1 | // Give the stack frame object a larger alignment if needed. |
6001 | 1 | if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) |
6002 | 0 | MFI.setObjectAlignment(FI->getIndex(), NewAlign); |
6003 | 1 | Align = NewAlign; |
6004 | 1 | } |
6005 | 1 | } |
6006 | 36 | |
6007 | 36 | MachineMemOperand::Flags MMOFlags = |
6008 | 36 | isVol ? MachineMemOperand::MOVolatile1 : MachineMemOperand::MONone35 ; |
6009 | 36 | uint64_t SrcOff = 0, DstOff = 0; |
6010 | 36 | SmallVector<SDValue, 8> LoadValues; |
6011 | 36 | SmallVector<SDValue, 8> LoadChains; |
6012 | 36 | SmallVector<SDValue, 8> OutChains; |
6013 | 36 | unsigned NumMemOps = MemOps.size(); |
6014 | 83 | for (unsigned i = 0; i < NumMemOps; i++47 ) { |
6015 | 47 | EVT VT = MemOps[i]; |
6016 | 47 | unsigned VTSize = VT.getSizeInBits() / 8; |
6017 | 47 | SDValue Value; |
6018 | 47 | |
6019 | 47 | bool isDereferenceable = |
6020 | 47 | SrcPtrInfo.getWithOffset(SrcOff).isDereferenceable(VTSize, C, DL); |
6021 | 47 | MachineMemOperand::Flags SrcMMOFlags = MMOFlags; |
6022 | 47 | if (isDereferenceable) |
6023 | 26 | SrcMMOFlags |= MachineMemOperand::MODereferenceable; |
6024 | 47 | |
6025 | 47 | Value = |
6026 | 47 | DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), |
6027 | 47 | SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags); |
6028 | 47 | LoadValues.push_back(Value); |
6029 | 47 | LoadChains.push_back(Value.getValue(1)); |
6030 | 47 | SrcOff += VTSize; |
6031 | 47 | } |
6032 | 36 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); |
6033 | 36 | OutChains.clear(); |
6034 | 83 | for (unsigned i = 0; i < NumMemOps; i++47 ) { |
6035 | 47 | EVT VT = MemOps[i]; |
6036 | 47 | unsigned VTSize = VT.getSizeInBits() / 8; |
6037 | 47 | SDValue Store; |
6038 | 47 | |
6039 | 47 | Store = DAG.getStore(Chain, dl, LoadValues[i], |
6040 | 47 | DAG.getMemBasePlusOffset(Dst, DstOff, dl), |
6041 | 47 | DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); |
6042 | 47 | OutChains.push_back(Store); |
6043 | 47 | DstOff += VTSize; |
6044 | 47 | } |
6045 | 36 | |
6046 | 36 | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); |
6047 | 36 | } |
6048 | | |
6049 | | /// Lower the call to 'memset' intrinsic function into a series of store |
6050 | | /// operations. |
6051 | | /// |
6052 | | /// \param DAG Selection DAG where lowered code is placed. |
6053 | | /// \param dl Link to corresponding IR location. |
6054 | | /// \param Chain Control flow dependency. |
6055 | | /// \param Dst Pointer to destination memory location. |
6056 | | /// \param Src Value of byte to write into the memory. |
6057 | | /// \param Size Number of bytes to write. |
6058 | | /// \param Align Alignment of the destination in bytes. |
6059 | | /// \param isVol True if destination is volatile. |
6060 | | /// \param DstPtrInfo IR information on the memory pointer. |
6061 | | /// \returns New head in the control flow, if lowering was successful, empty |
6062 | | /// SDValue otherwise. |
6063 | | /// |
6064 | | /// The function tries to replace 'llvm.memset' intrinsic with several store |
6065 | | /// operations and value calculation code. This is usually profitable for small |
6066 | | /// memory size. |
6067 | | static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, |
6068 | | SDValue Chain, SDValue Dst, SDValue Src, |
6069 | | uint64_t Size, unsigned Align, bool isVol, |
6070 | 4.57k | MachinePointerInfo DstPtrInfo) { |
6071 | 4.57k | // Turn a memset of undef to nop. |
6072 | 4.57k | // FIXME: We need to honor volatile even is Src is undef. |
6073 | 4.57k | if (Src.isUndef()) |
6074 | 1 | return Chain; |
6075 | 4.57k | |
6076 | 4.57k | // Expand memset to a series of load/store ops if the size operand |
6077 | 4.57k | // falls below a certain threshold. |
6078 | 4.57k | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
6079 | 4.57k | std::vector<EVT> MemOps; |
6080 | 4.57k | bool DstAlignCanChange = false; |
6081 | 4.57k | MachineFunction &MF = DAG.getMachineFunction(); |
6082 | 4.57k | MachineFrameInfo &MFI = MF.getFrameInfo(); |
6083 | 4.57k | bool OptSize = shouldLowerMemFuncForSize(MF); |
6084 | 4.57k | FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); |
6085 | 4.57k | if (FI && !MFI.isFixedObjectIndex(FI->getIndex())1.26k ) |
6086 | 1.26k | DstAlignCanChange = true; |
6087 | 4.57k | bool IsZeroVal = |
6088 | 4.57k | isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue()4.51k ; |
6089 | 4.57k | if (!TLI.findOptimalMemOpLowering( |
6090 | 4.57k | MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, |
6091 | 4.57k | (DstAlignCanChange ? 01.26k : Align3.30k ), 0, /*IsMemset=*/true, |
6092 | 4.57k | /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, |
6093 | 4.57k | /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u, |
6094 | 4.57k | MF.getFunction().getAttributes())) |
6095 | 2.02k | return SDValue(); |
6096 | 2.54k | |
6097 | 2.54k | if (DstAlignCanChange) { |
6098 | 1.17k | Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); |
6099 | 1.17k | unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); |
6100 | 1.17k | if (NewAlign > Align) { |
6101 | 374 | // Give the stack frame object a larger alignment if needed. |
6102 | 374 | if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) |
6103 | 349 | MFI.setObjectAlignment(FI->getIndex(), NewAlign); |
6104 | 374 | Align = NewAlign; |
6105 | 374 | } |
6106 | 1.17k | } |
6107 | 2.54k | |
6108 | 2.54k | SmallVector<SDValue, 8> OutChains; |
6109 | 2.54k | uint64_t DstOff = 0; |
6110 | 2.54k | unsigned NumMemOps = MemOps.size(); |
6111 | 2.54k | |
6112 | 2.54k | // Find the largest store and generate the bit pattern for it. |
6113 | 2.54k | EVT LargestVT = MemOps[0]; |
6114 | 10.6k | for (unsigned i = 1; i < NumMemOps; i++8.09k ) |
6115 | 8.09k | if (MemOps[i].bitsGT(LargestVT)) |
6116 | 0 | LargestVT = MemOps[i]; |
6117 | 2.54k | SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); |
6118 | 2.54k | |
6119 | 13.1k | for (unsigned i = 0; i < NumMemOps; i++10.6k ) { |
6120 | 10.6k | EVT VT = MemOps[i]; |
6121 | 10.6k | unsigned VTSize = VT.getSizeInBits() / 8; |
6122 | 10.6k | if (VTSize > Size) { |
6123 | 106 | // Issuing an unaligned load / store pair that overlaps with the previous |
6124 | 106 | // pair. Adjust the offset accordingly. |
6125 | 106 | assert(i == NumMemOps-1 && i != 0); |
6126 | 106 | DstOff -= VTSize - Size; |
6127 | 106 | } |
6128 | 10.6k | |
6129 | 10.6k | // If this store is smaller than the largest store see whether we can get |
6130 | 10.6k | // the smaller value for free with a truncate. |
6131 | 10.6k | SDValue Value = MemSetValue; |
6132 | 10.6k | if (VT.bitsLT(LargestVT)) { |
6133 | 531 | if (!LargestVT.isVector() && !VT.isVector()82 && |
6134 | 531 | TLI.isTruncateFree(LargestVT, VT)82 ) |
6135 | 63 | Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); |
6136 | 468 | else |
6137 | 468 | Value = getMemsetValue(Src, VT, DAG, dl); |
6138 | 531 | } |
6139 | 10.6k | assert(Value.getValueType() == VT && "Value with wrong type."); |
6140 | 10.6k | SDValue Store = DAG.getStore( |
6141 | 10.6k | Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), |
6142 | 10.6k | DstPtrInfo.getWithOffset(DstOff), Align, |
6143 | 10.6k | isVol ? MachineMemOperand::MOVolatile3 : MachineMemOperand::MONone10.6k ); |
6144 | 10.6k | OutChains.push_back(Store); |
6145 | 10.6k | DstOff += VT.getSizeInBits() / 8; |
6146 | 10.6k | Size -= VTSize; |
6147 | 10.6k | } |
6148 | 2.54k | |
6149 | 2.54k | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); |
6150 | 2.54k | } |
6151 | | |
6152 | | static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, |
6153 | 1.95k | unsigned AS) { |
6154 | 1.95k | // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all |
6155 | 1.95k | // pointer operands can be losslessly bitcasted to pointers of address space 0 |
6156 | 1.95k | if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)0 ) { |
6157 | 0 | report_fatal_error("cannot lower memory intrinsic in address space " + |
6158 | 0 | Twine(AS)); |
6159 | 0 | } |
6160 | 1.95k | } |
6161 | | |
6162 | | SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, |
6163 | | SDValue Src, SDValue Size, unsigned Align, |
6164 | | bool isVol, bool AlwaysInline, bool isTailCall, |
6165 | | MachinePointerInfo DstPtrInfo, |
6166 | 7.02k | MachinePointerInfo SrcPtrInfo) { |
6167 | 7.02k | assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); |
6168 | 7.02k | |
6169 | 7.02k | // Check to see if we should lower the memcpy to loads and stores first. |
6170 | 7.02k | // For cases within the target-specified limits, this is the best choice. |
6171 | 7.02k | ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); |
6172 | 7.02k | if (ConstantSize) { |
6173 | 6.55k | // Memcpy with size zero? Just return the original chain. |
6174 | 6.55k | if (ConstantSize->isNullValue()) |
6175 | 5 | return Chain; |
6176 | 6.54k | |
6177 | 6.54k | SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, |
6178 | 6.54k | ConstantSize->getZExtValue(),Align, |
6179 | 6.54k | isVol, false, DstPtrInfo, SrcPtrInfo); |
6180 | 6.54k | if (Result.getNode()) |
6181 | 5.98k | return Result; |
6182 | 1.03k | } |
6183 | 1.03k | |
6184 | 1.03k | // Then check to see if we should lower the memcpy with target-specific |
6185 | 1.03k | // code. If the target chooses to do this, this is the next best. |
6186 | 1.03k | if (TSI) { |
6187 | 1.03k | SDValue Result = TSI->EmitTargetCodeForMemcpy( |
6188 | 1.03k | *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, |
6189 | 1.03k | DstPtrInfo, SrcPtrInfo); |
6190 | 1.03k | if (Result.getNode()) |
6191 | 350 | return Result; |
6192 | 681 | } |
6193 | 681 | |
6194 | 681 | // If we really need inline code and the target declined to provide it, |
6195 | 681 | // use a (potentially long) sequence of loads and stores. |
6196 | 681 | if (AlwaysInline) { |
6197 | 3 | assert(ConstantSize && "AlwaysInline requires a constant size!"); |
6198 | 3 | return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, |
6199 | 3 | ConstantSize->getZExtValue(), Align, isVol, |
6200 | 3 | true, DstPtrInfo, SrcPtrInfo); |
6201 | 3 | } |
6202 | 678 | |
6203 | 678 | checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); |
6204 | 678 | checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); |
6205 | 678 | |
6206 | 678 | // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc |
6207 | 678 | // memcpy is not guaranteed to be safe. libc memcpys aren't required to |
6208 | 678 | // respect volatile, so they may do things like read or write memory |
6209 | 678 | // beyond the given memory regions. But fixing this isn't easy, and most |
6210 | 678 | // people don't care. |
6211 | 678 | |
6212 | 678 | // Emit a library call. |
6213 | 678 | TargetLowering::ArgListTy Args; |
6214 | 678 | TargetLowering::ArgListEntry Entry; |
6215 | 678 | Entry.Ty = Type::getInt8PtrTy(*getContext()); |
6216 | 678 | Entry.Node = Dst; Args.push_back(Entry); |
6217 | 678 | Entry.Node = Src; Args.push_back(Entry); |
6218 | 678 | |
6219 | 678 | Entry.Ty = getDataLayout().getIntPtrType(*getContext()); |
6220 | 678 | Entry.Node = Size; Args.push_back(Entry); |
6221 | 678 | // FIXME: pass in SDLoc |
6222 | 678 | TargetLowering::CallLoweringInfo CLI(*this); |
6223 | 678 | CLI.setDebugLoc(dl) |
6224 | 678 | .setChain(Chain) |
6225 | 678 | .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), |
6226 | 678 | Dst.getValueType().getTypeForEVT(*getContext()), |
6227 | 678 | getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), |
6228 | 678 | TLI->getPointerTy(getDataLayout())), |
6229 | 678 | std::move(Args)) |
6230 | 678 | .setDiscardResult() |
6231 | 678 | .setTailCall(isTailCall); |
6232 | 678 | |
6233 | 678 | std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); |
6234 | 678 | return CallResult.second; |
6235 | 678 | } |
6236 | | |
6237 | | SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, |
6238 | | SDValue Dst, unsigned DstAlign, |
6239 | | SDValue Src, unsigned SrcAlign, |
6240 | | SDValue Size, Type *SizeTy, |
6241 | | unsigned ElemSz, bool isTailCall, |
6242 | | MachinePointerInfo DstPtrInfo, |
6243 | 15 | MachinePointerInfo SrcPtrInfo) { |
6244 | 15 | // Emit a library call. |
6245 | 15 | TargetLowering::ArgListTy Args; |
6246 | 15 | TargetLowering::ArgListEntry Entry; |
6247 | 15 | Entry.Ty = getDataLayout().getIntPtrType(*getContext()); |
6248 | 15 | Entry.Node = Dst; |
6249 | 15 | Args.push_back(Entry); |
6250 | 15 | |
6251 | 15 | Entry.Node = Src; |
6252 | 15 | Args.push_back(Entry); |
6253 | 15 | |
6254 | 15 | Entry.Ty = SizeTy; |
6255 | 15 | Entry.Node = Size; |
6256 | 15 | Args.push_back(Entry); |
6257 | 15 | |
6258 | 15 | RTLIB::Libcall LibraryCall = |
6259 | 15 | RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); |
6260 | 15 | if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) |
6261 | 0 | report_fatal_error("Unsupported element size"); |
6262 | 15 | |
6263 | 15 | TargetLowering::CallLoweringInfo CLI(*this); |
6264 | 15 | CLI.setDebugLoc(dl) |
6265 | 15 | .setChain(Chain) |
6266 | 15 | .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), |
6267 | 15 | Type::getVoidTy(*getContext()), |
6268 | 15 | getExternalSymbol(TLI->getLibcallName(LibraryCall), |
6269 | 15 | TLI->getPointerTy(getDataLayout())), |
6270 | 15 | std::move(Args)) |
6271 | 15 | .setDiscardResult() |
6272 | 15 | .setTailCall(isTailCall); |
6273 | 15 | |
6274 | 15 | std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); |
6275 | 15 | return CallResult.second; |
6276 | 15 | } |
6277 | | |
6278 | | SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, |
6279 | | SDValue Src, SDValue Size, unsigned Align, |
6280 | | bool isVol, bool isTailCall, |
6281 | | MachinePointerInfo DstPtrInfo, |
6282 | 186 | MachinePointerInfo SrcPtrInfo) { |
6283 | 186 | assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); |
6284 | 186 | |
6285 | 186 | // Check to see if we should lower the memmove to loads and stores first. |
6286 | 186 | // For cases within the target-specified limits, this is the best choice. |
6287 | 186 | ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); |
6288 | 186 | if (ConstantSize) { |
6289 | 95 | // Memmove with size zero? Just return the original chain. |
6290 | 95 | if (ConstantSize->isNullValue()) |
6291 | 0 | return Chain; |
6292 | 95 | |
6293 | 95 | SDValue Result = |
6294 | 95 | getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, |
6295 | 95 | ConstantSize->getZExtValue(), Align, isVol, |
6296 | 95 | false, DstPtrInfo, SrcPtrInfo); |
6297 | 95 | if (Result.getNode()) |
6298 | 36 | return Result; |
6299 | 150 | } |
6300 | 150 | |
6301 | 150 | // Then check to see if we should lower the memmove with target-specific |
6302 | 150 | // code. If the target chooses to do this, this is the next best. |
6303 | 150 | if (TSI) { |
6304 | 150 | SDValue Result = TSI->EmitTargetCodeForMemmove( |
6305 | 150 | *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); |
6306 | 150 | if (Result.getNode()) |
6307 | 51 | return Result; |
6308 | 99 | } |
6309 | 99 | |
6310 | 99 | checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); |
6311 | 99 | checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); |
6312 | 99 | |
6313 | 99 | // FIXME: If the memmove is volatile, lowering it to plain libc memmove may |
6314 | 99 | // not be safe. See memcpy above for more details. |
6315 | 99 | |
6316 | 99 | // Emit a library call. |
6317 | 99 | TargetLowering::ArgListTy Args; |
6318 | 99 | TargetLowering::ArgListEntry Entry; |
6319 | 99 | Entry.Ty = Type::getInt8PtrTy(*getContext()); |
6320 | 99 | Entry.Node = Dst; Args.push_back(Entry); |
6321 | 99 | Entry.Node = Src; Args.push_back(Entry); |
6322 | 99 | |
6323 | 99 | Entry.Ty = getDataLayout().getIntPtrType(*getContext()); |
6324 | 99 | Entry.Node = Size; Args.push_back(Entry); |
6325 | 99 | // FIXME: pass in SDLoc |
6326 | 99 | TargetLowering::CallLoweringInfo CLI(*this); |
6327 | 99 | CLI.setDebugLoc(dl) |
6328 | 99 | .setChain(Chain) |
6329 | 99 | .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), |
6330 | 99 | Dst.getValueType().getTypeForEVT(*getContext()), |
6331 | 99 | getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), |
6332 | 99 | TLI->getPointerTy(getDataLayout())), |
6333 | 99 | std::move(Args)) |
6334 | 99 | .setDiscardResult() |
6335 | 99 | .setTailCall(isTailCall); |
6336 | 99 | |
6337 | 99 | std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); |
6338 | 99 | return CallResult.second; |
6339 | 99 | } |
6340 | | |
6341 | | SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, |
6342 | | SDValue Dst, unsigned DstAlign, |
6343 | | SDValue Src, unsigned SrcAlign, |
6344 | | SDValue Size, Type *SizeTy, |
6345 | | unsigned ElemSz, bool isTailCall, |
6346 | | MachinePointerInfo DstPtrInfo, |
6347 | 13 | MachinePointerInfo SrcPtrInfo) { |
6348 | 13 | // Emit a library call. |
6349 | 13 | TargetLowering::ArgListTy Args; |
6350 | 13 | TargetLowering::ArgListEntry Entry; |
6351 | 13 | Entry.Ty = getDataLayout().getIntPtrType(*getContext()); |
6352 | 13 | Entry.Node = Dst; |
6353 | 13 | Args.push_back(Entry); |
6354 | 13 | |
6355 | 13 | Entry.Node = Src; |
6356 | 13 | Args.push_back(Entry); |
6357 | 13 | |
6358 | 13 | Entry.Ty = SizeTy; |
6359 | 13 | Entry.Node = Size; |
6360 | 13 | Args.push_back(Entry); |
6361 | 13 | |
6362 | 13 | RTLIB::Libcall LibraryCall = |
6363 | 13 | RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); |
6364 | 13 | if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) |
6365 | 0 | report_fatal_error("Unsupported element size"); |
6366 | 13 | |
6367 | 13 | TargetLowering::CallLoweringInfo CLI(*this); |
6368 | 13 | CLI.setDebugLoc(dl) |
6369 | 13 | .setChain(Chain) |
6370 | 13 | .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), |
6371 | 13 | Type::getVoidTy(*getContext()), |
6372 | 13 | getExternalSymbol(TLI->getLibcallName(LibraryCall), |
6373 | 13 | TLI->getPointerTy(getDataLayout())), |
6374 | 13 | std::move(Args)) |
6375 | 13 | .setDiscardResult() |
6376 | 13 | .setTailCall(isTailCall); |
6377 | 13 | |
6378 | 13 | std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); |
6379 | 13 | return CallResult.second; |
6380 | 13 | } |
6381 | | |
6382 | | SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, |
6383 | | SDValue Src, SDValue Size, unsigned Align, |
6384 | | bool isVol, bool isTailCall, |
6385 | 7.94k | MachinePointerInfo DstPtrInfo) { |
6386 | 7.94k | assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); |
6387 | 7.94k | |
6388 | 7.94k | // Check to see if we should lower the memset to stores first. |
6389 | 7.94k | // For cases within the target-specified limits, this is the best choice. |
6390 | 7.94k | ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); |
6391 | 7.94k | if (ConstantSize) { |
6392 | 4.58k | // Memset with size zero? Just return the original chain. |
6393 | 4.58k | if (ConstantSize->isNullValue()) |
6394 | 15 | return Chain; |
6395 | 4.57k | |
6396 | 4.57k | SDValue Result = |
6397 | 4.57k | getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), |
6398 | 4.57k | Align, isVol, DstPtrInfo); |
6399 | 4.57k | |
6400 | 4.57k | if (Result.getNode()) |
6401 | 2.55k | return Result; |
6402 | 5.38k | } |
6403 | 5.38k | |
6404 | 5.38k | // Then check to see if we should lower the memset with target-specific |
6405 | 5.38k | // code. If the target chooses to do this, this is the next best. |
6406 | 5.38k | if (TSI) { |
6407 | 5.38k | SDValue Result = TSI->EmitTargetCodeForMemset( |
6408 | 5.38k | *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); |
6409 | 5.38k | if (Result.getNode()) |
6410 | 4.98k | return Result; |
6411 | 403 | } |
6412 | 403 | |
6413 | 403 | checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); |
6414 | 403 | |
6415 | 403 | // Emit a library call. |
6416 | 403 | TargetLowering::ArgListTy Args; |
6417 | 403 | TargetLowering::ArgListEntry Entry; |
6418 | 403 | Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext()); |
6419 | 403 | Args.push_back(Entry); |
6420 | 403 | Entry.Node = Src; |
6421 | 403 | Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); |
6422 | 403 | Args.push_back(Entry); |
6423 | 403 | Entry.Node = Size; |
6424 | 403 | Entry.Ty = getDataLayout().getIntPtrType(*getContext()); |
6425 | 403 | Args.push_back(Entry); |
6426 | 403 | |
6427 | 403 | // FIXME: pass in SDLoc |
6428 | 403 | TargetLowering::CallLoweringInfo CLI(*this); |
6429 | 403 | CLI.setDebugLoc(dl) |
6430 | 403 | .setChain(Chain) |
6431 | 403 | .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), |
6432 | 403 | Dst.getValueType().getTypeForEVT(*getContext()), |
6433 | 403 | getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), |
6434 | 403 | TLI->getPointerTy(getDataLayout())), |
6435 | 403 | std::move(Args)) |
6436 | 403 | .setDiscardResult() |
6437 | 403 | .setTailCall(isTailCall); |
6438 | 403 | |
6439 | 403 | std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); |
6440 | 403 | return CallResult.second; |
6441 | 403 | } |
6442 | | |
6443 | | SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, |
6444 | | SDValue Dst, unsigned DstAlign, |
6445 | | SDValue Value, SDValue Size, Type *SizeTy, |
6446 | | unsigned ElemSz, bool isTailCall, |
6447 | 23 | MachinePointerInfo DstPtrInfo) { |
6448 | 23 | // Emit a library call. |
6449 | 23 | TargetLowering::ArgListTy Args; |
6450 | 23 | TargetLowering::ArgListEntry Entry; |
6451 | 23 | Entry.Ty = getDataLayout().getIntPtrType(*getContext()); |
6452 | 23 | Entry.Node = Dst; |
6453 | 23 | Args.push_back(Entry); |
6454 | 23 | |
6455 | 23 | Entry.Ty = Type::getInt8Ty(*getContext()); |
6456 | 23 | Entry.Node = Value; |
6457 | 23 | Args.push_back(Entry); |
6458 | 23 | |
6459 | 23 | Entry.Ty = SizeTy; |
6460 | 23 | Entry.Node = Size; |
6461 | 23 | Args.push_back(Entry); |
6462 | 23 | |
6463 | 23 | RTLIB::Libcall LibraryCall = |
6464 | 23 | RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); |
6465 | 23 | if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) |
6466 | 0 | report_fatal_error("Unsupported element size"); |
6467 | 23 | |
6468 | 23 | TargetLowering::CallLoweringInfo CLI(*this); |
6469 | 23 | CLI.setDebugLoc(dl) |
6470 | 23 | .setChain(Chain) |
6471 | 23 | .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), |
6472 | 23 | Type::getVoidTy(*getContext()), |
6473 | 23 | getExternalSymbol(TLI->getLibcallName(LibraryCall), |
6474 | 23 | TLI->getPointerTy(getDataLayout())), |
6475 | 23 | std::move(Args)) |
6476 | 23 | .setDiscardResult() |
6477 | 23 | .setTailCall(isTailCall); |
6478 | 23 | |
6479 | 23 | std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); |
6480 | 23 | return CallResult.second; |
6481 | 23 | } |
6482 | | |
6483 | | SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, |
6484 | | SDVTList VTList, ArrayRef<SDValue> Ops, |
6485 | 24.1k | MachineMemOperand *MMO) { |
6486 | 24.1k | FoldingSetNodeID ID; |
6487 | 24.1k | ID.AddInteger(MemVT.getRawBits()); |
6488 | 24.1k | AddNodeIDNode(ID, Opcode, VTList, Ops); |
6489 | 24.1k | ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); |
6490 | 24.1k | void* IP = nullptr; |
6491 | 24.1k | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { |
6492 | 0 | cast<AtomicSDNode>(E)->refineAlignment(MMO); |
6493 | 0 | return SDValue(E, 0); |
6494 | 0 | } |
6495 | 24.1k | |
6496 | 24.1k | auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), |
6497 | 24.1k | VTList, MemVT, MMO); |
6498 | 24.1k | createOperands(N, Ops); |
6499 | 24.1k | |
6500 | 24.1k | CSEMap.InsertNode(N, IP); |
6501 | 24.1k | InsertNode(N); |
6502 | 24.1k | return SDValue(N, 0); |
6503 | 24.1k | } |
6504 | | |
6505 | | SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, |
6506 | | EVT MemVT, SDVTList VTs, SDValue Chain, |
6507 | | SDValue Ptr, SDValue Cmp, SDValue Swp, |
6508 | 5.52k | MachineMemOperand *MMO) { |
6509 | 5.52k | assert(Opcode == ISD::ATOMIC_CMP_SWAP || |
6510 | 5.52k | Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); |
6511 | 5.52k | assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); |
6512 | 5.52k | |
6513 | 5.52k | SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; |
6514 | 5.52k | return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); |
6515 | 5.52k | } |
6516 | | |
6517 | | SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, |
6518 | | SDValue Chain, SDValue Ptr, SDValue Val, |
6519 | 16.3k | MachineMemOperand *MMO) { |
6520 | 16.3k | assert((Opcode == ISD::ATOMIC_LOAD_ADD || |
6521 | 16.3k | Opcode == ISD::ATOMIC_LOAD_SUB || |
6522 | 16.3k | Opcode == ISD::ATOMIC_LOAD_AND || |
6523 | 16.3k | Opcode == ISD::ATOMIC_LOAD_CLR || |
6524 | 16.3k | Opcode == ISD::ATOMIC_LOAD_OR || |
6525 | 16.3k | Opcode == ISD::ATOMIC_LOAD_XOR || |
6526 | 16.3k | Opcode == ISD::ATOMIC_LOAD_NAND || |
6527 | 16.3k | Opcode == ISD::ATOMIC_LOAD_MIN || |
6528 | 16.3k | Opcode == ISD::ATOMIC_LOAD_MAX || |
6529 | 16.3k | Opcode == ISD::ATOMIC_LOAD_UMIN || |
6530 | 16.3k | Opcode == ISD::ATOMIC_LOAD_UMAX || |
6531 | 16.3k | Opcode == ISD::ATOMIC_LOAD_FADD || |
6532 | 16.3k | Opcode == ISD::ATOMIC_LOAD_FSUB || |
6533 | 16.3k | Opcode == ISD::ATOMIC_SWAP || |
6534 | 16.3k | Opcode == ISD::ATOMIC_STORE) && |
6535 | 16.3k | "Invalid Atomic Op"); |
6536 | 16.3k | |
6537 | 16.3k | EVT VT = Val.getValueType(); |
6538 | 16.3k | |
6539 | 16.3k | SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other)7.07k : |
6540 | 16.3k | getVTList(VT, MVT::Other)9.31k ; |
6541 | 16.3k | SDValue Ops[] = {Chain, Ptr, Val}; |
6542 | 16.3k | return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); |
6543 | 16.3k | } |
6544 | | |
6545 | | SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, |
6546 | | EVT VT, SDValue Chain, SDValue Ptr, |
6547 | 2.18k | MachineMemOperand *MMO) { |
6548 | 2.18k | assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); |
6549 | 2.18k | |
6550 | 2.18k | SDVTList VTs = getVTList(VT, MVT::Other); |
6551 | 2.18k | SDValue Ops[] = {Chain, Ptr}; |
6552 | 2.18k | return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO); |
6553 | 2.18k | } |
6554 | | |
6555 | | /// getMergeValues - Create a MERGE_VALUES node from the given operands. |
6556 | 526k | SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { |
6557 | 526k | if (Ops.size() == 1) |
6558 | 450k | return Ops[0]; |
6559 | 76.9k | |
6560 | 76.9k | SmallVector<EVT, 4> VTs; |
6561 | 76.9k | VTs.reserve(Ops.size()); |
6562 | 233k | for (unsigned i = 0; i < Ops.size(); ++i156k ) |
6563 | 156k | VTs.push_back(Ops[i].getValueType()); |
6564 | 76.9k | return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops); |
6565 | 76.9k | } |
6566 | | |
6567 | | SDValue SelectionDAG::getMemIntrinsicNode( |
6568 | | unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, |
6569 | | EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, |
6570 | 75.1k | MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) { |
6571 | 75.1k | if (Align == 0) // Ensure that codegen never sees alignment 0 |
6572 | 20.2k | Align = getEVTAlignment(MemVT); |
6573 | 75.1k | |
6574 | 75.1k | if (!Size) |
6575 | 74.9k | Size = MemVT.getStoreSize(); |
6576 | 75.1k | |
6577 | 75.1k | MachineFunction &MF = getMachineFunction(); |
6578 | 75.1k | MachineMemOperand *MMO = |
6579 | 75.1k | MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo); |
6580 | 75.1k | |
6581 | 75.1k | return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); |
6582 | 75.1k | } |
6583 | | |
6584 | | SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, |
6585 | | SDVTList VTList, |
6586 | | ArrayRef<SDValue> Ops, EVT MemVT, |
6587 | 88.0k | MachineMemOperand *MMO) { |
6588 | 88.0k | assert((Opcode == ISD::INTRINSIC_VOID || |
6589 | 88.0k | Opcode == ISD::INTRINSIC_W_CHAIN || |
6590 | 88.0k | Opcode == ISD::PREFETCH || |
6591 | 88.0k | Opcode == ISD::LIFETIME_START || |
6592 | 88.0k | Opcode == ISD::LIFETIME_END || |
6593 | 88.0k | ((int)Opcode <= std::numeric_limits<int>::max() && |
6594 | 88.0k | (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && |
6595 | 88.0k | "Opcode is not a memory-accessing opcode!"); |
6596 | 88.0k | |
6597 | 88.0k | // Memoize the node unless it returns a flag. |
6598 | 88.0k | MemIntrinsicSDNode *N; |
6599 | 88.0k | if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { |
6600 | 86.0k | FoldingSetNodeID ID; |
6601 | 86.0k | AddNodeIDNode(ID, Opcode, VTList, Ops); |
6602 | 86.0k | ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>( |
6603 | 86.0k | Opcode, dl.getIROrder(), VTList, MemVT, MMO)); |
6604 | 86.0k | ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); |
6605 | 86.0k | void *IP = nullptr; |
6606 | 86.0k | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { |
6607 | 0 | cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); |
6608 | 0 | return SDValue(E, 0); |
6609 | 0 | } |
6610 | 86.0k | |
6611 | 86.0k | N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), |
6612 | 86.0k | VTList, MemVT, MMO); |
6613 | 86.0k | createOperands(N, Ops); |
6614 | 86.0k | |
6615 | 86.0k | CSEMap.InsertNode(N, IP); |
6616 | 86.0k | } else { |
6617 | 2.04k | N = newSDNode<MemIntrinsicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), |
6618 | 2.04k | VTList, MemVT, MMO); |
6619 | 2.04k | createOperands(N, Ops); |
6620 | 2.04k | } |
6621 | 88.0k | InsertNode(N); |
6622 | 88.0k | return SDValue(N, 0); |
6623 | 88.0k | } |
6624 | | |
6625 | | SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, |
6626 | | SDValue Chain, int FrameIndex, |
6627 | 68.1k | int64_t Size, int64_t Offset) { |
6628 | 68.1k | const unsigned Opcode = IsStart ? ISD::LIFETIME_START33.5k : ISD::LIFETIME_END34.5k ; |
6629 | 68.1k | const auto VTs = getVTList(MVT::Other); |
6630 | 68.1k | SDValue Ops[2] = { |
6631 | 68.1k | Chain, |
6632 | 68.1k | getFrameIndex(FrameIndex, |
6633 | 68.1k | getTargetLoweringInfo().getFrameIndexTy(getDataLayout()), |
6634 | 68.1k | true)}; |
6635 | 68.1k | |
6636 | 68.1k | FoldingSetNodeID ID; |
6637 | 68.1k | AddNodeIDNode(ID, Opcode, VTs, Ops); |
6638 | 68.1k | ID.AddInteger(FrameIndex); |
6639 | 68.1k | ID.AddInteger(Size); |
6640 | 68.1k | ID.AddInteger(Offset); |
6641 | 68.1k | void *IP = nullptr; |
6642 | 68.1k | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) |
6643 | 0 | return SDValue(E, 0); |
6644 | 68.1k | |
6645 | 68.1k | LifetimeSDNode *N = newSDNode<LifetimeSDNode>( |
6646 | 68.1k | Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset); |
6647 | 68.1k | createOperands(N, Ops); |
6648 | 68.1k | CSEMap.InsertNode(N, IP); |
6649 | 68.1k | InsertNode(N); |
6650 | 68.1k | SDValue V(N, 0); |
6651 | 68.1k | NewSDValueDbgMsg(V, "Creating new node: ", this); |
6652 | 68.1k | return V; |
6653 | 68.1k | } |
6654 | | |
6655 | | /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a |
6656 | | /// MachinePointerInfo record from it. This is particularly useful because the |
6657 | | /// code generator has many cases where it doesn't bother passing in a |
6658 | | /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". |
6659 | | static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, |
6660 | | SelectionDAG &DAG, SDValue Ptr, |
6661 | 53.0k | int64_t Offset = 0) { |
6662 | 53.0k | // If this is FI+Offset, we can model it. |
6663 | 53.0k | if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) |
6664 | 16.5k | return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), |
6665 | 16.5k | FI->getIndex(), Offset); |
6666 | 36.5k | |
6667 | 36.5k | // If this is (FI+Offset1)+Offset2, we can model it. |
6668 | 36.5k | if (Ptr.getOpcode() != ISD::ADD || |
6669 | 36.5k | !isa<ConstantSDNode>(Ptr.getOperand(1))27.0k || |
6670 | 36.5k | !isa<FrameIndexSDNode>(Ptr.getOperand(0))25.8k ) |
6671 | 33.7k | return Info; |
6672 | 2.78k | |
6673 | 2.78k | int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); |
6674 | 2.78k | return MachinePointerInfo::getFixedStack( |
6675 | 2.78k | DAG.getMachineFunction(), FI, |
6676 | 2.78k | Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); |
6677 | 2.78k | } |
6678 | | |
6679 | | /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a |
6680 | | /// MachinePointerInfo record from it. This is particularly useful because the |
6681 | | /// code generator has many cases where it doesn't bother passing in a |
6682 | | /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". |
6683 | | static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, |
6684 | | SelectionDAG &DAG, SDValue Ptr, |
6685 | 17.1k | SDValue OffsetOp) { |
6686 | 17.1k | // If the 'Offset' value isn't a constant, we can't handle this. |
6687 | 17.1k | if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) |
6688 | 3 | return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue()); |
6689 | 17.1k | if (OffsetOp.isUndef()) |
6690 | 17.1k | return InferPointerInfo(Info, DAG, Ptr); |
6691 | 0 | return Info; |
6692 | 0 | } |
6693 | | |
6694 | | SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, |
6695 | | EVT VT, const SDLoc &dl, SDValue Chain, |
6696 | | SDValue Ptr, SDValue Offset, |
6697 | | MachinePointerInfo PtrInfo, EVT MemVT, |
6698 | | unsigned Alignment, |
6699 | | MachineMemOperand::Flags MMOFlags, |
6700 | 1.03M | const AAMDNodes &AAInfo, const MDNode *Ranges) { |
6701 | 1.03M | assert(Chain.getValueType() == MVT::Other && |
6702 | 1.03M | "Invalid chain type"); |
6703 | 1.03M | if (Alignment == 0) // Ensure that codegen never sees alignment 0 |
6704 | 176k | Alignment = getEVTAlignment(MemVT); |
6705 | 1.03M | |
6706 | 1.03M | MMOFlags |= MachineMemOperand::MOLoad; |
6707 | 1.03M | assert((MMOFlags & MachineMemOperand::MOStore) == 0); |
6708 | 1.03M | // If we don't have a PtrInfo, infer the trivial frame index case to simplify |
6709 | 1.03M | // clients. |
6710 | 1.03M | if (PtrInfo.V.isNull()) |
6711 | 17.1k | PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); |
6712 | 1.03M | |
6713 | 1.03M | MachineFunction &MF = getMachineFunction(); |
6714 | 1.03M | MachineMemOperand *MMO = MF.getMachineMemOperand( |
6715 | 1.03M | PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges); |
6716 | 1.03M | return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); |
6717 | 1.03M | } |
6718 | | |
6719 | | SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, |
6720 | | EVT VT, const SDLoc &dl, SDValue Chain, |
6721 | | SDValue Ptr, SDValue Offset, EVT MemVT, |
6722 | 1.43M | MachineMemOperand *MMO) { |
6723 | 1.43M | if (VT == MemVT) { |
6724 | 1.13M | ExtType = ISD::NON_EXTLOAD; |
6725 | 1.13M | } else if (294k ExtType == ISD::NON_EXTLOAD294k ) { |
6726 | 0 | assert(VT == MemVT && "Non-extending load from different memory type!"); |
6727 | 294k | } else { |
6728 | 294k | // Extending load. |
6729 | 294k | assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && |
6730 | 294k | "Should only be an extending load, not truncating!"); |
6731 | 294k | assert(VT.isInteger() == MemVT.isInteger() && |
6732 | 294k | "Cannot convert from FP to Int or Int -> FP!"); |
6733 | 294k | assert(VT.isVector() == MemVT.isVector() && |
6734 | 294k | "Cannot use an ext load to convert to or from a vector!"); |
6735 | 294k | assert((!VT.isVector() || |
6736 | 294k | VT.getVectorNumElements() == MemVT.getVectorNumElements()) && |
6737 | 294k | "Cannot use an ext load to change the number of vector elements!"); |
6738 | 294k | } |
6739 | 1.43M | |
6740 | 1.43M | bool Indexed = AM != ISD::UNINDEXED; |
6741 | 1.43M | assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); |
6742 | 1.43M | |
6743 | 1.43M | SDVTList VTs = Indexed ? |
6744 | 1.41M | getVTList(VT, Ptr.getValueType(), MVT::Other)12.3k : getVTList(VT, MVT::Other); |
6745 | 1.43M | SDValue Ops[] = { Chain, Ptr, Offset }; |
6746 | 1.43M | FoldingSetNodeID ID; |
6747 | 1.43M | AddNodeIDNode(ID, ISD::LOAD, VTs, Ops); |
6748 | 1.43M | ID.AddInteger(MemVT.getRawBits()); |
6749 | 1.43M | ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>( |
6750 | 1.43M | dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO)); |
6751 | 1.43M | ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); |
6752 | 1.43M | void *IP = nullptr; |
6753 | 1.43M | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { |
6754 | 35.3k | cast<LoadSDNode>(E)->refineAlignment(MMO); |
6755 | 35.3k | return SDValue(E, 0); |
6756 | 35.3k | } |
6757 | 1.39M | auto *N = newSDNode<LoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, |
6758 | 1.39M | ExtType, MemVT, MMO); |
6759 | 1.39M | createOperands(N, Ops); |
6760 | 1.39M | |
6761 | 1.39M | CSEMap.InsertNode(N, IP); |
6762 | 1.39M | InsertNode(N); |
6763 | 1.39M | SDValue V(N, 0); |
6764 | 1.39M | NewSDValueDbgMsg(V, "Creating new node: ", this); |
6765 | 1.39M | return V; |
6766 | 1.39M | } |
6767 | | |
6768 | | SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, |
6769 | | SDValue Ptr, MachinePointerInfo PtrInfo, |
6770 | | unsigned Alignment, |
6771 | | MachineMemOperand::Flags MMOFlags, |
6772 | 906k | const AAMDNodes &AAInfo, const MDNode *Ranges) { |
6773 | 906k | SDValue Undef = getUNDEF(Ptr.getValueType()); |
6774 | 906k | return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, |
6775 | 906k | PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges); |
6776 | 906k | } |
6777 | | |
6778 | | SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, |
6779 | 129k | SDValue Ptr, MachineMemOperand *MMO) { |
6780 | 129k | SDValue Undef = getUNDEF(Ptr.getValueType()); |
6781 | 129k | return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, |
6782 | 129k | VT, MMO); |
6783 | 129k | } |
6784 | | |
6785 | | SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, |
6786 | | EVT VT, SDValue Chain, SDValue Ptr, |
6787 | | MachinePointerInfo PtrInfo, EVT MemVT, |
6788 | | unsigned Alignment, |
6789 | | MachineMemOperand::Flags MMOFlags, |
6790 | 87.3k | const AAMDNodes &AAInfo) { |
6791 | 87.3k | SDValue Undef = getUNDEF(Ptr.getValueType()); |
6792 | 87.3k | return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, PtrInfo, |
6793 | 87.3k | MemVT, Alignment, MMOFlags, AAInfo); |
6794 | 87.3k | } |
6795 | | |
6796 | | SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, |
6797 | | EVT VT, SDValue Chain, SDValue Ptr, EVT MemVT, |
6798 | 200k | MachineMemOperand *MMO) { |
6799 | 200k | SDValue Undef = getUNDEF(Ptr.getValueType()); |
6800 | 200k | return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, |
6801 | 200k | MemVT, MMO); |
6802 | 200k | } |
6803 | | |
6804 | | SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, |
6805 | | SDValue Base, SDValue Offset, |
6806 | 12.3k | ISD::MemIndexedMode AM) { |
6807 | 12.3k | LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); |
6808 | 12.3k | assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); |
6809 | 12.3k | // Don't propagate the invariant or dereferenceable flags. |
6810 | 12.3k | auto MMOFlags = |
6811 | 12.3k | LD->getMemOperand()->getFlags() & |
6812 | 12.3k | ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); |
6813 | 12.3k | return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, |
6814 | 12.3k | LD->getChain(), Base, Offset, LD->getPointerInfo(), |
6815 | 12.3k | LD->getMemoryVT(), LD->getAlignment(), MMOFlags, |
6816 | 12.3k | LD->getAAInfo()); |
6817 | 12.3k | } |
6818 | | |
6819 | | SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, |
6820 | | SDValue Ptr, MachinePointerInfo PtrInfo, |
6821 | | unsigned Alignment, |
6822 | | MachineMemOperand::Flags MMOFlags, |
6823 | 921k | const AAMDNodes &AAInfo) { |
6824 | 921k | assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); |
6825 | 921k | if (Alignment == 0) // Ensure that codegen never sees alignment 0 |
6826 | 162k | Alignment = getEVTAlignment(Val.getValueType()); |
6827 | 921k | |
6828 | 921k | MMOFlags |= MachineMemOperand::MOStore; |
6829 | 921k | assert((MMOFlags & MachineMemOperand::MOLoad) == 0); |
6830 | 921k | |
6831 | 921k | if (PtrInfo.V.isNull()) |
6832 | 32.6k | PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); |
6833 | 921k | |
6834 | 921k | MachineFunction &MF = getMachineFunction(); |
6835 | 921k | MachineMemOperand *MMO = MF.getMachineMemOperand( |
6836 | 921k | PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo); |
6837 | 921k | return getStore(Chain, dl, Val, Ptr, MMO); |
6838 | 921k | } |
6839 | | |
6840 | | SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, |
6841 | 1.15M | SDValue Ptr, MachineMemOperand *MMO) { |
6842 | 1.15M | assert(Chain.getValueType() == MVT::Other && |
6843 | 1.15M | "Invalid chain type"); |
6844 | 1.15M | EVT VT = Val.getValueType(); |
6845 | 1.15M | SDVTList VTs = getVTList(MVT::Other); |
6846 | 1.15M | SDValue Undef = getUNDEF(Ptr.getValueType()); |
6847 | 1.15M | SDValue Ops[] = { Chain, Val, Ptr, Undef }; |
6848 | 1.15M | FoldingSetNodeID ID; |
6849 | 1.15M | AddNodeIDNode(ID, ISD::STORE, VTs, Ops); |
6850 | 1.15M | ID.AddInteger(VT.getRawBits()); |
6851 | 1.15M | ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( |
6852 | 1.15M | dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO)); |
6853 | 1.15M | ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); |
6854 | 1.15M | void *IP = nullptr; |
6855 | 1.15M | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { |
6856 | 12.6k | cast<StoreSDNode>(E)->refineAlignment(MMO); |
6857 | 12.6k | return SDValue(E, 0); |
6858 | 12.6k | } |
6859 | 1.14M | auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, |
6860 | 1.14M | ISD::UNINDEXED, false, VT, MMO); |
6861 | 1.14M | createOperands(N, Ops); |
6862 | 1.14M | |
6863 | 1.14M | CSEMap.InsertNode(N, IP); |
6864 | 1.14M | InsertNode(N); |
6865 | 1.14M | SDValue V(N, 0); |
6866 | 1.14M | NewSDValueDbgMsg(V, "Creating new node: ", this); |
6867 | 1.14M | return V; |
6868 | 1.14M | } |
6869 | | |
6870 | | SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, |
6871 | | SDValue Ptr, MachinePointerInfo PtrInfo, |
6872 | | EVT SVT, unsigned Alignment, |
6873 | | MachineMemOperand::Flags MMOFlags, |
6874 | 73.4k | const AAMDNodes &AAInfo) { |
6875 | 73.4k | assert(Chain.getValueType() == MVT::Other && |
6876 | 73.4k | "Invalid chain type"); |
6877 | 73.4k | if (Alignment == 0) // Ensure that codegen never sees alignment 0 |
6878 | 1.18k | Alignment = getEVTAlignment(SVT); |
6879 | 73.4k | |
6880 | 73.4k | MMOFlags |= MachineMemOperand::MOStore; |
6881 | 73.4k | assert((MMOFlags & MachineMemOperand::MOLoad) == 0); |
6882 | 73.4k | |
6883 | 73.4k | if (PtrInfo.V.isNull()) |
6884 | 3.25k | PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); |
6885 | 73.4k | |
6886 | 73.4k | MachineFunction &MF = getMachineFunction(); |
6887 | 73.4k | MachineMemOperand *MMO = MF.getMachineMemOperand( |
6888 | 73.4k | PtrInfo, MMOFlags, SVT.getStoreSize(), Alignment, AAInfo); |
6889 | 73.4k | return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); |
6890 | 73.4k | } |
6891 | | |
6892 | | SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, |
6893 | | SDValue Ptr, EVT SVT, |
6894 | 182k | MachineMemOperand *MMO) { |
6895 | 182k | EVT VT = Val.getValueType(); |
6896 | 182k | |
6897 | 182k | assert(Chain.getValueType() == MVT::Other && |
6898 | 182k | "Invalid chain type"); |
6899 | 182k | if (VT == SVT) |
6900 | 58.5k | return getStore(Chain, dl, Val, Ptr, MMO); |
6901 | 123k | |
6902 | 123k | assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && |
6903 | 123k | "Should only be a truncating store, not extending!"); |
6904 | 123k | assert(VT.isInteger() == SVT.isInteger() && |
6905 | 123k | "Can't do FP-INT conversion!"); |
6906 | 123k | assert(VT.isVector() == SVT.isVector() && |
6907 | 123k | "Cannot use trunc store to convert to or from a vector!"); |
6908 | 123k | assert((!VT.isVector() || |
6909 | 123k | VT.getVectorNumElements() == SVT.getVectorNumElements()) && |
6910 | 123k | "Cannot use trunc store to change the number of vector elements!"); |
6911 | 123k | |
6912 | 123k | SDVTList VTs = getVTList(MVT::Other); |
6913 | 123k | SDValue Undef = getUNDEF(Ptr.getValueType()); |
6914 | 123k | SDValue Ops[] = { Chain, Val, Ptr, Undef }; |
6915 | 123k | FoldingSetNodeID ID; |
6916 | 123k | AddNodeIDNode(ID, ISD::STORE, VTs, Ops); |
6917 | 123k | ID.AddInteger(SVT.getRawBits()); |
6918 | 123k | ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>( |
6919 | 123k | dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO)); |
6920 | 123k | ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); |
6921 | 123k | void *IP = nullptr; |
6922 | 123k | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { |
6923 | 636 | cast<StoreSDNode>(E)->refineAlignment(MMO); |
6924 | 636 | return SDValue(E, 0); |
6925 | 636 | } |
6926 | 123k | auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, |
6927 | 123k | ISD::UNINDEXED, true, SVT, MMO); |
6928 | 123k | createOperands(N, Ops); |
6929 | 123k | |
6930 | 123k | CSEMap.InsertNode(N, IP); |
6931 | 123k | InsertNode(N); |
6932 | 123k | SDValue V(N, 0); |
6933 | 123k | NewSDValueDbgMsg(V, "Creating new node: ", this); |
6934 | 123k | return V; |
6935 | 123k | } |
6936 | | |
6937 | | SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, |
6938 | | SDValue Base, SDValue Offset, |
6939 | 5.78k | ISD::MemIndexedMode AM) { |
6940 | 5.78k | StoreSDNode *ST = cast<StoreSDNode>(OrigStore); |
6941 | 5.78k | assert(ST->getOffset().isUndef() && "Store is already a indexed store!"); |
6942 | 5.78k | SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); |
6943 | 5.78k | SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset }; |
6944 | 5.78k | FoldingSetNodeID ID; |
6945 | 5.78k | AddNodeIDNode(ID, ISD::STORE, VTs, Ops); |
6946 | 5.78k | ID.AddInteger(ST->getMemoryVT().getRawBits()); |
6947 | 5.78k | ID.AddInteger(ST->getRawSubclassData()); |
6948 | 5.78k | ID.AddInteger(ST->getPointerInfo().getAddrSpace()); |
6949 | 5.78k | void *IP = nullptr; |
6950 | 5.78k | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) |
6951 | 0 | return SDValue(E, 0); |
6952 | 5.78k | |
6953 | 5.78k | auto *N = newSDNode<StoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, |
6954 | 5.78k | ST->isTruncatingStore(), ST->getMemoryVT(), |
6955 | 5.78k | ST->getMemOperand()); |
6956 | 5.78k | createOperands(N, Ops); |
6957 | 5.78k | |
6958 | 5.78k | CSEMap.InsertNode(N, IP); |
6959 | 5.78k | InsertNode(N); |
6960 | 5.78k | SDValue V(N, 0); |
6961 | 5.78k | NewSDValueDbgMsg(V, "Creating new node: ", this); |
6962 | 5.78k | return V; |
6963 | 5.78k | } |
6964 | | |
6965 | | SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, |
6966 | | SDValue Ptr, SDValue Mask, SDValue PassThru, |
6967 | | EVT MemVT, MachineMemOperand *MMO, |
6968 | 814 | ISD::LoadExtType ExtTy, bool isExpanding) { |
6969 | 814 | SDVTList VTs = getVTList(VT, MVT::Other); |
6970 | 814 | SDValue Ops[] = { Chain, Ptr, Mask, PassThru }; |
6971 | 814 | FoldingSetNodeID ID; |
6972 | 814 | AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); |
6973 | 814 | ID.AddInteger(MemVT.getRawBits()); |
6974 | 814 | ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( |
6975 | 814 | dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO)); |
6976 | 814 | ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); |
6977 | 814 | void *IP = nullptr; |
6978 | 814 | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { |
6979 | 0 | cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); |
6980 | 0 | return SDValue(E, 0); |
6981 | 0 | } |
6982 | 814 | auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, |
6983 | 814 | ExtTy, isExpanding, MemVT, MMO); |
6984 | 814 | createOperands(N, Ops); |
6985 | 814 | |
6986 | 814 | CSEMap.InsertNode(N, IP); |
6987 | 814 | InsertNode(N); |
6988 | 814 | SDValue V(N, 0); |
6989 | 814 | NewSDValueDbgMsg(V, "Creating new node: ", this); |
6990 | 814 | return V; |
6991 | 814 | } |
6992 | | |
6993 | | SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, |
6994 | | SDValue Val, SDValue Ptr, SDValue Mask, |
6995 | | EVT MemVT, MachineMemOperand *MMO, |
6996 | 872 | bool IsTruncating, bool IsCompressing) { |
6997 | 872 | assert(Chain.getValueType() == MVT::Other && |
6998 | 872 | "Invalid chain type"); |
6999 | 872 | SDVTList VTs = getVTList(MVT::Other); |
7000 | 872 | SDValue Ops[] = { Chain, Val, Ptr, Mask }; |
7001 | 872 | FoldingSetNodeID ID; |
7002 | 872 | AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); |
7003 | 872 | ID.AddInteger(MemVT.getRawBits()); |
7004 | 872 | ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( |
7005 | 872 | dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); |
7006 | 872 | ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); |
7007 | 872 | void *IP = nullptr; |
7008 | 872 | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { |
7009 | 0 | cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); |
7010 | 0 | return SDValue(E, 0); |
7011 | 0 | } |
7012 | 872 | auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, |
7013 | 872 | IsTruncating, IsCompressing, MemVT, MMO); |
7014 | 872 | createOperands(N, Ops); |
7015 | 872 | |
7016 | 872 | CSEMap.InsertNode(N, IP); |
7017 | 872 | InsertNode(N); |
7018 | 872 | SDValue V(N, 0); |
7019 | 872 | NewSDValueDbgMsg(V, "Creating new node: ", this); |
7020 | 872 | return V; |
7021 | 872 | } |
7022 | | |
7023 | | SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, |
7024 | | ArrayRef<SDValue> Ops, |
7025 | 479 | MachineMemOperand *MMO) { |
7026 | 479 | assert(Ops.size() == 6 && "Incompatible number of operands"); |
7027 | 479 | |
7028 | 479 | FoldingSetNodeID ID; |
7029 | 479 | AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); |
7030 | 479 | ID.AddInteger(VT.getRawBits()); |
7031 | 479 | ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( |
7032 | 479 | dl.getIROrder(), VTs, VT, MMO)); |
7033 | 479 | ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); |
7034 | 479 | void *IP = nullptr; |
7035 | 479 | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { |
7036 | 10 | cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); |
7037 | 10 | return SDValue(E, 0); |
7038 | 10 | } |
7039 | 469 | |
7040 | 469 | auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), |
7041 | 469 | VTs, VT, MMO); |
7042 | 469 | createOperands(N, Ops); |
7043 | 469 | |
7044 | 469 | assert(N->getPassThru().getValueType() == N->getValueType(0) && |
7045 | 469 | "Incompatible type of the PassThru value in MaskedGatherSDNode"); |
7046 | 469 | assert(N->getMask().getValueType().getVectorNumElements() == |
7047 | 469 | N->getValueType(0).getVectorNumElements() && |
7048 | 469 | "Vector width mismatch between mask and data"); |
7049 | 469 | assert(N->getIndex().getValueType().getVectorNumElements() >= |
7050 | 469 | N->getValueType(0).getVectorNumElements() && |
7051 | 469 | "Vector width mismatch between index and data"); |
7052 | 469 | assert(isa<ConstantSDNode>(N->getScale()) && |
7053 | 469 | cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && |
7054 | 469 | "Scale should be a constant power of 2"); |
7055 | 469 | |
7056 | 469 | CSEMap.InsertNode(N, IP); |
7057 | 469 | InsertNode(N); |
7058 | 469 | SDValue V(N, 0); |
7059 | 469 | NewSDValueDbgMsg(V, "Creating new node: ", this); |
7060 | 469 | return V; |
7061 | 469 | } |
7062 | | |
7063 | | SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, |
7064 | | ArrayRef<SDValue> Ops, |
7065 | 174 | MachineMemOperand *MMO) { |
7066 | 174 | assert(Ops.size() == 6 && "Incompatible number of operands"); |
7067 | 174 | |
7068 | 174 | FoldingSetNodeID ID; |
7069 | 174 | AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); |
7070 | 174 | ID.AddInteger(VT.getRawBits()); |
7071 | 174 | ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( |
7072 | 174 | dl.getIROrder(), VTs, VT, MMO)); |
7073 | 174 | ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); |
7074 | 174 | void *IP = nullptr; |
7075 | 174 | if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { |
7076 | 0 | cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); |
7077 | 0 | return SDValue(E, 0); |
7078 | 0 | } |
7079 | 174 | auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), |
7080 | 174 | VTs, VT, MMO); |
7081 | 174 | createOperands(N, Ops); |
7082 | 174 | |
7083 | 174 | assert(N->getMask().getValueType().getVectorNumElements() == |
7084 | 174 | N->getValue().getValueType().getVectorNumElements() && |
7085 | 174 | "Vector width mismatch between mask and data"); |
7086 | 174 | assert(N->getIndex().getValueType().getVectorNumElements() >= |
7087 | 174 | N->getValue().getValueType().getVectorNumElements() && |
7088 | 174 | "Vector width mismatch between index and data"); |
7089 | 174 | assert(isa<ConstantSDNode>(N->getScale()) && |
7090 | 174 | cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && |
7091 | 174 | "Scale should be a constant power of 2"); |
7092 | 174 | |
7093 | 174 | CSEMap.InsertNode(N, IP); |
7094 | 174 | InsertNode(N); |
7095 | 174 | SDValue V(N, 0); |
7096 | 174 | NewSDValueDbgMsg(V, "Creating new node: ", this); |
7097 | 174 | return V; |
7098 | 174 | } |
7099 | | |
7100 | 302k | SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) { |
7101 | 302k | // select undef, T, F --> T (if T is a constant), otherwise F |
7102 | 302k | // select, ?, undef, F --> F |
7103 | 302k | // select, ?, T, undef --> T |
7104 | 302k | if (Cond.isUndef()) |
7105 | 174 | return isConstantValueOfAnyType(T) ? T77 : F97 ; |
7106 | 302k | if (T.isUndef()) |
7107 | 53 | return F; |
7108 | 302k | if (F.isUndef()) |
7109 | 330 | return T; |
7110 | 301k | |
7111 | 301k | // select true, T, F --> T |
7112 | 301k | // select false, T, F --> F |
7113 | 301k | if (auto *CondC = dyn_cast<ConstantSDNode>(Cond)) |
7114 | 2.36k | return CondC->isNullValue() ? F2.19k : T174 ; |
7115 | 299k | |
7116 | 299k | // TODO: This should simplify VSELECT with constant condition using something |
7117 | 299k | // like this (but check boolean contents to be complete?): |
7118 | 299k | // if (ISD::isBuildVectorAllOnes(Cond.getNode())) |
7119 | 299k | // return T; |
7120 | 299k | // if (ISD::isBuildVectorAllZeros(Cond.getNode())) |
7121 | 299k | // return F; |
7122 | 299k | |
7123 | 299k | // select ?, T, T --> T |
7124 | 299k | if (T == F) |
7125 | 476 | return T; |
7126 | 298k | |
7127 | 298k | return SDValue(); |
7128 | 298k | } |
7129 | | |
7130 | 1.28M | SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) { |
7131 | 1.28M | // shift undef, Y --> 0 (can always assume that the undef value is 0) |
7132 | 1.28M | if (X.isUndef()) |
7133 | 460 | return getConstant(0, SDLoc(X.getNode()), X.getValueType()); |
7134 | 1.28M | // shift X, undef --> undef (because it may shift by the bitwidth) |
7135 | 1.28M | if (Y.isUndef()) |
7136 | 96 | return getUNDEF(X.getValueType()); |
7137 | 1.28M | |
7138 | 1.28M | // shift 0, Y --> 0 |
7139 | 1.28M | // shift X, 0 --> X |
7140 | 1.28M | if (isNullOrNullSplat(X) || isNullOrNullSplat(Y)1.25M ) |
7141 | 50.6k | return X; |
7142 | 1.23M | |
7143 | 1.23M | // shift X, C >= bitwidth(X) --> undef |
7144 | 1.23M | // All vector elements must be too big (or undef) to avoid partial undefs. |
7145 | 1.23M | auto isShiftTooBig = [X](ConstantSDNode *Val) { |
7146 | 1.11M | return !Val || Val->getAPIntValue().uge(X.getScalarValueSizeInBits())1.11M ; |
7147 | 1.11M | }; |
7148 | 1.23M | if (ISD::matchUnaryPredicate(Y, isShiftTooBig, true)) |
7149 | 178 | return getUNDEF(X.getValueType()); |
7150 | 1.23M | |
7151 | 1.23M | return SDValue(); |
7152 | 1.23M | } |
7153 | | |
7154 | | // TODO: Use fast-math-flags to enable more simplifications. |
7155 | 110k | SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) { |
7156 | 110k | ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true); |
7157 | 110k | if (!YC) |
7158 | 84.2k | return SDValue(); |
7159 | 26.1k | |
7160 | 26.1k | // X + -0.0 --> X |
7161 | 26.1k | if (Opcode == ISD::FADD) |
7162 | 17.1k | if (YC->getValueAPF().isNegZero()) |
7163 | 9 | return X; |
7164 | 26.1k | |
7165 | 26.1k | // X - +0.0 --> X |
7166 | 26.1k | if (Opcode == ISD::FSUB) |
7167 | 1.06k | if (YC->getValueAPF().isPosZero()) |
7168 | 47 | return X; |
7169 | 26.0k | |
7170 | 26.0k | // X * 1.0 --> X |
7171 | 26.0k | // X / 1.0 --> X |
7172 | 26.0k | if (Opcode == ISD::FMUL || Opcode == ISD::FDIV19.1k ) |
7173 | 7.85k | if (YC->getValueAPF().isExactlyValue(1.0)) |
7174 | 659 | return X; |
7175 | 25.4k | |
7176 | 25.4k | return SDValue(); |
7177 | 25.4k | } |
7178 | | |
7179 | | SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, |
7180 | 461 | SDValue Ptr, SDValue SV, unsigned Align) { |
7181 | 461 | SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; |
7182 | 461 | return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); |
7183 | 461 | } |
7184 | | |
7185 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
7186 | 19.0k | ArrayRef<SDUse> Ops) { |
7187 | 19.0k | switch (Ops.size()) { |
7188 | 19.0k | case 0: return getNode(Opcode, DL, VT)0 ; |
7189 | 19.0k | case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0]))32 ; |
7190 | 19.0k | case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1])12.4k ; |
7191 | 19.0k | case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2])165 ; |
7192 | 19.0k | default: break6.40k ; |
7193 | 6.40k | } |
7194 | 6.40k | |
7195 | 6.40k | // Copy from an SDUse array into an SDValue array for use with |
7196 | 6.40k | // the regular getNode logic. |
7197 | 6.40k | SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end()); |
7198 | 6.40k | return getNode(Opcode, DL, VT, NewOps); |
7199 | 6.40k | } |
7200 | | |
7201 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, |
7202 | 7.33M | ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { |
7203 | 7.33M | unsigned NumOps = Ops.size(); |
7204 | 7.33M | switch (NumOps) { |
7205 | 7.33M | case 0: return getNode(Opcode, DL, VT)0 ; |
7206 | 7.33M | case 1: return getNode(Opcode, DL, VT, Ops[0], Flags)4.20M ; |
7207 | 7.33M | case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags)910k ; |
7208 | 7.33M | case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2], Flags)421k ; |
7209 | 7.33M | default: break1.80M ; |
7210 | 1.80M | } |
7211 | 1.80M | |
7212 | 1.80M | switch (Opcode) { |
7213 | 1.80M | default: break942k ; |
7214 | 1.80M | case ISD::BUILD_VECTOR: |
7215 | 336k | // Attempt to simplify BUILD_VECTOR. |
7216 | 336k | if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this)) |
7217 | 3.60k | return V; |
7218 | 333k | break; |
7219 | 333k | case ISD::CONCAT_VECTORS: |
7220 | 13.5k | if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this)) |
7221 | 1.10k | return V; |
7222 | 12.4k | break; |
7223 | 55.9k | case ISD::SELECT_CC: |
7224 | 55.9k | assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); |
7225 | 55.9k | assert(Ops[0].getValueType() == Ops[1].getValueType() && |
7226 | 55.9k | "LHS and RHS of condition must have same type!"); |
7227 | 55.9k | assert(Ops[2].getValueType() == Ops[3].getValueType() && |
7228 | 55.9k | "True and False arms of SelectCC must have same type!"); |
7229 | 55.9k | assert(Ops[2].getValueType() == VT && |
7230 | 55.9k | "select_cc node must be of same type as true and false value!"); |
7231 | 55.9k | break; |
7232 | 453k | case ISD::BR_CC: |
7233 | 453k | assert(NumOps == 5 && "BR_CC takes 5 operands!"); |
7234 | 453k | assert(Ops[2].getValueType() == Ops[3].getValueType() && |
7235 | 453k | "LHS/RHS of comparison should match types!"); |
7236 | 453k | break; |
7237 | 1.79M | } |
7238 | 1.79M | |
7239 | 1.79M | // Memoize nodes. |
7240 | 1.79M | SDNode *N; |
7241 | 1.79M | SDVTList VTs = getVTList(VT); |
7242 | 1.79M | |
7243 | 1.79M | if (VT != MVT::Glue1.79M ) { |
7244 | 1.79M | FoldingSetNodeID ID; |
7245 | 1.79M | AddNodeIDNode(ID, Opcode, VTs, Ops); |
7246 | 1.79M | void *IP = nullptr; |
7247 | 1.79M | |
7248 | 1.79M | if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) |
7249 | 238k | return SDValue(E, 0); |
7250 | 1.55M | |
7251 | 1.55M | N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); |
7252 | 1.55M | createOperands(N, Ops); |
7253 | 1.55M | |
7254 | 1.55M | CSEMap.InsertNode(N, IP); |
7255 | 18.4E | } else { |
7256 | 18.4E | N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); |
7257 | 18.4E | createOperands(N, Ops); |
7258 | 18.4E | } |
7259 | 1.79M | |
7260 | 1.79M | InsertNode(N); |
7261 | 1.55M | SDValue V(N, 0); |
7262 | 1.55M | NewSDValueDbgMsg(V, "Creating new node: ", this); |
7263 | 1.55M | return V; |
7264 | 1.79M | } |
7265 | | |
7266 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, |
7267 | 24.5k | ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { |
7268 | 24.5k | return getNode(Opcode, DL, getVTList(ResultTys), Ops); |
7269 | 24.5k | } |
7270 | | |
7271 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
7272 | 9.17M | ArrayRef<SDValue> Ops) { |
7273 | 9.17M | if (VTList.NumVTs == 1) |
7274 | 2.80M | return getNode(Opcode, DL, VTList.VTs[0], Ops); |
7275 | 6.37M | |
7276 | | #if 0 |
7277 | | switch (Opcode) { |
7278 | | // FIXME: figure out how to safely handle things like |
7279 | | // int foo(int x) { return 1 << (x & 255); } |
7280 | | // int bar() { return foo(256); } |
7281 | | case ISD::SRA_PARTS: |
7282 | | case ISD::SRL_PARTS: |
7283 | | case ISD::SHL_PARTS: |
7284 | | if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG && |
7285 | | cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1) |
7286 | | return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); |
7287 | | else if (N3.getOpcode() == ISD::AND) |
7288 | | if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) { |
7289 | | // If the and is only masking out bits that cannot effect the shift, |
7290 | | // eliminate the and. |
7291 | | unsigned NumBits = VT.getScalarSizeInBits()*2; |
7292 | | if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) |
7293 | | return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); |
7294 | | } |
7295 | | break; |
7296 | | } |
7297 | | #endif |
7298 | | |
7299 | 6.37M | // Memoize the node unless it returns a flag. |
7300 | 6.37M | SDNode *N; |
7301 | 6.37M | if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { |
7302 | 3.30M | FoldingSetNodeID ID; |
7303 | 3.30M | AddNodeIDNode(ID, Opcode, VTList, Ops); |
7304 | 3.30M | void *IP = nullptr; |
7305 | 3.30M | if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) |
7306 | 476k | return SDValue(E, 0); |
7307 | 2.82M | |
7308 | 2.82M | N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); |
7309 | 2.82M | createOperands(N, Ops); |
7310 | 2.82M | CSEMap.InsertNode(N, IP); |
7311 | 3.06M | } else { |
7312 | 3.06M | N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); |
7313 | 3.06M | createOperands(N, Ops); |
7314 | 3.06M | } |
7315 | 6.37M | InsertNode(N); |
7316 | 5.89M | SDValue V(N, 0); |
7317 | 5.89M | NewSDValueDbgMsg(V, "Creating new node: ", this); |
7318 | 5.89M | return V; |
7319 | 6.37M | } |
7320 | | |
7321 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, |
7322 | 0 | SDVTList VTList) { |
7323 | 0 | return getNode(Opcode, DL, VTList, None); |
7324 | 0 | } |
7325 | | |
7326 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
7327 | 5.33k | SDValue N1) { |
7328 | 5.33k | SDValue Ops[] = { N1 }; |
7329 | 5.33k | return getNode(Opcode, DL, VTList, Ops); |
7330 | 5.33k | } |
7331 | | |
7332 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
7333 | 288k | SDValue N1, SDValue N2) { |
7334 | 288k | SDValue Ops[] = { N1, N2 }; |
7335 | 288k | return getNode(Opcode, DL, VTList, Ops); |
7336 | 288k | } |
7337 | | |
7338 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
7339 | 21.6k | SDValue N1, SDValue N2, SDValue N3) { |
7340 | 21.6k | SDValue Ops[] = { N1, N2, N3 }; |
7341 | 21.6k | return getNode(Opcode, DL, VTList, Ops); |
7342 | 21.6k | } |
7343 | | |
7344 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
7345 | 2.27k | SDValue N1, SDValue N2, SDValue N3, SDValue N4) { |
7346 | 2.27k | SDValue Ops[] = { N1, N2, N3, N4 }; |
7347 | 2.27k | return getNode(Opcode, DL, VTList, Ops); |
7348 | 2.27k | } |
7349 | | |
7350 | | SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, |
7351 | | SDValue N1, SDValue N2, SDValue N3, SDValue N4, |
7352 | 459 | SDValue N5) { |
7353 | 459 | SDValue Ops[] = { N1, N2, N3, N4, N5 }; |
7354 | 459 | return getNode(Opcode, DL, VTList, Ops); |
7355 | 459 | } |
7356 | | |
7357 | 63.2M | SDVTList SelectionDAG::getVTList(EVT VT) { |
7358 | 63.2M | return makeVTList(SDNode::getValueTypeList(VT), 1); |
7359 | 63.2M | } |
7360 | | |
7361 | 9.23M | SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { |
7362 | 9.23M | FoldingSetNodeID ID; |
7363 | 9.23M | ID.AddInteger(2U); |
7364 | 9.23M | ID.AddInteger(VT1.getRawBits()); |
7365 | 9.23M | ID.AddInteger(VT2.getRawBits()); |
7366 | 9.23M | |
7367 | 9.23M | void *IP = nullptr; |
7368 | 9.23M | SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); |
7369 | 9.23M | if (!Result) { |
7370 | 153k | EVT *Array = Allocator.Allocate<EVT>(2); |
7371 | 153k | Array[0] = VT1; |
7372 | 153k | Array[1] = VT2; |
7373 | 153k | Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2); |
7374 | 153k | VTListMap.InsertNode(Result, IP); |
7375 | 153k | } |
7376 | 9.23M | return Result->getSDVTList(); |
7377 | 9.23M | } |
7378 | | |
7379 | 255k | SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { |
7380 | 255k | FoldingSetNodeID ID; |
7381 | 255k | ID.AddInteger(3U); |
7382 | 255k | ID.AddInteger(VT1.getRawBits()); |
7383 | 255k | ID.AddInteger(VT2.getRawBits()); |
7384 | 255k | ID.AddInteger(VT3.getRawBits()); |
7385 | 255k | |
7386 | 255k | void *IP = nullptr; |
7387 | 255k | SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); |
7388 | 255k | if (!Result) { |
7389 | 18.5k | EVT *Array = Allocator.Allocate<EVT>(3); |
7390 | 18.5k | Array[0] = VT1; |
7391 | 18.5k | Array[1] = VT2; |
7392 | 18.5k | Array[2] = VT3; |
7393 | 18.5k | Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3); |
7394 | 18.5k | VTListMap.InsertNode(Result, IP); |
7395 | 18.5k | } |
7396 | 255k | return Result->getSDVTList(); |
7397 | 255k | } |
7398 | | |
7399 | 90 | SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { |
7400 | 90 | FoldingSetNodeID ID; |
7401 | 90 | ID.AddInteger(4U); |
7402 | 90 | ID.AddInteger(VT1.getRawBits()); |
7403 | 90 | ID.AddInteger(VT2.getRawBits()); |
7404 | 90 | ID.AddInteger(VT3.getRawBits()); |
7405 | 90 | ID.AddInteger(VT4.getRawBits()); |
7406 | 90 | |
7407 | 90 | void *IP = nullptr; |
7408 | 90 | SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); |
7409 | 90 | if (!Result) { |
7410 | 33 | EVT *Array = Allocator.Allocate<EVT>(4); |
7411 | 33 | Array[0] = VT1; |
7412 | 33 | Array[1] = VT2; |
7413 | 33 | Array[2] = VT3; |
7414 | 33 | Array[3] = VT4; |
7415 | 33 | Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4); |
7416 | 33 | VTListMap.InsertNode(Result, IP); |
7417 | 33 | } |
7418 | 90 | return Result->getSDVTList(); |
7419 | 90 | } |
7420 | | |
7421 | 4.11M | SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) { |
7422 | 4.11M | unsigned NumVTs = VTs.size(); |
7423 | 4.11M | FoldingSetNodeID ID; |
7424 | 4.11M | ID.AddInteger(NumVTs); |
7425 | 10.6M | for (unsigned index = 0; index < NumVTs; index++6.52M ) { |
7426 | 6.52M | ID.AddInteger(VTs[index].getRawBits()); |
7427 | 6.52M | } |
7428 | 4.11M | |
7429 | 4.11M | void *IP = nullptr; |
7430 | 4.11M | SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); |
7431 | 4.11M | if (!Result) { |
7432 | 86.1k | EVT *Array = Allocator.Allocate<EVT>(NumVTs); |
7433 | 86.1k | llvm::copy(VTs, Array); |
7434 | 86.1k | Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); |
7435 | 86.1k | VTListMap.InsertNode(Result, IP); |
7436 | 86.1k | } |
7437 | 4.11M | return Result->getSDVTList(); |
7438 | 4.11M | } |
7439 | | |
7440 | | |
7441 | | /// UpdateNodeOperands - *Mutate* the specified node in-place to have the |
7442 | | /// specified operands. If the resultant node already exists in the DAG, |
7443 | | /// this does not modify the specified node, instead it returns the node that |
7444 | | /// already exists. If the resultant node does not exist in the DAG, the |
7445 | | /// input node is returned. As a degenerate case, if you specify the same |
7446 | | /// input operands as the node already has, the input node is returned. |
7447 | 15.8k | SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { |
7448 | 15.8k | assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); |
7449 | 15.8k | |
7450 | 15.8k | // Check to see if there is no change. |
7451 | 15.8k | if (Op == N->getOperand(0)) return N0 ; |
7452 | 15.8k | |
7453 | 15.8k | // See if the modified node already exists. |
7454 | 15.8k | void *InsertPos = nullptr; |
7455 | 15.8k | if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) |
7456 | 0 | return Existing; |
7457 | 15.8k | |
7458 | 15.8k | // Nope it doesn't. Remove the node from its current place in the maps. |
7459 | 15.8k | if (InsertPos) |
7460 | 15.8k | if (!RemoveNodeFromCSEMaps(N)) |
7461 | 0 | InsertPos = nullptr; |
7462 | 15.8k | |
7463 | 15.8k | // Now we update the operands. |
7464 | 15.8k | N->OperandList[0].set(Op); |
7465 | 15.8k | |
7466 | 15.8k | updateDivergence(N); |
7467 | 15.8k | // If this gets put into a CSE map, add it. |
7468 | 15.8k | if (InsertPos) CSEMap.InsertNode(N, InsertPos); |
7469 | 15.8k | return N; |
7470 | 15.8k | } |
7471 | | |
7472 | 84.3k | SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { |
7473 | 84.3k | assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); |
7474 | 84.3k | |
7475 | 84.3k | // Check to see if there is no change. |
7476 | 84.3k | if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)31.8k ) |
7477 | 0 | return N; // No operands changed, just return the input node. |
7478 | 84.3k | |
7479 | 84.3k | // See if the modified node already exists. |
7480 | 84.3k | void *InsertPos = nullptr; |
7481 | 84.3k | if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) |
7482 | 1.09k | return Existing; |
7483 | 83.2k | |
7484 | 83.2k | // Nope it doesn't. Remove the node from its current place in the maps. |
7485 | 83.2k | if (InsertPos) |
7486 | 83.2k | if (!RemoveNodeFromCSEMaps(N)) |
7487 | 0 | InsertPos = nullptr; |
7488 | 83.2k | |
7489 | 83.2k | // Now we update the operands. |
7490 | 83.2k | if (N->OperandList[0] != Op1) |
7491 | 51.6k | N->OperandList[0].set(Op1); |
7492 | 83.2k | if (N->OperandList[1] != Op2) |
7493 | 53.9k | N->OperandList[1].set(Op2); |
7494 | 83.2k | |
7495 | 83.2k | updateDivergence(N); |
7496 | 83.2k | // If this gets put into a CSE map, add it. |
7497 | 83.2k | if (InsertPos) CSEMap.InsertNode(N, InsertPos); |
7498 | 83.2k | return N; |
7499 | 83.2k | } |
7500 | | |
7501 | | SDNode *SelectionDAG:: |
7502 | 347k | UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { |
7503 | 347k | SDValue Ops[] = { Op1, Op2, Op3 }; |
7504 | 347k | return UpdateNodeOperands(N, Ops); |
7505 | 347k | } |
7506 | | |
7507 | | SDNode *SelectionDAG:: |
7508 | | UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, |
7509 | 221k | SDValue Op3, SDValue Op4) { |
7510 | 221k | SDValue Ops[] = { Op1, Op2, Op3, Op4 }; |
7511 | 221k | return UpdateNodeOperands(N, Ops); |
7512 | 221k | } |
7513 | | |
7514 | | SDNode *SelectionDAG:: |
7515 | | UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, |
7516 | 10.0k | SDValue Op3, SDValue Op4, SDValue Op5) { |
7517 | 10.0k | SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; |
7518 | 10.0k | return UpdateNodeOperands(N, Ops); |
7519 | 10.0k | } |
7520 | | |
7521 | | SDNode *SelectionDAG:: |
7522 | 5.83M | UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { |
7523 | 5.83M | unsigned NumOps = Ops.size(); |
7524 | 5.83M | assert(N->getNumOperands() == NumOps && |
7525 | 5.83M | "Update with wrong number of operands"); |
7526 | 5.83M | |
7527 | 5.83M | // If no operands changed just return the input node. |
7528 | 5.83M | if (std::equal(Ops.begin(), Ops.end(), N->op_begin())) |
7529 | 5.15M | return N; |
7530 | 682k | |
7531 | 682k | // See if the modified node already exists. |
7532 | 682k | void *InsertPos = nullptr; |
7533 | 682k | if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos)) |
7534 | 286 | return Existing; |
7535 | 682k | |
7536 | 682k | // Nope it doesn't. Remove the node from its current place in the maps. |
7537 | 682k | if (InsertPos) |
7538 | 665k | if (!RemoveNodeFromCSEMaps(N)) |
7539 | 0 | InsertPos = nullptr; |
7540 | 682k | |
7541 | 682k | // Now we update the operands. |
7542 | 2.91M | for (unsigned i = 0; i != NumOps; ++i2.23M ) |
7543 | 2.23M | if (N->OperandList[i] != Ops[i]) |
7544 | 895k | N->OperandList[i].set(Ops[i]); |
7545 | 682k | |
7546 | 682k | updateDivergence(N); |
7547 | 682k | // If this gets put into a CSE map, add it. |
7548 | 682k | if (InsertPos) CSEMap.InsertNode(N, InsertPos)665k ; |
7549 | 682k | return N; |
7550 | 682k | } |
7551 | | |
7552 | | /// DropOperands - Release the operands and set this node to have |
7553 | | /// zero operands. |
7554 | 25.7M | void SDNode::DropOperands() { |
7555 | 25.7M | // Unlike the code in MorphNodeTo that does this, we don't need to |
7556 | 25.7M | // watch for dead nodes here. |
7557 | 61.1M | for (op_iterator I = op_begin(), E = op_end(); I != E; ) { |
7558 | 35.3M | SDUse &Use = *I++; |
7559 | 35.3M | Use.set(SDValue()); |
7560 | 35.3M | } |
7561 | 25.7M | } |
7562 | | |
7563 | | void SelectionDAG::setNodeMemRefs(MachineSDNode *N, |
7564 | 1.66M | ArrayRef<MachineMemOperand *> NewMemRefs) { |
7565 | 1.66M | if (NewMemRefs.empty()) { |
7566 | 282 | N->clearMemRefs(); |
7567 | 282 | return; |
7568 | 282 | } |
7569 | 1.66M | |
7570 | 1.66M | // Check if we can avoid allocating by storing a single reference directly. |
7571 | 1.66M | if (NewMemRefs.size() == 1) { |
7572 | 1.65M | N->MemRefs = NewMemRefs[0]; |
7573 | 1.65M | N->NumMemRefs = 1; |
7574 | 1.65M | return; |
7575 | 1.65M | } |
7576 | 5.40k | |
7577 | 5.40k | MachineMemOperand **MemRefsBuffer = |
7578 | 5.40k | Allocator.template Allocate<MachineMemOperand *>(NewMemRefs.size()); |
7579 | 5.40k | llvm::copy(NewMemRefs, MemRefsBuffer); |
7580 | 5.40k | N->MemRefs = MemRefsBuffer; |
7581 | 5.40k | N->NumMemRefs = static_cast<int>(NewMemRefs.size()); |
7582 | 5.40k | } |
7583 | | |
7584 | | /// SelectNodeTo - These are wrappers around MorphNodeTo that accept a |
7585 | | /// machine opcode. |
7586 | | /// |
7587 | | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
7588 | 12.1k | EVT VT) { |
7589 | 12.1k | SDVTList VTs = getVTList(VT); |
7590 | 12.1k | return SelectNodeTo(N, MachineOpc, VTs, None); |
7591 | 12.1k | } |
7592 | | |
7593 | | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
7594 | 140 | EVT VT, SDValue Op1) { |
7595 | 140 | SDVTList VTs = getVTList(VT); |
7596 | 140 | SDValue Ops[] = { Op1 }; |
7597 | 140 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
7598 | 140 | } |
7599 | | |
7600 | | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
7601 | | EVT VT, SDValue Op1, |
7602 | 2.33k | SDValue Op2) { |
7603 | 2.33k | SDVTList VTs = getVTList(VT); |
7604 | 2.33k | SDValue Ops[] = { Op1, Op2 }; |
7605 | 2.33k | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
7606 | 2.33k | } |
7607 | | |
7608 | | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
7609 | | EVT VT, SDValue Op1, |
7610 | 254 | SDValue Op2, SDValue Op3) { |
7611 | 254 | SDVTList VTs = getVTList(VT); |
7612 | 254 | SDValue Ops[] = { Op1, Op2, Op3 }; |
7613 | 254 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
7614 | 254 | } |
7615 | | |
7616 | | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
7617 | 44.7k | EVT VT, ArrayRef<SDValue> Ops) { |
7618 | 44.7k | SDVTList VTs = getVTList(VT); |
7619 | 44.7k | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
7620 | 44.7k | } |
7621 | | |
7622 | | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
7623 | 10 | EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) { |
7624 | 10 | SDVTList VTs = getVTList(VT1, VT2); |
7625 | 10 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
7626 | 10 | } |
7627 | | |
7628 | | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
7629 | 10 | EVT VT1, EVT VT2) { |
7630 | 10 | SDVTList VTs = getVTList(VT1, VT2); |
7631 | 10 | return SelectNodeTo(N, MachineOpc, VTs, None); |
7632 | 10 | } |
7633 | | |
7634 | | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
7635 | | EVT VT1, EVT VT2, EVT VT3, |
7636 | 5 | ArrayRef<SDValue> Ops) { |
7637 | 5 | SDVTList VTs = getVTList(VT1, VT2, VT3); |
7638 | 5 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
7639 | 5 | } |
7640 | | |
7641 | | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
7642 | | EVT VT1, EVT VT2, |
7643 | 370 | SDValue Op1, SDValue Op2) { |
7644 | 370 | SDVTList VTs = getVTList(VT1, VT2); |
7645 | 370 | SDValue Ops[] = { Op1, Op2 }; |
7646 | 370 | return SelectNodeTo(N, MachineOpc, VTs, Ops); |
7647 | 370 | } |
7648 | | |
7649 | | SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, |
7650 | 92.5k | SDVTList VTs,ArrayRef<SDValue> Ops) { |
7651 | 92.5k | SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops); |
7652 | 92.5k | // Reset the NodeID to -1. |
7653 | 92.5k | New->setNodeId(-1); |
7654 | 92.5k | if (New != N) { |
7655 | 337 | ReplaceAllUsesWith(N, New); |
7656 | 337 | RemoveDeadNode(N); |
7657 | 337 | } |
7658 | 92.5k | return New; |
7659 | 92.5k | } |
7660 | | |
7661 | | /// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away |
7662 | | /// the line number information on the merged node since it is not possible to |
7663 | | /// preserve the information that operation is associated with multiple lines. |
7664 | | /// This will make the debugger working better at -O0, were there is a higher |
7665 | | /// probability having other instructions associated with that line. |
7666 | | /// |
7667 | | /// For IROrder, we keep the smaller of the two |
7668 | 154k | SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) { |
7669 | 154k | DebugLoc NLoc = N->getDebugLoc(); |
7670 | 154k | if (NLoc && OptLevel == CodeGenOpt::None9.30k && OLoc.getDebugLoc() != NLoc7 ) { |
7671 | 0 | N->setDebugLoc(DebugLoc()); |
7672 | 0 | } |
7673 | 154k | unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); |
7674 | 154k | N->setIROrder(Order); |
7675 | 154k | return N; |
7676 | 154k | } |
7677 | | |
7678 | | /// MorphNodeTo - This *mutates* the specified node to have the specified |
7679 | | /// return type, opcode, and operands. |
7680 | | /// |
7681 | | /// Note that MorphNodeTo returns the resultant node. If there is already a |
7682 | | /// node of the specified opcode and operands, it returns that node instead of |
7683 | | /// the current one. Note that the SDLoc need not be the same. |
7684 | | /// |
7685 | | /// Using MorphNodeTo is faster than creating a new node and swapping it in |
7686 | | /// with ReplaceAllUsesWith both because it often avoids allocating a new |
7687 | | /// node, and because it doesn't require CSE recalculation for any of |
7688 | | /// the node's users. |
7689 | | /// |
7690 | | /// However, note that MorphNodeTo recursively deletes dead nodes from the DAG. |
7691 | | /// As a consequence it isn't appropriate to use from within the DAG combiner or |
7692 | | /// the legalizer which maintain worklists that would need to be updated when |
7693 | | /// deleting things. |
7694 | | SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, |
7695 | 7.58M | SDVTList VTs, ArrayRef<SDValue> Ops) { |
7696 | 7.58M | // If an identical node already exists, use it. |
7697 | 7.58M | void *IP = nullptr; |
7698 | 7.58M | if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { |
7699 | 6.30M | FoldingSetNodeID ID; |
7700 | 6.30M | AddNodeIDNode(ID, Opc, VTs, Ops); |
7701 | 6.30M | if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP)) |
7702 | 16.9k | return UpdateSDLocOnMergeSDNode(ON, SDLoc(N)); |
7703 | 7.57M | } |
7704 | 7.57M | |
7705 | 7.57M | if (!RemoveNodeFromCSEMaps(N)) |
7706 | 1.31M | IP = nullptr; |
7707 | 7.57M | |
7708 | 7.57M | // Start the morphing. |
7709 | 7.57M | N->NodeType = Opc; |
7710 | 7.57M | N->ValueList = VTs.VTs; |
7711 | 7.57M | N->NumValues = VTs.NumVTs; |
7712 | 7.57M | |
7713 | 7.57M | // Clear the operands list, updating used nodes to remove this from their |
7714 | 7.57M | // use list. Keep track of any operands that become dead as a result. |
7715 | 7.57M | SmallPtrSet<SDNode*, 16> DeadNodeSet; |
7716 | 28.1M | for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { |
7717 | 20.5M | SDUse &Use = *I++; |
7718 | 20.5M | SDNode *Used = Use.getNode(); |
7719 | 20.5M | Use.set(SDValue()); |
7720 | 20.5M | if (Used->use_empty()) |
7721 | 10.4M | DeadNodeSet.insert(Used); |
7722 | 20.5M | } |
7723 | 7.57M | |
7724 | 7.57M | // For MachineNode, initialize the memory references information. |
7725 | 7.57M | if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) |
7726 | 7.55M | MN->clearMemRefs(); |
7727 | 7.57M | |
7728 | 7.57M | // Swap for an appropriately sized array from the recycler. |
7729 | 7.57M | removeOperands(N); |
7730 | 7.57M | createOperands(N, Ops); |
7731 | 7.57M | |
7732 | 7.57M | // Delete any nodes that are still dead after adding the uses for the |
7733 | 7.57M | // new operands. |
7734 | 7.57M | if (!DeadNodeSet.empty()) { |
7735 | 6.34M | SmallVector<SDNode *, 16> DeadNodes; |
7736 | 6.34M | for (SDNode *N : DeadNodeSet) |
7737 | 10.4M | if (N->use_empty()) |
7738 | 3.26M | DeadNodes.push_back(N); |
7739 | 6.34M | RemoveDeadNodes(DeadNodes); |
7740 | 6.34M | } |
7741 | 7.57M | |
7742 | 7.57M | if (IP) |
7743 | 6.22M | CSEMap.InsertNode(N, IP); // Memoize the new node. |
7744 | 7.57M | return N; |
7745 | 7.57M | } |
7746 | | |
7747 | 1.49k | SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { |
7748 | 1.49k | unsigned OrigOpc = Node->getOpcode(); |
7749 | 1.49k | unsigned NewOpc; |
7750 | 1.49k | switch (OrigOpc) { |
7751 | 1.49k | default: |
7752 | 0 | llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); |
7753 | 1.49k | case ISD::STRICT_FADD: NewOpc = ISD::FADD; break37 ; |
7754 | 1.49k | case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break41 ; |
7755 | 1.49k | case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break37 ; |
7756 | 1.49k | case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break37 ; |
7757 | 1.49k | case ISD::STRICT_FREM: NewOpc = ISD::FREM; break80 ; |
7758 | 1.49k | case ISD::STRICT_FMA: NewOpc = ISD::FMA; break14 ; |
7759 | 1.49k | case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break37 ; |
7760 | 1.49k | case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break80 ; |
7761 | 1.49k | case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break80 ; |
7762 | 1.49k | case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break80 ; |
7763 | 1.49k | case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break80 ; |
7764 | 1.49k | case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break80 ; |
7765 | 1.49k | case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break80 ; |
7766 | 1.49k | case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break80 ; |
7767 | 1.49k | case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break80 ; |
7768 | 1.49k | case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break80 ; |
7769 | 1.49k | case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break49 ; |
7770 | 1.49k | case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break57 ; |
7771 | 1.49k | case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break70 ; |
7772 | 1.49k | case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break70 ; |
7773 | 1.49k | case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break42 ; |
7774 | 1.49k | case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break42 ; |
7775 | 1.49k | case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break44 ; |
7776 | 1.49k | case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break42 ; |
7777 | 1.49k | case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break39 ; |
7778 | 1.49k | case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break39 ; |
7779 | 1.49k | } |
7780 | 1.49k | |
7781 | 1.49k | assert(Node->getNumValues() == 2 && "Unexpected number of results!"); |
7782 | 1.49k | |
7783 | 1.49k | // We're taking this node out of the chain, so we need to re-link things. |
7784 | 1.49k | SDValue InputChain = Node->getOperand(0); |
7785 | 1.49k | SDValue OutputChain = SDValue(Node, 1); |
7786 | 1.49k | ReplaceAllUsesOfValueWith(OutputChain, InputChain); |
7787 | 1.49k | |
7788 | 1.49k | SmallVector<SDValue, 3> Ops; |
7789 | 3.59k | for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i2.09k ) |
7790 | 2.09k | Ops.push_back(Node->getOperand(i)); |
7791 | 1.49k | |
7792 | 1.49k | SDVTList VTs = getVTList(Node->getValueType(0)); |
7793 | 1.49k | SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops); |
7794 | 1.49k | |
7795 | 1.49k | // MorphNodeTo can operate in two ways: if an existing node with the |
7796 | 1.49k | // specified operands exists, it can just return it. Otherwise, it |
7797 | 1.49k | // updates the node in place to have the requested operands. |
7798 | 1.49k | if (Res == Node) { |
7799 | 1.49k | // If we updated the node in place, reset the node ID. To the isel, |
7800 | 1.49k | // this should be just like a newly allocated machine node. |
7801 | 1.49k | Res->setNodeId(-1); |
7802 | 1.49k | } else { |
7803 | 0 | ReplaceAllUsesWith(Node, Res); |
7804 | 0 | RemoveDeadNode(Node); |
7805 | 0 | } |
7806 | 1.49k | |
7807 | 1.49k | return Res; |
7808 | 1.49k | } |
7809 | | |
7810 | | /// getMachineNode - These are used for target selectors to create a new node |
7811 | | /// with specified return type(s), MachineInstr opcode, and operands. |
7812 | | /// |
7813 | | /// Note that getMachineNode returns the resultant node. If there is already a |
7814 | | /// node of the specified opcode and operands, it returns that node instead of |
7815 | | /// the current one. |
7816 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7817 | 10.5k | EVT VT) { |
7818 | 10.5k | SDVTList VTs = getVTList(VT); |
7819 | 10.5k | return getMachineNode(Opcode, dl, VTs, None); |
7820 | 10.5k | } |
7821 | | |
7822 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7823 | 57.1k | EVT VT, SDValue Op1) { |
7824 | 57.1k | SDVTList VTs = getVTList(VT); |
7825 | 57.1k | SDValue Ops[] = { Op1 }; |
7826 | 57.1k | return getMachineNode(Opcode, dl, VTs, Ops); |
7827 | 57.1k | } |
7828 | | |
7829 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7830 | 103k | EVT VT, SDValue Op1, SDValue Op2) { |
7831 | 103k | SDVTList VTs = getVTList(VT); |
7832 | 103k | SDValue Ops[] = { Op1, Op2 }; |
7833 | 103k | return getMachineNode(Opcode, dl, VTs, Ops); |
7834 | 103k | } |
7835 | | |
7836 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7837 | | EVT VT, SDValue Op1, SDValue Op2, |
7838 | 12.5k | SDValue Op3) { |
7839 | 12.5k | SDVTList VTs = getVTList(VT); |
7840 | 12.5k | SDValue Ops[] = { Op1, Op2, Op3 }; |
7841 | 12.5k | return getMachineNode(Opcode, dl, VTs, Ops); |
7842 | 12.5k | } |
7843 | | |
7844 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7845 | 82.7k | EVT VT, ArrayRef<SDValue> Ops) { |
7846 | 82.7k | SDVTList VTs = getVTList(VT); |
7847 | 82.7k | return getMachineNode(Opcode, dl, VTs, Ops); |
7848 | 82.7k | } |
7849 | | |
7850 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7851 | | EVT VT1, EVT VT2, SDValue Op1, |
7852 | 10.4k | SDValue Op2) { |
7853 | 10.4k | SDVTList VTs = getVTList(VT1, VT2); |
7854 | 10.4k | SDValue Ops[] = { Op1, Op2 }; |
7855 | 10.4k | return getMachineNode(Opcode, dl, VTs, Ops); |
7856 | 10.4k | } |
7857 | | |
7858 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7859 | | EVT VT1, EVT VT2, SDValue Op1, |
7860 | 1.76k | SDValue Op2, SDValue Op3) { |
7861 | 1.76k | SDVTList VTs = getVTList(VT1, VT2); |
7862 | 1.76k | SDValue Ops[] = { Op1, Op2, Op3 }; |
7863 | 1.76k | return getMachineNode(Opcode, dl, VTs, Ops); |
7864 | 1.76k | } |
7865 | | |
7866 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7867 | | EVT VT1, EVT VT2, |
7868 | 55.2k | ArrayRef<SDValue> Ops) { |
7869 | 55.2k | SDVTList VTs = getVTList(VT1, VT2); |
7870 | 55.2k | return getMachineNode(Opcode, dl, VTs, Ops); |
7871 | 55.2k | } |
7872 | | |
7873 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7874 | | EVT VT1, EVT VT2, EVT VT3, |
7875 | 18 | SDValue Op1, SDValue Op2) { |
7876 | 18 | SDVTList VTs = getVTList(VT1, VT2, VT3); |
7877 | 18 | SDValue Ops[] = { Op1, Op2 }; |
7878 | 18 | return getMachineNode(Opcode, dl, VTs, Ops); |
7879 | 18 | } |
7880 | | |
7881 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7882 | | EVT VT1, EVT VT2, EVT VT3, |
7883 | | SDValue Op1, SDValue Op2, |
7884 | 179 | SDValue Op3) { |
7885 | 179 | SDVTList VTs = getVTList(VT1, VT2, VT3); |
7886 | 179 | SDValue Ops[] = { Op1, Op2, Op3 }; |
7887 | 179 | return getMachineNode(Opcode, dl, VTs, Ops); |
7888 | 179 | } |
7889 | | |
7890 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7891 | | EVT VT1, EVT VT2, EVT VT3, |
7892 | 12.2k | ArrayRef<SDValue> Ops) { |
7893 | 12.2k | SDVTList VTs = getVTList(VT1, VT2, VT3); |
7894 | 12.2k | return getMachineNode(Opcode, dl, VTs, Ops); |
7895 | 12.2k | } |
7896 | | |
7897 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, |
7898 | | ArrayRef<EVT> ResultTys, |
7899 | 5.16k | ArrayRef<SDValue> Ops) { |
7900 | 5.16k | SDVTList VTs = getVTList(ResultTys); |
7901 | 5.16k | return getMachineNode(Opcode, dl, VTs, Ops); |
7902 | 5.16k | } |
7903 | | |
7904 | | MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, |
7905 | | SDVTList VTs, |
7906 | 808k | ArrayRef<SDValue> Ops) { |
7907 | 808k | bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; |
7908 | 808k | MachineSDNode *N; |
7909 | 808k | void *IP = nullptr; |
7910 | 808k | |
7911 | 808k | if (DoCSE) { |
7912 | 745k | FoldingSetNodeID ID; |
7913 | 745k | AddNodeIDNode(ID, ~Opcode, VTs, Ops); |
7914 | 745k | IP = nullptr; |
7915 | 745k | if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { |
7916 | 137k | return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL)); |
7917 | 137k | } |
7918 | 671k | } |
7919 | 671k | |
7920 | 671k | // Allocate a new MachineSDNode. |
7921 | 671k | N = newSDNode<MachineSDNode>(~Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); |
7922 | 671k | createOperands(N, Ops); |
7923 | 671k | |
7924 | 671k | if (DoCSE) |
7925 | 607k | CSEMap.InsertNode(N, IP); |
7926 | 671k | |
7927 | 671k | InsertNode(N); |
7928 | 671k | return N; |
7929 | 671k | } |
7930 | | |
7931 | | /// getTargetExtractSubreg - A convenience function for creating |
7932 | | /// TargetOpcode::EXTRACT_SUBREG nodes. |
7933 | | SDValue SelectionDAG::getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, |
7934 | 63.2k | SDValue Operand) { |
7935 | 63.2k | SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); |
7936 | 63.2k | SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, |
7937 | 63.2k | VT, Operand, SRIdxVal); |
7938 | 63.2k | return SDValue(Subreg, 0); |
7939 | 63.2k | } |
7940 | | |
7941 | | /// getTargetInsertSubreg - A convenience function for creating |
7942 | | /// TargetOpcode::INSERT_SUBREG nodes. |
7943 | | SDValue SelectionDAG::getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, |
7944 | 7.34k | SDValue Operand, SDValue Subreg) { |
7945 | 7.34k | SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); |
7946 | 7.34k | SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, |
7947 | 7.34k | VT, Operand, Subreg, SRIdxVal); |
7948 | 7.34k | return SDValue(Result, 0); |
7949 | 7.34k | } |
7950 | | |
7951 | | /// getNodeIfExists - Get the specified node if it's already available, or |
7952 | | /// else return NULL. |
7953 | | SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, |
7954 | | ArrayRef<SDValue> Ops, |
7955 | 1.73M | const SDNodeFlags Flags) { |
7956 | 1.73M | if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { |
7957 | 1.73M | FoldingSetNodeID ID; |
7958 | 1.73M | AddNodeIDNode(ID, Opcode, VTList, Ops); |
7959 | 1.73M | void *IP = nullptr; |
7960 | 1.73M | if (SDNode *E = FindNodeOrInsertPos(ID, SDLoc(), IP)) { |
7961 | 5.43k | E->intersectFlagsWith(Flags); |
7962 | 5.43k | return E; |
7963 | 5.43k | } |
7964 | 1.72M | } |
7965 | 1.72M | return nullptr; |
7966 | 1.72M | } |
7967 | | |
7968 | | /// getDbgValue - Creates a SDDbgValue node. |
7969 | | /// |
7970 | | /// SDNode |
7971 | | SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr, |
7972 | | SDNode *N, unsigned R, bool IsIndirect, |
7973 | 538 | const DebugLoc &DL, unsigned O) { |
7974 | 538 | assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && |
7975 | 538 | "Expected inlined-at fields to agree"); |
7976 | 538 | return new (DbgInfo->getAlloc()) |
7977 | 538 | SDDbgValue(Var, Expr, N, R, IsIndirect, DL, O); |
7978 | 538 | } |
7979 | | |
7980 | | /// Constant |
7981 | | SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, |
7982 | | DIExpression *Expr, |
7983 | | const Value *C, |
7984 | 4.18k | const DebugLoc &DL, unsigned O) { |
7985 | 4.18k | assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && |
7986 | 4.18k | "Expected inlined-at fields to agree"); |
7987 | 4.18k | return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, DL, O); |
7988 | 4.18k | } |
7989 | | |
7990 | | /// FrameIndex |
7991 | | SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, |
7992 | | DIExpression *Expr, unsigned FI, |
7993 | | bool IsIndirect, |
7994 | | const DebugLoc &DL, |
7995 | 59 | unsigned O) { |
7996 | 59 | assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && |
7997 | 59 | "Expected inlined-at fields to agree"); |
7998 | 59 | return new (DbgInfo->getAlloc()) |
7999 | 59 | SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX); |
8000 | 59 | } |
8001 | | |
8002 | | /// VReg |
8003 | | SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, |
8004 | | DIExpression *Expr, |
8005 | | unsigned VReg, bool IsIndirect, |
8006 | 52 | const DebugLoc &DL, unsigned O) { |
8007 | 52 | assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && |
8008 | 52 | "Expected inlined-at fields to agree"); |
8009 | 52 | return new (DbgInfo->getAlloc()) |
8010 | 52 | SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG); |
8011 | 52 | } |
8012 | | |
8013 | | void SelectionDAG::transferDbgValues(SDValue From, SDValue To, |
8014 | | unsigned OffsetInBits, unsigned SizeInBits, |
8015 | 11.0M | bool InvalidateDbg) { |
8016 | 11.0M | SDNode *FromNode = From.getNode(); |
8017 | 11.0M | SDNode *ToNode = To.getNode(); |
8018 | 11.0M | assert(FromNode && ToNode && "Can't modify dbg values"); |
8019 | 11.0M | |
8020 | 11.0M | // PR35338 |
8021 | 11.0M | // TODO: assert(From != To && "Redundant dbg value transfer"); |
8022 | 11.0M | // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer"); |
8023 | 11.0M | if (From == To || FromNode == ToNode11.0M ) |
8024 | 2.14M | return; |
8025 | 8.90M | |
8026 | 8.90M | if (!FromNode->getHasDebugValue()) |
8027 | 8.90M | return; |
8028 | 237 | |
8029 | 237 | SmallVector<SDDbgValue *, 2> ClonedDVs; |
8030 | 252 | for (SDDbgValue *Dbg : GetDbgValues(FromNode)) { |
8031 | 252 | if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated()) |
8032 | 46 | continue; |
8033 | 206 | |
8034 | 206 | // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value"); |
8035 | 206 | |
8036 | 206 | // Just transfer the dbg value attached to From. |
8037 | 206 | if (Dbg->getResNo() != From.getResNo()) |
8038 | 35 | continue; |
8039 | 171 | |
8040 | 171 | DIVariable *Var = Dbg->getVariable(); |
8041 | 171 | auto *Expr = Dbg->getExpression(); |
8042 | 171 | // If a fragment is requested, update the expression. |
8043 | 171 | if (SizeInBits) { |
8044 | 32 | // When splitting a larger (e.g., sign-extended) value whose |
8045 | 32 | // lower bits are described with an SDDbgValue, do not attempt |
8046 | 32 | // to transfer the SDDbgValue to the upper bits. |
8047 | 32 | if (auto FI = Expr->getFragmentInfo()) |
8048 | 10 | if (OffsetInBits + SizeInBits > FI->SizeInBits) |
8049 | 1 | continue; |
8050 | 31 | auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits, |
8051 | 31 | SizeInBits); |
8052 | 31 | if (!Fragment) |
8053 | 4 | continue; |
8054 | 27 | Expr = *Fragment; |
8055 | 27 | } |
8056 | 171 | // Clone the SDDbgValue and move it to To. |
8057 | 171 | SDDbgValue *Clone = |
8058 | 166 | getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), |
8059 | 166 | Dbg->getDebugLoc(), Dbg->getOrder()); |
8060 | 166 | ClonedDVs.push_back(Clone); |
8061 | 166 | |
8062 | 166 | if (InvalidateDbg) { |
8063 | 152 | // Invalidate value and indicate the SDDbgValue should not be emitted. |
8064 | 152 | Dbg->setIsInvalidated(); |
8065 | 152 | Dbg->setIsEmitted(); |
8066 | 152 | } |
8067 | 166 | } |
8068 | 237 | |
8069 | 237 | for (SDDbgValue *Dbg : ClonedDVs) |
8070 | 166 | AddDbgValue(Dbg, ToNode, false); |
8071 | 237 | } |
8072 | | |
8073 | 4.40M | void SelectionDAG::salvageDebugInfo(SDNode &N) { |
8074 | 4.40M | if (!N.getHasDebugValue()) |
8075 | 4.40M | return; |
8076 | 2 | |
8077 | 2 | SmallVector<SDDbgValue *, 2> ClonedDVs; |
8078 | 4 | for (auto DV : GetDbgValues(&N)) { |
8079 | 4 | if (DV->isInvalidated()) |
8080 | 0 | continue; |
8081 | 4 | switch (N.getOpcode()) { |
8082 | 4 | default: |
8083 | 1 | break; |
8084 | 4 | case ISD::ADD: |
8085 | 3 | SDValue N0 = N.getOperand(0); |
8086 | 3 | SDValue N1 = N.getOperand(1); |
8087 | 3 | if (!isConstantIntBuildVectorOrConstantInt(N0) && |
8088 | 3 | isConstantIntBuildVectorOrConstantInt(N1)) { |
8089 | 3 | uint64_t Offset = N.getConstantOperandVal(1); |
8090 | 3 | // Rewrite an ADD constant node into a DIExpression. Since we are |
8091 | 3 | // performing arithmetic to compute the variable's *value* in the |
8092 | 3 | // DIExpression, we need to mark the expression with a |
8093 | 3 | // DW_OP_stack_value. |
8094 | 3 | auto *DIExpr = DV->getExpression(); |
8095 | 3 | DIExpr = |
8096 | 3 | DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset); |
8097 | 3 | SDDbgValue *Clone = |
8098 | 3 | getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(), |
8099 | 3 | DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); |
8100 | 3 | ClonedDVs.push_back(Clone); |
8101 | 3 | DV->setIsInvalidated(); |
8102 | 3 | DV->setIsEmitted(); |
8103 | 3 | LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; |
8104 | 3 | N0.getNode()->dumprFull(this); |
8105 | 3 | dbgs() << " into " << *DIExpr << '\n'); |
8106 | 3 | } |
8107 | 4 | } |
8108 | 4 | } |
8109 | 2 | |
8110 | 2 | for (SDDbgValue *Dbg : ClonedDVs) |
8111 | 3 | AddDbgValue(Dbg, Dbg->getSDNode(), false); |
8112 | 2 | } |
8113 | | |
8114 | | /// Creates a SDDbgLabel node. |
8115 | | SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label, |
8116 | 4 | const DebugLoc &DL, unsigned O) { |
8117 | 4 | assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && |
8118 | 4 | "Expected inlined-at fields to agree"); |
8119 | 4 | return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O); |
8120 | 4 | } |
8121 | | |
8122 | | namespace { |
8123 | | |
8124 | | /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node |
8125 | | /// pointed to by a use iterator is deleted, increment the use iterator |
8126 | | /// so that it doesn't dangle. |
8127 | | /// |
8128 | | class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { |
8129 | | SDNode::use_iterator &UI; |
8130 | | SDNode::use_iterator &UE; |
8131 | | |
8132 | 26.7k | void NodeDeleted(SDNode *N, SDNode *E) override { |
8133 | 26.7k | // Increment the iterator as needed. |
8134 | 26.7k | while (UI != UE && N == *UI3.39k ) |
8135 | 0 | ++UI; |
8136 | 26.7k | } |
8137 | | |
8138 | | public: |
8139 | | RAUWUpdateListener(SelectionDAG &d, |
8140 | | SDNode::use_iterator &ui, |
8141 | | SDNode::use_iterator &ue) |
8142 | 9.38M | : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {} |
8143 | | }; |
8144 | | |
8145 | | } // end anonymous namespace |
8146 | | |
8147 | | /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. |
8148 | | /// This can cause recursive merging of nodes in the DAG. |
8149 | | /// |
8150 | | /// This version assumes From has a single result value. |
8151 | | /// |
8152 | 3.97M | void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { |
8153 | 3.97M | SDNode *From = FromN.getNode(); |
8154 | 3.97M | assert(From->getNumValues() == 1 && FromN.getResNo() == 0 && |
8155 | 3.97M | "Cannot replace with this method!"); |
8156 | 3.97M | assert(From != To.getNode() && "Cannot replace uses of with self"); |
8157 | 3.97M | |
8158 | 3.97M | // Preserve Debug Values |
8159 | 3.97M | transferDbgValues(FromN, To); |
8160 | 3.97M | |
8161 | 3.97M | // Iterate over all the existing uses of From. New uses will be added |
8162 | 3.97M | // to the beginning of the use list, which we avoid visiting. |
8163 | 3.97M | // This specifically avoids visiting uses of From that arise while the |
8164 | 3.97M | // replacement is happening, because any such uses would be the result |
8165 | 3.97M | // of CSE: If an existing node looks like From after one of its operands |
8166 | 3.97M | // is replaced by To, we don't want to replace of all its users with To |
8167 | 3.97M | // too. See PR3018 for more info. |
8168 | 3.97M | SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); |
8169 | 3.97M | RAUWUpdateListener Listener(*this, UI, UE); |
8170 | 8.44M | while (UI != UE) { |
8171 | 4.47M | SDNode *User = *UI; |
8172 | 4.47M | |
8173 | 4.47M | // This node is about to morph, remove its old self from the CSE maps. |
8174 | 4.47M | RemoveNodeFromCSEMaps(User); |
8175 | 4.47M | |
8176 | 4.47M | // A user can appear in a use list multiple times, and when this |
8177 | 4.47M | // happens the uses are usually next to each other in the list. |
8178 | 4.47M | // To help reduce the number of CSE recomputations, process all |
8179 | 4.47M | // the uses of this user that we can find this way. |
8180 | 4.48M | do { |
8181 | 4.48M | SDUse &Use = UI.getUse(); |
8182 | 4.48M | ++UI; |
8183 | 4.48M | Use.set(To); |
8184 | 4.48M | if (To->isDivergent() != From->isDivergent()) |
8185 | 9.00k | updateDivergence(User); |
8186 | 4.48M | } while (UI != UE && *UI == User563k ); |
8187 | 4.47M | // Now that we have modified User, add it back to the CSE maps. If it |
8188 | 4.47M | // already exists there, recursively merge the results together. |
8189 | 4.47M | AddModifiedNodeToCSEMaps(User); |
8190 | 4.47M | } |
8191 | 3.97M | |
8192 | 3.97M | // If we just RAUW'd the root, take note. |
8193 | 3.97M | if (FromN == getRoot()) |
8194 | 11.0k | setRoot(To); |
8195 | 3.97M | } |
8196 | | |
8197 | | /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. |
8198 | | /// This can cause recursive merging of nodes in the DAG. |
8199 | | /// |
8200 | | /// This version assumes that for each value of From, there is a |
8201 | | /// corresponding value in To in the same position with the same type. |
8202 | | /// |
8203 | 2.45M | void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { |
8204 | | #ifndef NDEBUG |
8205 | | for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) |
8206 | | assert((!From->hasAnyUseOfValue(i) || |
8207 | | From->getValueType(i) == To->getValueType(i)) && |
8208 | | "Cannot use this version of ReplaceAllUsesWith!"); |
8209 | | #endif |
8210 | | |
8211 | 2.45M | // Handle the trivial case. |
8212 | 2.45M | if (From == To) |
8213 | 0 | return; |
8214 | 2.45M | |
8215 | 2.45M | // Preserve Debug Info. Only do this if there's a use. |
8216 | 4.96M | for (unsigned i = 0, e = From->getNumValues(); 2.45M i != e; ++i2.50M ) |
8217 | 2.50M | if (From->hasAnyUseOfValue(i)) { |
8218 | 2.47M | assert((i < To->getNumValues()) && "Invalid To location"); |
8219 | 2.47M | transferDbgValues(SDValue(From, i), SDValue(To, i)); |
8220 | 2.47M | } |
8221 | 2.45M | |
8222 | 2.45M | // Iterate over just the existing users of From. See the comments in |
8223 | 2.45M | // the ReplaceAllUsesWith above. |
8224 | 2.45M | SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); |
8225 | 2.45M | RAUWUpdateListener Listener(*this, UI, UE); |
8226 | 5.26M | while (UI != UE) { |
8227 | 2.80M | SDNode *User = *UI; |
8228 | 2.80M | |
8229 | 2.80M | // This node is about to morph, remove its old self from the CSE maps. |
8230 | 2.80M | RemoveNodeFromCSEMaps(User); |
8231 | 2.80M | |
8232 | 2.80M | // A user can appear in a use list multiple times, and when this |
8233 | 2.80M | // happens the uses are usually next to each other in the list. |
8234 | 2.80M | // To help reduce the number of CSE recomputations, process all |
8235 | 2.80M | // the uses of this user that we can find this way. |
8236 | 2.81M | do { |
8237 | 2.81M | SDUse &Use = UI.getUse(); |
8238 | 2.81M | ++UI; |
8239 | 2.81M | Use.setNode(To); |
8240 | 2.81M | if (To->isDivergent() != From->isDivergent()) |
8241 | 2.30k | updateDivergence(User); |
8242 | 2.81M | } while (UI != UE && *UI == User360k ); |
8243 | 2.80M | |
8244 | 2.80M | // Now that we have modified User, add it back to the CSE maps. If it |
8245 | 2.80M | // already exists there, recursively merge the results together. |
8246 | 2.80M | AddModifiedNodeToCSEMaps(User); |
8247 | 2.80M | } |
8248 | 2.45M | |
8249 | 2.45M | // If we just RAUW'd the root, take note. |
8250 | 2.45M | if (From == getRoot().getNode()) |
8251 | 20.6k | setRoot(SDValue(To, getRoot().getResNo())); |
8252 | 2.45M | } |
8253 | | |
8254 | | /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. |
8255 | | /// This can cause recursive merging of nodes in the DAG. |
8256 | | /// |
8257 | | /// This version can replace From with any result values. To must match the |
8258 | | /// number and types of values returned by From. |
8259 | 924k | void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { |
8260 | 924k | if (From->getNumValues() == 1) // Handle the simple case efficiently. |
8261 | 589k | return ReplaceAllUsesWith(SDValue(From, 0), To[0]); |
8262 | 334k | |
8263 | 334k | // Preserve Debug Info. |
8264 | 1.02M | for (unsigned i = 0, e = From->getNumValues(); 334k i != e; ++i694k ) |
8265 | 694k | transferDbgValues(SDValue(From, i), To[i]); |
8266 | 334k | |
8267 | 334k | // Iterate over just the existing users of From. See the comments in |
8268 | 334k | // the ReplaceAllUsesWith above. |
8269 | 334k | SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); |
8270 | 334k | RAUWUpdateListener Listener(*this, UI, UE); |
8271 | 974k | while (UI != UE) { |
8272 | 639k | SDNode *User = *UI; |
8273 | 639k | |
8274 | 639k | // This node is about to morph, remove its old self from the CSE maps. |
8275 | 639k | RemoveNodeFromCSEMaps(User); |
8276 | 639k | |
8277 | 639k | // A user can appear in a use list multiple times, and when this happens the |
8278 | 639k | // uses are usually next to each other in the list. To help reduce the |
8279 | 639k | // number of CSE and divergence recomputations, process all the uses of this |
8280 | 639k | // user that we can find this way. |
8281 | 639k | bool To_IsDivergent = false; |
8282 | 693k | do { |
8283 | 693k | SDUse &Use = UI.getUse(); |
8284 | 693k | const SDValue &ToOp = To[Use.getResNo()]; |
8285 | 693k | ++UI; |
8286 | 693k | Use.set(ToOp); |
8287 | 693k | To_IsDivergent |= ToOp->isDivergent(); |
8288 | 693k | } while (UI != UE && *UI == User360k ); |
8289 | 639k | |
8290 | 639k | if (To_IsDivergent != From->isDivergent()) |
8291 | 5.68k | updateDivergence(User); |
8292 | 639k | |
8293 | 639k | // Now that we have modified User, add it back to the CSE maps. If it |
8294 | 639k | // already exists there, recursively merge the results together. |
8295 | 639k | AddModifiedNodeToCSEMaps(User); |
8296 | 639k | } |
8297 | 334k | |
8298 | 334k | // If we just RAUW'd the root, take note. |
8299 | 334k | if (From == getRoot().getNode()) |
8300 | 318 | setRoot(SDValue(To[getRoot().getResNo()])); |
8301 | 334k | } |
8302 | | |
8303 | | /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving |
8304 | | /// uses of other values produced by From.getNode() alone. The Deleted |
8305 | | /// vector is handled the same way as for ReplaceAllUsesWith. |
8306 | 4.37M | void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ |
8307 | 4.37M | // Handle the really simple, really trivial case efficiently. |
8308 | 4.37M | if (From == To) return2.16k ; |
8309 | 4.37M | |
8310 | 4.37M | // Handle the simple, trivial, case efficiently. |
8311 | 4.37M | if (From.getNode()->getNumValues() == 1) { |
8312 | 1.75M | ReplaceAllUsesWith(From, To); |
8313 | 1.75M | return; |
8314 | 1.75M | } |
8315 | 2.62M | |
8316 | 2.62M | // Preserve Debug Info. |
8317 | 2.62M | transferDbgValues(From, To); |
8318 | 2.62M | |
8319 | 2.62M | // Iterate over just the existing users of From. See the comments in |
8320 | 2.62M | // the ReplaceAllUsesWith above. |
8321 | 2.62M | SDNode::use_iterator UI = From.getNode()->use_begin(), |
8322 | 2.62M | UE = From.getNode()->use_end(); |
8323 | 2.62M | RAUWUpdateListener Listener(*this, UI, UE); |
8324 | 5.86M | while (UI != UE) { |
8325 | 3.24M | SDNode *User = *UI; |
8326 | 3.24M | bool UserRemovedFromCSEMaps = false; |
8327 | 3.24M | |
8328 | 3.24M | // A user can appear in a use list multiple times, and when this |
8329 | 3.24M | // happens the uses are usually next to each other in the list. |
8330 | 3.24M | // To help reduce the number of CSE recomputations, process all |
8331 | 3.24M | // the uses of this user that we can find this way. |
8332 | 4.16M | do { |
8333 | 4.16M | SDUse &Use = UI.getUse(); |
8334 | 4.16M | |
8335 | 4.16M | // Skip uses of different values from the same node. |
8336 | 4.16M | if (Use.getResNo() != From.getResNo()) { |
8337 | 1.99M | ++UI; |
8338 | 1.99M | continue; |
8339 | 1.99M | } |
8340 | 2.16M | |
8341 | 2.16M | // If this node hasn't been modified yet, it's still in the CSE maps, |
8342 | 2.16M | // so remove its old self from the CSE maps. |
8343 | 2.16M | if (!UserRemovedFromCSEMaps) { |
8344 | 2.16M | RemoveNodeFromCSEMaps(User); |
8345 | 2.16M | UserRemovedFromCSEMaps = true; |
8346 | 2.16M | } |
8347 | 2.16M | |
8348 | 2.16M | ++UI; |
8349 | 2.16M | Use.set(To); |
8350 | 2.16M | if (To->isDivergent() != From->isDivergent()) |
8351 | 855 | updateDivergence(User); |
8352 | 4.16M | } while (UI != UE && *UI == User1.62M ); |
8353 | 3.24M | // We are iterating over all uses of the From node, so if a use |
8354 | 3.24M | // doesn't use the specific value, no changes are made. |
8355 | 3.24M | if (!UserRemovedFromCSEMaps) |
8356 | 1.07M | continue; |
8357 | 2.16M | |
8358 | 2.16M | // Now that we have modified User, add it back to the CSE maps. If it |
8359 | 2.16M | // already exists there, recursively merge the results together. |
8360 | 2.16M | AddModifiedNodeToCSEMaps(User); |
8361 | 2.16M | } |
8362 | 2.62M | |
8363 | 2.62M | // If we just RAUW'd the root, take note. |
8364 | 2.62M | if (From == getRoot()) |
8365 | 69.8k | setRoot(To); |
8366 | 2.62M | } |
8367 | | |
8368 | | namespace { |
8369 | | |
8370 | | /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith |
8371 | | /// to record information about a use. |
8372 | | struct UseMemo { |
8373 | | SDNode *User; |
8374 | | unsigned Index; |
8375 | | SDUse *Use; |
8376 | | }; |
8377 | | |
8378 | | /// operator< - Sort Memos by User. |
8379 | 1.24k | bool operator<(const UseMemo &L, const UseMemo &R) { |
8380 | 1.24k | return (intptr_t)L.User < (intptr_t)R.User; |
8381 | 1.24k | } |
8382 | | |
8383 | | } // end anonymous namespace |
8384 | | |
8385 | | void SelectionDAG::updateDivergence(SDNode * N) |
8386 | 818k | { |
8387 | 818k | if (TLI->isSDNodeAlwaysUniform(N)) |
8388 | 8.20k | return; |
8389 | 809k | bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); |
8390 | 2.48M | for (auto &Op : N->ops()) { |
8391 | 2.48M | if (Op.Val.getValueType() != MVT::Other) |
8392 | 1.67M | IsDivergent |= Op.getNode()->isDivergent(); |
8393 | 2.48M | } |
8394 | 809k | if (N->SDNodeBits.IsDivergent != IsDivergent) { |
8395 | 15.3k | N->SDNodeBits.IsDivergent = IsDivergent; |
8396 | 18.8k | for (auto U : N->uses()) { |
8397 | 18.8k | updateDivergence(U); |
8398 | 18.8k | } |
8399 | 15.3k | } |
8400 | 809k | } |
8401 | | |
8402 | 0 | void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { |
8403 | 0 | DenseMap<SDNode *, unsigned> Degree; |
8404 | 0 | Order.reserve(AllNodes.size()); |
8405 | 0 | for (auto &N : allnodes()) { |
8406 | 0 | unsigned NOps = N.getNumOperands(); |
8407 | 0 | Degree[&N] = NOps; |
8408 | 0 | if (0 == NOps) |
8409 | 0 | Order.push_back(&N); |
8410 | 0 | } |
8411 | 0 | for (size_t I = 0; I != Order.size(); ++I) { |
8412 | 0 | SDNode *N = Order[I]; |
8413 | 0 | for (auto U : N->uses()) { |
8414 | 0 | unsigned &UnsortedOps = Degree[U]; |
8415 | 0 | if (0 == --UnsortedOps) |
8416 | 0 | Order.push_back(U); |
8417 | 0 | } |
8418 | 0 | } |
8419 | 0 | } |
8420 | | |
8421 | | #ifndef NDEBUG |
8422 | | void SelectionDAG::VerifyDAGDiverence() { |
8423 | | std::vector<SDNode *> TopoOrder; |
8424 | | CreateTopologicalOrder(TopoOrder); |
8425 | | const TargetLowering &TLI = getTargetLoweringInfo(); |
8426 | | DenseMap<const SDNode *, bool> DivergenceMap; |
8427 | | for (auto &N : allnodes()) { |
8428 | | DivergenceMap[&N] = false; |
8429 | | } |
8430 | | for (auto N : TopoOrder) { |
8431 | | bool IsDivergent = DivergenceMap[N]; |
8432 | | bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA); |
8433 | | for (auto &Op : N->ops()) { |
8434 | | if (Op.Val.getValueType() != MVT::Other) |
8435 | | IsSDNodeDivergent |= DivergenceMap[Op.getNode()]; |
8436 | | } |
8437 | | if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) { |
8438 | | DivergenceMap[N] = true; |
8439 | | } |
8440 | | } |
8441 | | for (auto &N : allnodes()) { |
8442 | | (void)N; |
8443 | | assert(DivergenceMap[&N] == N.isDivergent() && |
8444 | | "Divergence bit inconsistency detected\n"); |
8445 | | } |
8446 | | } |
8447 | | #endif |
8448 | | |
8449 | | /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving |
8450 | | /// uses of other values produced by From.getNode() alone. The same value |
8451 | | /// may appear in both the From and To list. The Deleted vector is |
8452 | | /// handled the same way as for ReplaceAllUsesWith. |
8453 | | void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, |
8454 | | const SDValue *To, |
8455 | 1.12k | unsigned Num){ |
8456 | 1.12k | // Handle the simple, trivial case efficiently. |
8457 | 1.12k | if (Num == 1) |
8458 | 0 | return ReplaceAllUsesOfValueWith(*From, *To); |
8459 | 1.12k | |
8460 | 1.12k | transferDbgValues(*From, *To); |
8461 | 1.12k | |
8462 | 1.12k | // Read up all the uses and make records of them. This helps |
8463 | 1.12k | // processing new uses that are introduced during the |
8464 | 1.12k | // replacement process. |
8465 | 1.12k | SmallVector<UseMemo, 4> Uses; |
8466 | 3.60k | for (unsigned i = 0; i != Num; ++i2.48k ) { |
8467 | 2.48k | unsigned FromResNo = From[i].getResNo(); |
8468 | 2.48k | SDNode *FromNode = From[i].getNode(); |
8469 | 2.48k | for (SDNode::use_iterator UI = FromNode->use_begin(), |
8470 | 6.85k | E = FromNode->use_end(); UI != E; ++UI4.37k ) { |
8471 | 4.37k | SDUse &Use = UI.getUse(); |
8472 | 4.37k | if (Use.getResNo() == FromResNo) { |
8473 | 2.16k | UseMemo Memo = { *UI, i, &Use }; |
8474 | 2.16k | Uses.push_back(Memo); |
8475 | 2.16k | } |
8476 | 4.37k | } |
8477 | 2.48k | } |
8478 | 1.12k | |
8479 | 1.12k | // Sort the uses, so that all the uses from a given User are together. |
8480 | 1.12k | llvm::sort(Uses); |
8481 | 1.12k | |
8482 | 1.12k | for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); |
8483 | 3.15k | UseIndex != UseIndexEnd; ) { |
8484 | 2.03k | // We know that this user uses some value of From. If it is the right |
8485 | 2.03k | // value, update it. |
8486 | 2.03k | SDNode *User = Uses[UseIndex].User; |
8487 | 2.03k | |
8488 | 2.03k | // This node is about to morph, remove its old self from the CSE maps. |
8489 | 2.03k | RemoveNodeFromCSEMaps(User); |
8490 | 2.03k | |
8491 | 2.03k | // The Uses array is sorted, so all the uses for a given User |
8492 | 2.03k | // are next to each other in the list. |
8493 | 2.03k | // To help reduce the number of CSE recomputations, process all |
8494 | 2.03k | // the uses of this user that we can find this way. |
8495 | 2.16k | do { |
8496 | 2.16k | unsigned i = Uses[UseIndex].Index; |
8497 | 2.16k | SDUse &Use = *Uses[UseIndex].Use; |
8498 | 2.16k | ++UseIndex; |
8499 | 2.16k | |
8500 | 2.16k | Use.set(To[i]); |
8501 | 2.16k | } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User1.04k ); |
8502 | 2.03k | |
8503 | 2.03k | // Now that we have modified User, add it back to the CSE maps. If it |
8504 | 2.03k | // already exists there, recursively merge the results together. |
8505 | 2.03k | AddModifiedNodeToCSEMaps(User); |
8506 | 2.03k | } |
8507 | 1.12k | } |
8508 | | |
8509 | | /// AssignTopologicalOrder - Assign a unique node id for each node in the DAG |
8510 | | /// based on their topological order. It returns the maximum id and a vector |
8511 | | /// of the SDNodes* in assigned order by reference. |
8512 | 2.66M | unsigned SelectionDAG::AssignTopologicalOrder() { |
8513 | 2.66M | unsigned DAGSize = 0; |
8514 | 2.66M | |
8515 | 2.66M | // SortedPos tracks the progress of the algorithm. Nodes before it are |
8516 | 2.66M | // sorted, nodes after it are unsorted. When the algorithm completes |
8517 | 2.66M | // it is at the end of the list. |
8518 | 2.66M | allnodes_iterator SortedPos = allnodes_begin(); |
8519 | 2.66M | |
8520 | 2.66M | // Visit all the nodes. Move nodes with no operands to the front of |
8521 | 2.66M | // the list immediately. Annotate nodes that do have operands with their |
8522 | 2.66M | // operand count. Before we do this, the Node Id fields of the nodes |
8523 | 2.66M | // may contain arbitrary values. After, the Node Id fields for nodes |
8524 | 2.66M | // before SortedPos will contain the topological sort index, and the |
8525 | 2.66M | // Node Id fields for nodes At SortedPos and after will contain the |
8526 | 2.66M | // count of outstanding operands. |
8527 | 57.9M | for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { |
8528 | 55.3M | SDNode *N = &*I++; |
8529 | 55.3M | checkForCycles(N, this); |
8530 | 55.3M | unsigned Degree = N->getNumOperands(); |
8531 | 55.3M | if (Degree == 0) { |
8532 | 26.3M | // A node with no uses, add it to the result array immediately. |
8533 | 26.3M | N->setNodeId(DAGSize++); |
8534 | 26.3M | allnodes_iterator Q(N); |
8535 | 26.3M | if (Q != SortedPos) |
8536 | 10.6M | SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); |
8537 | 26.3M | assert(SortedPos != AllNodes.end() && "Overran node list"); |
8538 | 26.3M | ++SortedPos; |
8539 | 28.9M | } else { |
8540 | 28.9M | // Temporarily use the Node Id as scratch space for the degree count. |
8541 | 28.9M | N->setNodeId(Degree); |
8542 | 28.9M | } |
8543 | 55.3M | } |
8544 | 2.66M | |
8545 | 2.66M | // Visit all the nodes. As we iterate, move nodes into sorted order, |
8546 | 2.66M | // such that by the time the end is reached all nodes will be sorted. |
8547 | 55.3M | for (SDNode &Node : allnodes()) { |
8548 | 55.3M | SDNode *N = &Node; |
8549 | 55.3M | checkForCycles(N, this); |
8550 | 55.3M | // N is in sorted position, so all its uses have one less operand |
8551 | 55.3M | // that needs to be sorted. |
8552 | 55.3M | for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); |
8553 | 136M | UI != UE; ++UI81.1M ) { |
8554 | 81.1M | SDNode *P = *UI; |
8555 | 81.1M | unsigned Degree = P->getNodeId(); |
8556 | 81.1M | assert(Degree != 0 && "Invalid node degree"); |
8557 | 81.1M | --Degree; |
8558 | 81.1M | if (Degree == 0) { |
8559 | 28.9M | // All of P's operands are sorted, so P may sorted now. |
8560 | 28.9M | P->setNodeId(DAGSize++); |
8561 | 28.9M | if (P->getIterator() != SortedPos) |
8562 | 8.08M | SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P)); |
8563 | 28.9M | assert(SortedPos != AllNodes.end() && "Overran node list"); |
8564 | 28.9M | ++SortedPos; |
8565 | 52.2M | } else { |
8566 | 52.2M | // Update P's outstanding operand count. |
8567 | 52.2M | P->setNodeId(Degree); |
8568 | 52.2M | } |
8569 | 81.1M | } |
8570 | 55.3M | if (Node.getIterator() == SortedPos) { |
8571 | | #ifndef NDEBUG |
8572 | | allnodes_iterator I(N); |
8573 | | SDNode *S = &*++I; |
8574 | | dbgs() << "Overran sorted position:\n"; |
8575 | | S->dumprFull(this); dbgs() << "\n"; |
8576 | | dbgs() << "Checking if this is due to cycles\n"; |
8577 | | checkForCycles(this, true); |
8578 | | #endif |
8579 | 0 | llvm_unreachable(nullptr); |
8580 | 0 | } |
8581 | 55.3M | } |
8582 | 2.66M | |
8583 | 2.66M | assert(SortedPos == AllNodes.end() && |
8584 | 2.66M | "Topological sort incomplete!"); |
8585 | 2.66M | assert(AllNodes.front().getOpcode() == ISD::EntryToken && |
8586 | 2.66M | "First node in topological sort is not the entry token!"); |
8587 | 2.66M | assert(AllNodes.front().getNodeId() == 0 && |
8588 | 2.66M | "First node in topological sort has non-zero id!"); |
8589 | 2.66M | assert(AllNodes.front().getNumOperands() == 0 && |
8590 | 2.66M | "First node in topological sort has operands!"); |
8591 | 2.66M | assert(AllNodes.back().getNodeId() == (int)DAGSize-1 && |
8592 | 2.66M | "Last node in topologic sort has unexpected id!"); |
8593 | 2.66M | assert(AllNodes.back().use_empty() && |
8594 | 2.66M | "Last node in topologic sort has users!"); |
8595 | 2.66M | assert(DAGSize == allnodes_size() && "Node count mismatch!"); |
8596 | 2.66M | return DAGSize; |
8597 | 2.66M | } |
8598 | | |
8599 | | /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the |
8600 | | /// value is produced by SD. |
8601 | 4.83k | void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { |
8602 | 4.83k | if (SD) { |
8603 | 541 | assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue()); |
8604 | 541 | SD->setHasDebugValue(true); |
8605 | 541 | } |
8606 | 4.83k | DbgInfo->add(DB, SD, isParameter); |
8607 | 4.83k | } |
8608 | | |
8609 | 4 | void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { |
8610 | 4 | DbgInfo->add(DB); |
8611 | 4 | } |
8612 | | |
8613 | | SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, |
8614 | 6.88k | SDValue NewMemOp) { |
8615 | 6.88k | assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); |
8616 | 6.88k | // The new memory operation must have the same position as the old load in |
8617 | 6.88k | // terms of memory dependency. Create a TokenFactor for the old load and new |
8618 | 6.88k | // memory operation and update uses of the old load's output chain to use that |
8619 | 6.88k | // TokenFactor. |
8620 | 6.88k | SDValue OldChain = SDValue(OldLoad, 1); |
8621 | 6.88k | SDValue NewChain = SDValue(NewMemOp.getNode(), 1); |
8622 | 6.88k | if (!OldLoad->hasAnyUseOfValue(1)) |
8623 | 5.24k | return NewChain; |
8624 | 1.64k | |
8625 | 1.64k | SDValue TokenFactor = |
8626 | 1.64k | getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain); |
8627 | 1.64k | ReplaceAllUsesOfValueWith(OldChain, TokenFactor); |
8628 | 1.64k | UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); |
8629 | 1.64k | return TokenFactor; |
8630 | 1.64k | } |
8631 | | |
8632 | | SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, |
8633 | 3 | Function **OutFunction) { |
8634 | 3 | assert(isa<ExternalSymbolSDNode>(Op) && "Node should be an ExternalSymbol"); |
8635 | 3 | |
8636 | 3 | auto *Symbol = cast<ExternalSymbolSDNode>(Op)->getSymbol(); |
8637 | 3 | auto *Module = MF->getFunction().getParent(); |
8638 | 3 | auto *Function = Module->getFunction(Symbol); |
8639 | 3 | |
8640 | 3 | if (OutFunction != nullptr) |
8641 | 3 | *OutFunction = Function; |
8642 | 3 | |
8643 | 3 | if (Function != nullptr) { |
8644 | 1 | auto PtrTy = TLI->getPointerTy(getDataLayout(), Function->getAddressSpace()); |
8645 | 1 | return getGlobalAddress(Function, SDLoc(Op), PtrTy); |
8646 | 1 | } |
8647 | 2 | |
8648 | 2 | std::string ErrorStr; |
8649 | 2 | raw_string_ostream ErrorFormatter(ErrorStr); |
8650 | 2 | |
8651 | 2 | ErrorFormatter << "Undefined external symbol "; |
8652 | 2 | ErrorFormatter << '"' << Symbol << '"'; |
8653 | 2 | ErrorFormatter.flush(); |
8654 | 2 | |
8655 | 2 | report_fatal_error(ErrorStr); |
8656 | 2 | } |
8657 | | |
8658 | | //===----------------------------------------------------------------------===// |
8659 | | // SDNode Class |
8660 | | //===----------------------------------------------------------------------===// |
8661 | | |
8662 | 9.39M | bool llvm::isNullConstant(SDValue V) { |
8663 | 9.39M | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); |
8664 | 9.39M | return Const != nullptr && Const->isNullValue()7.28M ; |
8665 | 9.39M | } |
8666 | | |
8667 | 1.65M | bool llvm::isNullFPConstant(SDValue V) { |
8668 | 1.65M | ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); |
8669 | 1.65M | return Const != nullptr && Const->isZero()132k && !Const->isNegative()41.5k ; |
8670 | 1.65M | } |
8671 | | |
8672 | 1.40M | bool llvm::isAllOnesConstant(SDValue V) { |
8673 | 1.40M | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); |
8674 | 1.40M | return Const != nullptr && Const->isAllOnesValue()879k ; |
8675 | 1.40M | } |
8676 | | |
8677 | 535k | bool llvm::isOneConstant(SDValue V) { |
8678 | 535k | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); |
8679 | 535k | return Const != nullptr && Const->isOne()311k ; |
8680 | 535k | } |
8681 | | |
8682 | 12.0M | SDValue llvm::peekThroughBitcasts(SDValue V) { |
8683 | 13.5M | while (V.getOpcode() == ISD::BITCAST) |
8684 | 1.50M | V = V.getOperand(0); |
8685 | 12.0M | return V; |
8686 | 12.0M | } |
8687 | | |
8688 | 770k | SDValue llvm::peekThroughOneUseBitcasts(SDValue V) { |
8689 | 973k | while (V.getOpcode() == ISD::BITCAST && V.getOperand(0).hasOneUse()221k ) |
8690 | 202k | V = V.getOperand(0); |
8691 | 770k | return V; |
8692 | 770k | } |
8693 | | |
8694 | 1.15M | SDValue llvm::peekThroughExtractSubvectors(SDValue V) { |
8695 | 1.18M | while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) |
8696 | 26.7k | V = V.getOperand(0); |
8697 | 1.15M | return V; |
8698 | 1.15M | } |
8699 | | |
8700 | 1.50M | bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) { |
8701 | 1.50M | if (V.getOpcode() != ISD::XOR) |
8702 | 1.47M | return false; |
8703 | 23.8k | V = peekThroughBitcasts(V.getOperand(1)); |
8704 | 23.8k | unsigned NumBits = V.getScalarValueSizeInBits(); |
8705 | 23.8k | ConstantSDNode *C = |
8706 | 23.8k | isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true); |
8707 | 23.8k | return C && (C->getAPIntValue().countTrailingOnes() >= NumBits)17.6k ; |
8708 | 23.8k | } |
8709 | | |
8710 | | ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, |
8711 | 25.6M | bool AllowTruncation) { |
8712 | 25.6M | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) |
8713 | 17.2M | return CN; |
8714 | 8.41M | |
8715 | 8.41M | if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { |
8716 | 606k | BitVector UndefElements; |
8717 | 606k | ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); |
8718 | 606k | |
8719 | 606k | // BuildVectors can truncate their operands. Ignore that case here unless |
8720 | 606k | // AllowTruncation is set. |
8721 | 606k | if (CN && (541k UndefElements.none()541k || AllowUndefs2.76k )) { |
8722 | 538k | EVT CVT = CN->getValueType(0); |
8723 | 538k | EVT NSVT = N.getValueType().getScalarType(); |
8724 | 538k | assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); |
8725 | 538k | if (AllowTruncation || (CVT == NSVT)528k ) |
8726 | 517k | return CN; |
8727 | 7.90M | } |
8728 | 606k | } |
8729 | 7.90M | |
8730 | 7.90M | return nullptr; |
8731 | 7.90M | } |
8732 | | |
8733 | | ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, |
8734 | | bool AllowUndefs, |
8735 | 1.37M | bool AllowTruncation) { |
8736 | 1.37M | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) |
8737 | 1.18M | return CN; |
8738 | 183k | |
8739 | 183k | if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { |
8740 | 54.8k | BitVector UndefElements; |
8741 | 54.8k | ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements); |
8742 | 54.8k | |
8743 | 54.8k | // BuildVectors can truncate their operands. Ignore that case here unless |
8744 | 54.8k | // AllowTruncation is set. |
8745 | 54.8k | if (CN && (52.9k UndefElements.none()52.9k || AllowUndefs64 )) { |
8746 | 52.8k | EVT CVT = CN->getValueType(0); |
8747 | 52.8k | EVT NSVT = N.getValueType().getScalarType(); |
8748 | 52.8k | assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); |
8749 | 52.8k | if (AllowTruncation || (CVT == NSVT)) |
8750 | 51.6k | return CN; |
8751 | 132k | } |
8752 | 54.8k | } |
8753 | 132k | |
8754 | 132k | return nullptr; |
8755 | 132k | } |
8756 | | |
8757 | 567k | ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { |
8758 | 567k | if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) |
8759 | 44.5k | return CN; |
8760 | 522k | |
8761 | 522k | if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { |
8762 | 41.4k | BitVector UndefElements; |
8763 | 41.4k | ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); |
8764 | 41.4k | if (CN && (30.8k UndefElements.none()30.8k || AllowUndefs290 )) |
8765 | 30.8k | return CN; |
8766 | 492k | } |
8767 | 492k | |
8768 | 492k | return nullptr; |
8769 | 492k | } |
8770 | | |
8771 | | ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, |
8772 | | const APInt &DemandedElts, |
8773 | 0 | bool AllowUndefs) { |
8774 | 0 | if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) |
8775 | 0 | return CN; |
8776 | 0 | |
8777 | 0 | if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { |
8778 | 0 | BitVector UndefElements; |
8779 | 0 | ConstantFPSDNode *CN = |
8780 | 0 | BV->getConstantFPSplatNode(DemandedElts, &UndefElements); |
8781 | 0 | if (CN && (UndefElements.none() || AllowUndefs)) |
8782 | 0 | return CN; |
8783 | 0 | } |
8784 | 0 | |
8785 | 0 | return nullptr; |
8786 | 0 | } |
8787 | | |
8788 | 2.81M | bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { |
8789 | 2.81M | // TODO: may want to use peekThroughBitcast() here. |
8790 | 2.81M | ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); |
8791 | 2.81M | return C && C->isNullValue()1.32M ; |
8792 | 2.81M | } |
8793 | | |
8794 | 3.68M | bool llvm::isOneOrOneSplat(SDValue N) { |
8795 | 3.68M | // TODO: may want to use peekThroughBitcast() here. |
8796 | 3.68M | unsigned BitWidth = N.getScalarValueSizeInBits(); |
8797 | 3.68M | ConstantSDNode *C = isConstOrConstSplat(N); |
8798 | 3.68M | return C && C->isOne()2.98M && C->getValueSizeInBits(0) == BitWidth153k ; |
8799 | 3.68M | } |
8800 | | |
8801 | 327k | bool llvm::isAllOnesOrAllOnesSplat(SDValue N) { |
8802 | 327k | N = peekThroughBitcasts(N); |
8803 | 327k | unsigned BitWidth = N.getScalarValueSizeInBits(); |
8804 | 327k | ConstantSDNode *C = isConstOrConstSplat(N); |
8805 | 327k | return C && C->isAllOnesValue()161k && C->getValueSizeInBits(0) == BitWidth57.5k ; |
8806 | 327k | } |
8807 | | |
8808 | 12.8M | HandleSDNode::~HandleSDNode() { |
8809 | 12.8M | DropOperands(); |
8810 | 12.8M | } |
8811 | | |
8812 | | GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, |
8813 | | const DebugLoc &DL, |
8814 | | const GlobalValue *GA, EVT VT, |
8815 | | int64_t o, unsigned char TF) |
8816 | 1.79M | : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { |
8817 | 1.79M | TheGlobal = GA; |
8818 | 1.79M | } |
8819 | | |
8820 | | AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, |
8821 | | EVT VT, unsigned SrcAS, |
8822 | | unsigned DestAS) |
8823 | | : SDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT)), |
8824 | 228 | SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} |
8825 | | |
8826 | | MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, |
8827 | | SDVTList VTs, EVT memvt, MachineMemOperand *mmo) |
8828 | 5.57M | : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { |
8829 | 5.57M | MemSDNodeBits.IsVolatile = MMO->isVolatile(); |
8830 | 5.57M | MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal(); |
8831 | 5.57M | MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable(); |
8832 | 5.57M | MemSDNodeBits.IsInvariant = MMO->isInvariant(); |
8833 | 5.57M | |
8834 | 5.57M | // We check here that the size of the memory operand fits within the size of |
8835 | 5.57M | // the MMO. This is because the MMO might indicate only a possible address |
8836 | 5.57M | // range instead of specifying the affected memory addresses precisely. |
8837 | 5.57M | assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); |
8838 | 5.57M | } |
8839 | | |
8840 | | /// Profile - Gather unique data for the node. |
8841 | | /// |
8842 | 56.8M | void SDNode::Profile(FoldingSetNodeID &ID) const { |
8843 | 56.8M | AddNodeIDNode(ID, this); |
8844 | 56.8M | } |
8845 | | |
8846 | | namespace { |
8847 | | |
8848 | | struct EVTArray { |
8849 | | std::vector<EVT> VTs; |
8850 | | |
8851 | 35.7k | EVTArray() { |
8852 | 35.7k | VTs.reserve(MVT::LAST_VALUETYPE); |
8853 | 4.68M | for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i4.64M ) |
8854 | 4.64M | VTs.push_back(MVT((MVT::SimpleValueType)i)); |
8855 | 35.7k | } |
8856 | | }; |
8857 | | |
8858 | | } // end anonymous namespace |
8859 | | |
8860 | | static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs; |
8861 | | static ManagedStatic<EVTArray> SimpleVTArray; |
8862 | | static ManagedStatic<sys::SmartMutex<true>> VTMutex; |
8863 | | |
8864 | | /// getValueTypeList - Return a pointer to the specified value type. |
8865 | | /// |
8866 | 96.7M | const EVT *SDNode::getValueTypeList(EVT VT) { |
8867 | 96.7M | if (VT.isExtended()) { |
8868 | 124k | sys::SmartScopedLock<true> Lock(*VTMutex); |
8869 | 124k | return &(*EVTs->insert(VT).first); |
8870 | 96.6M | } else { |
8871 | 96.6M | assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE && |
8872 | 96.6M | "Value type out of range!"); |
8873 | 96.6M | return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; |
8874 | 96.6M | } |
8875 | 96.7M | } |
8876 | | |
8877 | | /// hasNUsesOfValue - Return true if there are exactly NUSES uses of the |
8878 | | /// indicated value. This method ignores uses of other values defined by this |
8879 | | /// operation. |
8880 | 39.3M | bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { |
8881 | 39.3M | assert(Value < getNumValues() && "Bad value!"); |
8882 | 39.3M | |
8883 | 39.3M | // TODO: Only iterate over uses of a given value of the node |
8884 | 82.3M | for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI43.0M ) { |
8885 | 58.5M | if (UI.getUse().getResNo() == Value) { |
8886 | 54.7M | if (NUses == 0) |
8887 | 15.4M | return false; |
8888 | 39.2M | --NUses; |
8889 | 39.2M | } |
8890 | 58.5M | } |
8891 | 39.3M | |
8892 | 39.3M | // Found exactly the right number of uses? |
8893 | 39.3M | return NUses == 023.8M ; |
8894 | 39.3M | } |
8895 | | |
8896 | | /// hasAnyUseOfValue - Return true if there are any use of the indicated |
8897 | | /// value. This method ignores uses of other values defined by this operation. |
8898 | 19.0M | bool SDNode::hasAnyUseOfValue(unsigned Value) const { |
8899 | 19.0M | assert(Value < getNumValues() && "Bad value!"); |
8900 | 19.0M | |
8901 | 25.3M | for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI6.30M ) |
8902 | 20.9M | if (UI.getUse().getResNo() == Value) |
8903 | 14.6M | return true; |
8904 | 19.0M | |
8905 | 19.0M | return false4.40M ; |
8906 | 19.0M | } |
8907 | | |
8908 | | /// isOnlyUserOf - Return true if this node is the only use of N. |
8909 | 305k | bool SDNode::isOnlyUserOf(const SDNode *N) const { |
8910 | 305k | bool Seen = false; |
8911 | 658k | for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I353k ) { |
8912 | 443k | SDNode *User = *I; |
8913 | 443k | if (User == this) |
8914 | 353k | Seen = true; |
8915 | 89.3k | else |
8916 | 89.3k | return false; |
8917 | 443k | } |
8918 | 305k | |
8919 | 305k | return Seen215k ; |
8920 | 305k | } |
8921 | | |
8922 | | /// Return true if the only users of N are contained in Nodes. |
8923 | 93.1k | bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { |
8924 | 93.1k | bool Seen = false; |
8925 | 156k | for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I62.9k ) { |
8926 | 143k | SDNode *User = *I; |
8927 | 143k | if (llvm::any_of(Nodes, |
8928 | 267k | [&User](const SDNode *Node) { return User == Node; })) |
8929 | 62.9k | Seen = true; |
8930 | 80.6k | else |
8931 | 80.6k | return false; |
8932 | 143k | } |
8933 | 93.1k | |
8934 | 93.1k | return Seen12.5k ; |
8935 | 93.1k | } |
8936 | | |
8937 | | /// isOperand - Return true if this node is an operand of N. |
8938 | 1.22M | bool SDValue::isOperandOf(const SDNode *N) const { |
8939 | 4.55M | return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; }); |
8940 | 1.22M | } |
8941 | | |
8942 | 630 | bool SDNode::isOperandOf(const SDNode *N) const { |
8943 | 630 | return any_of(N->op_values(), |
8944 | 983 | [this](SDValue Op) { return this == Op.getNode(); }); |
8945 | 630 | } |
8946 | | |
8947 | | /// reachesChainWithoutSideEffects - Return true if this operand (which must |
8948 | | /// be a chain) reaches the specified operand without crossing any |
8949 | | /// side-effecting instructions on any chain path. In practice, this looks |
8950 | | /// through token factors and non-volatile loads. In order to remain efficient, |
8951 | | /// this only looks a couple of nodes in, it does not do an exhaustive search. |
8952 | | /// |
8953 | | /// Note that we only need to examine chains when we're searching for |
8954 | | /// side-effects; SelectionDAG requires that all side-effects are represented |
8955 | | /// by chains, even if another operand would force a specific ordering. This |
8956 | | /// constraint is necessary to allow transformations like splitting loads. |
8957 | | bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, |
8958 | 2.61k | unsigned Depth) const { |
8959 | 2.61k | if (*this == Dest) return true1.26k ; |
8960 | 1.35k | |
8961 | 1.35k | // Don't search too deeply, we just want to be able to see through |
8962 | 1.35k | // TokenFactor's etc. |
8963 | 1.35k | if (Depth == 0) return false135 ; |
8964 | 1.21k | |
8965 | 1.21k | // If this is a token factor, all inputs to the TF happen in parallel. |
8966 | 1.21k | if (getOpcode() == ISD::TokenFactor) { |
8967 | 247 | // First, try a shallow search. |
8968 | 247 | if (is_contained((*this)->ops(), Dest)) { |
8969 | 187 | // We found the chain we want as an operand of this TokenFactor. |
8970 | 187 | // Essentially, we reach the chain without side-effects if we could |
8971 | 187 | // serialize the TokenFactor into a simple chain of operations with |
8972 | 187 | // Dest as the last operation. This is automatically true if the |
8973 | 187 | // chain has one use: there are no other ordering constraints. |
8974 | 187 | // If the chain has more than one use, we give up: some other |
8975 | 187 | // use of Dest might force a side-effect between Dest and the current |
8976 | 187 | // node. |
8977 | 187 | if (Dest.hasOneUse()) |
8978 | 64 | return true; |
8979 | 183 | } |
8980 | 183 | // Next, try a deep search: check whether every operand of the TokenFactor |
8981 | 183 | // reaches Dest. |
8982 | 185 | return llvm::all_of((*this)->ops(), [=](SDValue Op) 183 { |
8983 | 185 | return Op.reachesChainWithoutSideEffects(Dest, Depth - 1); |
8984 | 185 | }); |
8985 | 183 | } |
8986 | 969 | |
8987 | 969 | // Loads don't have side effects, look through them. |
8988 | 969 | if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { |
8989 | 160 | if (!Ld->isVolatile()) |
8990 | 160 | return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); |
8991 | 809 | } |
8992 | 809 | return false; |
8993 | 809 | } |
8994 | | |
8995 | 13.3k | bool SDNode::hasPredecessor(const SDNode *N) const { |
8996 | 13.3k | SmallPtrSet<const SDNode *, 32> Visited; |
8997 | 13.3k | SmallVector<const SDNode *, 16> Worklist; |
8998 | 13.3k | Worklist.push_back(this); |
8999 | 13.3k | return hasPredecessorHelper(N, Visited, Worklist); |
9000 | 13.3k | } |
9001 | | |
9002 | 894k | void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { |
9003 | 894k | this->Flags.intersectWith(Flags); |
9004 | 894k | } |
9005 | | |
9006 | | SDValue |
9007 | | SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, |
9008 | 299k | ArrayRef<ISD::NodeType> CandidateBinOps) { |
9009 | 299k | // The pattern must end in an extract from index 0. |
9010 | 299k | if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
9011 | 299k | !isNullConstant(Extract->getOperand(1))) |
9012 | 172k | return SDValue(); |
9013 | 126k | |
9014 | 126k | SDValue Op = Extract->getOperand(0); |
9015 | 126k | unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); |
9016 | 126k | |
9017 | 126k | // Match against one of the candidate binary ops. |
9018 | 175k | if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) 126k { |
9019 | 175k | return Op.getOpcode() == unsigned(BinOp); |
9020 | 175k | })) |
9021 | 120k | return SDValue(); |
9022 | 5.28k | |
9023 | 5.28k | // At each stage, we're looking for something that looks like: |
9024 | 5.28k | // %s = shufflevector <8 x i32> %op, <8 x i32> undef, |
9025 | 5.28k | // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, |
9026 | 5.28k | // i32 undef, i32 undef, i32 undef, i32 undef> |
9027 | 5.28k | // %a = binop <8 x i32> %op, %s |
9028 | 5.28k | // Where the mask changes according to the stage. E.g. for a 3-stage pyramid, |
9029 | 5.28k | // we expect something like: |
9030 | 5.28k | // <4,5,6,7,u,u,u,u> |
9031 | 5.28k | // <2,3,u,u,u,u,u,u> |
9032 | 5.28k | // <1,u,u,u,u,u,u,u> |
9033 | 5.28k | unsigned CandidateBinOp = Op.getOpcode(); |
9034 | 16.1k | for (unsigned i = 0; i < Stages; ++i10.8k ) { |
9035 | 13.0k | if (Op.getOpcode() != CandidateBinOp) |
9036 | 323 | return SDValue(); |
9037 | 12.6k | |
9038 | 12.6k | SDValue Op0 = Op.getOperand(0); |
9039 | 12.6k | SDValue Op1 = Op.getOperand(1); |
9040 | 12.6k | |
9041 | 12.6k | ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(Op0); |
9042 | 12.6k | if (Shuffle) { |
9043 | 341 | Op = Op1; |
9044 | 12.3k | } else { |
9045 | 12.3k | Shuffle = dyn_cast<ShuffleVectorSDNode>(Op1); |
9046 | 12.3k | Op = Op0; |
9047 | 12.3k | } |
9048 | 12.6k | |
9049 | 12.6k | // The first operand of the shuffle should be the same as the other operand |
9050 | 12.6k | // of the binop. |
9051 | 12.6k | if (!Shuffle || Shuffle->getOperand(0) != Op10.9k ) |
9052 | 1.80k | return SDValue(); |
9053 | 10.8k | |
9054 | 10.8k | // Verify the shuffle has the expected (at this stage of the pyramid) mask. |
9055 | 66.0k | for (int Index = 0, MaskEnd = 1 << i; 10.8k Index < MaskEnd; ++Index55.1k ) |
9056 | 55.1k | if (Shuffle->getMaskElt(Index) != MaskEnd + Index) |
9057 | 0 | return SDValue(); |
9058 | 10.8k | } |
9059 | 5.28k | |
9060 | 5.28k | BinOp = (ISD::NodeType)CandidateBinOp; |
9061 | 3.15k | return Op; |
9062 | 5.28k | } |
9063 | | |
9064 | 6.82k | SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { |
9065 | 6.82k | assert(N->getNumValues() == 1 && |
9066 | 6.82k | "Can't unroll a vector with multiple results!"); |
9067 | 6.82k | |
9068 | 6.82k | EVT VT = N->getValueType(0); |
9069 | 6.82k | unsigned NE = VT.getVectorNumElements(); |
9070 | 6.82k | EVT EltVT = VT.getVectorElementType(); |
9071 | 6.82k | SDLoc dl(N); |
9072 | 6.82k | |
9073 | 6.82k | SmallVector<SDValue, 8> Scalars; |
9074 | 6.82k | SmallVector<SDValue, 4> Operands(N->getNumOperands()); |
9075 | 6.82k | |
9076 | 6.82k | // If ResNE is 0, fully unroll the vector op. |
9077 | 6.82k | if (ResNE == 0) |
9078 | 6.78k | ResNE = NE; |
9079 | 37 | else if (NE > ResNE) |
9080 | 0 | NE = ResNE; |
9081 | 6.82k | |
9082 | 6.82k | unsigned i; |
9083 | 28.0k | for (i= 0; i != NE; ++i21.2k ) { |
9084 | 52.7k | for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j31.5k ) { |
9085 | 31.5k | SDValue Operand = N->getOperand(j); |
9086 | 31.5k | EVT OperandVT = Operand.getValueType(); |
9087 | 31.5k | if (OperandVT.isVector()) { |
9088 | 30.9k | // A vector operand; extract a single element. |
9089 | 30.9k | EVT OperandEltVT = OperandVT.getVectorElementType(); |
9090 | 30.9k | Operands[j] = |
9091 | 30.9k | getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, |
9092 | 30.9k | getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout()))); |
9093 | 30.9k | } else { |
9094 | 599 | // A scalar operand; just use it as is. |
9095 | 599 | Operands[j] = Operand; |
9096 | 599 | } |
9097 | 31.5k | } |
9098 | 21.2k | |
9099 | 21.2k | switch (N->getOpcode()) { |
9100 | 21.2k | default: { |
9101 | 19.7k | Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands, |
9102 | 19.7k | N->getFlags())); |
9103 | 19.7k | break; |
9104 | 21.2k | } |
9105 | 21.2k | case ISD::VSELECT: |
9106 | 52 | Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); |
9107 | 52 | break; |
9108 | 21.2k | case ISD::SHL: |
9109 | 1.35k | case ISD::SRA: |
9110 | 1.35k | case ISD::SRL: |
9111 | 1.35k | case ISD::ROTL: |
9112 | 1.35k | case ISD::ROTR: |
9113 | 1.35k | Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], |
9114 | 1.35k | getShiftAmountOperand(Operands[0].getValueType(), |
9115 | 1.35k | Operands[1]))); |
9116 | 1.35k | break; |
9117 | 1.35k | case ISD::SIGN_EXTEND_INREG: |
9118 | 74 | case ISD::FP_ROUND_INREG: { |
9119 | 74 | EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); |
9120 | 74 | Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, |
9121 | 74 | Operands[0], |
9122 | 74 | getValueType(ExtVT))); |
9123 | 74 | } |
9124 | 21.2k | } |
9125 | 21.2k | } |
9126 | 6.82k | |
9127 | 6.87k | for (; 6.82k i < ResNE; ++i49 ) |
9128 | 49 | Scalars.push_back(getUNDEF(EltVT)); |
9129 | 6.82k | |
9130 | 6.82k | EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE); |
9131 | 6.82k | return getBuildVector(VecVT, dl, Scalars); |
9132 | 6.82k | } |
9133 | | |
9134 | | std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp( |
9135 | 27 | SDNode *N, unsigned ResNE) { |
9136 | 27 | unsigned Opcode = N->getOpcode(); |
9137 | 27 | assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO || |
9138 | 27 | Opcode == ISD::USUBO || Opcode == ISD::SSUBO || |
9139 | 27 | Opcode == ISD::UMULO || Opcode == ISD::SMULO) && |
9140 | 27 | "Expected an overflow opcode"); |
9141 | 27 | |
9142 | 27 | EVT ResVT = N->getValueType(0); |
9143 | 27 | EVT OvVT = N->getValueType(1); |
9144 | 27 | EVT ResEltVT = ResVT.getVectorElementType(); |
9145 | 27 | EVT OvEltVT = OvVT.getVectorElementType(); |
9146 | 27 | SDLoc dl(N); |
9147 | 27 | |
9148 | 27 | // If ResNE is 0, fully unroll the vector op. |
9149 | 27 | unsigned NE = ResVT.getVectorNumElements(); |
9150 | 27 | if (ResNE == 0) |
9151 | 27 | ResNE = NE; |
9152 | 0 | else if (NE > ResNE) |
9153 | 0 | NE = ResNE; |
9154 | 27 | |
9155 | 27 | SmallVector<SDValue, 8> LHSScalars; |
9156 | 27 | SmallVector<SDValue, 8> RHSScalars; |
9157 | 27 | ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE); |
9158 | 27 | ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE); |
9159 | 27 | |
9160 | 27 | EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT); |
9161 | 27 | SDVTList VTs = getVTList(ResEltVT, SVT); |
9162 | 27 | SmallVector<SDValue, 8> ResScalars; |
9163 | 27 | SmallVector<SDValue, 8> OvScalars; |
9164 | 85 | for (unsigned i = 0; i < NE; ++i58 ) { |
9165 | 58 | SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]); |
9166 | 58 | SDValue Ov = |
9167 | 58 | getSelect(dl, OvEltVT, Res.getValue(1), |
9168 | 58 | getBoolConstant(true, dl, OvEltVT, ResVT), |
9169 | 58 | getConstant(0, dl, OvEltVT)); |
9170 | 58 | |
9171 | 58 | ResScalars.push_back(Res); |
9172 | 58 | OvScalars.push_back(Ov); |
9173 | 58 | } |
9174 | 27 | |
9175 | 27 | ResScalars.append(ResNE - NE, getUNDEF(ResEltVT)); |
9176 | 27 | OvScalars.append(ResNE - NE, getUNDEF(OvEltVT)); |
9177 | 27 | |
9178 | 27 | EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE); |
9179 | 27 | EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE); |
9180 | 27 | return std::make_pair(getBuildVector(NewResVT, dl, ResScalars), |
9181 | 27 | getBuildVector(NewOvVT, dl, OvScalars)); |
9182 | 27 | } |
9183 | | |
9184 | | bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, |
9185 | | LoadSDNode *Base, |
9186 | | unsigned Bytes, |
9187 | 7.40k | int Dist) const { |
9188 | 7.40k | if (LD->isVolatile() || Base->isVolatile()7.32k ) |
9189 | 142 | return false; |
9190 | 7.26k | if (LD->isIndexed() || Base->isIndexed()) |
9191 | 0 | return false; |
9192 | 7.26k | if (LD->getChain() != Base->getChain()) |
9193 | 493 | return false; |
9194 | 6.77k | EVT VT = LD->getValueType(0); |
9195 | 6.77k | if (VT.getSizeInBits() / 8 != Bytes) |
9196 | 3 | return false; |
9197 | 6.76k | |
9198 | 6.76k | auto BaseLocDecomp = BaseIndexOffset::match(Base, *this); |
9199 | 6.76k | auto LocDecomp = BaseIndexOffset::match(LD, *this); |
9200 | 6.76k | |
9201 | 6.76k | int64_t Offset = 0; |
9202 | 6.76k | if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) |
9203 | 6.44k | return (Dist * Bytes == Offset); |
9204 | 325 | return false; |
9205 | 325 | } |
9206 | | |
9207 | | /// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if |
9208 | | /// it cannot be inferred. |
9209 | 8.67M | unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { |
9210 | 8.67M | // If this is a GlobalAddress + cst, return the alignment. |
9211 | 8.67M | const GlobalValue *GV; |
9212 | 8.67M | int64_t GVOffset = 0; |
9213 | 8.67M | if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { |
9214 | 376k | unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); |
9215 | 376k | KnownBits Known(IdxWidth); |
9216 | 376k | llvm::computeKnownBits(GV, Known, getDataLayout()); |
9217 | 376k | unsigned AlignBits = Known.countMinTrailingZeros(); |
9218 | 376k | unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits)335k : 041.1k ; |
9219 | 376k | if (Align) |
9220 | 335k | return MinAlign(Align, GVOffset); |
9221 | 8.34M | } |
9222 | 8.34M | |
9223 | 8.34M | // If this is a direct reference to a stack slot, use information about the |
9224 | 8.34M | // stack slot's alignment. |
9225 | 8.34M | int FrameIdx = INT_MIN; |
9226 | 8.34M | int64_t FrameOffset = 0; |
9227 | 8.34M | if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { |
9228 | 3.04M | FrameIdx = FI->getIndex(); |
9229 | 5.29M | } else if (isBaseWithConstantOffset(Ptr) && |
9230 | 5.29M | isa<FrameIndexSDNode>(Ptr.getOperand(0))3.95M ) { |
9231 | 895k | // Handle FI+Cst |
9232 | 895k | FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); |
9233 | 895k | FrameOffset = Ptr.getConstantOperandVal(1); |
9234 | 895k | } |
9235 | 8.34M | |
9236 | 8.34M | if (FrameIdx != INT_MIN) { |
9237 | 3.94M | const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); |
9238 | 3.94M | unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), |
9239 | 3.94M | FrameOffset); |
9240 | 3.94M | return FIInfoAlign; |
9241 | 3.94M | } |
9242 | 4.39M | |
9243 | 4.39M | return 0; |
9244 | 4.39M | } |
9245 | | |
9246 | | /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type |
9247 | | /// which is split (or expanded) into two not necessarily identical pieces. |
9248 | 174k | std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { |
9249 | 174k | // Currently all types are split in half. |
9250 | 174k | EVT LoVT, HiVT; |
9251 | 174k | if (!VT.isVector()) |
9252 | 2.16k | LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); |
9253 | 172k | else |
9254 | 172k | LoVT = HiVT = VT.getHalfNumVectorElementsVT(*getContext()); |
9255 | 174k | |
9256 | 174k | return std::make_pair(LoVT, HiVT); |
9257 | 174k | } |
9258 | | |
9259 | | /// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the |
9260 | | /// low/high part. |
9261 | | std::pair<SDValue, SDValue> |
9262 | | SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, |
9263 | 21.1k | const EVT &HiVT) { |
9264 | 21.1k | assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= |
9265 | 21.1k | N.getValueType().getVectorNumElements() && |
9266 | 21.1k | "More vector elements requested than available!"); |
9267 | 21.1k | SDValue Lo, Hi; |
9268 | 21.1k | Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, |
9269 | 21.1k | getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); |
9270 | 21.1k | Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, |
9271 | 21.1k | getConstant(LoVT.getVectorNumElements(), DL, |
9272 | 21.1k | TLI->getVectorIdxTy(getDataLayout()))); |
9273 | 21.1k | return std::make_pair(Lo, Hi); |
9274 | 21.1k | } |
9275 | | |
9276 | | /// Widen the vector up to the next power of two using INSERT_SUBVECTOR. |
9277 | 34 | SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { |
9278 | 34 | EVT VT = N.getValueType(); |
9279 | 34 | EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), |
9280 | 34 | NextPowerOf2(VT.getVectorNumElements())); |
9281 | 34 | return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N, |
9282 | 34 | getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); |
9283 | 34 | } |
9284 | | |
9285 | | void SelectionDAG::ExtractVectorElements(SDValue Op, |
9286 | | SmallVectorImpl<SDValue> &Args, |
9287 | 34.3k | unsigned Start, unsigned Count) { |
9288 | 34.3k | EVT VT = Op.getValueType(); |
9289 | 34.3k | if (Count == 0) |
9290 | 12.0k | Count = VT.getVectorNumElements(); |
9291 | 34.3k | |
9292 | 34.3k | EVT EltVT = VT.getVectorElementType(); |
9293 | 34.3k | EVT IdxTy = TLI->getVectorIdxTy(getDataLayout()); |
9294 | 34.3k | SDLoc SL(Op); |
9295 | 186k | for (unsigned i = Start, e = Start + Count; i != e; ++i152k ) { |
9296 | 152k | Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, |
9297 | 152k | Op, getConstant(i, SL, IdxTy))); |
9298 | 152k | } |
9299 | 34.3k | } |
9300 | | |
9301 | | // getAddressSpace - Return the address space this GlobalAddress belongs to. |
9302 | 10.4k | unsigned GlobalAddressSDNode::getAddressSpace() const { |
9303 | 10.4k | return getGlobal()->getType()->getAddressSpace(); |
9304 | 10.4k | } |
9305 | | |
9306 | 103k | Type *ConstantPoolSDNode::getType() const { |
9307 | 103k | if (isMachineConstantPoolEntry()) |
9308 | 252 | return Val.MachineCPVal->getType(); |
9309 | 102k | return Val.ConstVal->getType(); |
9310 | 102k | } |
9311 | | |
9312 | | bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, |
9313 | | unsigned &SplatBitSize, |
9314 | | bool &HasAnyUndefs, |
9315 | | unsigned MinSplatBits, |
9316 | 180k | bool IsBigEndian) const { |
9317 | 180k | EVT VT = getValueType(0); |
9318 | 180k | assert(VT.isVector() && "Expected a vector type"); |
9319 | 180k | unsigned VecWidth = VT.getSizeInBits(); |
9320 | 180k | if (MinSplatBits > VecWidth) |
9321 | 0 | return false; |
9322 | 180k | |
9323 | 180k | // FIXME: The widths are based on this node's type, but build vectors can |
9324 | 180k | // truncate their operands. |
9325 | 180k | SplatValue = APInt(VecWidth, 0); |
9326 | 180k | SplatUndef = APInt(VecWidth, 0); |
9327 | 180k | |
9328 | 180k | // Get the bits. Bits with undefined values (when the corresponding element |
9329 | 180k | // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared |
9330 | 180k | // in SplatValue. If any of the values are not constant, give up and return |
9331 | 180k | // false. |
9332 | 180k | unsigned int NumOps = getNumOperands(); |
9333 | 180k | assert(NumOps > 0 && "isConstantSplat has 0-size build vector"); |
9334 | 180k | unsigned EltWidth = VT.getScalarSizeInBits(); |
9335 | 180k | |
9336 | 949k | for (unsigned j = 0; j < NumOps; ++j768k ) { |
9337 | 796k | unsigned i = IsBigEndian ? NumOps - 1 - j59.6k : j736k ; |
9338 | 796k | SDValue OpVal = getOperand(i); |
9339 | 796k | unsigned BitPos = j * EltWidth; |
9340 | 796k | |
9341 | 796k | if (OpVal.isUndef()) |
9342 | 23.5k | SplatUndef.setBits(BitPos, BitPos + EltWidth); |
9343 | 772k | else if (auto *CN = dyn_cast<ConstantSDNode>(OpVal)) |
9344 | 709k | SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth), BitPos); |
9345 | 63.7k | else if (auto *CN = dyn_cast<ConstantFPSDNode>(OpVal)) |
9346 | 36.2k | SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos); |
9347 | 27.4k | else |
9348 | 27.4k | return false; |
9349 | 796k | } |
9350 | 180k | |
9351 | 180k | // The build_vector is all constants or undefs. Find the smallest element |
9352 | 180k | // size that splats the vector. |
9353 | 180k | HasAnyUndefs = (SplatUndef != 0); |
9354 | 152k | |
9355 | 152k | // FIXME: This does not work for vectors with elements less than 8 bits. |
9356 | 331k | while (VecWidth > 8) { |
9357 | 322k | unsigned HalfSize = VecWidth / 2; |
9358 | 322k | APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); |
9359 | 322k | APInt LowValue = SplatValue.trunc(HalfSize); |
9360 | 322k | APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); |
9361 | 322k | APInt LowUndef = SplatUndef.trunc(HalfSize); |
9362 | 322k | |
9363 | 322k | // If the two halves do not match (ignoring undef bits), stop here. |
9364 | 322k | if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || |
9365 | 322k | MinSplatBits > HalfSize182k ) |
9366 | 143k | break; |
9367 | 178k | |
9368 | 178k | SplatValue = HighValue | LowValue; |
9369 | 178k | SplatUndef = HighUndef & LowUndef; |
9370 | 178k | |
9371 | 178k | VecWidth = HalfSize; |
9372 | 178k | } |
9373 | 152k | |
9374 | 152k | SplatBitSize = VecWidth; |
9375 | 152k | return true; |
9376 | 180k | } |
9377 | | |
9378 | | SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts, |
9379 | 1.11M | BitVector *UndefElements) const { |
9380 | 1.11M | if (UndefElements) { |
9381 | 747k | UndefElements->clear(); |
9382 | 747k | UndefElements->resize(getNumOperands()); |
9383 | 747k | } |
9384 | 1.11M | assert(getNumOperands() == DemandedElts.getBitWidth() && |
9385 | 1.11M | "Unexpected vector size"); |
9386 | 1.11M | if (!DemandedElts) |
9387 | 0 | return SDValue(); |
9388 | 1.11M | SDValue Splatted; |
9389 | 6.80M | for (unsigned i = 0, e = getNumOperands(); i != e; ++i5.69M ) { |
9390 | 5.94M | if (!DemandedElts[i]) |
9391 | 48.7k | continue; |
9392 | 5.89M | SDValue Op = getOperand(i); |
9393 | 5.89M | if (Op.isUndef()) { |
9394 | 60.6k | if (UndefElements) |
9395 | 28.0k | (*UndefElements)[i] = true; |
9396 | 5.83M | } else if (!Splatted) { |
9397 | 1.11M | Splatted = Op; |
9398 | 4.72M | } else if (Splatted != Op) { |
9399 | 253k | return SDValue(); |
9400 | 253k | } |
9401 | 5.89M | } |
9402 | 1.11M | |
9403 | 1.11M | if (858k !Splatted858k ) { |
9404 | 0 | unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros(); |
9405 | 0 | assert(getOperand(FirstDemandedIdx).isUndef() && |
9406 | 0 | "Can only have a splat without a constant for all undefs."); |
9407 | 0 | return getOperand(FirstDemandedIdx); |
9408 | 0 | } |
9409 | 858k | |
9410 | 858k | return Splatted; |
9411 | 858k | } |
9412 | | |
9413 | 1.05M | SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { |
9414 | 1.05M | APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); |
9415 | 1.05M | return getSplatValue(DemandedElts, UndefElements); |
9416 | 1.05M | } |
9417 | | |
9418 | | ConstantSDNode * |
9419 | | BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts, |
9420 | 54.8k | BitVector *UndefElements) const { |
9421 | 54.8k | return dyn_cast_or_null<ConstantSDNode>( |
9422 | 54.8k | getSplatValue(DemandedElts, UndefElements)); |
9423 | 54.8k | } |
9424 | | |
9425 | | ConstantSDNode * |
9426 | 690k | BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { |
9427 | 690k | return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements)); |
9428 | 690k | } |
9429 | | |
9430 | | ConstantFPSDNode * |
9431 | | BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts, |
9432 | 0 | BitVector *UndefElements) const { |
9433 | 0 | return dyn_cast_or_null<ConstantFPSDNode>( |
9434 | 0 | getSplatValue(DemandedElts, UndefElements)); |
9435 | 0 | } |
9436 | | |
9437 | | ConstantFPSDNode * |
9438 | 41.5k | BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { |
9439 | 41.5k | return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); |
9440 | 41.5k | } |
9441 | | |
9442 | | int32_t |
9443 | | BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, |
9444 | 145 | uint32_t BitWidth) const { |
9445 | 145 | if (ConstantFPSDNode *CN = |
9446 | 99 | dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { |
9447 | 99 | bool IsExact; |
9448 | 99 | APSInt IntVal(BitWidth); |
9449 | 99 | const APFloat &APF = CN->getValueAPF(); |
9450 | 99 | if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != |
9451 | 99 | APFloat::opOK || |
9452 | 99 | !IsExact93 ) |
9453 | 6 | return -1; |
9454 | 93 | |
9455 | 93 | return IntVal.exactLogBase2(); |
9456 | 93 | } |
9457 | 46 | return -1; |
9458 | 46 | } |
9459 | | |
9460 | 354k | bool BuildVectorSDNode::isConstant() const { |
9461 | 1.11M | for (const SDValue &Op : op_values()) { |
9462 | 1.11M | unsigned Opc = Op.getOpcode(); |
9463 | 1.11M | if (Opc != ISD::UNDEF && Opc != ISD::Constant1.10M && Opc != ISD::ConstantFP295k ) |
9464 | 191k | return false; |
9465 | 1.11M | } |
9466 | 354k | return true163k ; |
9467 | 354k | } |
9468 | | |
9469 | 174k | bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { |
9470 | 174k | // Find the first non-undef value in the shuffle mask. |
9471 | 174k | unsigned i, e; |
9472 | 191k | for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0191k ; ++i17.3k ) |
9473 | 17.3k | /* search */; |
9474 | 174k | |
9475 | 174k | // If all elements are undefined, this shuffle can be considered a splat |
9476 | 174k | // (although it should eventually get simplified away completely). |
9477 | 174k | if (i == e) |
9478 | 1 | return true; |
9479 | 174k | |
9480 | 174k | // Make sure all remaining elements are either undef or the same as the first |
9481 | 174k | // non-undef value. |
9482 | 718k | for (int Idx = Mask[i]; 174k i != e; ++i544k ) |
9483 | 672k | if (Mask[i] >= 0 && Mask[i] != Idx498k ) |
9484 | 128k | return false; |
9485 | 174k | return true45.6k ; |
9486 | 174k | } |
9487 | | |
9488 | | // Returns the SDNode if it is a constant integer BuildVector |
9489 | | // or constant integer. |
9490 | 8.28M | SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { |
9491 | 8.28M | if (isa<ConstantSDNode>(N)) |
9492 | 1.17M | return N.getNode(); |
9493 | 7.11M | if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) |
9494 | 13.3k | return N.getNode(); |
9495 | 7.09M | // Treat a GlobalAddress supporting constant offset folding as a |
9496 | 7.09M | // constant integer. |
9497 | 7.09M | if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N)) |
9498 | 88.0k | if (GA->getOpcode() == ISD::GlobalAddress && |
9499 | 88.0k | TLI->isOffsetFoldingLegal(GA)87.4k ) |
9500 | 1.03k | return GA; |
9501 | 7.09M | return nullptr; |
9502 | 7.09M | } |
9503 | | |
9504 | 5.39k | SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { |
9505 | 5.39k | if (isa<ConstantFPSDNode>(N)) |
9506 | 460 | return N.getNode(); |
9507 | 4.93k | |
9508 | 4.93k | if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) |
9509 | 11 | return N.getNode(); |
9510 | 4.92k | |
9511 | 4.92k | return nullptr; |
9512 | 4.92k | } |
9513 | | |
9514 | 32.7M | void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { |
9515 | 32.7M | assert(!Node->OperandList && "Node already has operands"); |
9516 | 32.7M | assert(SDNode::getMaxNumOperands() >= Vals.size() && |
9517 | 32.7M | "too many operands to fit into SDNode"); |
9518 | 32.7M | SDUse *Ops = OperandRecycler.allocate( |
9519 | 32.7M | ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); |
9520 | 32.7M | |
9521 | 32.7M | bool IsDivergent = false; |
9522 | 127M | for (unsigned I = 0; I != Vals.size(); ++I95.2M ) { |
9523 | 95.2M | Ops[I].setUser(Node); |
9524 | 95.2M | Ops[I].setInitial(Vals[I]); |
9525 | 95.2M | if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence. |
9526 | 69.2M | IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent()68.7M ; |
9527 | 95.2M | } |
9528 | 32.7M | Node->NumOperands = Vals.size(); |
9529 | 32.7M | Node->OperandList = Ops; |
9530 | 32.7M | IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); |
9531 | 32.7M | if (!TLI->isSDNodeAlwaysUniform(Node)) |
9532 | 32.5M | Node->SDNodeBits.IsDivergent = IsDivergent; |
9533 | 32.7M | checkForCycles(Node); |
9534 | 32.7M | } |
9535 | | |
9536 | | SDValue SelectionDAG::getTokenFactor(const SDLoc &DL, |
9537 | 847k | SmallVectorImpl<SDValue> &Vals) { |
9538 | 847k | size_t Limit = SDNode::getMaxNumOperands(); |
9539 | 847k | while (Vals.size() > Limit) { |
9540 | 0 | unsigned SliceIdx = Vals.size() - Limit; |
9541 | 0 | auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit); |
9542 | 0 | SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs); |
9543 | 0 | Vals.erase(Vals.begin() + SliceIdx, Vals.end()); |
9544 | 0 | Vals.emplace_back(NewTF); |
9545 | 0 | } |
9546 | 847k | return getNode(ISD::TokenFactor, DL, MVT::Other, Vals); |
9547 | 847k | } |
9548 | | |
9549 | | #ifndef NDEBUG |
9550 | | static void checkForCyclesHelper(const SDNode *N, |
9551 | | SmallPtrSetImpl<const SDNode*> &Visited, |
9552 | | SmallPtrSetImpl<const SDNode*> &Checked, |
9553 | | const llvm::SelectionDAG *DAG) { |
9554 | | // If this node has already been checked, don't check it again. |
9555 | | if (Checked.count(N)) |
9556 | | return; |
9557 | | |
9558 | | // If a node has already been visited on this depth-first walk, reject it as |
9559 | | // a cycle. |
9560 | | if (!Visited.insert(N).second) { |
9561 | | errs() << "Detected cycle in SelectionDAG\n"; |
9562 | | dbgs() << "Offending node:\n"; |
9563 | | N->dumprFull(DAG); dbgs() << "\n"; |
9564 | | abort(); |
9565 | | } |
9566 | | |
9567 | | for (const SDValue &Op : N->op_values()) |
9568 | | checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG); |
9569 | | |
9570 | | Checked.insert(N); |
9571 | | Visited.erase(N); |
9572 | | } |
9573 | | #endif |
9574 | | |
9575 | | void llvm::checkForCycles(const llvm::SDNode *N, |
9576 | | const llvm::SelectionDAG *DAG, |
9577 | 176M | bool force) { |
9578 | | #ifndef NDEBUG |
9579 | | bool check = force; |
9580 | | #ifdef EXPENSIVE_CHECKS |
9581 | | check = true; |
9582 | | #endif // EXPENSIVE_CHECKS |
9583 | | if (check) { |
9584 | | assert(N && "Checking nonexistent SDNode"); |
9585 | | SmallPtrSet<const SDNode*, 32> visited; |
9586 | | SmallPtrSet<const SDNode*, 32> checked; |
9587 | | checkForCyclesHelper(N, visited, checked, DAG); |
9588 | | } |
9589 | | #endif // !NDEBUG |
9590 | | } |
9591 | | |
9592 | 16.3M | void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) { |
9593 | 16.3M | checkForCycles(DAG->getRoot().getNode(), DAG, force); |
9594 | 16.3M | } |