/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements the SelectionDAG::LegalizeVectors method. |
10 | | // |
11 | | // The vector legalizer looks for vector operations which might need to be |
12 | | // scalarized and legalizes them. This is a separate step from Legalize because |
13 | | // scalarizing can introduce illegal types. For example, suppose we have an |
14 | | // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition |
15 | | // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the |
16 | | // operation, which introduces nodes with the illegal type i64 which must be |
17 | | // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; |
18 | | // the operation must be unrolled, which introduces nodes with the illegal |
19 | | // type i8 which must be promoted. |
20 | | // |
21 | | // This does not legalize vector manipulations like ISD::BUILD_VECTOR, |
22 | | // or operations that happen to take a vector which are custom-lowered; |
23 | | // the legalization for such operations never produces nodes |
24 | | // with illegal types, so it's okay to put off legalizing them until |
25 | | // SelectionDAG::Legalize runs. |
26 | | // |
27 | | //===----------------------------------------------------------------------===// |
28 | | |
29 | | #include "llvm/ADT/APInt.h" |
30 | | #include "llvm/ADT/DenseMap.h" |
31 | | #include "llvm/ADT/SmallVector.h" |
32 | | #include "llvm/CodeGen/ISDOpcodes.h" |
33 | | #include "llvm/CodeGen/MachineMemOperand.h" |
34 | | #include "llvm/CodeGen/SelectionDAG.h" |
35 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
36 | | #include "llvm/CodeGen/TargetLowering.h" |
37 | | #include "llvm/CodeGen/ValueTypes.h" |
38 | | #include "llvm/IR/DataLayout.h" |
39 | | #include "llvm/Support/Casting.h" |
40 | | #include "llvm/Support/Compiler.h" |
41 | | #include "llvm/Support/ErrorHandling.h" |
42 | | #include "llvm/Support/MachineValueType.h" |
43 | | #include "llvm/Support/MathExtras.h" |
44 | | #include <cassert> |
45 | | #include <cstdint> |
46 | | #include <iterator> |
47 | | #include <utility> |
48 | | |
49 | | using namespace llvm; |
50 | | |
51 | | #define DEBUG_TYPE "legalizevectorops" |
52 | | |
53 | | namespace { |
54 | | |
55 | | class VectorLegalizer { |
56 | | SelectionDAG& DAG; |
57 | | const TargetLowering &TLI; |
58 | | bool Changed = false; // Keep track of whether anything changed |
59 | | |
60 | | /// For nodes that are of legal width, and that have more than one use, this |
61 | | /// map indicates what regularized operand to use. This allows us to avoid |
62 | | /// legalizing the same thing more than once. |
63 | | SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; |
64 | | |
65 | | /// Adds a node to the translation cache. |
66 | 5.88M | void AddLegalizedOperand(SDValue From, SDValue To) { |
67 | 5.88M | LegalizedNodes.insert(std::make_pair(From, To)); |
68 | 5.88M | // If someone requests legalization of the new node, return itself. |
69 | 5.88M | if (From != To) |
70 | 138k | LegalizedNodes.insert(std::make_pair(To, To)); |
71 | 5.88M | } |
72 | | |
73 | | /// Legalizes the given node. |
74 | | SDValue LegalizeOp(SDValue Op); |
75 | | |
76 | | /// Assuming the node is legal, "legalize" the results. |
77 | | SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); |
78 | | |
79 | | /// Implements unrolling a VSETCC. |
80 | | SDValue UnrollVSETCC(SDValue Op); |
81 | | |
82 | | /// Implement expand-based legalization of vector operations. |
83 | | /// |
84 | | /// This is just a high-level routine to dispatch to specific code paths for |
85 | | /// operations to legalize them. |
86 | | SDValue Expand(SDValue Op); |
87 | | |
88 | | /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if |
89 | | /// FP_TO_SINT isn't legal. |
90 | | SDValue ExpandFP_TO_UINT(SDValue Op); |
91 | | |
92 | | /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if |
93 | | /// SINT_TO_FLOAT and SHR on vectors isn't legal. |
94 | | SDValue ExpandUINT_TO_FLOAT(SDValue Op); |
95 | | |
96 | | /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. |
97 | | SDValue ExpandSEXTINREG(SDValue Op); |
98 | | |
99 | | /// Implement expansion for ANY_EXTEND_VECTOR_INREG. |
100 | | /// |
101 | | /// Shuffles the low lanes of the operand into place and bitcasts to the proper |
102 | | /// type. The contents of the bits in the extended part of each element are |
103 | | /// undef. |
104 | | SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); |
105 | | |
106 | | /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. |
107 | | /// |
108 | | /// Shuffles the low lanes of the operand into place, bitcasts to the proper |
109 | | /// type, then shifts left and arithmetic shifts right to introduce a sign |
110 | | /// extension. |
111 | | SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); |
112 | | |
113 | | /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. |
114 | | /// |
115 | | /// Shuffles the low lanes of the operand into place and blends zeros into |
116 | | /// the remaining lanes, finally bitcasting to the proper type. |
117 | | SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); |
118 | | |
119 | | /// Implement expand-based legalization of ABS vector operations. |
120 | | /// If following expanding is legal/custom then do it: |
121 | | /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1)) |
122 | | /// else unroll the operation. |
123 | | SDValue ExpandABS(SDValue Op); |
124 | | |
125 | | /// Expand bswap of vectors into a shuffle if legal. |
126 | | SDValue ExpandBSWAP(SDValue Op); |
127 | | |
128 | | /// Implement vselect in terms of XOR, AND, OR when blend is not |
129 | | /// supported by the target. |
130 | | SDValue ExpandVSELECT(SDValue Op); |
131 | | SDValue ExpandSELECT(SDValue Op); |
132 | | SDValue ExpandLoad(SDValue Op); |
133 | | SDValue ExpandStore(SDValue Op); |
134 | | SDValue ExpandFNEG(SDValue Op); |
135 | | SDValue ExpandFSUB(SDValue Op); |
136 | | SDValue ExpandBITREVERSE(SDValue Op); |
137 | | SDValue ExpandCTPOP(SDValue Op); |
138 | | SDValue ExpandCTLZ(SDValue Op); |
139 | | SDValue ExpandCTTZ(SDValue Op); |
140 | | SDValue ExpandFunnelShift(SDValue Op); |
141 | | SDValue ExpandROT(SDValue Op); |
142 | | SDValue ExpandFMINNUM_FMAXNUM(SDValue Op); |
143 | | SDValue ExpandUADDSUBO(SDValue Op); |
144 | | SDValue ExpandSADDSUBO(SDValue Op); |
145 | | SDValue ExpandMULO(SDValue Op); |
146 | | SDValue ExpandAddSubSat(SDValue Op); |
147 | | SDValue ExpandFixedPointMul(SDValue Op); |
148 | | SDValue ExpandStrictFPOp(SDValue Op); |
149 | | |
150 | | /// Implements vector promotion. |
151 | | /// |
152 | | /// This is essentially just bitcasting the operands to a different type and |
153 | | /// bitcasting the result back to the original type. |
154 | | SDValue Promote(SDValue Op); |
155 | | |
156 | | /// Implements [SU]INT_TO_FP vector promotion. |
157 | | /// |
158 | | /// This is a [zs]ext of the input operand to a larger integer type. |
159 | | SDValue PromoteINT_TO_FP(SDValue Op); |
160 | | |
161 | | /// Implements FP_TO_[SU]INT vector promotion of the result type. |
162 | | /// |
163 | | /// It is promoted to a larger integer type. The result is then |
164 | | /// truncated back to the original type. |
165 | | SDValue PromoteFP_TO_INT(SDValue Op); |
166 | | |
167 | | public: |
168 | | VectorLegalizer(SelectionDAG& dag) : |
169 | 1.24M | DAG(dag), TLI(dag.getTargetLoweringInfo()) {} |
170 | | |
171 | | /// Begin legalizer the vector operations in the DAG. |
172 | | bool Run(); |
173 | | }; |
174 | | |
175 | | } // end anonymous namespace |
176 | | |
177 | 1.24M | bool VectorLegalizer::Run() { |
178 | 1.24M | // Before we start legalizing vector nodes, check if there are any vectors. |
179 | 1.24M | bool HasVectors = false; |
180 | 1.24M | for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), |
181 | 22.2M | E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I20.9M ) { |
182 | 21.1M | // Check if the values of the nodes contain vectors. We don't need to check |
183 | 21.1M | // the operands because we are going to check their values at some point. |
184 | 21.1M | for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); |
185 | 47.2M | J != E; ++J26.0M ) |
186 | 26.0M | HasVectors |= J->isVector(); |
187 | 21.1M | |
188 | 21.1M | // If we found a vector node we can start the legalization. |
189 | 21.1M | if (HasVectors) |
190 | 178k | break; |
191 | 21.1M | } |
192 | 1.24M | |
193 | 1.24M | // If this basic block has no vectors then no need to legalize vectors. |
194 | 1.24M | if (!HasVectors) |
195 | 1.06M | return false; |
196 | 178k | |
197 | 178k | // The legalize process is inherently a bottom-up recursive process (users |
198 | 178k | // legalize their uses before themselves). Given infinite stack space, we |
199 | 178k | // could just start legalizing on the root and traverse the whole graph. In |
200 | 178k | // practice however, this causes us to run out of stack space on large basic |
201 | 178k | // blocks. To avoid this problem, compute an ordering of the nodes where each |
202 | 178k | // node is only legalized after all of its operands are legalized. |
203 | 178k | DAG.AssignTopologicalOrder(); |
204 | 178k | for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), |
205 | 4.86M | E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I4.68M ) |
206 | 4.68M | LegalizeOp(SDValue(&*I, 0)); |
207 | 178k | |
208 | 178k | // Finally, it's possible the root changed. Get the new root. |
209 | 178k | SDValue OldRoot = DAG.getRoot(); |
210 | 178k | assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); |
211 | 178k | DAG.setRoot(LegalizedNodes[OldRoot]); |
212 | 178k | |
213 | 178k | LegalizedNodes.clear(); |
214 | 178k | |
215 | 178k | // Remove dead nodes now. |
216 | 178k | DAG.RemoveDeadNodes(); |
217 | 178k | |
218 | 178k | return Changed; |
219 | 178k | } |
220 | | |
221 | 4.71M | SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { |
222 | 4.71M | // Generic legalization: just pass the operand through. |
223 | 10.1M | for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i5.44M ) |
224 | 5.44M | AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); |
225 | 4.71M | return Result.getValue(Op.getResNo()); |
226 | 4.71M | } |
227 | | |
228 | 13.4M | SDValue VectorLegalizer::LegalizeOp(SDValue Op) { |
229 | 13.4M | // Note that LegalizeOp may be reentered even from single-use nodes, which |
230 | 13.4M | // means that we always must cache transformed nodes. |
231 | 13.4M | DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); |
232 | 13.4M | if (I != LegalizedNodes.end()) return I->second8.27M ; |
233 | 5.14M | |
234 | 5.14M | SDNode* Node = Op.getNode(); |
235 | 5.14M | |
236 | 5.14M | // Legalize the operands |
237 | 5.14M | SmallVector<SDValue, 8> Ops; |
238 | 5.14M | for (const SDValue &Op : Node->op_values()) |
239 | 8.59M | Ops.push_back(LegalizeOp(Op)); |
240 | 5.14M | |
241 | 5.14M | SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), |
242 | 5.14M | Op.getResNo()); |
243 | 5.14M | |
244 | 5.14M | if (Op.getOpcode() == ISD::LOAD) { |
245 | 134k | LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); |
246 | 134k | ISD::LoadExtType ExtType = LD->getExtensionType(); |
247 | 134k | if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD80.5k ) { |
248 | 6.89k | LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: "; |
249 | 6.89k | Node->dump(&DAG)); |
250 | 6.89k | switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), |
251 | 6.89k | LD->getMemoryVT())) { |
252 | 6.89k | default: 0 llvm_unreachable0 ("This action is not supported yet!"); |
253 | 6.89k | case TargetLowering::Legal: |
254 | 2.06k | return TranslateLegalizeResults(Op, Result); |
255 | 6.89k | case TargetLowering::Custom: |
256 | 1.62k | if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { |
257 | 1.62k | assert(Lowered->getNumValues() == Op->getNumValues() && |
258 | 1.62k | "Unexpected number of results"); |
259 | 1.62k | if (Lowered != Result) { |
260 | 1.62k | // Make sure the new code is also legal. |
261 | 1.62k | Lowered = LegalizeOp(Lowered); |
262 | 1.62k | Changed = true; |
263 | 1.62k | } |
264 | 1.62k | return TranslateLegalizeResults(Op, Lowered); |
265 | 1.62k | } |
266 | 0 | LLVM_FALLTHROUGH; |
267 | 3.20k | case TargetLowering::Expand: |
268 | 3.20k | Changed = true; |
269 | 3.20k | return ExpandLoad(Op); |
270 | 5.01M | } |
271 | 5.01M | } |
272 | 5.01M | } else if (Op.getOpcode() == ISD::STORE) { |
273 | 271k | StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); |
274 | 271k | EVT StVT = ST->getMemoryVT(); |
275 | 271k | MVT ValVT = ST->getValue().getSimpleValueType(); |
276 | 271k | if (StVT.isVector() && ST->isTruncatingStore()194k ) { |
277 | 1.13k | LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: "; |
278 | 1.13k | Node->dump(&DAG)); |
279 | 1.13k | switch (TLI.getTruncStoreAction(ValVT, StVT)) { |
280 | 1.13k | default: 0 llvm_unreachable0 ("This action is not supported yet!"); |
281 | 1.13k | case TargetLowering::Legal: |
282 | 445 | return TranslateLegalizeResults(Op, Result); |
283 | 1.13k | case TargetLowering::Custom: { |
284 | 434 | SDValue Lowered = TLI.LowerOperation(Result, DAG); |
285 | 434 | if (Lowered != Result) { |
286 | 434 | // Make sure the new code is also legal. |
287 | 434 | Lowered = LegalizeOp(Lowered); |
288 | 434 | Changed = true; |
289 | 434 | } |
290 | 434 | return TranslateLegalizeResults(Op, Lowered); |
291 | 1.13k | } |
292 | 1.13k | case TargetLowering::Expand: |
293 | 256 | Changed = true; |
294 | 256 | return ExpandStore(Op); |
295 | 5.13M | } |
296 | 5.13M | } |
297 | 271k | } |
298 | 5.13M | |
299 | 5.13M | bool HasVectorValueOrOp = false; |
300 | 11.0M | for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J5.86M ) |
301 | 5.86M | HasVectorValueOrOp |= J->isVector(); |
302 | 5.13M | for (const SDValue &Op : Node->op_values()) |
303 | 8.57M | HasVectorValueOrOp |= Op.getValueType().isVector(); |
304 | 5.13M | |
305 | 5.13M | if (!HasVectorValueOrOp) |
306 | 2.94M | return TranslateLegalizeResults(Op, Result); |
307 | 2.19M | |
308 | 2.19M | TargetLowering::LegalizeAction Action = TargetLowering::Legal; |
309 | 2.19M | switch (Op.getOpcode()) { |
310 | 2.19M | default: |
311 | 1.76M | return TranslateLegalizeResults(Op, Result); |
312 | 2.19M | case ISD::STRICT_FADD: |
313 | 646 | case ISD::STRICT_FSUB: |
314 | 646 | case ISD::STRICT_FMUL: |
315 | 646 | case ISD::STRICT_FDIV: |
316 | 646 | case ISD::STRICT_FREM: |
317 | 646 | case ISD::STRICT_FSQRT: |
318 | 646 | case ISD::STRICT_FMA: |
319 | 646 | case ISD::STRICT_FPOW: |
320 | 646 | case ISD::STRICT_FPOWI: |
321 | 646 | case ISD::STRICT_FSIN: |
322 | 646 | case ISD::STRICT_FCOS: |
323 | 646 | case ISD::STRICT_FEXP: |
324 | 646 | case ISD::STRICT_FEXP2: |
325 | 646 | case ISD::STRICT_FLOG: |
326 | 646 | case ISD::STRICT_FLOG10: |
327 | 646 | case ISD::STRICT_FLOG2: |
328 | 646 | case ISD::STRICT_FRINT: |
329 | 646 | case ISD::STRICT_FNEARBYINT: |
330 | 646 | case ISD::STRICT_FMAXNUM: |
331 | 646 | case ISD::STRICT_FMINNUM: |
332 | 646 | case ISD::STRICT_FCEIL: |
333 | 646 | case ISD::STRICT_FFLOOR: |
334 | 646 | case ISD::STRICT_FROUND: |
335 | 646 | case ISD::STRICT_FTRUNC: |
336 | 646 | case ISD::STRICT_FP_ROUND: |
337 | 646 | case ISD::STRICT_FP_EXTEND: |
338 | 646 | // These pseudo-ops get legalized as if they were their non-strict |
339 | 646 | // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT |
340 | 646 | // is also legal, but if ISD::FSQRT requires expansion then so does |
341 | 646 | // ISD::STRICT_FSQRT. |
342 | 646 | Action = TLI.getStrictFPOperationAction(Node->getOpcode(), |
343 | 646 | Node->getValueType(0)); |
344 | 646 | break; |
345 | 370k | case ISD::ADD: |
346 | 370k | case ISD::SUB: |
347 | 370k | case ISD::MUL: |
348 | 370k | case ISD::MULHS: |
349 | 370k | case ISD::MULHU: |
350 | 370k | case ISD::SDIV: |
351 | 370k | case ISD::UDIV: |
352 | 370k | case ISD::SREM: |
353 | 370k | case ISD::UREM: |
354 | 370k | case ISD::SDIVREM: |
355 | 370k | case ISD::UDIVREM: |
356 | 370k | case ISD::FADD: |
357 | 370k | case ISD::FSUB: |
358 | 370k | case ISD::FMUL: |
359 | 370k | case ISD::FDIV: |
360 | 370k | case ISD::FREM: |
361 | 370k | case ISD::AND: |
362 | 370k | case ISD::OR: |
363 | 370k | case ISD::XOR: |
364 | 370k | case ISD::SHL: |
365 | 370k | case ISD::SRA: |
366 | 370k | case ISD::SRL: |
367 | 370k | case ISD::FSHL: |
368 | 370k | case ISD::FSHR: |
369 | 370k | case ISD::ROTL: |
370 | 370k | case ISD::ROTR: |
371 | 370k | case ISD::ABS: |
372 | 370k | case ISD::BSWAP: |
373 | 370k | case ISD::BITREVERSE: |
374 | 370k | case ISD::CTLZ: |
375 | 370k | case ISD::CTTZ: |
376 | 370k | case ISD::CTLZ_ZERO_UNDEF: |
377 | 370k | case ISD::CTTZ_ZERO_UNDEF: |
378 | 370k | case ISD::CTPOP: |
379 | 370k | case ISD::SELECT: |
380 | 370k | case ISD::VSELECT: |
381 | 370k | case ISD::SELECT_CC: |
382 | 370k | case ISD::SETCC: |
383 | 370k | case ISD::ZERO_EXTEND: |
384 | 370k | case ISD::ANY_EXTEND: |
385 | 370k | case ISD::TRUNCATE: |
386 | 370k | case ISD::SIGN_EXTEND: |
387 | 370k | case ISD::FP_TO_SINT: |
388 | 370k | case ISD::FP_TO_UINT: |
389 | 370k | case ISD::FNEG: |
390 | 370k | case ISD::FABS: |
391 | 370k | case ISD::FMINNUM: |
392 | 370k | case ISD::FMAXNUM: |
393 | 370k | case ISD::FMINNUM_IEEE: |
394 | 370k | case ISD::FMAXNUM_IEEE: |
395 | 370k | case ISD::FMINIMUM: |
396 | 370k | case ISD::FMAXIMUM: |
397 | 370k | case ISD::FCOPYSIGN: |
398 | 370k | case ISD::FSQRT: |
399 | 370k | case ISD::FSIN: |
400 | 370k | case ISD::FCOS: |
401 | 370k | case ISD::FPOWI: |
402 | 370k | case ISD::FPOW: |
403 | 370k | case ISD::FLOG: |
404 | 370k | case ISD::FLOG2: |
405 | 370k | case ISD::FLOG10: |
406 | 370k | case ISD::FEXP: |
407 | 370k | case ISD::FEXP2: |
408 | 370k | case ISD::FCEIL: |
409 | 370k | case ISD::FTRUNC: |
410 | 370k | case ISD::FRINT: |
411 | 370k | case ISD::FNEARBYINT: |
412 | 370k | case ISD::FROUND: |
413 | 370k | case ISD::FFLOOR: |
414 | 370k | case ISD::FP_ROUND: |
415 | 370k | case ISD::FP_EXTEND: |
416 | 370k | case ISD::FMA: |
417 | 370k | case ISD::SIGN_EXTEND_INREG: |
418 | 370k | case ISD::ANY_EXTEND_VECTOR_INREG: |
419 | 370k | case ISD::SIGN_EXTEND_VECTOR_INREG: |
420 | 370k | case ISD::ZERO_EXTEND_VECTOR_INREG: |
421 | 370k | case ISD::SMIN: |
422 | 370k | case ISD::SMAX: |
423 | 370k | case ISD::UMIN: |
424 | 370k | case ISD::UMAX: |
425 | 370k | case ISD::SMUL_LOHI: |
426 | 370k | case ISD::UMUL_LOHI: |
427 | 370k | case ISD::SADDO: |
428 | 370k | case ISD::UADDO: |
429 | 370k | case ISD::SSUBO: |
430 | 370k | case ISD::USUBO: |
431 | 370k | case ISD::SMULO: |
432 | 370k | case ISD::UMULO: |
433 | 370k | case ISD::FCANONICALIZE: |
434 | 370k | case ISD::SADDSAT: |
435 | 370k | case ISD::UADDSAT: |
436 | 370k | case ISD::SSUBSAT: |
437 | 370k | case ISD::USUBSAT: |
438 | 370k | Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); |
439 | 370k | break; |
440 | 370k | case ISD::SMULFIX: |
441 | 6 | case ISD::SMULFIXSAT: |
442 | 6 | case ISD::UMULFIX: { |
443 | 6 | unsigned Scale = Node->getConstantOperandVal(2); |
444 | 6 | Action = TLI.getFixedPointOperationAction(Node->getOpcode(), |
445 | 6 | Node->getValueType(0), Scale); |
446 | 6 | break; |
447 | 6 | } |
448 | 6 | case ISD::FP_ROUND_INREG: |
449 | 0 | Action = TLI.getOperationAction(Node->getOpcode(), |
450 | 0 | cast<VTSDNode>(Node->getOperand(1))->getVT()); |
451 | 0 | break; |
452 | 59.8k | case ISD::SINT_TO_FP: |
453 | 59.8k | case ISD::UINT_TO_FP: |
454 | 59.8k | case ISD::VECREDUCE_ADD: |
455 | 59.8k | case ISD::VECREDUCE_MUL: |
456 | 59.8k | case ISD::VECREDUCE_AND: |
457 | 59.8k | case ISD::VECREDUCE_OR: |
458 | 59.8k | case ISD::VECREDUCE_XOR: |
459 | 59.8k | case ISD::VECREDUCE_SMAX: |
460 | 59.8k | case ISD::VECREDUCE_SMIN: |
461 | 59.8k | case ISD::VECREDUCE_UMAX: |
462 | 59.8k | case ISD::VECREDUCE_UMIN: |
463 | 59.8k | case ISD::VECREDUCE_FADD: |
464 | 59.8k | case ISD::VECREDUCE_FMUL: |
465 | 59.8k | case ISD::VECREDUCE_FMAX: |
466 | 59.8k | case ISD::VECREDUCE_FMIN: |
467 | 59.8k | Action = TLI.getOperationAction(Node->getOpcode(), |
468 | 59.8k | Node->getOperand(0).getValueType()); |
469 | 59.8k | break; |
470 | 431k | } |
471 | 431k | |
472 | 431k | LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); |
473 | 431k | |
474 | 431k | switch (Action) { |
475 | 431k | default: 0 llvm_unreachable0 ("This action is not supported yet!"); |
476 | 431k | case TargetLowering::Promote: |
477 | 886 | Result = Promote(Op); |
478 | 886 | Changed = true; |
479 | 886 | break; |
480 | 431k | case TargetLowering::Legal: |
481 | 238k | LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); |
482 | 238k | break; |
483 | 431k | case TargetLowering::Custom: { |
484 | 174k | LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); |
485 | 174k | if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { |
486 | 170k | LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); |
487 | 170k | Result = Tmp1; |
488 | 170k | break; |
489 | 170k | } |
490 | 3.41k | LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); |
491 | 3.41k | LLVM_FALLTHROUGH; |
492 | 3.41k | } |
493 | 21.4k | case TargetLowering::Expand: |
494 | 21.4k | Result = Expand(Op); |
495 | 431k | } |
496 | 431k | |
497 | 431k | // Make sure that the generated code is itself legal. |
498 | 431k | if (Result != Op) { |
499 | 126k | Result = LegalizeOp(Result); |
500 | 126k | Changed = true; |
501 | 126k | } |
502 | 431k | |
503 | 431k | // Note that LegalizeOp may be reentered even from single-use nodes, which |
504 | 431k | // means that we always must cache transformed nodes. |
505 | 431k | AddLegalizedOperand(Op, Result); |
506 | 431k | return Result; |
507 | 431k | } |
508 | | |
509 | 886 | SDValue VectorLegalizer::Promote(SDValue Op) { |
510 | 886 | // For a few operations there is a specific concept for promotion based on |
511 | 886 | // the operand's type. |
512 | 886 | switch (Op.getOpcode()) { |
513 | 886 | case ISD::SINT_TO_FP: |
514 | 16 | case ISD::UINT_TO_FP: |
515 | 16 | // "Promote" the operation by extending the operand. |
516 | 16 | return PromoteINT_TO_FP(Op); |
517 | 346 | case ISD::FP_TO_UINT: |
518 | 346 | case ISD::FP_TO_SINT: |
519 | 346 | // Promote the operation by extending the operand. |
520 | 346 | return PromoteFP_TO_INT(Op); |
521 | 524 | } |
522 | 524 | |
523 | 524 | // There are currently two cases of vector promotion: |
524 | 524 | // 1) Bitcasting a vector of integers to a different type to a vector of the |
525 | 524 | // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. |
526 | 524 | // 2) Extending a vector of floats to a vector of the same number of larger |
527 | 524 | // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. |
528 | 524 | MVT VT = Op.getSimpleValueType(); |
529 | 524 | assert(Op.getNode()->getNumValues() == 1 && |
530 | 524 | "Can't promote a vector with multiple results!"); |
531 | 524 | MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); |
532 | 524 | SDLoc dl(Op); |
533 | 524 | SmallVector<SDValue, 4> Operands(Op.getNumOperands()); |
534 | 524 | |
535 | 1.61k | for (unsigned j = 0; j != Op.getNumOperands(); ++j1.08k ) { |
536 | 1.08k | if (Op.getOperand(j).getValueType().isVector()) |
537 | 965 | if (Op.getOperand(j) |
538 | 965 | .getValueType() |
539 | 965 | .getVectorElementType() |
540 | 965 | .isFloatingPoint() && |
541 | 965 | NVT.isVector()211 && NVT.getVectorElementType().isFloatingPoint()195 ) |
542 | 141 | Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); |
543 | 824 | else |
544 | 824 | Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); |
545 | 123 | else |
546 | 123 | Operands[j] = Op.getOperand(j); |
547 | 1.08k | } |
548 | 524 | |
549 | 524 | Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); |
550 | 524 | if ((VT.isFloatingPoint() && NVT.isFloatingPoint()147 ) || |
551 | 524 | (412 VT.isVector()412 && VT.getVectorElementType().isFloatingPoint()412 && |
552 | 412 | NVT.isVector()35 && NVT.getVectorElementType().isFloatingPoint()27 )) |
553 | 112 | return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); |
554 | 412 | else |
555 | 412 | return DAG.getNode(ISD::BITCAST, dl, VT, Op); |
556 | 524 | } |
557 | | |
558 | 16 | SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { |
559 | 16 | // INT_TO_FP operations may require the input operand be promoted even |
560 | 16 | // when the type is otherwise legal. |
561 | 16 | MVT VT = Op.getOperand(0).getSimpleValueType(); |
562 | 16 | MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); |
563 | 16 | assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && |
564 | 16 | "Vectors have different number of elements!"); |
565 | 16 | |
566 | 16 | SDLoc dl(Op); |
567 | 16 | SmallVector<SDValue, 4> Operands(Op.getNumOperands()); |
568 | 16 | |
569 | 16 | unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND8 : |
570 | 16 | ISD::SIGN_EXTEND8 ; |
571 | 32 | for (unsigned j = 0; j != Op.getNumOperands(); ++j16 ) { |
572 | 16 | if (Op.getOperand(j).getValueType().isVector()) |
573 | 16 | Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); |
574 | 0 | else |
575 | 0 | Operands[j] = Op.getOperand(j); |
576 | 16 | } |
577 | 16 | |
578 | 16 | return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands); |
579 | 16 | } |
580 | | |
581 | | // For FP_TO_INT we promote the result type to a vector type with wider |
582 | | // elements and then truncate the result. This is different from the default |
583 | | // PromoteVector which uses bitcast to promote thus assumning that the |
584 | | // promoted vector type has the same overall size. |
585 | 346 | SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) { |
586 | 346 | MVT VT = Op.getSimpleValueType(); |
587 | 346 | MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); |
588 | 346 | assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && |
589 | 346 | "Vectors have different number of elements!"); |
590 | 346 | |
591 | 346 | unsigned NewOpc = Op->getOpcode(); |
592 | 346 | // Change FP_TO_UINT to FP_TO_SINT if possible. |
593 | 346 | // TODO: Should we only do this if FP_TO_UINT itself isn't legal? |
594 | 346 | if (NewOpc == ISD::FP_TO_UINT && |
595 | 346 | TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)161 ) |
596 | 161 | NewOpc = ISD::FP_TO_SINT; |
597 | 346 | |
598 | 346 | SDLoc dl(Op); |
599 | 346 | SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0)); |
600 | 346 | |
601 | 346 | // Assert that the converted value fits in the original type. If it doesn't |
602 | 346 | // (eg: because the value being converted is too big), then the result of the |
603 | 346 | // original operation was undefined anyway, so the assert is still correct. |
604 | 346 | Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext161 |
605 | 346 | : ISD::AssertSext185 , |
606 | 346 | dl, NVT, Promoted, |
607 | 346 | DAG.getValueType(VT.getScalarType())); |
608 | 346 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); |
609 | 346 | } |
610 | | |
611 | 3.20k | SDValue VectorLegalizer::ExpandLoad(SDValue Op) { |
612 | 3.20k | LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); |
613 | 3.20k | |
614 | 3.20k | EVT SrcVT = LD->getMemoryVT(); |
615 | 3.20k | EVT SrcEltVT = SrcVT.getScalarType(); |
616 | 3.20k | unsigned NumElem = SrcVT.getVectorNumElements(); |
617 | 3.20k | |
618 | 3.20k | SDValue NewChain; |
619 | 3.20k | SDValue Value; |
620 | 3.20k | if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { |
621 | 740 | SDLoc dl(Op); |
622 | 740 | |
623 | 740 | SmallVector<SDValue, 8> Vals; |
624 | 740 | SmallVector<SDValue, 8> LoadChains; |
625 | 740 | |
626 | 740 | EVT DstEltVT = LD->getValueType(0).getScalarType(); |
627 | 740 | SDValue Chain = LD->getChain(); |
628 | 740 | SDValue BasePTR = LD->getBasePtr(); |
629 | 740 | ISD::LoadExtType ExtType = LD->getExtensionType(); |
630 | 740 | |
631 | 740 | // When elements in a vector is not byte-addressable, we cannot directly |
632 | 740 | // load each element by advancing pointer, which could only address bytes. |
633 | 740 | // Instead, we load all significant words, mask bits off, and concatenate |
634 | 740 | // them to form each element. Finally, they are extended to destination |
635 | 740 | // scalar type to build the destination vector. |
636 | 740 | EVT WideVT = TLI.getPointerTy(DAG.getDataLayout()); |
637 | 740 | |
638 | 740 | assert(WideVT.isRound() && |
639 | 740 | "Could not handle the sophisticated case when the widest integer is" |
640 | 740 | " not power of 2."); |
641 | 740 | assert(WideVT.bitsGE(SrcEltVT) && |
642 | 740 | "Type is not legalized?"); |
643 | 740 | |
644 | 740 | unsigned WideBytes = WideVT.getStoreSize(); |
645 | 740 | unsigned Offset = 0; |
646 | 740 | unsigned RemainingBytes = SrcVT.getStoreSize(); |
647 | 740 | SmallVector<SDValue, 8> LoadVals; |
648 | 1.52k | while (RemainingBytes > 0) { |
649 | 782 | SDValue ScalarLoad; |
650 | 782 | unsigned LoadBytes = WideBytes; |
651 | 782 | |
652 | 782 | if (RemainingBytes >= LoadBytes) { |
653 | 55 | ScalarLoad = |
654 | 55 | DAG.getLoad(WideVT, dl, Chain, BasePTR, |
655 | 55 | LD->getPointerInfo().getWithOffset(Offset), |
656 | 55 | MinAlign(LD->getAlignment(), Offset), |
657 | 55 | LD->getMemOperand()->getFlags(), LD->getAAInfo()); |
658 | 727 | } else { |
659 | 727 | EVT LoadVT = WideVT; |
660 | 2.07k | while (RemainingBytes < LoadBytes) { |
661 | 1.34k | LoadBytes >>= 1; // Reduce the load size by half. |
662 | 1.34k | LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); |
663 | 1.34k | } |
664 | 727 | ScalarLoad = |
665 | 727 | DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, |
666 | 727 | LD->getPointerInfo().getWithOffset(Offset), LoadVT, |
667 | 727 | MinAlign(LD->getAlignment(), Offset), |
668 | 727 | LD->getMemOperand()->getFlags(), LD->getAAInfo()); |
669 | 727 | } |
670 | 782 | |
671 | 782 | RemainingBytes -= LoadBytes; |
672 | 782 | Offset += LoadBytes; |
673 | 782 | |
674 | 782 | BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes); |
675 | 782 | |
676 | 782 | LoadVals.push_back(ScalarLoad.getValue(0)); |
677 | 782 | LoadChains.push_back(ScalarLoad.getValue(1)); |
678 | 782 | } |
679 | 740 | |
680 | 740 | unsigned BitOffset = 0; |
681 | 740 | unsigned WideIdx = 0; |
682 | 740 | unsigned WideBits = WideVT.getSizeInBits(); |
683 | 740 | |
684 | 740 | // Extract bits, pack and extend/trunc them into destination type. |
685 | 740 | unsigned SrcEltBits = SrcEltVT.getSizeInBits(); |
686 | 740 | SDValue SrcEltBitMask = DAG.getConstant( |
687 | 740 | APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT); |
688 | 740 | |
689 | 4.66k | for (unsigned Idx = 0; Idx != NumElem; ++Idx3.92k ) { |
690 | 3.92k | assert(BitOffset < WideBits && "Unexpected offset!"); |
691 | 3.92k | |
692 | 3.92k | SDValue ShAmt = DAG.getConstant( |
693 | 3.92k | BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); |
694 | 3.92k | SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); |
695 | 3.92k | |
696 | 3.92k | BitOffset += SrcEltBits; |
697 | 3.92k | if (BitOffset >= WideBits) { |
698 | 55 | WideIdx++; |
699 | 55 | BitOffset -= WideBits; |
700 | 55 | if (BitOffset > 0) { |
701 | 38 | ShAmt = DAG.getConstant( |
702 | 38 | SrcEltBits - BitOffset, dl, |
703 | 38 | TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); |
704 | 38 | SDValue Hi = |
705 | 38 | DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); |
706 | 38 | Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); |
707 | 38 | } |
708 | 55 | } |
709 | 3.92k | |
710 | 3.92k | Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); |
711 | 3.92k | |
712 | 3.92k | switch (ExtType) { |
713 | 3.92k | default: 0 llvm_unreachable0 ("Unknown extended-load op!"); |
714 | 3.92k | case ISD::EXTLOAD: |
715 | 2.60k | Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); |
716 | 2.60k | break; |
717 | 3.92k | case ISD::ZEXTLOAD: |
718 | 628 | Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); |
719 | 628 | break; |
720 | 3.92k | case ISD::SEXTLOAD: |
721 | 692 | ShAmt = |
722 | 692 | DAG.getConstant(WideBits - SrcEltBits, dl, |
723 | 692 | TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); |
724 | 692 | Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); |
725 | 692 | Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); |
726 | 692 | Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); |
727 | 692 | break; |
728 | 3.92k | } |
729 | 3.92k | Vals.push_back(Lo); |
730 | 3.92k | } |
731 | 740 | |
732 | 740 | NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); |
733 | 740 | Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals); |
734 | 2.46k | } else { |
735 | 2.46k | SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); |
736 | 2.46k | // Skip past MERGE_VALUE node if known. |
737 | 2.46k | if (Scalarized->getOpcode() == ISD::MERGE_VALUES) { |
738 | 2.46k | NewChain = Scalarized.getOperand(1); |
739 | 2.46k | Value = Scalarized.getOperand(0); |
740 | 2.46k | } else { |
741 | 0 | NewChain = Scalarized.getValue(1); |
742 | 0 | Value = Scalarized.getValue(0); |
743 | 0 | } |
744 | 2.46k | } |
745 | 3.20k | |
746 | 3.20k | AddLegalizedOperand(Op.getValue(0), Value); |
747 | 3.20k | AddLegalizedOperand(Op.getValue(1), NewChain); |
748 | 3.20k | |
749 | 3.20k | return (Op.getResNo() ? NewChain0 : Value); |
750 | 3.20k | } |
751 | | |
752 | 256 | SDValue VectorLegalizer::ExpandStore(SDValue Op) { |
753 | 256 | StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); |
754 | 256 | SDValue TF = TLI.scalarizeVectorStore(ST, DAG); |
755 | 256 | AddLegalizedOperand(Op, TF); |
756 | 256 | return TF; |
757 | 256 | } |
758 | | |
759 | 21.4k | SDValue VectorLegalizer::Expand(SDValue Op) { |
760 | 21.4k | switch (Op->getOpcode()) { |
761 | 21.4k | case ISD::SIGN_EXTEND_INREG: |
762 | 6.75k | return ExpandSEXTINREG(Op); |
763 | 21.4k | case ISD::ANY_EXTEND_VECTOR_INREG: |
764 | 615 | return ExpandANY_EXTEND_VECTOR_INREG(Op); |
765 | 21.4k | case ISD::SIGN_EXTEND_VECTOR_INREG: |
766 | 69 | return ExpandSIGN_EXTEND_VECTOR_INREG(Op); |
767 | 21.4k | case ISD::ZERO_EXTEND_VECTOR_INREG: |
768 | 886 | return ExpandZERO_EXTEND_VECTOR_INREG(Op); |
769 | 21.4k | case ISD::BSWAP: |
770 | 67 | return ExpandBSWAP(Op); |
771 | 21.4k | case ISD::VSELECT: |
772 | 3.93k | return ExpandVSELECT(Op); |
773 | 21.4k | case ISD::SELECT: |
774 | 112 | return ExpandSELECT(Op); |
775 | 21.4k | case ISD::FP_TO_UINT: |
776 | 163 | return ExpandFP_TO_UINT(Op); |
777 | 21.4k | case ISD::UINT_TO_FP: |
778 | 262 | return ExpandUINT_TO_FLOAT(Op); |
779 | 21.4k | case ISD::FNEG: |
780 | 44 | return ExpandFNEG(Op); |
781 | 21.4k | case ISD::FSUB: |
782 | 50 | return ExpandFSUB(Op); |
783 | 21.4k | case ISD::SETCC: |
784 | 86 | return UnrollVSETCC(Op); |
785 | 21.4k | case ISD::ABS: |
786 | 103 | return ExpandABS(Op); |
787 | 21.4k | case ISD::BITREVERSE: |
788 | 135 | return ExpandBITREVERSE(Op); |
789 | 21.4k | case ISD::CTPOP: |
790 | 102 | return ExpandCTPOP(Op); |
791 | 21.4k | case ISD::CTLZ: |
792 | 149 | case ISD::CTLZ_ZERO_UNDEF: |
793 | 149 | return ExpandCTLZ(Op); |
794 | 255 | case ISD::CTTZ: |
795 | 255 | case ISD::CTTZ_ZERO_UNDEF: |
796 | 255 | return ExpandCTTZ(Op); |
797 | 255 | case ISD::FSHL: |
798 | 2 | case ISD::FSHR: |
799 | 2 | return ExpandFunnelShift(Op); |
800 | 208 | case ISD::ROTL: |
801 | 208 | case ISD::ROTR: |
802 | 208 | return ExpandROT(Op); |
803 | 211 | case ISD::FMINNUM: |
804 | 211 | case ISD::FMAXNUM: |
805 | 211 | return ExpandFMINNUM_FMAXNUM(Op); |
806 | 211 | case ISD::UADDO: |
807 | 164 | case ISD::USUBO: |
808 | 164 | return ExpandUADDSUBO(Op); |
809 | 363 | case ISD::SADDO: |
810 | 363 | case ISD::SSUBO: |
811 | 363 | return ExpandSADDSUBO(Op); |
812 | 363 | case ISD::UMULO: |
813 | 187 | case ISD::SMULO: |
814 | 187 | return ExpandMULO(Op); |
815 | 376 | case ISD::USUBSAT: |
816 | 376 | case ISD::SSUBSAT: |
817 | 376 | case ISD::UADDSAT: |
818 | 376 | case ISD::SADDSAT: |
819 | 376 | return ExpandAddSubSat(Op); |
820 | 376 | case ISD::SMULFIX: |
821 | 4 | case ISD::UMULFIX: |
822 | 4 | return ExpandFixedPointMul(Op); |
823 | 238 | case ISD::STRICT_FADD: |
824 | 238 | case ISD::STRICT_FSUB: |
825 | 238 | case ISD::STRICT_FMUL: |
826 | 238 | case ISD::STRICT_FDIV: |
827 | 238 | case ISD::STRICT_FREM: |
828 | 238 | case ISD::STRICT_FSQRT: |
829 | 238 | case ISD::STRICT_FMA: |
830 | 238 | case ISD::STRICT_FPOW: |
831 | 238 | case ISD::STRICT_FPOWI: |
832 | 238 | case ISD::STRICT_FSIN: |
833 | 238 | case ISD::STRICT_FCOS: |
834 | 238 | case ISD::STRICT_FEXP: |
835 | 238 | case ISD::STRICT_FEXP2: |
836 | 238 | case ISD::STRICT_FLOG: |
837 | 238 | case ISD::STRICT_FLOG10: |
838 | 238 | case ISD::STRICT_FLOG2: |
839 | 238 | case ISD::STRICT_FRINT: |
840 | 238 | case ISD::STRICT_FNEARBYINT: |
841 | 238 | case ISD::STRICT_FMAXNUM: |
842 | 238 | case ISD::STRICT_FMINNUM: |
843 | 238 | case ISD::STRICT_FCEIL: |
844 | 238 | case ISD::STRICT_FFLOOR: |
845 | 238 | case ISD::STRICT_FROUND: |
846 | 238 | case ISD::STRICT_FTRUNC: |
847 | 238 | return ExpandStrictFPOp(Op); |
848 | 238 | case ISD::VECREDUCE_ADD: |
849 | 24 | case ISD::VECREDUCE_MUL: |
850 | 24 | case ISD::VECREDUCE_AND: |
851 | 24 | case ISD::VECREDUCE_OR: |
852 | 24 | case ISD::VECREDUCE_XOR: |
853 | 24 | case ISD::VECREDUCE_SMAX: |
854 | 24 | case ISD::VECREDUCE_SMIN: |
855 | 24 | case ISD::VECREDUCE_UMAX: |
856 | 24 | case ISD::VECREDUCE_UMIN: |
857 | 24 | case ISD::VECREDUCE_FADD: |
858 | 24 | case ISD::VECREDUCE_FMUL: |
859 | 24 | case ISD::VECREDUCE_FMAX: |
860 | 24 | case ISD::VECREDUCE_FMIN: |
861 | 24 | return TLI.expandVecReduce(Op.getNode(), DAG); |
862 | 5.86k | default: |
863 | 5.86k | return DAG.UnrollVectorOp(Op.getNode()); |
864 | 21.4k | } |
865 | 21.4k | } |
866 | | |
867 | 112 | SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { |
868 | 112 | // Lower a select instruction where the condition is a scalar and the |
869 | 112 | // operands are vectors. Lower this select to VSELECT and implement it |
870 | 112 | // using XOR AND OR. The selector bit is broadcasted. |
871 | 112 | EVT VT = Op.getValueType(); |
872 | 112 | SDLoc DL(Op); |
873 | 112 | |
874 | 112 | SDValue Mask = Op.getOperand(0); |
875 | 112 | SDValue Op1 = Op.getOperand(1); |
876 | 112 | SDValue Op2 = Op.getOperand(2); |
877 | 112 | |
878 | 112 | assert(VT.isVector() && !Mask.getValueType().isVector() |
879 | 112 | && Op1.getValueType() == Op2.getValueType() && "Invalid type"); |
880 | 112 | |
881 | 112 | // If we can't even use the basic vector operations of |
882 | 112 | // AND,OR,XOR, we will have to scalarize the op. |
883 | 112 | // Notice that the operation may be 'promoted' which means that it is |
884 | 112 | // 'bitcasted' to another type which is handled. |
885 | 112 | // Also, we need to be able to construct a splat vector using BUILD_VECTOR. |
886 | 112 | if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || |
887 | 112 | TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand27 || |
888 | 112 | TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand27 || |
889 | 112 | TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand27 ) |
890 | 85 | return DAG.UnrollVectorOp(Op.getNode()); |
891 | 27 | |
892 | 27 | // Generate a mask operand. |
893 | 27 | EVT MaskTy = VT.changeVectorElementTypeToInteger(); |
894 | 27 | |
895 | 27 | // What is the size of each element in the vector mask. |
896 | 27 | EVT BitTy = MaskTy.getScalarType(); |
897 | 27 | |
898 | 27 | Mask = DAG.getSelect(DL, BitTy, Mask, |
899 | 27 | DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, |
900 | 27 | BitTy), |
901 | 27 | DAG.getConstant(0, DL, BitTy)); |
902 | 27 | |
903 | 27 | // Broadcast the mask so that the entire vector is all-one or all zero. |
904 | 27 | Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask); |
905 | 27 | |
906 | 27 | // Bitcast the operands to be the same type as the mask. |
907 | 27 | // This is needed when we select between FP types because |
908 | 27 | // the mask is a vector of integers. |
909 | 27 | Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); |
910 | 27 | Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); |
911 | 27 | |
912 | 27 | SDValue AllOnes = DAG.getConstant( |
913 | 27 | APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy); |
914 | 27 | SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); |
915 | 27 | |
916 | 27 | Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); |
917 | 27 | Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); |
918 | 27 | SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); |
919 | 27 | return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); |
920 | 27 | } |
921 | | |
922 | 6.75k | SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { |
923 | 6.75k | EVT VT = Op.getValueType(); |
924 | 6.75k | |
925 | 6.75k | // Make sure that the SRA and SHL instructions are available. |
926 | 6.75k | if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || |
927 | 6.75k | TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand6.72k ) |
928 | 27 | return DAG.UnrollVectorOp(Op.getNode()); |
929 | 6.72k | |
930 | 6.72k | SDLoc DL(Op); |
931 | 6.72k | EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); |
932 | 6.72k | |
933 | 6.72k | unsigned BW = VT.getScalarSizeInBits(); |
934 | 6.72k | unsigned OrigBW = OrigTy.getScalarSizeInBits(); |
935 | 6.72k | SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); |
936 | 6.72k | |
937 | 6.72k | Op = Op.getOperand(0); |
938 | 6.72k | Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); |
939 | 6.72k | return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); |
940 | 6.72k | } |
941 | | |
942 | | // Generically expand a vector anyext in register to a shuffle of the relevant |
943 | | // lanes into the appropriate locations, with other lanes left undef. |
944 | 615 | SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { |
945 | 615 | SDLoc DL(Op); |
946 | 615 | EVT VT = Op.getValueType(); |
947 | 615 | int NumElements = VT.getVectorNumElements(); |
948 | 615 | SDValue Src = Op.getOperand(0); |
949 | 615 | EVT SrcVT = Src.getValueType(); |
950 | 615 | int NumSrcElements = SrcVT.getVectorNumElements(); |
951 | 615 | |
952 | 615 | // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector |
953 | 615 | // into a larger vector type. |
954 | 615 | if (SrcVT.bitsLE(VT)) { |
955 | 615 | assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
956 | 615 | "ANY_EXTEND_VECTOR_INREG vector size mismatch"); |
957 | 615 | NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); |
958 | 615 | SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), |
959 | 615 | NumSrcElements); |
960 | 615 | Src = DAG.getNode( |
961 | 615 | ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src, |
962 | 615 | DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); |
963 | 615 | } |
964 | 615 | |
965 | 615 | // Build a base mask of undef shuffles. |
966 | 615 | SmallVector<int, 16> ShuffleMask; |
967 | 615 | ShuffleMask.resize(NumSrcElements, -1); |
968 | 615 | |
969 | 615 | // Place the extended lanes into the correct locations. |
970 | 615 | int ExtLaneScale = NumSrcElements / NumElements; |
971 | 615 | int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 19 : 0606 ; |
972 | 2.82k | for (int i = 0; i < NumElements; ++i2.20k ) |
973 | 2.20k | ShuffleMask[i * ExtLaneScale + EndianOffset] = i; |
974 | 615 | |
975 | 615 | return DAG.getNode( |
976 | 615 | ISD::BITCAST, DL, VT, |
977 | 615 | DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); |
978 | 615 | } |
979 | | |
980 | 69 | SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { |
981 | 69 | SDLoc DL(Op); |
982 | 69 | EVT VT = Op.getValueType(); |
983 | 69 | SDValue Src = Op.getOperand(0); |
984 | 69 | EVT SrcVT = Src.getValueType(); |
985 | 69 | |
986 | 69 | // First build an any-extend node which can be legalized above when we |
987 | 69 | // recurse through it. |
988 | 69 | Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); |
989 | 69 | |
990 | 69 | // Now we need sign extend. Do this by shifting the elements. Even if these |
991 | 69 | // aren't legal operations, they have a better chance of being legalized |
992 | 69 | // without full scalarization than the sign extension does. |
993 | 69 | unsigned EltWidth = VT.getScalarSizeInBits(); |
994 | 69 | unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); |
995 | 69 | SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); |
996 | 69 | return DAG.getNode(ISD::SRA, DL, VT, |
997 | 69 | DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), |
998 | 69 | ShiftAmount); |
999 | 69 | } |
1000 | | |
1001 | | // Generically expand a vector zext in register to a shuffle of the relevant |
1002 | | // lanes into the appropriate locations, a blend of zero into the high bits, |
1003 | | // and a bitcast to the wider element type. |
1004 | 886 | SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { |
1005 | 886 | SDLoc DL(Op); |
1006 | 886 | EVT VT = Op.getValueType(); |
1007 | 886 | int NumElements = VT.getVectorNumElements(); |
1008 | 886 | SDValue Src = Op.getOperand(0); |
1009 | 886 | EVT SrcVT = Src.getValueType(); |
1010 | 886 | int NumSrcElements = SrcVT.getVectorNumElements(); |
1011 | 886 | |
1012 | 886 | // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector |
1013 | 886 | // into a larger vector type. |
1014 | 886 | if (SrcVT.bitsLE(VT)) { |
1015 | 886 | assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
1016 | 886 | "ZERO_EXTEND_VECTOR_INREG vector size mismatch"); |
1017 | 886 | NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); |
1018 | 886 | SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), |
1019 | 886 | NumSrcElements); |
1020 | 886 | Src = DAG.getNode( |
1021 | 886 | ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src, |
1022 | 886 | DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); |
1023 | 886 | } |
1024 | 886 | |
1025 | 886 | // Build up a zero vector to blend into this one. |
1026 | 886 | SDValue Zero = DAG.getConstant(0, DL, SrcVT); |
1027 | 886 | |
1028 | 886 | // Shuffle the incoming lanes into the correct position, and pull all other |
1029 | 886 | // lanes from the zero vector. |
1030 | 886 | SmallVector<int, 16> ShuffleMask; |
1031 | 886 | ShuffleMask.reserve(NumSrcElements); |
1032 | 12.1k | for (int i = 0; i < NumSrcElements; ++i11.2k ) |
1033 | 11.2k | ShuffleMask.push_back(i); |
1034 | 886 | |
1035 | 886 | int ExtLaneScale = NumSrcElements / NumElements; |
1036 | 886 | int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 122 : 0864 ; |
1037 | 4.57k | for (int i = 0; i < NumElements; ++i3.69k ) |
1038 | 3.69k | ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; |
1039 | 886 | |
1040 | 886 | return DAG.getNode(ISD::BITCAST, DL, VT, |
1041 | 886 | DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); |
1042 | 886 | } |
1043 | | |
1044 | 161 | static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { |
1045 | 161 | int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; |
1046 | 1.08k | for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I928 ) |
1047 | 4.26k | for (int J = ScalarSizeInBytes - 1; 928 J >= 0; --J3.33k ) |
1048 | 3.33k | ShuffleMask.push_back((I * ScalarSizeInBytes) + J); |
1049 | 161 | } |
1050 | | |
1051 | 67 | SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { |
1052 | 67 | EVT VT = Op.getValueType(); |
1053 | 67 | |
1054 | 67 | // Generate a byte wise shuffle mask for the BSWAP. |
1055 | 67 | SmallVector<int, 16> ShuffleMask; |
1056 | 67 | createBSWAPShuffleMask(VT, ShuffleMask); |
1057 | 67 | EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); |
1058 | 67 | |
1059 | 67 | // Only emit a shuffle if the mask is legal. |
1060 | 67 | if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) |
1061 | 12 | return DAG.UnrollVectorOp(Op.getNode()); |
1062 | 55 | |
1063 | 55 | SDLoc DL(Op); |
1064 | 55 | Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); |
1065 | 55 | Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); |
1066 | 55 | return DAG.getNode(ISD::BITCAST, DL, VT, Op); |
1067 | 55 | } |
1068 | | |
1069 | 135 | SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { |
1070 | 135 | EVT VT = Op.getValueType(); |
1071 | 135 | |
1072 | 135 | // If we have the scalar operation, it's probably cheaper to unroll it. |
1073 | 135 | if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) |
1074 | 10 | return DAG.UnrollVectorOp(Op.getNode()); |
1075 | 125 | |
1076 | 125 | // If the vector element width is a whole number of bytes, test if its legal |
1077 | 125 | // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte |
1078 | 125 | // vector. This greatly reduces the number of bit shifts necessary. |
1079 | 125 | unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); |
1080 | 125 | if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 094 ) { |
1081 | 94 | SmallVector<int, 16> BSWAPMask; |
1082 | 94 | createBSWAPShuffleMask(VT, BSWAPMask); |
1083 | 94 | |
1084 | 94 | EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size()); |
1085 | 94 | if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && |
1086 | 94 | (86 TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT)86 || |
1087 | 86 | (23 TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT)23 && |
1088 | 23 | TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) && |
1089 | 23 | TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) && |
1090 | 86 | TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)23 ))) { |
1091 | 86 | SDLoc DL(Op); |
1092 | 86 | Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); |
1093 | 86 | Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), |
1094 | 86 | BSWAPMask); |
1095 | 86 | Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op); |
1096 | 86 | return DAG.getNode(ISD::BITCAST, DL, VT, Op); |
1097 | 86 | } |
1098 | 39 | } |
1099 | 39 | |
1100 | 39 | // If we have the appropriate vector bit operations, it is better to use them |
1101 | 39 | // than unrolling and expanding each component. |
1102 | 39 | if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || |
1103 | 39 | !TLI.isOperationLegalOrCustom(ISD::SRL, VT)33 || |
1104 | 39 | !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT)33 || |
1105 | 39 | !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)33 ) |
1106 | 6 | return DAG.UnrollVectorOp(Op.getNode()); |
1107 | 33 | |
1108 | 33 | // Let LegalizeDAG handle this later. |
1109 | 33 | return Op; |
1110 | 33 | } |
1111 | | |
1112 | 3.93k | SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { |
1113 | 3.93k | // Implement VSELECT in terms of XOR, AND, OR |
1114 | 3.93k | // on platforms which do not support blend natively. |
1115 | 3.93k | SDLoc DL(Op); |
1116 | 3.93k | |
1117 | 3.93k | SDValue Mask = Op.getOperand(0); |
1118 | 3.93k | SDValue Op1 = Op.getOperand(1); |
1119 | 3.93k | SDValue Op2 = Op.getOperand(2); |
1120 | 3.93k | |
1121 | 3.93k | EVT VT = Mask.getValueType(); |
1122 | 3.93k | |
1123 | 3.93k | // If we can't even use the basic vector operations of |
1124 | 3.93k | // AND,OR,XOR, we will have to scalarize the op. |
1125 | 3.93k | // Notice that the operation may be 'promoted' which means that it is |
1126 | 3.93k | // 'bitcasted' to another type which is handled. |
1127 | 3.93k | // This operation also isn't safe with AND, OR, XOR when the boolean |
1128 | 3.93k | // type is 0/1 as we need an all ones vector constant to mask with. |
1129 | 3.93k | // FIXME: Sign extend 1 to all ones if thats legal on the target. |
1130 | 3.93k | if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || |
1131 | 3.93k | TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand3.91k || |
1132 | 3.93k | TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand3.91k || |
1133 | 3.93k | TLI.getBooleanContents(Op1.getValueType()) != |
1134 | 3.91k | TargetLowering::ZeroOrNegativeOneBooleanContent) |
1135 | 17 | return DAG.UnrollVectorOp(Op.getNode()); |
1136 | 3.91k | |
1137 | 3.91k | // If the mask and the type are different sizes, unroll the vector op. This |
1138 | 3.91k | // can occur when getSetCCResultType returns something that is different in |
1139 | 3.91k | // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. |
1140 | 3.91k | if (VT.getSizeInBits() != Op1.getValueSizeInBits()) |
1141 | 0 | return DAG.UnrollVectorOp(Op.getNode()); |
1142 | 3.91k | |
1143 | 3.91k | // Bitcast the operands to be the same type as the mask. |
1144 | 3.91k | // This is needed when we select between FP types because |
1145 | 3.91k | // the mask is a vector of integers. |
1146 | 3.91k | Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); |
1147 | 3.91k | Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); |
1148 | 3.91k | |
1149 | 3.91k | SDValue AllOnes = DAG.getConstant( |
1150 | 3.91k | APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT); |
1151 | 3.91k | SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); |
1152 | 3.91k | |
1153 | 3.91k | Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); |
1154 | 3.91k | Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); |
1155 | 3.91k | SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); |
1156 | 3.91k | return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); |
1157 | 3.91k | } |
1158 | | |
1159 | 103 | SDValue VectorLegalizer::ExpandABS(SDValue Op) { |
1160 | 103 | // Attempt to expand using TargetLowering. |
1161 | 103 | SDValue Result; |
1162 | 103 | if (TLI.expandABS(Op.getNode(), Result, DAG)) |
1163 | 101 | return Result; |
1164 | 2 | |
1165 | 2 | // Otherwise go ahead and unroll. |
1166 | 2 | return DAG.UnrollVectorOp(Op.getNode()); |
1167 | 2 | } |
1168 | | |
1169 | 163 | SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { |
1170 | 163 | // Attempt to expand using TargetLowering. |
1171 | 163 | SDValue Result; |
1172 | 163 | if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG)) |
1173 | 42 | return Result; |
1174 | 121 | |
1175 | 121 | // Otherwise go ahead and unroll. |
1176 | 121 | return DAG.UnrollVectorOp(Op.getNode()); |
1177 | 121 | } |
1178 | | |
1179 | 262 | SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { |
1180 | 262 | EVT VT = Op.getOperand(0).getValueType(); |
1181 | 262 | SDLoc DL(Op); |
1182 | 262 | |
1183 | 262 | // Attempt to expand using TargetLowering. |
1184 | 262 | SDValue Result; |
1185 | 262 | if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG)) |
1186 | 107 | return Result; |
1187 | 155 | |
1188 | 155 | // Make sure that the SINT_TO_FP and SRL instructions are available. |
1189 | 155 | if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || |
1190 | 155 | TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand37 ) |
1191 | 118 | return DAG.UnrollVectorOp(Op.getNode()); |
1192 | 37 | |
1193 | 37 | unsigned BW = VT.getScalarSizeInBits(); |
1194 | 37 | assert((BW == 64 || BW == 32) && |
1195 | 37 | "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); |
1196 | 37 | |
1197 | 37 | SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT); |
1198 | 37 | |
1199 | 37 | // Constants to clear the upper part of the word. |
1200 | 37 | // Notice that we can also use SHL+SHR, but using a constant is slightly |
1201 | 37 | // faster on x86. |
1202 | 37 | uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF0 : 0x0000FFFF; |
1203 | 37 | SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); |
1204 | 37 | |
1205 | 37 | // Two to the power of half-word-size. |
1206 | 37 | SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType()); |
1207 | 37 | |
1208 | 37 | // Clear upper part of LO, lower HI |
1209 | 37 | SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); |
1210 | 37 | SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); |
1211 | 37 | |
1212 | 37 | // Convert hi and lo to floats |
1213 | 37 | // Convert the hi part back to the upper values |
1214 | 37 | // TODO: Can any fast-math-flags be set on these nodes? |
1215 | 37 | SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); |
1216 | 37 | fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); |
1217 | 37 | SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); |
1218 | 37 | |
1219 | 37 | // Add the two halves |
1220 | 37 | return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); |
1221 | 37 | } |
1222 | | |
1223 | 44 | SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { |
1224 | 44 | if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { |
1225 | 4 | SDLoc DL(Op); |
1226 | 4 | SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); |
1227 | 4 | // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. |
1228 | 4 | return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), |
1229 | 4 | Zero, Op.getOperand(0)); |
1230 | 4 | } |
1231 | 40 | return DAG.UnrollVectorOp(Op.getNode()); |
1232 | 40 | } |
1233 | | |
1234 | 50 | SDValue VectorLegalizer::ExpandFSUB(SDValue Op) { |
1235 | 50 | // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal, |
1236 | 50 | // we can defer this to operation legalization where it will be lowered as |
1237 | 50 | // a+(-b). |
1238 | 50 | EVT VT = Op.getValueType(); |
1239 | 50 | if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && |
1240 | 50 | TLI.isOperationLegalOrCustom(ISD::FADD, VT)17 ) |
1241 | 4 | return Op; // Defer to LegalizeDAG |
1242 | 46 | |
1243 | 46 | return DAG.UnrollVectorOp(Op.getNode()); |
1244 | 46 | } |
1245 | | |
1246 | 102 | SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) { |
1247 | 102 | SDValue Result; |
1248 | 102 | if (TLI.expandCTPOP(Op.getNode(), Result, DAG)) |
1249 | 57 | return Result; |
1250 | 45 | |
1251 | 45 | return DAG.UnrollVectorOp(Op.getNode()); |
1252 | 45 | } |
1253 | | |
1254 | 149 | SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { |
1255 | 149 | SDValue Result; |
1256 | 149 | if (TLI.expandCTLZ(Op.getNode(), Result, DAG)) |
1257 | 129 | return Result; |
1258 | 20 | |
1259 | 20 | return DAG.UnrollVectorOp(Op.getNode()); |
1260 | 20 | } |
1261 | | |
1262 | 255 | SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) { |
1263 | 255 | SDValue Result; |
1264 | 255 | if (TLI.expandCTTZ(Op.getNode(), Result, DAG)) |
1265 | 241 | return Result; |
1266 | 14 | |
1267 | 14 | return DAG.UnrollVectorOp(Op.getNode()); |
1268 | 14 | } |
1269 | | |
1270 | 2 | SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) { |
1271 | 2 | SDValue Result; |
1272 | 2 | if (TLI.expandFunnelShift(Op.getNode(), Result, DAG)) |
1273 | 2 | return Result; |
1274 | 0 | |
1275 | 0 | return DAG.UnrollVectorOp(Op.getNode()); |
1276 | 0 | } |
1277 | | |
1278 | 208 | SDValue VectorLegalizer::ExpandROT(SDValue Op) { |
1279 | 208 | SDValue Result; |
1280 | 208 | if (TLI.expandROT(Op.getNode(), Result, DAG)) |
1281 | 208 | return Result; |
1282 | 0 | |
1283 | 0 | return DAG.UnrollVectorOp(Op.getNode()); |
1284 | 0 | } |
1285 | | |
1286 | 211 | SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) { |
1287 | 211 | if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG)) |
1288 | 26 | return Expanded; |
1289 | 185 | return DAG.UnrollVectorOp(Op.getNode()); |
1290 | 185 | } |
1291 | | |
1292 | 164 | SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) { |
1293 | 164 | SDValue Result, Overflow; |
1294 | 164 | TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG); |
1295 | 164 | |
1296 | 164 | if (Op.getResNo() == 0) { |
1297 | 164 | AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); |
1298 | 164 | return Result; |
1299 | 164 | } else { |
1300 | 0 | AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); |
1301 | 0 | return Overflow; |
1302 | 0 | } |
1303 | 164 | } |
1304 | | |
1305 | 363 | SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) { |
1306 | 363 | SDValue Result, Overflow; |
1307 | 363 | TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG); |
1308 | 363 | |
1309 | 363 | if (Op.getResNo() == 0) { |
1310 | 137 | AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); |
1311 | 137 | return Result; |
1312 | 226 | } else { |
1313 | 226 | AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); |
1314 | 226 | return Overflow; |
1315 | 226 | } |
1316 | 363 | } |
1317 | | |
1318 | 187 | SDValue VectorLegalizer::ExpandMULO(SDValue Op) { |
1319 | 187 | SDValue Result, Overflow; |
1320 | 187 | if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG)) |
1321 | 27 | std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode()); |
1322 | 187 | |
1323 | 187 | if (Op.getResNo() == 0) { |
1324 | 187 | AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); |
1325 | 187 | return Result; |
1326 | 187 | } else { |
1327 | 0 | AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); |
1328 | 0 | return Overflow; |
1329 | 0 | } |
1330 | 187 | } |
1331 | | |
1332 | 376 | SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) { |
1333 | 376 | if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG)) |
1334 | 376 | return Expanded; |
1335 | 0 | return DAG.UnrollVectorOp(Op.getNode()); |
1336 | 0 | } |
1337 | | |
1338 | 4 | SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) { |
1339 | 4 | if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG)) |
1340 | 4 | return Expanded; |
1341 | 0 | return DAG.UnrollVectorOp(Op.getNode()); |
1342 | 0 | } |
1343 | | |
1344 | 238 | SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { |
1345 | 238 | EVT VT = Op.getValueType(); |
1346 | 238 | EVT EltVT = VT.getVectorElementType(); |
1347 | 238 | unsigned NumElems = VT.getVectorNumElements(); |
1348 | 238 | unsigned NumOpers = Op.getNumOperands(); |
1349 | 238 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
1350 | 238 | EVT ValueVTs[] = {EltVT, MVT::Other}; |
1351 | 238 | SDValue Chain = Op.getOperand(0); |
1352 | 238 | SDLoc dl(Op); |
1353 | 238 | |
1354 | 238 | SmallVector<SDValue, 32> OpValues; |
1355 | 238 | SmallVector<SDValue, 32> OpChains; |
1356 | 738 | for (unsigned i = 0; i < NumElems; ++i500 ) { |
1357 | 500 | SmallVector<SDValue, 4> Opers; |
1358 | 500 | SDValue Idx = DAG.getConstant(i, dl, |
1359 | 500 | TLI.getVectorIdxTy(DAG.getDataLayout())); |
1360 | 500 | |
1361 | 500 | // The Chain is the first operand. |
1362 | 500 | Opers.push_back(Chain); |
1363 | 500 | |
1364 | 500 | // Now process the remaining operands. |
1365 | 1.16k | for (unsigned j = 1; j < NumOpers; ++j668 ) { |
1366 | 668 | SDValue Oper = Op.getOperand(j); |
1367 | 668 | EVT OperVT = Oper.getValueType(); |
1368 | 668 | |
1369 | 668 | if (OperVT.isVector()) |
1370 | 628 | Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, |
1371 | 628 | OperVT.getVectorElementType(), Oper, Idx); |
1372 | 668 | |
1373 | 668 | Opers.push_back(Oper); |
1374 | 668 | } |
1375 | 500 | |
1376 | 500 | SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); |
1377 | 500 | |
1378 | 500 | OpValues.push_back(ScalarOp.getValue(0)); |
1379 | 500 | OpChains.push_back(ScalarOp.getValue(1)); |
1380 | 500 | } |
1381 | 238 | |
1382 | 238 | SDValue Result = DAG.getBuildVector(VT, dl, OpValues); |
1383 | 238 | SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); |
1384 | 238 | |
1385 | 238 | AddLegalizedOperand(Op.getValue(0), Result); |
1386 | 238 | AddLegalizedOperand(Op.getValue(1), NewChain); |
1387 | 238 | |
1388 | 238 | return Op.getResNo() ? NewChain0 : Result; |
1389 | 238 | } |
1390 | | |
1391 | 86 | SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { |
1392 | 86 | EVT VT = Op.getValueType(); |
1393 | 86 | unsigned NumElems = VT.getVectorNumElements(); |
1394 | 86 | EVT EltVT = VT.getVectorElementType(); |
1395 | 86 | SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); |
1396 | 86 | EVT TmpEltVT = LHS.getValueType().getVectorElementType(); |
1397 | 86 | SDLoc dl(Op); |
1398 | 86 | SmallVector<SDValue, 8> Ops(NumElems); |
1399 | 378 | for (unsigned i = 0; i < NumElems; ++i292 ) { |
1400 | 292 | SDValue LHSElem = DAG.getNode( |
1401 | 292 | ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, |
1402 | 292 | DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); |
1403 | 292 | SDValue RHSElem = DAG.getNode( |
1404 | 292 | ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, |
1405 | 292 | DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); |
1406 | 292 | Ops[i] = DAG.getNode(ISD::SETCC, dl, |
1407 | 292 | TLI.getSetCCResultType(DAG.getDataLayout(), |
1408 | 292 | *DAG.getContext(), TmpEltVT), |
1409 | 292 | LHSElem, RHSElem, CC); |
1410 | 292 | Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], |
1411 | 292 | DAG.getConstant(APInt::getAllOnesValue |
1412 | 292 | (EltVT.getSizeInBits()), dl, EltVT), |
1413 | 292 | DAG.getConstant(0, dl, EltVT)); |
1414 | 292 | } |
1415 | 86 | return DAG.getBuildVector(VT, dl, Ops); |
1416 | 86 | } |
1417 | | |
1418 | 1.24M | bool SelectionDAG::LegalizeVectors() { |
1419 | 1.24M | return VectorLegalizer(*this).Run(); |
1420 | 1.24M | } |