/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "HexagonISelLowering.h" |
10 | | #include "HexagonRegisterInfo.h" |
11 | | #include "HexagonSubtarget.h" |
12 | | #include "llvm/Support/CommandLine.h" |
13 | | |
14 | | using namespace llvm; |
15 | | |
16 | | static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; |
17 | | static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; |
18 | | static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; |
19 | | static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 }; |
20 | | |
21 | | |
22 | | void |
23 | 219 | HexagonTargetLowering::initializeHVXLowering() { |
24 | 219 | if (Subtarget.useHVX64BOps()) { |
25 | 164 | addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass); |
26 | 164 | addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass); |
27 | 164 | addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass); |
28 | 164 | addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass); |
29 | 164 | addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass); |
30 | 164 | addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass); |
31 | 164 | // These "short" boolean vector types should be legal because |
32 | 164 | // they will appear as results of vector compares. If they were |
33 | 164 | // not legal, type legalization would try to make them legal |
34 | 164 | // and that would require using operations that do not use or |
35 | 164 | // produce such types. That, in turn, would imply using custom |
36 | 164 | // nodes, which would be unoptimizable by the DAG combiner. |
37 | 164 | // The idea is to rely on target-independent operations as much |
38 | 164 | // as possible. |
39 | 164 | addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass); |
40 | 164 | addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); |
41 | 164 | addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); |
42 | 164 | addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass); |
43 | 164 | } else if (55 Subtarget.useHVX128BOps()55 ) { |
44 | 55 | addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass); |
45 | 55 | addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass); |
46 | 55 | addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass); |
47 | 55 | addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass); |
48 | 55 | addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass); |
49 | 55 | addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass); |
50 | 55 | addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); |
51 | 55 | addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); |
52 | 55 | addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass); |
53 | 55 | addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass); |
54 | 55 | } |
55 | 219 | |
56 | 219 | // Set up operation actions. |
57 | 219 | |
58 | 219 | bool Use64b = Subtarget.useHVX64BOps(); |
59 | 219 | ArrayRef<MVT> LegalV = Use64b ? LegalV64164 : LegalV12855 ; |
60 | 219 | ArrayRef<MVT> LegalW = Use64b ? LegalW64164 : LegalW12855 ; |
61 | 219 | MVT ByteV = Use64b ? MVT::v64i8164 : MVT::v128i855 ; |
62 | 219 | MVT ByteW = Use64b ? MVT::v128i8164 : MVT::v256i855 ; |
63 | 219 | |
64 | 876 | auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { |
65 | 876 | setOperationAction(Opc, FromTy, Promote); |
66 | 876 | AddPromotedToType(Opc, FromTy, ToTy); |
67 | 876 | }; |
68 | 219 | |
69 | 219 | setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); |
70 | 219 | setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); |
71 | 219 | |
72 | 657 | for (MVT T : LegalV) { |
73 | 657 | setIndexedLoadAction(ISD::POST_INC, T, Legal); |
74 | 657 | setIndexedStoreAction(ISD::POST_INC, T, Legal); |
75 | 657 | |
76 | 657 | setOperationAction(ISD::AND, T, Legal); |
77 | 657 | setOperationAction(ISD::OR, T, Legal); |
78 | 657 | setOperationAction(ISD::XOR, T, Legal); |
79 | 657 | setOperationAction(ISD::ADD, T, Legal); |
80 | 657 | setOperationAction(ISD::SUB, T, Legal); |
81 | 657 | setOperationAction(ISD::CTPOP, T, Legal); |
82 | 657 | setOperationAction(ISD::CTLZ, T, Legal); |
83 | 657 | if (T != ByteV) { |
84 | 438 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); |
85 | 438 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); |
86 | 438 | setOperationAction(ISD::BSWAP, T, Legal); |
87 | 438 | } |
88 | 657 | |
89 | 657 | setOperationAction(ISD::CTTZ, T, Custom); |
90 | 657 | setOperationAction(ISD::LOAD, T, Custom); |
91 | 657 | setOperationAction(ISD::MUL, T, Custom); |
92 | 657 | setOperationAction(ISD::MULHS, T, Custom); |
93 | 657 | setOperationAction(ISD::MULHU, T, Custom); |
94 | 657 | setOperationAction(ISD::BUILD_VECTOR, T, Custom); |
95 | 657 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
96 | 657 | setOperationAction(ISD::CONCAT_VECTORS, T, Custom); |
97 | 657 | setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); |
98 | 657 | setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); |
99 | 657 | setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); |
100 | 657 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); |
101 | 657 | setOperationAction(ISD::ANY_EXTEND, T, Custom); |
102 | 657 | setOperationAction(ISD::SIGN_EXTEND, T, Custom); |
103 | 657 | setOperationAction(ISD::ZERO_EXTEND, T, Custom); |
104 | 657 | if (T != ByteV) { |
105 | 438 | setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); |
106 | 438 | // HVX only has shifts of words and halfwords. |
107 | 438 | setOperationAction(ISD::SRA, T, Custom); |
108 | 438 | setOperationAction(ISD::SHL, T, Custom); |
109 | 438 | setOperationAction(ISD::SRL, T, Custom); |
110 | 438 | |
111 | 438 | // Promote all shuffles to operate on vectors of bytes. |
112 | 438 | setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); |
113 | 438 | } |
114 | 657 | |
115 | 657 | setCondCodeAction(ISD::SETNE, T, Expand); |
116 | 657 | setCondCodeAction(ISD::SETLE, T, Expand); |
117 | 657 | setCondCodeAction(ISD::SETGE, T, Expand); |
118 | 657 | setCondCodeAction(ISD::SETLT, T, Expand); |
119 | 657 | setCondCodeAction(ISD::SETULE, T, Expand); |
120 | 657 | setCondCodeAction(ISD::SETUGE, T, Expand); |
121 | 657 | setCondCodeAction(ISD::SETULT, T, Expand); |
122 | 657 | } |
123 | 219 | |
124 | 657 | for (MVT T : LegalW) { |
125 | 657 | // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- |
126 | 657 | // independent) handling of it would convert it to a load, which is |
127 | 657 | // not always the optimal choice. |
128 | 657 | setOperationAction(ISD::BUILD_VECTOR, T, Custom); |
129 | 657 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
130 | 657 | setOperationAction(ISD::CONCAT_VECTORS, T, Custom); |
131 | 657 | |
132 | 657 | // Custom-lower these operations for pairs. Expand them into a concat |
133 | 657 | // of the corresponding operations on individual vectors. |
134 | 657 | setOperationAction(ISD::ANY_EXTEND, T, Custom); |
135 | 657 | setOperationAction(ISD::SIGN_EXTEND, T, Custom); |
136 | 657 | setOperationAction(ISD::ZERO_EXTEND, T, Custom); |
137 | 657 | setOperationAction(ISD::SIGN_EXTEND_INREG, T, Custom); |
138 | 657 | setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); |
139 | 657 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); |
140 | 657 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); |
141 | 657 | |
142 | 657 | setOperationAction(ISD::LOAD, T, Custom); |
143 | 657 | setOperationAction(ISD::STORE, T, Custom); |
144 | 657 | setOperationAction(ISD::CTLZ, T, Custom); |
145 | 657 | setOperationAction(ISD::CTTZ, T, Custom); |
146 | 657 | setOperationAction(ISD::CTPOP, T, Custom); |
147 | 657 | |
148 | 657 | setOperationAction(ISD::ADD, T, Legal); |
149 | 657 | setOperationAction(ISD::SUB, T, Legal); |
150 | 657 | setOperationAction(ISD::MUL, T, Custom); |
151 | 657 | setOperationAction(ISD::MULHS, T, Custom); |
152 | 657 | setOperationAction(ISD::MULHU, T, Custom); |
153 | 657 | setOperationAction(ISD::AND, T, Custom); |
154 | 657 | setOperationAction(ISD::OR, T, Custom); |
155 | 657 | setOperationAction(ISD::XOR, T, Custom); |
156 | 657 | setOperationAction(ISD::SETCC, T, Custom); |
157 | 657 | setOperationAction(ISD::VSELECT, T, Custom); |
158 | 657 | if (T != ByteW) { |
159 | 438 | setOperationAction(ISD::SRA, T, Custom); |
160 | 438 | setOperationAction(ISD::SHL, T, Custom); |
161 | 438 | setOperationAction(ISD::SRL, T, Custom); |
162 | 438 | |
163 | 438 | // Promote all shuffles to operate on vectors of bytes. |
164 | 438 | setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); |
165 | 438 | } |
166 | 657 | } |
167 | 219 | |
168 | 219 | // Boolean vectors. |
169 | 219 | |
170 | 657 | for (MVT T : LegalW) { |
171 | 657 | // Boolean types for vector pairs will overlap with the boolean |
172 | 657 | // types for single vectors, e.g. |
173 | 657 | // v64i8 -> v64i1 (single) |
174 | 657 | // v64i16 -> v64i1 (pair) |
175 | 657 | // Set these actions first, and allow the single actions to overwrite |
176 | 657 | // any duplicates. |
177 | 657 | MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements()); |
178 | 657 | setOperationAction(ISD::SETCC, BoolW, Custom); |
179 | 657 | setOperationAction(ISD::AND, BoolW, Custom); |
180 | 657 | setOperationAction(ISD::OR, BoolW, Custom); |
181 | 657 | setOperationAction(ISD::XOR, BoolW, Custom); |
182 | 657 | } |
183 | 219 | |
184 | 657 | for (MVT T : LegalV) { |
185 | 657 | MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements()); |
186 | 657 | setOperationAction(ISD::BUILD_VECTOR, BoolV, Custom); |
187 | 657 | setOperationAction(ISD::CONCAT_VECTORS, BoolV, Custom); |
188 | 657 | setOperationAction(ISD::INSERT_SUBVECTOR, BoolV, Custom); |
189 | 657 | setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom); |
190 | 657 | setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom); |
191 | 657 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom); |
192 | 657 | setOperationAction(ISD::AND, BoolV, Legal); |
193 | 657 | setOperationAction(ISD::OR, BoolV, Legal); |
194 | 657 | setOperationAction(ISD::XOR, BoolV, Legal); |
195 | 657 | } |
196 | 219 | } |
197 | | |
198 | | SDValue |
199 | | HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops, |
200 | 0 | const SDLoc &dl, SelectionDAG &DAG) const { |
201 | 0 | SmallVector<SDValue,4> IntOps; |
202 | 0 | IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32)); |
203 | 0 | for (const SDValue &Op : Ops) |
204 | 0 | IntOps.push_back(Op); |
205 | 0 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps); |
206 | 0 | } |
207 | | |
208 | | MVT |
209 | 9 | HexagonTargetLowering::typeJoin(const TypePair &Tys) const { |
210 | 9 | assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType()); |
211 | 9 | |
212 | 9 | MVT ElemTy = Tys.first.getVectorElementType(); |
213 | 9 | return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() + |
214 | 9 | Tys.second.getVectorNumElements()); |
215 | 9 | } |
216 | | |
217 | | HexagonTargetLowering::TypePair |
218 | 994 | HexagonTargetLowering::typeSplit(MVT VecTy) const { |
219 | 994 | assert(VecTy.isVector()); |
220 | 994 | unsigned NumElem = VecTy.getVectorNumElements(); |
221 | 994 | assert((NumElem % 2) == 0 && "Expecting even-sized vector type"); |
222 | 994 | MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2); |
223 | 994 | return { HalfTy, HalfTy }; |
224 | 994 | } |
225 | | |
226 | | MVT |
227 | 2 | HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const { |
228 | 2 | MVT ElemTy = VecTy.getVectorElementType(); |
229 | 2 | MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor); |
230 | 2 | return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements()); |
231 | 2 | } |
232 | | |
233 | | MVT |
234 | 0 | HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const { |
235 | 0 | MVT ElemTy = VecTy.getVectorElementType(); |
236 | 0 | MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor); |
237 | 0 | return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements()); |
238 | 0 | } |
239 | | |
240 | | SDValue |
241 | | HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy, |
242 | 2 | SelectionDAG &DAG) const { |
243 | 2 | if (ty(Vec).getVectorElementType() == ElemTy) |
244 | 0 | return Vec; |
245 | 2 | MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy); |
246 | 2 | return DAG.getBitcast(CastTy, Vec); |
247 | 2 | } |
248 | | |
249 | | SDValue |
250 | | HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl, |
251 | 0 | SelectionDAG &DAG) const { |
252 | 0 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)), |
253 | 0 | Ops.second, Ops.first); |
254 | 0 | } |
255 | | |
256 | | HexagonTargetLowering::VectorPair |
257 | | HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl, |
258 | 236 | SelectionDAG &DAG) const { |
259 | 236 | TypePair Tys = typeSplit(ty(Vec)); |
260 | 236 | if (Vec.getOpcode() == HexagonISD::QCAT) |
261 | 0 | return VectorPair(Vec.getOperand(0), Vec.getOperand(1)); |
262 | 236 | return DAG.SplitVector(Vec, dl, Tys.first, Tys.second); |
263 | 236 | } |
264 | | |
265 | | bool |
266 | 0 | HexagonTargetLowering::isHvxSingleTy(MVT Ty) const { |
267 | 0 | return Subtarget.isHVXVectorType(Ty) && |
268 | 0 | Ty.getSizeInBits() == 8 * Subtarget.getVectorLength(); |
269 | 0 | } |
270 | | |
271 | | bool |
272 | 32.1k | HexagonTargetLowering::isHvxPairTy(MVT Ty) const { |
273 | 32.1k | return Subtarget.isHVXVectorType(Ty) && |
274 | 32.1k | Ty.getSizeInBits() == 16 * Subtarget.getVectorLength()5.53k ; |
275 | 32.1k | } |
276 | | |
277 | | SDValue |
278 | | HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, |
279 | 240 | SelectionDAG &DAG) const { |
280 | 240 | if (ElemIdx.getValueType().getSimpleVT() != MVT::i32) |
281 | 0 | ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx); |
282 | 240 | |
283 | 240 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
284 | 240 | if (ElemWidth == 8) |
285 | 11 | return ElemIdx; |
286 | 229 | |
287 | 229 | unsigned L = Log2_32(ElemWidth/8); |
288 | 229 | const SDLoc &dl(ElemIdx); |
289 | 229 | return DAG.getNode(ISD::SHL, dl, MVT::i32, |
290 | 229 | {ElemIdx, DAG.getConstant(L, dl, MVT::i32)}); |
291 | 229 | } |
292 | | |
293 | | SDValue |
294 | | HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy, |
295 | 206 | SelectionDAG &DAG) const { |
296 | 206 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
297 | 206 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
298 | 206 | if (ElemWidth == 32) |
299 | 0 | return Idx; |
300 | 206 | |
301 | 206 | if (ty(Idx) != MVT::i32) |
302 | 0 | Idx = DAG.getBitcast(MVT::i32, Idx); |
303 | 206 | const SDLoc &dl(Idx); |
304 | 206 | SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32); |
305 | 206 | SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask}); |
306 | 206 | return SubIdx; |
307 | 206 | } |
308 | | |
309 | | SDValue |
310 | | HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0, |
311 | | SDValue Op1, ArrayRef<int> Mask, |
312 | 2 | SelectionDAG &DAG) const { |
313 | 2 | MVT OpTy = ty(Op0); |
314 | 2 | assert(OpTy == ty(Op1)); |
315 | 2 | |
316 | 2 | MVT ElemTy = OpTy.getVectorElementType(); |
317 | 2 | if (ElemTy == MVT::i8) |
318 | 2 | return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask); |
319 | 0 | assert(ElemTy.getSizeInBits() >= 8); |
320 | 0 |
|
321 | 0 | MVT ResTy = tyVector(OpTy, MVT::i8); |
322 | 0 | unsigned ElemSize = ElemTy.getSizeInBits() / 8; |
323 | 0 |
|
324 | 0 | SmallVector<int,128> ByteMask; |
325 | 0 | for (int M : Mask) { |
326 | 0 | if (M < 0) { |
327 | 0 | for (unsigned I = 0; I != ElemSize; ++I) |
328 | 0 | ByteMask.push_back(-1); |
329 | 0 | } else { |
330 | 0 | int NewM = M*ElemSize; |
331 | 0 | for (unsigned I = 0; I != ElemSize; ++I) |
332 | 0 | ByteMask.push_back(NewM+I); |
333 | 0 | } |
334 | 0 | } |
335 | 0 | assert(ResTy.getVectorNumElements() == ByteMask.size()); |
336 | 0 | return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG), |
337 | 0 | opCastElem(Op1, MVT::i8, DAG), ByteMask); |
338 | 0 | } |
339 | | |
340 | | SDValue |
341 | | HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values, |
342 | | const SDLoc &dl, MVT VecTy, |
343 | 286 | SelectionDAG &DAG) const { |
344 | 286 | unsigned VecLen = Values.size(); |
345 | 286 | MachineFunction &MF = DAG.getMachineFunction(); |
346 | 286 | MVT ElemTy = VecTy.getVectorElementType(); |
347 | 286 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
348 | 286 | unsigned HwLen = Subtarget.getVectorLength(); |
349 | 286 | |
350 | 286 | unsigned ElemSize = ElemWidth / 8; |
351 | 286 | assert(ElemSize*VecLen == HwLen); |
352 | 286 | SmallVector<SDValue,32> Words; |
353 | 286 | |
354 | 286 | if (VecTy.getVectorElementType() != MVT::i32) { |
355 | 225 | assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size"); |
356 | 225 | unsigned OpsPerWord = (ElemSize == 1) ? 4197 : 228 ; |
357 | 225 | MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord); |
358 | 4.62k | for (unsigned i = 0; i != VecLen; i += OpsPerWord4.40k ) { |
359 | 4.40k | SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG); |
360 | 4.40k | Words.push_back(DAG.getBitcast(MVT::i32, W)); |
361 | 4.40k | } |
362 | 225 | } else { |
363 | 61 | Words.assign(Values.begin(), Values.end()); |
364 | 61 | } |
365 | 286 | |
366 | 286 | unsigned NumWords = Words.size(); |
367 | 286 | bool IsSplat = true, IsUndef = true; |
368 | 286 | SDValue SplatV; |
369 | 2.68k | for (unsigned i = 0; i != NumWords && IsSplat2.59k ; ++i2.39k ) { |
370 | 2.39k | if (isUndef(Words[i])) |
371 | 158 | continue; |
372 | 2.23k | IsUndef = false; |
373 | 2.23k | if (!SplatV.getNode()) |
374 | 286 | SplatV = Words[i]; |
375 | 1.95k | else if (SplatV != Words[i]) |
376 | 201 | IsSplat = false; |
377 | 2.23k | } |
378 | 286 | if (IsUndef) |
379 | 0 | return DAG.getUNDEF(VecTy); |
380 | 286 | if (IsSplat) { |
381 | 85 | assert(SplatV.getNode()); |
382 | 85 | auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode()); |
383 | 85 | if (IdxN && IdxN->isNullValue()80 ) |
384 | 45 | return getZero(dl, VecTy, DAG); |
385 | 40 | return DAG.getNode(HexagonISD::VSPLATW, dl, VecTy, SplatV); |
386 | 40 | } |
387 | 201 | |
388 | 201 | // Delay recognizing constant vectors until here, so that we can generate |
389 | 201 | // a vsplat. |
390 | 201 | SmallVector<ConstantInt*, 128> Consts(VecLen); |
391 | 201 | bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); |
392 | 201 | if (AllConst) { |
393 | 181 | ArrayRef<Constant*> Tmp((Constant**)Consts.begin(), |
394 | 181 | (Constant**)Consts.end()); |
395 | 181 | Constant *CV = ConstantVector::get(Tmp); |
396 | 181 | unsigned Align = HwLen; |
397 | 181 | SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG); |
398 | 181 | return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP, |
399 | 181 | MachinePointerInfo::getConstantPool(MF), Align); |
400 | 181 | } |
401 | 20 | |
402 | 20 | // A special case is a situation where the vector is built entirely from |
403 | 20 | // elements extracted from another vector. This could be done via a shuffle |
404 | 20 | // more efficiently, but typically, the size of the source vector will not |
405 | 20 | // match the size of the vector being built (which precludes the use of a |
406 | 20 | // shuffle directly). |
407 | 20 | // This only handles a single source vector, and the vector being built |
408 | 20 | // should be of a sub-vector type of the source vector type. |
409 | 20 | auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec, |
410 | 20 | SmallVectorImpl<int> &SrcIdx) { |
411 | 20 | SDValue Vec; |
412 | 533 | for (SDValue V : Values) { |
413 | 533 | if (isUndef(V)) { |
414 | 206 | SrcIdx.push_back(-1); |
415 | 206 | continue; |
416 | 206 | } |
417 | 327 | if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
418 | 5 | return false; |
419 | 322 | // All extracts should come from the same vector. |
420 | 322 | SDValue T = V.getOperand(0); |
421 | 322 | if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode()307 ) |
422 | 0 | return false; |
423 | 322 | Vec = T; |
424 | 322 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1)); |
425 | 322 | if (C == nullptr) |
426 | 0 | return false; |
427 | 322 | int I = C->getSExtValue(); |
428 | 322 | assert(I >= 0 && "Negative element index"); |
429 | 322 | SrcIdx.push_back(I); |
430 | 322 | } |
431 | 20 | SrcVec = Vec; |
432 | 15 | return true; |
433 | 20 | }; |
434 | 20 | |
435 | 20 | SmallVector<int,128> ExtIdx; |
436 | 20 | SDValue ExtVec; |
437 | 20 | if (IsBuildFromExtracts(ExtVec, ExtIdx)) { |
438 | 15 | MVT ExtTy = ty(ExtVec); |
439 | 15 | unsigned ExtLen = ExtTy.getVectorNumElements(); |
440 | 15 | if (ExtLen == VecLen || ExtLen == 2*VecLen5 ) { |
441 | 14 | // Construct a new shuffle mask that will produce a vector with the same |
442 | 14 | // number of elements as the input vector, and such that the vector we |
443 | 14 | // want will be the initial subvector of it. |
444 | 14 | SmallVector<int,128> Mask; |
445 | 14 | BitVector Used(ExtLen); |
446 | 14 | |
447 | 512 | for (int M : ExtIdx) { |
448 | 512 | Mask.push_back(M); |
449 | 512 | if (M >= 0) |
450 | 320 | Used.set(M); |
451 | 512 | } |
452 | 14 | // Fill the rest of the mask with the unused elements of ExtVec in hopes |
453 | 14 | // that it will result in a permutation of ExtVec's elements. It's still |
454 | 14 | // fine if it doesn't (e.g. if undefs are present, or elements are |
455 | 14 | // repeated), but permutations can always be done efficiently via vdelta |
456 | 14 | // and vrdelta. |
457 | 268 | for (unsigned I = 0; I != ExtLen; ++I254 ) { |
458 | 266 | if (Mask.size() == ExtLen) |
459 | 12 | break; |
460 | 254 | if (!Used.test(I)) |
461 | 128 | Mask.push_back(I); |
462 | 254 | } |
463 | 14 | |
464 | 14 | SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec, |
465 | 14 | DAG.getUNDEF(ExtTy), Mask); |
466 | 14 | if (ExtLen == VecLen) |
467 | 10 | return S; |
468 | 4 | return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S); |
469 | 4 | } |
470 | 15 | } |
471 | 6 | |
472 | 6 | // Construct two halves in parallel, then or them together. |
473 | 6 | assert(4*Words.size() == Subtarget.getVectorLength()); |
474 | 6 | SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG); |
475 | 6 | SDValue HalfV1 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG); |
476 | 6 | SDValue S = DAG.getConstant(4, dl, MVT::i32); |
477 | 62 | for (unsigned i = 0; i != NumWords/2; ++i56 ) { |
478 | 56 | SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, |
479 | 56 | {HalfV0, Words[i]}); |
480 | 56 | SDValue M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, |
481 | 56 | {HalfV1, Words[i+NumWords/2]}); |
482 | 56 | HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, S}); |
483 | 56 | HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, S}); |
484 | 56 | } |
485 | 6 | |
486 | 6 | HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, |
487 | 6 | {HalfV0, DAG.getConstant(HwLen/2, dl, MVT::i32)}); |
488 | 6 | SDValue DstV = DAG.getNode(ISD::OR, dl, VecTy, {HalfV0, HalfV1}); |
489 | 6 | return DstV; |
490 | 6 | } |
491 | | |
492 | | SDValue |
493 | | HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl, |
494 | 4 | unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const { |
495 | 4 | MVT PredTy = ty(PredV); |
496 | 4 | unsigned HwLen = Subtarget.getVectorLength(); |
497 | 4 | MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); |
498 | 4 | |
499 | 4 | if (Subtarget.isHVXVectorType(PredTy, true)) { |
500 | 0 | // Move the vector predicate SubV to a vector register, and scale it |
501 | 0 | // down to match the representation (bytes per type element) that VecV |
502 | 0 | // uses. The scaling down will pick every 2nd or 4th (every Scale-th |
503 | 0 | // in general) element and put them at the front of the resulting |
504 | 0 | // vector. This subvector will then be inserted into the Q2V of VecV. |
505 | 0 | // To avoid having an operation that generates an illegal type (short |
506 | 0 | // vector), generate a full size vector. |
507 | 0 | // |
508 | 0 | SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV); |
509 | 0 | SmallVector<int,128> Mask(HwLen); |
510 | 0 | // Scale = BitBytes(PredV) / Given BitBytes. |
511 | 0 | unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes); |
512 | 0 | unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes; |
513 | 0 |
|
514 | 0 | for (unsigned i = 0; i != HwLen; ++i) { |
515 | 0 | unsigned Num = i % Scale; |
516 | 0 | unsigned Off = i / Scale; |
517 | 0 | Mask[BlockLen*Num + Off] = i; |
518 | 0 | } |
519 | 0 | SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask); |
520 | 0 | if (!ZeroFill) |
521 | 0 | return S; |
522 | 0 | // Fill the bytes beyond BlockLen with 0s. |
523 | 0 | MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); |
524 | 0 | SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, |
525 | 0 | {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); |
526 | 0 | SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q); |
527 | 0 | return DAG.getNode(ISD::AND, dl, ByteTy, S, M); |
528 | 0 | } |
529 | 4 | |
530 | 4 | // Make sure that this is a valid scalar predicate. |
531 | 4 | assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1); |
532 | 4 | |
533 | 4 | unsigned Bytes = 8 / PredTy.getVectorNumElements(); |
534 | 4 | SmallVector<SDValue,4> Words[2]; |
535 | 4 | unsigned IdxW = 0; |
536 | 4 | |
537 | 28 | auto Lo32 = [&DAG, &dl] (SDValue P) { |
538 | 28 | return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P); |
539 | 28 | }; |
540 | 28 | auto Hi32 = [&DAG, &dl] (SDValue P) { |
541 | 28 | return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P); |
542 | 28 | }; |
543 | 4 | |
544 | 4 | SDValue W0 = isUndef(PredV) |
545 | 4 | ? DAG.getUNDEF(MVT::i64)2 |
546 | 4 | : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV)2 ; |
547 | 4 | Words[IdxW].push_back(Hi32(W0)); |
548 | 4 | Words[IdxW].push_back(Lo32(W0)); |
549 | 4 | |
550 | 12 | while (Bytes < BitBytes) { |
551 | 8 | IdxW ^= 1; |
552 | 8 | Words[IdxW].clear(); |
553 | 8 | |
554 | 8 | if (Bytes < 4) { |
555 | 24 | for (const SDValue &W : Words[IdxW ^ 1]) { |
556 | 24 | SDValue T = expandPredicate(W, dl, DAG); |
557 | 24 | Words[IdxW].push_back(Hi32(T)); |
558 | 24 | Words[IdxW].push_back(Lo32(T)); |
559 | 24 | } |
560 | 8 | } else { |
561 | 0 | for (const SDValue &W : Words[IdxW ^ 1]) { |
562 | 0 | Words[IdxW].push_back(W); |
563 | 0 | Words[IdxW].push_back(W); |
564 | 0 | } |
565 | 0 | } |
566 | 8 | Bytes *= 2; |
567 | 8 | } |
568 | 4 | |
569 | 4 | assert(Bytes == BitBytes); |
570 | 4 | |
571 | 4 | SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy)0 ; |
572 | 4 | SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32); |
573 | 32 | for (const SDValue &W : Words[IdxW]) { |
574 | 32 | Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4); |
575 | 32 | Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W); |
576 | 32 | } |
577 | 4 | |
578 | 4 | return Vec; |
579 | 4 | } |
580 | | |
581 | | SDValue |
582 | | HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, |
583 | | const SDLoc &dl, MVT VecTy, |
584 | 35 | SelectionDAG &DAG) const { |
585 | 35 | // Construct a vector V of bytes, such that a comparison V >u 0 would |
586 | 35 | // produce the required vector predicate. |
587 | 35 | unsigned VecLen = Values.size(); |
588 | 35 | unsigned HwLen = Subtarget.getVectorLength(); |
589 | 35 | assert(VecLen <= HwLen || VecLen == 8*HwLen); |
590 | 35 | SmallVector<SDValue,128> Bytes; |
591 | 35 | bool AllT = true, AllF = true; |
592 | 35 | |
593 | 1.82k | auto IsTrue = [] (SDValue V) { |
594 | 1.82k | if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode())) |
595 | 1.76k | return !N->isNullValue(); |
596 | 64 | return false; |
597 | 64 | }; |
598 | 1.82k | auto IsFalse = [] (SDValue V) { |
599 | 1.82k | if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode())) |
600 | 1.76k | return N->isNullValue(); |
601 | 64 | return false; |
602 | 64 | }; |
603 | 35 | |
604 | 35 | if (VecLen <= HwLen) { |
605 | 35 | // In the hardware, each bit of a vector predicate corresponds to a byte |
606 | 35 | // of a vector register. Calculate how many bytes does a bit of VecTy |
607 | 35 | // correspond to. |
608 | 35 | assert(HwLen % VecLen == 0); |
609 | 35 | unsigned BitBytes = HwLen / VecLen; |
610 | 1.82k | for (SDValue V : Values) { |
611 | 1.82k | AllT &= IsTrue(V); |
612 | 1.82k | AllF &= IsFalse(V); |
613 | 1.82k | |
614 | 1.82k | SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)1.79k |
615 | 1.82k | : DAG.getUNDEF(MVT::i8)32 ; |
616 | 5.02k | for (unsigned B = 0; B != BitBytes; ++B3.20k ) |
617 | 3.20k | Bytes.push_back(Ext); |
618 | 1.82k | } |
619 | 35 | } else { |
620 | 0 | // There are as many i1 values, as there are bits in a vector register. |
621 | 0 | // Divide the values into groups of 8 and check that each group consists |
622 | 0 | // of the same value (ignoring undefs). |
623 | 0 | for (unsigned I = 0; I != VecLen; I += 8) { |
624 | 0 | unsigned B = 0; |
625 | 0 | // Find the first non-undef value in this group. |
626 | 0 | for (; B != 8; ++B) { |
627 | 0 | if (!Values[I+B].isUndef()) |
628 | 0 | break; |
629 | 0 | } |
630 | 0 | SDValue F = Values[I+B]; |
631 | 0 | AllT &= IsTrue(F); |
632 | 0 | AllF &= IsFalse(F); |
633 | 0 |
|
634 | 0 | SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8) |
635 | 0 | : DAG.getUNDEF(MVT::i8); |
636 | 0 | Bytes.push_back(Ext); |
637 | 0 | // Verify that the rest of values in the group are the same as the |
638 | 0 | // first. |
639 | 0 | for (; B != 8; ++B) |
640 | 0 | assert(Values[I+B].isUndef() || Values[I+B] == F); |
641 | 0 | } |
642 | 0 | } |
643 | 35 | |
644 | 35 | if (AllT) |
645 | 31 | return DAG.getNode(HexagonISD::QTRUE, dl, VecTy); |
646 | 4 | if (AllF) |
647 | 2 | return DAG.getNode(HexagonISD::QFALSE, dl, VecTy); |
648 | 2 | |
649 | 2 | MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); |
650 | 2 | SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG); |
651 | 2 | return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec); |
652 | 2 | } |
653 | | |
654 | | SDValue |
655 | | HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV, |
656 | 240 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
657 | 240 | MVT ElemTy = ty(VecV).getVectorElementType(); |
658 | 240 | |
659 | 240 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
660 | 240 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
661 | 240 | (void)ElemWidth; |
662 | 240 | |
663 | 240 | SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); |
664 | 240 | SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, |
665 | 240 | {VecV, ByteIdx}); |
666 | 240 | if (ElemTy == MVT::i32) |
667 | 34 | return ExWord; |
668 | 206 | |
669 | 206 | // Have an extracted word, need to extract the smaller element out of it. |
670 | 206 | // 1. Extract the bits of (the original) IdxV that correspond to the index |
671 | 206 | // of the desired element in the 32-bit word. |
672 | 206 | SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); |
673 | 206 | // 2. Extract the element from the word. |
674 | 206 | SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord); |
675 | 206 | return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG); |
676 | 206 | } |
677 | | |
678 | | SDValue |
679 | | HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV, |
680 | 2 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
681 | 2 | // Implement other return types if necessary. |
682 | 2 | assert(ResTy == MVT::i1); |
683 | 2 | |
684 | 2 | unsigned HwLen = Subtarget.getVectorLength(); |
685 | 2 | MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); |
686 | 2 | SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); |
687 | 2 | |
688 | 2 | unsigned Scale = HwLen / ty(VecV).getVectorNumElements(); |
689 | 2 | SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32); |
690 | 2 | IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV); |
691 | 2 | |
692 | 2 | SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG); |
693 | 2 | SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32); |
694 | 2 | return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG); |
695 | 2 | } |
696 | | |
697 | | SDValue |
698 | | HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV, |
699 | 0 | SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { |
700 | 0 | MVT ElemTy = ty(VecV).getVectorElementType(); |
701 | 0 |
|
702 | 0 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
703 | 0 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
704 | 0 | (void)ElemWidth; |
705 | 0 |
|
706 | 0 | auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV, |
707 | 0 | SDValue ByteIdxV) { |
708 | 0 | MVT VecTy = ty(VecV); |
709 | 0 | unsigned HwLen = Subtarget.getVectorLength(); |
710 | 0 | SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32, |
711 | 0 | {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)}); |
712 | 0 | SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV}); |
713 | 0 | SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV}); |
714 | 0 | SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32, |
715 | 0 | {DAG.getConstant(HwLen, dl, MVT::i32), MaskV}); |
716 | 0 | SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV}); |
717 | 0 | return TorV; |
718 | 0 | }; |
719 | 0 |
|
720 | 0 | SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); |
721 | 0 | if (ElemTy == MVT::i32) |
722 | 0 | return InsertWord(VecV, ValV, ByteIdx); |
723 | 0 | |
724 | 0 | // If this is not inserting a 32-bit word, convert it into such a thing. |
725 | 0 | // 1. Extract the existing word from the target vector. |
726 | 0 | SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32, |
727 | 0 | {ByteIdx, DAG.getConstant(2, dl, MVT::i32)}); |
728 | 0 | SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx, |
729 | 0 | dl, MVT::i32, DAG); |
730 | 0 |
|
731 | 0 | // 2. Treating the extracted word as a 32-bit vector, insert the given |
732 | 0 | // value into it. |
733 | 0 | SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); |
734 | 0 | MVT SubVecTy = tyVector(ty(Ext), ElemTy); |
735 | 0 | SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext), |
736 | 0 | ValV, SubIdx, dl, ElemTy, DAG); |
737 | 0 |
|
738 | 0 | // 3. Insert the 32-bit word back into the original vector. |
739 | 0 | return InsertWord(VecV, Ins, ByteIdx); |
740 | 0 | } |
741 | | |
742 | | SDValue |
743 | | HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV, |
744 | 0 | SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { |
745 | 0 | unsigned HwLen = Subtarget.getVectorLength(); |
746 | 0 | MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); |
747 | 0 | SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); |
748 | 0 |
|
749 | 0 | unsigned Scale = HwLen / ty(VecV).getVectorNumElements(); |
750 | 0 | SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32); |
751 | 0 | IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV); |
752 | 0 | ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV); |
753 | 0 |
|
754 | 0 | SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG); |
755 | 0 | return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV); |
756 | 0 | } |
757 | | |
758 | | SDValue |
759 | | HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, |
760 | 396 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
761 | 396 | MVT VecTy = ty(VecV); |
762 | 396 | unsigned HwLen = Subtarget.getVectorLength(); |
763 | 396 | unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); |
764 | 396 | MVT ElemTy = VecTy.getVectorElementType(); |
765 | 396 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
766 | 396 | |
767 | 396 | // If the source vector is a vector pair, get the single vector containing |
768 | 396 | // the subvector of interest. The subvector will never overlap two single |
769 | 396 | // vectors. |
770 | 396 | if (isHvxPairTy(VecTy)) { |
771 | 394 | unsigned SubIdx; |
772 | 394 | if (Idx * ElemWidth >= 8*HwLen) { |
773 | 197 | SubIdx = Hexagon::vsub_hi; |
774 | 197 | Idx -= VecTy.getVectorNumElements() / 2; |
775 | 197 | } else { |
776 | 197 | SubIdx = Hexagon::vsub_lo; |
777 | 197 | } |
778 | 394 | VecTy = typeSplit(VecTy).first; |
779 | 394 | VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV); |
780 | 394 | if (VecTy == ResTy) |
781 | 394 | return VecV; |
782 | 2 | } |
783 | 2 | |
784 | 2 | // The only meaningful subvectors of a single HVX vector are those that |
785 | 2 | // fit in a scalar register. |
786 | 2 | assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64); |
787 | 2 | |
788 | 2 | MVT WordTy = tyVector(VecTy, MVT::i32); |
789 | 2 | SDValue WordVec = DAG.getBitcast(WordTy, VecV); |
790 | 2 | unsigned WordIdx = (Idx*ElemWidth) / 32; |
791 | 2 | |
792 | 2 | SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32); |
793 | 2 | SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG); |
794 | 2 | if (ResTy.getSizeInBits() == 32) |
795 | 0 | return DAG.getBitcast(ResTy, W0); |
796 | 2 | |
797 | 2 | SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32); |
798 | 2 | SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG); |
799 | 2 | SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0}); |
800 | 2 | return DAG.getBitcast(ResTy, WW); |
801 | 2 | } |
802 | | |
803 | | SDValue |
804 | | HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, |
805 | 1 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
806 | 1 | MVT VecTy = ty(VecV); |
807 | 1 | unsigned HwLen = Subtarget.getVectorLength(); |
808 | 1 | MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); |
809 | 1 | SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); |
810 | 1 | // IdxV is required to be a constant. |
811 | 1 | unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); |
812 | 1 | |
813 | 1 | unsigned ResLen = ResTy.getVectorNumElements(); |
814 | 1 | unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); |
815 | 1 | unsigned Offset = Idx * BitBytes; |
816 | 1 | SDValue Undef = DAG.getUNDEF(ByteTy); |
817 | 1 | SmallVector<int,128> Mask; |
818 | 1 | |
819 | 1 | if (Subtarget.isHVXVectorType(ResTy, true)) { |
820 | 0 | // Converting between two vector predicates. Since the result is shorter |
821 | 0 | // than the source, it will correspond to a vector predicate with the |
822 | 0 | // relevant bits replicated. The replication count is the ratio of the |
823 | 0 | // source and target vector lengths. |
824 | 0 | unsigned Rep = VecTy.getVectorNumElements() / ResLen; |
825 | 0 | assert(isPowerOf2_32(Rep) && HwLen % Rep == 0); |
826 | 0 | for (unsigned i = 0; i != HwLen/Rep; ++i) { |
827 | 0 | for (unsigned j = 0; j != Rep; ++j) |
828 | 0 | Mask.push_back(i + Offset); |
829 | 0 | } |
830 | 0 | SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); |
831 | 0 | return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV); |
832 | 0 | } |
833 | 1 | |
834 | 1 | // Converting between a vector predicate and a scalar predicate. In the |
835 | 1 | // vector predicate, a group of BitBytes bits will correspond to a single |
836 | 1 | // i1 element of the source vector type. Those bits will all have the same |
837 | 1 | // value. The same will be true for ByteVec, where each byte corresponds |
838 | 1 | // to a bit in the vector predicate. |
839 | 1 | // The algorithm is to traverse the ByteVec, going over the i1 values from |
840 | 1 | // the source vector, and generate the corresponding representation in an |
841 | 1 | // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the |
842 | 1 | // elements so that the interesting 8 bytes will be in the low end of the |
843 | 1 | // vector. |
844 | 1 | unsigned Rep = 8 / ResLen; |
845 | 1 | // Make sure the output fill the entire vector register, so repeat the |
846 | 1 | // 8-byte groups as many times as necessary. |
847 | 9 | for (unsigned r = 0; r != HwLen/ResLen; ++r8 ) { |
848 | 8 | // This will generate the indexes of the 8 interesting bytes. |
849 | 72 | for (unsigned i = 0; i != ResLen; ++i64 ) { |
850 | 128 | for (unsigned j = 0; j != Rep; ++j64 ) |
851 | 64 | Mask.push_back(Offset + i*BitBytes); |
852 | 64 | } |
853 | 8 | } |
854 | 1 | |
855 | 1 | SDValue Zero = getZero(dl, MVT::i32, DAG); |
856 | 1 | SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); |
857 | 1 | // Combine the two low words from ShuffV into a v8i8, and byte-compare |
858 | 1 | // them against 0. |
859 | 1 | SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero}); |
860 | 1 | SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, |
861 | 1 | {ShuffV, DAG.getConstant(4, dl, MVT::i32)}); |
862 | 1 | SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0}); |
863 | 1 | return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy, |
864 | 1 | {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG); |
865 | 1 | } |
866 | | |
867 | | SDValue |
868 | | HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV, |
869 | 0 | SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { |
870 | 0 | MVT VecTy = ty(VecV); |
871 | 0 | MVT SubTy = ty(SubV); |
872 | 0 | unsigned HwLen = Subtarget.getVectorLength(); |
873 | 0 | MVT ElemTy = VecTy.getVectorElementType(); |
874 | 0 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
875 | 0 |
|
876 | 0 | bool IsPair = isHvxPairTy(VecTy); |
877 | 0 | MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth); |
878 | 0 | // The two single vectors that VecV consists of, if it's a pair. |
879 | 0 | SDValue V0, V1; |
880 | 0 | SDValue SingleV = VecV; |
881 | 0 | SDValue PickHi; |
882 | 0 |
|
883 | 0 | if (IsPair) { |
884 | 0 | V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV); |
885 | 0 | V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV); |
886 | 0 |
|
887 | 0 | SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(), |
888 | 0 | dl, MVT::i32); |
889 | 0 | PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT); |
890 | 0 | if (isHvxSingleTy(SubTy)) { |
891 | 0 | if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) { |
892 | 0 | unsigned Idx = CN->getZExtValue(); |
893 | 0 | assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2); |
894 | 0 | unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi; |
895 | 0 | return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV); |
896 | 0 | } |
897 | 0 | // If IdxV is not a constant, generate the two variants: with the |
898 | 0 | // SubV as the high and as the low subregister, and select the right |
899 | 0 | // pair based on the IdxV. |
900 | 0 | SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1}); |
901 | 0 | SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV}); |
902 | 0 | return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); |
903 | 0 | } |
904 | 0 | // The subvector being inserted must be entirely contained in one of |
905 | 0 | // the vectors V0 or V1. Set SingleV to the correct one, and update |
906 | 0 | // IdxV to be the index relative to the beginning of that vector. |
907 | 0 | SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV); |
908 | 0 | IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV); |
909 | 0 | SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0); |
910 | 0 | } |
911 | 0 |
|
912 | 0 | // The only meaningful subvectors of a single HVX vector are those that |
913 | 0 | // fit in a scalar register. |
914 | 0 | assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64); |
915 | 0 | // Convert IdxV to be index in bytes. |
916 | 0 | auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode()); |
917 | 0 | if (!IdxN || !IdxN->isNullValue()) { |
918 | 0 | IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, |
919 | 0 | DAG.getConstant(ElemWidth/8, dl, MVT::i32)); |
920 | 0 | SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV); |
921 | 0 | } |
922 | 0 | // When inserting a single word, the rotation back to the original position |
923 | 0 | // would be by HwLen-Idx, but if two words are inserted, it will need to be |
924 | 0 | // by (HwLen-4)-Idx. |
925 | 0 | unsigned RolBase = HwLen; |
926 | 0 | if (VecTy.getSizeInBits() == 32) { |
927 | 0 | SDValue V = DAG.getBitcast(MVT::i32, SubV); |
928 | 0 | SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V); |
929 | 0 | } else { |
930 | 0 | SDValue V = DAG.getBitcast(MVT::i64, SubV); |
931 | 0 | SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V); |
932 | 0 | SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V); |
933 | 0 | SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0); |
934 | 0 | SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, |
935 | 0 | DAG.getConstant(4, dl, MVT::i32)); |
936 | 0 | SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1); |
937 | 0 | RolBase = HwLen-4; |
938 | 0 | } |
939 | 0 | // If the vector wasn't ror'ed, don't ror it back. |
940 | 0 | if (RolBase != 4 || !IdxN || !IdxN->isNullValue()) { |
941 | 0 | SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32, |
942 | 0 | DAG.getConstant(RolBase, dl, MVT::i32), IdxV); |
943 | 0 | SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV); |
944 | 0 | } |
945 | 0 |
|
946 | 0 | if (IsPair) { |
947 | 0 | SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1}); |
948 | 0 | SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV}); |
949 | 0 | return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); |
950 | 0 | } |
951 | 0 | return SingleV; |
952 | 0 | } |
953 | | |
954 | | SDValue |
955 | | HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, |
956 | 0 | SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { |
957 | 0 | MVT VecTy = ty(VecV); |
958 | 0 | MVT SubTy = ty(SubV); |
959 | 0 | assert(Subtarget.isHVXVectorType(VecTy, true)); |
960 | 0 | // VecV is an HVX vector predicate. SubV may be either an HVX vector |
961 | 0 | // predicate as well, or it can be a scalar predicate. |
962 | 0 |
|
963 | 0 | unsigned VecLen = VecTy.getVectorNumElements(); |
964 | 0 | unsigned HwLen = Subtarget.getVectorLength(); |
965 | 0 | assert(HwLen % VecLen == 0 && "Unexpected vector type"); |
966 | 0 |
|
967 | 0 | unsigned Scale = VecLen / SubTy.getVectorNumElements(); |
968 | 0 | unsigned BitBytes = HwLen / VecLen; |
969 | 0 | unsigned BlockLen = HwLen / Scale; |
970 | 0 |
|
971 | 0 | MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); |
972 | 0 | SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); |
973 | 0 | SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG); |
974 | 0 | SDValue ByteIdx; |
975 | 0 |
|
976 | 0 | auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode()); |
977 | 0 | if (!IdxN || !IdxN->isNullValue()) { |
978 | 0 | ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, |
979 | 0 | DAG.getConstant(BitBytes, dl, MVT::i32)); |
980 | 0 | ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx); |
981 | 0 | } |
982 | 0 |
|
983 | 0 | // ByteVec is the target vector VecV rotated in such a way that the |
984 | 0 | // subvector should be inserted at index 0. Generate a predicate mask |
985 | 0 | // and use vmux to do the insertion. |
986 | 0 | MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); |
987 | 0 | SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, |
988 | 0 | {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); |
989 | 0 | ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG); |
990 | 0 | // Rotate ByteVec back, and convert to a vector predicate. |
991 | 0 | if (!IdxN || !IdxN->isNullValue()) { |
992 | 0 | SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32); |
993 | 0 | SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx); |
994 | 0 | ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi); |
995 | 0 | } |
996 | 0 | return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec); |
997 | 0 | } |
998 | | |
999 | | SDValue |
1000 | | HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl, |
1001 | 8 | MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const { |
1002 | 8 | // Sign- and any-extending of a vector predicate to a vector register is |
1003 | 8 | // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and |
1004 | 8 | // a vector of 1s (where the 1s are of type matching the vector type). |
1005 | 8 | assert(Subtarget.isHVXVectorType(ResTy)); |
1006 | 8 | if (!ZeroExt) |
1007 | 2 | return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV); |
1008 | 6 | |
1009 | 6 | assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements()); |
1010 | 6 | SDValue True = DAG.getNode(HexagonISD::VSPLAT, dl, ResTy, |
1011 | 6 | DAG.getConstant(1, dl, MVT::i32)); |
1012 | 6 | SDValue False = getZero(dl, ResTy, DAG); |
1013 | 6 | return DAG.getSelect(dl, ResTy, VecV, True, False); |
1014 | 6 | } |
1015 | | |
1016 | | SDValue |
1017 | | HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) |
1018 | 306 | const { |
1019 | 306 | const SDLoc &dl(Op); |
1020 | 306 | MVT VecTy = ty(Op); |
1021 | 306 | |
1022 | 306 | unsigned Size = Op.getNumOperands(); |
1023 | 306 | SmallVector<SDValue,128> Ops; |
1024 | 19.7k | for (unsigned i = 0; i != Size; ++i19.4k ) |
1025 | 19.4k | Ops.push_back(Op.getOperand(i)); |
1026 | 306 | |
1027 | 306 | if (VecTy.getVectorElementType() == MVT::i1) |
1028 | 35 | return buildHvxVectorPred(Ops, dl, VecTy, DAG); |
1029 | 271 | |
1030 | 271 | if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { |
1031 | 13 | ArrayRef<SDValue> A(Ops); |
1032 | 13 | MVT SingleTy = typeSplit(VecTy).first; |
1033 | 13 | SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG); |
1034 | 13 | SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG); |
1035 | 13 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); |
1036 | 13 | } |
1037 | 258 | |
1038 | 258 | return buildHvxVectorReg(Ops, dl, VecTy, DAG); |
1039 | 258 | } |
1040 | | |
1041 | | SDValue |
1042 | | HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) |
1043 | 370 | const { |
1044 | 370 | // Vector concatenation of two integer (non-bool) vectors does not need |
1045 | 370 | // special lowering. Custom-lower concats of bool vectors and expand |
1046 | 370 | // concats of more than 2 vectors. |
1047 | 370 | MVT VecTy = ty(Op); |
1048 | 370 | const SDLoc &dl(Op); |
1049 | 370 | unsigned NumOp = Op.getNumOperands(); |
1050 | 370 | if (VecTy.getVectorElementType() != MVT::i1) { |
1051 | 368 | if (NumOp == 2) |
1052 | 366 | return Op; |
1053 | 2 | // Expand the other cases into a build-vector. |
1054 | 2 | SmallVector<SDValue,8> Elems; |
1055 | 2 | for (SDValue V : Op.getNode()->ops()) |
1056 | 16 | DAG.ExtractVectorElements(V, Elems); |
1057 | 2 | // A vector of i16 will be broken up into a build_vector of i16's. |
1058 | 2 | // This is a problem, since at the time of operation legalization, |
1059 | 2 | // all operations are expected to be type-legalized, and i16 is not |
1060 | 2 | // a legal type. If any of the extracted elements is not of a valid |
1061 | 2 | // type, sign-extend it to a valid one. |
1062 | 50 | for (unsigned i = 0, e = Elems.size(); i != e; ++i48 ) { |
1063 | 48 | SDValue V = Elems[i]; |
1064 | 48 | MVT Ty = ty(V); |
1065 | 48 | if (!isTypeLegal(Ty)) { |
1066 | 32 | EVT NTy = getTypeToTransformTo(*DAG.getContext(), Ty); |
1067 | 32 | if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { |
1068 | 4 | Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy, |
1069 | 4 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy, |
1070 | 4 | V.getOperand(0), V.getOperand(1)), |
1071 | 4 | DAG.getValueType(Ty)); |
1072 | 4 | continue; |
1073 | 4 | } |
1074 | 28 | // A few less complicated cases. |
1075 | 28 | if (V.getOpcode() == ISD::Constant) |
1076 | 0 | Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy); |
1077 | 28 | else if (V.isUndef()) |
1078 | 28 | Elems[i] = DAG.getUNDEF(NTy); |
1079 | 28 | else |
1080 | 28 | llvm_unreachable0 ("Unexpected vector element"); |
1081 | 28 | } |
1082 | 48 | } |
1083 | 2 | return DAG.getBuildVector(VecTy, dl, Elems); |
1084 | 2 | } |
1085 | 2 | |
1086 | 2 | assert(VecTy.getVectorElementType() == MVT::i1); |
1087 | 2 | unsigned HwLen = Subtarget.getVectorLength(); |
1088 | 2 | assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0); |
1089 | 2 | |
1090 | 2 | SDValue Op0 = Op.getOperand(0); |
1091 | 2 | |
1092 | 2 | // If the operands are HVX types (i.e. not scalar predicates), then |
1093 | 2 | // defer the concatenation, and create QCAT instead. |
1094 | 2 | if (Subtarget.isHVXVectorType(ty(Op0), true)) { |
1095 | 0 | if (NumOp == 2) |
1096 | 0 | return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1)); |
1097 | 0 | |
1098 | 0 | ArrayRef<SDUse> U(Op.getNode()->ops()); |
1099 | 0 | SmallVector<SDValue,4> SV(U.begin(), U.end()); |
1100 | 0 | ArrayRef<SDValue> Ops(SV); |
1101 | 0 |
|
1102 | 0 | MVT HalfTy = typeSplit(VecTy).first; |
1103 | 0 | SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy, |
1104 | 0 | Ops.take_front(NumOp/2)); |
1105 | 0 | SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy, |
1106 | 0 | Ops.take_back(NumOp/2)); |
1107 | 0 | return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1); |
1108 | 0 | } |
1109 | 2 | |
1110 | 2 | // Count how many bytes (in a vector register) each bit in VecTy |
1111 | 2 | // corresponds to. |
1112 | 2 | unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); |
1113 | 2 | |
1114 | 2 | SmallVector<SDValue,8> Prefixes; |
1115 | 4 | for (SDValue V : Op.getNode()->op_values()) { |
1116 | 4 | SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG); |
1117 | 4 | Prefixes.push_back(P); |
1118 | 4 | } |
1119 | 2 | |
1120 | 2 | unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements(); |
1121 | 2 | MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); |
1122 | 2 | SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32); |
1123 | 2 | SDValue Res = getZero(dl, ByteTy, DAG); |
1124 | 6 | for (unsigned i = 0, e = Prefixes.size(); i != e; ++i4 ) { |
1125 | 4 | Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S); |
1126 | 4 | Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]); |
1127 | 4 | } |
1128 | 2 | return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res); |
1129 | 2 | } |
1130 | | |
1131 | | SDValue |
1132 | | HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) |
1133 | 236 | const { |
1134 | 236 | // Change the type of the extracted element to i32. |
1135 | 236 | SDValue VecV = Op.getOperand(0); |
1136 | 236 | MVT ElemTy = ty(VecV).getVectorElementType(); |
1137 | 236 | const SDLoc &dl(Op); |
1138 | 236 | SDValue IdxV = Op.getOperand(1); |
1139 | 236 | if (ElemTy == MVT::i1) |
1140 | 2 | return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG); |
1141 | 234 | |
1142 | 234 | return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG); |
1143 | 234 | } |
1144 | | |
1145 | | SDValue |
1146 | | HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) |
1147 | 0 | const { |
1148 | 0 | const SDLoc &dl(Op); |
1149 | 0 | SDValue VecV = Op.getOperand(0); |
1150 | 0 | SDValue ValV = Op.getOperand(1); |
1151 | 0 | SDValue IdxV = Op.getOperand(2); |
1152 | 0 | MVT ElemTy = ty(VecV).getVectorElementType(); |
1153 | 0 | if (ElemTy == MVT::i1) |
1154 | 0 | return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); |
1155 | 0 | |
1156 | 0 | return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG); |
1157 | 0 | } |
1158 | | |
1159 | | SDValue |
1160 | | HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) |
1161 | 397 | const { |
1162 | 397 | SDValue SrcV = Op.getOperand(0); |
1163 | 397 | MVT SrcTy = ty(SrcV); |
1164 | 397 | MVT DstTy = ty(Op); |
1165 | 397 | SDValue IdxV = Op.getOperand(1); |
1166 | 397 | unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); |
1167 | 397 | assert(Idx % DstTy.getVectorNumElements() == 0); |
1168 | 397 | (void)Idx; |
1169 | 397 | const SDLoc &dl(Op); |
1170 | 397 | |
1171 | 397 | MVT ElemTy = SrcTy.getVectorElementType(); |
1172 | 397 | if (ElemTy == MVT::i1) |
1173 | 1 | return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG); |
1174 | 396 | |
1175 | 396 | return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG); |
1176 | 396 | } |
1177 | | |
1178 | | SDValue |
1179 | | HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) |
1180 | 0 | const { |
1181 | 0 | // Idx does not need to be a constant. |
1182 | 0 | SDValue VecV = Op.getOperand(0); |
1183 | 0 | SDValue ValV = Op.getOperand(1); |
1184 | 0 | SDValue IdxV = Op.getOperand(2); |
1185 | 0 |
|
1186 | 0 | const SDLoc &dl(Op); |
1187 | 0 | MVT VecTy = ty(VecV); |
1188 | 0 | MVT ElemTy = VecTy.getVectorElementType(); |
1189 | 0 | if (ElemTy == MVT::i1) |
1190 | 0 | return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG); |
1191 | 0 | |
1192 | 0 | return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG); |
1193 | 0 | } |
1194 | | |
1195 | | SDValue |
1196 | 6 | HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const { |
1197 | 6 | // Lower any-extends of boolean vectors to sign-extends, since they |
1198 | 6 | // translate directly to Q2V. Zero-extending could also be done equally |
1199 | 6 | // fast, but Q2V is used/recognized in more places. |
1200 | 6 | // For all other vectors, use zero-extend. |
1201 | 6 | MVT ResTy = ty(Op); |
1202 | 6 | SDValue InpV = Op.getOperand(0); |
1203 | 6 | MVT ElemTy = ty(InpV).getVectorElementType(); |
1204 | 6 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)0 ) |
1205 | 0 | return LowerHvxSignExt(Op, DAG); |
1206 | 6 | return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV); |
1207 | 6 | } |
1208 | | |
1209 | | SDValue |
1210 | 18 | HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const { |
1211 | 18 | MVT ResTy = ty(Op); |
1212 | 18 | SDValue InpV = Op.getOperand(0); |
1213 | 18 | MVT ElemTy = ty(InpV).getVectorElementType(); |
1214 | 18 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)2 ) |
1215 | 2 | return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG); |
1216 | 16 | return Op; |
1217 | 16 | } |
1218 | | |
1219 | | SDValue |
1220 | 40 | HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const { |
1221 | 40 | MVT ResTy = ty(Op); |
1222 | 40 | SDValue InpV = Op.getOperand(0); |
1223 | 40 | MVT ElemTy = ty(InpV).getVectorElementType(); |
1224 | 40 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)6 ) |
1225 | 6 | return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG); |
1226 | 34 | return Op; |
1227 | 34 | } |
1228 | | |
1229 | | SDValue |
1230 | 6 | HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const { |
1231 | 6 | // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight): |
1232 | 6 | // cttz(x) = bitwidth(x) - ctlz(~x & (x-1)) |
1233 | 6 | const SDLoc &dl(Op); |
1234 | 6 | MVT ResTy = ty(Op); |
1235 | 6 | SDValue InpV = Op.getOperand(0); |
1236 | 6 | assert(ResTy == ty(InpV)); |
1237 | 6 | |
1238 | 6 | // Calculate the vectors of 1 and bitwidth(x). |
1239 | 6 | MVT ElemTy = ty(InpV).getVectorElementType(); |
1240 | 6 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1241 | 6 | // Using uint64_t because a shift by 32 can happen. |
1242 | 6 | uint64_t Splat1 = 0, SplatW = 0; |
1243 | 6 | assert(isPowerOf2_32(ElemWidth) && ElemWidth <= 32); |
1244 | 20 | for (unsigned i = 0; i != 32/ElemWidth; ++i14 ) { |
1245 | 14 | Splat1 = (Splat1 << ElemWidth) | 1; |
1246 | 14 | SplatW = (SplatW << ElemWidth) | ElemWidth; |
1247 | 14 | } |
1248 | 6 | SDValue Vec1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy, |
1249 | 6 | DAG.getConstant(uint32_t(Splat1), dl, MVT::i32)); |
1250 | 6 | SDValue VecW = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy, |
1251 | 6 | DAG.getConstant(uint32_t(SplatW), dl, MVT::i32)); |
1252 | 6 | SDValue VecN1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy, |
1253 | 6 | DAG.getConstant(-1, dl, MVT::i32)); |
1254 | 6 | // Do not use DAG.getNOT, because that would create BUILD_VECTOR with |
1255 | 6 | // a BITCAST. Here we can skip the BITCAST (so we don't have to handle |
1256 | 6 | // it separately in custom combine or selection). |
1257 | 6 | SDValue A = DAG.getNode(ISD::AND, dl, ResTy, |
1258 | 6 | {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}), |
1259 | 6 | DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})}); |
1260 | 6 | return DAG.getNode(ISD::SUB, dl, ResTy, |
1261 | 6 | {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)}); |
1262 | 6 | } |
1263 | | |
1264 | | SDValue |
1265 | 17 | HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const { |
1266 | 17 | MVT ResTy = ty(Op); |
1267 | 17 | assert(ResTy.isVector() && isHvxSingleTy(ResTy)); |
1268 | 17 | const SDLoc &dl(Op); |
1269 | 17 | SmallVector<int,256> ShuffMask; |
1270 | 17 | |
1271 | 17 | MVT ElemTy = ResTy.getVectorElementType(); |
1272 | 17 | unsigned VecLen = ResTy.getVectorNumElements(); |
1273 | 17 | SDValue Vs = Op.getOperand(0); |
1274 | 17 | SDValue Vt = Op.getOperand(1); |
1275 | 17 | |
1276 | 17 | switch (ElemTy.SimpleTy) { |
1277 | 17 | case MVT::i8: { |
1278 | 2 | // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), |
1279 | 2 | // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, |
1280 | 2 | // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). |
1281 | 2 | MVT ExtTy = typeExtElem(ResTy, 2); |
1282 | 2 | unsigned MpyOpc = ElemTy == MVT::i8 ? Hexagon::V6_vmpybv |
1283 | 2 | : Hexagon::V6_vmpyhv0 ; |
1284 | 2 | SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); |
1285 | 2 | |
1286 | 2 | // Discard high halves of the resulting values, collect the low halves. |
1287 | 98 | for (unsigned I = 0; I < VecLen; I += 296 ) { |
1288 | 96 | ShuffMask.push_back(I); // Pick even element. |
1289 | 96 | ShuffMask.push_back(I+VecLen); // Pick odd element. |
1290 | 96 | } |
1291 | 2 | VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG); |
1292 | 2 | SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); |
1293 | 2 | return DAG.getBitcast(ResTy, BS); |
1294 | 17 | } |
1295 | 17 | case MVT::i16: |
1296 | 3 | // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode. |
1297 | 3 | // (There is also V6_vmpyhv, which behaves in an analogous way to |
1298 | 3 | // V6_vmpybv.) |
1299 | 3 | return getInstr(Hexagon::V6_vmpyih, dl, ResTy, {Vs, Vt}, DAG); |
1300 | 17 | case MVT::i32: { |
1301 | 12 | // Use the following sequence for signed word multiply: |
1302 | 12 | // T0 = V6_vmpyiowh Vs, Vt |
1303 | 12 | // T1 = V6_vaslw T0, 16 |
1304 | 12 | // T2 = V6_vmpyiewuh_acc T1, Vs, Vt |
1305 | 12 | SDValue S16 = DAG.getConstant(16, dl, MVT::i32); |
1306 | 12 | SDValue T0 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG); |
1307 | 12 | SDValue T1 = getInstr(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG); |
1308 | 12 | SDValue T2 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, |
1309 | 12 | {T1, Vs, Vt}, DAG); |
1310 | 12 | return T2; |
1311 | 17 | } |
1312 | 17 | default: |
1313 | 0 | break; |
1314 | 0 | } |
1315 | 0 | return SDValue(); |
1316 | 0 | } |
1317 | | |
1318 | | SDValue |
1319 | 1 | HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { |
1320 | 1 | MVT ResTy = ty(Op); |
1321 | 1 | assert(ResTy.isVector()); |
1322 | 1 | const SDLoc &dl(Op); |
1323 | 1 | SmallVector<int,256> ShuffMask; |
1324 | 1 | |
1325 | 1 | MVT ElemTy = ResTy.getVectorElementType(); |
1326 | 1 | unsigned VecLen = ResTy.getVectorNumElements(); |
1327 | 1 | SDValue Vs = Op.getOperand(0); |
1328 | 1 | SDValue Vt = Op.getOperand(1); |
1329 | 1 | bool IsSigned = Op.getOpcode() == ISD::MULHS; |
1330 | 1 | |
1331 | 1 | if (ElemTy == MVT::i8 || ElemTy == MVT::i16) { |
1332 | 0 | // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), |
1333 | 0 | // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, |
1334 | 0 | // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). |
1335 | 0 | // For i16, use V6_vmpyhv, which behaves in an analogous way to |
1336 | 0 | // V6_vmpybv: results Lo and Hi are products of even/odd elements |
1337 | 0 | // respectively. |
1338 | 0 | MVT ExtTy = typeExtElem(ResTy, 2); |
1339 | 0 | unsigned MpyOpc = ElemTy == MVT::i8 |
1340 | 0 | ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv) |
1341 | 0 | : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv); |
1342 | 0 | SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); |
1343 | 0 |
|
1344 | 0 | // Discard low halves of the resulting values, collect the high halves. |
1345 | 0 | for (unsigned I = 0; I < VecLen; I += 2) { |
1346 | 0 | ShuffMask.push_back(I+1); // Pick even element. |
1347 | 0 | ShuffMask.push_back(I+VecLen+1); // Pick odd element. |
1348 | 0 | } |
1349 | 0 | VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG); |
1350 | 0 | SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); |
1351 | 0 | return DAG.getBitcast(ResTy, BS); |
1352 | 0 | } |
1353 | 1 | |
1354 | 1 | assert(ElemTy == MVT::i32); |
1355 | 1 | SDValue S16 = DAG.getConstant(16, dl, MVT::i32); |
1356 | 1 | |
1357 | 1 | if (IsSigned) { |
1358 | 1 | // mulhs(Vs,Vt) = |
1359 | 1 | // = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32 |
1360 | 1 | // = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16 |
1361 | 1 | // + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32 |
1362 | 1 | // = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16 |
1363 | 1 | // + Lo(Vs) *us Vt] >> 32 |
1364 | 1 | // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to |
1365 | 1 | // anything, so it cannot produce any carry over to higher bits), |
1366 | 1 | // so everything in [] can be shifted by 16 without loss of precision. |
1367 | 1 | // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16 |
1368 | 1 | // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16 |
1369 | 1 | // Denote Hi(Vs) = Vs': |
1370 | 1 | // = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16 |
1371 | 1 | // = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16 |
1372 | 1 | SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG); |
1373 | 1 | // Get Vs': |
1374 | 1 | SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG); |
1375 | 1 | SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, |
1376 | 1 | {T0, S0, Vt}, DAG); |
1377 | 1 | // Shift by 16: |
1378 | 1 | SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG); |
1379 | 1 | // Get Vs'*Hi(Vt): |
1380 | 1 | SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG); |
1381 | 1 | // Add: |
1382 | 1 | SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2}); |
1383 | 1 | return T3; |
1384 | 1 | } |
1385 | 0 | |
1386 | 0 | // Unsigned mulhw. (Would expansion using signed mulhw be better?) |
1387 | 0 | |
1388 | 0 | auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) { |
1389 | 0 | return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair); |
1390 | 0 | }; |
1391 | 0 | auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) { |
1392 | 0 | return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair); |
1393 | 0 | }; |
1394 | 0 |
|
1395 | 0 | MVT PairTy = typeJoin({ResTy, ResTy}); |
1396 | 0 | SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy, |
1397 | 0 | {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG); |
1398 | 0 | // Multiply-unsigned halfwords: |
1399 | 0 | // LoVec = Vs.uh[2i] * Vt.uh[2i], |
1400 | 0 | // HiVec = Vs.uh[2i+1] * Vt.uh[2i+1] |
1401 | 0 | SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG); |
1402 | 0 | // The low halves in the LoVec of the pair can be discarded. They are |
1403 | 0 | // not added to anything (in the full-precision product), so they cannot |
1404 | 0 | // produce a carry into the higher bits. |
1405 | 0 | SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG); |
1406 | 0 | // Swap low and high halves in Vt, and do the halfword multiplication |
1407 | 0 | // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i]. |
1408 | 0 | SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG); |
1409 | 0 | SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG); |
1410 | 0 | // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs). |
1411 | 0 | // These products are words, but cannot be added directly because the |
1412 | 0 | // sums could overflow. Add these products, by halfwords, where each sum |
1413 | 0 | // of a pair of halfwords gives a word. |
1414 | 0 | SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, |
1415 | 0 | {LoVec(T2), HiVec(T2)}, DAG); |
1416 | 0 | // Add the high halfwords from the products of the low halfwords. |
1417 | 0 | SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)}); |
1418 | 0 | SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG); |
1419 | 0 | SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)}); |
1420 | 0 | SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6}); |
1421 | 0 | return T7; |
1422 | 0 | } |
1423 | | |
1424 | | SDValue |
1425 | 1 | HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const { |
1426 | 1 | // Sign- and zero-extends are legal. |
1427 | 1 | assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG); |
1428 | 1 | return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op), |
1429 | 1 | Op.getOperand(0)); |
1430 | 1 | } |
1431 | | |
1432 | | SDValue |
1433 | 61 | HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const { |
1434 | 61 | if (SDValue S = getVectorShiftByInt(Op, DAG)) |
1435 | 25 | return S; |
1436 | 36 | return Op; |
1437 | 36 | } |
1438 | | |
1439 | | SDValue |
1440 | 39 | HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const { |
1441 | 39 | assert(!Op.isMachineOpcode()); |
1442 | 39 | SmallVector<SDValue,2> OpsL, OpsH; |
1443 | 39 | const SDLoc &dl(Op); |
1444 | 39 | |
1445 | 39 | auto SplitVTNode = [&DAG,this] (const VTSDNode *N) { |
1446 | 2 | MVT Ty = typeSplit(N->getVT().getSimpleVT()).first; |
1447 | 2 | SDValue TV = DAG.getValueType(Ty); |
1448 | 2 | return std::make_pair(TV, TV); |
1449 | 2 | }; |
1450 | 39 | |
1451 | 51 | for (SDValue A : Op.getNode()->ops()) { |
1452 | 51 | VectorPair P = Subtarget.isHVXVectorType(ty(A), true) |
1453 | 51 | ? opSplit(A, dl, DAG)46 |
1454 | 51 | : std::make_pair(A, A)5 ; |
1455 | 51 | // Special case for type operand. |
1456 | 51 | if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { |
1457 | 4 | if (const auto *N = dyn_cast<const VTSDNode>(A.getNode())) |
1458 | 2 | P = SplitVTNode(N); |
1459 | 4 | } |
1460 | 51 | OpsL.push_back(P.first); |
1461 | 51 | OpsH.push_back(P.second); |
1462 | 51 | } |
1463 | 39 | |
1464 | 39 | MVT ResTy = ty(Op); |
1465 | 39 | MVT HalfTy = typeSplit(ResTy).first; |
1466 | 39 | SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL); |
1467 | 39 | SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH); |
1468 | 39 | SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H); |
1469 | 39 | return S; |
1470 | 39 | } |
1471 | | |
1472 | | SDValue |
1473 | 310 | HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const { |
1474 | 310 | LSBaseSDNode *BN = cast<LSBaseSDNode>(Op.getNode()); |
1475 | 310 | assert(BN->isUnindexed()); |
1476 | 310 | MVT MemTy = BN->getMemoryVT().getSimpleVT(); |
1477 | 310 | if (!isHvxPairTy(MemTy)) |
1478 | 0 | return Op; |
1479 | 310 | |
1480 | 310 | const SDLoc &dl(Op); |
1481 | 310 | unsigned HwLen = Subtarget.getVectorLength(); |
1482 | 310 | MVT SingleTy = typeSplit(MemTy).first; |
1483 | 310 | SDValue Chain = BN->getChain(); |
1484 | 310 | SDValue Base0 = BN->getBasePtr(); |
1485 | 310 | SDValue Base1 = DAG.getMemBasePlusOffset(Base0, HwLen, dl); |
1486 | 310 | |
1487 | 310 | MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr; |
1488 | 310 | if (MachineMemOperand *MMO = BN->getMemOperand()) { |
1489 | 310 | MachineFunction &MF = DAG.getMachineFunction(); |
1490 | 310 | MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen); |
1491 | 310 | MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen); |
1492 | 310 | } |
1493 | 310 | |
1494 | 310 | unsigned MemOpc = BN->getOpcode(); |
1495 | 310 | SDValue NewOp; |
1496 | 310 | |
1497 | 310 | if (MemOpc == ISD::LOAD) { |
1498 | 124 | SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0); |
1499 | 124 | SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1); |
1500 | 124 | NewOp = DAG.getMergeValues( |
1501 | 124 | { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1), |
1502 | 124 | DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
1503 | 124 | Load0.getValue(1), Load1.getValue(1)) }, dl); |
1504 | 186 | } else { |
1505 | 186 | assert(MemOpc == ISD::STORE); |
1506 | 186 | VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG); |
1507 | 186 | SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0); |
1508 | 186 | SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1); |
1509 | 186 | NewOp = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1); |
1510 | 186 | } |
1511 | 310 | |
1512 | 310 | return NewOp; |
1513 | 310 | } |
1514 | | |
1515 | | SDValue |
1516 | 4.16k | HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { |
1517 | 4.16k | unsigned Opc = Op.getOpcode(); |
1518 | 4.16k | bool IsPairOp = isHvxPairTy(ty(Op)) || |
1519 | 27.2k | llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) 3.62k { |
1520 | 27.2k | return isHvxPairTy(ty(V)); |
1521 | 27.2k | }); |
1522 | 4.16k | |
1523 | 4.16k | if (IsPairOp) { |
1524 | 1.14k | switch (Opc) { |
1525 | 1.14k | default: |
1526 | 791 | break; |
1527 | 1.14k | case ISD::LOAD: |
1528 | 310 | case ISD::STORE: |
1529 | 310 | return SplitHvxMemOp(Op, DAG); |
1530 | 310 | case ISD::CTPOP: |
1531 | 39 | case ISD::CTLZ: |
1532 | 39 | case ISD::CTTZ: |
1533 | 39 | case ISD::MUL: |
1534 | 39 | case ISD::MULHS: |
1535 | 39 | case ISD::MULHU: |
1536 | 39 | case ISD::AND: |
1537 | 39 | case ISD::OR: |
1538 | 39 | case ISD::XOR: |
1539 | 39 | case ISD::SRA: |
1540 | 39 | case ISD::SHL: |
1541 | 39 | case ISD::SRL: |
1542 | 39 | case ISD::SETCC: |
1543 | 39 | case ISD::VSELECT: |
1544 | 39 | case ISD::SIGN_EXTEND: |
1545 | 39 | case ISD::ZERO_EXTEND: |
1546 | 39 | case ISD::SIGN_EXTEND_INREG: |
1547 | 39 | return SplitHvxPairOp(Op, DAG); |
1548 | 3.81k | } |
1549 | 3.81k | } |
1550 | 3.81k | |
1551 | 3.81k | switch (Opc) { |
1552 | 3.81k | default: |
1553 | 0 | break; |
1554 | 3.81k | case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG)306 ; |
1555 | 3.81k | case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG)370 ; |
1556 | 3.81k | case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG)0 ; |
1557 | 3.81k | case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG)0 ; |
1558 | 3.81k | case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG)397 ; |
1559 | 3.81k | case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG)236 ; |
1560 | 3.81k | |
1561 | 3.81k | case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG)6 ; |
1562 | 3.81k | case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG)18 ; |
1563 | 3.81k | case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG)40 ; |
1564 | 3.81k | case ISD::CTTZ: return LowerHvxCttz(Op, DAG)6 ; |
1565 | 3.81k | case ISD::SRA: |
1566 | 61 | case ISD::SHL: |
1567 | 61 | case ISD::SRL: return LowerHvxShift(Op, DAG); |
1568 | 61 | case ISD::MUL: return LowerHvxMul(Op, DAG)17 ; |
1569 | 61 | case ISD::MULHS: |
1570 | 1 | case ISD::MULHU: return LowerHvxMulh(Op, DAG); |
1571 | 1 | case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG); |
1572 | 150 | case ISD::SETCC: |
1573 | 150 | case ISD::INTRINSIC_VOID: return Op; |
1574 | 150 | // Unaligned loads will be handled by the default lowering. |
1575 | 2.20k | case ISD::LOAD: return SDValue(); |
1576 | 0 | } |
1577 | | #ifndef NDEBUG |
1578 | | Op.dumpr(&DAG); |
1579 | | #endif |
1580 | 0 | llvm_unreachable("Unhandled HVX operation"); |
1581 | 0 | } |
1582 | | |
1583 | | bool |
1584 | 10.8k | HexagonTargetLowering::isHvxOperation(SDValue Op) const { |
1585 | 10.8k | // If the type of the result, or any operand type are HVX vector types, |
1586 | 10.8k | // this is an HVX operation. |
1587 | 10.8k | return Subtarget.isHVXVectorType(ty(Op), true) || |
1588 | 10.8k | llvm::any_of(Op.getNode()->ops(), |
1589 | 18.3k | [this] (SDValue V) { |
1590 | 18.3k | return Subtarget.isHVXVectorType(ty(V), true); |
1591 | 18.3k | }); |
1592 | 10.8k | } |