/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | /// \file |
10 | | /// Custom DAG lowering for R600 |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "R600ISelLowering.h" |
15 | | #include "AMDGPUFrameLowering.h" |
16 | | #include "AMDGPUSubtarget.h" |
17 | | #include "R600Defines.h" |
18 | | #include "R600FrameLowering.h" |
19 | | #include "R600InstrInfo.h" |
20 | | #include "R600MachineFunctionInfo.h" |
21 | | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
22 | | #include "Utils/AMDGPUBaseInfo.h" |
23 | | #include "llvm/ADT/APFloat.h" |
24 | | #include "llvm/ADT/APInt.h" |
25 | | #include "llvm/ADT/ArrayRef.h" |
26 | | #include "llvm/ADT/DenseMap.h" |
27 | | #include "llvm/ADT/SmallVector.h" |
28 | | #include "llvm/CodeGen/CallingConvLower.h" |
29 | | #include "llvm/CodeGen/DAGCombine.h" |
30 | | #include "llvm/CodeGen/ISDOpcodes.h" |
31 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
32 | | #include "llvm/CodeGen/MachineFunction.h" |
33 | | #include "llvm/CodeGen/MachineInstr.h" |
34 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
35 | | #include "llvm/CodeGen/MachineMemOperand.h" |
36 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
37 | | #include "llvm/CodeGen/SelectionDAG.h" |
38 | | #include "llvm/IR/Constants.h" |
39 | | #include "llvm/IR/DerivedTypes.h" |
40 | | #include "llvm/Support/Casting.h" |
41 | | #include "llvm/Support/Compiler.h" |
42 | | #include "llvm/Support/ErrorHandling.h" |
43 | | #include "llvm/Support/MachineValueType.h" |
44 | | #include <cassert> |
45 | | #include <cstdint> |
46 | | #include <iterator> |
47 | | #include <utility> |
48 | | #include <vector> |
49 | | |
50 | | using namespace llvm; |
51 | | |
52 | | #include "R600GenCallingConv.inc" |
53 | | |
54 | | R600TargetLowering::R600TargetLowering(const TargetMachine &TM, |
55 | | const R600Subtarget &STI) |
56 | 290 | : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) { |
57 | 290 | addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass); |
58 | 290 | addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass); |
59 | 290 | addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass); |
60 | 290 | addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass); |
61 | 290 | addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass); |
62 | 290 | addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass); |
63 | 290 | |
64 | 290 | computeRegisterProperties(Subtarget->getRegisterInfo()); |
65 | 290 | |
66 | 290 | // Legalize loads and stores to the private address space. |
67 | 290 | setOperationAction(ISD::LOAD, MVT::i32, Custom); |
68 | 290 | setOperationAction(ISD::LOAD, MVT::v2i32, Custom); |
69 | 290 | setOperationAction(ISD::LOAD, MVT::v4i32, Custom); |
70 | 290 | |
71 | 290 | // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address |
72 | 290 | // spaces, so it is custom lowered to handle those where it isn't. |
73 | 1.74k | for (MVT VT : MVT::integer_valuetypes()) { |
74 | 1.74k | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
75 | 1.74k | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom); |
76 | 1.74k | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom); |
77 | 1.74k | |
78 | 1.74k | setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); |
79 | 1.74k | setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom); |
80 | 1.74k | setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom); |
81 | 1.74k | |
82 | 1.74k | setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); |
83 | 1.74k | setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom); |
84 | 1.74k | setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom); |
85 | 1.74k | } |
86 | 290 | |
87 | 290 | // Workaround for LegalizeDAG asserting on expansion of i1 vector loads. |
88 | 290 | setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand); |
89 | 290 | setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand); |
90 | 290 | setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand); |
91 | 290 | |
92 | 290 | setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand); |
93 | 290 | setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand); |
94 | 290 | setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand); |
95 | 290 | |
96 | 290 | setOperationAction(ISD::STORE, MVT::i8, Custom); |
97 | 290 | setOperationAction(ISD::STORE, MVT::i32, Custom); |
98 | 290 | setOperationAction(ISD::STORE, MVT::v2i32, Custom); |
99 | 290 | setOperationAction(ISD::STORE, MVT::v4i32, Custom); |
100 | 290 | |
101 | 290 | setTruncStoreAction(MVT::i32, MVT::i8, Custom); |
102 | 290 | setTruncStoreAction(MVT::i32, MVT::i16, Custom); |
103 | 290 | // We need to include these since trunc STORES to PRIVATE need |
104 | 290 | // special handling to accommodate RMW |
105 | 290 | setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); |
106 | 290 | setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom); |
107 | 290 | setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom); |
108 | 290 | setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom); |
109 | 290 | setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom); |
110 | 290 | setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); |
111 | 290 | setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); |
112 | 290 | setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom); |
113 | 290 | setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom); |
114 | 290 | setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom); |
115 | 290 | |
116 | 290 | // Workaround for LegalizeDAG asserting on expansion of i1 vector stores. |
117 | 290 | setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand); |
118 | 290 | setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand); |
119 | 290 | |
120 | 290 | // Set condition code actions |
121 | 290 | setCondCodeAction(ISD::SETO, MVT::f32, Expand); |
122 | 290 | setCondCodeAction(ISD::SETUO, MVT::f32, Expand); |
123 | 290 | setCondCodeAction(ISD::SETLT, MVT::f32, Expand); |
124 | 290 | setCondCodeAction(ISD::SETLE, MVT::f32, Expand); |
125 | 290 | setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); |
126 | 290 | setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); |
127 | 290 | setCondCodeAction(ISD::SETONE, MVT::f32, Expand); |
128 | 290 | setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); |
129 | 290 | setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); |
130 | 290 | setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); |
131 | 290 | setCondCodeAction(ISD::SETULT, MVT::f32, Expand); |
132 | 290 | setCondCodeAction(ISD::SETULE, MVT::f32, Expand); |
133 | 290 | |
134 | 290 | setCondCodeAction(ISD::SETLE, MVT::i32, Expand); |
135 | 290 | setCondCodeAction(ISD::SETLT, MVT::i32, Expand); |
136 | 290 | setCondCodeAction(ISD::SETULE, MVT::i32, Expand); |
137 | 290 | setCondCodeAction(ISD::SETULT, MVT::i32, Expand); |
138 | 290 | |
139 | 290 | setOperationAction(ISD::FCOS, MVT::f32, Custom); |
140 | 290 | setOperationAction(ISD::FSIN, MVT::f32, Custom); |
141 | 290 | |
142 | 290 | setOperationAction(ISD::SETCC, MVT::v4i32, Expand); |
143 | 290 | setOperationAction(ISD::SETCC, MVT::v2i32, Expand); |
144 | 290 | |
145 | 290 | setOperationAction(ISD::BR_CC, MVT::i32, Expand); |
146 | 290 | setOperationAction(ISD::BR_CC, MVT::f32, Expand); |
147 | 290 | setOperationAction(ISD::BRCOND, MVT::Other, Custom); |
148 | 290 | |
149 | 290 | setOperationAction(ISD::FSUB, MVT::f32, Expand); |
150 | 290 | |
151 | 290 | setOperationAction(ISD::FCEIL, MVT::f64, Custom); |
152 | 290 | setOperationAction(ISD::FTRUNC, MVT::f64, Custom); |
153 | 290 | setOperationAction(ISD::FRINT, MVT::f64, Custom); |
154 | 290 | setOperationAction(ISD::FFLOOR, MVT::f64, Custom); |
155 | 290 | |
156 | 290 | setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); |
157 | 290 | setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); |
158 | 290 | |
159 | 290 | setOperationAction(ISD::SETCC, MVT::i32, Expand); |
160 | 290 | setOperationAction(ISD::SETCC, MVT::f32, Expand); |
161 | 290 | setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); |
162 | 290 | setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom); |
163 | 290 | setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
164 | 290 | setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
165 | 290 | |
166 | 290 | setOperationAction(ISD::SELECT, MVT::i32, Expand); |
167 | 290 | setOperationAction(ISD::SELECT, MVT::f32, Expand); |
168 | 290 | setOperationAction(ISD::SELECT, MVT::v2i32, Expand); |
169 | 290 | setOperationAction(ISD::SELECT, MVT::v4i32, Expand); |
170 | 290 | |
171 | 290 | // ADD, SUB overflow. |
172 | 290 | // TODO: turn these into Legal? |
173 | 290 | if (Subtarget->hasCARRY()) |
174 | 257 | setOperationAction(ISD::UADDO, MVT::i32, Custom); |
175 | 290 | |
176 | 290 | if (Subtarget->hasBORROW()) |
177 | 257 | setOperationAction(ISD::USUBO, MVT::i32, Custom); |
178 | 290 | |
179 | 290 | // Expand sign extension of vectors |
180 | 290 | if (!Subtarget->hasBFE()) |
181 | 33 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
182 | 290 | |
183 | 290 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand); |
184 | 290 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand); |
185 | 290 | |
186 | 290 | if (!Subtarget->hasBFE()) |
187 | 33 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); |
188 | 290 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand); |
189 | 290 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand); |
190 | 290 | |
191 | 290 | if (!Subtarget->hasBFE()) |
192 | 33 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); |
193 | 290 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand); |
194 | 290 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand); |
195 | 290 | |
196 | 290 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); |
197 | 290 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand); |
198 | 290 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand); |
199 | 290 | |
200 | 290 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); |
201 | 290 | |
202 | 290 | setOperationAction(ISD::FrameIndex, MVT::i32, Custom); |
203 | 290 | |
204 | 290 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom); |
205 | 290 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom); |
206 | 290 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); |
207 | 290 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); |
208 | 290 | |
209 | 290 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom); |
210 | 290 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom); |
211 | 290 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); |
212 | 290 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
213 | 290 | |
214 | 290 | // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32 |
215 | 290 | // to be Legal/Custom in order to avoid library calls. |
216 | 290 | setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); |
217 | 290 | setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); |
218 | 290 | setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); |
219 | 290 | |
220 | 290 | if (!Subtarget->hasFMA()) { |
221 | 211 | setOperationAction(ISD::FMA, MVT::f32, Expand); |
222 | 211 | setOperationAction(ISD::FMA, MVT::f64, Expand); |
223 | 211 | } |
224 | 290 | |
225 | 290 | // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we |
226 | 290 | // need it for R600. |
227 | 290 | if (!Subtarget->hasFP32Denormals()) |
228 | 290 | setOperationAction(ISD::FMAD, MVT::f32, Legal); |
229 | 290 | |
230 | 290 | if (!Subtarget->hasBFI()) { |
231 | 33 | // fcopysign can be done in a single instruction with BFI. |
232 | 33 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); |
233 | 33 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
234 | 33 | } |
235 | 290 | |
236 | 290 | if (!Subtarget->hasBCNT(32)) |
237 | 33 | setOperationAction(ISD::CTPOP, MVT::i32, Expand); |
238 | 290 | |
239 | 290 | if (!Subtarget->hasBCNT(64)) |
240 | 290 | setOperationAction(ISD::CTPOP, MVT::i64, Expand); |
241 | 290 | |
242 | 290 | if (Subtarget->hasFFBH()) |
243 | 257 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom); |
244 | 290 | |
245 | 290 | if (Subtarget->hasFFBL()) |
246 | 257 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom); |
247 | 290 | |
248 | 290 | // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we |
249 | 290 | // need it for R600. |
250 | 290 | if (Subtarget->hasBFE()) |
251 | 257 | setHasExtractBitsInsn(true); |
252 | 290 | |
253 | 290 | setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); |
254 | 290 | |
255 | 290 | const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; |
256 | 580 | for (MVT VT : ScalarIntVTs) { |
257 | 580 | setOperationAction(ISD::ADDC, VT, Expand); |
258 | 580 | setOperationAction(ISD::SUBC, VT, Expand); |
259 | 580 | setOperationAction(ISD::ADDE, VT, Expand); |
260 | 580 | setOperationAction(ISD::SUBE, VT, Expand); |
261 | 580 | } |
262 | 290 | |
263 | 290 | // LLVM will expand these to atomic_cmp_swap(0) |
264 | 290 | // and atomic_swap, respectively. |
265 | 290 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); |
266 | 290 | setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); |
267 | 290 | |
268 | 290 | // We need to custom lower some of the intrinsics |
269 | 290 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
270 | 290 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
271 | 290 | |
272 | 290 | setSchedulingPreference(Sched::Source); |
273 | 290 | |
274 | 290 | setTargetDAGCombine(ISD::FP_ROUND); |
275 | 290 | setTargetDAGCombine(ISD::FP_TO_SINT); |
276 | 290 | setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); |
277 | 290 | setTargetDAGCombine(ISD::SELECT_CC); |
278 | 290 | setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); |
279 | 290 | setTargetDAGCombine(ISD::LOAD); |
280 | 290 | } |
281 | | |
282 | 2.50k | static inline bool isEOP(MachineBasicBlock::iterator I) { |
283 | 2.50k | if (std::next(I) == I->getParent()->end()) |
284 | 8 | return false; |
285 | 2.49k | return std::next(I)->getOpcode() == R600::RETURN; |
286 | 2.49k | } |
287 | | |
288 | | MachineBasicBlock * |
289 | | R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, |
290 | 9.37k | MachineBasicBlock *BB) const { |
291 | 9.37k | MachineFunction *MF = BB->getParent(); |
292 | 9.37k | MachineRegisterInfo &MRI = MF->getRegInfo(); |
293 | 9.37k | MachineBasicBlock::iterator I = MI; |
294 | 9.37k | const R600InstrInfo *TII = Subtarget->getInstrInfo(); |
295 | 9.37k | |
296 | 9.37k | switch (MI.getOpcode()) { |
297 | 9.37k | default: |
298 | 916 | // Replace LDS_*_RET instruction that don't have any uses with the |
299 | 916 | // equivalent LDS_*_NORET instruction. |
300 | 916 | if (TII->isLDSRetInstr(MI.getOpcode())) { |
301 | 916 | int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst); |
302 | 916 | assert(DstIdx != -1); |
303 | 916 | MachineInstrBuilder NewMI; |
304 | 916 | // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add |
305 | 916 | // LDS_1A2D support and remove this special case. |
306 | 916 | if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) || |
307 | 916 | MI.getOpcode() == R600::LDS_CMPST_RET30 ) |
308 | 886 | return BB; |
309 | 30 | |
310 | 30 | NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), |
311 | 30 | TII->get(R600::getLDSNoRetOp(MI.getOpcode()))); |
312 | 300 | for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i270 ) { |
313 | 270 | NewMI.add(MI.getOperand(i)); |
314 | 270 | } |
315 | 30 | } else { |
316 | 0 | return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); |
317 | 0 | } |
318 | 30 | break; |
319 | 30 | |
320 | 30 | case R600::FABS_R600: { |
321 | 20 | MachineInstr *NewMI = TII->buildDefaultInstruction( |
322 | 20 | *BB, I, R600::MOV, MI.getOperand(0).getReg(), |
323 | 20 | MI.getOperand(1).getReg()); |
324 | 20 | TII->addFlag(*NewMI, 0, MO_FLAG_ABS); |
325 | 20 | break; |
326 | 30 | } |
327 | 30 | |
328 | 30 | case R600::FNEG_R600: { |
329 | 20 | MachineInstr *NewMI = TII->buildDefaultInstruction( |
330 | 20 | *BB, I, R600::MOV, MI.getOperand(0).getReg(), |
331 | 20 | MI.getOperand(1).getReg()); |
332 | 20 | TII->addFlag(*NewMI, 0, MO_FLAG_NEG); |
333 | 20 | break; |
334 | 30 | } |
335 | 30 | |
336 | 30 | case R600::MASK_WRITE: { |
337 | 0 | unsigned maskedRegister = MI.getOperand(0).getReg(); |
338 | 0 | assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); |
339 | 0 | MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); |
340 | 0 | TII->addFlag(*defInstr, 0, MO_FLAG_MASK); |
341 | 0 | break; |
342 | 30 | } |
343 | 30 | |
344 | 30 | case R600::MOV_IMM_F32: |
345 | 17 | TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1) |
346 | 17 | .getFPImm() |
347 | 17 | ->getValueAPF() |
348 | 17 | .bitcastToAPInt() |
349 | 17 | .getZExtValue()); |
350 | 17 | break; |
351 | 30 | |
352 | 559 | case R600::MOV_IMM_I32: |
353 | 559 | TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), |
354 | 559 | MI.getOperand(1).getImm()); |
355 | 559 | break; |
356 | 30 | |
357 | 30 | case R600::MOV_IMM_GLOBAL_ADDR: { |
358 | 1 | //TODO: Perhaps combine this instruction with the next if possible |
359 | 1 | auto MIB = TII->buildDefaultInstruction( |
360 | 1 | *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X); |
361 | 1 | int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal); |
362 | 1 | //TODO: Ugh this is rather ugly |
363 | 1 | MIB->getOperand(Idx) = MI.getOperand(1); |
364 | 1 | break; |
365 | 30 | } |
366 | 30 | |
367 | 2.81k | case R600::CONST_COPY: { |
368 | 2.81k | MachineInstr *NewMI = TII->buildDefaultInstruction( |
369 | 2.81k | *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST); |
370 | 2.81k | TII->setImmOperand(*NewMI, R600::OpName::src0_sel, |
371 | 2.81k | MI.getOperand(1).getImm()); |
372 | 2.81k | break; |
373 | 30 | } |
374 | 30 | |
375 | 2.44k | case R600::RAT_WRITE_CACHELESS_32_eg: |
376 | 2.44k | case R600::RAT_WRITE_CACHELESS_64_eg: |
377 | 2.44k | case R600::RAT_WRITE_CACHELESS_128_eg: |
378 | 2.44k | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) |
379 | 2.44k | .add(MI.getOperand(0)) |
380 | 2.44k | .add(MI.getOperand(1)) |
381 | 2.44k | .addImm(isEOP(I)); // Set End of program bit |
382 | 2.44k | break; |
383 | 2.44k | |
384 | 2.44k | case R600::RAT_STORE_TYPED_eg: |
385 | 2 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) |
386 | 2 | .add(MI.getOperand(0)) |
387 | 2 | .add(MI.getOperand(1)) |
388 | 2 | .add(MI.getOperand(2)) |
389 | 2 | .addImm(isEOP(I)); // Set End of program bit |
390 | 2 | break; |
391 | 2.44k | |
392 | 2.44k | case R600::BRANCH: |
393 | 139 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP)) |
394 | 139 | .add(MI.getOperand(0)); |
395 | 139 | break; |
396 | 2.44k | |
397 | 2.44k | case R600::BRANCH_COND_f32: { |
398 | 0 | MachineInstr *NewMI = |
399 | 0 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), |
400 | 0 | R600::PREDICATE_BIT) |
401 | 0 | .add(MI.getOperand(1)) |
402 | 0 | .addImm(R600::PRED_SETNE) |
403 | 0 | .addImm(0); // Flags |
404 | 0 | TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); |
405 | 0 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) |
406 | 0 | .add(MI.getOperand(0)) |
407 | 0 | .addReg(R600::PREDICATE_BIT, RegState::Kill); |
408 | 0 | break; |
409 | 2.44k | } |
410 | 2.44k | |
411 | 2.44k | case R600::BRANCH_COND_i32: { |
412 | 88 | MachineInstr *NewMI = |
413 | 88 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X), |
414 | 88 | R600::PREDICATE_BIT) |
415 | 88 | .add(MI.getOperand(1)) |
416 | 88 | .addImm(R600::PRED_SETNE_INT) |
417 | 88 | .addImm(0); // Flags |
418 | 88 | TII->addFlag(*NewMI, 0, MO_FLAG_PUSH); |
419 | 88 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND)) |
420 | 88 | .add(MI.getOperand(0)) |
421 | 88 | .addReg(R600::PREDICATE_BIT, RegState::Kill); |
422 | 88 | break; |
423 | 2.44k | } |
424 | 2.44k | |
425 | 2.44k | case R600::EG_ExportSwz: |
426 | 60 | case R600::R600_ExportSwz: { |
427 | 60 | // Instruction is left unmodified if its not the last one of its type |
428 | 60 | bool isLastInstructionOfItsType = true; |
429 | 60 | unsigned InstExportType = MI.getOperand(1).getImm(); |
430 | 60 | for (MachineBasicBlock::iterator NextExportInst = std::next(I), |
431 | 189 | EndBlock = BB->end(); NextExportInst != EndBlock; |
432 | 138 | NextExportInst = std::next(NextExportInst)129 ) { |
433 | 138 | if (NextExportInst->getOpcode() == R600::EG_ExportSwz || |
434 | 138 | NextExportInst->getOpcode() == R600::R600_ExportSwz116 ) { |
435 | 23 | unsigned CurrentInstExportType = NextExportInst->getOperand(1) |
436 | 23 | .getImm(); |
437 | 23 | if (CurrentInstExportType == InstExportType) { |
438 | 9 | isLastInstructionOfItsType = false; |
439 | 9 | break; |
440 | 9 | } |
441 | 23 | } |
442 | 138 | } |
443 | 60 | bool EOP = isEOP(I); |
444 | 60 | if (!EOP && !isLastInstructionOfItsType14 ) |
445 | 9 | return BB; |
446 | 51 | unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 8441 : 4010 ; |
447 | 51 | BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode())) |
448 | 51 | .add(MI.getOperand(0)) |
449 | 51 | .add(MI.getOperand(1)) |
450 | 51 | .add(MI.getOperand(2)) |
451 | 51 | .add(MI.getOperand(3)) |
452 | 51 | .add(MI.getOperand(4)) |
453 | 51 | .add(MI.getOperand(5)) |
454 | 51 | .add(MI.getOperand(6)) |
455 | 51 | .addImm(CfInst) |
456 | 51 | .addImm(EOP); |
457 | 51 | break; |
458 | 51 | } |
459 | 2.28k | case R600::RETURN: { |
460 | 2.28k | return BB; |
461 | 6.18k | } |
462 | 6.18k | } |
463 | 6.18k | |
464 | 6.18k | MI.eraseFromParent(); |
465 | 6.18k | return BB; |
466 | 6.18k | } |
467 | | |
468 | | //===----------------------------------------------------------------------===// |
469 | | // Custom DAG Lowering Operations |
470 | | //===----------------------------------------------------------------------===// |
471 | | |
472 | 99.8k | SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { |
473 | 99.8k | MachineFunction &MF = DAG.getMachineFunction(); |
474 | 99.8k | R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); |
475 | 99.8k | switch (Op.getOpcode()) { |
476 | 99.8k | default: return AMDGPUTargetLowering::LowerOperation(Op, DAG)458 ; |
477 | 99.8k | case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG)10.4k ; |
478 | 99.8k | case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG)7 ; |
479 | 99.8k | case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG)50 ; |
480 | 99.8k | case ISD::SRA_PARTS: |
481 | 28 | case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); |
482 | 51 | case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY); |
483 | 642 | case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW); |
484 | 28 | case ISD::FCOS: |
485 | 17 | case ISD::FSIN: return LowerTrig(Op, DAG); |
486 | 14.7k | case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); |
487 | 35.2k | case ISD::STORE: return LowerSTORE(Op, DAG); |
488 | 35.6k | case ISD::LOAD: { |
489 | 35.6k | SDValue Result = LowerLOAD(Op, DAG); |
490 | 35.6k | assert((!Result.getNode() || |
491 | 35.6k | Result.getNode()->getNumValues() == 2) && |
492 | 35.6k | "Load should return a value and a chain"); |
493 | 35.6k | return Result; |
494 | 17 | } |
495 | 17 | |
496 | 88 | case ISD::BRCOND: return LowerBRCOND(Op, DAG); |
497 | 57 | case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); |
498 | 1.59k | case ISD::FrameIndex: return lowerFrameIndex(Op, DAG); |
499 | 82 | case ISD::INTRINSIC_VOID: { |
500 | 82 | SDValue Chain = Op.getOperand(0); |
501 | 82 | unsigned IntrinsicID = |
502 | 82 | cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); |
503 | 82 | switch (IntrinsicID) { |
504 | 82 | case Intrinsic::r600_store_swizzle: { |
505 | 60 | SDLoc DL(Op); |
506 | 60 | const SDValue Args[8] = { |
507 | 60 | Chain, |
508 | 60 | Op.getOperand(2), // Export Value |
509 | 60 | Op.getOperand(3), // ArrayBase |
510 | 60 | Op.getOperand(4), // Type |
511 | 60 | DAG.getConstant(0, DL, MVT::i32), // SWZ_X |
512 | 60 | DAG.getConstant(1, DL, MVT::i32), // SWZ_Y |
513 | 60 | DAG.getConstant(2, DL, MVT::i32), // SWZ_Z |
514 | 60 | DAG.getConstant(3, DL, MVT::i32) // SWZ_W |
515 | 60 | }; |
516 | 60 | return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args); |
517 | 82 | } |
518 | 82 | |
519 | 82 | // default for switch(IntrinsicID) |
520 | 82 | default: break22 ; |
521 | 22 | } |
522 | 22 | // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) |
523 | 22 | break; |
524 | 22 | } |
525 | 669 | case ISD::INTRINSIC_WO_CHAIN: { |
526 | 669 | unsigned IntrinsicID = |
527 | 669 | cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); |
528 | 669 | EVT VT = Op.getValueType(); |
529 | 669 | SDLoc DL(Op); |
530 | 669 | switch (IntrinsicID) { |
531 | 669 | case Intrinsic::r600_tex: |
532 | 276 | case Intrinsic::r600_texc: { |
533 | 276 | unsigned TextureOp; |
534 | 276 | switch (IntrinsicID) { |
535 | 276 | case Intrinsic::r600_tex: |
536 | 269 | TextureOp = 0; |
537 | 269 | break; |
538 | 276 | case Intrinsic::r600_texc: |
539 | 7 | TextureOp = 1; |
540 | 7 | break; |
541 | 276 | default: |
542 | 0 | llvm_unreachable("unhandled texture operation"); |
543 | 276 | } |
544 | 276 | |
545 | 276 | SDValue TexArgs[19] = { |
546 | 276 | DAG.getConstant(TextureOp, DL, MVT::i32), |
547 | 276 | Op.getOperand(1), |
548 | 276 | DAG.getConstant(0, DL, MVT::i32), |
549 | 276 | DAG.getConstant(1, DL, MVT::i32), |
550 | 276 | DAG.getConstant(2, DL, MVT::i32), |
551 | 276 | DAG.getConstant(3, DL, MVT::i32), |
552 | 276 | Op.getOperand(2), |
553 | 276 | Op.getOperand(3), |
554 | 276 | Op.getOperand(4), |
555 | 276 | DAG.getConstant(0, DL, MVT::i32), |
556 | 276 | DAG.getConstant(1, DL, MVT::i32), |
557 | 276 | DAG.getConstant(2, DL, MVT::i32), |
558 | 276 | DAG.getConstant(3, DL, MVT::i32), |
559 | 276 | Op.getOperand(5), |
560 | 276 | Op.getOperand(6), |
561 | 276 | Op.getOperand(7), |
562 | 276 | Op.getOperand(8), |
563 | 276 | Op.getOperand(9), |
564 | 276 | Op.getOperand(10) |
565 | 276 | }; |
566 | 276 | return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs); |
567 | 276 | } |
568 | 276 | case Intrinsic::r600_dot4: { |
569 | 32 | SDValue Args[8] = { |
570 | 32 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), |
571 | 32 | DAG.getConstant(0, DL, MVT::i32)), |
572 | 32 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), |
573 | 32 | DAG.getConstant(0, DL, MVT::i32)), |
574 | 32 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), |
575 | 32 | DAG.getConstant(1, DL, MVT::i32)), |
576 | 32 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), |
577 | 32 | DAG.getConstant(1, DL, MVT::i32)), |
578 | 32 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), |
579 | 32 | DAG.getConstant(2, DL, MVT::i32)), |
580 | 32 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), |
581 | 32 | DAG.getConstant(2, DL, MVT::i32)), |
582 | 32 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1), |
583 | 32 | DAG.getConstant(3, DL, MVT::i32)), |
584 | 32 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), |
585 | 32 | DAG.getConstant(3, DL, MVT::i32)) |
586 | 32 | }; |
587 | 32 | return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args); |
588 | 276 | } |
589 | 276 | |
590 | 276 | case Intrinsic::r600_implicitarg_ptr: { |
591 | 2 | MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS); |
592 | 2 | uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT); |
593 | 2 | return DAG.getConstant(ByteOffset, DL, PtrVT); |
594 | 276 | } |
595 | 276 | case Intrinsic::r600_read_ngroups_x: |
596 | 1 | return LowerImplicitParameter(DAG, VT, DL, 0); |
597 | 276 | case Intrinsic::r600_read_ngroups_y: |
598 | 1 | return LowerImplicitParameter(DAG, VT, DL, 1); |
599 | 276 | case Intrinsic::r600_read_ngroups_z: |
600 | 1 | return LowerImplicitParameter(DAG, VT, DL, 2); |
601 | 276 | case Intrinsic::r600_read_global_size_x: |
602 | 2 | return LowerImplicitParameter(DAG, VT, DL, 3); |
603 | 276 | case Intrinsic::r600_read_global_size_y: |
604 | 2 | return LowerImplicitParameter(DAG, VT, DL, 4); |
605 | 276 | case Intrinsic::r600_read_global_size_z: |
606 | 2 | return LowerImplicitParameter(DAG, VT, DL, 5); |
607 | 276 | case Intrinsic::r600_read_local_size_x: |
608 | 8 | return LowerImplicitParameter(DAG, VT, DL, 6); |
609 | 276 | case Intrinsic::r600_read_local_size_y: |
610 | 36 | return LowerImplicitParameter(DAG, VT, DL, 7); |
611 | 276 | case Intrinsic::r600_read_local_size_z: |
612 | 36 | return LowerImplicitParameter(DAG, VT, DL, 8); |
613 | 276 | |
614 | 276 | case Intrinsic::r600_read_tgid_x: |
615 | 4 | return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, |
616 | 4 | R600::T1_X, VT); |
617 | 276 | case Intrinsic::r600_read_tgid_y: |
618 | 3 | return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, |
619 | 3 | R600::T1_Y, VT); |
620 | 276 | case Intrinsic::r600_read_tgid_z: |
621 | 3 | return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, |
622 | 3 | R600::T1_Z, VT); |
623 | 276 | case Intrinsic::r600_read_tidig_x: |
624 | 184 | return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, |
625 | 184 | R600::T0_X, VT); |
626 | 276 | case Intrinsic::r600_read_tidig_y: |
627 | 32 | return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, |
628 | 32 | R600::T0_Y, VT); |
629 | 276 | case Intrinsic::r600_read_tidig_z: |
630 | 32 | return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, |
631 | 32 | R600::T0_Z, VT); |
632 | 276 | |
633 | 276 | case Intrinsic::r600_recipsqrt_ieee: |
634 | 3 | return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); |
635 | 276 | |
636 | 276 | case Intrinsic::r600_recipsqrt_clamped: |
637 | 5 | return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1)); |
638 | 276 | default: |
639 | 4 | return Op; |
640 | 0 | } |
641 | 0 | |
642 | 0 | // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) |
643 | 0 | break; |
644 | 0 | } |
645 | 22 | } // end switch(Op.getOpcode()) |
646 | 22 | return SDValue(); |
647 | 22 | } |
648 | | |
649 | | void R600TargetLowering::ReplaceNodeResults(SDNode *N, |
650 | | SmallVectorImpl<SDValue> &Results, |
651 | 99 | SelectionDAG &DAG) const { |
652 | 99 | switch (N->getOpcode()) { |
653 | 99 | default: |
654 | 46 | AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG); |
655 | 46 | return; |
656 | 99 | case ISD::FP_TO_UINT: |
657 | 9 | if (N->getValueType(0) == MVT::i1) { |
658 | 2 | Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG)); |
659 | 2 | return; |
660 | 2 | } |
661 | 7 | // Since we don't care about out of bounds values we can use FP_TO_SINT for |
662 | 7 | // uints too. The DAGLegalizer code for uint considers some extra cases |
663 | 7 | // which are not necessary here. |
664 | 7 | LLVM_FALLTHROUGH; |
665 | 16 | case ISD::FP_TO_SINT: { |
666 | 16 | if (N->getValueType(0) == MVT::i1) { |
667 | 2 | Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG)); |
668 | 2 | return; |
669 | 2 | } |
670 | 14 | |
671 | 14 | SDValue Result; |
672 | 14 | if (expandFP_TO_SINT(N, Result, DAG)) |
673 | 14 | Results.push_back(Result); |
674 | 14 | return; |
675 | 14 | } |
676 | 14 | case ISD::SDIVREM: { |
677 | 12 | SDValue Op = SDValue(N, 1); |
678 | 12 | SDValue RES = LowerSDIVREM(Op, DAG); |
679 | 12 | Results.push_back(RES); |
680 | 12 | Results.push_back(RES.getValue(1)); |
681 | 12 | break; |
682 | 14 | } |
683 | 23 | case ISD::UDIVREM: { |
684 | 23 | SDValue Op = SDValue(N, 0); |
685 | 23 | LowerUDIVREM64(Op, DAG, Results); |
686 | 23 | break; |
687 | 14 | } |
688 | 99 | } |
689 | 99 | } |
690 | | |
691 | | SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG, |
692 | 16 | SDValue Vector) const { |
693 | 16 | SDLoc DL(Vector); |
694 | 16 | EVT VecVT = Vector.getValueType(); |
695 | 16 | EVT EltVT = VecVT.getVectorElementType(); |
696 | 16 | SmallVector<SDValue, 8> Args; |
697 | 16 | |
698 | 64 | for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i48 ) { |
699 | 48 | Args.push_back(DAG.getNode( |
700 | 48 | ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector, |
701 | 48 | DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout())))); |
702 | 48 | } |
703 | 16 | |
704 | 16 | return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args); |
705 | 16 | } |
706 | | |
707 | | SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, |
708 | 10.4k | SelectionDAG &DAG) const { |
709 | 10.4k | SDLoc DL(Op); |
710 | 10.4k | SDValue Vector = Op.getOperand(0); |
711 | 10.4k | SDValue Index = Op.getOperand(1); |
712 | 10.4k | |
713 | 10.4k | if (isa<ConstantSDNode>(Index) || |
714 | 10.4k | Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR42 ) |
715 | 10.4k | return Op; |
716 | 14 | |
717 | 14 | Vector = vectorToVerticalVector(DAG, Vector); |
718 | 14 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), |
719 | 14 | Vector, Index); |
720 | 14 | } |
721 | | |
722 | | SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, |
723 | 7 | SelectionDAG &DAG) const { |
724 | 7 | SDLoc DL(Op); |
725 | 7 | SDValue Vector = Op.getOperand(0); |
726 | 7 | SDValue Value = Op.getOperand(1); |
727 | 7 | SDValue Index = Op.getOperand(2); |
728 | 7 | |
729 | 7 | if (isa<ConstantSDNode>(Index) || |
730 | 7 | Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR3 ) |
731 | 6 | return Op; |
732 | 1 | |
733 | 1 | Vector = vectorToVerticalVector(DAG, Vector); |
734 | 1 | SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), |
735 | 1 | Vector, Value, Index); |
736 | 1 | return vectorToVerticalVector(DAG, Insert); |
737 | 1 | } |
738 | | |
739 | | SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, |
740 | | SDValue Op, |
741 | 57 | SelectionDAG &DAG) const { |
742 | 57 | GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op); |
743 | 57 | if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) |
744 | 42 | return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); |
745 | 15 | |
746 | 15 | const DataLayout &DL = DAG.getDataLayout(); |
747 | 15 | const GlobalValue *GV = GSD->getGlobal(); |
748 | 15 | MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); |
749 | 15 | |
750 | 15 | SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT); |
751 | 15 | return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA); |
752 | 15 | } |
753 | | |
754 | 17 | SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { |
755 | 17 | // On hw >= R700, COS/SIN input must be between -1. and 1. |
756 | 17 | // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5) |
757 | 17 | EVT VT = Op.getValueType(); |
758 | 17 | SDValue Arg = Op.getOperand(0); |
759 | 17 | SDLoc DL(Op); |
760 | 17 | |
761 | 17 | // TODO: Should this propagate fast-math-flags? |
762 | 17 | SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT, |
763 | 17 | DAG.getNode(ISD::FADD, DL, VT, |
764 | 17 | DAG.getNode(ISD::FMUL, DL, VT, Arg, |
765 | 17 | DAG.getConstantFP(0.15915494309, DL, MVT::f32)), |
766 | 17 | DAG.getConstantFP(0.5, DL, MVT::f32))); |
767 | 17 | unsigned TrigNode; |
768 | 17 | switch (Op.getOpcode()) { |
769 | 17 | case ISD::FCOS: |
770 | 6 | TrigNode = AMDGPUISD::COS_HW; |
771 | 6 | break; |
772 | 17 | case ISD::FSIN: |
773 | 11 | TrigNode = AMDGPUISD::SIN_HW; |
774 | 11 | break; |
775 | 17 | default: |
776 | 0 | llvm_unreachable("Wrong trig opcode"); |
777 | 17 | } |
778 | 17 | SDValue TrigVal = DAG.getNode(TrigNode, DL, VT, |
779 | 17 | DAG.getNode(ISD::FADD, DL, VT, FractPart, |
780 | 17 | DAG.getConstantFP(-0.5, DL, MVT::f32))); |
781 | 17 | if (Gen >= AMDGPUSubtarget::R700) |
782 | 17 | return TrigVal; |
783 | 0 | // On R600 hw, COS/SIN input must be between -Pi and Pi. |
784 | 0 | return DAG.getNode(ISD::FMUL, DL, VT, TrigVal, |
785 | 0 | DAG.getConstantFP(3.14159265359, DL, MVT::f32)); |
786 | 0 | } |
787 | | |
788 | 50 | SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const { |
789 | 50 | SDLoc DL(Op); |
790 | 50 | EVT VT = Op.getValueType(); |
791 | 50 | |
792 | 50 | SDValue Lo = Op.getOperand(0); |
793 | 50 | SDValue Hi = Op.getOperand(1); |
794 | 50 | SDValue Shift = Op.getOperand(2); |
795 | 50 | SDValue Zero = DAG.getConstant(0, DL, VT); |
796 | 50 | SDValue One = DAG.getConstant(1, DL, VT); |
797 | 50 | |
798 | 50 | SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT); |
799 | 50 | SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); |
800 | 50 | SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); |
801 | 50 | SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); |
802 | 50 | |
803 | 50 | // The dance around Width1 is necessary for 0 special case. |
804 | 50 | // Without it the CompShift might be 32, producing incorrect results in |
805 | 50 | // Overflow. So we do the shift in two steps, the alternative is to |
806 | 50 | // add a conditional to filter the special case. |
807 | 50 | |
808 | 50 | SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift); |
809 | 50 | Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One); |
810 | 50 | |
811 | 50 | SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift); |
812 | 50 | HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow); |
813 | 50 | SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift); |
814 | 50 | |
815 | 50 | SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift); |
816 | 50 | SDValue LoBig = Zero; |
817 | 50 | |
818 | 50 | Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); |
819 | 50 | Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); |
820 | 50 | |
821 | 50 | return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); |
822 | 50 | } |
823 | | |
824 | 28 | SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { |
825 | 28 | SDLoc DL(Op); |
826 | 28 | EVT VT = Op.getValueType(); |
827 | 28 | |
828 | 28 | SDValue Lo = Op.getOperand(0); |
829 | 28 | SDValue Hi = Op.getOperand(1); |
830 | 28 | SDValue Shift = Op.getOperand(2); |
831 | 28 | SDValue Zero = DAG.getConstant(0, DL, VT); |
832 | 28 | SDValue One = DAG.getConstant(1, DL, VT); |
833 | 28 | |
834 | 28 | const bool SRA = Op.getOpcode() == ISD::SRA_PARTS; |
835 | 28 | |
836 | 28 | SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT); |
837 | 28 | SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); |
838 | 28 | SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); |
839 | 28 | SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); |
840 | 28 | |
841 | 28 | // The dance around Width1 is necessary for 0 special case. |
842 | 28 | // Without it the CompShift might be 32, producing incorrect results in |
843 | 28 | // Overflow. So we do the shift in two steps, the alternative is to |
844 | 28 | // add a conditional to filter the special case. |
845 | 28 | |
846 | 28 | SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift); |
847 | 28 | Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One); |
848 | 28 | |
849 | 28 | SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA7 : ISD::SRL21 , DL, VT, Hi, Shift); |
850 | 28 | SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift); |
851 | 28 | LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow); |
852 | 28 | |
853 | 28 | SDValue LoBig = DAG.getNode(SRA ? ISD::SRA7 : ISD::SRL21 , DL, VT, Hi, BigShift); |
854 | 28 | SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1)7 : Zero21 ; |
855 | 28 | |
856 | 28 | Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); |
857 | 28 | Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); |
858 | 28 | |
859 | 28 | return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); |
860 | 28 | } |
861 | | |
862 | | SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, |
863 | 693 | unsigned mainop, unsigned ovf) const { |
864 | 693 | SDLoc DL(Op); |
865 | 693 | EVT VT = Op.getValueType(); |
866 | 693 | |
867 | 693 | SDValue Lo = Op.getOperand(0); |
868 | 693 | SDValue Hi = Op.getOperand(1); |
869 | 693 | |
870 | 693 | SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi); |
871 | 693 | // Extend sign. |
872 | 693 | OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF, |
873 | 693 | DAG.getValueType(MVT::i1)); |
874 | 693 | |
875 | 693 | SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi); |
876 | 693 | |
877 | 693 | return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF); |
878 | 693 | } |
879 | | |
880 | 2 | SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const { |
881 | 2 | SDLoc DL(Op); |
882 | 2 | return DAG.getNode( |
883 | 2 | ISD::SETCC, |
884 | 2 | DL, |
885 | 2 | MVT::i1, |
886 | 2 | Op, DAG.getConstantFP(1.0f, DL, MVT::f32), |
887 | 2 | DAG.getCondCode(ISD::SETEQ)); |
888 | 2 | } |
889 | | |
890 | 2 | SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { |
891 | 2 | SDLoc DL(Op); |
892 | 2 | return DAG.getNode( |
893 | 2 | ISD::SETCC, |
894 | 2 | DL, |
895 | 2 | MVT::i1, |
896 | 2 | Op, DAG.getConstantFP(-1.0f, DL, MVT::f32), |
897 | 2 | DAG.getCondCode(ISD::SETEQ)); |
898 | 2 | } |
899 | | |
900 | | SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, |
901 | | const SDLoc &DL, |
902 | 89 | unsigned DwordOffset) const { |
903 | 89 | unsigned ByteOffset = DwordOffset * 4; |
904 | 89 | PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), |
905 | 89 | AMDGPUAS::PARAM_I_ADDRESS); |
906 | 89 | |
907 | 89 | // We shouldn't be using an offset wider than 16-bits for implicit parameters. |
908 | 89 | assert(isInt<16>(ByteOffset)); |
909 | 89 | |
910 | 89 | return DAG.getLoad(VT, DL, DAG.getEntryNode(), |
911 | 89 | DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR |
912 | 89 | MachinePointerInfo(ConstantPointerNull::get(PtrType))); |
913 | 89 | } |
914 | | |
915 | 19.1k | bool R600TargetLowering::isZero(SDValue Op) const { |
916 | 19.1k | if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { |
917 | 8.68k | return Cst->isNullValue(); |
918 | 10.4k | } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){ |
919 | 222 | return CstFP->isZero(); |
920 | 10.2k | } else { |
921 | 10.2k | return false; |
922 | 10.2k | } |
923 | 19.1k | } |
924 | | |
925 | 29.4k | bool R600TargetLowering::isHWTrueValue(SDValue Op) const { |
926 | 29.4k | if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { |
927 | 470 | return CFP->isExactlyValue(1.0); |
928 | 470 | } |
929 | 29.0k | return isAllOnesConstant(Op); |
930 | 29.0k | } |
931 | | |
932 | 5.31k | bool R600TargetLowering::isHWFalseValue(SDValue Op) const { |
933 | 5.31k | if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { |
934 | 155 | return CFP->getValueAPF().isZero(); |
935 | 155 | } |
936 | 5.15k | return isNullConstant(Op); |
937 | 5.15k | } |
938 | | |
// Lower SELECT_CC for R600. Tries, in order: an f32 min/max combine, a
// native SET* pattern, a native CND* pattern (zero on the RHS), and
// finally a two-step expansion into supported SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // f32 selects may collapse to a legacy min/max instruction.
  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand. If the values
  // are swapped (false in the true slot), invert the condition code — or
  // invert-and-swap the compare operands — whichever form is legal.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // CND* only tests the supported (non-NE) conditions, so rewrite any
    // "not equal" form as the inverse condition with swapped select arms.
    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
1080 | | |
1081 | | /// LLVM generates byte-addressed pointers. For indirect addressing, we need to |
1082 | | /// convert these pointers to a register index. Each register holds |
1083 | | /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the |
1084 | | /// \p StackWidth, which tells us how many of the 4 sub-registrers will be used |
1085 | | /// for indirect addressing. |
1086 | | SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr, |
1087 | | unsigned StackWidth, |
1088 | 0 | SelectionDAG &DAG) const { |
1089 | 0 | unsigned SRLPad; |
1090 | 0 | switch(StackWidth) { |
1091 | 0 | case 1: |
1092 | 0 | SRLPad = 2; |
1093 | 0 | break; |
1094 | 0 | case 2: |
1095 | 0 | SRLPad = 3; |
1096 | 0 | break; |
1097 | 0 | case 4: |
1098 | 0 | SRLPad = 4; |
1099 | 0 | break; |
1100 | 0 | default: llvm_unreachable("Invalid stack width"); |
1101 | 0 | } |
1102 | 0 | |
1103 | 0 | SDLoc DL(Ptr); |
1104 | 0 | return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr, |
1105 | 0 | DAG.getConstant(SRLPad, DL, MVT::i32)); |
1106 | 0 | } |
1107 | | |
1108 | | void R600TargetLowering::getStackAddress(unsigned StackWidth, |
1109 | | unsigned ElemIdx, |
1110 | | unsigned &Channel, |
1111 | 0 | unsigned &PtrIncr) const { |
1112 | 0 | switch (StackWidth) { |
1113 | 0 | default: |
1114 | 0 | case 1: |
1115 | 0 | Channel = 0; |
1116 | 0 | if (ElemIdx > 0) { |
1117 | 0 | PtrIncr = 1; |
1118 | 0 | } else { |
1119 | 0 | PtrIncr = 0; |
1120 | 0 | } |
1121 | 0 | break; |
1122 | 0 | case 2: |
1123 | 0 | Channel = ElemIdx % 2; |
1124 | 0 | if (ElemIdx == 2) { |
1125 | 0 | PtrIncr = 1; |
1126 | 0 | } else { |
1127 | 0 | PtrIncr = 0; |
1128 | 0 | } |
1129 | 0 | break; |
1130 | 0 | case 4: |
1131 | 0 | Channel = ElemIdx; |
1132 | 0 | PtrIncr = 0; |
1133 | 0 | break; |
1134 | 0 | } |
1135 | 0 | } |
1136 | | |
// Lower a sub-dword (i8/i16) store to private memory as a read-modify-write
// of the containing dword: load the dword, mask out the target bits, OR in
// the shifted value, and store the dword back. If the store was part of an
// expanded vector store (marked by a DUMMY_CHAIN), the dummy chain is
// unwrapped here and re-established on the new store so sibling element
// stores are ordered after it.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  // Bit mask selecting the stored bits within the dword.
  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Subsequent operations must be ordered after the dword load.
  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}
1227 | | |
// Lower STORE for R600. Dispatches on address space and store kind:
// vector stores to LOCAL/PRIVATE (and truncating vector stores) are
// scalarized; under-aligned stores are expanded; global truncating stores
// become STORE_MSKOR; global i32+ stores get a dword address; private
// sub-dword stores go through lowerPrivateTruncStore; private i32+ stores
// are tagged with DWORDADDR. Anything left is matched by patterns.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  const bool TruncatingStore = StoreNode->isTruncatingStore();

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
       TruncatingStore) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  // Expand stores the hardware can't perform at this alignment.
  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(
          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  // Byte address shifted down to a dword address; used by both the global
  // MSKOR path and the DWORDADDR tagging below.
  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (TruncatingStore) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      // Bit offset of the stored value within its dword (byte index * 8).
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isIndexed()) {
        llvm_unreachable("Indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  // Private sub-dword stores need a read-modify-write of the containing dword.
  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}
1341 | | |
// Map a CONSTANT_BUFFER_* address space to its constant-cache base address,
// 512 + (kc_bank << 12); returns -1 for any other address space.
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1382 | | |
// Lower a sub-dword extending load from private memory: load the containing
// dword, shift the addressed byte/short down to bit 0, then sign- or
// zero-extend it in-register according to the load's extension type.
SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  // Fold any pre/post-indexed offset into the pointer up front.
  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location (round the byte address down to a 4-byte boundary).
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register (low two bits of the byte address).
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}
1439 | | |
// Custom lowering for ISD::LOAD. Dispatches, in order: sub-dword extending
// private loads, vector loads from local/private memory (scalarized),
// constant-buffer loads, unsupported sign-extending loads (expanded into an
// any-extend load plus SIGN_EXTEND_INREG), and private dword loads (rewritten
// through a DWORDADDR-tagged pointer).
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  // Extending loads of less than 32 bits from private memory need the
  // byte-extraction sequence built by lowerPrivateExtLoad.
  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  // Local/private memory has no vector loads; break them into scalars.
  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
      //TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
                           DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                                       DAG.getConstant(4, DL, MVT::i32)),
                           DAG.getConstant(LoadNode->getAddressSpace() -
                                           AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
                           );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // Everything past this point handles private memory only.
  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}
1527 | | |
1528 | 88 | SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { |
1529 | 88 | SDValue Chain = Op.getOperand(0); |
1530 | 88 | SDValue Cond = Op.getOperand(1); |
1531 | 88 | SDValue Jump = Op.getOperand(2); |
1532 | 88 | |
1533 | 88 | return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(), |
1534 | 88 | Chain, Jump, Cond); |
1535 | 88 | } |
1536 | | |
1537 | | SDValue R600TargetLowering::lowerFrameIndex(SDValue Op, |
1538 | 1.59k | SelectionDAG &DAG) const { |
1539 | 1.59k | MachineFunction &MF = DAG.getMachineFunction(); |
1540 | 1.59k | const R600FrameLowering *TFL = Subtarget->getFrameLowering(); |
1541 | 1.59k | |
1542 | 1.59k | FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op); |
1543 | 1.59k | |
1544 | 1.59k | unsigned FrameIndex = FIN->getIndex(); |
1545 | 1.59k | unsigned IgnoredFrameReg; |
1546 | 1.59k | unsigned Offset = |
1547 | 1.59k | TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); |
1548 | 1.59k | return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op), |
1549 | 1.59k | Op.getValueType()); |
1550 | 1.59k | } |
1551 | | |
1552 | | CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC, |
1553 | 50 | bool IsVarArg) const { |
1554 | 50 | switch (CC) { |
1555 | 50 | case CallingConv::AMDGPU_KERNEL: |
1556 | 0 | case CallingConv::SPIR_KERNEL: |
1557 | 0 | case CallingConv::C: |
1558 | 0 | case CallingConv::Fast: |
1559 | 0 | case CallingConv::Cold: |
1560 | 0 | llvm_unreachable("kernels should not be handled here"); |
1561 | 50 | case CallingConv::AMDGPU_VS: |
1562 | 50 | case CallingConv::AMDGPU_GS: |
1563 | 50 | case CallingConv::AMDGPU_PS: |
1564 | 50 | case CallingConv::AMDGPU_CS: |
1565 | 50 | case CallingConv::AMDGPU_HS: |
1566 | 50 | case CallingConv::AMDGPU_ES: |
1567 | 50 | case CallingConv::AMDGPU_LS: |
1568 | 50 | return CC_R600; |
1569 | 50 | default: |
1570 | 0 | report_fatal_error("Unsupported calling convention."); |
1571 | 50 | } |
1572 | 50 | } |
1573 | | |
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
//
// Shader arguments arrive in live-in registers; compute (kernel) arguments
// are materialized as loads from the PARAM_I parameter buffer.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<ISD::InputArg, 8> LocalIns;

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    // Shader arguments: copy from the assigned live-in register.
    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);

    // The parameter buffer is read-only and never aliased, hence the
    // dereferenceable/invariant flags on the load.
    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                              MachineMemOperand::MODereferenceable |
                              MachineMemOperand::MOInvariant);

    InVals.push_back(Arg);
  }
  return Chain;
}
1650 | | |
1651 | | EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, |
1652 | 33.0k | EVT VT) const { |
1653 | 33.0k | if (!VT.isVector()) |
1654 | 32.9k | return MVT::i32; |
1655 | 89 | return VT.changeVectorElementTypeToInteger(); |
1656 | 89 | } |
1657 | | |
1658 | | bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, |
1659 | 98 | const SelectionDAG &DAG) const { |
1660 | 98 | // Local and Private addresses do not handle vectors. Limit to i32 |
1661 | 98 | if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS90 )) { |
1662 | 97 | return (MemVT.getSizeInBits() <= 32); |
1663 | 97 | } |
1664 | 1 | return true; |
1665 | 1 | } |
1666 | | |
1667 | | bool R600TargetLowering::allowsMisalignedMemoryAccesses( |
1668 | | EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags, |
1669 | 846 | bool *IsFast) const { |
1670 | 846 | if (IsFast) |
1671 | 569 | *IsFast = false; |
1672 | 846 | |
1673 | 846 | if (!VT.isSimple() || VT == MVT::Other844 ) |
1674 | 2 | return false; |
1675 | 844 | |
1676 | 844 | if (VT.bitsLT(MVT::i32)) |
1677 | 40 | return false; |
1678 | 804 | |
1679 | 804 | // TODO: This is a rough estimate. |
1680 | 804 | if (IsFast) |
1681 | 551 | *IsFast = true; |
1682 | 804 | |
1683 | 804 | return VT.bitsGT(MVT::i32) && Align % 4 == 0751 ; |
1684 | 804 | } |
1685 | | |
// Canonicalize the four lanes of a build_vector for swizzle selection:
// undef lanes map to SEL_MASK_WRITE, constant 0.0/1.0 lanes map to the
// hardware SEL_0/SEL_1 selectors, and a lane equal to an earlier lane is
// replaced by undef and remapped onto that earlier lane. RemapSwizzle gets an
// entry only for lanes whose selector changed.
static SDValue CompactSwizzlableVector(
    SelectionDAG &DAG, SDValue VectorEntry,
    DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  // Split the vector into its four scalar lanes.
  SDValue NewBldVec[4];
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    // Fix spurious warning with gcc 7.3 -O3
    // warning: array subscript is above array bounds [-Warray-bounds]
    // if (NewBldVec[i] == NewBldVec[j]) {
    // ~~~~~~~~~~~^
    if (i >= 4)
      continue;
    // Fold duplicate lanes onto their first occurrence.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
1735 | | |
1736 | | static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, |
1737 | 387 | DenseMap<unsigned, unsigned> &RemapSwizzle) { |
1738 | 387 | assert(RemapSwizzle.empty()); |
1739 | 387 | |
1740 | 387 | SDLoc DL(VectorEntry); |
1741 | 387 | EVT EltTy = VectorEntry.getValueType().getVectorElementType(); |
1742 | 387 | |
1743 | 387 | SDValue NewBldVec[4]; |
1744 | 387 | bool isUnmovable[4] = {false, false, false, false}; |
1745 | 1.93k | for (unsigned i = 0; i < 4; i++1.54k ) |
1746 | 1.54k | NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry, |
1747 | 1.54k | DAG.getIntPtrConstant(i, DL)); |
1748 | 387 | |
1749 | 1.93k | for (unsigned i = 0; i < 4; i++1.54k ) { |
1750 | 1.54k | RemapSwizzle[i] = i; |
1751 | 1.54k | if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { |
1752 | 47 | unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) |
1753 | 47 | ->getZExtValue(); |
1754 | 47 | if (i == Idx) |
1755 | 39 | isUnmovable[Idx] = true; |
1756 | 47 | } |
1757 | 1.54k | } |
1758 | 387 | |
1759 | 1.92k | for (unsigned i = 0; i < 4; i++1.53k ) { |
1760 | 1.54k | if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) { |
1761 | 46 | unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1)) |
1762 | 46 | ->getZExtValue(); |
1763 | 46 | if (isUnmovable[Idx]) |
1764 | 40 | continue; |
1765 | 6 | // Swap i and Idx |
1766 | 6 | std::swap(NewBldVec[Idx], NewBldVec[i]); |
1767 | 6 | std::swap(RemapSwizzle[i], RemapSwizzle[Idx]); |
1768 | 6 | break; |
1769 | 6 | } |
1770 | 1.54k | } |
1771 | 387 | |
1772 | 387 | return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry), |
1773 | 387 | NewBldVec); |
1774 | 387 | } |
1775 | | |
1776 | | SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4], |
1777 | | SelectionDAG &DAG, |
1778 | 387 | const SDLoc &DL) const { |
1779 | 387 | // Old -> New swizzle values |
1780 | 387 | DenseMap<unsigned, unsigned> SwizzleRemap; |
1781 | 387 | |
1782 | 387 | BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap); |
1783 | 1.93k | for (unsigned i = 0; i < 4; i++1.54k ) { |
1784 | 1.54k | unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); |
1785 | 1.54k | if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) |
1786 | 119 | Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); |
1787 | 1.54k | } |
1788 | 387 | |
1789 | 387 | SwizzleRemap.clear(); |
1790 | 387 | BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap); |
1791 | 1.93k | for (unsigned i = 0; i < 4; i++1.54k ) { |
1792 | 1.54k | unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue(); |
1793 | 1.54k | if (SwizzleRemap.find(Idx) != SwizzleRemap.end()) |
1794 | 1.35k | Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32); |
1795 | 1.54k | } |
1796 | 387 | |
1797 | 387 | return BuildVector; |
1798 | 387 | } |
1799 | | |
// Lower a dword-aligned, non-extending i32 (or i32-vector) load from constant
// buffer `Block` into CONST_ADDRESS nodes, one per lane. Returns SDValue()
// for loads this path cannot handle (sub-dword scalars, extending loads,
// under-aligned accesses), letting other lowering paths take over.
SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
                                            SelectionDAG &DAG) const {
  SDLoc DL(LoadNode);
  EVT VT = LoadNode->getValueType(0);
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();
  assert (isa<ConstantSDNode>(Ptr));

  //TODO: Support smaller loads
  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
    return SDValue();

  if (LoadNode->getAlignment() < 4)
    return SDValue();

  int ConstantBlock = ConstantAddressBlock(Block);

  SDValue Slots[4];
  for (unsigned i = 0; i < 4; i++) {
    // We want Const position encoded with the following formula :
    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr computed by llvm using an alignment of 16.
    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    // then div by 4 at the ISel step
    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
  }
  // Scalar loads still build a (partial) vector, then extract element 0.
  EVT NewVT = MVT::v4i32;
  unsigned NumElements = 4;
  if (VT.isVector()) {
    NewVT = VT;
    NumElements = VT.getVectorNumElements();
  }
  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
  if (!VT.isVector()) {
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i32));
  }
  SDValue MergedValues[2] = {
    Result,
    Chain
  };
  return DAG.getMergeValues(MergedValues, DL);
}
1845 | | |
1846 | | //===----------------------------------------------------------------------===// |
1847 | | // Custom DAG Optimizations |
1848 | | //===----------------------------------------------------------------------===// |
1849 | | |
// R600-specific DAG combines; anything not matched here falls through to the
// generic AMDGPU combines at the bottom.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, DL, MVT::i32), // True
                       DAG.getConstant(0, DL, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC

    break; // NOTE: unreachable — the return above always fires.
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.isUndef())
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.isUndef()) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
            DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
            DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    // Also look through a same-element-count bitcast of a build_vector.
    if (Arg.getOpcode() == ISD::BITCAST &&
        Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
        (Arg.getOperand(0).getValueType().getVectorNumElements() ==
         Arg.getValueType().getVectorNumElements())) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    break;
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
    if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The fold requires the outer select's true/false/rhs to match the
    // inner select's arms exactly (node identity, not just value equality).
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC,
                                  LHS.getOperand(0).getValueType().isInteger());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(DL,
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

  case AMDGPUISD::R600_EXPORT: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    // OptimizeSwizzle fills NewArgs[1] and may rewrite the four SWZ operands.
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
  }
  case AMDGPUISD::TEXTURE_FETCH: {
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    // Here the swizzle selectors live in operands 2..5 (NewArgs[2]).
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
  }

  case ISD::LOAD: {
    // Constant-pointer loads from the parameter space become constant-buffer
    // reads from buffer 0.
    LoadSDNode *LoadNode = cast<LoadSDNode>(N);
    SDValue Ptr = LoadNode->getBasePtr();
    if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
         isa<ConstantSDNode>(Ptr))
      return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
    break;
  }

  default: break;
  }

  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
2079 | | |
2080 | | bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx, |
2081 | | SDValue &Src, SDValue &Neg, SDValue &Abs, |
2082 | | SDValue &Sel, SDValue &Imm, |
2083 | 245k | SelectionDAG &DAG) const { |
2084 | 245k | const R600InstrInfo *TII = Subtarget->getInstrInfo(); |
2085 | 245k | if (!Src.isMachineOpcode()) |
2086 | 87.7k | return false; |
2087 | 157k | |
2088 | 157k | switch (Src.getMachineOpcode()) { |
2089 | 157k | case R600::FNEG_R600: |
2090 | 125 | if (!Neg.getNode()) |
2091 | 24 | return false; |
2092 | 101 | Src = Src.getOperand(0); |
2093 | 101 | Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); |
2094 | 101 | return true; |
2095 | 109 | case R600::FABS_R600: |
2096 | 109 | if (!Abs.getNode()) |
2097 | 16 | return false; |
2098 | 93 | Src = Src.getOperand(0); |
2099 | 93 | Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32); |
2100 | 93 | return true; |
2101 | 11.0k | case R600::CONST_COPY: { |
2102 | 11.0k | unsigned Opcode = ParentNode->getMachineOpcode(); |
2103 | 11.0k | bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1; |
2104 | 11.0k | |
2105 | 11.0k | if (!Sel.getNode()) |
2106 | 1.47k | return false; |
2107 | 9.53k | |
2108 | 9.53k | SDValue CstOffset = Src.getOperand(0); |
2109 | 9.53k | if (ParentNode->getValueType(0).isVector()) |
2110 | 0 | return false; |
2111 | 9.53k | |
2112 | 9.53k | // Gather constants values |
2113 | 9.53k | int SrcIndices[] = { |
2114 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src0), |
2115 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src1), |
2116 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src2), |
2117 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src0_X), |
2118 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src0_Y), |
2119 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src0_Z), |
2120 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src0_W), |
2121 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src1_X), |
2122 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src1_Y), |
2123 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src1_Z), |
2124 | 9.53k | TII->getOperandIdx(Opcode, R600::OpName::src1_W) |
2125 | 9.53k | }; |
2126 | 9.53k | std::vector<unsigned> Consts; |
2127 | 104k | for (int OtherSrcIdx : SrcIndices) { |
2128 | 104k | int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); |
2129 | 104k | if (OtherSrcIdx < 0 || OtherSelIdx < 019.3k ) |
2130 | 85.5k | continue; |
2131 | 19.3k | if (HasDst) { |
2132 | 19.3k | OtherSrcIdx--; |
2133 | 19.3k | OtherSelIdx--; |
2134 | 19.3k | } |
2135 | 19.3k | if (RegisterSDNode *Reg = |
2136 | 735 | dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) { |
2137 | 735 | if (Reg->getReg() == R600::ALU_CONST) { |
2138 | 563 | ConstantSDNode *Cst |
2139 | 563 | = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx)); |
2140 | 563 | Consts.push_back(Cst->getZExtValue()); |
2141 | 563 | } |
2142 | 735 | } |
2143 | 19.3k | } |
2144 | 9.53k | |
2145 | 9.53k | ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset); |
2146 | 9.53k | Consts.push_back(Cst->getZExtValue()); |
2147 | 9.53k | if (!TII->fitsConstReadLimitations(Consts)) { |
2148 | 20 | return false; |
2149 | 20 | } |
2150 | 9.51k | |
2151 | 9.51k | Sel = CstOffset; |
2152 | 9.51k | Src = DAG.getRegister(R600::ALU_CONST, MVT::f32); |
2153 | 9.51k | return true; |
2154 | 9.51k | } |
2155 | 9.51k | case R600::MOV_IMM_GLOBAL_ADDR: |
2156 | 14 | // Check if the Imm slot is used. Taken from below. |
2157 | 14 | if (cast<ConstantSDNode>(Imm)->getZExtValue()) |
2158 | 0 | return false; |
2159 | 14 | Imm = Src.getOperand(0); |
2160 | 14 | Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32); |
2161 | 14 | return true; |
2162 | 27.8k | case R600::MOV_IMM_I32: |
2163 | 27.8k | case R600::MOV_IMM_F32: { |
2164 | 27.8k | unsigned ImmReg = R600::ALU_LITERAL_X; |
2165 | 27.8k | uint64_t ImmValue = 0; |
2166 | 27.8k | |
2167 | 27.8k | if (Src.getMachineOpcode() == R600::MOV_IMM_F32) { |
2168 | 536 | ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0)); |
2169 | 536 | float FloatValue = FPC->getValueAPF().convertToFloat(); |
2170 | 536 | if (FloatValue == 0.0) { |
2171 | 142 | ImmReg = R600::ZERO; |
2172 | 394 | } else if (FloatValue == 0.5) { |
2173 | 37 | ImmReg = R600::HALF; |
2174 | 357 | } else if (FloatValue == 1.0) { |
2175 | 87 | ImmReg = R600::ONE; |
2176 | 270 | } else { |
2177 | 270 | ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue(); |
2178 | 270 | } |
2179 | 27.3k | } else { |
2180 | 27.3k | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0)); |
2181 | 27.3k | uint64_t Value = C->getZExtValue(); |
2182 | 27.3k | if (Value == 0) { |
2183 | 1.76k | ImmReg = R600::ZERO; |
2184 | 25.5k | } else if (Value == 1) { |
2185 | 2.94k | ImmReg = R600::ONE_INT; |
2186 | 22.6k | } else { |
2187 | 22.6k | ImmValue = Value; |
2188 | 22.6k | } |
2189 | 27.3k | } |
2190 | 27.8k | |
2191 | 27.8k | // Check that we aren't already using an immediate. |
2192 | 27.8k | // XXX: It's possible for an instruction to have more than one |
2193 | 27.8k | // immediate operand, but this is not supported yet. |
2194 | 27.8k | if (ImmReg == R600::ALU_LITERAL_X) { |
2195 | 22.8k | if (!Imm.getNode()) |
2196 | 75 | return false; |
2197 | 22.8k | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm); |
2198 | 22.8k | assert(C); |
2199 | 22.8k | if (C->getZExtValue()) |
2200 | 2.28k | return false; |
2201 | 20.5k | Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32); |
2202 | 20.5k | } |
2203 | 27.8k | Src = DAG.getRegister(ImmReg, MVT::i32); |
2204 | 25.5k | return true; |
2205 | 27.8k | } |
2206 | 118k | default: |
2207 | 118k | return false; |
2208 | 157k | } |
2209 | 157k | } |
2210 | | |
2211 | | /// Fold the instructions after selecting them |
/// Fold the instructions after selecting them
///
/// Walks the source operands of the selected machine node and asks
/// FoldOperand() to fold their producers (FNEG/FABS wrappers, CONST_COPY,
/// MOV_IMM_*) into the node's modifier/selector/literal slots. The operand
/// list is copied into Ops, and the Src/Neg/Abs/Sel/Imm references below
/// alias directly into that vector so FoldOperand can rewrite them in
/// place; on the first successful fold a new machine node is built from
/// the mutated Ops and returned. FakeOp marks slots the opcode does not
/// have (a null SDValue disables the corresponding fold).
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII = Subtarget->getInstrInfo();
  if (!Node->isMachineOpcode())
    return Node;

  unsigned Opcode = Node->getMachineOpcode();
  SDValue FakeOp;

  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == R600::DOT_4) {
    // DOT_4 has eight per-channel sources (src0/src1 x XYZW), each with
    // its own neg/abs modifier operand; no literal slot is folded here.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // The "- 1" converts a TII operand index (which counts the dst
      // operand) into an index into the SDNode operand list (which
      // doesn't). These references alias into Ops for in-place rewriting.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == R600::REG_SEQUENCE) {
    // REG_SEQUENCE operands alternate (value, subreg-index) starting at
    // operand 1; only the value operands can be folded, with no
    // modifier/selector/literal slots available.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else {
    // Generic ALU instruction: up to three sources (src0..src2), each
    // with a neg modifier; src2 has no abs modifier, hence the -1 entry.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0),
      TII->getOperandIdx(Opcode, R600::OpName::src1),
      TII->getOperandIdx(Opcode, R600::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
      if (HasDst) {
        // Shift TII indices past the dst operand, as above.
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  // Nothing was folded; keep the original node.
  return Node;
}