/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //==-----------------------------------------------------------------------===// |
8 | | // |
9 | | /// \file |
10 | | /// Defines an instruction selector for the AMDGPU target. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #include "AMDGPU.h" |
15 | | #include "AMDGPUArgumentUsageInfo.h" |
16 | | #include "AMDGPUISelLowering.h" // For AMDGPUISD |
17 | | #include "AMDGPUInstrInfo.h" |
18 | | #include "AMDGPUPerfHintAnalysis.h" |
19 | | #include "AMDGPURegisterInfo.h" |
20 | | #include "AMDGPUSubtarget.h" |
21 | | #include "AMDGPUTargetMachine.h" |
22 | | #include "SIDefines.h" |
23 | | #include "SIISelLowering.h" |
24 | | #include "SIInstrInfo.h" |
25 | | #include "SIMachineFunctionInfo.h" |
26 | | #include "SIRegisterInfo.h" |
27 | | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
28 | | #include "llvm/ADT/APInt.h" |
29 | | #include "llvm/ADT/SmallVector.h" |
30 | | #include "llvm/ADT/StringRef.h" |
31 | | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" |
32 | | #include "llvm/Analysis/ValueTracking.h" |
33 | | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
34 | | #include "llvm/CodeGen/ISDOpcodes.h" |
35 | | #include "llvm/CodeGen/MachineFunction.h" |
36 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
37 | | #include "llvm/CodeGen/SelectionDAG.h" |
38 | | #include "llvm/CodeGen/SelectionDAGISel.h" |
39 | | #include "llvm/CodeGen/SelectionDAGNodes.h" |
40 | | #include "llvm/CodeGen/ValueTypes.h" |
41 | | #include "llvm/IR/BasicBlock.h" |
42 | | #ifdef EXPENSIVE_CHECKS |
43 | | #include "llvm/IR/Dominators.h" |
44 | | #endif |
45 | | #include "llvm/IR/Instruction.h" |
46 | | #include "llvm/MC/MCInstrDesc.h" |
47 | | #include "llvm/Support/Casting.h" |
48 | | #include "llvm/Support/CodeGen.h" |
49 | | #include "llvm/Support/ErrorHandling.h" |
50 | | #include "llvm/Support/MachineValueType.h" |
51 | | #include "llvm/Support/MathExtras.h" |
52 | | #include <cassert> |
53 | | #include <cstdint> |
54 | | #include <new> |
55 | | #include <vector> |
56 | | |
57 | | #define DEBUG_TYPE "isel" |
58 | | |
59 | | using namespace llvm; |
60 | | |
61 | | namespace llvm { |
62 | | |
63 | | class R600InstrInfo; |
64 | | |
65 | | } // end namespace llvm |
66 | | |
67 | | //===----------------------------------------------------------------------===// |
68 | | // Instruction Selector Implementation |
69 | | //===----------------------------------------------------------------------===// |
70 | | |
71 | | namespace { |
72 | | |
73 | 35 | static bool isNullConstantOrUndef(SDValue V) { |
74 | 35 | if (V.isUndef()) |
75 | 4 | return true; |
76 | 31 | |
77 | 31 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); |
78 | 31 | return Const != nullptr && Const->isNullValue()27 ; |
79 | 31 | } |
80 | | |
81 | 1.22k | static bool getConstantValue(SDValue N, uint32_t &Out) { |
82 | 1.22k | // This is only used for packed vectors, where ussing 0 for undef should |
83 | 1.22k | // always be good. |
84 | 1.22k | if (N.isUndef()) { |
85 | 21 | Out = 0; |
86 | 21 | return true; |
87 | 21 | } |
88 | 1.20k | |
89 | 1.20k | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { |
90 | 288 | Out = C->getAPIntValue().getSExtValue(); |
91 | 288 | return true; |
92 | 288 | } |
93 | 912 | |
94 | 912 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { |
95 | 339 | Out = C->getValueAPF().bitcastToAPInt().getSExtValue(); |
96 | 339 | return true; |
97 | 339 | } |
98 | 573 | |
99 | 573 | return false; |
100 | 573 | } |
101 | | |
102 | | // TODO: Handle undef as zero |
103 | | static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG, |
104 | 863 | bool Negate = false) { |
105 | 863 | assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2); |
106 | 863 | uint32_t LHSVal, RHSVal; |
107 | 863 | if (getConstantValue(N->getOperand(0), LHSVal) && |
108 | 863 | getConstantValue(N->getOperand(1), RHSVal)358 ) { |
109 | 290 | SDLoc SL(N); |
110 | 290 | uint32_t K = Negate ? |
111 | 38 | (-LHSVal & 0xffff) | (-RHSVal << 16) : |
112 | 290 | (LHSVal & 0xffff) | (RHSVal << 16)252 ; |
113 | 290 | return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0), |
114 | 290 | DAG.getTargetConstant(K, SL, MVT::i32)); |
115 | 290 | } |
116 | 573 | |
117 | 573 | return nullptr; |
118 | 573 | } |
119 | | |
120 | 38 | static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) { |
121 | 38 | return packConstantV2I16(N, DAG, true); |
122 | 38 | } |
123 | | |
124 | | /// AMDGPU specific code to select AMDGPU machine instructions for |
125 | | /// SelectionDAG operations. |
126 | | class AMDGPUDAGToDAGISel : public SelectionDAGISel { |
127 | | // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can |
128 | | // make the right decision when generating code for different targets. |
129 | | const GCNSubtarget *Subtarget; |
130 | | bool EnableLateStructurizeCFG; |
131 | | |
132 | | public: |
133 | | explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr, |
134 | | CodeGenOpt::Level OptLevel = CodeGenOpt::Default) |
135 | 2.68k | : SelectionDAGISel(*TM, OptLevel) { |
136 | 2.68k | EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG; |
137 | 2.68k | } |
138 | 2.66k | ~AMDGPUDAGToDAGISel() override = default; |
139 | | |
140 | 2.66k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
141 | 2.66k | AU.addRequired<AMDGPUArgumentUsageInfo>(); |
142 | 2.66k | AU.addRequired<LegacyDivergenceAnalysis>(); |
143 | | #ifdef EXPENSIVE_CHECKS |
144 | | AU.addRequired<DominatorTreeWrapperPass>(); |
145 | | AU.addRequired<LoopInfoWrapperPass>(); |
146 | | #endif |
147 | | SelectionDAGISel::getAnalysisUsage(AU); |
148 | 2.66k | } |
149 | | |
150 | | bool matchLoadD16FromBuildVector(SDNode *N) const; |
151 | | |
152 | | bool runOnMachineFunction(MachineFunction &MF) override; |
153 | | void PreprocessISelDAG() override; |
154 | | void Select(SDNode *N) override; |
155 | | StringRef getPassName() const override; |
156 | | void PostprocessISelDAG() override; |
157 | | |
158 | | protected: |
159 | | void SelectBuildVector(SDNode *N, unsigned RegClassID); |
160 | | |
161 | | private: |
162 | | std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; |
163 | | bool isNoNanSrc(SDValue N) const; |
164 | | bool isInlineImmediate(const SDNode *N, bool Negated = false) const; |
165 | 46 | bool isNegInlineImmediate(const SDNode *N) const { |
166 | 46 | return isInlineImmediate(N, true); |
167 | 46 | } |
168 | | |
169 | | bool isVGPRImm(const SDNode *N) const; |
170 | | bool isUniformLoad(const SDNode *N) const; |
171 | | bool isUniformBr(const SDNode *N) const; |
172 | | |
173 | | MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; |
174 | | |
175 | | SDNode *glueCopyToM0LDSInit(SDNode *N) const; |
176 | | SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; |
177 | | |
178 | | const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; |
179 | | virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); |
180 | | virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); |
181 | | bool isDSOffsetLegal(SDValue Base, unsigned Offset, |
182 | | unsigned OffsetBits) const; |
183 | | bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; |
184 | | bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, |
185 | | SDValue &Offset1) const; |
186 | | bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, |
187 | | SDValue &SOffset, SDValue &Offset, SDValue &Offen, |
188 | | SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, |
189 | | SDValue &TFE, SDValue &DLC) const; |
190 | | bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, |
191 | | SDValue &SOffset, SDValue &Offset, SDValue &GLC, |
192 | | SDValue &SLC, SDValue &TFE, SDValue &DLC) const; |
193 | | bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, |
194 | | SDValue &VAddr, SDValue &SOffset, SDValue &Offset, |
195 | | SDValue &SLC) const; |
196 | | bool SelectMUBUFScratchOffen(SDNode *Parent, |
197 | | SDValue Addr, SDValue &RSrc, SDValue &VAddr, |
198 | | SDValue &SOffset, SDValue &ImmOffset) const; |
199 | | bool SelectMUBUFScratchOffset(SDNode *Parent, |
200 | | SDValue Addr, SDValue &SRsrc, SDValue &Soffset, |
201 | | SDValue &Offset) const; |
202 | | |
203 | | bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, |
204 | | SDValue &Offset, SDValue &GLC, SDValue &SLC, |
205 | | SDValue &TFE, SDValue &DLC) const; |
206 | | bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, |
207 | | SDValue &Offset, SDValue &SLC) const; |
208 | | bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, |
209 | | SDValue &Offset) const; |
210 | | |
211 | | bool SelectFlatAtomic(SDNode *N, SDValue Addr, SDValue &VAddr, |
212 | | SDValue &Offset, SDValue &SLC) const; |
213 | | bool SelectFlatAtomicSigned(SDNode *N, SDValue Addr, SDValue &VAddr, |
214 | | SDValue &Offset, SDValue &SLC) const; |
215 | | |
216 | | template <bool IsSigned> |
217 | | bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, |
218 | | SDValue &Offset, SDValue &SLC) const; |
219 | | |
220 | | bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, |
221 | | bool &Imm) const; |
222 | | SDValue Expand32BitAddress(SDValue Addr) const; |
223 | | bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, |
224 | | bool &Imm) const; |
225 | | bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; |
226 | | bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; |
227 | | bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; |
228 | | bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; |
229 | | bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; |
230 | | bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; |
231 | | |
232 | | bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
233 | | bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
234 | | bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const; |
235 | | bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
236 | | bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; |
237 | | bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, |
238 | | SDValue &Clamp, SDValue &Omod) const; |
239 | | bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, |
240 | | SDValue &Clamp, SDValue &Omod) const; |
241 | | |
242 | | bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods, |
243 | | SDValue &Clamp, |
244 | | SDValue &Omod) const; |
245 | | |
246 | | bool SelectVOP3OMods(SDValue In, SDValue &Src, |
247 | | SDValue &Clamp, SDValue &Omod) const; |
248 | | |
249 | | bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
250 | | bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods, |
251 | | SDValue &Clamp) const; |
252 | | |
253 | | bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
254 | | bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods, |
255 | | SDValue &Clamp) const; |
256 | | |
257 | | bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
258 | | bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods, |
259 | | SDValue &Clamp) const; |
260 | | bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const; |
261 | | bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; |
262 | | |
263 | | SDValue getHi16Elt(SDValue In) const; |
264 | | |
265 | | void SelectADD_SUB_I64(SDNode *N); |
266 | | void SelectAddcSubb(SDNode *N); |
267 | | void SelectUADDO_USUBO(SDNode *N); |
268 | | void SelectDIV_SCALE(SDNode *N); |
269 | | void SelectDIV_FMAS(SDNode *N); |
270 | | void SelectMAD_64_32(SDNode *N); |
271 | | void SelectFMA_W_CHAIN(SDNode *N); |
272 | | void SelectFMUL_W_CHAIN(SDNode *N); |
273 | | |
274 | | SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val, |
275 | | uint32_t Offset, uint32_t Width); |
276 | | void SelectS_BFEFromShifts(SDNode *N); |
277 | | void SelectS_BFE(SDNode *N); |
278 | | bool isCBranchSCC(const SDNode *N) const; |
279 | | void SelectBRCOND(SDNode *N); |
280 | | void SelectFMAD_FMA(SDNode *N); |
281 | | void SelectATOMIC_CMP_SWAP(SDNode *N); |
282 | | void SelectDSAppendConsume(SDNode *N, unsigned IntrID); |
283 | | void SelectDS_GWS(SDNode *N, unsigned IntrID); |
284 | | void SelectINTRINSIC_W_CHAIN(SDNode *N); |
285 | | void SelectINTRINSIC_VOID(SDNode *N); |
286 | | |
287 | | protected: |
288 | | // Include the pieces autogenerated from the target description. |
289 | | #include "AMDGPUGenDAGISel.inc" |
290 | | }; |
291 | | |
292 | | class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { |
293 | | const R600Subtarget *Subtarget; |
294 | | |
295 | | bool isConstantLoad(const MemSDNode *N, int cbID) const; |
296 | | bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); |
297 | | bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, |
298 | | SDValue& Offset); |
299 | | public: |
300 | | explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) : |
301 | 280 | AMDGPUDAGToDAGISel(TM, OptLevel) {} |
302 | | |
303 | | void Select(SDNode *N) override; |
304 | | |
305 | | bool SelectADDRIndirect(SDValue Addr, SDValue &Base, |
306 | | SDValue &Offset) override; |
307 | | bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, |
308 | | SDValue &Offset) override; |
309 | | |
310 | | bool runOnMachineFunction(MachineFunction &MF) override; |
311 | | |
312 | 2.47k | void PreprocessISelDAG() override {} |
313 | | |
314 | | protected: |
315 | | // Include the pieces autogenerated from the target description. |
316 | | #include "R600GenDAGISel.inc" |
317 | | }; |
318 | | |
319 | 4.38k | static SDValue stripBitcast(SDValue Val) { |
320 | 4.38k | return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0)651 : Val3.73k ; |
321 | 4.38k | } |
322 | | |
323 | | // Figure out if this is really an extract of the high 16-bits of a dword. |
324 | 1.19k | static bool isExtractHiElt(SDValue In, SDValue &Out) { |
325 | 1.19k | In = stripBitcast(In); |
326 | 1.19k | if (In.getOpcode() != ISD::TRUNCATE) |
327 | 709 | return false; |
328 | 481 | |
329 | 481 | SDValue Srl = In.getOperand(0); |
330 | 481 | if (Srl.getOpcode() == ISD::SRL) { |
331 | 232 | if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) { |
332 | 232 | if (ShiftAmt->getZExtValue() == 16) { |
333 | 208 | Out = stripBitcast(Srl.getOperand(0)); |
334 | 208 | return true; |
335 | 208 | } |
336 | 273 | } |
337 | 232 | } |
338 | 273 | |
339 | 273 | return false; |
340 | 273 | } |
341 | | |
342 | | // Look through operations that obscure just looking at the low 16-bits of the |
343 | | // same register. |
344 | 730 | static SDValue stripExtractLoElt(SDValue In) { |
345 | 730 | if (In.getOpcode() == ISD::TRUNCATE) { |
346 | 158 | SDValue Src = In.getOperand(0); |
347 | 158 | if (Src.getValueType().getSizeInBits() == 32) |
348 | 157 | return stripBitcast(Src); |
349 | 573 | } |
350 | 573 | |
351 | 573 | return In; |
352 | 573 | } |
353 | | |
354 | | } // end anonymous namespace |
355 | | |
356 | 101k | INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel", |
357 | 101k | "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) |
358 | 101k | INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) |
359 | 101k | INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) |
360 | 101k | INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) |
361 | | #ifdef EXPENSIVE_CHECKS |
362 | | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
363 | | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) |
364 | | #endif |
365 | 101k | INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel", |
366 | | "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) |
367 | | |
368 | | /// This pass converts a legalized DAG into a AMDGPU-specific |
369 | | // DAG, ready for instruction scheduling. |
370 | | FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM, |
371 | 2.40k | CodeGenOpt::Level OptLevel) { |
372 | 2.40k | return new AMDGPUDAGToDAGISel(TM, OptLevel); |
373 | 2.40k | } |
374 | | |
375 | | /// This pass converts a legalized DAG into a R600-specific |
376 | | // DAG, ready for instruction scheduling. |
377 | | FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, |
378 | 280 | CodeGenOpt::Level OptLevel) { |
379 | 280 | return new R600DAGToDAGISel(TM, OptLevel); |
380 | 280 | } |
381 | | |
382 | 25.1k | bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { |
383 | | #ifdef EXPENSIVE_CHECKS |
384 | | DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
385 | | LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
386 | | for (auto &L : LI->getLoopsInPreorder()) { |
387 | | assert(L->isLCSSAForm(DT)); |
388 | | } |
389 | | #endif |
390 | | Subtarget = &MF.getSubtarget<GCNSubtarget>(); |
391 | 25.1k | return SelectionDAGISel::runOnMachineFunction(MF); |
392 | 25.1k | } |
393 | | |
394 | 7.19k | bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const { |
395 | 7.19k | assert(Subtarget->d16PreservesUnusedBits()); |
396 | 7.19k | MVT VT = N->getValueType(0).getSimpleVT(); |
397 | 7.19k | if (VT != MVT::v2i16 && VT != MVT::v2f166.71k ) |
398 | 6.31k | return false; |
399 | 878 | |
400 | 878 | SDValue Lo = N->getOperand(0); |
401 | 878 | SDValue Hi = N->getOperand(1); |
402 | 878 | |
403 | 878 | LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi)); |
404 | 878 | |
405 | 878 | // build_vector lo, (load ptr) -> load_d16_hi ptr, lo |
406 | 878 | // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo |
407 | 878 | // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo |
408 | 878 | |
409 | 878 | // Need to check for possible indirect dependencies on the other half of the |
410 | 878 | // vector to avoid introducing a cycle. |
411 | 878 | if (LdHi && Hi.hasOneUse()86 && !LdHi->isPredecessorOf(Lo.getNode())79 ) { |
412 | 78 | SDVTList VTList = CurDAG->getVTList(VT, MVT::Other); |
413 | 78 | |
414 | 78 | SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo); |
415 | 78 | SDValue Ops[] = { |
416 | 78 | LdHi->getChain(), LdHi->getBasePtr(), TiedIn |
417 | 78 | }; |
418 | 78 | |
419 | 78 | unsigned LoadOp = AMDGPUISD::LOAD_D16_HI; |
420 | 78 | if (LdHi->getMemoryVT() == MVT::i8) { |
421 | 24 | LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ? |
422 | 13 | AMDGPUISD::LOAD_D16_HI_I811 : AMDGPUISD::LOAD_D16_HI_U8; |
423 | 54 | } else { |
424 | 54 | assert(LdHi->getMemoryVT() == MVT::i16); |
425 | 54 | } |
426 | 78 | |
427 | 78 | SDValue NewLoadHi = |
428 | 78 | CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList, |
429 | 78 | Ops, LdHi->getMemoryVT(), |
430 | 78 | LdHi->getMemOperand()); |
431 | 78 | |
432 | 78 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi); |
433 | 78 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1)); |
434 | 78 | return true; |
435 | 78 | } |
436 | 800 | |
437 | 800 | // build_vector (load ptr), hi -> load_d16_lo ptr, hi |
438 | 800 | // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi |
439 | 800 | // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi |
440 | 800 | LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo)); |
441 | 800 | if (LdLo && Lo.hasOneUse()71 ) { |
442 | 64 | SDValue TiedIn = getHi16Elt(Hi); |
443 | 64 | if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode())52 ) |
444 | 15 | return false; |
445 | 49 | |
446 | 49 | SDVTList VTList = CurDAG->getVTList(VT, MVT::Other); |
447 | 49 | unsigned LoadOp = AMDGPUISD::LOAD_D16_LO; |
448 | 49 | if (LdLo->getMemoryVT() == MVT::i8) { |
449 | 21 | LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ? |
450 | 11 | AMDGPUISD::LOAD_D16_LO_I810 : AMDGPUISD::LOAD_D16_LO_U8; |
451 | 28 | } else { |
452 | 28 | assert(LdLo->getMemoryVT() == MVT::i16); |
453 | 28 | } |
454 | 49 | |
455 | 49 | TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn); |
456 | 49 | |
457 | 49 | SDValue Ops[] = { |
458 | 49 | LdLo->getChain(), LdLo->getBasePtr(), TiedIn |
459 | 49 | }; |
460 | 49 | |
461 | 49 | SDValue NewLoadLo = |
462 | 49 | CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList, |
463 | 49 | Ops, LdLo->getMemoryVT(), |
464 | 49 | LdLo->getMemOperand()); |
465 | 49 | |
466 | 49 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo); |
467 | 49 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1)); |
468 | 49 | return true; |
469 | 49 | } |
470 | 736 | |
471 | 736 | return false; |
472 | 736 | } |
473 | | |
474 | 28.4k | void AMDGPUDAGToDAGISel::PreprocessISelDAG() { |
475 | 28.4k | if (!Subtarget->d16PreservesUnusedBits()) |
476 | 20.9k | return; |
477 | 7.44k | |
478 | 7.44k | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
479 | 7.44k | |
480 | 7.44k | bool MadeChange = false; |
481 | 231k | while (Position != CurDAG->allnodes_begin()) { |
482 | 223k | SDNode *N = &*--Position; |
483 | 223k | if (N->use_empty()) |
484 | 7.44k | continue; |
485 | 216k | |
486 | 216k | switch (N->getOpcode()) { |
487 | 216k | case ISD::BUILD_VECTOR: |
488 | 7.19k | MadeChange |= matchLoadD16FromBuildVector(N); |
489 | 7.19k | break; |
490 | 216k | default: |
491 | 209k | break; |
492 | 216k | } |
493 | 216k | } |
494 | 7.44k | |
495 | 7.44k | if (MadeChange) { |
496 | 122 | CurDAG->RemoveDeadNodes(); |
497 | 122 | LLVM_DEBUG(dbgs() << "After PreProcess:\n"; |
498 | 122 | CurDAG->dump();); |
499 | 122 | } |
500 | 7.44k | } |
501 | | |
502 | 246 | bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { |
503 | 246 | if (TM.Options.NoNaNsFPMath) |
504 | 189 | return true; |
505 | 57 | |
506 | 57 | // TODO: Move into isKnownNeverNaN |
507 | 57 | if (N->getFlags().isDefined()) |
508 | 57 | return N->getFlags().hasNoNaNs(); |
509 | 0 | |
510 | 0 | return CurDAG->isKnownNeverNaN(N); |
511 | 0 | } |
512 | | |
513 | | bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N, |
514 | 9.24k | bool Negated) const { |
515 | 9.24k | if (N->isUndef()) |
516 | 0 | return true; |
517 | 9.24k | |
518 | 9.24k | const SIInstrInfo *TII = Subtarget->getInstrInfo(); |
519 | 9.24k | if (Negated) { |
520 | 46 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) |
521 | 42 | return TII->isInlineConstant(-C->getAPIntValue()); |
522 | 4 | |
523 | 4 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) |
524 | 0 | return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt()); |
525 | 9.20k | |
526 | 9.20k | } else { |
527 | 9.20k | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) |
528 | 8.03k | return TII->isInlineConstant(C->getAPIntValue()); |
529 | 1.16k | |
530 | 1.16k | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) |
531 | 594 | return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); |
532 | 579 | } |
533 | 579 | |
534 | 579 | return false; |
535 | 579 | } |
536 | | |
537 | | /// Determine the register class for \p OpNo |
538 | | /// \returns The register class of the virtual register that will be used for |
539 | | /// the given operand number \OpNo or NULL if the register class cannot be |
540 | | /// determined. |
541 | | const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, |
542 | 32.8k | unsigned OpNo) const { |
543 | 32.8k | if (!N->isMachineOpcode()) { |
544 | 1.16k | if (N->getOpcode() == ISD::CopyToReg) { |
545 | 1.16k | unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); |
546 | 1.16k | if (TargetRegisterInfo::isVirtualRegister(Reg)) { |
547 | 449 | MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); |
548 | 449 | return MRI.getRegClass(Reg); |
549 | 449 | } |
550 | 720 | |
551 | 720 | const SIRegisterInfo *TRI |
552 | 720 | = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo(); |
553 | 720 | return TRI->getPhysRegClass(Reg); |
554 | 720 | } |
555 | 0 | |
556 | 0 | return nullptr; |
557 | 0 | } |
558 | 31.6k | |
559 | 31.6k | switch (N->getMachineOpcode()) { |
560 | 31.6k | default: { |
561 | 29.5k | const MCInstrDesc &Desc = |
562 | 29.5k | Subtarget->getInstrInfo()->get(N->getMachineOpcode()); |
563 | 29.5k | unsigned OpIdx = Desc.getNumDefs() + OpNo; |
564 | 29.5k | if (OpIdx >= Desc.getNumOperands()) |
565 | 0 | return nullptr; |
566 | 29.5k | int RegClass = Desc.OpInfo[OpIdx].RegClass; |
567 | 29.5k | if (RegClass == -1) |
568 | 12 | return nullptr; |
569 | 29.5k | |
570 | 29.5k | return Subtarget->getRegisterInfo()->getRegClass(RegClass); |
571 | 29.5k | } |
572 | 29.5k | case AMDGPU::REG_SEQUENCE: { |
573 | 2.12k | unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); |
574 | 2.12k | const TargetRegisterClass *SuperRC = |
575 | 2.12k | Subtarget->getRegisterInfo()->getRegClass(RCID); |
576 | 2.12k | |
577 | 2.12k | SDValue SubRegOp = N->getOperand(OpNo + 1); |
578 | 2.12k | unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); |
579 | 2.12k | return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, |
580 | 2.12k | SubRegIdx); |
581 | 29.5k | } |
582 | 31.6k | } |
583 | 31.6k | } |
584 | | |
585 | 9.09k | SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { |
586 | 9.09k | const SITargetLowering& Lowering = |
587 | 9.09k | *static_cast<const SITargetLowering*>(getTargetLowering()); |
588 | 9.09k | |
589 | 9.09k | assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain"); |
590 | 9.09k | |
591 | 9.09k | SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), |
592 | 9.09k | Val); |
593 | 9.09k | |
594 | 9.09k | SDValue Glue = M0.getValue(1); |
595 | 9.09k | |
596 | 9.09k | SmallVector <SDValue, 8> Ops; |
597 | 9.09k | Ops.push_back(M0); // Replace the chain. |
598 | 32.2k | for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i23.1k ) |
599 | 23.1k | Ops.push_back(N->getOperand(i)); |
600 | 9.09k | |
601 | 9.09k | Ops.push_back(Glue); |
602 | 9.09k | return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); |
603 | 9.09k | } |
604 | | |
605 | 93.2k | SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { |
606 | 93.2k | unsigned AS = cast<MemSDNode>(N)->getAddressSpace(); |
607 | 93.2k | if (AS == AMDGPUAS::LOCAL_ADDRESS) { |
608 | 12.3k | if (Subtarget->ldsRequiresM0Init()) |
609 | 8.79k | return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); |
610 | 80.9k | } else if (AS == AMDGPUAS::REGION_ADDRESS) { |
611 | 48 | MachineFunction &MF = CurDAG->getMachineFunction(); |
612 | 48 | unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize(); |
613 | 48 | return |
614 | 48 | glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32)); |
615 | 48 | } |
616 | 84.4k | return N; |
617 | 84.4k | } |
618 | | |
619 | | MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm, |
620 | 1.47k | EVT VT) const { |
621 | 1.47k | SDNode *Lo = CurDAG->getMachineNode( |
622 | 1.47k | AMDGPU::S_MOV_B32, DL, MVT::i32, |
623 | 1.47k | CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32)); |
624 | 1.47k | SDNode *Hi = |
625 | 1.47k | CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, |
626 | 1.47k | CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32)); |
627 | 1.47k | const SDValue Ops[] = { |
628 | 1.47k | CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), |
629 | 1.47k | SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), |
630 | 1.47k | SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)}; |
631 | 1.47k | |
632 | 1.47k | return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops); |
633 | 1.47k | } |
634 | | |
635 | 28.1k | static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { |
636 | 28.1k | switch (NumVectorElts) { |
637 | 28.1k | case 1: |
638 | 0 | return AMDGPU::SReg_32_XM0RegClassID; |
639 | 28.1k | case 2: |
640 | 17.6k | return AMDGPU::SReg_64RegClassID; |
641 | 28.1k | case 3: |
642 | 114 | return AMDGPU::SGPR_96RegClassID; |
643 | 28.1k | case 4: |
644 | 9.04k | return AMDGPU::SReg_128RegClassID; |
645 | 28.1k | case 5: |
646 | 1 | return AMDGPU::SGPR_160RegClassID; |
647 | 28.1k | case 8: |
648 | 1.24k | return AMDGPU::SReg_256RegClassID; |
649 | 28.1k | case 16: |
650 | 123 | return AMDGPU::SReg_512RegClassID; |
651 | 28.1k | case 32: |
652 | 23 | return AMDGPU::SReg_1024RegClassID; |
653 | 0 | } |
654 | 0 | |
655 | 0 | llvm_unreachable("invalid vector size"); |
656 | 0 | } |
657 | | |
658 | 30.6k | void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { |
659 | 30.6k | EVT VT = N->getValueType(0); |
660 | 30.6k | unsigned NumVectorElts = VT.getVectorNumElements(); |
661 | 30.6k | EVT EltVT = VT.getVectorElementType(); |
662 | 30.6k | SDLoc DL(N); |
663 | 30.6k | SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); |
664 | 30.6k | |
665 | 30.6k | if (NumVectorElts == 1) { |
666 | 0 | CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), |
667 | 0 | RegClass); |
668 | 0 | return; |
669 | 0 | } |
670 | 30.6k | |
671 | 30.6k | assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not " |
672 | 30.6k | "supported yet"); |
673 | 30.6k | // 32 = Max Num Vector Elements |
674 | 30.6k | // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) |
675 | 30.6k | // 1 = Vector Register Class |
676 | 30.6k | SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); |
677 | 30.6k | |
678 | 30.6k | RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); |
679 | 30.6k | bool IsRegSeq = true; |
680 | 30.6k | unsigned NOps = N->getNumOperands(); |
681 | 124k | for (unsigned i = 0; i < NOps; i++93.5k ) { |
682 | 93.5k | // XXX: Why is this here? |
683 | 93.5k | if (isa<RegisterSDNode>(N->getOperand(i))) { |
684 | 0 | IsRegSeq = false; |
685 | 0 | break; |
686 | 0 | } |
687 | 93.5k | unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i); |
688 | 93.5k | RegSeqArgs[1 + (2 * i)] = N->getOperand(i); |
689 | 93.5k | RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32); |
690 | 93.5k | } |
691 | 30.6k | if (NOps != NumVectorElts) { |
692 | 5 | // Fill in the missing undef elements if this was a scalar_to_vector. |
693 | 5 | assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); |
694 | 5 | MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, |
695 | 5 | DL, EltVT); |
696 | 10 | for (unsigned i = NOps; i < NumVectorElts; ++i5 ) { |
697 | 5 | unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i); |
698 | 5 | RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); |
699 | 5 | RegSeqArgs[1 + (2 * i) + 1] = |
700 | 5 | CurDAG->getTargetConstant(Sub, DL, MVT::i32); |
701 | 5 | } |
702 | 5 | } |
703 | 30.6k | |
704 | 30.6k | if (!IsRegSeq) |
705 | 0 | SelectCode(N); |
706 | 30.6k | CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs); |
707 | 30.6k | } |
708 | | |
709 | 700k | void AMDGPUDAGToDAGISel::Select(SDNode *N) { |
710 | 700k | unsigned int Opc = N->getOpcode(); |
711 | 700k | if (N->isMachineOpcode()) { |
712 | 2.95k | N->setNodeId(-1); |
713 | 2.95k | return; // Already selected. |
714 | 2.95k | } |
715 | 697k | |
716 | 697k | if (isa<AtomicSDNode>(N) || |
717 | 697k | (695k Opc == AMDGPUISD::ATOMIC_INC695k || Opc == AMDGPUISD::ATOMIC_DEC695k || |
718 | 695k | Opc == ISD::ATOMIC_LOAD_FADD695k || |
719 | 695k | Opc == AMDGPUISD::ATOMIC_LOAD_FMIN695k || |
720 | 695k | Opc == AMDGPUISD::ATOMIC_LOAD_FMAX695k )) |
721 | 2.70k | N = glueCopyToM0LDSInit(N); |
722 | 697k | |
723 | 697k | switch (Opc) { |
724 | 697k | default: |
725 | 473k | break; |
726 | 697k | // We are selecting i64 ADD here instead of custom lower it during |
727 | 697k | // DAG legalization, so we can fold some i64 ADDs used for address |
728 | 697k | // calculation into the LOAD and STORE instructions. |
729 | 697k | case ISD::ADDC: |
730 | 288 | case ISD::ADDE: |
731 | 288 | case ISD::SUBC: |
732 | 288 | case ISD::SUBE: { |
733 | 288 | if (N->getValueType(0) != MVT::i64) |
734 | 138 | break; |
735 | 150 | |
736 | 150 | SelectADD_SUB_I64(N); |
737 | 150 | return; |
738 | 150 | } |
739 | 389 | case ISD::ADDCARRY: |
740 | 389 | case ISD::SUBCARRY: |
741 | 389 | if (N->getValueType(0) != MVT::i32) |
742 | 0 | break; |
743 | 389 | |
744 | 389 | SelectAddcSubb(N); |
745 | 389 | return; |
746 | 389 | case ISD::UADDO: |
747 | 259 | case ISD::USUBO: { |
748 | 259 | SelectUADDO_USUBO(N); |
749 | 259 | return; |
750 | 259 | } |
751 | 259 | case AMDGPUISD::FMUL_W_CHAIN: { |
752 | 64 | SelectFMUL_W_CHAIN(N); |
753 | 64 | return; |
754 | 259 | } |
755 | 320 | case AMDGPUISD::FMA_W_CHAIN: { |
756 | 320 | SelectFMA_W_CHAIN(N); |
757 | 320 | return; |
758 | 259 | } |
759 | 259 | |
760 | 29.0k | case ISD::SCALAR_TO_VECTOR: |
761 | 29.0k | case ISD::BUILD_VECTOR: { |
762 | 29.0k | EVT VT = N->getValueType(0); |
763 | 29.0k | unsigned NumVectorElts = VT.getVectorNumElements(); |
764 | 29.0k | if (VT.getScalarSizeInBits() == 16) { |
765 | 896 | if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2825 ) { |
766 | 825 | if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) { |
767 | 252 | ReplaceNode(N, Packed); |
768 | 252 | return; |
769 | 252 | } |
770 | 644 | } |
771 | 644 | |
772 | 644 | break; |
773 | 644 | } |
774 | 28.1k | |
775 | 28.1k | assert(VT.getVectorElementType().bitsEq(MVT::i32)); |
776 | 28.1k | unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts); |
777 | 28.1k | SelectBuildVector(N, RegClassID); |
778 | 28.1k | return; |
779 | 28.1k | } |
780 | 28.1k | case ISD::BUILD_PAIR: { |
781 | 11.7k | SDValue RC, SubReg0, SubReg1; |
782 | 11.7k | SDLoc DL(N); |
783 | 11.7k | if (N->getValueType(0) == MVT::i128) { |
784 | 0 | RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32); |
785 | 0 | SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32); |
786 | 0 | SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32); |
787 | 11.7k | } else if (N->getValueType(0) == MVT::i64) { |
788 | 11.7k | RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32); |
789 | 11.7k | SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); |
790 | 11.7k | SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); |
791 | 11.7k | } else { |
792 | 0 | llvm_unreachable("Unhandled value type for BUILD_PAIR"); |
793 | 0 | } |
794 | 11.7k | const SDValue Ops[] = { RC, N->getOperand(0), SubReg0, |
795 | 11.7k | N->getOperand(1), SubReg1 }; |
796 | 11.7k | ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, |
797 | 11.7k | N->getValueType(0), Ops)); |
798 | 11.7k | return; |
799 | 11.7k | } |
800 | 11.7k | |
801 | 34.4k | case ISD::Constant: |
802 | 34.4k | case ISD::ConstantFP: { |
803 | 34.4k | if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)4.17k ) |
804 | 33.0k | break; |
805 | 1.39k | |
806 | 1.39k | uint64_t Imm; |
807 | 1.39k | if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N)) |
808 | 77 | Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); |
809 | 1.31k | else { |
810 | 1.31k | ConstantSDNode *C = cast<ConstantSDNode>(N); |
811 | 1.31k | Imm = C->getZExtValue(); |
812 | 1.31k | } |
813 | 1.39k | |
814 | 1.39k | SDLoc DL(N); |
815 | 1.39k | ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0))); |
816 | 1.39k | return; |
817 | 1.39k | } |
818 | 90.5k | case ISD::LOAD: |
819 | 90.5k | case ISD::STORE: |
820 | 90.5k | case ISD::ATOMIC_LOAD: |
821 | 90.5k | case ISD::ATOMIC_STORE: { |
822 | 90.5k | N = glueCopyToM0LDSInit(N); |
823 | 90.5k | break; |
824 | 90.5k | } |
825 | 90.5k | |
826 | 90.5k | case AMDGPUISD::BFE_I32: |
827 | 152 | case AMDGPUISD::BFE_U32: { |
828 | 152 | // There is a scalar version available, but unlike the vector version which |
829 | 152 | // has a separate operand for the offset and width, the scalar version packs |
830 | 152 | // the width and offset into a single operand. Try to move to the scalar |
831 | 152 | // version if the offsets are constant, so that we can try to keep extended |
832 | 152 | // loads of kernel arguments in SGPRs. |
833 | 152 | |
834 | 152 | // TODO: Technically we could try to pattern match scalar bitshifts of |
835 | 152 | // dynamic values, but it's probably not useful. |
836 | 152 | ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
837 | 152 | if (!Offset) |
838 | 16 | break; |
839 | 136 | |
840 | 136 | ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); |
841 | 136 | if (!Width) |
842 | 4 | break; |
843 | 132 | |
844 | 132 | bool Signed = Opc == AMDGPUISD::BFE_I32; |
845 | 132 | |
846 | 132 | uint32_t OffsetVal = Offset->getZExtValue(); |
847 | 132 | uint32_t WidthVal = Width->getZExtValue(); |
848 | 132 | |
849 | 132 | ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I328 : AMDGPU::S_BFE_U32124 , |
850 | 132 | SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); |
851 | 132 | return; |
852 | 132 | } |
853 | 319 | case AMDGPUISD::DIV_SCALE: { |
854 | 319 | SelectDIV_SCALE(N); |
855 | 319 | return; |
856 | 132 | } |
857 | 172 | case AMDGPUISD::DIV_FMAS: { |
858 | 172 | SelectDIV_FMAS(N); |
859 | 172 | return; |
860 | 132 | } |
861 | 132 | case AMDGPUISD::MAD_I64_I32: |
862 | 48 | case AMDGPUISD::MAD_U64_U32: { |
863 | 48 | SelectMAD_64_32(N); |
864 | 48 | return; |
865 | 48 | } |
866 | 22.0k | case ISD::CopyToReg: { |
867 | 22.0k | const SITargetLowering& Lowering = |
868 | 22.0k | *static_cast<const SITargetLowering*>(getTargetLowering()); |
869 | 22.0k | N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); |
870 | 22.0k | break; |
871 | 48 | } |
872 | 29.7k | case ISD::AND: |
873 | 29.7k | case ISD::SRL: |
874 | 29.7k | case ISD::SRA: |
875 | 29.7k | case ISD::SIGN_EXTEND_INREG: |
876 | 29.7k | if (N->getValueType(0) != MVT::i32) |
877 | 7.37k | break; |
878 | 22.3k | |
879 | 22.3k | SelectS_BFE(N); |
880 | 22.3k | return; |
881 | 22.3k | case ISD::BRCOND: |
882 | 692 | SelectBRCOND(N); |
883 | 692 | return; |
884 | 22.3k | case ISD::FMAD: |
885 | 2.78k | case ISD::FMA: |
886 | 2.78k | SelectFMAD_FMA(N); |
887 | 2.78k | return; |
888 | 2.78k | case AMDGPUISD::ATOMIC_CMP_SWAP: |
889 | 690 | SelectATOMIC_CMP_SWAP(N); |
890 | 690 | return; |
891 | 2.78k | case AMDGPUISD::CVT_PKRTZ_F16_F32: |
892 | 187 | case AMDGPUISD::CVT_PKNORM_I16_F32: |
893 | 187 | case AMDGPUISD::CVT_PKNORM_U16_F32: |
894 | 187 | case AMDGPUISD::CVT_PK_U16_U32: |
895 | 187 | case AMDGPUISD::CVT_PK_I16_I32: { |
896 | 187 | // Hack around using a legal type if f16 is illegal. |
897 | 187 | if (N->getValueType(0) == MVT::i32) { |
898 | 84 | MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f1628 : MVT::v2i1656 ; |
899 | 84 | N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT), |
900 | 84 | { N->getOperand(0), N->getOperand(1) }); |
901 | 84 | SelectCode(N); |
902 | 84 | return; |
903 | 84 | } |
904 | 103 | |
905 | 103 | break; |
906 | 103 | } |
907 | 103 | case ISD::INTRINSIC_W_CHAIN: { |
908 | 85 | SelectINTRINSIC_W_CHAIN(N); |
909 | 85 | return; |
910 | 103 | } |
911 | 571 | case ISD::INTRINSIC_VOID: { |
912 | 571 | SelectINTRINSIC_VOID(N); |
913 | 571 | return; |
914 | 627k | } |
915 | 627k | } |
916 | 627k | |
917 | 627k | SelectCode(N); |
918 | 627k | } |
919 | | |
920 | 441 | bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { |
921 | 441 | const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); |
922 | 441 | const Instruction *Term = BB->getTerminator(); |
923 | 441 | return Term->getMetadata("amdgpu.uniform") || |
924 | 441 | Term->getMetadata("structurizecfg.uniform")1 ; |
925 | 441 | } |
926 | | |
927 | 27.4k | StringRef AMDGPUDAGToDAGISel::getPassName() const { |
928 | 27.4k | return "AMDGPU DAG->DAG Pattern Instruction Selection"; |
929 | 27.4k | } |
930 | | |
931 | | //===----------------------------------------------------------------------===// |
932 | | // Complex Patterns |
933 | | //===----------------------------------------------------------------------===// |
934 | | |
935 | | bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, |
936 | 0 | SDValue &Offset) { |
937 | 0 | return false; |
938 | 0 | } |
939 | | |
940 | | bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, |
941 | 0 | SDValue &Offset) { |
942 | 0 | ConstantSDNode *C; |
943 | 0 | SDLoc DL(Addr); |
944 | 0 |
|
945 | 0 | if ((C = dyn_cast<ConstantSDNode>(Addr))) { |
946 | 0 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); |
947 | 0 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
948 | 0 | } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && |
949 | 0 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { |
950 | 0 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); |
951 | 0 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
952 | 0 | } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && |
953 | 0 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { |
954 | 0 | Base = Addr.getOperand(0); |
955 | 0 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
956 | 0 | } else { |
957 | 0 | Base = Addr; |
958 | 0 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); |
959 | 0 | } |
960 | 0 |
|
961 | 0 | return true; |
962 | 0 | } |
963 | | |
964 | | // FIXME: Should only handle addcarry/subcarry |
965 | 150 | void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { |
966 | 150 | SDLoc DL(N); |
967 | 150 | SDValue LHS = N->getOperand(0); |
968 | 150 | SDValue RHS = N->getOperand(1); |
969 | 150 | |
970 | 150 | unsigned Opcode = N->getOpcode(); |
971 | 150 | bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE144 ); |
972 | 150 | bool ProduceCarry = |
973 | 150 | ConsumeCarry || Opcode == ISD::ADDC144 || Opcode == ISD::SUBC0 ; |
974 | 150 | bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE6 ; |
975 | 150 | |
976 | 150 | SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); |
977 | 150 | SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); |
978 | 150 | |
979 | 150 | SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
980 | 150 | DL, MVT::i32, LHS, Sub0); |
981 | 150 | SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
982 | 150 | DL, MVT::i32, LHS, Sub1); |
983 | 150 | |
984 | 150 | SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
985 | 150 | DL, MVT::i32, RHS, Sub0); |
986 | 150 | SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, |
987 | 150 | DL, MVT::i32, RHS, Sub1); |
988 | 150 | |
989 | 150 | SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); |
990 | 150 | |
991 | 150 | unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U320 ; |
992 | 150 | unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U320 ; |
993 | 150 | |
994 | 150 | SDNode *AddLo; |
995 | 150 | if (!ConsumeCarry) { |
996 | 144 | SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; |
997 | 144 | AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args); |
998 | 144 | } else { |
999 | 6 | SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) }; |
1000 | 6 | AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args); |
1001 | 6 | } |
1002 | 150 | SDValue AddHiArgs[] = { |
1003 | 150 | SDValue(Hi0, 0), |
1004 | 150 | SDValue(Hi1, 0), |
1005 | 150 | SDValue(AddLo, 1) |
1006 | 150 | }; |
1007 | 150 | SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs); |
1008 | 150 | |
1009 | 150 | SDValue RegSequenceArgs[] = { |
1010 | 150 | CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), |
1011 | 150 | SDValue(AddLo,0), |
1012 | 150 | Sub0, |
1013 | 150 | SDValue(AddHi,0), |
1014 | 150 | Sub1, |
1015 | 150 | }; |
1016 | 150 | SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, |
1017 | 150 | MVT::i64, RegSequenceArgs); |
1018 | 150 | |
1019 | 150 | if (ProduceCarry) { |
1020 | 150 | // Replace the carry-use |
1021 | 150 | ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1)); |
1022 | 150 | } |
1023 | 150 | |
1024 | 150 | // Replace the remaining uses. |
1025 | 150 | ReplaceNode(N, RegSequence); |
1026 | 150 | } |
1027 | | |
1028 | 389 | void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) { |
1029 | 389 | SDLoc DL(N); |
1030 | 389 | SDValue LHS = N->getOperand(0); |
1031 | 389 | SDValue RHS = N->getOperand(1); |
1032 | 389 | SDValue CI = N->getOperand(2); |
1033 | 389 | |
1034 | 389 | unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64175 |
1035 | 389 | : AMDGPU::V_SUBB_U32_e64214 ; |
1036 | 389 | CurDAG->SelectNodeTo( |
1037 | 389 | N, Opc, N->getVTList(), |
1038 | 389 | {LHS, RHS, CI, CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); |
1039 | 389 | } |
1040 | | |
1041 | 259 | void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { |
1042 | 259 | // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned |
1043 | 259 | // carry out despite the _i32 name. These were renamed in VI to _U32. |
1044 | 259 | // FIXME: We should probably rename the opcodes here. |
1045 | 259 | unsigned Opc = N->getOpcode() == ISD::UADDO ? |
1046 | 146 | AMDGPU::V_ADD_I32_e64113 : AMDGPU::V_SUB_I32_e64; |
1047 | 259 | |
1048 | 259 | CurDAG->SelectNodeTo( |
1049 | 259 | N, Opc, N->getVTList(), |
1050 | 259 | {N->getOperand(0), N->getOperand(1), |
1051 | 259 | CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); |
1052 | 259 | } |
1053 | | |
1054 | 320 | void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) { |
1055 | 320 | SDLoc SL(N); |
1056 | 320 | // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod |
1057 | 320 | SDValue Ops[10]; |
1058 | 320 | |
1059 | 320 | SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]); |
1060 | 320 | SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); |
1061 | 320 | SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]); |
1062 | 320 | Ops[8] = N->getOperand(0); |
1063 | 320 | Ops[9] = N->getOperand(4); |
1064 | 320 | |
1065 | 320 | CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops); |
1066 | 320 | } |
1067 | | |
1068 | 64 | void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) { |
1069 | 64 | SDLoc SL(N); |
1070 | 64 | // src0_modifiers, src0, src1_modifiers, src1, clamp, omod |
1071 | 64 | SDValue Ops[8]; |
1072 | 64 | |
1073 | 64 | SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]); |
1074 | 64 | SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]); |
1075 | 64 | Ops[6] = N->getOperand(0); |
1076 | 64 | Ops[7] = N->getOperand(3); |
1077 | 64 | |
1078 | 64 | CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops); |
1079 | 64 | } |
1080 | | |
1081 | | // We need to handle this here because tablegen doesn't support matching |
1082 | | // instructions with multiple outputs. |
1083 | 319 | void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { |
1084 | 319 | SDLoc SL(N); |
1085 | 319 | EVT VT = N->getValueType(0); |
1086 | 319 | |
1087 | 319 | assert(VT == MVT::f32 || VT == MVT::f64); |
1088 | 319 | |
1089 | 319 | unsigned Opc |
1090 | 319 | = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64136 : AMDGPU::V_DIV_SCALE_F32183 ; |
1091 | 319 | |
1092 | 319 | SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; |
1093 | 319 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); |
1094 | 319 | } |
1095 | | |
1096 | 172 | void AMDGPUDAGToDAGISel::SelectDIV_FMAS(SDNode *N) { |
1097 | 172 | const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget); |
1098 | 172 | const SIRegisterInfo *TRI = ST->getRegisterInfo(); |
1099 | 172 | |
1100 | 172 | SDLoc SL(N); |
1101 | 172 | EVT VT = N->getValueType(0); |
1102 | 172 | |
1103 | 172 | assert(VT == MVT::f32 || VT == MVT::f64); |
1104 | 172 | |
1105 | 172 | unsigned Opc |
1106 | 172 | = (VT == MVT::f64) ? AMDGPU::V_DIV_FMAS_F6467 : AMDGPU::V_DIV_FMAS_F32105 ; |
1107 | 172 | |
1108 | 172 | SDValue CarryIn = N->getOperand(3); |
1109 | 172 | // V_DIV_FMAS implicitly reads VCC. |
1110 | 172 | SDValue VCC = CurDAG->getCopyToReg(CurDAG->getEntryNode(), SL, |
1111 | 172 | TRI->getVCC(), CarryIn, SDValue()); |
1112 | 172 | |
1113 | 172 | SDValue Ops[10]; |
1114 | 172 | |
1115 | 172 | SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); |
1116 | 172 | SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]); |
1117 | 172 | SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]); |
1118 | 172 | |
1119 | 172 | Ops[8] = VCC; |
1120 | 172 | Ops[9] = VCC.getValue(1); |
1121 | 172 | |
1122 | 172 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); |
1123 | 172 | } |
1124 | | |
1125 | | // We need to handle this here because tablegen doesn't support matching |
1126 | | // instructions with multiple outputs. |
1127 | 48 | void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { |
1128 | 48 | SDLoc SL(N); |
1129 | 48 | bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; |
1130 | 48 | unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I3212 : AMDGPU::V_MAD_U64_U3236 ; |
1131 | 48 | |
1132 | 48 | SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); |
1133 | 48 | SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), |
1134 | 48 | Clamp }; |
1135 | 48 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); |
1136 | 48 | } |
1137 | | |
1138 | | bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset, |
1139 | 8.51k | unsigned OffsetBits) const { |
1140 | 8.51k | if ((OffsetBits == 16 && !isUInt<16>(Offset)8.02k ) || |
1141 | 8.51k | (8.09k OffsetBits == 88.09k && !isUInt<8>(Offset)483 )) |
1142 | 437 | return false; |
1143 | 8.07k | |
1144 | 8.07k | if (Subtarget->hasUsableDSOffset() || |
1145 | 8.07k | Subtarget->unsafeDSOffsetFoldingEnabled()2.03k ) |
1146 | 6.04k | return true; |
1147 | 2.03k | |
1148 | 2.03k | // On Southern Islands instruction with a negative base value and an offset |
1149 | 2.03k | // don't seem to work. |
1150 | 2.03k | return CurDAG->SignBitIsZero(Base); |
1151 | 2.03k | } |
1152 | | |
1153 | | bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, |
1154 | 11.7k | SDValue &Offset) const { |
1155 | 11.7k | SDLoc DL(Addr); |
1156 | 11.7k | if (CurDAG->isBaseWithConstantOffset(Addr)) { |
1157 | 7.97k | SDValue N0 = Addr.getOperand(0); |
1158 | 7.97k | SDValue N1 = Addr.getOperand(1); |
1159 | 7.97k | ConstantSDNode *C1 = cast<ConstantSDNode>(N1); |
1160 | 7.97k | if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) { |
1161 | 7.52k | // (add n0, c0) |
1162 | 7.52k | Base = N0; |
1163 | 7.52k | Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); |
1164 | 7.52k | return true; |
1165 | 7.52k | } |
1166 | 3.80k | } else if (Addr.getOpcode() == ISD::SUB) { |
1167 | 16 | // sub C, x -> add (sub 0, x), C |
1168 | 16 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { |
1169 | 16 | int64_t ByteOffset = C->getSExtValue(); |
1170 | 16 | if (isUInt<16>(ByteOffset)) { |
1171 | 14 | SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1172 | 14 | |
1173 | 14 | // XXX - This is kind of hacky. Create a dummy sub node so we can check |
1174 | 14 | // the known bits in isDSOffsetLegal. We need to emit the selected node |
1175 | 14 | // here, so this is thrown away. |
1176 | 14 | SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, |
1177 | 14 | Zero, Addr.getOperand(1)); |
1178 | 14 | |
1179 | 14 | if (isDSOffsetLegal(Sub, ByteOffset, 16)) { |
1180 | 12 | SmallVector<SDValue, 3> Opnds; |
1181 | 12 | Opnds.push_back(Zero); |
1182 | 12 | Opnds.push_back(Addr.getOperand(1)); |
1183 | 12 | |
1184 | 12 | // FIXME: Select to VOP3 version for with-carry. |
1185 | 12 | unsigned SubOp = AMDGPU::V_SUB_I32_e32; |
1186 | 12 | if (Subtarget->hasAddNoCarry()) { |
1187 | 5 | SubOp = AMDGPU::V_SUB_U32_e64; |
1188 | 5 | Opnds.push_back( |
1189 | 5 | CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit |
1190 | 5 | } |
1191 | 12 | |
1192 | 12 | MachineSDNode *MachineSub = |
1193 | 12 | CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds); |
1194 | 12 | |
1195 | 12 | Base = SDValue(MachineSub, 0); |
1196 | 12 | Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16); |
1197 | 12 | return true; |
1198 | 12 | } |
1199 | 3.78k | } |
1200 | 16 | } |
1201 | 3.78k | } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { |
1202 | 720 | // If we have a constant address, prefer to put the constant into the |
1203 | 720 | // offset. This can save moves to load the constant address since multiple |
1204 | 720 | // operations can share the zero base address register, and enables merging |
1205 | 720 | // into read2 / write2 instructions. |
1206 | 720 | |
1207 | 720 | SDLoc DL(Addr); |
1208 | 720 | |
1209 | 720 | if (isUInt<16>(CAddr->getZExtValue())) { |
1210 | 716 | SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1211 | 716 | MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, |
1212 | 716 | DL, MVT::i32, Zero); |
1213 | 716 | Base = SDValue(MovZero, 0); |
1214 | 716 | Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); |
1215 | 716 | return true; |
1216 | 716 | } |
1217 | 3.52k | } |
1218 | 3.52k | |
1219 | 3.52k | // default case |
1220 | 3.52k | Base = Addr; |
1221 | 3.52k | Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); |
1222 | 3.52k | return true; |
1223 | 3.52k | } |
1224 | | |
1225 | | // TODO: If offset is too big, put low 16-bit into offset. |
1226 | | bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, |
1227 | | SDValue &Offset0, |
1228 | 595 | SDValue &Offset1) const { |
1229 | 595 | SDLoc DL(Addr); |
1230 | 595 | |
1231 | 595 | if (CurDAG->isBaseWithConstantOffset(Addr)) { |
1232 | 477 | SDValue N0 = Addr.getOperand(0); |
1233 | 477 | SDValue N1 = Addr.getOperand(1); |
1234 | 477 | ConstantSDNode *C1 = cast<ConstantSDNode>(N1); |
1235 | 477 | unsigned DWordOffset0 = C1->getZExtValue() / 4; |
1236 | 477 | unsigned DWordOffset1 = DWordOffset0 + 1; |
1237 | 477 | // (add n0, c0) |
1238 | 477 | if (isDSOffsetLegal(N0, DWordOffset1, 8)) { |
1239 | 455 | Base = N0; |
1240 | 455 | Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); |
1241 | 455 | Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); |
1242 | 455 | return true; |
1243 | 455 | } |
1244 | 118 | } else if (Addr.getOpcode() == ISD::SUB) { |
1245 | 6 | // sub C, x -> add (sub 0, x), C |
1246 | 6 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) { |
1247 | 6 | unsigned DWordOffset0 = C->getZExtValue() / 4; |
1248 | 6 | unsigned DWordOffset1 = DWordOffset0 + 1; |
1249 | 6 | |
1250 | 6 | if (isUInt<8>(DWordOffset0)) { |
1251 | 6 | SDLoc DL(Addr); |
1252 | 6 | SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1253 | 6 | |
1254 | 6 | // XXX - This is kind of hacky. Create a dummy sub node so we can check |
1255 | 6 | // the known bits in isDSOffsetLegal. We need to emit the selected node |
1256 | 6 | // here, so this is thrown away. |
1257 | 6 | SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, |
1258 | 6 | Zero, Addr.getOperand(1)); |
1259 | 6 | |
1260 | 6 | if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { |
1261 | 4 | SmallVector<SDValue, 3> Opnds; |
1262 | 4 | Opnds.push_back(Zero); |
1263 | 4 | Opnds.push_back(Addr.getOperand(1)); |
1264 | 4 | unsigned SubOp = AMDGPU::V_SUB_I32_e32; |
1265 | 4 | if (Subtarget->hasAddNoCarry()) { |
1266 | 2 | SubOp = AMDGPU::V_SUB_U32_e64; |
1267 | 2 | Opnds.push_back( |
1268 | 2 | CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit |
1269 | 2 | } |
1270 | 4 | |
1271 | 4 | MachineSDNode *MachineSub |
1272 | 4 | = CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds); |
1273 | 4 | |
1274 | 4 | Base = SDValue(MachineSub, 0); |
1275 | 4 | Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); |
1276 | 4 | Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); |
1277 | 4 | return true; |
1278 | 4 | } |
1279 | 112 | } |
1280 | 6 | } |
1281 | 112 | } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { |
1282 | 0 | unsigned DWordOffset0 = CAddr->getZExtValue() / 4; |
1283 | 0 | unsigned DWordOffset1 = DWordOffset0 + 1; |
1284 | 0 | assert(4 * DWordOffset0 == CAddr->getZExtValue()); |
1285 | 0 |
|
1286 | 0 | if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) { |
1287 | 0 | SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1288 | 0 | MachineSDNode *MovZero |
1289 | 0 | = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, |
1290 | 0 | DL, MVT::i32, Zero); |
1291 | 0 | Base = SDValue(MovZero, 0); |
1292 | 0 | Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); |
1293 | 0 | Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); |
1294 | 0 | return true; |
1295 | 0 | } |
1296 | 136 | } |
1297 | 136 | |
1298 | 136 | // default case |
1299 | 136 | |
1300 | 136 | Base = Addr; |
1301 | 136 | Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8); |
1302 | 136 | Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8); |
1303 | 136 | return true; |
1304 | 136 | } |
1305 | | |
1306 | | bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, |
1307 | | SDValue &VAddr, SDValue &SOffset, |
1308 | | SDValue &Offset, SDValue &Offen, |
1309 | | SDValue &Idxen, SDValue &Addr64, |
1310 | | SDValue &GLC, SDValue &SLC, |
1311 | 50.6k | SDValue &TFE, SDValue &DLC) const { |
1312 | 50.6k | // Subtarget prefers to use flat instruction |
1313 | 50.6k | if (Subtarget->useFlatForGlobal()) |
1314 | 17.8k | return false; |
1315 | 32.8k | |
1316 | 32.8k | SDLoc DL(Addr); |
1317 | 32.8k | |
1318 | 32.8k | if (!GLC.getNode()) |
1319 | 32.8k | GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1320 | 32.8k | if (!SLC.getNode()) |
1321 | 32.5k | SLC = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1322 | 32.8k | TFE = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1323 | 32.8k | DLC = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1324 | 32.8k | |
1325 | 32.8k | Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1326 | 32.8k | Offen = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1327 | 32.8k | Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1); |
1328 | 32.8k | SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1329 | 32.8k | |
1330 | 32.8k | ConstantSDNode *C1 = nullptr; |
1331 | 32.8k | SDValue N0 = Addr; |
1332 | 32.8k | if (CurDAG->isBaseWithConstantOffset(Addr)) { |
1333 | 9.18k | C1 = cast<ConstantSDNode>(Addr.getOperand(1)); |
1334 | 9.18k | if (isUInt<32>(C1->getZExtValue())) |
1335 | 9.17k | N0 = Addr.getOperand(0); |
1336 | 10 | else |
1337 | 10 | C1 = nullptr; |
1338 | 9.18k | } |
1339 | 32.8k | |
1340 | 32.8k | if (N0.getOpcode() == ISD::ADD) { |
1341 | 4.79k | // (add N2, N3) -> addr64, or |
1342 | 4.79k | // (add (add N2, N3), C1) -> addr64 |
1343 | 4.79k | SDValue N2 = N0.getOperand(0); |
1344 | 4.79k | SDValue N3 = N0.getOperand(1); |
1345 | 4.79k | Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); |
1346 | 4.79k | |
1347 | 4.79k | if (N2->isDivergent()) { |
1348 | 152 | if (N3->isDivergent()) { |
1349 | 7 | // Both N2 and N3 are divergent. Use N0 (the result of the add) as the |
1350 | 7 | // addr64, and construct the resource from a 0 address. |
1351 | 7 | Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0); |
1352 | 7 | VAddr = N0; |
1353 | 145 | } else { |
1354 | 145 | // N2 is divergent, N3 is not. |
1355 | 145 | Ptr = N3; |
1356 | 145 | VAddr = N2; |
1357 | 145 | } |
1358 | 4.64k | } else { |
1359 | 4.64k | // N2 is not divergent. |
1360 | 4.64k | Ptr = N2; |
1361 | 4.64k | VAddr = N3; |
1362 | 4.64k | } |
1363 | 4.79k | Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); |
1364 | 28.0k | } else if (N0->isDivergent()) { |
1365 | 73 | // N0 is divergent. Use it as the addr64, and construct the resource from a |
1366 | 73 | // 0 address. |
1367 | 73 | Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0); |
1368 | 73 | VAddr = N0; |
1369 | 73 | Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1); |
1370 | 27.9k | } else { |
1371 | 27.9k | // N0 -> offset, or |
1372 | 27.9k | // (N0 + C1) -> offset |
1373 | 27.9k | VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1374 | 27.9k | Ptr = N0; |
1375 | 27.9k | } |
1376 | 32.8k | |
1377 | 32.8k | if (!C1) { |
1378 | 23.6k | // No offset. |
1379 | 23.6k | Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); |
1380 | 23.6k | return true; |
1381 | 23.6k | } |
1382 | 9.17k | |
1383 | 9.17k | if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) { |
1384 | 9.04k | // Legal offset for instruction. |
1385 | 9.04k | Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); |
1386 | 9.04k | return true; |
1387 | 9.04k | } |
1388 | 132 | |
1389 | 132 | // Illegal offset, store it in soffset. |
1390 | 132 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); |
1391 | 132 | SOffset = |
1392 | 132 | SDValue(CurDAG->getMachineNode( |
1393 | 132 | AMDGPU::S_MOV_B32, DL, MVT::i32, |
1394 | 132 | CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), |
1395 | 132 | 0); |
1396 | 132 | return true; |
1397 | 132 | } |
1398 | | |
1399 | | bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, |
1400 | | SDValue &VAddr, SDValue &SOffset, |
1401 | | SDValue &Offset, SDValue &GLC, |
1402 | | SDValue &SLC, SDValue &TFE, |
1403 | 35.3k | SDValue &DLC) const { |
1404 | 35.3k | SDValue Ptr, Offen, Idxen, Addr64; |
1405 | 35.3k | |
1406 | 35.3k | // addr64 bit was removed for volcanic islands. |
1407 | 35.3k | if (!Subtarget->hasAddr64()) |
1408 | 18.4k | return false; |
1409 | 16.8k | |
1410 | 16.8k | if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, |
1411 | 16.8k | GLC, SLC, TFE, DLC)) |
1412 | 3.18k | return false; |
1413 | 13.6k | |
1414 | 13.6k | ConstantSDNode *C = cast<ConstantSDNode>(Addr64); |
1415 | 13.6k | if (C->getSExtValue()) { |
1416 | 3.78k | SDLoc DL(Addr); |
1417 | 3.78k | |
1418 | 3.78k | const SITargetLowering& Lowering = |
1419 | 3.78k | *static_cast<const SITargetLowering*>(getTargetLowering()); |
1420 | 3.78k | |
1421 | 3.78k | SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); |
1422 | 3.78k | return true; |
1423 | 3.78k | } |
1424 | 9.90k | |
1425 | 9.90k | return false; |
1426 | 9.90k | } |
1427 | | |
1428 | | bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, |
1429 | | SDValue &VAddr, SDValue &SOffset, |
1430 | | SDValue &Offset, |
1431 | 635 | SDValue &SLC) const { |
1432 | 635 | SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); |
1433 | 635 | SDValue GLC, TFE, DLC; |
1434 | 635 | |
1435 | 635 | return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC); |
1436 | 635 | } |
1437 | | |
1438 | 328 | static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { |
1439 | 328 | auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>(); |
1440 | 328 | return PSV && PSV->isStack()221 ; |
1441 | 328 | } |
1442 | | |
1443 | 7.55k | std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { |
1444 | 7.55k | const MachineFunction &MF = CurDAG->getMachineFunction(); |
1445 | 7.55k | const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); |
1446 | 7.55k | |
1447 | 7.55k | if (auto FI = dyn_cast<FrameIndexSDNode>(N)) { |
1448 | 6.24k | SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), |
1449 | 6.24k | FI->getValueType(0)); |
1450 | 6.24k | |
1451 | 6.24k | // If we can resolve this to a frame index access, this will be relative to |
1452 | 6.24k | // either the stack or frame pointer SGPR. |
1453 | 6.24k | return std::make_pair( |
1454 | 6.24k | TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)); |
1455 | 6.24k | } |
1456 | 1.30k | |
1457 | 1.30k | // If we don't know this private access is a local stack object, it needs to |
1458 | 1.30k | // be relative to the entry point's scratch wave offset register. |
1459 | 1.30k | return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(), |
1460 | 1.30k | MVT::i32)); |
1461 | 1.30k | } |
1462 | | |
1463 | | bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent, |
1464 | | SDValue Addr, SDValue &Rsrc, |
1465 | | SDValue &VAddr, SDValue &SOffset, |
1466 | 7.55k | SDValue &ImmOffset) const { |
1467 | 7.55k | |
1468 | 7.55k | SDLoc DL(Addr); |
1469 | 7.55k | MachineFunction &MF = CurDAG->getMachineFunction(); |
1470 | 7.55k | const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); |
1471 | 7.55k | |
1472 | 7.55k | Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); |
1473 | 7.55k | |
1474 | 7.55k | if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) { |
1475 | 6 | unsigned Imm = CAddr->getZExtValue(); |
1476 | 6 | |
1477 | 6 | SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32); |
1478 | 6 | MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, |
1479 | 6 | DL, MVT::i32, HighBits); |
1480 | 6 | VAddr = SDValue(MovHighBits, 0); |
1481 | 6 | |
1482 | 6 | // In a call sequence, stores to the argument stack area are relative to the |
1483 | 6 | // stack pointer. |
1484 | 6 | const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo(); |
1485 | 6 | unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? |
1486 | 6 | Info->getStackPtrOffsetReg()0 : Info->getScratchWaveOffsetReg(); |
1487 | 6 | |
1488 | 6 | SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32); |
1489 | 6 | ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); |
1490 | 6 | return true; |
1491 | 6 | } |
1492 | 7.55k | |
1493 | 7.55k | if (CurDAG->isBaseWithConstantOffset(Addr)) { |
1494 | 6.24k | // (add n0, c1) |
1495 | 6.24k | |
1496 | 6.24k | SDValue N0 = Addr.getOperand(0); |
1497 | 6.24k | SDValue N1 = Addr.getOperand(1); |
1498 | 6.24k | |
1499 | 6.24k | // Offsets in vaddr must be positive if range checking is enabled. |
1500 | 6.24k | // |
1501 | 6.24k | // The total computation of vaddr + soffset + offset must not overflow. If |
1502 | 6.24k | // vaddr is negative, even if offset is 0 the sgpr offset add will end up |
1503 | 6.24k | // overflowing. |
1504 | 6.24k | // |
1505 | 6.24k | // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would |
1506 | 6.24k | // always perform a range check. If a negative vaddr base index was used, |
1507 | 6.24k | // this would fail the range check. The overall address computation would |
1508 | 6.24k | // compute a valid address, but this doesn't happen due to the range |
1509 | 6.24k | // check. For out-of-bounds MUBUF loads, a 0 is returned. |
1510 | 6.24k | // |
1511 | 6.24k | // Therefore it should be safe to fold any VGPR offset on gfx9 into the |
1512 | 6.24k | // MUBUF vaddr, but not on older subtargets which can only do this if the |
1513 | 6.24k | // sign bit is known 0. |
1514 | 6.24k | ConstantSDNode *C1 = cast<ConstantSDNode>(N1); |
1515 | 6.24k | if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) && |
1516 | 6.24k | (6.22k !Subtarget->privateMemoryResourceIsRangeChecked()6.22k || |
1517 | 6.22k | CurDAG->SignBitIsZero(N0)3.60k )) { |
1518 | 5.62k | std::tie(VAddr, SOffset) = foldFrameIndex(N0); |
1519 | 5.62k | ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); |
1520 | 5.62k | return true; |
1521 | 5.62k | } |
1522 | 1.92k | } |
1523 | 1.92k | |
1524 | 1.92k | // (node) |
1525 | 1.92k | std::tie(VAddr, SOffset) = foldFrameIndex(Addr); |
1526 | 1.92k | ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); |
1527 | 1.92k | return true; |
1528 | 1.92k | } |
1529 | | |
1530 | | bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent, |
1531 | | SDValue Addr, |
1532 | | SDValue &SRsrc, |
1533 | | SDValue &SOffset, |
1534 | 7.88k | SDValue &Offset) const { |
1535 | 7.88k | ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr); |
1536 | 7.88k | if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())328 ) |
1537 | 7.55k | return false; |
1538 | 322 | |
1539 | 322 | SDLoc DL(Addr); |
1540 | 322 | MachineFunction &MF = CurDAG->getMachineFunction(); |
1541 | 322 | const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); |
1542 | 322 | |
1543 | 322 | SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); |
1544 | 322 | |
1545 | 322 | const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo(); |
1546 | 322 | unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? |
1547 | 221 | Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg()101 ; |
1548 | 322 | |
1549 | 322 | // FIXME: Get from MachinePointerInfo? We should only be using the frame |
1550 | 322 | // offset if we know this is in a call sequence. |
1551 | 322 | SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32); |
1552 | 322 | |
1553 | 322 | Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); |
1554 | 322 | return true; |
1555 | 322 | } |
1556 | | |
1557 | | bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, |
1558 | | SDValue &SOffset, SDValue &Offset, |
1559 | | SDValue &GLC, SDValue &SLC, |
1560 | 33.8k | SDValue &TFE, SDValue &DLC) const { |
1561 | 33.8k | SDValue Ptr, VAddr, Offen, Idxen, Addr64; |
1562 | 33.8k | const SIInstrInfo *TII = |
1563 | 33.8k | static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); |
1564 | 33.8k | |
1565 | 33.8k | if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, |
1566 | 33.8k | GLC, SLC, TFE, DLC)) |
1567 | 14.6k | return false; |
1568 | 19.1k | |
1569 | 19.1k | if (!cast<ConstantSDNode>(Offen)->getSExtValue() && |
1570 | 19.1k | !cast<ConstantSDNode>(Idxen)->getSExtValue() && |
1571 | 19.1k | !cast<ConstantSDNode>(Addr64)->getSExtValue()) { |
1572 | 18.0k | uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | |
1573 | 18.0k | APInt::getAllOnesValue(32).getZExtValue(); // Size |
1574 | 18.0k | SDLoc DL(Addr); |
1575 | 18.0k | |
1576 | 18.0k | const SITargetLowering& Lowering = |
1577 | 18.0k | *static_cast<const SITargetLowering*>(getTargetLowering()); |
1578 | 18.0k | |
1579 | 18.0k | SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); |
1580 | 18.0k | return true; |
1581 | 18.0k | } |
1582 | 1.08k | return false; |
1583 | 1.08k | } |
1584 | | |
1585 | | bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, |
1586 | | SDValue &Soffset, SDValue &Offset |
1587 | 8 | ) const { |
1588 | 8 | SDValue GLC, SLC, TFE, DLC; |
1589 | 8 | |
1590 | 8 | return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC); |
1591 | 8 | } |
1592 | | bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, |
1593 | | SDValue &Soffset, SDValue &Offset, |
1594 | 566 | SDValue &SLC) const { |
1595 | 566 | SDValue GLC, TFE, DLC; |
1596 | 566 | |
1597 | 566 | return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC); |
1598 | 566 | } |
1599 | | |
1600 | | template <bool IsSigned> |
1601 | | bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, |
1602 | | SDValue Addr, |
1603 | | SDValue &VAddr, |
1604 | | SDValue &Offset, |
1605 | 19.1k | SDValue &SLC) const { |
1606 | 19.1k | return static_cast<const SITargetLowering*>(getTargetLowering())-> |
1607 | 19.1k | SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC); |
1608 | 19.1k | } AMDGPUISelDAGToDAG.cpp:bool (anonymous namespace)::AMDGPUDAGToDAGISel::SelectFlatOffset<true>(llvm::SDNode*, llvm::SDValue, llvm::SDValue&, llvm::SDValue&, llvm::SDValue&) const Line | Count | Source | 1605 | 6.18k | SDValue &SLC) const { | 1606 | 6.18k | return static_cast<const SITargetLowering*>(getTargetLowering())-> | 1607 | 6.18k | SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC); | 1608 | 6.18k | } |
AMDGPUISelDAGToDAG.cpp:bool (anonymous namespace)::AMDGPUDAGToDAGISel::SelectFlatOffset<false>(llvm::SDNode*, llvm::SDValue, llvm::SDValue&, llvm::SDValue&, llvm::SDValue&) const Line | Count | Source | 1605 | 12.9k | SDValue &SLC) const { | 1606 | 12.9k | return static_cast<const SITargetLowering*>(getTargetLowering())-> | 1607 | 12.9k | SelectFlatOffset(IsSigned, *CurDAG, N, Addr, VAddr, Offset, SLC); | 1608 | 12.9k | } |
|
1609 | | |
1610 | | bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDNode *N, |
1611 | | SDValue Addr, |
1612 | | SDValue &VAddr, |
1613 | | SDValue &Offset, |
1614 | 2.06k | SDValue &SLC) const { |
1615 | 2.06k | return SelectFlatOffset<false>(N, Addr, VAddr, Offset, SLC); |
1616 | 2.06k | } |
1617 | | |
1618 | | bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N, |
1619 | | SDValue Addr, |
1620 | | SDValue &VAddr, |
1621 | | SDValue &Offset, |
1622 | 215 | SDValue &SLC) const { |
1623 | 215 | return SelectFlatOffset<true>(N, Addr, VAddr, Offset, SLC); |
1624 | 215 | } |
1625 | | |
1626 | | bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, |
1627 | 26.8k | SDValue &Offset, bool &Imm) const { |
1628 | 26.8k | |
1629 | 26.8k | // FIXME: Handle non-constant offsets. |
1630 | 26.8k | ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); |
1631 | 26.8k | if (!C) |
1632 | 71 | return false; |
1633 | 26.7k | |
1634 | 26.7k | SDLoc SL(ByteOffsetNode); |
1635 | 26.7k | GCNSubtarget::Generation Gen = Subtarget->getGeneration(); |
1636 | 26.7k | int64_t ByteOffset = C->getSExtValue(); |
1637 | 26.7k | int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset); |
1638 | 26.7k | |
1639 | 26.7k | if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) { |
1640 | 26.6k | Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); |
1641 | 26.6k | Imm = true; |
1642 | 26.6k | return true; |
1643 | 26.6k | } |
1644 | 93 | |
1645 | 93 | if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)82 ) |
1646 | 13 | return false; |
1647 | 80 | |
1648 | 80 | if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)30 ) { |
1649 | 30 | // 32-bit Immediates are supported on Sea Islands. |
1650 | 30 | Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); |
1651 | 50 | } else { |
1652 | 50 | SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); |
1653 | 50 | Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, |
1654 | 50 | C32Bit), 0); |
1655 | 50 | } |
1656 | 80 | Imm = false; |
1657 | 80 | return true; |
1658 | 80 | } |
1659 | | |
1660 | 32.3k | SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { |
1661 | 32.3k | if (Addr.getValueType() != MVT::i32) |
1662 | 32.1k | return Addr; |
1663 | 122 | |
1664 | 122 | // Zero-extend a 32-bit address. |
1665 | 122 | SDLoc SL(Addr); |
1666 | 122 | |
1667 | 122 | const MachineFunction &MF = CurDAG->getMachineFunction(); |
1668 | 122 | const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); |
1669 | 122 | unsigned AddrHiVal = Info->get32BitAddressHighBits(); |
1670 | 122 | SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32); |
1671 | 122 | |
1672 | 122 | const SDValue Ops[] = { |
1673 | 122 | CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32), |
1674 | 122 | Addr, |
1675 | 122 | CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), |
1676 | 122 | SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi), |
1677 | 122 | 0), |
1678 | 122 | CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32), |
1679 | 122 | }; |
1680 | 122 | |
1681 | 122 | return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64, |
1682 | 122 | Ops), 0); |
1683 | 122 | } |
1684 | | |
1685 | | bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, |
1686 | 32.3k | SDValue &Offset, bool &Imm) const { |
1687 | 32.3k | SDLoc SL(Addr); |
1688 | 32.3k | |
1689 | 32.3k | // A 32-bit (address + offset) should not cause unsigned 32-bit integer |
1690 | 32.3k | // wraparound, because s_load instructions perform the addition in 64 bits. |
1691 | 32.3k | if ((Addr.getValueType() != MVT::i32 || |
1692 | 32.3k | Addr->getFlags().hasNoUnsignedWrap()122 ) && |
1693 | 32.3k | CurDAG->isBaseWithConstantOffset(Addr)32.2k ) { |
1694 | 26.2k | SDValue N0 = Addr.getOperand(0); |
1695 | 26.2k | SDValue N1 = Addr.getOperand(1); |
1696 | 26.2k | |
1697 | 26.2k | if (SelectSMRDOffset(N1, Offset, Imm)) { |
1698 | 26.2k | SBase = Expand32BitAddress(N0); |
1699 | 26.2k | return true; |
1700 | 26.2k | } |
1701 | 6.04k | } |
1702 | 6.04k | SBase = Expand32BitAddress(Addr); |
1703 | 6.04k | Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); |
1704 | 6.04k | Imm = true; |
1705 | 6.04k | return true; |
1706 | 6.04k | } |
1707 | | |
1708 | | bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, |
1709 | 32.2k | SDValue &Offset) const { |
1710 | 32.2k | bool Imm; |
1711 | 32.2k | return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; |
1712 | 32.2k | } |
1713 | | |
1714 | | bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, |
1715 | 9 | SDValue &Offset) const { |
1716 | 9 | |
1717 | 9 | if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) |
1718 | 0 | return false; |
1719 | 9 | |
1720 | 9 | bool Imm; |
1721 | 9 | if (!SelectSMRD(Addr, SBase, Offset, Imm)) |
1722 | 0 | return false; |
1723 | 9 | |
1724 | 9 | return !Imm && isa<ConstantSDNode>(Offset); |
1725 | 9 | } |
1726 | | |
1727 | | bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, |
1728 | 19 | SDValue &Offset) const { |
1729 | 19 | bool Imm; |
1730 | 19 | return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && |
1731 | 19 | !isa<ConstantSDNode>(Offset); |
1732 | 19 | } |
1733 | | |
1734 | | bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, |
1735 | 532 | SDValue &Offset) const { |
1736 | 532 | bool Imm; |
1737 | 532 | return SelectSMRDOffset(Addr, Offset, Imm) && Imm472 ; |
1738 | 532 | } |
1739 | | |
1740 | | bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, |
1741 | 17 | SDValue &Offset) const { |
1742 | 17 | if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) |
1743 | 0 | return false; |
1744 | 17 | |
1745 | 17 | bool Imm; |
1746 | 17 | if (!SelectSMRDOffset(Addr, Offset, Imm)) |
1747 | 11 | return false; |
1748 | 6 | |
1749 | 6 | return !Imm && isa<ConstantSDNode>(Offset); |
1750 | 6 | } |
1751 | | |
1752 | | bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, |
1753 | | SDValue &Base, |
1754 | 76.1k | SDValue &Offset) const { |
1755 | 76.1k | SDLoc DL(Index); |
1756 | 76.1k | |
1757 | 76.1k | if (CurDAG->isBaseWithConstantOffset(Index)) { |
1758 | 81 | SDValue N0 = Index.getOperand(0); |
1759 | 81 | SDValue N1 = Index.getOperand(1); |
1760 | 81 | ConstantSDNode *C1 = cast<ConstantSDNode>(N1); |
1761 | 81 | |
1762 | 81 | // (add n0, c0) |
1763 | 81 | // Don't peel off the offset (c0) if doing so could possibly lead |
1764 | 81 | // the base (n0) to be negative. |
1765 | 81 | if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0)45 ) { |
1766 | 41 | Base = N0; |
1767 | 41 | Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32); |
1768 | 41 | return true; |
1769 | 41 | } |
1770 | 76.0k | } |
1771 | 76.0k | |
1772 | 76.0k | if (isa<ConstantSDNode>(Index)) |
1773 | 76.0k | return false; |
1774 | 73 | |
1775 | 73 | Base = Index; |
1776 | 73 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); |
1777 | 73 | return true; |
1778 | 73 | } |
1779 | | |
1780 | | SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, |
1781 | | SDValue Val, uint32_t Offset, |
1782 | 5.09k | uint32_t Width) { |
1783 | 5.09k | // Transformation function, pack the offset and width of a BFE into |
1784 | 5.09k | // the format expected by the S_BFE_I32 / S_BFE_U32. In the second |
1785 | 5.09k | // source, bits [5:0] contain the offset and bits [22:16] the width. |
1786 | 5.09k | uint32_t PackedVal = Offset | (Width << 16); |
1787 | 5.09k | SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); |
1788 | 5.09k | |
1789 | 5.09k | return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); |
1790 | 5.09k | } |
1791 | | |
1792 | 492 | void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { |
1793 | 492 | // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) |
1794 | 492 | // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) |
1795 | 492 | // Predicate: 0 < b <= c < 32 |
1796 | 492 | |
1797 | 492 | const SDValue &Shl = N->getOperand(0); |
1798 | 492 | ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); |
1799 | 492 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
1800 | 492 | |
1801 | 492 | if (B && C472 ) { |
1802 | 472 | uint32_t BVal = B->getZExtValue(); |
1803 | 472 | uint32_t CVal = C->getZExtValue(); |
1804 | 472 | |
1805 | 472 | if (0 < BVal && BVal <= CVal && CVal < 32461 ) { |
1806 | 461 | bool Signed = N->getOpcode() == ISD::SRA; |
1807 | 461 | unsigned Opcode = Signed ? AMDGPU::S_BFE_I32459 : AMDGPU::S_BFE_U322 ; |
1808 | 461 | |
1809 | 461 | ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, |
1810 | 461 | 32 - CVal)); |
1811 | 461 | return; |
1812 | 461 | } |
1813 | 31 | } |
1814 | 31 | SelectCode(N); |
1815 | 31 | } |
1816 | | |
1817 | 22.3k | void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { |
1818 | 22.3k | switch (N->getOpcode()) { |
1819 | 22.3k | case ISD::AND: |
1820 | 8.30k | if (N->getOperand(0).getOpcode() == ISD::SRL) { |
1821 | 2.91k | // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)" |
1822 | 2.91k | // Predicate: isMask(mask) |
1823 | 2.91k | const SDValue &Srl = N->getOperand(0); |
1824 | 2.91k | ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1)); |
1825 | 2.91k | ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
1826 | 2.91k | |
1827 | 2.91k | if (Shift && Mask2.90k ) { |
1828 | 2.90k | uint32_t ShiftVal = Shift->getZExtValue(); |
1829 | 2.90k | uint32_t MaskVal = Mask->getZExtValue(); |
1830 | 2.90k | |
1831 | 2.90k | if (isMask_32(MaskVal)) { |
1832 | 2.72k | uint32_t WidthVal = countPopulation(MaskVal); |
1833 | 2.72k | |
1834 | 2.72k | ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), |
1835 | 2.72k | Srl.getOperand(0), ShiftVal, WidthVal)); |
1836 | 2.72k | return; |
1837 | 2.72k | } |
1838 | 5.57k | } |
1839 | 2.91k | } |
1840 | 5.57k | break; |
1841 | 6.76k | case ISD::SRL: |
1842 | 6.76k | if (N->getOperand(0).getOpcode() == ISD::AND) { |
1843 | 374 | // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)" |
1844 | 374 | // Predicate: isMask(mask >> b) |
1845 | 374 | const SDValue &And = N->getOperand(0); |
1846 | 374 | ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
1847 | 374 | ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1)); |
1848 | 374 | |
1849 | 374 | if (Shift && Mask373 ) { |
1850 | 373 | uint32_t ShiftVal = Shift->getZExtValue(); |
1851 | 373 | uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal; |
1852 | 373 | |
1853 | 373 | if (isMask_32(MaskVal)) { |
1854 | 373 | uint32_t WidthVal = countPopulation(MaskVal); |
1855 | 373 | |
1856 | 373 | ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), |
1857 | 373 | And.getOperand(0), ShiftVal, WidthVal)); |
1858 | 373 | return; |
1859 | 373 | } |
1860 | 6.39k | } |
1861 | 6.39k | } else if (N->getOperand(0).getOpcode() == ISD::SHL) { |
1862 | 16 | SelectS_BFEFromShifts(N); |
1863 | 16 | return; |
1864 | 16 | } |
1865 | 6.38k | break; |
1866 | 6.38k | case ISD::SRA: |
1867 | 2.61k | if (N->getOperand(0).getOpcode() == ISD::SHL) { |
1868 | 476 | SelectS_BFEFromShifts(N); |
1869 | 476 | return; |
1870 | 476 | } |
1871 | 2.14k | break; |
1872 | 2.14k | |
1873 | 4.70k | case ISD::SIGN_EXTEND_INREG: { |
1874 | 4.70k | // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8 |
1875 | 4.70k | SDValue Src = N->getOperand(0); |
1876 | 4.70k | if (Src.getOpcode() != ISD::SRL) |
1877 | 3.29k | break; |
1878 | 1.40k | |
1879 | 1.40k | const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); |
1880 | 1.40k | if (!Amt) |
1881 | 0 | break; |
1882 | 1.40k | |
1883 | 1.40k | unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); |
1884 | 1.40k | ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0), |
1885 | 1.40k | Amt->getZExtValue(), Width)); |
1886 | 1.40k | return; |
1887 | 1.40k | } |
1888 | 17.3k | } |
1889 | 17.3k | |
1890 | 17.3k | SelectCode(N); |
1891 | 17.3k | } |
1892 | | |
1893 | 603 | bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { |
1894 | 603 | assert(N->getOpcode() == ISD::BRCOND); |
1895 | 603 | if (!N->hasOneUse()) |
1896 | 0 | return false; |
1897 | 603 | |
1898 | 603 | SDValue Cond = N->getOperand(1); |
1899 | 603 | if (Cond.getOpcode() == ISD::CopyToReg) |
1900 | 0 | Cond = Cond.getOperand(2); |
1901 | 603 | |
1902 | 603 | if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()545 ) |
1903 | 64 | return false; |
1904 | 539 | |
1905 | 539 | MVT VT = Cond.getOperand(0).getSimpleValueType(); |
1906 | 539 | if (VT == MVT::i32) |
1907 | 427 | return true; |
1908 | 112 | |
1909 | 112 | if (VT == MVT::i64) { |
1910 | 31 | auto ST = static_cast<const GCNSubtarget *>(Subtarget); |
1911 | 31 | |
1912 | 31 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); |
1913 | 31 | return (CC == ISD::SETEQ || CC == ISD::SETNE22 ) && ST->hasScalarCompareEq64()29 ; |
1914 | 31 | } |
1915 | 81 | |
1916 | 81 | return false; |
1917 | 81 | } |
1918 | | |
1919 | 692 | void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { |
1920 | 692 | SDValue Cond = N->getOperand(1); |
1921 | 692 | |
1922 | 692 | if (Cond.isUndef()) { |
1923 | 89 | CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other, |
1924 | 89 | N->getOperand(2), N->getOperand(0)); |
1925 | 89 | return; |
1926 | 89 | } |
1927 | 603 | |
1928 | 603 | const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget); |
1929 | 603 | const SIRegisterInfo *TRI = ST->getRegisterInfo(); |
1930 | 603 | |
1931 | 603 | bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N)441 ; |
1932 | 603 | unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1440 : AMDGPU::S_CBRANCH_VCCNZ163 ; |
1933 | 603 | unsigned CondReg = UseSCCBr ? (unsigned)AMDGPU::SCC440 : TRI->getVCC()163 ; |
1934 | 603 | SDLoc SL(N); |
1935 | 603 | |
1936 | 603 | if (!UseSCCBr) { |
1937 | 163 | // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not |
1938 | 163 | // analyzed what generates the vcc value, so we do not know whether vcc |
1939 | 163 | // bits for disabled lanes are 0. Thus we need to mask out bits for |
1940 | 163 | // disabled lanes. |
1941 | 163 | // |
1942 | 163 | // For the case that we select S_CBRANCH_SCC1 and it gets |
1943 | 163 | // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls |
1944 | 163 | // SIInstrInfo::moveToVALU which inserts the S_AND). |
1945 | 163 | // |
1946 | 163 | // We could add an analysis of what generates the vcc value here and omit |
1947 | 163 | // the S_AND when is unnecessary. But it would be better to add a separate |
1948 | 163 | // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it |
1949 | 163 | // catches both cases. |
1950 | 163 | Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B3220 |
1951 | 163 | : AMDGPU::S_AND_B64143 , |
1952 | 163 | SL, MVT::i1, |
1953 | 163 | CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO20 |
1954 | 163 | : AMDGPU::EXEC143 , |
1955 | 163 | MVT::i1), |
1956 | 163 | Cond), |
1957 | 163 | 0); |
1958 | 163 | } |
1959 | 603 | |
1960 | 603 | SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond); |
1961 | 603 | CurDAG->SelectNodeTo(N, BrOp, MVT::Other, |
1962 | 603 | N->getOperand(2), // Basic Block |
1963 | 603 | VCC.getValue(0)); |
1964 | 603 | } |
1965 | | |
1966 | 2.78k | void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) { |
1967 | 2.78k | MVT VT = N->getSimpleValueType(0); |
1968 | 2.78k | bool IsFMA = N->getOpcode() == ISD::FMA; |
1969 | 2.78k | if (VT != MVT::f32 || (2.11k !Subtarget->hasMadMixInsts()2.11k && |
1970 | 2.11k | !Subtarget->hasFmaMixInsts()1.92k ) || |
1971 | 2.78k | (348 (348 IsFMA348 && Subtarget->hasMadMixInsts()179 ) || |
1972 | 2.56k | (277 !IsFMA277 && Subtarget->hasFmaMixInsts()169 ))) { |
1973 | 2.56k | SelectCode(N); |
1974 | 2.56k | return; |
1975 | 2.56k | } |
1976 | 224 | |
1977 | 224 | SDValue Src0 = N->getOperand(0); |
1978 | 224 | SDValue Src1 = N->getOperand(1); |
1979 | 224 | SDValue Src2 = N->getOperand(2); |
1980 | 224 | unsigned Src0Mods, Src1Mods, Src2Mods; |
1981 | 224 | |
1982 | 224 | // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand |
1983 | 224 | // using the conversion from f16. |
1984 | 224 | bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods); |
1985 | 224 | bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods); |
1986 | 224 | bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods); |
1987 | 224 | |
1988 | 224 | assert((IsFMA || !Subtarget->hasFP32Denormals()) && |
1989 | 224 | "fmad selected with denormals enabled"); |
1990 | 224 | // TODO: We can select this with f32 denormals enabled if all the sources are |
1991 | 224 | // converted from f16 (in which case fmad isn't legal). |
1992 | 224 | |
1993 | 224 | if (Sel0 || Sel194 || Sel294 ) { |
1994 | 130 | // For dummy operands. |
1995 | 130 | SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32); |
1996 | 130 | SDValue Ops[] = { |
1997 | 130 | CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0, |
1998 | 130 | CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1, |
1999 | 130 | CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2, |
2000 | 130 | CurDAG->getTargetConstant(0, SDLoc(), MVT::i1), |
2001 | 130 | Zero, Zero |
2002 | 130 | }; |
2003 | 130 | |
2004 | 130 | CurDAG->SelectNodeTo(N, |
2005 | 130 | IsFMA ? AMDGPU::V_FMA_MIX_F3265 : AMDGPU::V_MAD_MIX_F3265 , |
2006 | 130 | MVT::f32, Ops); |
2007 | 130 | } else { |
2008 | 94 | SelectCode(N); |
2009 | 94 | } |
2010 | 224 | } |
2011 | | |
2012 | | // This is here because there isn't a way to use the generated sub0_sub1 as the |
2013 | | // subreg index to EXTRACT_SUBREG in tablegen. |
2014 | 690 | void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { |
2015 | 690 | MemSDNode *Mem = cast<MemSDNode>(N); |
2016 | 690 | unsigned AS = Mem->getAddressSpace(); |
2017 | 690 | if (AS == AMDGPUAS::FLAT_ADDRESS) { |
2018 | 638 | SelectCode(N); |
2019 | 638 | return; |
2020 | 638 | } |
2021 | 52 | |
2022 | 52 | MVT VT = N->getSimpleValueType(0); |
2023 | 52 | bool Is32 = (VT == MVT::i32); |
2024 | 52 | SDLoc SL(N); |
2025 | 52 | |
2026 | 52 | MachineSDNode *CmpSwap = nullptr; |
2027 | 52 | if (Subtarget->hasAddr64()) { |
2028 | 17 | SDValue SRsrc, VAddr, SOffset, Offset, SLC; |
2029 | 17 | |
2030 | 17 | if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) { |
2031 | 8 | unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN4 : |
2032 | 8 | AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN4 ; |
2033 | 8 | SDValue CmpVal = Mem->getOperand(2); |
2034 | 8 | |
2035 | 8 | // XXX - Do we care about glue operands? |
2036 | 8 | |
2037 | 8 | SDValue Ops[] = { |
2038 | 8 | CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain() |
2039 | 8 | }; |
2040 | 8 | |
2041 | 8 | CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); |
2042 | 8 | } |
2043 | 17 | } |
2044 | 52 | |
2045 | 52 | if (!CmpSwap) { |
2046 | 44 | SDValue SRsrc, SOffset, Offset, SLC; |
2047 | 44 | if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) { |
2048 | 18 | unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN8 : |
2049 | 18 | AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN10 ; |
2050 | 18 | |
2051 | 18 | SDValue CmpVal = Mem->getOperand(2); |
2052 | 18 | SDValue Ops[] = { |
2053 | 18 | CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain() |
2054 | 18 | }; |
2055 | 18 | |
2056 | 18 | CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops); |
2057 | 18 | } |
2058 | 44 | } |
2059 | 52 | |
2060 | 52 | if (!CmpSwap) { |
2061 | 26 | SelectCode(N); |
2062 | 26 | return; |
2063 | 26 | } |
2064 | 26 | |
2065 | 26 | MachineMemOperand *MMO = Mem->getMemOperand(); |
2066 | 26 | CurDAG->setNodeMemRefs(CmpSwap, {MMO}); |
2067 | 26 | |
2068 | 26 | unsigned SubReg = Is32 ? AMDGPU::sub012 : AMDGPU::sub0_sub114 ; |
2069 | 26 | SDValue Extract |
2070 | 26 | = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0)); |
2071 | 26 | |
2072 | 26 | ReplaceUses(SDValue(N, 0), Extract); |
2073 | 26 | ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1)); |
2074 | 26 | CurDAG->RemoveDeadNode(N); |
2075 | 26 | } |
2076 | | |
2077 | 72 | void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) { |
2078 | 72 | // The address is assumed to be uniform, so if it ends up in a VGPR, it will |
2079 | 72 | // be copied to an SGPR with readfirstlane. |
2080 | 72 | unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ? |
2081 | 36 | AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME; |
2082 | 72 | |
2083 | 72 | SDValue Chain = N->getOperand(0); |
2084 | 72 | SDValue Ptr = N->getOperand(2); |
2085 | 72 | MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); |
2086 | 72 | MachineMemOperand *MMO = M->getMemOperand(); |
2087 | 72 | bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS; |
2088 | 72 | |
2089 | 72 | SDValue Offset; |
2090 | 72 | if (CurDAG->isBaseWithConstantOffset(Ptr)) { |
2091 | 40 | SDValue PtrBase = Ptr.getOperand(0); |
2092 | 40 | SDValue PtrOffset = Ptr.getOperand(1); |
2093 | 40 | |
2094 | 40 | const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue(); |
2095 | 40 | if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) { |
2096 | 22 | N = glueCopyToM0(N, PtrBase); |
2097 | 22 | Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32); |
2098 | 22 | } |
2099 | 40 | } |
2100 | 72 | |
2101 | 72 | if (!Offset) { |
2102 | 50 | N = glueCopyToM0(N, Ptr); |
2103 | 50 | Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32); |
2104 | 50 | } |
2105 | 72 | |
2106 | 72 | SDValue Ops[] = { |
2107 | 72 | Offset, |
2108 | 72 | CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32), |
2109 | 72 | Chain, |
2110 | 72 | N->getOperand(N->getNumOperands() - 1) // New glue |
2111 | 72 | }; |
2112 | 72 | |
2113 | 72 | SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); |
2114 | 72 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); |
2115 | 72 | } |
2116 | | |
2117 | 179 | static unsigned gwsIntrinToOpcode(unsigned IntrID) { |
2118 | 179 | switch (IntrID) { |
2119 | 179 | case Intrinsic::amdgcn_ds_gws_init: |
2120 | 60 | return AMDGPU::DS_GWS_INIT; |
2121 | 179 | case Intrinsic::amdgcn_ds_gws_barrier: |
2122 | 99 | return AMDGPU::DS_GWS_BARRIER; |
2123 | 179 | case Intrinsic::amdgcn_ds_gws_sema_v: |
2124 | 5 | return AMDGPU::DS_GWS_SEMA_V; |
2125 | 179 | case Intrinsic::amdgcn_ds_gws_sema_br: |
2126 | 5 | return AMDGPU::DS_GWS_SEMA_BR; |
2127 | 179 | case Intrinsic::amdgcn_ds_gws_sema_p: |
2128 | 5 | return AMDGPU::DS_GWS_SEMA_P; |
2129 | 179 | case Intrinsic::amdgcn_ds_gws_sema_release_all: |
2130 | 5 | return AMDGPU::DS_GWS_SEMA_RELEASE_ALL; |
2131 | 179 | default: |
2132 | 0 | llvm_unreachable("not a gws intrinsic"); |
2133 | 179 | } |
2134 | 179 | } |
2135 | | |
2136 | 180 | void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) { |
2137 | 180 | if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all && |
2138 | 180 | !Subtarget->hasGWSSemaReleaseAll()6 ) { |
2139 | 1 | // Let this error. |
2140 | 1 | SelectCode(N); |
2141 | 1 | return; |
2142 | 1 | } |
2143 | 179 | |
2144 | 179 | // Chain, intrinsic ID, vsrc, offset |
2145 | 179 | const bool HasVSrc = N->getNumOperands() == 4; |
2146 | 179 | assert(HasVSrc || N->getNumOperands() == 3); |
2147 | 179 | |
2148 | 179 | SDLoc SL(N); |
2149 | 179 | SDValue BaseOffset = N->getOperand(HasVSrc ? 3164 : 215 ); |
2150 | 179 | int ImmOffset = 0; |
2151 | 179 | MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); |
2152 | 179 | MachineMemOperand *MMO = M->getMemOperand(); |
2153 | 179 | |
2154 | 179 | // Don't worry if the offset ends up in a VGPR. Only one lane will have |
2155 | 179 | // effect, so SIFixSGPRCopies will validly insert readfirstlane. |
2156 | 179 | |
2157 | 179 | // The resource id offset is computed as (<isa opaque base> + M0[21:16] + |
2158 | 179 | // offset field) % 64. Some versions of the programming guide omit the m0 |
2159 | 179 | // part, or claim it's from offset 0. |
2160 | 179 | if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) { |
2161 | 128 | // If we have a constant offset, try to use the 0 in m0 as the base. |
2162 | 128 | // TODO: Look into changing the default m0 initialization value. If the |
2163 | 128 | // default -1 only set the low 16-bits, we could leave it as-is and add 1 to |
2164 | 128 | // the immediate offset. |
2165 | 128 | glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32)); |
2166 | 128 | ImmOffset = ConstOffset->getZExtValue(); |
2167 | 128 | } else { |
2168 | 51 | if (CurDAG->isBaseWithConstantOffset(BaseOffset)) { |
2169 | 24 | ImmOffset = BaseOffset.getConstantOperandVal(1); |
2170 | 24 | BaseOffset = BaseOffset.getOperand(0); |
2171 | 24 | } |
2172 | 51 | |
2173 | 51 | // Prefer to do the shift in an SGPR since it should be possible to use m0 |
2174 | 51 | // as the result directly. If it's already an SGPR, it will be eliminated |
2175 | 51 | // later. |
2176 | 51 | SDNode *SGPROffset |
2177 | 51 | = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32, |
2178 | 51 | BaseOffset); |
2179 | 51 | // Shift to offset in m0 |
2180 | 51 | SDNode *M0Base |
2181 | 51 | = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32, |
2182 | 51 | SDValue(SGPROffset, 0), |
2183 | 51 | CurDAG->getTargetConstant(16, SL, MVT::i32)); |
2184 | 51 | glueCopyToM0(N, SDValue(M0Base, 0)); |
2185 | 51 | } |
2186 | 179 | |
2187 | 179 | SDValue V0; |
2188 | 179 | SDValue Chain = N->getOperand(0); |
2189 | 179 | SDValue Glue; |
2190 | 179 | if (HasVSrc) { |
2191 | 164 | SDValue VSrc0 = N->getOperand(2); |
2192 | 164 | |
2193 | 164 | // The manual doesn't mention this, but it seems only v0 works. |
2194 | 164 | V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32); |
2195 | 164 | |
2196 | 164 | SDValue CopyToV0 = CurDAG->getCopyToReg( |
2197 | 164 | N->getOperand(0), SL, V0, VSrc0, |
2198 | 164 | N->getOperand(N->getNumOperands() - 1)); |
2199 | 164 | Chain = CopyToV0; |
2200 | 164 | Glue = CopyToV0.getValue(1); |
2201 | 164 | } |
2202 | 179 | |
2203 | 179 | SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32); |
2204 | 179 | |
2205 | 179 | // TODO: Can this just be removed from the instruction? |
2206 | 179 | SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1); |
2207 | 179 | |
2208 | 179 | const unsigned Opc = gwsIntrinToOpcode(IntrID); |
2209 | 179 | SmallVector<SDValue, 5> Ops; |
2210 | 179 | if (HasVSrc) |
2211 | 164 | Ops.push_back(V0); |
2212 | 179 | Ops.push_back(OffsetField); |
2213 | 179 | Ops.push_back(GDS); |
2214 | 179 | Ops.push_back(Chain); |
2215 | 179 | |
2216 | 179 | if (HasVSrc) |
2217 | 164 | Ops.push_back(Glue); |
2218 | 179 | |
2219 | 179 | SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); |
2220 | 179 | CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO}); |
2221 | 179 | } |
2222 | | |
2223 | 85 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { |
2224 | 85 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); |
2225 | 85 | switch (IntrID) { |
2226 | 85 | case Intrinsic::amdgcn_ds_append: |
2227 | 72 | case Intrinsic::amdgcn_ds_consume: { |
2228 | 72 | if (N->getValueType(0) != MVT::i32) |
2229 | 0 | break; |
2230 | 72 | SelectDSAppendConsume(N, IntrID); |
2231 | 72 | return; |
2232 | 72 | } |
2233 | 13 | } |
2234 | 13 | |
2235 | 13 | SelectCode(N); |
2236 | 13 | } |
2237 | | |
2238 | 571 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { |
2239 | 571 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); |
2240 | 571 | switch (IntrID) { |
2241 | 571 | case Intrinsic::amdgcn_ds_gws_init: |
2242 | 180 | case Intrinsic::amdgcn_ds_gws_barrier: |
2243 | 180 | case Intrinsic::amdgcn_ds_gws_sema_v: |
2244 | 180 | case Intrinsic::amdgcn_ds_gws_sema_br: |
2245 | 180 | case Intrinsic::amdgcn_ds_gws_sema_p: |
2246 | 180 | case Intrinsic::amdgcn_ds_gws_sema_release_all: |
2247 | 180 | SelectDS_GWS(N, IntrID); |
2248 | 180 | return; |
2249 | 391 | default: |
2250 | 391 | break; |
2251 | 391 | } |
2252 | 391 | |
2253 | 391 | SelectCode(N); |
2254 | 391 | } |
2255 | | |
2256 | | bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src, |
2257 | 29.5k | unsigned &Mods) const { |
2258 | 29.5k | Mods = 0; |
2259 | 29.5k | Src = In; |
2260 | 29.5k | |
2261 | 29.5k | if (Src.getOpcode() == ISD::FNEG) { |
2262 | 1.79k | Mods |= SISrcMods::NEG; |
2263 | 1.79k | Src = Src.getOperand(0); |
2264 | 1.79k | } |
2265 | 29.5k | |
2266 | 29.5k | if (Src.getOpcode() == ISD::FABS) { |
2267 | 762 | Mods |= SISrcMods::ABS; |
2268 | 762 | Src = Src.getOperand(0); |
2269 | 762 | } |
2270 | 29.5k | |
2271 | 29.5k | return true; |
2272 | 29.5k | } |
2273 | | |
2274 | | bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, |
2275 | 28.3k | SDValue &SrcMods) const { |
2276 | 28.3k | unsigned Mods; |
2277 | 28.3k | if (SelectVOP3ModsImpl(In, Src, Mods)) { |
2278 | 28.3k | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); |
2279 | 28.3k | return true; |
2280 | 28.3k | } |
2281 | 0 | |
2282 | 0 | return false; |
2283 | 0 | } |
2284 | | |
2285 | | bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, |
2286 | 246 | SDValue &SrcMods) const { |
2287 | 246 | SelectVOP3Mods(In, Src, SrcMods); |
2288 | 246 | return isNoNanSrc(Src); |
2289 | 246 | } |
2290 | | |
2291 | | bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src, |
2292 | 9.51k | SDValue &SrcMods) const { |
2293 | 9.51k | if (In.getValueType() == MVT::f32) |
2294 | 1.08k | return SelectVOP3Mods(In, Src, SrcMods); |
2295 | 8.43k | Src = In; |
2296 | 8.43k | SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);; |
2297 | 8.43k | return true; |
2298 | 8.43k | } |
2299 | | |
2300 | 4.76k | bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { |
2301 | 4.76k | if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG4.73k ) |
2302 | 325 | return false; |
2303 | 4.44k | |
2304 | 4.44k | Src = In; |
2305 | 4.44k | return true; |
2306 | 4.44k | } |
2307 | | |
2308 | | bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, |
2309 | | SDValue &SrcMods, SDValue &Clamp, |
2310 | 12.6k | SDValue &Omod) const { |
2311 | 12.6k | SDLoc DL(In); |
2312 | 12.6k | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); |
2313 | 12.6k | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); |
2314 | 12.6k | |
2315 | 12.6k | return SelectVOP3Mods(In, Src, SrcMods); |
2316 | 12.6k | } |
2317 | | |
2318 | | bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, |
2319 | | SDValue &SrcMods, |
2320 | | SDValue &Clamp, |
2321 | 64 | SDValue &Omod) const { |
2322 | 64 | Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); |
2323 | 64 | return SelectVOP3Mods(In, Src, SrcMods); |
2324 | 64 | } |
2325 | | |
2326 | | bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, |
2327 | 602 | SDValue &Clamp, SDValue &Omod) const { |
2328 | 602 | Src = In; |
2329 | 602 | |
2330 | 602 | SDLoc DL(In); |
2331 | 602 | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); |
2332 | 602 | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); |
2333 | 602 | |
2334 | 602 | return true; |
2335 | 602 | } |
2336 | | |
2337 | | bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, |
2338 | 1.44k | SDValue &SrcMods) const { |
2339 | 1.44k | unsigned Mods = 0; |
2340 | 1.44k | Src = In; |
2341 | 1.44k | |
2342 | 1.44k | if (Src.getOpcode() == ISD::FNEG) { |
2343 | 13 | Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); |
2344 | 13 | Src = Src.getOperand(0); |
2345 | 13 | } |
2346 | 1.44k | |
2347 | 1.44k | if (Src.getOpcode() == ISD::BUILD_VECTOR) { |
2348 | 365 | unsigned VecMods = Mods; |
2349 | 365 | |
2350 | 365 | SDValue Lo = stripBitcast(Src.getOperand(0)); |
2351 | 365 | SDValue Hi = stripBitcast(Src.getOperand(1)); |
2352 | 365 | |
2353 | 365 | if (Lo.getOpcode() == ISD::FNEG) { |
2354 | 11 | Lo = stripBitcast(Lo.getOperand(0)); |
2355 | 11 | Mods ^= SISrcMods::NEG; |
2356 | 11 | } |
2357 | 365 | |
2358 | 365 | if (Hi.getOpcode() == ISD::FNEG) { |
2359 | 11 | Hi = stripBitcast(Hi.getOperand(0)); |
2360 | 11 | Mods ^= SISrcMods::NEG_HI; |
2361 | 11 | } |
2362 | 365 | |
2363 | 365 | if (isExtractHiElt(Lo, Lo)) |
2364 | 15 | Mods |= SISrcMods::OP_SEL_0; |
2365 | 365 | |
2366 | 365 | if (isExtractHiElt(Hi, Hi)) |
2367 | 15 | Mods |= SISrcMods::OP_SEL_1; |
2368 | 365 | |
2369 | 365 | Lo = stripExtractLoElt(Lo); |
2370 | 365 | Hi = stripExtractLoElt(Hi); |
2371 | 365 | |
2372 | 365 | if (Lo == Hi && !isInlineImmediate(Lo.getNode())196 ) { |
2373 | 34 | // Really a scalar input. Just select from the low half of the register to |
2374 | 34 | // avoid packing. |
2375 | 34 | |
2376 | 34 | Src = Lo; |
2377 | 34 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); |
2378 | 34 | return true; |
2379 | 34 | } |
2380 | 331 | |
2381 | 331 | Mods = VecMods; |
2382 | 331 | } |
2383 | 1.44k | |
2384 | 1.44k | // Packed instructions do not have abs modifiers. |
2385 | 1.44k | Mods |= SISrcMods::OP_SEL_1; |
2386 | 1.40k | |
2387 | 1.40k | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); |
2388 | 1.40k | return true; |
2389 | 1.44k | } |
2390 | | |
2391 | | bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src, |
2392 | | SDValue &SrcMods, |
2393 | 648 | SDValue &Clamp) const { |
2394 | 648 | SDLoc SL(In); |
2395 | 648 | |
2396 | 648 | // FIXME: Handle clamp and op_sel |
2397 | 648 | Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); |
2398 | 648 | |
2399 | 648 | return SelectVOP3PMods(In, Src, SrcMods); |
2400 | 648 | } |
2401 | | |
2402 | | bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, |
2403 | 72 | SDValue &SrcMods) const { |
2404 | 72 | Src = In; |
2405 | 72 | // FIXME: Handle op_sel |
2406 | 72 | SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); |
2407 | 72 | return true; |
2408 | 72 | } |
2409 | | |
2410 | | bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src, |
2411 | | SDValue &SrcMods, |
2412 | 24 | SDValue &Clamp) const { |
2413 | 24 | SDLoc SL(In); |
2414 | 24 | |
2415 | 24 | // FIXME: Handle clamp |
2416 | 24 | Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); |
2417 | 24 | |
2418 | 24 | return SelectVOP3OpSel(In, Src, SrcMods); |
2419 | 24 | } |
2420 | | |
2421 | | bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src, |
2422 | 102 | SDValue &SrcMods) const { |
2423 | 102 | // FIXME: Handle op_sel |
2424 | 102 | return SelectVOP3Mods(In, Src, SrcMods); |
2425 | 102 | } |
2426 | | |
2427 | | bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src, |
2428 | | SDValue &SrcMods, |
2429 | 34 | SDValue &Clamp) const { |
2430 | 34 | SDLoc SL(In); |
2431 | 34 | |
2432 | 34 | // FIXME: Handle clamp |
2433 | 34 | Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); |
2434 | 34 | |
2435 | 34 | return SelectVOP3OpSelMods(In, Src, SrcMods); |
2436 | 34 | } |
2437 | | |
2438 | | // The return value is not whether the match is possible (which it always is), |
2439 | | // but whether or not it a conversion is really used. |
2440 | | bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, |
2441 | 774 | unsigned &Mods) const { |
2442 | 774 | Mods = 0; |
2443 | 774 | SelectVOP3ModsImpl(In, Src, Mods); |
2444 | 774 | |
2445 | 774 | if (Src.getOpcode() == ISD::FP_EXTEND) { |
2446 | 404 | Src = Src.getOperand(0); |
2447 | 404 | assert(Src.getValueType() == MVT::f16); |
2448 | 404 | Src = stripBitcast(Src); |
2449 | 404 | |
2450 | 404 | // Be careful about folding modifiers if we already have an abs. fneg is |
2451 | 404 | // applied last, so we don't want to apply an earlier fneg. |
2452 | 404 | if ((Mods & SISrcMods::ABS) == 0) { |
2453 | 398 | unsigned ModsTmp; |
2454 | 398 | SelectVOP3ModsImpl(Src, Src, ModsTmp); |
2455 | 398 | |
2456 | 398 | if ((ModsTmp & SISrcMods::NEG) != 0) |
2457 | 5 | Mods ^= SISrcMods::NEG; |
2458 | 398 | |
2459 | 398 | if ((ModsTmp & SISrcMods::ABS) != 0) |
2460 | 6 | Mods |= SISrcMods::ABS; |
2461 | 398 | } |
2462 | 404 | |
2463 | 404 | // op_sel/op_sel_hi decide the source type and source. |
2464 | 404 | // If the source's op_sel_hi is set, it indicates to do a conversion from fp16. |
2465 | 404 | // If the sources's op_sel is set, it picks the high half of the source |
2466 | 404 | // register. |
2467 | 404 | |
2468 | 404 | Mods |= SISrcMods::OP_SEL_1; |
2469 | 404 | if (isExtractHiElt(Src, Src)) { |
2470 | 134 | Mods |= SISrcMods::OP_SEL_0; |
2471 | 134 | |
2472 | 134 | // TODO: Should we try to look for neg/abs here? |
2473 | 134 | } |
2474 | 404 | |
2475 | 404 | return true; |
2476 | 404 | } |
2477 | 370 | |
2478 | 370 | return false; |
2479 | 370 | } |
2480 | | |
2481 | | bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src, |
2482 | 102 | SDValue &SrcMods) const { |
2483 | 102 | unsigned Mods = 0; |
2484 | 102 | SelectVOP3PMadMixModsImpl(In, Src, Mods); |
2485 | 102 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); |
2486 | 102 | return true; |
2487 | 102 | } |
2488 | | |
2489 | 64 | SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const { |
2490 | 64 | if (In.isUndef()) |
2491 | 2 | return CurDAG->getUNDEF(MVT::i32); |
2492 | 62 | |
2493 | 62 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) { |
2494 | 3 | SDLoc SL(In); |
2495 | 3 | return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32); |
2496 | 3 | } |
2497 | 59 | |
2498 | 59 | if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) { |
2499 | 3 | SDLoc SL(In); |
2500 | 3 | return CurDAG->getConstant( |
2501 | 3 | C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32); |
2502 | 3 | } |
2503 | 56 | |
2504 | 56 | SDValue Src; |
2505 | 56 | if (isExtractHiElt(In, Src)) |
2506 | 44 | return Src; |
2507 | 12 | |
2508 | 12 | return SDValue(); |
2509 | 12 | } |
2510 | | |
2511 | 28.5k | bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const { |
2512 | 28.5k | assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn); |
2513 | 28.5k | |
2514 | 28.5k | const SIRegisterInfo *SIRI = |
2515 | 28.5k | static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); |
2516 | 28.5k | const SIInstrInfo * SII = |
2517 | 28.5k | static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); |
2518 | 28.5k | |
2519 | 28.5k | unsigned Limit = 0; |
2520 | 28.5k | bool AllUsesAcceptSReg = true; |
2521 | 28.5k | for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); |
2522 | 38.9k | Limit < 10 && U != E38.9k ; ++U, ++Limit10.3k ) { |
2523 | 31.9k | const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); |
2524 | 31.9k | |
2525 | 31.9k | // If the register class is unknown, it could be an unknown |
2526 | 31.9k | // register class that needs to be an SGPR, e.g. an inline asm |
2527 | 31.9k | // constraint |
2528 | 31.9k | if (!RC || SIRI->isSGPRClass(RC)31.9k ) |
2529 | 16.3k | return false; |
2530 | 15.5k | |
2531 | 15.5k | if (RC != &AMDGPU::VS_32RegClass) { |
2532 | 6.07k | AllUsesAcceptSReg = false; |
2533 | 6.07k | SDNode * User = *U; |
2534 | 6.07k | if (User->isMachineOpcode()) { |
2535 | 5.20k | unsigned Opc = User->getMachineOpcode(); |
2536 | 5.20k | MCInstrDesc Desc = SII->get(Opc); |
2537 | 5.20k | if (Desc.isCommutable()) { |
2538 | 999 | unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo(); |
2539 | 999 | unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex; |
2540 | 999 | if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) { |
2541 | 890 | unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs(); |
2542 | 890 | const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo); |
2543 | 890 | if (CommutedRC == &AMDGPU::VS_32RegClass) |
2544 | 890 | AllUsesAcceptSReg = true; |
2545 | 890 | } |
2546 | 999 | } |
2547 | 5.20k | } |
2548 | 6.07k | // If "AllUsesAcceptSReg == false" so far we haven't suceeded |
2549 | 6.07k | // commuting current user. This means have at least one use |
2550 | 6.07k | // that strictly require VGPR. Thus, we will not attempt to commute |
2551 | 6.07k | // other user instructions. |
2552 | 6.07k | if (!AllUsesAcceptSReg) |
2553 | 5.18k | break; |
2554 | 6.07k | } |
2555 | 15.5k | } |
2556 | 28.5k | return 12.2k !AllUsesAcceptSReg12.2k && (Limit < 10)5.18k ; |
2557 | 28.5k | } |
2558 | | |
2559 | 86.1k | bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const { |
2560 | 86.1k | auto Ld = cast<LoadSDNode>(N); |
2561 | 86.1k | |
2562 | 86.1k | return Ld->getAlignment() >= 4 && |
2563 | 86.1k | ( |
2564 | 84.6k | ( |
2565 | 84.6k | ( |
2566 | 84.6k | Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || |
2567 | 84.6k | Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT30.0k |
2568 | 84.6k | ) |
2569 | 84.6k | && |
2570 | 84.6k | !N->isDivergent()54.8k |
2571 | 84.6k | ) |
2572 | 84.6k | || |
2573 | 84.6k | ( |
2574 | 30.1k | Subtarget->getScalarizeGlobalBehavior() && |
2575 | 30.1k | Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS21.8k && |
2576 | 30.1k | !Ld->isVolatile()12.8k && |
2577 | 30.1k | !N->isDivergent()8.58k && |
2578 | 30.1k | static_cast<const SITargetLowering *>( |
2579 | 3.40k | getTargetLowering())->isMemOpHasNoClobberedMemOperand(N) |
2580 | 30.1k | ) |
2581 | 84.6k | ); |
2582 | 86.1k | } |
2583 | | |
2584 | 30.8k | void AMDGPUDAGToDAGISel::PostprocessISelDAG() { |
2585 | 30.8k | const AMDGPUTargetLowering& Lowering = |
2586 | 30.8k | *static_cast<const AMDGPUTargetLowering*>(getTargetLowering()); |
2587 | 30.8k | bool IsModified = false; |
2588 | 33.2k | do { |
2589 | 33.2k | IsModified = false; |
2590 | 33.2k | |
2591 | 33.2k | // Go over all selected nodes and try to fold them a bit more |
2592 | 33.2k | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin(); |
2593 | 1.27M | while (Position != CurDAG->allnodes_end()) { |
2594 | 1.24M | SDNode *Node = &*Position++; |
2595 | 1.24M | MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node); |
2596 | 1.24M | if (!MachineNode) |
2597 | 588k | continue; |
2598 | 657k | |
2599 | 657k | SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); |
2600 | 657k | if (ResNode != Node) { |
2601 | 35.3k | if (ResNode) |
2602 | 35.2k | ReplaceUses(Node, ResNode); |
2603 | 35.3k | IsModified = true; |
2604 | 35.3k | } |
2605 | 657k | } |
2606 | 33.2k | CurDAG->RemoveDeadNodes(); |
2607 | 33.2k | } while (IsModified); |
2608 | 30.8k | } |
2609 | | |
2610 | 2.29k | bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { |
2611 | 2.29k | Subtarget = &MF.getSubtarget<R600Subtarget>(); |
2612 | 2.29k | return SelectionDAGISel::runOnMachineFunction(MF); |
2613 | 2.29k | } |
2614 | | |
2615 | 3.75k | bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { |
2616 | 3.75k | if (!N->readMem()) |
2617 | 0 | return false; |
2618 | 3.75k | if (CbId == -1) |
2619 | 0 | return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || |
2620 | 0 | N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT; |
2621 | 3.75k | |
2622 | 3.75k | return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; |
2623 | 3.75k | } |
2624 | | |
2625 | | bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, |
2626 | 6.52k | SDValue& IntPtr) { |
2627 | 6.52k | if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { |
2628 | 6.52k | IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), |
2629 | 6.52k | true); |
2630 | 6.52k | return true; |
2631 | 6.52k | } |
2632 | 0 | return false; |
2633 | 0 | } |
2634 | | |
2635 | | bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, |
2636 | 0 | SDValue& BaseReg, SDValue &Offset) { |
2637 | 0 | if (!isa<ConstantSDNode>(Addr)) { |
2638 | 0 | BaseReg = Addr; |
2639 | 0 | Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); |
2640 | 0 | return true; |
2641 | 0 | } |
2642 | 0 | return false; |
2643 | 0 | } |
2644 | | |
2645 | 85.0k | void R600DAGToDAGISel::Select(SDNode *N) { |
2646 | 85.0k | unsigned int Opc = N->getOpcode(); |
2647 | 85.0k | if (N->isMachineOpcode()) { |
2648 | 0 | N->setNodeId(-1); |
2649 | 0 | return; // Already selected. |
2650 | 0 | } |
2651 | 85.0k | |
2652 | 85.0k | switch (Opc) { |
2653 | 85.0k | default: break82.6k ; |
2654 | 85.0k | case AMDGPUISD::BUILD_VERTICAL_VECTOR: |
2655 | 2.45k | case ISD::SCALAR_TO_VECTOR: |
2656 | 2.45k | case ISD::BUILD_VECTOR: { |
2657 | 2.45k | EVT VT = N->getValueType(0); |
2658 | 2.45k | unsigned NumVectorElts = VT.getVectorNumElements(); |
2659 | 2.45k | unsigned RegClassID; |
2660 | 2.45k | // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG |
2661 | 2.45k | // that adds a 128 bits reg copy when going through TwoAddressInstructions |
2662 | 2.45k | // pass. We want to avoid 128 bits copies as much as possible because they |
2663 | 2.45k | // can't be bundled by our scheduler. |
2664 | 2.45k | switch(NumVectorElts) { |
2665 | 2.45k | case 2: RegClassID = R600::R600_Reg64RegClassID; break386 ; |
2666 | 2.45k | case 4: |
2667 | 2.07k | if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) |
2668 | 8 | RegClassID = R600::R600_Reg128VerticalRegClassID; |
2669 | 2.06k | else |
2670 | 2.06k | RegClassID = R600::R600_Reg128RegClassID; |
2671 | 2.07k | break; |
2672 | 2.45k | default: 0 llvm_unreachable0 ("Do not know how to lower this BUILD_VECTOR"); |
2673 | 2.45k | } |
2674 | 2.45k | SelectBuildVector(N, RegClassID); |
2675 | 2.45k | return; |
2676 | 2.45k | } |
2677 | 82.6k | } |
2678 | 82.6k | |
2679 | 82.6k | SelectCode(N); |
2680 | 82.6k | } |
2681 | | |
2682 | | bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, |
2683 | 2.04k | SDValue &Offset) { |
2684 | 2.04k | ConstantSDNode *C; |
2685 | 2.04k | SDLoc DL(Addr); |
2686 | 2.04k | |
2687 | 2.04k | if ((C = dyn_cast<ConstantSDNode>(Addr))) { |
2688 | 0 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); |
2689 | 0 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
2690 | 2.04k | } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && |
2691 | 2.04k | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { |
2692 | 1.76k | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); |
2693 | 1.76k | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
2694 | 1.76k | } else if (277 (277 Addr.getOpcode() == ISD::ADD277 || Addr.getOpcode() == ISD::OR277 ) && |
2695 | 277 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))0 ) { |
2696 | 0 | Base = Addr.getOperand(0); |
2697 | 0 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); |
2698 | 277 | } else { |
2699 | 277 | Base = Addr; |
2700 | 277 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); |
2701 | 277 | } |
2702 | 2.04k | |
2703 | 2.04k | return true; |
2704 | 2.04k | } |
2705 | | |
2706 | | bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, |
2707 | 1.71k | SDValue &Offset) { |
2708 | 1.71k | ConstantSDNode *IMMOffset; |
2709 | 1.71k | |
2710 | 1.71k | if (Addr.getOpcode() == ISD::ADD |
2711 | 1.71k | && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))592 |
2712 | 1.71k | && isInt<16>(IMMOffset->getZExtValue())400 ) { |
2713 | 399 | |
2714 | 399 | Base = Addr.getOperand(0); |
2715 | 399 | Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), |
2716 | 399 | MVT::i32); |
2717 | 399 | return true; |
2718 | 399 | // If the pointer address is constant, we can move it to the offset field. |
2719 | 1.31k | } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) |
2720 | 1.31k | && isInt<16>(IMMOffset->getZExtValue())368 ) { |
2721 | 368 | Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), |
2722 | 368 | SDLoc(CurDAG->getEntryNode()), |
2723 | 368 | R600::ZERO, MVT::i32); |
2724 | 368 | Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), |
2725 | 368 | MVT::i32); |
2726 | 368 | return true; |
2727 | 368 | } |
2728 | 948 | |
2729 | 948 | // Default case, no offset |
2730 | 948 | Base = Addr; |
2731 | 948 | Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); |
2732 | 948 | return true; |
2733 | 948 | } |