/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- SIFixupVectorISel.cpp - Fixup post ISel vector issues -------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | /// \file |
8 | | /// SIFixupVectorISel pass cleans up post ISEL Vector issues. |
9 | | /// Currently this will convert GLOBAL_{LOAD|STORE}_* |
10 | | /// and GLOBAL_Atomic_* instructions into their _SADDR variants, |
11 | | /// feeding the sreg into the saddr field of the new instruction. |
12 | | /// We currently handle a REG_SEQUENCE feeding the vaddr |
13 | | /// and decompose it into a base and index. |
14 | | /// |
15 | | /// Transform: |
16 | | /// %17:vgpr_32, %19:sreg_64_xexec = V_ADD_I32_e64 %21:sgpr_32, %22:vgpr_32 |
17 | | /// %18:vgpr_32, %20:sreg_64_xexec = V_ADDC_U32_e64 %25:vgpr_32, |
18 | | /// %24:vgpr_32, %19:sreg_64_xexec |
19 | | /// %16:vreg_64 = REG_SEQUENCE %17:vgpr_32, %sub0, %18:vgpr_32, %sub1 |
20 | | /// %11:vreg_64 = COPY %16:vreg_64 |
21 | | /// %10:vgpr_32 = GLOBAL_LOAD_DWORD killed %11:vreg_64, 16, 0, 0 |
22 | | /// Into: |
23 | | /// %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64, 36, 0 |
24 | | /// %14:vreg_64 = REG_SEQUENCE %6:vgpr_32, %sub0, %15:vgpr_32, %sub1 |
25 | | /// %10:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %14:vreg_64, %4:sreg_64_xexec,16... |
26 | | /// |
27 | | //===----------------------------------------------------------------------===// |
28 | | // |
29 | | |
30 | | #include "AMDGPU.h" |
31 | | #include "AMDGPUSubtarget.h" |
32 | | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
33 | | #include "llvm/ADT/Statistic.h" |
34 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
35 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
36 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
37 | | #include "llvm/IR/Function.h" |
38 | | #include "llvm/IR/LLVMContext.h" |
39 | | #include "llvm/Support/Debug.h" |
40 | | #include "llvm/Target/TargetMachine.h" |
41 | | #define DEBUG_TYPE "si-fixup-vector-isel" |
42 | | |
43 | | using namespace llvm; |
44 | | |
// Off by default; opt-in flag gating the whole transform in fixupGlobalSaddr.
static cl::opt<bool> EnableGlobalSGPRAddr(
  "amdgpu-enable-global-sgpr-addr",
  cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"),
  cl::init(false));

// Opportunities seen vs. actually converted; counted in fixupGlobalSaddr.
STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");
STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted");
52 | | |
namespace {

/// Machine-function pass that cleans up post-ISel vector issues; currently
/// its only job is the GLOBAL_* -> _SADDR rewrite in fixupGlobalSaddr.
class SIFixupVectorISel : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFixupVectorISel() : MachineFunctionPass(ID) {
    initializeSIFixupVectorISelPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  // The pass only rewrites instructions in place; it never adds, removes,
  // or reorders basic blocks, so the CFG is preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.
73 | | |
// Register the pass with the LLVM pass registry under DEBUG_TYPE.
INITIALIZE_PASS(SIFixupVectorISel, DEBUG_TYPE,
                "SI Fixup Vector ISel", false, false)

char SIFixupVectorISel::ID = 0;

// Exported ID so the target pipeline can reference this pass by identity.
char &llvm::SIFixupVectorISelID = SIFixupVectorISel::ID;
80 | | |
// Factory used by the AMDGPU target pass pipeline; caller owns the result.
FunctionPass *llvm::createSIFixupVectorISelPass() {
  return new SIFixupVectorISel();
}
84 | | |
85 | | static bool findSRegBaseAndIndex(MachineOperand *Op, |
86 | | unsigned &BaseReg, |
87 | | unsigned &IndexReg, |
88 | | MachineRegisterInfo &MRI, |
89 | 324 | const SIRegisterInfo *TRI) { |
90 | 324 | SmallVector<MachineOperand *, 8> Worklist; |
91 | 324 | Worklist.push_back(Op); |
92 | 1.33k | while (!Worklist.empty()) { |
93 | 1.27k | MachineOperand *WOp = Worklist.pop_back_val(); |
94 | 1.27k | if (!WOp->isReg() || |
95 | 1.27k | !TargetRegisterInfo::isVirtualRegister(WOp->getReg())) |
96 | 20 | continue; |
97 | 1.25k | MachineInstr *DefInst = MRI.getUniqueVRegDef(WOp->getReg()); |
98 | 1.25k | switch (DefInst->getOpcode()) { |
99 | 1.25k | default: |
100 | 326 | continue; |
101 | 1.25k | case AMDGPU::COPY: |
102 | 378 | Worklist.push_back(&DefInst->getOperand(1)); |
103 | 378 | break; |
104 | 1.25k | case AMDGPU::REG_SEQUENCE: |
105 | 287 | if (DefInst->getNumOperands() != 5) |
106 | 0 | continue; |
107 | 287 | Worklist.push_back(&DefInst->getOperand(1)); |
108 | 287 | Worklist.push_back(&DefInst->getOperand(3)); |
109 | 287 | break; |
110 | 287 | case AMDGPU::V_ADD_I32_e64: |
111 | 265 | // The V_ADD_* and its analogous V_ADDCV_* are generated by |
112 | 265 | // a previous pass which lowered from an ADD_64_PSEUDO, |
113 | 265 | // which generates subregs to break up the 64 bit args. |
114 | 265 | if (DefInst->getOperand(2).getSubReg() != AMDGPU::NoSubRegister) |
115 | 0 | continue; |
116 | 265 | BaseReg = DefInst->getOperand(2).getReg(); |
117 | 265 | if (DefInst->getOperand(3).getSubReg() != AMDGPU::NoSubRegister) |
118 | 0 | continue; |
119 | 265 | IndexReg = DefInst->getOperand(3).getReg(); |
120 | 265 | // Chase the IndexReg. |
121 | 265 | MachineInstr *MI = MRI.getUniqueVRegDef(IndexReg); |
122 | 265 | if (!MI || !MI->isCopy()) |
123 | 0 | continue; |
124 | 265 | // Make sure the reg class is 64 bit for Index. |
125 | 265 | // If the Index register is a subreg, we want it to reference |
126 | 265 | // a 64 bit register which we will use as the Index reg. |
127 | 265 | const TargetRegisterClass *IdxRC, *BaseRC; |
128 | 265 | IdxRC = MRI.getRegClass(MI->getOperand(1).getReg()); |
129 | 265 | if (AMDGPU::getRegBitWidth(IdxRC->getID()) != 64) |
130 | 0 | continue; |
131 | 265 | IndexReg = MI->getOperand(1).getReg(); |
132 | 265 | // Chase the BaseReg. |
133 | 265 | MI = MRI.getUniqueVRegDef(BaseReg); |
134 | 265 | if (!MI || !MI->isCopy()) |
135 | 0 | continue; |
136 | 265 | // Make sure the register class is 64 bit for Base. |
137 | 265 | BaseReg = MI->getOperand(1).getReg(); |
138 | 265 | BaseRC = MRI.getRegClass(BaseReg); |
139 | 265 | if (AMDGPU::getRegBitWidth(BaseRC->getID()) != 64) |
140 | 0 | continue; |
141 | 265 | // Make sure Base is SReg and Index is VReg. |
142 | 265 | if (!TRI->isSGPRReg(MRI, BaseReg)) |
143 | 3 | return false; |
144 | 262 | if (!TRI->hasVGPRs(MRI.getRegClass(IndexReg))) |
145 | 0 | return false; |
146 | 262 | // clear any killed flags on Index and Base regs, used later. |
147 | 262 | MRI.clearKillFlags(IndexReg); |
148 | 262 | MRI.clearKillFlags(BaseReg); |
149 | 262 | return true; |
150 | 1.25k | } |
151 | 1.25k | } |
152 | 324 | return false59 ; |
153 | 324 | } |
154 | | |
// Identify Global LOAD|STORE/ATOMIC and try to convert to _SADDR.
//
// For each instruction in MBB with a _SADDR variant (getGlobalSaddrOp),
// tries to split its vaddr into an SGPR base + VGPR index via
// findSRegBaseAndIndex, and on success replaces the instruction with the
// _SADDR form, copying over every optional operand the original carried.
// Returns true iff any instruction was rewritten. No-op unless the
// amdgpu-enable-global-sgpr-addr flag is set.
static bool fixupGlobalSaddr(MachineBasicBlock &MBB,
                             MachineFunction &MF,
                             MachineRegisterInfo &MRI,
                             const GCNSubtarget &ST,
                             const SIInstrInfo *TII,
                             const SIRegisterInfo *TRI) {
  if (!EnableGlobalSGPRAddr)
    return false;
  bool FuncModified = false;
  MachineBasicBlock::iterator I, Next;
  // Next is captured up front because MI may be erased below.
  for (I = MBB.begin(); I != MBB.end(); I = Next) {
    Next = std::next(I);
    MachineInstr &MI = *I;
    // Negative means this opcode has no _SADDR counterpart.
    int NewOpcd = AMDGPU::getGlobalSaddrOp(MI.getOpcode());
    if (NewOpcd < 0)
      continue;
    // Update our statistics on opportunities seen.
    ++NumSGPRGlobalOccurs;
    LLVM_DEBUG(dbgs() << "Global Mem opp " << MI << '\n');
    // Need a Base and Index or we cant transform to _SADDR.
    unsigned BaseReg = 0;
    unsigned IndexReg = 0;
    MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
    if (!findSRegBaseAndIndex(Op, BaseReg, IndexReg, MRI, TRI))
      continue;
    ++NumSGPRGlobalSaddrs;
    FuncModified = true;
    // Create the new _SADDR Memory instruction.
    // Operand order matters: [vdst,] vaddr(index), [vdata,] saddr(base),
    // offset, [glc,] [dlc,] slc, [vdst_in] to match the _SADDR encoding.
    bool HasVdst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst) != nullptr;
    MachineOperand *VData = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
    MachineInstr *NewGlob = nullptr;
    NewGlob = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcd));
    if (HasVdst)
      NewGlob->addOperand(MF, MI.getOperand(0));
    NewGlob->addOperand(MF, MachineOperand::CreateReg(IndexReg, false));
    if (VData)
      NewGlob->addOperand(MF, *VData);
    NewGlob->addOperand(MF, MachineOperand::CreateReg(BaseReg, false));
    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::offset));

    MachineOperand *Glc = TII->getNamedOperand(MI, AMDGPU::OpName::glc);
    // Atomics dont have a GLC, so omit the field if not there.
    if (Glc)
      NewGlob->addOperand(MF, *Glc);

    MachineOperand *DLC = TII->getNamedOperand(MI, AMDGPU::OpName::dlc);
    if (DLC)
      NewGlob->addOperand(MF, *DLC);

    NewGlob->addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::slc));
    // _D16 have an vdst_in operand, copy it in.
    MachineOperand *VDstInOp = TII->getNamedOperand(MI,
                                      AMDGPU::OpName::vdst_in);
    if (VDstInOp)
      NewGlob->addOperand(MF, *VDstInOp);
    // Preserve implicit operands and memory operand info from the original.
    NewGlob->copyImplicitOps(MF, MI);
    NewGlob->cloneMemRefs(MF, MI);
    // Remove the old Global Memop instruction.
    MI.eraseFromParent();
    LLVM_DEBUG(dbgs() << "New Global Mem " << *NewGlob << '\n');
  }
  return FuncModified;
}
219 | | |
220 | 25.1k | bool SIFixupVectorISel::runOnMachineFunction(MachineFunction &MF) { |
221 | 25.1k | if (skipFunction(MF.getFunction())) |
222 | 13 | return false; |
223 | 25.1k | |
224 | 25.1k | MachineRegisterInfo &MRI = MF.getRegInfo(); |
225 | 25.1k | const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); |
226 | 25.1k | const SIInstrInfo *TII = ST.getInstrInfo(); |
227 | 25.1k | const SIRegisterInfo *TRI = ST.getRegisterInfo(); |
228 | 25.1k | |
229 | 25.1k | bool FuncModified = false; |
230 | 28.3k | for (MachineBasicBlock &MBB : MF) { |
231 | 28.3k | // Cleanup missed Saddr opportunites from ISel. |
232 | 28.3k | FuncModified |= fixupGlobalSaddr(MBB, MF, MRI, ST, TII, TRI); |
233 | 28.3k | } |
234 | 25.1k | return FuncModified; |
235 | 25.1k | } |