/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===----- R600Packetizer.cpp - VLIW packetizer ---------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | /// \file |
10 | | /// This pass implements instruction packetization for R600. It unsets the |
11 | | /// isLast bit of instructions inside a bundle and substitutes source |
12 | | /// registers with PreviousVector when applicable. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "AMDGPU.h" |
17 | | #include "AMDGPUSubtarget.h" |
18 | | #include "R600InstrInfo.h" |
19 | | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
20 | | #include "llvm/CodeGen/DFAPacketizer.h" |
21 | | #include "llvm/CodeGen/MachineDominators.h" |
22 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
23 | | #include "llvm/CodeGen/MachineLoopInfo.h" |
24 | | #include "llvm/CodeGen/Passes.h" |
25 | | #include "llvm/CodeGen/ScheduleDAG.h" |
26 | | #include "llvm/Support/Debug.h" |
27 | | #include "llvm/Support/raw_ostream.h" |
28 | | |
29 | | using namespace llvm; |
30 | | |
31 | | #define DEBUG_TYPE "packets" |
32 | | |
33 | | namespace { |
34 | | |
35 | | class R600Packetizer : public MachineFunctionPass { |
36 | | |
37 | | public: |
38 | | static char ID; |
39 | 280 | R600Packetizer() : MachineFunctionPass(ID) {} |
40 | | |
41 | 280 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
42 | 280 | AU.setPreservesCFG(); |
43 | 280 | AU.addRequired<MachineDominatorTree>(); |
44 | 280 | AU.addPreserved<MachineDominatorTree>(); |
45 | 280 | AU.addRequired<MachineLoopInfo>(); |
46 | 280 | AU.addPreserved<MachineLoopInfo>(); |
47 | 280 | MachineFunctionPass::getAnalysisUsage(AU); |
48 | 280 | } |
49 | | |
50 | 2.57k | StringRef getPassName() const override { return "R600 Packetizer"; } |
51 | | |
52 | | bool runOnMachineFunction(MachineFunction &Fn) override; |
53 | | }; |
54 | | |
55 | | class R600PacketizerList : public VLIWPacketizerList { |
56 | | private: |
57 | | const R600InstrInfo *TII; |
58 | | const R600RegisterInfo &TRI; |
59 | | bool VLIW5; |
60 | | bool ConsideredInstUsesAlreadyWrittenVectorElement; |
61 | | |
62 | 263k | unsigned getSlot(const MachineInstr &MI) const { |
63 | 263k | return TRI.getHWRegChan(MI.getOperand(0).getReg()); |
64 | 263k | } |
65 | | |
66 | | /// \returns register to PV chan mapping for bundle/single instructions that |
67 | | /// immediately precedes I. |
68 | | DenseMap<unsigned, unsigned> getPreviousVector(MachineBasicBlock::iterator I) |
69 | 46.7k | const { |
70 | 46.7k | DenseMap<unsigned, unsigned> Result; |
71 | 46.7k | I--; |
72 | 46.7k | if (!TII->isALUInstr(I->getOpcode()) && !I->isBundle()31.5k ) |
73 | 5.75k | return Result; |
74 | 40.9k | MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); |
75 | 40.9k | if (I->isBundle()) |
76 | 25.7k | BI++; |
77 | 40.9k | int LastDstChan = -1; |
78 | 101k | do { |
79 | 101k | bool isTrans = false; |
80 | 101k | int BISlot = getSlot(*BI); |
81 | 101k | if (LastDstChan >= BISlot) |
82 | 18.1k | isTrans = true; |
83 | 101k | LastDstChan = BISlot; |
84 | 101k | if (TII->isPredicated(*BI)) |
85 | 442 | continue; |
86 | 100k | int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write); |
87 | 100k | if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 075.2k ) |
88 | 607 | continue; |
89 | 100k | int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst); |
90 | 100k | if (DstIdx == -1) { |
91 | 4.18k | continue; |
92 | 4.18k | } |
93 | 96.0k | unsigned Dst = BI->getOperand(DstIdx).getReg(); |
94 | 96.0k | if (isTrans || TII->isTransOnly(*BI)77.9k ) { |
95 | 18.8k | Result[Dst] = R600::PS; |
96 | 18.8k | continue; |
97 | 18.8k | } |
98 | 77.1k | if (BI->getOpcode() == R600::DOT4_r600 || |
99 | 77.1k | BI->getOpcode() == R600::DOT4_eg77.1k ) { |
100 | 24 | Result[Dst] = R600::PV_X; |
101 | 24 | continue; |
102 | 24 | } |
103 | 77.1k | if (Dst == R600::OQAP) { |
104 | 1.67k | continue; |
105 | 1.67k | } |
106 | 75.4k | unsigned PVReg = 0; |
107 | 75.4k | switch (TRI.getHWRegChan(Dst)) { |
108 | 75.4k | case 0: |
109 | 15.4k | PVReg = R600::PV_X; |
110 | 15.4k | break; |
111 | 75.4k | case 1: |
112 | 14.7k | PVReg = R600::PV_Y; |
113 | 14.7k | break; |
114 | 75.4k | case 2: |
115 | 16.0k | PVReg = R600::PV_Z; |
116 | 16.0k | break; |
117 | 75.4k | case 3: |
118 | 29.2k | PVReg = R600::PV_W; |
119 | 29.2k | break; |
120 | 75.4k | default: |
121 | 0 | llvm_unreachable("Invalid Chan"); |
122 | 75.4k | } |
123 | 75.4k | Result[Dst] = PVReg; |
124 | 101k | } while ((++BI)->isBundledWithPred()); |
125 | 40.9k | return Result; |
126 | 40.9k | } |
127 | | |
128 | | void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs) |
129 | 42.9k | const { |
130 | 42.9k | unsigned Ops[] = { |
131 | 42.9k | R600::OpName::src0, |
132 | 42.9k | R600::OpName::src1, |
133 | 42.9k | R600::OpName::src2 |
134 | 42.9k | }; |
135 | 171k | for (unsigned i = 0; i < 3; i++128k ) { |
136 | 128k | int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]); |
137 | 128k | if (OperandIdx < 0) |
138 | 44.2k | continue; |
139 | 84.7k | unsigned Src = MI.getOperand(OperandIdx).getReg(); |
140 | 84.7k | const DenseMap<unsigned, unsigned>::const_iterator It = PVs.find(Src); |
141 | 84.7k | if (It != PVs.end()) |
142 | 23.3k | MI.getOperand(OperandIdx).setReg(It->second); |
143 | 84.7k | } |
144 | 42.9k | } |
145 | | public: |
146 | | // Ctor. |
147 | | R600PacketizerList(MachineFunction &MF, const R600Subtarget &ST, |
148 | | MachineLoopInfo &MLI) |
149 | | : VLIWPacketizerList(MF, MLI, nullptr), |
150 | | TII(ST.getInstrInfo()), |
151 | 2.29k | TRI(TII->getRegisterInfo()) { |
152 | 2.29k | VLIW5 = !ST.hasCaymanISA(); |
153 | 2.29k | } |
154 | | |
155 | | // initPacketizerState - initialize some internal flags. |
156 | 61.8k | void initPacketizerState() override { |
157 | 61.8k | ConsideredInstUsesAlreadyWrittenVectorElement = false; |
158 | 61.8k | } |
159 | | |
160 | | // ignorePseudoInstruction - Ignore bundling of pseudo instructions. |
161 | | bool ignorePseudoInstruction(const MachineInstr &MI, |
162 | 46.7k | const MachineBasicBlock *MBB) override { |
163 | 46.7k | return false; |
164 | 46.7k | } |
165 | | |
166 | | // isSoloInstruction - return true if instruction MI can not be packetized |
167 | | // with any other instruction, which means that MI itself is a packet. |
168 | 61.8k | bool isSoloInstruction(const MachineInstr &MI) override { |
169 | 61.8k | if (TII->isVector(MI)) |
170 | 0 | return true; |
171 | 61.8k | if (!TII->isALUInstr(MI.getOpcode())) |
172 | 11.6k | return true; |
173 | 50.1k | if (MI.getOpcode() == R600::GROUP_BARRIER) |
174 | 4 | return true; |
175 | 50.1k | // XXX: This can be removed once the packetizer properly handles all the |
176 | 50.1k | // LDS instruction group restrictions. |
177 | 50.1k | return TII->isLDSInstr(MI.getOpcode()); |
178 | 50.1k | } |
179 | | |
180 | | // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ |
181 | | // together. |
182 | 54.2k | bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override { |
183 | 54.2k | MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); |
184 | 54.2k | if (getSlot(*MII) == getSlot(*MIJ)) |
185 | 12.0k | ConsideredInstUsesAlreadyWrittenVectorElement = true; |
186 | 54.2k | // Does MII and MIJ share the same pred_sel ? |
187 | 54.2k | int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel), |
188 | 54.2k | OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel); |
189 | 54.2k | Register PredI = (OpI > -1)?MII->getOperand(OpI).getReg() : Register()0 , |
190 | 54.2k | PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg() : Register()0 ; |
191 | 54.2k | if (PredI != PredJ) |
192 | 36 | return false; |
193 | 54.2k | if (SUJ->isSucc(SUI)) { |
194 | 38.3k | for (unsigned i = 0, e = SUJ->Succs.size(); i < e; ++i27.0k ) { |
195 | 32.4k | const SDep &Dep = SUJ->Succs[i]; |
196 | 32.4k | if (Dep.getSUnit() != SUI) |
197 | 21.1k | continue; |
198 | 11.2k | if (Dep.getKind() == SDep::Anti) |
199 | 5.89k | continue; |
200 | 5.39k | if (Dep.getKind() == SDep::Output) |
201 | 844 | if (MII->getOperand(0).getReg() != MIJ->getOperand(0).getReg()) |
202 | 8 | continue; |
203 | 5.38k | return false; |
204 | 5.38k | } |
205 | 11.2k | } |
206 | 54.2k | |
207 | 54.2k | bool ARDef = |
208 | 48.8k | TII->definesAddressRegister(*MII) || TII->definesAddressRegister(*MIJ)48.6k ; |
209 | 48.8k | bool ARUse = |
210 | 48.8k | TII->usesAddressRegister(*MII) || TII->usesAddressRegister(*MIJ); |
211 | 48.8k | |
212 | 48.8k | return !ARDef || !ARUse215 ; |
213 | 54.2k | } |
214 | | |
215 | | // isLegalToPruneDependencies - Is it legal to prune dependece between SUI |
216 | | // and SUJ. |
217 | 5.56k | bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { |
218 | 5.56k | return false; |
219 | 5.56k | } |
220 | | |
221 | 23.7k | void setIsLastBit(MachineInstr *MI, unsigned Bit) const { |
222 | 23.7k | unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last); |
223 | 23.7k | MI->getOperand(LastOp).setImm(Bit); |
224 | 23.7k | } |
225 | | |
226 | | bool isBundlableWithCurrentPMI(MachineInstr &MI, |
227 | | const DenseMap<unsigned, unsigned> &PV, |
228 | | std::vector<R600InstrInfo::BankSwizzle> &BS, |
229 | 46.7k | bool &isTransSlot) { |
230 | 46.7k | isTransSlot = TII->isTransOnly(MI); |
231 | 46.7k | assert (!isTransSlot || VLIW5); |
232 | 46.7k | |
233 | 46.7k | // Is the dst reg sequence legal ? |
234 | 46.7k | if (!isTransSlot && !CurrentPacketMIs.empty()45.7k ) { |
235 | 27.0k | if (getSlot(MI) <= getSlot(*CurrentPacketMIs.back())) { |
236 | 11.5k | if (ConsideredInstUsesAlreadyWrittenVectorElement && |
237 | 11.5k | !TII->isVectorOnly(MI)9.44k && VLIW59.13k ) { |
238 | 8.68k | isTransSlot = true; |
239 | 8.68k | LLVM_DEBUG({ |
240 | 8.68k | dbgs() << "Considering as Trans Inst :"; |
241 | 8.68k | MI.dump(); |
242 | 8.68k | }); |
243 | 8.68k | } |
244 | 2.85k | else |
245 | 2.85k | return false; |
246 | 43.8k | } |
247 | 27.0k | } |
248 | 43.8k | |
249 | 43.8k | // Are the Constants limitations met ? |
250 | 43.8k | CurrentPacketMIs.push_back(&MI); |
251 | 43.8k | if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) { |
252 | 342 | LLVM_DEBUG({ |
253 | 342 | dbgs() << "Couldn't pack :\n"; |
254 | 342 | MI.dump(); |
255 | 342 | dbgs() << "with the following packets :\n"; |
256 | 342 | for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { |
257 | 342 | CurrentPacketMIs[i]->dump(); |
258 | 342 | dbgs() << "\n"; |
259 | 342 | } |
260 | 342 | dbgs() << "because of Consts read limitations\n"; |
261 | 342 | }); |
262 | 342 | CurrentPacketMIs.pop_back(); |
263 | 342 | return false; |
264 | 342 | } |
265 | 43.5k | |
266 | 43.5k | // Is there a BankSwizzle set that meet Read Port limitations ? |
267 | 43.5k | if (!TII->fitsReadPortLimitations(CurrentPacketMIs, |
268 | 43.5k | PV, BS, isTransSlot)) { |
269 | 518 | LLVM_DEBUG({ |
270 | 518 | dbgs() << "Couldn't pack :\n"; |
271 | 518 | MI.dump(); |
272 | 518 | dbgs() << "with the following packets :\n"; |
273 | 518 | for (unsigned i = 0, e = CurrentPacketMIs.size() - 1; i < e; i++) { |
274 | 518 | CurrentPacketMIs[i]->dump(); |
275 | 518 | dbgs() << "\n"; |
276 | 518 | } |
277 | 518 | dbgs() << "because of Read port limitations\n"; |
278 | 518 | }); |
279 | 518 | CurrentPacketMIs.pop_back(); |
280 | 518 | return false; |
281 | 518 | } |
282 | 42.9k | |
283 | 42.9k | // We cannot read LDS source registers from the Trans slot. |
284 | 42.9k | if (isTransSlot && TII->readsLDSSrcReg(MI)9.28k ) |
285 | 0 | return false; |
286 | 42.9k | |
287 | 42.9k | CurrentPacketMIs.pop_back(); |
288 | 42.9k | return true; |
289 | 42.9k | } |
290 | | |
291 | 46.7k | MachineBasicBlock::iterator addToPacket(MachineInstr &MI) override { |
292 | 46.7k | MachineBasicBlock::iterator FirstInBundle = |
293 | 46.7k | CurrentPacketMIs.empty() ? &MI19.1k : CurrentPacketMIs.front()27.5k ; |
294 | 46.7k | const DenseMap<unsigned, unsigned> &PV = |
295 | 46.7k | getPreviousVector(FirstInBundle); |
296 | 46.7k | std::vector<R600InstrInfo::BankSwizzle> BS; |
297 | 46.7k | bool isTransSlot; |
298 | 46.7k | |
299 | 46.7k | if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) { |
300 | 83.6k | for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++40.6k ) { |
301 | 40.6k | MachineInstr *MI = CurrentPacketMIs[i]; |
302 | 40.6k | unsigned Op = TII->getOperandIdx(MI->getOpcode(), |
303 | 40.6k | R600::OpName::bank_swizzle); |
304 | 40.6k | MI->getOperand(Op).setImm(BS[i]); |
305 | 40.6k | } |
306 | 42.9k | unsigned Op = |
307 | 42.9k | TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle); |
308 | 42.9k | MI.getOperand(Op).setImm(BS.back()); |
309 | 42.9k | if (!CurrentPacketMIs.empty()) |
310 | 23.7k | setIsLastBit(CurrentPacketMIs.back(), 0); |
311 | 42.9k | substitutePV(MI, PV); |
312 | 42.9k | MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI); |
313 | 42.9k | if (isTransSlot) { |
314 | 9.28k | endPacket(std::next(It)->getParent(), std::next(It)); |
315 | 9.28k | } |
316 | 42.9k | return It; |
317 | 42.9k | } |
318 | 3.71k | endPacket(MI.getParent(), MI); |
319 | 3.71k | if (TII->isTransOnly(MI)) |
320 | 11 | return MI; |
321 | 3.70k | return VLIWPacketizerList::addToPacket(MI); |
322 | 3.70k | } |
323 | | }; |
324 | | |
325 | 2.29k | bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { |
326 | 2.29k | const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); |
327 | 2.29k | const R600InstrInfo *TII = ST.getInstrInfo(); |
328 | 2.29k | |
329 | 2.29k | MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); |
330 | 2.29k | |
331 | 2.29k | // Instantiate the packetizer. |
332 | 2.29k | R600PacketizerList Packetizer(Fn, ST, MLI); |
333 | 2.29k | |
334 | 2.29k | // DFA state table should not be empty. |
335 | 2.29k | assert(Packetizer.getResourceTracker() && "Empty DFA table!"); |
336 | 2.29k | assert(Packetizer.getResourceTracker()->getInstrItins()); |
337 | 2.29k | |
338 | 2.29k | if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty()) |
339 | 1 | return false; |
340 | 2.29k | |
341 | 2.29k | // |
342 | 2.29k | // Loop over all basic blocks and remove KILL pseudo-instructions |
343 | 2.29k | // These instructions confuse the dependence analysis. Consider: |
344 | 2.29k | // D0 = ... (Insn 0) |
345 | 2.29k | // R0 = KILL R0, D0 (Insn 1) |
346 | 2.29k | // R0 = ... (Insn 2) |
347 | 2.29k | // Here, Insn 1 will result in the dependence graph not emitting an output |
348 | 2.29k | // dependence between Insn 0 and Insn 2. This can lead to incorrect |
349 | 2.29k | // packetization |
350 | 2.29k | // |
351 | 2.29k | for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); |
352 | 4.58k | MBB != MBBe; ++MBB2.29k ) { |
353 | 2.29k | MachineBasicBlock::iterator End = MBB->end(); |
354 | 2.29k | MachineBasicBlock::iterator MI = MBB->begin(); |
355 | 64.2k | while (MI != End) { |
356 | 61.9k | if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF || |
357 | 61.9k | (61.9k MI->getOpcode() == R600::CF_ALU61.9k && !MI->getOperand(8).getImm()3.76k )) { |
358 | 17 | MachineBasicBlock::iterator DeleteMI = MI; |
359 | 17 | ++MI; |
360 | 17 | MBB->erase(DeleteMI); |
361 | 17 | End = MBB->end(); |
362 | 17 | continue; |
363 | 17 | } |
364 | 61.9k | ++MI; |
365 | 61.9k | } |
366 | 2.29k | } |
367 | 2.29k | |
368 | 2.29k | // Loop over all of the basic blocks. |
369 | 2.29k | for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); |
370 | 4.58k | MBB != MBBe; ++MBB2.29k ) { |
371 | 2.29k | // Find scheduling regions and schedule / packetize each region. |
372 | 2.29k | unsigned RemainingCount = MBB->size(); |
373 | 2.29k | for(MachineBasicBlock::iterator RegionEnd = MBB->end(); |
374 | 4.57k | RegionEnd != MBB->begin();) { |
375 | 2.28k | // The next region starts above the previous region. Look backward in the |
376 | 2.28k | // instruction stream until we find the nearest boundary. |
377 | 2.28k | MachineBasicBlock::iterator I = RegionEnd; |
378 | 2.28k | for(;I != MBB->begin(); --I, --RemainingCount0 ) { |
379 | 2.28k | if (TII->isSchedulingBoundary(*std::prev(I), &*MBB, Fn)) |
380 | 2.28k | break; |
381 | 2.28k | } |
382 | 2.28k | I = MBB->begin(); |
383 | 2.28k | |
384 | 2.28k | // Skip empty scheduling regions. |
385 | 2.28k | if (I == RegionEnd) { |
386 | 0 | RegionEnd = std::prev(RegionEnd); |
387 | 0 | --RemainingCount; |
388 | 0 | continue; |
389 | 0 | } |
390 | 2.28k | // Skip regions with one instruction. |
391 | 2.28k | if (I == std::prev(RegionEnd)) { |
392 | 91 | RegionEnd = std::prev(RegionEnd); |
393 | 91 | continue; |
394 | 91 | } |
395 | 2.19k | |
396 | 2.19k | Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd); |
397 | 2.19k | RegionEnd = I; |
398 | 2.19k | } |
399 | 2.29k | } |
400 | 2.29k | |
401 | 2.29k | return true; |
402 | 2.29k | |
403 | 2.29k | } |
404 | | |
405 | | } // end anonymous namespace |
406 | | |
407 | 101k | INITIALIZE_PASS_BEGIN(R600Packetizer, DEBUG_TYPE, |
408 | 101k | "R600 Packetizer", false, false) |
409 | 101k | INITIALIZE_PASS_END(R600Packetizer, DEBUG_TYPE, |
410 | | "R600 Packetizer", false, false) |
411 | | |
412 | | char R600Packetizer::ID = 0; |
413 | | |
414 | | char &llvm::R600PacketizerID = R600Packetizer::ID; |
415 | | |
416 | 280 | llvm::FunctionPass *llvm::createR600Packetizer() { |
417 | 280 | return new R600Packetizer(); |
418 | 280 | } |