/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | /// \file |
11 | | /// R600EmitClauseMarker pass emits CFAlu instructions in a conservative manner. |
12 | | /// This pass is merging consecutive CFAlus where applicable. |
13 | | /// It needs to be called after IfCvt for best results. |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "AMDGPU.h" |
17 | | #include "AMDGPUSubtarget.h" |
18 | | #include "R600Defines.h" |
19 | | #include "R600InstrInfo.h" |
20 | | #include "R600MachineFunctionInfo.h" |
21 | | #include "R600RegisterInfo.h" |
22 | | #include "llvm/CodeGen/MachineFunctionPass.h" |
23 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
24 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
25 | | #include "llvm/Support/Debug.h" |
26 | | #include "llvm/Support/raw_ostream.h" |
27 | | |
28 | | using namespace llvm; |
29 | | |
30 | | #define DEBUG_TYPE "r600mergeclause" |
31 | | |
32 | | namespace { |
33 | | |
34 | 123k | static bool isCFAlu(const MachineInstr &MI) { |
35 | 123k | switch (MI.getOpcode()) { |
36 | 8.68k | case AMDGPU::CF_ALU: |
37 | 8.68k | case AMDGPU::CF_ALU_PUSH_BEFORE: |
38 | 8.68k | return true; |
39 | 114k | default: |
40 | 114k | return false; |
41 | 0 | } |
42 | 0 | } |
43 | | |
44 | | class R600ClauseMergePass : public MachineFunctionPass { |
45 | | |
46 | | private: |
47 | | const R600InstrInfo *TII; |
48 | | |
49 | | unsigned getCFAluSize(const MachineInstr &MI) const; |
50 | | bool isCFAluEnabled(const MachineInstr &MI) const; |
51 | | |
52 | | /// IfCvt pass can generate "disabled" ALU clause marker that need to be |
53 | | /// removed and their content affected to the previous alu clause. |
54 | | /// This function parse instructions after CFAlu until it find a disabled |
55 | | /// CFAlu and merge the content, or an enabled CFAlu. |
56 | | void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const; |
57 | | |
58 | | /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if |
59 | | /// it is the case. |
60 | | bool mergeIfPossible(MachineInstr &RootCFAlu, |
61 | | const MachineInstr &LatrCFAlu) const; |
62 | | |
63 | | public: |
64 | | static char ID; |
65 | | |
66 | 244 | R600ClauseMergePass() : MachineFunctionPass(ID) { } |
67 | | |
68 | | bool runOnMachineFunction(MachineFunction &MF) override; |
69 | | |
70 | | StringRef getPassName() const override; |
71 | | }; |
72 | | |
73 | | } // end anonymous namespace |
74 | | |
75 | 90.0k | INITIALIZE_PASS_BEGIN90.0k (R600ClauseMergePass, DEBUG_TYPE,
|
76 | 90.0k | "R600 Clause Merge", false, false) |
77 | 90.0k | INITIALIZE_PASS_END(R600ClauseMergePass, DEBUG_TYPE, |
78 | | "R600 Clause Merge", false, false) |
79 | | |
80 | | char R600ClauseMergePass::ID = 0; |
81 | | |
82 | | char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID; |
83 | | |
84 | 688 | unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const { |
85 | 688 | assert(isCFAlu(MI)); |
86 | 688 | return MI |
87 | 688 | .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT)) |
88 | 688 | .getImm(); |
89 | 688 | } |
90 | | |
91 | 1.48k | bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const { |
92 | 1.48k | assert(isCFAlu(MI)); |
93 | 1.48k | return MI |
94 | 1.48k | .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled)) |
95 | 1.48k | .getImm(); |
96 | 1.48k | } |
97 | | |
98 | | void R600ClauseMergePass::cleanPotentialDisabledCFAlu( |
99 | 3.59k | MachineInstr &CFAlu) const { |
100 | 3.59k | int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); |
101 | 3.59k | MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end(); |
102 | 3.59k | I++; |
103 | 3.62k | do { |
104 | 57.4k | while (I != E && 57.4k !isCFAlu(*I)55.3k ) |
105 | 53.8k | I++; |
106 | 3.62k | if (I == E) |
107 | 2.14k | return; |
108 | 1.48k | MachineInstr &MI = *I++; |
109 | 1.48k | if (isCFAluEnabled(MI)) |
110 | 1.45k | break; |
111 | 26 | CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI)); |
112 | 26 | MI.eraseFromParent(); |
113 | 3.59k | } while (I != E); |
114 | 3.59k | } |
115 | | |
116 | | bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu, |
117 | 318 | const MachineInstr &LatrCFAlu) const { |
118 | 318 | assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); |
119 | 318 | int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); |
120 | 318 | unsigned RootInstCount = getCFAluSize(RootCFAlu), |
121 | 318 | LaterInstCount = getCFAluSize(LatrCFAlu); |
122 | 318 | unsigned CumuledInsts = RootInstCount + LaterInstCount; |
123 | 318 | if (CumuledInsts >= TII->getMaxAlusPerClause()318 ) { |
124 | 226 | DEBUG(dbgs() << "Excess inst counts\n"); |
125 | 226 | return false; |
126 | 226 | } |
127 | 92 | if (92 RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE92 ) |
128 | 0 | return false; |
129 | 92 | // Is KCache Bank 0 compatible ? |
130 | 92 | int Mode0Idx = |
131 | 92 | TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); |
132 | 92 | int KBank0Idx = |
133 | 92 | TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); |
134 | 92 | int KBank0LineIdx = |
135 | 92 | TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); |
136 | 92 | if (LatrCFAlu.getOperand(Mode0Idx).getImm() && |
137 | 2 | RootCFAlu.getOperand(Mode0Idx).getImm() && |
138 | 0 | (LatrCFAlu.getOperand(KBank0Idx).getImm() != |
139 | 0 | RootCFAlu.getOperand(KBank0Idx).getImm() || |
140 | 0 | LatrCFAlu.getOperand(KBank0LineIdx).getImm() != |
141 | 92 | RootCFAlu.getOperand(KBank0LineIdx).getImm())) { |
142 | 0 | DEBUG(dbgs() << "Wrong KC0\n"); |
143 | 0 | return false; |
144 | 0 | } |
145 | 92 | // Is KCache Bank 1 compatible ? |
146 | 92 | int Mode1Idx = |
147 | 92 | TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); |
148 | 92 | int KBank1Idx = |
149 | 92 | TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); |
150 | 92 | int KBank1LineIdx = |
151 | 92 | TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); |
152 | 92 | if (LatrCFAlu.getOperand(Mode1Idx).getImm() && |
153 | 0 | RootCFAlu.getOperand(Mode1Idx).getImm() && |
154 | 0 | (LatrCFAlu.getOperand(KBank1Idx).getImm() != |
155 | 0 | RootCFAlu.getOperand(KBank1Idx).getImm() || |
156 | 0 | LatrCFAlu.getOperand(KBank1LineIdx).getImm() != |
157 | 92 | RootCFAlu.getOperand(KBank1LineIdx).getImm())) { |
158 | 0 | DEBUG(dbgs() << "Wrong KC0\n"); |
159 | 0 | return false; |
160 | 0 | } |
161 | 92 | if (92 LatrCFAlu.getOperand(Mode0Idx).getImm()92 ) { |
162 | 2 | RootCFAlu.getOperand(Mode0Idx).setImm( |
163 | 2 | LatrCFAlu.getOperand(Mode0Idx).getImm()); |
164 | 2 | RootCFAlu.getOperand(KBank0Idx).setImm( |
165 | 2 | LatrCFAlu.getOperand(KBank0Idx).getImm()); |
166 | 2 | RootCFAlu.getOperand(KBank0LineIdx) |
167 | 2 | .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm()); |
168 | 2 | } |
169 | 92 | if (LatrCFAlu.getOperand(Mode1Idx).getImm()92 ) { |
170 | 0 | RootCFAlu.getOperand(Mode1Idx).setImm( |
171 | 0 | LatrCFAlu.getOperand(Mode1Idx).getImm()); |
172 | 0 | RootCFAlu.getOperand(KBank1Idx).setImm( |
173 | 0 | LatrCFAlu.getOperand(KBank1Idx).getImm()); |
174 | 0 | RootCFAlu.getOperand(KBank1LineIdx) |
175 | 0 | .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm()); |
176 | 0 | } |
177 | 318 | RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts); |
178 | 318 | RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode())); |
179 | 318 | return true; |
180 | 318 | } |
181 | | |
182 | 2.05k | bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { |
183 | 2.05k | if (skipFunction(*MF.getFunction())) |
184 | 0 | return false; |
185 | 2.05k | |
186 | 2.05k | const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); |
187 | 2.05k | TII = ST.getInstrInfo(); |
188 | 2.05k | |
189 | 2.05k | for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); |
190 | 4.23k | BB != BB_E4.23k ; ++BB2.17k ) { |
191 | 2.17k | MachineBasicBlock &MBB = *BB; |
192 | 2.17k | MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); |
193 | 2.17k | MachineBasicBlock::iterator LatestCFAlu = E; |
194 | 59.6k | while (I != E59.6k ) { |
195 | 57.5k | MachineInstr &MI = *I++; |
196 | 57.5k | if ((!TII->canBeConsideredALU(MI) && 57.5k !isCFAlu(MI)10.6k ) || |
197 | 50.4k | TII->mustBeLastInClause(MI.getOpcode())) |
198 | 7.03k | LatestCFAlu = E; |
199 | 57.5k | if (!isCFAlu(MI)) |
200 | 53.9k | continue; |
201 | 3.59k | cleanPotentialDisabledCFAlu(MI); |
202 | 3.59k | |
203 | 3.59k | if (LatestCFAlu != E && 3.59k mergeIfPossible(*LatestCFAlu, MI)318 ) { |
204 | 92 | MI.eraseFromParent(); |
205 | 3.59k | } else { |
206 | 3.50k | assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled"); |
207 | 3.50k | LatestCFAlu = MI; |
208 | 3.50k | } |
209 | 57.5k | } |
210 | 2.17k | } |
211 | 2.05k | return false; |
212 | 2.05k | } |
213 | | |
214 | 244 | StringRef R600ClauseMergePass::getPassName() const { |
215 | 244 | return "R600 Merge Clause Markers Pass"; |
216 | 244 | } |
217 | | |
218 | 244 | llvm::FunctionPass *llvm::createR600ClauseMergePass() { |
219 | 244 | return new R600ClauseMergePass(); |
220 | 244 | } |