/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
Line | Count | Source
1 | | //===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | /// \file |
11 | | /// \brief R600 Implementation of TargetInstrInfo. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "R600InstrInfo.h" |
16 | | #include "AMDGPU.h" |
17 | | #include "AMDGPUInstrInfo.h" |
18 | | #include "AMDGPUSubtarget.h" |
19 | | #include "R600Defines.h" |
20 | | #include "R600FrameLowering.h" |
21 | | #include "R600RegisterInfo.h" |
22 | | #include "Utils/AMDGPUBaseInfo.h" |
23 | | #include "llvm/ADT/BitVector.h" |
24 | | #include "llvm/ADT/SmallSet.h" |
25 | | #include "llvm/ADT/SmallVector.h" |
26 | | #include "llvm/CodeGen/MachineBasicBlock.h" |
27 | | #include "llvm/CodeGen/MachineFrameInfo.h" |
28 | | #include "llvm/CodeGen/MachineFunction.h" |
29 | | #include "llvm/CodeGen/MachineInstr.h" |
30 | | #include "llvm/CodeGen/MachineInstrBuilder.h" |
31 | | #include "llvm/CodeGen/MachineOperand.h" |
32 | | #include "llvm/CodeGen/MachineRegisterInfo.h" |
33 | | #include "llvm/Support/ErrorHandling.h" |
34 | | #include "llvm/Target/TargetRegisterInfo.h" |
35 | | #include "llvm/Target/TargetSubtargetInfo.h" |
36 | | #include <algorithm> |
37 | | #include <cassert> |
38 | | #include <cstdint> |
39 | | #include <cstring> |
40 | | #include <iterator> |
41 | | #include <utility> |
42 | | #include <vector> |
43 | | |
44 | | using namespace llvm; |
45 | | |
46 | | #define GET_INSTRINFO_CTOR_DTOR |
47 | | #include "AMDGPUGenDFAPacketizer.inc" |
48 | | |
49 | | R600InstrInfo::R600InstrInfo(const R600Subtarget &ST) |
50 | 253 | : AMDGPUInstrInfo(ST), RI(), ST(ST) {} |
51 | | |
52 | 270k | bool R600InstrInfo::isVector(const MachineInstr &MI) const { |
53 | 270k | return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR; |
54 | 270k | } |
55 | | |
56 | | void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, |
57 | | MachineBasicBlock::iterator MI, |
58 | | const DebugLoc &DL, unsigned DestReg, |
59 | 1.98k | unsigned SrcReg, bool KillSrc) const { |
60 | 1.98k | unsigned VectorComponents = 0; |
61 | 1.98k | if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) || |
62 | 1.98k | AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) && |
63 | 0 | (AMDGPU::R600_Reg128RegClass.contains(SrcReg) || |
64 | 1.98k | AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
65 | 0 | VectorComponents = 4; |
66 | 1.98k | } else if ((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
67 | 1.98k | AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) && |
68 | 8 | (AMDGPU::R600_Reg64RegClass.contains(SrcReg) || |
69 | 1.98k | AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
70 | 8 | VectorComponents = 2; |
71 | 8 | } |
72 | 1.98k | |
73 | 1.98k | if (VectorComponents > 0) {
74 | 24 | for (unsigned I = 0; I < VectorComponents; I++) {
75 | 16 | unsigned SubRegIndex = RI.getSubRegFromChannel(I); |
76 | 16 | buildDefaultInstruction(MBB, MI, AMDGPU::MOV, |
77 | 16 | RI.getSubReg(DestReg, SubRegIndex), |
78 | 16 | RI.getSubReg(SrcReg, SubRegIndex)) |
79 | 16 | .addReg(DestReg, |
80 | 16 | RegState::Define | RegState::Implicit); |
81 | 16 | } |
82 | 1.98k | } else { |
83 | 1.97k | MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV, |
84 | 1.97k | DestReg, SrcReg); |
85 | 1.97k | NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0)) |
86 | 1.97k | .setIsKill(KillSrc); |
87 | 1.97k | } |
88 | 1.98k | } |
89 | | |
90 | | /// \returns true if \p MBBI can be moved into a new basic block.
91 | | bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, |
92 | 0 | MachineBasicBlock::iterator MBBI) const { |
93 | 0 | for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(), |
94 | 0 | E = MBBI->operands_end(); I != E; ++I) {
95 | 0 | if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
96 | 0 | I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
97 | 0 | return false; |
98 | 0 | } |
99 | 0 | return true; |
100 | 0 | } |
101 | | |
102 | 2.19k | bool R600InstrInfo::isMov(unsigned Opcode) const { |
103 | 2.19k | switch(Opcode) { |
104 | 2.13k | default: |
105 | 2.13k | return false; |
106 | 64 | case AMDGPU::MOV: |
107 | 64 | case AMDGPU::MOV_IMM_F32: |
108 | 64 | case AMDGPU::MOV_IMM_I32: |
109 | 64 | return true; |
110 | 0 | } |
111 | 0 | } |
112 | | |
113 | 190k | bool R600InstrInfo::isReductionOp(unsigned Opcode) const { |
114 | 190k | return false; |
115 | 190k | } |
116 | | |
117 | 210k | bool R600InstrInfo::isCubeOp(unsigned Opcode) const { |
118 | 210k | switch(Opcode) { |
119 | 210k | default: return false; |
120 | 9 | case AMDGPU::CUBE_r600_pseudo: |
121 | 9 | case AMDGPU::CUBE_r600_real: |
122 | 9 | case AMDGPU::CUBE_eg_pseudo: |
123 | 9 | case AMDGPU::CUBE_eg_real: |
124 | 9 | return true; |
125 | 0 | } |
126 | 0 | } |
127 | | |
128 | 517k | bool R600InstrInfo::isALUInstr(unsigned Opcode) const { |
129 | 517k | unsigned TargetFlags = get(Opcode).TSFlags; |
130 | 517k | |
131 | 517k | return (TargetFlags & R600_InstFlag::ALU_INST); |
132 | 517k | } |
133 | | |
134 | 151k | bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const { |
135 | 151k | unsigned TargetFlags = get(Opcode).TSFlags; |
136 | 151k | |
137 | 151k | return ((TargetFlags & R600_InstFlag::OP1) | |
138 | 151k | (TargetFlags & R600_InstFlag::OP2) | |
139 | 151k | (TargetFlags & R600_InstFlag::OP3)); |
140 | 151k | } |
141 | | |
142 | 239k | bool R600InstrInfo::isLDSInstr(unsigned Opcode) const { |
143 | 239k | unsigned TargetFlags = get(Opcode).TSFlags; |
144 | 239k | |
145 | 239k | return ((TargetFlags & R600_InstFlag::LDS_1A) | |
146 | 239k | (TargetFlags & R600_InstFlag::LDS_1A1D) | |
147 | 239k | (TargetFlags & R600_InstFlag::LDS_1A2D)); |
148 | 239k | } |
149 | | |
150 | 105k | bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const { |
151 | 7.45k | return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1; |
152 | 105k | } |
153 | | |
154 | 57.5k | bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const { |
155 | 57.5k | if (isALUInstr(MI.getOpcode())) |
156 | 46.7k | return true; |
157 | 10.7k | if (isVector(MI) || isCubeOp(MI.getOpcode()))
158 | 2 | return true; |
159 | 10.7k | switch (MI.getOpcode()) { |
160 | 116 | case AMDGPU::PRED_X: |
161 | 116 | case AMDGPU::INTERP_PAIR_XY: |
162 | 116 | case AMDGPU::INTERP_PAIR_ZW: |
163 | 116 | case AMDGPU::INTERP_VEC_LOAD: |
164 | 116 | case AMDGPU::COPY: |
165 | 116 | case AMDGPU::DOT_4: |
166 | 116 | return true; |
167 | 10.6k | default: |
168 | 10.6k | return false; |
169 | 0 | } |
170 | 0 | } |
171 | | |
172 | 210k | bool R600InstrInfo::isTransOnly(unsigned Opcode) const { |
173 | 210k | if (ST.hasCaymanISA()) |
174 | 21.9k | return false; |
175 | 188k | return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU); |
176 | 188k | } |
177 | | |
178 | 210k | bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const { |
179 | 210k | return isTransOnly(MI.getOpcode()); |
180 | 210k | } |
181 | | |
182 | 21.6k | bool R600InstrInfo::isVectorOnly(unsigned Opcode) const { |
183 | 21.6k | return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU); |
184 | 21.6k | } |
185 | | |
186 | 21.6k | bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const { |
187 | 21.6k | return isVectorOnly(MI.getOpcode()); |
188 | 21.6k | } |
189 | | |
190 | 3.13k | bool R600InstrInfo::isExport(unsigned Opcode) const { |
191 | 3.13k | return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT); |
192 | 3.13k | } |
193 | | |
194 | 63.0k | bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { |
195 | 55.3k | return ST.hasVertexCache() && IS_VTX(get(Opcode)); |
196 | 63.0k | } |
197 | | |
198 | 8.58k | bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const { |
199 | 8.58k | const MachineFunction *MF = MI.getParent()->getParent(); |
200 | 8.58k | return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) && |
201 | 272 | usesVertexCache(MI.getOpcode()); |
202 | 8.58k | } |
203 | | |
204 | 61.0k | bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { |
205 | 61.0k | return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
206 | 61.0k | } |
207 | | |
208 | 13.7k | bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const { |
209 | 13.7k | const MachineFunction *MF = MI.getParent()->getParent(); |
210 | 13.7k | return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) && |
211 | 13.1k | usesVertexCache(MI.getOpcode())) || |
212 | 10.8k | usesTextureCache(MI.getOpcode()); |
213 | 13.7k | } |
214 | | |
215 | 97.2k | bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const { |
216 | 97.2k | switch (Opcode) { |
217 | 8 | case AMDGPU::KILLGT: |
218 | 8 | case AMDGPU::GROUP_BARRIER: |
219 | 8 | return true; |
220 | 97.2k | default: |
221 | 97.2k | return false; |
222 | 0 | } |
223 | 0 | } |
224 | | |
225 | 93.7k | bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const { |
226 | 93.7k | return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1; |
227 | 93.7k | } |
228 | | |
229 | 93.5k | bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const { |
230 | 93.5k | return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1; |
231 | 93.5k | } |
232 | | |
233 | 36.3k | bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const { |
234 | 36.3k | if (!isALUInstr(MI.getOpcode())) {
235 | 271 | return false; |
236 | 271 | } |
237 | 36.0k | for (MachineInstr::const_mop_iterator I = MI.operands_begin(), |
238 | 36.0k | E = MI.operands_end(); |
239 | 766k | I != E; ++I) {
240 | 730k | if (!I->isReg() || !I->isUse() ||
241 | 111k | TargetRegisterInfo::isVirtualRegister(I->getReg())) |
242 | 653k | continue; |
243 | 77.1k | |
244 | 77.1k | if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
245 | 0 | return true; |
246 | 730k | } |
247 | 36.0k | return false; |
248 | 36.3k | } |
249 | | |
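 | | /// \returns the operand index of the *_sel operand paired with the source
 | | /// operand at index \p SrcIdx, or -1 if there is none.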
250 | 313k | int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const { |
251 | 313k | static const unsigned SrcSelTable[][2] = { |
252 | 313k | {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, |
253 | 313k | {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, |
254 | 313k | {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, |
255 | 313k | {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, |
256 | 313k | {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, |
257 | 313k | {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, |
258 | 313k | {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, |
259 | 313k | {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, |
260 | 313k | {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, |
261 | 313k | {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, |
262 | 313k | {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W} |
263 | 313k | }; |
264 | 313k | |
265 | 618k | for (const auto &Row : SrcSelTable) { |
266 | 618k | if (getOperandIdx(Opcode, Row[0]) == (int)SrcIdx) {
267 | 313k | return getOperandIdx(Opcode, Row[1]); |
268 | 313k | } |
269 | 0 | } |
270 | 0 | return -1; |
271 | 0 | } |
272 | | |
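 | | /// Collects the used source operands of \p MI as (operand, value) pairs, where
 | | /// the value is the constant selector for ALU_CONST reads, the immediate for
 | | /// ALU_LITERAL_X reads, and 0 otherwise.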
273 | | SmallVector<std::pair<MachineOperand *, int64_t>, 3> |
274 | 325k | R600InstrInfo::getSrcs(MachineInstr &MI) const { |
275 | 325k | SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result; |
276 | 325k | |
277 | 325k | if (MI.getOpcode() == AMDGPU::DOT_4) {
278 | 32 | static const unsigned OpTable[8][2] = { |
279 | 32 | {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X}, |
280 | 32 | {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y}, |
281 | 32 | {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z}, |
282 | 32 | {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W}, |
283 | 32 | {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X}, |
284 | 32 | {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y}, |
285 | 32 | {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z}, |
286 | 32 | {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}, |
287 | 32 | }; |
288 | 32 | |
289 | 288 | for (unsigned j = 0; j < 8; j++) {
290 | 256 | MachineOperand &MO = |
291 | 256 | MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0])); |
292 | 256 | unsigned Reg = MO.getReg(); |
293 | 256 | if (Reg == AMDGPU::ALU_CONST) {
294 | 21 | MachineOperand &Sel = |
295 | 21 | MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); |
296 | 21 | Result.push_back(std::make_pair(&MO, Sel.getImm())); |
297 | 21 | continue; |
298 | 21 | } |
299 | 256 | |
300 | 256 | } |
301 | 32 | return Result; |
302 | 32 | } |
303 | 325k | |
304 | 325k | static const unsigned OpTable[3][2] = { |
305 | 325k | {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel}, |
306 | 325k | {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel}, |
307 | 325k | {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel}, |
308 | 325k | }; |
309 | 325k | |
310 | 985k | for (unsigned j = 0; j < 3; j++) {
311 | 926k | int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]); |
312 | 926k | if (SrcIdx < 0) |
313 | 266k | break; |
314 | 660k | MachineOperand &MO = MI.getOperand(SrcIdx); |
315 | 660k | unsigned Reg = MO.getReg(); |
316 | 660k | if (Reg == AMDGPU::ALU_CONST) {
317 | 27.0k | MachineOperand &Sel = |
318 | 27.0k | MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1])); |
319 | 27.0k | Result.push_back(std::make_pair(&MO, Sel.getImm())); |
320 | 27.0k | continue; |
321 | 27.0k | } |
322 | 633k | if (Reg == AMDGPU::ALU_LITERAL_X) {
323 | 139k | MachineOperand &Operand = |
324 | 139k | MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal)); |
325 | 139k | if (Operand.isImm()) {
326 | 139k | Result.push_back(std::make_pair(&MO, Operand.getImm())); |
327 | 139k | continue; |
328 | 139k | } |
329 | 139k | assert(Operand.isGlobal()); |
330 | 75 | } |
331 | 493k | Result.push_back(std::make_pair(&MO, 0)); |
332 | 493k | } |
333 | 325k | return Result; |
334 | 325k | } |
335 | | |
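 | | /// Returns the (register index, channel) pair for each source of \p MI,
 | | /// using 255 for values forwarded through PS/PV and a (-1, 0) placeholder for
 | | /// constant reads, which are tallied in \p ConstCount.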
336 | | std::vector<std::pair<int, unsigned>> |
337 | | R600InstrInfo::ExtractSrcs(MachineInstr &MI, |
338 | | const DenseMap<unsigned, unsigned> &PV, |
339 | 81.6k | unsigned &ConstCount) const { |
340 | 81.6k | ConstCount = 0; |
341 | 81.6k | const std::pair<int, unsigned> DummyPair(-1, 0); |
342 | 81.6k | std::vector<std::pair<int, unsigned>> Result; |
343 | 81.6k | unsigned i = 0; |
344 | 167k | for (const auto &Src : getSrcs(MI)) { |
345 | 167k | ++i; |
346 | 167k | unsigned Reg = Src.first->getReg(); |
347 | 167k | int Index = RI.getEncodingValue(Reg) & 0xff; |
348 | 167k | if (Reg == AMDGPU::OQAP) {
349 | 1.52k | Result.push_back(std::make_pair(Index, 0U)); |
350 | 1.52k | } |
351 | 167k | if (PV.find(Reg) != PV.end()) {
352 | 22.3k | // 255 is used to tell it's a PS/PV reg
353 | 22.3k | Result.push_back(std::make_pair(255, 0U)); |
354 | 22.3k | continue; |
355 | 22.3k | } |
356 | 144k | if (Index > 127) {
357 | 93.0k | ConstCount++; |
358 | 93.0k | Result.push_back(DummyPair); |
359 | 93.0k | continue; |
360 | 93.0k | } |
361 | 51.5k | unsigned Chan = RI.getHWRegChan(Reg); |
362 | 51.5k | Result.push_back(std::make_pair(Index, Chan)); |
363 | 51.5k | } |
364 | 159k | for (; i < 3; ++i)
365 | 77.9k | Result.push_back(DummyPair); |
366 | 81.6k | return Result; |
367 | 81.6k | } |
368 | | |
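 | | /// Reorders the three sources according to bank swizzle \p Swz; if src0 and
 | | /// src1 read the same value, the duplicate is dropped so it only counts once.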
369 | | static std::vector<std::pair<int, unsigned>> |
370 | | Swizzle(std::vector<std::pair<int, unsigned>> Src, |
371 | 1.91M | R600InstrInfo::BankSwizzle Swz) { |
372 | 1.91M | if (Src[0] == Src[1]) |
373 | 319k | Src[1].first = -1; |
374 | 1.91M | switch (Swz) { |
375 | 394k | case R600InstrInfo::ALU_VEC_012_SCL_210: |
376 | 394k | break; |
377 | 308k | case R600InstrInfo::ALU_VEC_021_SCL_122: |
378 | 308k | std::swap(Src[1], Src[2]); |
379 | 308k | break; |
380 | 303k | case R600InstrInfo::ALU_VEC_102_SCL_221: |
381 | 303k | std::swap(Src[0], Src[1]); |
382 | 303k | break; |
383 | 306k | case R600InstrInfo::ALU_VEC_120_SCL_212: |
384 | 306k | std::swap(Src[0], Src[1]); |
385 | 306k | std::swap(Src[0], Src[2]); |
386 | 306k | break; |
387 | 302k | case R600InstrInfo::ALU_VEC_201: |
388 | 302k | std::swap(Src[0], Src[2]); |
389 | 302k | std::swap(Src[0], Src[1]); |
390 | 302k | break; |
391 | 298k | case R600InstrInfo::ALU_VEC_210: |
392 | 298k | std::swap(Src[0], Src[2]); |
393 | 298k | break; |
394 | 1.91M | } |
395 | 1.91M | return Src; |
396 | 1.91M | } |
397 | | |
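 | | /// \returns the read cycle assigned to trans-slot operand \p Op under swizzle \p Swz.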
398 | 263k | static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) { |
399 | 263k | switch (Swz) { |
400 | 125k | case R600InstrInfo::ALU_VEC_012_SCL_210: { |
401 | 125k | unsigned Cycles[3] = { 2, 1, 0}; |
402 | 125k | return Cycles[Op]; |
403 | 263k | } |
404 | 45.7k | case R600InstrInfo::ALU_VEC_021_SCL_122: { |
405 | 45.7k | unsigned Cycles[3] = { 1, 2, 2}; |
406 | 45.7k | return Cycles[Op]; |
407 | 263k | } |
408 | 45.9k | case R600InstrInfo::ALU_VEC_120_SCL_212: { |
409 | 45.9k | unsigned Cycles[3] = { 2, 1, 2}; |
410 | 45.9k | return Cycles[Op]; |
411 | 263k | } |
412 | 46.2k | case R600InstrInfo::ALU_VEC_102_SCL_221: { |
413 | 46.2k | unsigned Cycles[3] = { 2, 2, 1}; |
414 | 46.2k | return Cycles[Op]; |
415 | 263k | } |
416 | 0 | default: |
417 | 0 | llvm_unreachable("Wrong Swizzle for Trans Slot"); |
418 | 0 | } |
419 | 0 | } |
420 | | |
421 | | /// returns how many MIs (whose inputs are represented by IGSrcs) can be packed |
422 | | /// in the same Instruction Group while meeting read port limitations given a |
423 | | /// Swz swizzle sequence. |
424 | | unsigned R600InstrInfo::isLegalUpTo( |
425 | | const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs, |
426 | | const std::vector<R600InstrInfo::BankSwizzle> &Swz, |
427 | | const std::vector<std::pair<int, unsigned>> &TransSrcs, |
428 | 523k | R600InstrInfo::BankSwizzle TransSwz) const { |
429 | 523k | int Vector[4][3]; |
430 | 523k | memset(Vector, -1, sizeof(Vector)); |
431 | 2.09M | for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
432 | 1.91M | const std::vector<std::pair<int, unsigned>> &Srcs = |
433 | 1.91M | Swizzle(IGSrcs[i], Swz[i]); |
434 | 6.92M | for (unsigned j = 0; j < 3; j++) {
435 | 5.35M | const std::pair<int, unsigned> &Src = Srcs[j]; |
436 | 5.35M | if (Src.first < 0 || Src.first == 255)
437 | 2.83M | continue; |
438 | 2.51M | if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
439 | 1.53k | if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 && |
440 | 1.53k | Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
441 | 0 | // The value from output queue A (denoted by register OQAP) can |
442 | 0 | // only be fetched during the first cycle. |
443 | 0 | return false; |
444 | 0 | } |
445 | 1.53k | // OQAP does not count towards the normal read port restrictions |
446 | 1.53k | continue; |
447 | 1.53k | } |
448 | 2.51M | if (Vector[Src.second][j] < 0)
449 | 2.10M | Vector[Src.second][j] = Src.first; |
450 | 2.51M | if (Vector[Src.second][j] != Src.first) |
451 | 344k | return i; |
452 | 5.35M | } |
453 | 1.91M | } |
454 | 523k | // Now check Trans Alu |
455 | 274k | for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
456 | 233k | const std::pair<int, unsigned> &Src = TransSrcs[i]; |
457 | 233k | unsigned Cycle = getTransSwizzle(TransSwz, i); |
458 | 233k | if (Src.first < 0) |
459 | 17.6k | continue; |
460 | 216k | if (Src.first == 255)
461 | 26.2k | continue; |
462 | 189k | if (Vector[Src.second][Cycle] < 0)
463 | 48.4k | Vector[Src.second][Cycle] = Src.first; |
464 | 189k | if (Vector[Src.second][Cycle] != Src.first) |
465 | 138k | return IGSrcs.size() - 1; |
466 | 233k | } |
467 | 40.9k | return IGSrcs.size(); |
468 | 523k | } |
469 | | |
470 | | /// Given a swizzle sequence SwzCandidate and an index Idx, returns the next |
471 | | /// (in lexicographic terms) swizzle sequence, assuming that all swizzles after
472 | | /// Idx can be skipped.
473 | | static bool |
474 | | NextPossibleSolution( |
475 | | std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate, |
476 | 482k | unsigned Idx) { |
477 | 482k | assert(Idx < SwzCandidate.size()); |
478 | 482k | int ResetIdx = Idx; |
479 | 578k | while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
480 | 95.7k | ResetIdx --; |
481 | 624k | for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
482 | 142k | SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210; |
483 | 142k | } |
484 | 482k | if (ResetIdx == -1) |
485 | 1.16k | return false; |
486 | 481k | int NextSwizzle = SwzCandidate[ResetIdx] + 1; |
487 | 481k | SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle; |
488 | 481k | return true; |
489 | 481k | } |
490 | | |
491 | | /// Enumerates all possible swizzle sequences to find one that meets all
492 | | /// read port requirements.
493 | | bool R600InstrInfo::FindSwizzleForVectorSlot( |
494 | | const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs, |
495 | | std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate, |
496 | | const std::vector<std::pair<int, unsigned>> &TransSrcs, |
497 | 42.0k | R600InstrInfo::BankSwizzle TransSwz) const { |
498 | 42.0k | unsigned ValidUpTo = 0; |
499 | 523k | do { |
500 | 523k | ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz); |
501 | 523k | if (ValidUpTo == IGSrcs.size()) |
502 | 40.9k | return true; |
503 | 482k | } while (NextPossibleSolution(SwzCandidate, ValidUpTo)); |
504 | 1.16k | return false; |
505 | 42.0k | } |
506 | | |
507 | | /// Instructions in the Trans slot can't read a GPR at cycle 0 if they also read
508 | | /// a const, and can't read a GPR at cycle 1 if they read 2 consts.
509 | | static bool |
510 | | isConstCompatible(R600InstrInfo::BankSwizzle TransSwz, |
511 | | const std::vector<std::pair<int, unsigned>> &TransOps, |
512 | 9.83k | unsigned ConstCount) { |
513 | 9.83k | // TransALU can't read 3 constants |
514 | 9.83k | if (ConstCount > 2) |
515 | 16 | return false; |
516 | 39.2k | for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
517 | 29.4k | const std::pair<int, unsigned> &Src = TransOps[i]; |
518 | 29.4k | unsigned Cycle = getTransSwizzle(TransSwz, i); |
519 | 29.4k | if (Src.first < 0) |
520 | 18.2k | continue; |
521 | 11.2k | if (ConstCount > 0 && Cycle == 0)
522 | 19 | return false; |
523 | 11.2k | if (ConstCount > 1 && Cycle == 1)
524 | 1 | return false; |
525 | 29.4k | } |
526 | 9.79k | return true; |
527 | 9.83k | } |
528 | | |
529 | | bool |
530 | | R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG, |
531 | | const DenseMap<unsigned, unsigned> &PV, |
532 | | std::vector<BankSwizzle> &ValidSwizzle, |
533 | | bool isLastAluTrans) |
534 | 41.4k | const { |
535 | 41.4k | // TODO: support shared src0 - src1 operand
536 | 41.4k | |
537 | 41.4k | std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs; |
538 | 41.4k | ValidSwizzle.clear(); |
539 | 41.4k | unsigned ConstCount; |
540 | 41.4k | BankSwizzle TransBS = ALU_VEC_012_SCL_210; |
541 | 123k | for (unsigned i = 0, e = IG.size(); i < e; ++i) {
542 | 81.6k | IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount)); |
543 | 81.6k | unsigned Op = getOperandIdx(IG[i]->getOpcode(), |
544 | 81.6k | AMDGPU::OpName::bank_swizzle); |
545 | 81.6k | ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle) |
546 | 81.6k | IG[i]->getOperand(Op).getImm()); |
547 | 81.6k | } |
548 | 41.4k | std::vector<std::pair<int, unsigned>> TransOps; |
549 | 41.4k | if (!isLastAluTrans) |
550 | 32.2k | return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS); |
551 | 9.14k | |
552 | 9.14k | TransOps = std::move(IGSrcs.back()); |
553 | 9.14k | IGSrcs.pop_back(); |
554 | 9.14k | ValidSwizzle.pop_back(); |
555 | 9.14k | |
556 | 9.14k | static const R600InstrInfo::BankSwizzle TransSwz[] = { |
557 | 9.14k | ALU_VEC_012_SCL_210, |
558 | 9.14k | ALU_VEC_021_SCL_122, |
559 | 9.14k | ALU_VEC_120_SCL_212, |
560 | 9.14k | ALU_VEC_102_SCL_221 |
561 | 9.14k | }; |
562 | 10.0k | for (unsigned i = 0; i < 4; i++) {
563 | 9.83k | TransBS = TransSwz[i]; |
564 | 9.83k | if (!isConstCompatible(TransBS, TransOps, ConstCount)) |
565 | 36 | continue; |
566 | 9.79k | bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, |
567 | 9.79k | TransBS); |
568 | 9.79k | if (Result) {
569 | 8.91k | ValidSwizzle.push_back(TransBS); |
570 | 8.91k | return true; |
571 | 8.91k | } |
572 | 9.83k | } |
573 | 9.14k | |
574 | 224 | return false; |
575 | 41.4k | } |
576 | | |
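 | | /// Checks that the constant reads in \p Consts address at most two different
 | | /// constant-pair addresses, the limit for a single ALU instruction group.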
577 | | bool |
578 | | R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) |
579 | 95.4k | const { |
580 | 95.4k | assert (Consts.size() <= 12 && "Too many operands in instructions group"); |
581 | 95.4k | unsigned Pair1 = 0, Pair2 = 0; |
582 | 133k | for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
583 | 39.3k | unsigned ReadConstHalf = Consts[i] & 2; |
584 | 39.3k | unsigned ReadConstIndex = Consts[i] & (~3); |
585 | 39.3k | unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; |
586 | 39.3k | if (!Pair1) {
587 | 32.5k | Pair1 = ReadHalfConst; |
588 | 32.5k | continue; |
589 | 32.5k | } |
590 | 6.76k | if (Pair1 == ReadHalfConst)
591 | 1.15k | continue; |
592 | 5.60k | if (!Pair2) {
593 | 3.97k | Pair2 = ReadHalfConst; |
594 | 3.97k | continue; |
595 | 3.97k | } |
596 | 1.63k | if (Pair2 != ReadHalfConst)
597 | 853 | return false; |
598 | 39.3k | } |
599 | 94.5k | return true; |
600 | 95.4k | } |
601 | | |
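 | | /// Gathers the constants and literals read by the candidate group \p MIs and
 | | /// rejects it if it needs more than 4 literals or violates the const-pair limit.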
602 | | bool |
603 | | R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs) |
604 | 86.5k | const { |
605 | 86.5k | std::vector<unsigned> Consts; |
606 | 86.5k | SmallSet<int64_t, 4> Literals; |
607 | 236k | for (unsigned i = 0, n = MIs.size(); i < n; i++) {
608 | 149k | MachineInstr &MI = *MIs[i]; |
609 | 149k | if (!isALUInstr(MI.getOpcode())) |
610 | 1.22k | continue; |
611 | 148k | |
612 | 148k | for (const auto &Src : getSrcs(MI)) {
613 | 307k | if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X) |
614 | 64.6k | Literals.insert(Src.second); |
615 | 307k | if (Literals.size() > 4) |
616 | 21 | return false; |
617 | 307k | if (Src.first->getReg() == AMDGPU::ALU_CONST)
618 | 15.4k | Consts.push_back(Src.second); |
619 | 307k | if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) || |
620 | 307k | AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
621 | 14.4k | unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff; |
622 | 14.4k | unsigned Chan = RI.getHWRegChan(Src.first->getReg()); |
623 | 14.4k | Consts.push_back((Index << 2) | Chan); |
624 | 14.4k | } |
625 | 307k | } |
626 | 149k | } |
627 | 86.5k | return fitsConstReadLimitations(Consts); |
628 | 86.5k | } |
629 | | |
630 | | DFAPacketizer * |
631 | 2.05k | R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const { |
632 | 2.05k | const InstrItineraryData *II = STI.getInstrItineraryData(); |
633 | 2.05k | return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II); |
634 | 2.05k | } |
635 | | |
636 | | static bool |
637 | 5.90k | isPredicateSetter(unsigned Opcode) { |
638 | 5.90k | switch (Opcode) { |
639 | 2.41k | case AMDGPU::PRED_X: |
640 | 2.41k | return true; |
641 | 3.49k | default: |
642 | 3.49k | return false; |
643 | 0 | } |
644 | 0 | } |
645 | | |
646 | | static MachineInstr * |
647 | | findFirstPredicateSetterFrom(MachineBasicBlock &MBB, |
648 | 422 | MachineBasicBlock::iterator I) { |
649 | 483 | while (I != MBB.begin()) {
650 | 483 | --I; |
651 | 483 | MachineInstr &MI = *I; |
652 | 483 | if (isPredicateSetter(MI.getOpcode())) |
653 | 422 | return &MI; |
654 | 483 | } |
655 | 422 | |
656 | 0 | return nullptr; |
657 | 422 | } |
658 | | |
659 | | static |
660 | 36.4k | bool isJump(unsigned Opcode) { |
661 | 35.5k | return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND; |
662 | 36.4k | } |
663 | | |
664 | 34.0k | static bool isBranch(unsigned Opcode) { |
665 | 33.9k | return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 || |
666 | 33.9k | Opcode == AMDGPU::BRANCH_COND_f32; |
667 | 34.0k | } |
668 | | |
669 | | bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB, |
670 | | MachineBasicBlock *&TBB, |
671 | | MachineBasicBlock *&FBB, |
672 | | SmallVectorImpl<MachineOperand> &Cond, |
673 | 34.0k | bool AllowModify) const { |
674 | 34.0k | // Most of the following comes from the ARM implementation of AnalyzeBranch |
675 | 34.0k | |
676 | 34.0k | // If the block has no terminators, it just falls into the block after it. |
677 | 34.0k | MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); |
678 | 34.0k | if (I == MBB.end()) |
679 | 83 | return false; |
680 | 34.0k | |
681 | 34.0k | // AMDGPU::BRANCH* instructions are only available after isel and are not |
682 | 34.0k | // handled |
683 | 34.0k | if (isBranch(I->getOpcode()))
684 | 7 | return true; |
685 | 33.9k | if (!isJump(I->getOpcode())) {
686 | 31.4k | return false; |
687 | 31.4k | } |
688 | 2.50k | |
689 | 2.50k | // Remove successive JUMP |
690 | 2.50k | while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
691 | 0 | MachineBasicBlock::iterator PriorI = std::prev(I); |
692 | 0 | if (AllowModify) |
693 | 0 | I->removeFromParent(); |
694 | 0 | I = PriorI; |
695 | 0 | } |
696 | 2.50k | MachineInstr &LastInst = *I; |
697 | 2.50k | |
698 | 2.50k | // If there is only one terminator instruction, process it. |
699 | 2.50k | unsigned LastOpc = LastInst.getOpcode(); |
700 | 2.50k | if (I == MBB.begin() || !isJump((--I)->getOpcode())) {
701 | 2.11k | if (LastOpc == AMDGPU::JUMP) {
702 | 510 | TBB = LastInst.getOperand(0).getMBB(); |
703 | 510 | return false; |
704 | 1.60k | } else if (LastOpc == AMDGPU::JUMP_COND) {
705 | 1.60k | auto predSet = I; |
706 | 1.99k | while (!isPredicateSetter(predSet->getOpcode())) {
707 | 390 | predSet = --I; |
708 | 390 | } |
709 | 1.60k | TBB = LastInst.getOperand(0).getMBB(); |
710 | 1.60k | Cond.push_back(predSet->getOperand(1)); |
711 | 1.60k | Cond.push_back(predSet->getOperand(2)); |
712 | 1.60k | Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); |
713 | 1.60k | return false; |
714 | 1.60k | } |
715 | 0 | return true; // Can't handle indirect branch. |
716 | 0 | } |
717 | 388 | |
718 | 388 | // Get the instruction before it if it is a terminator. |
719 | 388 | MachineInstr &SecondLastInst = *I; |
720 | 388 | unsigned SecondLastOpc = SecondLastInst.getOpcode(); |
721 | 388 | |
722 | 388 | // If the block ends with a B and a Bcc, handle it. |
723 | 388 | if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
724 | 388 | auto predSet = --I; |
725 | 445 | while (!isPredicateSetter(predSet->getOpcode())) {
726 | 57 | predSet = --I; |
727 | 57 | } |
728 | 388 | TBB = SecondLastInst.getOperand(0).getMBB(); |
729 | 388 | FBB = LastInst.getOperand(0).getMBB(); |
730 | 388 | Cond.push_back(predSet->getOperand(1)); |
731 | 388 | Cond.push_back(predSet->getOperand(2)); |
732 | 388 | Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false)); |
733 | 388 | return false; |
734 | 388 | } |
735 | 0 |
736 | 0 | // Otherwise, can't handle this. |
737 | 0 | return true; |
738 | 0 | } |
739 | | |
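 | | /// Scans \p MBB backwards for the most recent CF_ALU / CF_ALU_PUSH_BEFORE
 | | /// clause; returns MBB.end() if there is none.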
740 | | static |
741 | 422 | MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) { |
742 | 422 | for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend(); |
743 | 2.15k | It != E2.15k ; ++It1.73k ) { |
744 | 1.98k | if (It->getOpcode() == AMDGPU::CF_ALU || |
745 | 1.87k | It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) |
746 | 256 | return It.getReverse(); |
747 | 1.98k | } |
748 | 166 | return MBB.end(); |
749 | 422 | } |
750 | | |
751 | | unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB, |
752 | | MachineBasicBlock *TBB, |
753 | | MachineBasicBlock *FBB, |
754 | | ArrayRef<MachineOperand> Cond, |
755 | | const DebugLoc &DL, |
756 | 223 | int *BytesAdded) const { |
757 | 223 | assert(TBB && "insertBranch must not be told to insert a fallthrough"); |
758 | 223 | assert(!BytesAdded && "code size not handled"); |
759 | 223 | |
760 | 223 | if (!FBB223 ) { |
761 | 223 | if (Cond.empty()223 ) { |
762 | 25 | BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB); |
763 | 25 | return 1; |
764 | 0 | } else { |
765 | 198 | MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); |
766 | 198 | assert(PredSet && "No previous predicate !"); |
767 | 198 | addFlag(*PredSet, 0, MO_FLAG_PUSH); |
768 | 198 | PredSet->getOperand(2).setImm(Cond[1].getImm()); |
769 | 198 | |
770 | 198 | BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) |
771 | 198 | .addMBB(TBB) |
772 | 198 | .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); |
773 | 198 | MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); |
774 | 198 | if (CfAlu == MBB.end()) |
775 | 82 | return 1; |
776 | 0 | assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); |
777 | 116 | CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); |
778 | 116 | return 1; |
779 | 116 | } |
780 | 0 | } else { |
781 | 0 | MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end()); |
782 | 0 | assert(PredSet && "No previous predicate !"); |
783 | 0 | addFlag(*PredSet, 0, MO_FLAG_PUSH); |
784 | 0 | PredSet->getOperand(2).setImm(Cond[1].getImm()); |
785 | 0 | BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND)) |
786 | 0 | .addMBB(TBB) |
787 | 0 | .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); |
788 | 0 | BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB); |
789 | 0 | MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); |
790 | 0 | if (CfAlu == MBB.end()) |
791 | 0 | return 2; |
792 | 0 | assert (CfAlu->getOpcode() == AMDGPU::CF_ALU); |
793 | 0 | CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE)); |
794 | 0 | return 2; |
795 | 0 | } |
796 | 223 | } |
797 | | |
798 | | unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB, |
799 | 314 | int *BytesRemoved) const { |
800 | 314 | assert(!BytesRemoved && "code size not handled"); |
801 | 314 | |
802 | 314 | // Note : we leave PRED* instructions there. |
803 | 314 | // They may be needed when predicating instructions. |
804 | 314 | |
805 | 314 | MachineBasicBlock::iterator I = MBB.end(); |
806 | 314 | |
807 | 314 | if (I == MBB.begin()314 ) { |
808 | 0 | return 0; |
809 | 0 | } |
810 | 314 | --I; |
811 | 314 | switch (I->getOpcode()) { |
812 | 22 | default: |
813 | 22 | return 0; |
814 | 139 | case AMDGPU::JUMP_COND: { |
815 | 139 | MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); |
816 | 139 | clearFlag(*predSet, 0, MO_FLAG_PUSH); |
817 | 139 | I->eraseFromParent(); |
818 | 139 | MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); |
819 | 139 | if (CfAlu == MBB.end()) |
820 | 0 | break; |
821 | 139 | assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); |
822 | 139 | CfAlu->setDesc(get(AMDGPU::CF_ALU)); |
823 | 139 | break; |
824 | 139 | } |
825 | 153 | case AMDGPU::JUMP: |
826 | 153 | I->eraseFromParent(); |
827 | 153 | break; |
828 | 292 | } |
829 | 292 | I = MBB.end(); |
830 | 292 | |
831 | 292 | if (I == MBB.begin()292 ) { |
832 | 3 | return 1; |
833 | 3 | } |
834 | 289 | --I; |
835 | 289 | switch (I->getOpcode()) { |
836 | 289 | // FIXME: only one case?? |
837 | 204 | default: |
838 | 204 | return 1; |
839 | 85 | case AMDGPU::JUMP_COND: { |
840 | 85 | MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I); |
841 | 85 | clearFlag(*predSet, 0, MO_FLAG_PUSH); |
842 | 85 | I->eraseFromParent(); |
843 | 85 | MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB); |
844 | 85 | if (CfAlu == MBB.end()) |
845 | 84 | break; |
846 | 85 | assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE); |
847 | 1 | CfAlu->setDesc(get(AMDGPU::CF_ALU)); |
848 | 1 | break; |
849 | 1 | } |
850 | 0 | case AMDGPU::JUMP: |
851 | 0 | I->eraseFromParent(); |
852 | 0 | break; |
853 | 85 | } |
854 | 85 | return 2; |
855 | 85 | } |
856 | | |
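 | | // An instruction counts as predicated when its predicate operand selects
 | | // PRED_SEL_ONE, PRED_SEL_ZERO or PREDICATE_BIT rather than the default.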
857 | 121k | bool R600InstrInfo::isPredicated(const MachineInstr &MI) const { |
858 | 121k | int idx = MI.findFirstPredOperandIdx(); |
859 | 121k | if (idx < 0) |
860 | 24.1k | return false; |
861 | 97.7k | |
862 | 97.7k | unsigned Reg = MI.getOperand(idx).getReg(); |
863 | 97.7k | switch (Reg) { |
864 | 97.3k | default: return false; |
865 | 442 | case AMDGPU::PRED_SEL_ONE: |
866 | 442 | case AMDGPU::PRED_SEL_ZERO: |
867 | 442 | case AMDGPU::PREDICATE_BIT: |
868 | 442 | return true; |
869 | 0 | } |
870 | 0 | } |
871 | | |
872 | 2.98k | bool R600InstrInfo::isPredicable(const MachineInstr &MI) const { |
873 | 2.98k | // XXX: KILL* instructions can be predicated, but they must be the last |
874 | 2.98k | // instruction in a clause, so this means any instructions after them cannot |
875 | 2.98k | // be predicated. Until we have proper support for instruction clauses in the |
876 | 2.98k | // backend, we will mark KILL* instructions as unpredicable. |
877 | 2.98k | |
878 | 2.98k | if (MI.getOpcode() == AMDGPU::KILLGT) {
879 | 0 | return false; |
880 | 2.98k | } else if (MI.getOpcode() == AMDGPU::CF_ALU) {
881 | 2.17k | // If the clause starts in the middle of the MBB then the MBB has more
882 | 2.17k | // than a single clause; we are unable to predicate several clauses.
883 | 2.17k | if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI)) |
884 | 0 | return false; |
885 | 2.17k | // TODO: We don't support KC merging atm |
886 | 2.17k | return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0;
887 | 813 | } else if (isVector(MI)) {
888 | 1 | return false; |
889 | 0 | } else { |
890 | 812 | return AMDGPUInstrInfo::isPredicable(MI); |
891 | 812 | } |
892 | 0 | } |
893 | | |
894 | | bool |
895 | | R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, |
896 | | unsigned NumCycles, |
897 | | unsigned ExtraPredCycles, |
898 | 97 | BranchProbability Probability) const{ |
899 | 97 | return true; |
900 | 97 | } |
901 | | |
902 | | bool |
903 | | R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, |
904 | | unsigned NumTCycles, |
905 | | unsigned ExtraTCycles, |
906 | | MachineBasicBlock &FMBB, |
907 | | unsigned NumFCycles, |
908 | | unsigned ExtraFCycles, |
909 | 2 | BranchProbability Probability) const { |
910 | 2 | return true; |
911 | 2 | } |
912 | | |
913 | | bool |
914 | | R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, |
915 | | unsigned NumCycles, |
916 | | BranchProbability Probability) |
917 | 134 | const { |
918 | 134 | return true; |
919 | 134 | } |
920 | | |
921 | | bool |
922 | | R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB, |
923 | 2 | MachineBasicBlock &FMBB) const { |
924 | 2 | return false; |
925 | 2 | } |
926 | | |
927 | | bool |
928 | 312 | R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { |
929 | 312 | MachineOperand &MO = Cond[1]; |
930 | 312 | switch (MO.getImm()) { |
931 | 61 | case AMDGPU::PRED_SETE_INT: |
932 | 61 | MO.setImm(AMDGPU::PRED_SETNE_INT); |
933 | 61 | break; |
934 | 251 | case AMDGPU::PRED_SETNE_INT: |
935 | 251 | MO.setImm(AMDGPU::PRED_SETE_INT); |
936 | 251 | break; |
937 | 0 | case AMDGPU::PRED_SETE: |
938 | 0 | MO.setImm(AMDGPU::PRED_SETNE); |
939 | 0 | break; |
940 | 0 | case AMDGPU::PRED_SETNE: |
941 | 0 | MO.setImm(AMDGPU::PRED_SETE); |
942 | 0 | break; |
943 | 0 | default: |
944 | 0 | return true; |
945 | 312 | } |
946 | 312 | |
947 | 312 | MachineOperand &MO2 = Cond[2]; |
948 | 312 | switch (MO2.getReg()) { |
949 | 0 | case AMDGPU::PRED_SEL_ZERO: |
950 | 0 | MO2.setReg(AMDGPU::PRED_SEL_ONE); |
951 | 0 | break; |
952 | 312 | case AMDGPU::PRED_SEL_ONE: |
953 | 312 | MO2.setReg(AMDGPU::PRED_SEL_ZERO); |
954 | 312 | break; |
955 | 0 | default: |
956 | 0 | return true; |
957 | 312 | } |
958 | 312 | return false; |
959 | 312 | } |
960 | | |
961 | | bool R600InstrInfo::DefinesPredicate(MachineInstr &MI, |
962 | 2.98k | std::vector<MachineOperand> &Pred) const { |
963 | 2.98k | return isPredicateSetter(MI.getOpcode()); |
964 | 2.98k | } |
965 | | |
966 | | bool R600InstrInfo::PredicateInstruction(MachineInstr &MI, |
967 | 235 | ArrayRef<MachineOperand> Pred) const { |
968 | 235 | int PIdx = MI.findFirstPredOperandIdx(); |
969 | 235 | |
970 | 235 | if (MI.getOpcode() == AMDGPU::CF_ALU) {
971 | 26 | MI.getOperand(8).setImm(0); |
972 | 26 | return true; |
973 | 26 | } |
974 | 209 | |
975 | 209 | if (MI.getOpcode() == AMDGPU::DOT_4) {
976 | 15 | MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X)) |
977 | 15 | .setReg(Pred[2].getReg()); |
978 | 15 | MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y)) |
979 | 15 | .setReg(Pred[2].getReg()); |
980 | 15 | MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z)) |
981 | 15 | .setReg(Pred[2].getReg()); |
982 | 15 | MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W)) |
983 | 15 | .setReg(Pred[2].getReg()); |
984 | 15 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
985 | 15 | MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); |
986 | 15 | return true; |
987 | 15 | } |
988 | 194 | |
989 | 194 | if (PIdx != -1) {
990 | 194 | MachineOperand &PMO = MI.getOperand(PIdx); |
991 | 194 | PMO.setReg(Pred[2].getReg()); |
992 | 194 | MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); |
993 | 194 | MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit); |
994 | 194 | return true; |
995 | 194 | } |
996 | 0 |
997 | 0 | return false; |
998 | 0 | } |
999 | | |
1000 | 2.98k | unsigned int R600InstrInfo::getPredicationCost(const MachineInstr &) const { |
1001 | 2.98k | return 2; |
1002 | 2.98k | } |
1003 | | |
1004 | | unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, |
1005 | | const MachineInstr &, |
1006 | 247k | unsigned *PredCost) const { |
1007 | 247k | if (PredCost) |
1008 | 0 | *PredCost = 2; |
1009 | 247k | return 2; |
1010 | 247k | } |
1011 | | |
1012 | | unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex, |
1013 | 1.96k | unsigned Channel) const { |
1014 | 1.96k | assert(Channel == 0); |
1015 | 1.96k | return RegIndex; |
1016 | 1.96k | } |
1017 | | |
1018 | 6.14k | bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { |
1019 | 6.14k | switch (MI.getOpcode()) { |
1020 | 6.13k | default: { |
1021 | 6.13k | MachineBasicBlock *MBB = MI.getParent(); |
1022 | 6.13k | int OffsetOpIdx = |
1023 | 6.13k | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr); |
1024 | 6.13k | // addr is a custom operand with multiple MI operands, and only the |
1025 | 6.13k | // first MI operand is given a name. |
1026 | 6.13k | int RegOpIdx = OffsetOpIdx + 1; |
1027 | 6.13k | int ChanOpIdx = |
1028 | 6.13k | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan); |
1029 | 6.13k | if (isRegisterLoad(MI)) {
1030 | 1.00k | int DstOpIdx = |
1031 | 1.00k | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst); |
1032 | 1.00k | unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); |
1033 | 1.00k | unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); |
1034 | 1.00k | unsigned Address = calculateIndirectAddress(RegIndex, Channel); |
1035 | 1.00k | unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); |
1036 | 1.00k | if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
1037 | 899 | buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(), |
1038 | 899 | getIndirectAddrRegClass()->getRegister(Address)); |
1039 | 1.00k | } else { |
1040 | 103 | buildIndirectRead(MBB, MI, MI.getOperand(DstOpIdx).getReg(), Address, |
1041 | 103 | OffsetReg); |
1042 | 103 | } |
1043 | 6.13k | } else if (isRegisterStore(MI)) {
1044 | 850 | int ValOpIdx = |
1045 | 850 | AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val); |
1046 | 850 | unsigned RegIndex = MI.getOperand(RegOpIdx).getImm(); |
1047 | 850 | unsigned Channel = MI.getOperand(ChanOpIdx).getImm(); |
1048 | 850 | unsigned Address = calculateIndirectAddress(RegIndex, Channel); |
1049 | 850 | unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg(); |
1050 | 850 | if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
1051 | 736 | buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address), |
1052 | 736 | MI.getOperand(ValOpIdx).getReg()); |
1053 | 850 | } else { |
1054 | 114 | buildIndirectWrite(MBB, MI, MI.getOperand(ValOpIdx).getReg(), |
1055 | 114 | calculateIndirectAddress(RegIndex, Channel), |
1056 | 114 | OffsetReg); |
1057 | 114 | } |
1058 | 5.12k | } else { |
1059 | 4.27k | return false; |
1060 | 4.27k | } |
1061 | 1.85k | |
1062 | 1.85k | MBB->erase(MI); |
1063 | 1.85k | return true; |
1064 | 1.85k | } |
1065 | 14 | case AMDGPU::R600_EXTRACT_ELT_V2: |
1066 | 14 | case AMDGPU::R600_EXTRACT_ELT_V4: |
1067 | 14 | buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(), |
1068 | 14 | RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address |
1069 | 14 | MI.getOperand(2).getReg(), |
1070 | 14 | RI.getHWRegChan(MI.getOperand(1).getReg())); |
1071 | 14 | break; |
1072 | 1 | case AMDGPU::R600_INSERT_ELT_V2: |
1073 | 1 | case AMDGPU::R600_INSERT_ELT_V4: |
1074 | 1 | buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value |
1075 | 1 | RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address |
1076 | 1 | MI.getOperand(3).getReg(), // Offset |
1077 | 1 | RI.getHWRegChan(MI.getOperand(1).getReg())); // Channel |
1078 | 1 | break; |
1079 | 15 | } |
1080 | 15 | MI.eraseFromParent(); |
1081 | 15 | return true; |
1082 | 15 | } |
1083 | | |
1084 | | void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, |
1085 | 4.11k | const MachineFunction &MF) const { |
1086 | 4.11k | const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); |
1087 | 4.11k | const R600FrameLowering *TFL = ST.getFrameLowering(); |
1088 | 4.11k | |
1089 | 4.11k | unsigned StackWidth = TFL->getStackWidth(MF); |
1090 | 4.11k | int End = getIndirectIndexEnd(MF); |
1091 | 4.11k | |
1092 | 4.11k | if (End == -1) |
1093 | 3.34k | return; |
1094 | 772 | |
1095 | 7.90k | for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
1096 | 7.13k | unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index); |
1097 | 7.13k | Reserved.set(SuperReg); |
1098 | 14.2k | for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
1099 | 7.13k | unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan); |
1100 | 7.13k | Reserved.set(Reg); |
1101 | 7.13k | } |
1102 | 7.13k | } |
1103 | 4.11k | } |
1104 | | |
1105 | 1.65k | const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const { |
1106 | 1.65k | return &AMDGPU::R600_TReg32_XRegClass; |
1107 | 1.65k | } |
1108 | | |
1109 | | MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, |
1110 | | MachineBasicBlock::iterator I, |
1111 | | unsigned ValueReg, unsigned Address, |
1112 | 114 | unsigned OffsetReg) const { |
1113 | 114 | return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0); |
1114 | 114 | } |
1115 | | |
1116 | | MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, |
1117 | | MachineBasicBlock::iterator I, |
1118 | | unsigned ValueReg, unsigned Address, |
1119 | | unsigned OffsetReg, |
1120 | 115 | unsigned AddrChan) const { |
1121 | 115 | unsigned AddrReg; |
1122 | 115 | switch (AddrChan) { |
1123 | 0 | default: llvm_unreachable("Invalid Channel");
1124 | 114 | case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; |
1125 | 0 | case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; |
1126 | 0 | case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; |
1127 | 1 | case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; |
1128 | 115 | } |
1129 | 115 | MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, |
1130 | 115 | AMDGPU::AR_X, OffsetReg); |
1131 | 115 | setImmOperand(*MOVA, AMDGPU::OpName::write, 0); |
1132 | 115 | |
1133 | 115 | MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, |
1134 | 115 | AddrReg, ValueReg) |
1135 | 115 | .addReg(AMDGPU::AR_X, |
1136 | 115 | RegState::Implicit | RegState::Kill); |
1137 | 115 | setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1); |
1138 | 115 | return Mov; |
1139 | 115 | } |
1140 | | |
1141 | | MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, |
1142 | | MachineBasicBlock::iterator I, |
1143 | | unsigned ValueReg, unsigned Address, |
1144 | 103 | unsigned OffsetReg) const { |
1145 | 103 | return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0); |
1146 | 103 | } |
1147 | | |
1148 | | MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, |
1149 | | MachineBasicBlock::iterator I, |
1150 | | unsigned ValueReg, unsigned Address, |
1151 | | unsigned OffsetReg, |
1152 | 117 | unsigned AddrChan) const { |
1153 | 117 | unsigned AddrReg; |
1154 | 117 | switch (AddrChan) { |
1155 | 0 | default: llvm_unreachable("Invalid Channel");
1156 | 105 | case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; |
1157 | 6 | case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; |
1158 | 0 | case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; |
1159 | 6 | case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; |
1160 | 117 | } |
1161 | 117 | MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, |
1162 | 117 | AMDGPU::AR_X, |
1163 | 117 | OffsetReg); |
1164 | 117 | setImmOperand(*MOVA, AMDGPU::OpName::write, 0); |
1165 | 117 | MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV, |
1166 | 117 | ValueReg, |
1167 | 117 | AddrReg) |
1168 | 117 | .addReg(AMDGPU::AR_X, |
1169 | 117 | RegState::Implicit | RegState::Kill); |
1170 | 117 | setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1); |
1171 | 117 | |
1172 | 117 | return Mov; |
1173 | 117 | } |
1174 | | |
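 | | /// \returns the first register index usable for indirect addressing: -1 if the
 | | /// function has no stack objects, 0 if it has no live-ins, otherwise one past
 | | /// the highest live-in register of the indirect address register class.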
1175 | 1.54k | int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { |
1176 | 1.54k | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
1177 | 1.54k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1178 | 1.54k | int Offset = -1; |
1179 | 1.54k | |
1180 | 1.54k | if (MFI.getNumObjects() == 0) {
1181 | 0 | return -1; |
1182 | 0 | } |
1183 | 1.54k | |
1184 | 1.54k | if (MRI.livein_empty()) {
1185 | 1.52k | return 0; |
1186 | 1.52k | } |
1187 | 24 | |
1188 | 24 | const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass(); |
1189 | 24 | for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), |
1190 | 24 | LE = MRI.livein_end(); |
1191 | 48 | LI != LE; ++LI) {
1192 | 24 | unsigned Reg = LI->first; |
1193 | 24 | if (TargetRegisterInfo::isVirtualRegister(Reg) || |
1194 | 24 | !IndirectRC->contains(Reg)) |
1195 | 0 | continue; |
1196 | 24 | |
1197 | 24 | unsigned RegIndex; |
1198 | 24 | unsigned RegEnd; |
1199 | 24 | for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd; |
1200 | 24 | ++RegIndex) {
1201 | 24 | if (IndirectRC->getRegister(RegIndex) == Reg) |
1202 | 24 | break; |
1203 | 24 | } |
1204 | 24 | Offset = std::max(Offset, (int)RegIndex); |
1205 | 24 | } |
1206 | 1.54k | |
1207 | 1.54k | return Offset + 1; |
1208 | 1.54k | } |
1209 | | |
1210 | 4.11k | int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { |
1211 | 4.11k | int Offset = 0; |
1212 | 4.11k | const MachineFrameInfo &MFI = MF.getFrameInfo(); |
1213 | 4.11k | |
1214 | 4.11k | // Variable sized objects are not supported |
1215 | 4.11k | if (MFI.hasVarSizedObjects()) {
1216 | 2 | return -1; |
1217 | 2 | } |
1218 | 4.11k | |
1219 | 4.11k | if (MFI.getNumObjects() == 0) {
1220 | 3.34k | return -1; |
1221 | 3.34k | } |
1222 | 772 | |
1223 | 772 | const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>(); |
1224 | 772 | const R600FrameLowering *TFL = ST.getFrameLowering(); |
1225 | 772 | |
1226 | 772 | unsigned IgnoredFrameReg; |
1227 | 772 | Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg); |
1228 | 772 | |
1229 | 772 | return getIndirectIndexBegin(MF) + Offset; |
1230 | 772 | } |
1231 | | |
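 | | /// Builds a default-form ALU instruction, filling every modifier operand
 | | /// (write, omod, rel, clamp, neg, abs, sel, pred_sel, ...) with its default value.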
1232 | 50.3k | unsigned R600InstrInfo::getMaxAlusPerClause() const { |
1233 | 50.3k | return 115; |
1234 | 50.3k | } |
1235 | | |
1236 | | MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB, |
1237 | | MachineBasicBlock::iterator I, |
1238 | | unsigned Opcode, |
1239 | | unsigned DstReg, |
1240 | | unsigned Src0Reg, |
1241 | 8.45k | unsigned Src1Reg) const { |
1242 | 8.45k | MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode), |
1243 | 8.45k | DstReg); // $dst |
1244 | 8.45k | |
1245 | 8.45k | if (Src1Reg) {
1246 | 256 | MIB.addImm(0) // $update_exec_mask |
1247 | 256 | .addImm(0); // $update_predicate |
1248 | 256 | } |
1249 | 8.45k | MIB.addImm(1) // $write |
1250 | 8.45k | .addImm(0) // $omod |
1251 | 8.45k | .addImm(0) // $dst_rel |
1252 | 8.45k | .addImm(0) // $dst_clamp |
1253 | 8.45k | .addReg(Src0Reg) // $src0 |
1254 | 8.45k | .addImm(0) // $src0_neg |
1255 | 8.45k | .addImm(0) // $src0_rel |
1256 | 8.45k | .addImm(0) // $src0_abs |
1257 | 8.45k | .addImm(-1); // $src0_sel |
1258 | 8.45k | |
1259 | 8.45k | if (Src1Reg) {
1260 | 256 | MIB.addReg(Src1Reg) // $src1 |
1261 | 256 | .addImm(0) // $src1_neg |
1262 | 256 | .addImm(0) // $src1_rel |
1263 | 256 | .addImm(0) // $src1_abs |
1264 | 256 | .addImm(-1); // $src1_sel |
1265 | 256 | } |
1266 | 8.45k | |
1267 | 8.45k | // XXX: The r600g finalizer expects this to be 1; once we've moved the
1268 | 8.45k | // scheduling to the backend, we can change the default to 0.
1269 | 8.45k | MIB.addImm(1) // $last |
1270 | 8.45k | .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel |
1271 | 8.45k | .addImm(0) // $literal |
1272 | 8.45k | .addImm(0); // $bank_swizzle |
1273 | 8.45k | |
1274 | 8.45k | return MIB; |
1275 | 8.45k | } |
1276 | | |
1277 | | #define OPERAND_CASE(Label) \ |
1278 | 2.17k | case Label: { \ |
1279 | 2.17k | static const unsigned Ops[] = \ |
1280 | 2.17k | { \ |
1281 | 2.17k | Label##_X, \ |
1282 | 2.17k | Label##_Y, \ |
1283 | 2.17k | Label##_Z, \ |
1284 | 2.17k | Label##_W \ |
1285 | 2.17k | }; \ |
1286 | 2.17k | return Ops[Slot]; \ |
1287 | 2.17k | } |
1288 | | |
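 | | /// Maps a generic DOT_4 operand name to its per-slot (_X/_Y/_Z/_W) variant
 | | /// selected by \p Slot.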
1289 | 2.17k | static unsigned getSlotedOps(unsigned Op, unsigned Slot) { |
1290 | 2.17k | switch (Op) { |
1291 | 128 | OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
1292 | 128 | OPERAND_CASE(AMDGPU::OpName::update_pred)
1293 | 128 | OPERAND_CASE(AMDGPU::OpName::write)
1294 | 128 | OPERAND_CASE(AMDGPU::OpName::omod)
1295 | 128 | OPERAND_CASE(AMDGPU::OpName::dst_rel)
1296 | 128 | OPERAND_CASE(AMDGPU::OpName::clamp)
1297 | 128 | OPERAND_CASE(AMDGPU::OpName::src0)
1298 | 128 | OPERAND_CASE(AMDGPU::OpName::src0_neg)
1299 | 128 | OPERAND_CASE(AMDGPU::OpName::src0_rel)
1300 | 128 | OPERAND_CASE(AMDGPU::OpName::src0_abs)
1301 | 128 | OPERAND_CASE(AMDGPU::OpName::src0_sel)
1302 | 128 | OPERAND_CASE(AMDGPU::OpName::src1)
1303 | 128 | OPERAND_CASE(AMDGPU::OpName::src1_neg)
1304 | 128 | OPERAND_CASE(AMDGPU::OpName::src1_rel)
1305 | 128 | OPERAND_CASE(AMDGPU::OpName::src1_abs)
1306 | 128 | OPERAND_CASE(AMDGPU::OpName::src1_sel)
1307 | 128 | OPERAND_CASE(AMDGPU::OpName::pred_sel)
1308 | 0 | default: |
1309 | 0 | llvm_unreachable("Wrong Operand"); |
1310 | 0 | } |
1311 | 0 | } |
1312 | | |
1313 | | #undef OPERAND_CASE |
1314 | | |
1315 | | MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( |
1316 | | MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg) |
1317 | 128 | const { |
1318 | 128 | assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented"); |
1319 | 128 | unsigned Opcode; |
1320 | 128 | if (ST.getGeneration() <= R600Subtarget::R700) |
1321 | 12 | Opcode = AMDGPU::DOT4_r600; |
1322 | 128 | else |
1323 | 116 | Opcode = AMDGPU::DOT4_eg; |
1324 | 128 | MachineBasicBlock::iterator I = MI; |
1325 | 128 | MachineOperand &Src0 = MI->getOperand( |
1326 | 128 | getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot))); |
1327 | 128 | MachineOperand &Src1 = MI->getOperand( |
1328 | 128 | getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot))); |
1329 | 128 | MachineInstr *MIB = buildDefaultInstruction( |
1330 | 128 | MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg()); |
1331 | 128 | static const unsigned Operands[14] = { |
1332 | 128 | AMDGPU::OpName::update_exec_mask, |
1333 | 128 | AMDGPU::OpName::update_pred, |
1334 | 128 | AMDGPU::OpName::write, |
1335 | 128 | AMDGPU::OpName::omod, |
1336 | 128 | AMDGPU::OpName::dst_rel, |
1337 | 128 | AMDGPU::OpName::clamp, |
1338 | 128 | AMDGPU::OpName::src0_neg, |
1339 | 128 | AMDGPU::OpName::src0_rel, |
1340 | 128 | AMDGPU::OpName::src0_abs, |
1341 | 128 | AMDGPU::OpName::src0_sel, |
1342 | 128 | AMDGPU::OpName::src1_neg, |
1343 | 128 | AMDGPU::OpName::src1_rel, |
1344 | 128 | AMDGPU::OpName::src1_abs, |
1345 | 128 | AMDGPU::OpName::src1_sel, |
1346 | 128 | }; |
1347 | 128 | |
1348 | 128 | MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(), |
1349 | 128 | getSlotedOps(AMDGPU::OpName::pred_sel, Slot))); |
1350 | 128 | MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel)) |
1351 | 128 | .setReg(MO.getReg()); |
1352 | 128 | |
1353 | 1.92k | for (unsigned i = 0; i < 14; i++) { |
1354 | 1.79k | MachineOperand &MO = MI->getOperand( |
1355 | 1.79k | getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot))); |
1356 | 1.79k | assert (MO.isImm()); |
1357 | 1.79k | setImmOperand(*MIB, Operands[i], MO.getImm()); |
1358 | 1.79k | } |
1359 | 128 | MIB->getOperand(20).setImm(0); |
1360 | 128 | return MIB; |
1361 | 128 | } |
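// A usage sketch under assumed names (not part of this file): DOT_4 is a
// four-lane pseudo, so an expansion pass would typically call this once per
// slot with a per-channel destination register:
//
//   for (unsigned Slot = 0; Slot < 4; ++Slot) {
//     unsigned LaneDst = getLaneDst(Slot);   // hypothetical helper picking the
//                                            // sub-register for this channel
//     TII->buildSlotOfVectorInstruction(MBB, &MI, Slot, LaneDst);
//   }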
1362 | | |
1363 | | MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB, |
1364 | | MachineBasicBlock::iterator I, |
1365 | | unsigned DstReg, |
1366 | 469 | uint64_t Imm) const { |
1367 | 469 | MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg, |
1368 | 469 | AMDGPU::ALU_LITERAL_X); |
1369 | 469 | setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm); |
1370 | 469 | return MovImm; |
1371 | 469 | } |
1372 | | |
1373 | | MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB, |
1374 | | MachineBasicBlock::iterator I, |
1375 | 2.44k | unsigned DstReg, unsigned SrcReg) const { |
1376 | 2.44k | return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg); |
1377 | 2.44k | } |
1378 | | |
1379 | 10.6k | int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const { |
1380 | 10.6k | return getOperandIdx(MI.getOpcode(), Op); |
1381 | 10.6k | } |
1382 | | |
1383 | 4.07M | int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const { |
1384 | 4.07M | return AMDGPU::getNamedOperandIdx(Opcode, Op); |
1385 | 4.07M | } |
1386 | | |
1387 | | void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op, |
1388 | 6.14k | int64_t Imm) const { |
1389 | 6.14k | int Idx = getOperandIdx(MI, Op); |
1390 | 6.14k | assert(Idx != -1 && "Operand not supported for this instruction."); |
1391 | 6.14k | assert(MI.getOperand(Idx).isImm()); |
1392 | 6.14k | MI.getOperand(Idx).setImm(Imm); |
1393 | 6.14k | } |
1394 | | |
1395 | | //===----------------------------------------------------------------------===// |
1396 | | // Instruction flag getters/setters |
1397 | | //===----------------------------------------------------------------------===// |
1398 | | |
1399 | | MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx, |
1400 | 1.63k | unsigned Flag) const { |
1401 | 1.63k | unsigned TargetFlags = get(MI.getOpcode()).TSFlags; |
1402 | 1.63k | int FlagIndex = 0; |
1403 | 1.63k | if (Flag != 0) { |
1404 | 1.04k | // If we pass something other than the default value of Flag to this |
1405 | 1.04k | // function, it means we want to set a flag on an instruction |
1406 | 1.04k | // that uses native encoding. |
1407 | 1.04k | assert(HAS_NATIVE_OPERANDS(TargetFlags)); |
1408 | 1.04k | bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3; |
1409 | 1.04k | switch (Flag) { |
1410 | 0 | case MO_FLAG_CLAMP: |
1411 | 0 | FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp); |
1412 | 0 | break; |
1413 | 582 | case MO_FLAG_MASK: |
1414 | 582 | FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write); |
1415 | 582 | break; |
1416 | 426 | case MO_FLAG_NOT_LAST: |
1417 | 426 | case MO_FLAG_LAST: |
1418 | 426 | FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last); |
1419 | 426 | break; |
1420 | 20 | case MO_FLAG_NEG: |
1421 | 20 | switch (SrcIdx) { |
1422 | 20 | case 0: |
1423 | 20 | FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg); |
1424 | 20 | break; |
1425 | 0 | case 1: |
1426 | 0 | FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg); |
1427 | 0 | break; |
1428 | 0 | case 2: |
1429 | 0 | FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg); |
1430 | 0 | break; |
1431 | 20 | } |
1432 | 20 | break; |
1433 | 20 | |
1434 | 20 | case MO_FLAG_ABS: |
1435 | 20 | assert(!IsOP3 && "Cannot set absolute value modifier for OP3 " |
1436 | 20 | "instructions."); |
1437 | 20 | (void)IsOP3; |
1438 | 20 | switch (SrcIdx) { |
1439 | 20 | case 0: |
1440 | 20 | FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs); |
1441 | 20 | break; |
1442 | 0 | case 1: |
1443 | 0 | FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs); |
1444 | 0 | break; |
1445 | 20 | } |
1446 | 20 | break; |
1447 | 20 | |
1448 | 0 | default: |
1449 | 0 | FlagIndex = -1; |
1450 | 0 | break; |
1451 | 1.04k | } |
1452 | 1.04k | assert(FlagIndex != -1 && "Flag not supported for this instruction"); |
1453 | 1.63k | } else { |
1454 | 590 | FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags); |
1455 | 590 | assert(FlagIndex != 0 && |
1456 | 590 | "Instruction flags not supported for this instruction"); |
1457 | 590 | } |
1458 | 1.63k | |
1459 | 1.63k | MachineOperand &FlagOp = MI.getOperand(FlagIndex); |
1460 | 1.63k | assert(FlagOp.isImm()); |
1461 | 1.63k | return FlagOp; |
1462 | 1.63k | } |
1463 | | |
1464 | | void R600InstrInfo::addFlag(MachineInstr &MI, unsigned Operand, |
1465 | 826 | unsigned Flag) const { |
1466 | 826 | unsigned TargetFlags = get(MI.getOpcode()).TSFlags; |
1467 | 826 | if (Flag == 0) { |
1468 | 0 | return; |
1469 | 0 | } |
1470 | 826 | if (HAS_NATIVE_OPERANDS(TargetFlags)) { |
1471 | 544 | MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); |
1472 | 544 | if (Flag == MO_FLAG_NOT_LAST) { |
1473 | 213 | clearFlag(MI, Operand, MO_FLAG_LAST); |
1474 | 544 | } else if (Flag == MO_FLAG_MASK) { |
1475 | 291 | clearFlag(MI, Operand, Flag); |
1476 | 331 | } else { |
1477 | 40 | FlagOp.setImm(1); |
1478 | 40 | } |
1479 | 826 | } else { |
1480 | 282 | MachineOperand &FlagOp = getFlagOp(MI, Operand); |
1481 | 282 | FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand))); |
1482 | 282 | } |
1483 | 826 | } |
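// Worked reading of the non-native branch above, using only what the code
// states: when an instruction has no native flag operands, all per-operand
// flags share one immediate, and flag F on operand N occupies the bits
// F << (NUM_MO_FLAGS * N). addFlag() ORs those bits in, and clearFlag() below
// masks the same bits out, so the two are inverses for a given (Operand, Flag).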
1484 | | |
1485 | | void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand, |
1486 | 728 | unsigned Flag) const { |
1487 | 728 | unsigned TargetFlags = get(MI.getOpcode()).TSFlags; |
1488 | 728 | if (HAS_NATIVE_OPERANDS(TargetFlags)) { |
1489 | 504 | MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag); |
1490 | 504 | FlagOp.setImm(0); |
1491 | 728 | } else { |
1492 | 224 | MachineOperand &FlagOp = getFlagOp(MI); |
1493 | 224 | unsigned InstFlags = FlagOp.getImm(); |
1494 | 224 | InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand)); |
1495 | 224 | FlagOp.setImm(InstFlags); |
1496 | 224 | } |
1497 | 728 | } |