Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
/// \file
11
/// \brief R600 Implementation of TargetInstrInfo.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "R600InstrInfo.h"
16
#include "AMDGPU.h"
17
#include "AMDGPUInstrInfo.h"
18
#include "AMDGPUSubtarget.h"
19
#include "R600Defines.h"
20
#include "R600FrameLowering.h"
21
#include "R600RegisterInfo.h"
22
#include "Utils/AMDGPUBaseInfo.h"
23
#include "llvm/ADT/BitVector.h"
24
#include "llvm/ADT/SmallSet.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/CodeGen/MachineBasicBlock.h"
27
#include "llvm/CodeGen/MachineFrameInfo.h"
28
#include "llvm/CodeGen/MachineFunction.h"
29
#include "llvm/CodeGen/MachineInstr.h"
30
#include "llvm/CodeGen/MachineInstrBuilder.h"
31
#include "llvm/CodeGen/MachineOperand.h"
32
#include "llvm/CodeGen/MachineRegisterInfo.h"
33
#include "llvm/Support/ErrorHandling.h"
34
#include "llvm/Target/TargetRegisterInfo.h"
35
#include "llvm/Target/TargetSubtargetInfo.h"
36
#include <algorithm>
37
#include <cassert>
38
#include <cstdint>
39
#include <cstring>
40
#include <iterator>
41
#include <utility>
42
#include <vector>
43
44
using namespace llvm;
45
46
#define GET_INSTRINFO_CTOR_DTOR
47
#include "AMDGPUGenDFAPacketizer.inc"
48
49
R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
50
253
  : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
51
52
270k
bool R600InstrInfo::isVector(const MachineInstr &MI) const {
53
270k
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
54
270k
}
55
56
void R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
57
                                MachineBasicBlock::iterator MI,
58
                                const DebugLoc &DL, unsigned DestReg,
59
1.98k
                                unsigned SrcReg, bool KillSrc) const {
60
1.98k
  unsigned VectorComponents = 0;
61
1.98k
  if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
62
1.98k
      AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
63
0
      (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
64
1.98k
       
AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg)0
)) {
65
0
    VectorComponents = 4;
66
1.98k
  } else 
if(1.98k
(AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
67
1.98k
            AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
68
8
            (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
69
1.98k
             
AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg)0
)) {
70
8
    VectorComponents = 2;
71
8
  }
72
1.98k
73
1.98k
  if (
VectorComponents > 01.98k
) {
74
24
    for (unsigned I = 0; 
I < VectorComponents24
;
I++16
) {
75
16
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
76
16
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
77
16
                              RI.getSubReg(DestReg, SubRegIndex),
78
16
                              RI.getSubReg(SrcReg, SubRegIndex))
79
16
                              .addReg(DestReg,
80
16
                                      RegState::Define | RegState::Implicit);
81
16
    }
82
1.98k
  } else {
83
1.97k
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
84
1.97k
                                                  DestReg, SrcReg);
85
1.97k
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
86
1.97k
                                    .setIsKill(KillSrc);
87
1.97k
  }
88
1.98k
}
89
90
/// \returns true if \p MBBI can be moved into a new basic.
91
bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
92
0
                                       MachineBasicBlock::iterator MBBI) const {
93
0
  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
94
0
                                        E = MBBI->operands_end(); 
I != E0
;
++I0
) {
95
0
    if (
I->isReg() && 0
!TargetRegisterInfo::isVirtualRegister(I->getReg())0
&&
96
0
        
I->isUse()0
&&
RI.isPhysRegLiveAcrossClauses(I->getReg())0
)
97
0
      return false;
98
0
  }
99
0
  return true;
100
0
}
101
102
2.19k
bool R600InstrInfo::isMov(unsigned Opcode) const {
103
2.19k
  switch(Opcode) {
104
2.13k
  default:
105
2.13k
    return false;
106
64
  case AMDGPU::MOV:
107
64
  case AMDGPU::MOV_IMM_F32:
108
64
  case AMDGPU::MOV_IMM_I32:
109
64
    return true;
110
0
  }
111
0
}
112
113
190k
bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
114
190k
  return false;
115
190k
}
116
117
210k
bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
118
210k
  switch(Opcode) {
119
210k
    default: return false;
120
9
    case AMDGPU::CUBE_r600_pseudo:
121
9
    case AMDGPU::CUBE_r600_real:
122
9
    case AMDGPU::CUBE_eg_pseudo:
123
9
    case AMDGPU::CUBE_eg_real:
124
9
      return true;
125
0
  }
126
0
}
127
128
517k
bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
129
517k
  unsigned TargetFlags = get(Opcode).TSFlags;
130
517k
131
517k
  return (TargetFlags & R600_InstFlag::ALU_INST);
132
517k
}
133
134
151k
bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
135
151k
  unsigned TargetFlags = get(Opcode).TSFlags;
136
151k
137
151k
  return ((TargetFlags & R600_InstFlag::OP1) |
138
151k
          (TargetFlags & R600_InstFlag::OP2) |
139
151k
          (TargetFlags & R600_InstFlag::OP3));
140
151k
}
141
142
239k
bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
143
239k
  unsigned TargetFlags = get(Opcode).TSFlags;
144
239k
145
239k
  return ((TargetFlags & R600_InstFlag::LDS_1A) |
146
239k
          (TargetFlags & R600_InstFlag::LDS_1A1D) |
147
239k
          (TargetFlags & R600_InstFlag::LDS_1A2D));
148
239k
}
149
150
105k
bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
151
7.45k
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
152
105k
}
153
154
57.5k
bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
155
57.5k
  if (isALUInstr(MI.getOpcode()))
156
46.7k
    return true;
157
10.7k
  
if (10.7k
isVector(MI) || 10.7k
isCubeOp(MI.getOpcode())10.7k
)
158
2
    return true;
159
10.7k
  switch (MI.getOpcode()) {
160
116
  case AMDGPU::PRED_X:
161
116
  case AMDGPU::INTERP_PAIR_XY:
162
116
  case AMDGPU::INTERP_PAIR_ZW:
163
116
  case AMDGPU::INTERP_VEC_LOAD:
164
116
  case AMDGPU::COPY:
165
116
  case AMDGPU::DOT_4:
166
116
    return true;
167
10.6k
  default:
168
10.6k
    return false;
169
0
  }
170
0
}
171
172
210k
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
173
210k
  if (ST.hasCaymanISA())
174
21.9k
    return false;
175
188k
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
176
188k
}
177
178
210k
bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {
179
210k
  return isTransOnly(MI.getOpcode());
180
210k
}
181
182
21.6k
bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
183
21.6k
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
184
21.6k
}
185
186
21.6k
bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const {
187
21.6k
  return isVectorOnly(MI.getOpcode());
188
21.6k
}
189
190
3.13k
bool R600InstrInfo::isExport(unsigned Opcode) const {
191
3.13k
  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
192
3.13k
}
193
194
63.0k
bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
195
55.3k
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
196
63.0k
}
197
198
8.58k
bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const {
199
8.58k
  const MachineFunction *MF = MI.getParent()->getParent();
200
8.58k
  return !AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
201
272
         usesVertexCache(MI.getOpcode());
202
8.58k
}
203
204
61.0k
bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
205
61.0k
  return (!ST.hasVertexCache() && 
IS_VTX8.08k
(get(Opcode))) ||
IS_TEX60.0k
(get(Opcode));
206
61.0k
}
207
208
13.7k
bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {
209
13.7k
  const MachineFunction *MF = MI.getParent()->getParent();
210
13.7k
  return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
211
13.1k
          usesVertexCache(MI.getOpcode())) ||
212
10.8k
          usesTextureCache(MI.getOpcode());
213
13.7k
}
214
215
97.2k
bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
216
97.2k
  switch (Opcode) {
217
8
  case AMDGPU::KILLGT:
218
8
  case AMDGPU::GROUP_BARRIER:
219
8
    return true;
220
97.2k
  default:
221
97.2k
    return false;
222
0
  }
223
0
}
224
225
93.7k
bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
226
93.7k
  return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
227
93.7k
}
228
229
93.5k
bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
230
93.5k
  return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
231
93.5k
}
232
233
36.3k
bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
234
36.3k
  if (
!isALUInstr(MI.getOpcode())36.3k
) {
235
271
    return false;
236
271
  }
237
36.0k
  for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
238
36.0k
                                        E = MI.operands_end();
239
766k
       
I != E766k
;
++I730k
) {
240
730k
    if (
!I->isReg() || 730k
!I->isUse()147k
||
241
111k
        TargetRegisterInfo::isVirtualRegister(I->getReg()))
242
653k
      continue;
243
77.1k
244
77.1k
    
if (77.1k
AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg())77.1k
)
245
0
      return true;
246
730k
  }
247
36.0k
  return false;
248
36.3k
}
249
250
313k
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
251
313k
  static const unsigned SrcSelTable[][2] = {
252
313k
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
253
313k
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
254
313k
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
255
313k
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
256
313k
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
257
313k
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
258
313k
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
259
313k
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
260
313k
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
261
313k
    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
262
313k
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
263
313k
  };
264
313k
265
618k
  for (const auto &Row : SrcSelTable) {
266
618k
    if (
getOperandIdx(Opcode, Row[0]) == (int)SrcIdx618k
) {
267
313k
      return getOperandIdx(Opcode, Row[1]);
268
313k
    }
269
0
  }
270
0
  return -1;
271
0
}
272
273
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
274
325k
R600InstrInfo::getSrcs(MachineInstr &MI) const {
275
325k
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
276
325k
277
325k
  if (
MI.getOpcode() == AMDGPU::DOT_4325k
) {
278
32
    static const unsigned OpTable[8][2] = {
279
32
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
280
32
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
281
32
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
282
32
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
283
32
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
284
32
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
285
32
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
286
32
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
287
32
    };
288
32
289
288
    for (unsigned j = 0; 
j < 8288
;
j++256
) {
290
256
      MachineOperand &MO =
291
256
          MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
292
256
      unsigned Reg = MO.getReg();
293
256
      if (
Reg == AMDGPU::ALU_CONST256
) {
294
21
        MachineOperand &Sel =
295
21
            MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
296
21
        Result.push_back(std::make_pair(&MO, Sel.getImm()));
297
21
        continue;
298
21
      }
299
256
300
256
    }
301
32
    return Result;
302
32
  }
303
325k
304
325k
  static const unsigned OpTable[3][2] = {
305
325k
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
306
325k
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
307
325k
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
308
325k
  };
309
325k
310
985k
  for (unsigned j = 0; 
j < 3985k
;
j++660k
) {
311
926k
    int SrcIdx = getOperandIdx(MI.getOpcode(), OpTable[j][0]);
312
926k
    if (SrcIdx < 0)
313
266k
      break;
314
660k
    MachineOperand &MO = MI.getOperand(SrcIdx);
315
660k
    unsigned Reg = MO.getReg();
316
660k
    if (
Reg == AMDGPU::ALU_CONST660k
) {
317
27.0k
      MachineOperand &Sel =
318
27.0k
          MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
319
27.0k
      Result.push_back(std::make_pair(&MO, Sel.getImm()));
320
27.0k
      continue;
321
27.0k
    }
322
633k
    
if (633k
Reg == AMDGPU::ALU_LITERAL_X633k
) {
323
139k
      MachineOperand &Operand =
324
139k
          MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
325
139k
      if (
Operand.isImm()139k
) {
326
139k
        Result.push_back(std::make_pair(&MO, Operand.getImm()));
327
139k
        continue;
328
139k
      }
329
139k
      assert(Operand.isGlobal());
330
75
    }
331
493k
    Result.push_back(std::make_pair(&MO, 0));
332
493k
  }
333
325k
  return Result;
334
325k
}
335
336
std::vector<std::pair<int, unsigned>>
337
R600InstrInfo::ExtractSrcs(MachineInstr &MI,
338
                           const DenseMap<unsigned, unsigned> &PV,
339
81.6k
                           unsigned &ConstCount) const {
340
81.6k
  ConstCount = 0;
341
81.6k
  const std::pair<int, unsigned> DummyPair(-1, 0);
342
81.6k
  std::vector<std::pair<int, unsigned>> Result;
343
81.6k
  unsigned i = 0;
344
167k
  for (const auto &Src : getSrcs(MI)) {
345
167k
    ++i;
346
167k
    unsigned Reg = Src.first->getReg();
347
167k
    int Index = RI.getEncodingValue(Reg) & 0xff;
348
167k
    if (
Reg == AMDGPU::OQAP167k
) {
349
1.52k
      Result.push_back(std::make_pair(Index, 0U));
350
1.52k
    }
351
167k
    if (
PV.find(Reg) != PV.end()167k
) {
352
22.3k
      // 255 is used to tells its a PS/PV reg
353
22.3k
      Result.push_back(std::make_pair(255, 0U));
354
22.3k
      continue;
355
22.3k
    }
356
144k
    
if (144k
Index > 127144k
) {
357
93.0k
      ConstCount++;
358
93.0k
      Result.push_back(DummyPair);
359
93.0k
      continue;
360
93.0k
    }
361
51.5k
    unsigned Chan = RI.getHWRegChan(Reg);
362
51.5k
    Result.push_back(std::make_pair(Index, Chan));
363
51.5k
  }
364
159k
  for (; 
i < 3159k
;
++i77.9k
)
365
77.9k
    Result.push_back(DummyPair);
366
81.6k
  return Result;
367
81.6k
}
368
369
static std::vector<std::pair<int, unsigned>>
370
Swizzle(std::vector<std::pair<int, unsigned>> Src,
371
1.91M
        R600InstrInfo::BankSwizzle Swz) {
372
1.91M
  if (Src[0] == Src[1])
373
319k
    Src[1].first = -1;
374
1.91M
  switch (Swz) {
375
394k
  case R600InstrInfo::ALU_VEC_012_SCL_210:
376
394k
    break;
377
308k
  case R600InstrInfo::ALU_VEC_021_SCL_122:
378
308k
    std::swap(Src[1], Src[2]);
379
308k
    break;
380
303k
  case R600InstrInfo::ALU_VEC_102_SCL_221:
381
303k
    std::swap(Src[0], Src[1]);
382
303k
    break;
383
306k
  case R600InstrInfo::ALU_VEC_120_SCL_212:
384
306k
    std::swap(Src[0], Src[1]);
385
306k
    std::swap(Src[0], Src[2]);
386
306k
    break;
387
302k
  case R600InstrInfo::ALU_VEC_201:
388
302k
    std::swap(Src[0], Src[2]);
389
302k
    std::swap(Src[0], Src[1]);
390
302k
    break;
391
298k
  case R600InstrInfo::ALU_VEC_210:
392
298k
    std::swap(Src[0], Src[2]);
393
298k
    break;
394
1.91M
  }
395
1.91M
  return Src;
396
1.91M
}
397
398
263k
static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
399
263k
  switch (Swz) {
400
125k
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
401
125k
    unsigned Cycles[3] = { 2, 1, 0};
402
125k
    return Cycles[Op];
403
263k
  }
404
45.7k
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
405
45.7k
    unsigned Cycles[3] = { 1, 2, 2};
406
45.7k
    return Cycles[Op];
407
263k
  }
408
45.9k
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
409
45.9k
    unsigned Cycles[3] = { 2, 1, 2};
410
45.9k
    return Cycles[Op];
411
263k
  }
412
46.2k
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
413
46.2k
    unsigned Cycles[3] = { 2, 2, 1};
414
46.2k
    return Cycles[Op];
415
263k
  }
416
0
  default:
417
0
    llvm_unreachable("Wrong Swizzle for Trans Slot");
418
0
  }
419
0
}
420
421
/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
422
/// in the same Instruction Group while meeting read port limitations given a
423
/// Swz swizzle sequence.
424
unsigned  R600InstrInfo::isLegalUpTo(
425
    const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
426
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
427
    const std::vector<std::pair<int, unsigned>> &TransSrcs,
428
523k
    R600InstrInfo::BankSwizzle TransSwz) const {
429
523k
  int Vector[4][3];
430
523k
  memset(Vector, -1, sizeof(Vector));
431
2.09M
  for (unsigned i = 0, e = IGSrcs.size(); 
i < e2.09M
;
i++1.57M
) {
432
1.91M
    const std::vector<std::pair<int, unsigned>> &Srcs =
433
1.91M
        Swizzle(IGSrcs[i], Swz[i]);
434
6.92M
    for (unsigned j = 0; 
j < 36.92M
;
j++5.00M
) {
435
5.35M
      const std::pair<int, unsigned> &Src = Srcs[j];
436
5.35M
      if (
Src.first < 0 || 5.35M
Src.first == 2552.53M
)
437
2.83M
        continue;
438
2.51M
      
if (2.51M
Src.first == 2.51M
GET_REG_INDEX2.51M
(RI.getEncodingValue(AMDGPU::OQAP))) {
439
1.53k
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
440
1.53k
            
Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_1220
) {
441
0
            // The value from output queue A (denoted by register OQAP) can
442
0
            // only be fetched during the first cycle.
443
0
            return false;
444
0
        }
445
1.53k
        // OQAP does not count towards the normal read port restrictions
446
1.53k
        continue;
447
1.53k
      }
448
2.51M
      
if (2.51M
Vector[Src.second][j] < 02.51M
)
449
2.10M
        Vector[Src.second][j] = Src.first;
450
2.51M
      if (Vector[Src.second][j] != Src.first)
451
344k
        return i;
452
5.35M
    }
453
1.91M
  }
454
523k
  // Now check Trans Alu
455
274k
  
for (unsigned i = 0, e = TransSrcs.size(); 179k
i < e274k
;
++i95.4k
) {
456
233k
    const std::pair<int, unsigned> &Src = TransSrcs[i];
457
233k
    unsigned Cycle = getTransSwizzle(TransSwz, i);
458
233k
    if (Src.first < 0)
459
17.6k
      continue;
460
216k
    
if (216k
Src.first == 255216k
)
461
26.2k
      continue;
462
189k
    
if (189k
Vector[Src.second][Cycle] < 0189k
)
463
48.4k
      Vector[Src.second][Cycle] = Src.first;
464
189k
    if (Vector[Src.second][Cycle] != Src.first)
465
138k
      return IGSrcs.size() - 1;
466
233k
  }
467
40.9k
  return IGSrcs.size();
468
523k
}
469
470
/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
471
/// (in lexicographic term) swizzle sequence assuming that all swizzles after
472
/// Idx can be skipped
473
static bool
474
NextPossibleSolution(
475
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
476
482k
    unsigned Idx) {
477
482k
  assert(Idx < SwzCandidate.size());
478
482k
  int ResetIdx = Idx;
479
578k
  while (
ResetIdx > -1 && 578k
SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210577k
)
480
95.7k
    ResetIdx --;
481
624k
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); 
i < e624k
;
i++142k
) {
482
142k
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
483
142k
  }
484
482k
  if (ResetIdx == -1)
485
1.16k
    return false;
486
481k
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
487
481k
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
488
481k
  return true;
489
481k
}
490
491
/// Enumerate all possible Swizzle sequence to find one that can meet all
492
/// read port requirements.
493
bool R600InstrInfo::FindSwizzleForVectorSlot(
494
    const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
495
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
496
    const std::vector<std::pair<int, unsigned>> &TransSrcs,
497
42.0k
    R600InstrInfo::BankSwizzle TransSwz) const {
498
42.0k
  unsigned ValidUpTo = 0;
499
523k
  do {
500
523k
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
501
523k
    if (ValidUpTo == IGSrcs.size())
502
40.9k
      return true;
503
482k
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
504
1.16k
  return false;
505
42.0k
}
506
507
/// Instructions in Trans slot can't read gpr at cycle 0 if they also read
508
/// a const, and can't read a gpr at cycle 1 if they read 2 const.
509
static bool
510
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
511
                  const std::vector<std::pair<int, unsigned>> &TransOps,
512
9.83k
                  unsigned ConstCount) {
513
9.83k
  // TransALU can't read 3 constants
514
9.83k
  if (ConstCount > 2)
515
16
    return false;
516
39.2k
  
for (unsigned i = 0, e = TransOps.size(); 9.81k
i < e39.2k
;
++i29.4k
) {
517
29.4k
    const std::pair<int, unsigned> &Src = TransOps[i];
518
29.4k
    unsigned Cycle = getTransSwizzle(TransSwz, i);
519
29.4k
    if (Src.first < 0)
520
18.2k
      continue;
521
11.2k
    
if (11.2k
ConstCount > 0 && 11.2k
Cycle == 02.20k
)
522
19
      return false;
523
11.2k
    
if (11.2k
ConstCount > 1 && 11.2k
Cycle == 19
)
524
1
      return false;
525
29.4k
  }
526
9.79k
  return true;
527
9.83k
}
528
529
bool
530
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
531
                                       const DenseMap<unsigned, unsigned> &PV,
532
                                       std::vector<BankSwizzle> &ValidSwizzle,
533
                                       bool isLastAluTrans)
534
41.4k
    const {
535
41.4k
  //Todo : support shared src0 - src1 operand
536
41.4k
537
41.4k
  std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs;
538
41.4k
  ValidSwizzle.clear();
539
41.4k
  unsigned ConstCount;
540
41.4k
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
541
123k
  for (unsigned i = 0, e = IG.size(); 
i < e123k
;
++i81.6k
) {
542
81.6k
    IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
543
81.6k
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
544
81.6k
        AMDGPU::OpName::bank_swizzle);
545
81.6k
    ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
546
81.6k
        IG[i]->getOperand(Op).getImm());
547
81.6k
  }
548
41.4k
  std::vector<std::pair<int, unsigned>> TransOps;
549
41.4k
  if (!isLastAluTrans)
550
32.2k
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
551
9.14k
552
9.14k
  TransOps = std::move(IGSrcs.back());
553
9.14k
  IGSrcs.pop_back();
554
9.14k
  ValidSwizzle.pop_back();
555
9.14k
556
9.14k
  static const R600InstrInfo::BankSwizzle TransSwz[] = {
557
9.14k
    ALU_VEC_012_SCL_210,
558
9.14k
    ALU_VEC_021_SCL_122,
559
9.14k
    ALU_VEC_120_SCL_212,
560
9.14k
    ALU_VEC_102_SCL_221
561
9.14k
  };
562
10.0k
  for (unsigned i = 0; 
i < 410.0k
;
i++915
) {
563
9.83k
    TransBS = TransSwz[i];
564
9.83k
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
565
36
      continue;
566
9.79k
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
567
9.79k
        TransBS);
568
9.79k
    if (
Result9.79k
) {
569
8.91k
      ValidSwizzle.push_back(TransBS);
570
8.91k
      return true;
571
8.91k
    }
572
9.83k
  }
573
9.14k
574
224
  return false;
575
41.4k
}
576
577
bool
578
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
579
95.4k
    const {
580
95.4k
  assert (Consts.size() <= 12 && "Too many operands in instructions group");
581
95.4k
  unsigned Pair1 = 0, Pair2 = 0;
582
133k
  for (unsigned i = 0, n = Consts.size(); 
i < n133k
;
++i38.5k
) {
583
39.3k
    unsigned ReadConstHalf = Consts[i] & 2;
584
39.3k
    unsigned ReadConstIndex = Consts[i] & (~3);
585
39.3k
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
586
39.3k
    if (
!Pair139.3k
) {
587
32.5k
      Pair1 = ReadHalfConst;
588
32.5k
      continue;
589
32.5k
    }
590
6.76k
    
if (6.76k
Pair1 == ReadHalfConst6.76k
)
591
1.15k
      continue;
592
5.60k
    
if (5.60k
!Pair25.60k
) {
593
3.97k
      Pair2 = ReadHalfConst;
594
3.97k
      continue;
595
3.97k
    }
596
1.63k
    
if (1.63k
Pair2 != ReadHalfConst1.63k
)
597
853
      return false;
598
39.3k
  }
599
94.5k
  return true;
600
95.4k
}
601
602
bool
603
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
604
86.5k
    const {
605
86.5k
  std::vector<unsigned> Consts;
606
86.5k
  SmallSet<int64_t, 4> Literals;
607
236k
  for (unsigned i = 0, n = MIs.size(); 
i < n236k
;
i++149k
) {
608
149k
    MachineInstr &MI = *MIs[i];
609
149k
    if (!isALUInstr(MI.getOpcode()))
610
1.22k
      continue;
611
148k
612
148k
    
for (const auto &Src : getSrcs(MI)) 148k
{
613
307k
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
614
64.6k
        Literals.insert(Src.second);
615
307k
      if (Literals.size() > 4)
616
21
        return false;
617
307k
      
if (307k
Src.first->getReg() == AMDGPU::ALU_CONST307k
)
618
15.4k
        Consts.push_back(Src.second);
619
307k
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
620
307k
          
AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())292k
) {
621
14.4k
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
622
14.4k
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
623
14.4k
        Consts.push_back((Index << 2) | Chan);
624
14.4k
      }
625
307k
    }
626
149k
  }
627
86.5k
  return fitsConstReadLimitations(Consts);
628
86.5k
}
629
630
DFAPacketizer *
631
2.05k
R600InstrInfo::CreateTargetScheduleState(const TargetSubtargetInfo &STI) const {
632
2.05k
  const InstrItineraryData *II = STI.getInstrItineraryData();
633
2.05k
  return static_cast<const R600Subtarget &>(STI).createDFAPacketizer(II);
634
2.05k
}
635
636
static bool
637
5.90k
isPredicateSetter(unsigned Opcode) {
638
5.90k
  switch (Opcode) {
639
2.41k
  case AMDGPU::PRED_X:
640
2.41k
    return true;
641
3.49k
  default:
642
3.49k
    return false;
643
0
  }
644
0
}
645
646
static MachineInstr *
647
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
648
422
                             MachineBasicBlock::iterator I) {
649
483
  while (
I != MBB.begin()483
) {
650
483
    --I;
651
483
    MachineInstr &MI = *I;
652
483
    if (isPredicateSetter(MI.getOpcode()))
653
422
      return &MI;
654
483
  }
655
422
656
0
  return nullptr;
657
422
}
658
659
static
660
36.4k
bool isJump(unsigned Opcode) {
661
35.5k
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
662
36.4k
}
663
664
34.0k
static bool isBranch(unsigned Opcode) {
665
33.9k
  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
666
33.9k
      Opcode == AMDGPU::BRANCH_COND_f32;
667
34.0k
}
668
669
bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
670
                                  MachineBasicBlock *&TBB,
671
                                  MachineBasicBlock *&FBB,
672
                                  SmallVectorImpl<MachineOperand> &Cond,
673
34.0k
                                  bool AllowModify) const {
674
34.0k
  // Most of the following comes from the ARM implementation of AnalyzeBranch
675
34.0k
676
34.0k
  // If the block has no terminators, it just falls into the block after it.
677
34.0k
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
678
34.0k
  if (I == MBB.end())
679
83
    return false;
680
34.0k
681
34.0k
  // AMDGPU::BRANCH* instructions are only available after isel and are not
682
34.0k
  // handled
683
34.0k
  
if (34.0k
isBranch(I->getOpcode())34.0k
)
684
7
    return true;
685
33.9k
  
if (33.9k
!isJump(I->getOpcode())33.9k
) {
686
31.4k
    return false;
687
31.4k
  }
688
2.50k
689
2.50k
  // Remove successive JUMP
690
2.50k
  
while (2.50k
I != MBB.begin() && 2.50k
std::prev(I)->getOpcode() == AMDGPU::JUMP2.49k
) {
691
0
      MachineBasicBlock::iterator PriorI = std::prev(I);
692
0
      if (AllowModify)
693
0
        I->removeFromParent();
694
0
      I = PriorI;
695
0
  }
696
2.50k
  MachineInstr &LastInst = *I;
697
2.50k
698
2.50k
  // If there is only one terminator instruction, process it.
699
2.50k
  unsigned LastOpc = LastInst.getOpcode();
700
2.50k
  if (
I == MBB.begin() || 2.50k
!isJump((--I)->getOpcode())2.49k
) {
701
2.11k
    if (
LastOpc == AMDGPU::JUMP2.11k
) {
702
510
      TBB = LastInst.getOperand(0).getMBB();
703
510
      return false;
704
1.60k
    } else 
if (1.60k
LastOpc == AMDGPU::JUMP_COND1.60k
) {
705
1.60k
      auto predSet = I;
706
1.99k
      while (
!isPredicateSetter(predSet->getOpcode())1.99k
) {
707
390
        predSet = --I;
708
390
      }
709
1.60k
      TBB = LastInst.getOperand(0).getMBB();
710
1.60k
      Cond.push_back(predSet->getOperand(1));
711
1.60k
      Cond.push_back(predSet->getOperand(2));
712
1.60k
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
713
1.60k
      return false;
714
1.60k
    }
715
0
    return true;  // Can't handle indirect branch.
716
0
  }
717
388
718
388
  // Get the instruction before it if it is a terminator.
719
388
  MachineInstr &SecondLastInst = *I;
720
388
  unsigned SecondLastOpc = SecondLastInst.getOpcode();
721
388
722
388
  // If the block ends with a B and a Bcc, handle it.
723
388
  if (
SecondLastOpc == AMDGPU::JUMP_COND && 388
LastOpc == AMDGPU::JUMP388
) {
724
388
    auto predSet = --I;
725
445
    while (
!isPredicateSetter(predSet->getOpcode())445
) {
726
57
      predSet = --I;
727
57
    }
728
388
    TBB = SecondLastInst.getOperand(0).getMBB();
729
388
    FBB = LastInst.getOperand(0).getMBB();
730
388
    Cond.push_back(predSet->getOperand(1));
731
388
    Cond.push_back(predSet->getOperand(2));
732
388
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
733
388
    return false;
734
388
  }
735
0
736
0
  // Otherwise, can't handle this.
737
0
  return true;
738
0
}
739
740
static
741
422
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
742
422
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
743
2.15k
      
It != E2.15k
;
++It1.73k
) {
744
1.98k
    if (It->getOpcode() == AMDGPU::CF_ALU ||
745
1.87k
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
746
256
      return It.getReverse();
747
1.98k
  }
748
166
  return MBB.end();
749
422
}
750
751
/// Insert an (optionally conditional) branch at the end of \p MBB.
/// \param TBB  target of the (conditional) branch; must be non-null.
/// \param FBB  fall-through target for a two-way branch, or null.
/// \param Cond condition operands produced by analyzeBranch (empty for an
///             unconditional branch); Cond[1] is the predicate opcode imm.
/// \returns the number of branch instructions inserted (1 or 2).
unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *TBB,
                                     MachineBasicBlock *FBB,
                                     ArrayRef<MachineOperand> Cond,
                                     const DebugLoc &DL,
                                     int *BytesAdded) const {
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert(!BytesAdded && "code size not handled");

  if (!FBB) {
    if (Cond.empty()) {
      // One-way unconditional branch.
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      // One-way conditional branch: re-arm the predicate setter (push flag +
      // condition immediate), then emit JUMP_COND consuming PREDICATE_BIT.
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(*PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
             .addMBB(TBB)
             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      // A conditional jump needs the enclosing ALU clause to push the
      // execution mask: upgrade CF_ALU to CF_ALU_PUSH_BEFORE if present.
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    // Two-way branch: conditional JUMP_COND to TBB followed by an
    // unconditional JUMP to FBB.
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(*PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
            .addMBB(TBB)
            .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    // Same clause upgrade as the one-way conditional case above.
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}
797
798
/// Remove up to two branch instructions from the end of \p MBB, undoing the
/// predicate-push bookkeeping that insertBranch added.
/// \returns the number of branches removed (0, 1 or 2).
unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                     int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  // Note : we leave PRED* instructions there.
  // They may be needed when predicating instructions.

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    // Last instruction is not a branch; nothing to remove.
    return 0;
  case AMDGPU::JUMP_COND: {
    // Clear the push flag on the predicate setter and downgrade the ALU
    // clause back from CF_ALU_PUSH_BEFORE to CF_ALU (reverse of insertBranch).
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(*predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  // Second pass: the block may have ended with a two-way branch, so check
  // the (new) last instruction as well.
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(*predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}
856
857
121k
bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
858
121k
  int idx = MI.findFirstPredOperandIdx();
859
121k
  if (idx < 0)
860
24.1k
    return false;
861
97.7k
862
97.7k
  unsigned Reg = MI.getOperand(idx).getReg();
863
97.7k
  switch (Reg) {
864
97.3k
  default: return false;
865
442
  case AMDGPU::PRED_SEL_ONE:
866
442
  case AMDGPU::PRED_SEL_ZERO:
867
442
  case AMDGPU::PREDICATE_BIT:
868
442
    return true;
869
0
  }
870
0
}
871
872
2.98k
/// \returns true if \p MI may be converted to a predicated instruction.
bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated.  Until we have proper support for instruction clauses in the
  // backend, we will mark KILL* instructions as unpredicable.

  if (MI.getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI.getOpcode() == AMDGPU::CF_ALU) {
    // If the clause start in the middle of MBB then the MBB has more
    // than a single clause, unable to predicate several clauses.
    if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI))
      return false;
    // TODO: We don't support KC merging atm
    // Operands 3 and 4 are the KCACHE bank selects; both must be unused.
    return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0;
  } else if (isVector(MI)) {
    // Vector (multi-slot) instructions cannot be predicated.
    return false;
  } else {
    // Defer to the generic target-independent check.
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}
893
894
/// If-conversion profitability hook (single-block form): on R600,
/// predicating a block is always considered profitable, so the cycle-count
/// and probability arguments are ignored.
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCycles,
                                   unsigned ExtraPredCycles,
                                   BranchProbability Probability) const{
  return true;
}
901
902
/// If-conversion profitability hook (diamond / two-block form): always
/// profitable on R600 regardless of the true/false block cycle counts.
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   BranchProbability Probability) const {
  return true;
}
912
913
/// Duplicating a block to enable if-conversion is always considered
/// profitable on R600; the arguments are ignored.
bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCycles,
                                         BranchProbability Probability)
                                         const {
  return true;
}
920
921
/// Never un-predicate on R600: once blocks have been if-converted, keep
/// them predicated.
bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}
926
927
/// Invert a branch condition produced by analyzeBranch.
/// Cond[1] holds the predicate-setter opcode (swapped SETE <-> SETNE) and
/// Cond[2] holds the predicate-select register (swapped ZERO <-> ONE).
/// \returns false on success, true if the condition cannot be reversed.
bool
R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case AMDGPU::PRED_SETE_INT:
    MO.setImm(AMDGPU::PRED_SETNE_INT);
    break;
  case AMDGPU::PRED_SETNE_INT:
    MO.setImm(AMDGPU::PRED_SETE_INT);
    break;
  case AMDGPU::PRED_SETE:
    MO.setImm(AMDGPU::PRED_SETNE);
    break;
  case AMDGPU::PRED_SETNE:
    MO.setImm(AMDGPU::PRED_SETE);
    break;
  default:
    // Unknown predicate opcode: cannot reverse.
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    // Unknown predicate-select register: cannot reverse.
    return true;
  }
  return false;
}
960
961
/// \returns true if \p MI is a predicate-setter instruction.
/// \p Pred is left untouched (the caller's out-vector is not populated).
bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
                                     std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI.getOpcode());
}
965
966
/// Convert \p MI into a predicated instruction using the condition in
/// \p Pred (Pred[2] holds the predicate-select register).
/// \returns true if the instruction was successfully predicated.
bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
                                         ArrayRef<MachineOperand> Pred) const {
  int PIdx = MI.findFirstPredOperandIdx();

  if (MI.getOpcode() == AMDGPU::CF_ALU) {
    // For an ALU clause, predication is expressed by clearing operand 8.
    MI.getOperand(8).setImm(0);
    return true;
  }

  if (MI.getOpcode() == AMDGPU::DOT_4) {
    // DOT_4 has one pred_sel operand per slot (X/Y/Z/W); set all four and
    // add an implicit use of PREDICATE_BIT.
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  if (PIdx != -1) {
    // Generic case: rewrite the single predicate operand and add an
    // implicit use of PREDICATE_BIT.
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  // No predicate operand: cannot predicate.
  return false;
}
999
1000
2.98k
/// Fixed predication cost of 2 cycles for every R600 instruction.
unsigned int R600InstrInfo::getPredicationCost(const MachineInstr &) const {
  return 2;
}
1003
1004
/// Fixed instruction latency of 2 cycles, independent of the itinerary and
/// the instruction; the optional \p PredCost out-parameter gets the same
/// value.
unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr &,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}
1011
1012
/// Map a (register index, channel) pair to an indirect address.
/// On R600 the address is just the register index; only channel 0 is
/// supported here.
unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                   unsigned Channel) const {
  assert(Channel == 0);
  return RegIndex;
}
1017
1018
6.14k
/// Expand post-RA pseudo instructions (register loads/stores and vector
/// element insert/extract) into real MOV / indirect-access sequences.
/// \returns true if \p MI was expanded (and erased), false otherwise.
bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  switch (MI.getOpcode()) {
  default: {
    MachineBasicBlock *MBB = MI.getParent();
    int OffsetOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr);
    // addr is a custom operand with multiple MI operands, and only the
    // first MI operand is given a name.
    int RegOpIdx = OffsetOpIdx + 1;
    int ChanOpIdx =
        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan);
    if (isRegisterLoad(MI)) {
      int DstOpIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
      unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
      unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        // Constant base address: lower to a plain MOV from the indirect
        // address register class.
        buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
                      getIndirectAddrRegClass()->getRegister(Address));
      } else {
        // Dynamic offset: emit a real indirect read.
        buildIndirectRead(MBB, MI, MI.getOperand(DstOpIdx).getReg(), Address,
                          OffsetReg);
      }
    } else if (isRegisterStore(MI)) {
      int ValOpIdx =
          AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val);
      unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
      unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
      unsigned Address = calculateIndirectAddress(RegIndex, Channel);
      unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
      if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
        buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
                      MI.getOperand(ValOpIdx).getReg());
      } else {
        buildIndirectWrite(MBB, MI, MI.getOperand(ValOpIdx).getReg(),
                           calculateIndirectAddress(RegIndex, Channel),
                           OffsetReg);
      }
    } else {
      // Not a pseudo we know how to expand.
      return false;
    }

    MBB->erase(MI);
    return true;
  }
  case AMDGPU::R600_EXTRACT_ELT_V2:
  case AMDGPU::R600_EXTRACT_ELT_V4:
    buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),
                      RI.getHWRegIndex(MI.getOperand(1).getReg()), //  Address
                      MI.getOperand(2).getReg(),
                      RI.getHWRegChan(MI.getOperand(1).getReg()));
    break;
  case AMDGPU::R600_INSERT_ELT_V2:
  case AMDGPU::R600_INSERT_ELT_V4:
    buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value
                       RI.getHWRegIndex(MI.getOperand(1).getReg()),   // Address
                       MI.getOperand(3).getReg(),                     // Offset
                       RI.getHWRegChan(MI.getOperand(1).getReg()));   // Channel
    break;
  }
  MI.eraseFromParent();
  return true;
}
1083
1084
/// Mark the registers used for indirect addressing in \p MF as reserved in
/// \p Reserved: for each indirect index, the 128-bit super-register plus
/// one 32-bit T-register per stack-width channel.
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF) const {
  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  const R600FrameLowering *TFL = ST.getFrameLowering();

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  // No indirect addressing used: nothing to reserve.
  if (End == -1)
    return;

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Reserved.set(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      // T-registers are allocated 4 channels per index.
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Reserved.set(Reg);
    }
  }
}
1104
1105
1.65k
/// \returns the register class used for indirect addressing (the X channel
/// of the 32-bit T-registers).
const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}
1108
1109
/// Convenience overload: indirect write on address channel 0.
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
}
1115
1116
/// Emit an indirect register write: a MOVA_INT_eg loading \p OffsetReg into
/// AR_X, followed by a relative-destination MOV of \p ValueReg into the
/// address register selected by \p Address / \p AddrChan.
/// \returns the builder for the final MOV.
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  // Pick the address register class matching the requested channel.
  switch (AddrChan) {
    default: llvm_unreachable("Invalid Channel");
    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  // MOVA only sets AR_X; suppress its normal destination write.
  setImmOperand(*MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      AddrReg, ValueReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  // dst_rel = 1 makes the destination relative to AR_X.
  setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}
1140
1141
/// Convenience overload: indirect read on address channel 0.
MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
}
1147
1148
/// Emit an indirect register read: a MOVA_INT_eg loading \p OffsetReg into
/// AR_X, followed by a relative-source MOV from the address register
/// selected by \p Address / \p AddrChan into \p ValueReg.
/// \returns the builder for the final MOV.
MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg,
                                       unsigned AddrChan) const {
  unsigned AddrReg;
  // Pick the address register class matching the requested channel.
  switch (AddrChan) {
    default: llvm_unreachable("Invalid Channel");
    case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
    case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
    case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
    case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
  }
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                                       AMDGPU::AR_X,
                                                       OffsetReg);
  // MOVA only sets AR_X; suppress its normal destination write.
  setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                      ValueReg,
                                      AddrReg)
                                      .addReg(AMDGPU::AR_X,
                                           RegState::Implicit | RegState::Kill);
  // src0_rel = 1 makes the source relative to AR_X.
  setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}
1174
1175
1.54k
/// \returns the first register index usable for indirect addressing in
/// \p MF: -1 if there are no frame objects, 0 if there are no live-ins,
/// otherwise one past the highest live-in register of the indirect register
/// class.
int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  int Offset = -1;

  if (MFI.getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
                                            LI != LE; ++LI) {
    unsigned Reg = LI->first;
    // Only physical registers of the indirect class constrain the start.
    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
        !IndirectRC->contains(Reg))
      continue;

    // Linear search for Reg's index within the class.
    unsigned RegIndex;
    unsigned RegEnd;
    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
                                                          ++RegIndex) {
      if (IndirectRC->getRegister(RegIndex) == Reg)
        break;
    }
    Offset = std::max(Offset, (int)RegIndex);
  }

  // First free index is one past the highest live-in index.
  return Offset + 1;
}
1209
1210
4.11k
/// \returns the last register index usable for indirect addressing in
/// \p MF, or -1 if indirect addressing is not used (no frame objects) or
/// unsupported (variable-sized objects).
int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Variable sized objects are not supported
  if (MFI.hasVarSizedObjects()) {
    return -1;
  }

  if (MFI.getNumObjects() == 0) {
    return -1;
  }

  const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
  const R600FrameLowering *TFL = ST.getFrameLowering();

  unsigned IgnoredFrameReg;
  // Frame index -1 gives the total stack size in indirect slots.
  Offset = TFL->getFrameIndexReference(MF, -1, IgnoredFrameReg);

  return getIndirectIndexBegin(MF) + Offset;
}
1231
1232
50.3k
/// Maximum number of ALU instructions allowed in a single clause.
unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}
1235
1236
/// Build an R600 ALU instruction with all modifier operands set to their
/// neutral defaults. \p Src1Reg may be 0 for single-source opcodes; when
/// non-zero the OP2-style extra operands (exec-mask/predicate updates and
/// the src1 group) are added as well. Operand order here must match the
/// instruction definitions exactly.
MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
    DstReg);           // $dst

  if (Src1Reg) {
    MIB.addImm(0)     // $update_exec_mask
       .addImm(0);    // $update_predicate
  }
  MIB.addImm(1)        // $write
     .addImm(0)        // $omod
     .addImm(0)        // $dst_rel
     .addImm(0)        // $dst_clamp
     .addReg(Src0Reg)  // $src0
     .addImm(0)        // $src0_neg
     .addImm(0)        // $src0_rel
     .addImm(0)        // $src0_abs
     .addImm(-1);       // $src0_sel

  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);      // $src1_sel
  }

  //XXX: The r600g finalizer expects this to be 1, once we've moved the
  //scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)        // $last
      .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
      .addImm(0)         // $literal
      .addImm(0);        // $bank_swizzle

  return MIB;
}
1276
1277
// Expands to a switch case mapping a DOT_4 operand name to its per-slot
// (X/Y/Z/W) variant, indexed by Slot.
#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

/// \returns the slot-specific (X/Y/Z/W) variant of DOT_4 operand \p Op for
/// slot \p Slot (0-3). Aborts on any operand without per-slot variants.
static unsigned getSlotedOps(unsigned  Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE
1314
1315
/// Build the scalar DOT4 instruction for one slot (X/Y/Z/W) of a DOT_4
/// vector pseudo \p MI, copying the slot's modifier operands onto the new
/// instruction. \p DstReg receives the slot's result.
MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  // Pre-Evergreen chips use the r600 encoding of DOT4.
  if (ST.getGeneration() <= R600Subtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  // Immediate modifier operands to copy from the vector pseudo's slot to
  // the scalar instruction.
  static const unsigned  Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  // pred_sel is a register operand, so it is copied separately from the
  // immediate loop below.
  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert (MO.isImm());
    setImmOperand(*MIB, Operands[i], MO.getImm());
  }
  // Operand 20 is $last; clear it so the slots stay in one instruction group.
  MIB->getOperand(20).setImm(0);
  return MIB;
}
1362
1363
/// Emit a MOV of the literal constant \p Imm into \p DstReg, using the
/// ALU_LITERAL_X source slot with the literal operand set to \p Imm.
MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                  AMDGPU::ALU_LITERAL_X);
  setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}
1372
1373
/// Emit a register-to-register MOV from \p SrcReg to \p DstReg.
MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned DstReg, unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}
1378
1379
10.6k
/// \returns the operand index of named operand \p Op in \p MI, or -1.
int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}
1382
1383
4.07M
/// \returns the operand index of named operand \p Op for \p Opcode, or -1.
int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}
1386
1387
/// Set the named immediate operand \p Op of \p MI to \p Imm.
/// Asserts that the operand exists and is an immediate.
void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI.getOperand(Idx).isImm());
  MI.getOperand(Idx).setImm(Imm);
}
1394
1395
//===----------------------------------------------------------------------===//
1396
// Instruction flag getters/setters
1397
//===----------------------------------------------------------------------===//
1398
1399
/// \returns the operand of \p MI that stores the flag \p Flag for source
/// index \p SrcIdx. With Flag != 0 the instruction must use native
/// operands and the matching modifier operand (clamp/write/last/neg/abs)
/// is returned; with Flag == 0 the instruction's packed flag operand
/// (located via GET_FLAG_OPERAND_IDX) is returned instead.
MachineOperand &R600InstrInfo::getFlagOp(MachineInstr &MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we are want to set a flag on an instruction
    // that uses native encoding.
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      // Negate modifier is per-source operand.
      switch (SrcIdx) {
      case 0:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg);
        break;
      case 1:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg);
        break;
      case 2:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg);
        break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      // Absolute-value modifier is per-source operand (OP2 only).
      switch (SrcIdx) {
      case 0:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs);
        break;
      case 1:
        FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs);
        break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
      // Non-native encoding: flags live in a single packed immediate operand.
      FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
      assert(FlagIndex != 0 &&
         "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI.getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}
1463
1464
/// Set flag \p Flag on operand index \p Operand of \p MI.
/// For native-operand instructions this writes the dedicated modifier
/// operand (note NOT_LAST and MASK are realized by *clearing* the
/// corresponding flag); otherwise the bit is OR'd into the packed flag
/// immediate.
void R600InstrInfo::addFlag(MachineInstr &MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      // "Not last" means clearing the $last modifier.
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      // Masking a result means clearing the $write modifier.
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
      // Packed encoding: shift the flag into the slot for this operand.
      MachineOperand &FlagOp = getFlagOp(MI, Operand);
      FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}
1484
1485
/// Clear flag \p Flag on operand index \p Operand of \p MI.
/// For native-operand instructions the dedicated modifier operand is set
/// to 0; otherwise the bit is masked out of the packed flag immediate.
void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI.getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}