Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// This pass compute turns all control flow pseudo instructions into native one
11
/// computing their address on the fly; it also sets STACK_SIZE info.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "AMDGPU.h"
16
#include "AMDGPUSubtarget.h"
17
#include "R600Defines.h"
18
#include "R600InstrInfo.h"
19
#include "R600MachineFunctionInfo.h"
20
#include "R600RegisterInfo.h"
21
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
22
#include "llvm/ADT/STLExtras.h"
23
#include "llvm/ADT/SmallVector.h"
24
#include "llvm/ADT/StringRef.h"
25
#include "llvm/CodeGen/MachineBasicBlock.h"
26
#include "llvm/CodeGen/MachineFunction.h"
27
#include "llvm/CodeGen/MachineFunctionPass.h"
28
#include "llvm/CodeGen/MachineInstr.h"
29
#include "llvm/CodeGen/MachineInstrBuilder.h"
30
#include "llvm/CodeGen/MachineOperand.h"
31
#include "llvm/IR/CallingConv.h"
32
#include "llvm/IR/DebugLoc.h"
33
#include "llvm/IR/Function.h"
34
#include "llvm/Pass.h"
35
#include "llvm/Support/Compiler.h"
36
#include "llvm/Support/Debug.h"
37
#include "llvm/Support/MathExtras.h"
38
#include "llvm/Support/raw_ostream.h"
39
#include <algorithm>
40
#include <cassert>
41
#include <cstdint>
42
#include <set>
43
#include <utility>
44
#include <vector>
45
46
using namespace llvm;
47
48
#define DEBUG_TYPE "r600cf"
49
50
namespace {
51
52
struct CFStack {
53
  enum StackItem {
54
    ENTRY = 0,
55
    SUB_ENTRY = 1,
56
    FIRST_NON_WQM_PUSH = 2,
57
    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
58
  };
59
60
  const R600Subtarget *ST;
61
  std::vector<StackItem> BranchStack;
62
  std::vector<StackItem> LoopStack;
63
  unsigned MaxStackSize;
64
  unsigned CurrentEntries = 0;
65
  unsigned CurrentSubEntries = 0;
66
67
  CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
68
      // We need to reserve a stack entry for CALL_FS in vertex shaders.
69
2.29k
      MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
70
71
  unsigned getLoopDepth();
72
  bool branchStackContains(CFStack::StackItem);
73
  bool requiresWorkAroundForInst(unsigned Opcode);
74
  unsigned getSubEntrySize(CFStack::StackItem Item);
75
  void updateMaxStackSize();
76
  void pushBranch(unsigned Opcode, bool isWQM = false);
77
  void pushLoop();
78
  void popBranch();
79
  void popLoop();
80
};
81
82
9
unsigned CFStack::getLoopDepth() {
83
9
  return LoopStack.size();
84
9
}
85
86
55
bool CFStack::branchStackContains(CFStack::StackItem Item) {
87
55
  for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
88
55
       E = BranchStack.end(); I != E; 
++I0
) {
89
10
    if (*I == Item)
90
10
      return true;
91
10
  }
92
55
  
return false45
;
93
55
}
94
95
9.37k
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
96
9.37k
  if (Opcode == R600::CF_ALU_PUSH_BEFORE && 
ST->hasCaymanISA()64
&&
97
9.37k
      
getLoopDepth() > 19
)
98
1
    return true;
99
9.37k
100
9.37k
  if (!ST->hasCFAluBug())
101
4.09k
    return false;
102
5.27k
103
5.27k
  switch(Opcode) {
104
5.27k
  
default: return false5.23k
;
105
5.27k
  case R600::CF_ALU_PUSH_BEFORE:
106
46
  case R600::CF_ALU_ELSE_AFTER:
107
46
  case R600::CF_ALU_BREAK:
108
46
  case R600::CF_ALU_CONTINUE:
109
46
    if (CurrentSubEntries == 0)
110
36
      return false;
111
10
    if (ST->getWavefrontSize() == 64) {
112
8
      // We are being conservative here.  We only require this work-around if
113
8
      // CurrentSubEntries > 3 &&
114
8
      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
115
8
      //
116
8
      // We have to be conservative, because we don't know for certain that
117
8
      // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
118
8
      // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
119
8
      // resources without any problems.
120
8
      return CurrentSubEntries > 3;
121
8
    } else {
122
2
      assert(ST->getWavefrontSize() == 32);
123
2
      // We are being conservative here.  We only require the work-around if
124
2
      // CurrentSubEntries > 7 &&
125
2
      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
126
2
      // See the comment on the wavefront size == 64 case for why we are
127
2
      // being conservative.
128
2
      return CurrentSubEntries > 7;
129
2
    }
130
5.27k
  }
131
5.27k
}
132
133
128
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
134
128
  switch(Item) {
135
128
  default:
136
0
    return 0;
137
128
  case CFStack::FIRST_NON_WQM_PUSH:
138
90
  assert(!ST->hasCaymanISA());
139
90
  if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
140
0
    // +1 For the push operation.
141
0
    // +2 Extra space required.
142
0
    return 3;
143
90
  } else {
144
90
    // Some documentation says that this is not necessary on Evergreen,
145
90
    // but experimentation has show that we need to allocate 1 extra
146
90
    // sub-entry for the first non-WQM push.
147
90
    // +1 For the push operation.
148
90
    // +1 Extra space required.
149
90
    return 2;
150
90
  }
151
0
  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
152
0
    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
153
0
    // +1 For the push operation.
154
0
    // +1 Extra space required.
155
0
    return 2;
156
38
  case CFStack::SUB_ENTRY:
157
38
    return 1;
158
128
  }
159
128
}
160
161
84
void CFStack::updateMaxStackSize() {
162
84
  unsigned CurrentStackSize =
163
84
      CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
164
84
  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
165
84
}
166
167
64
void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
168
64
  CFStack::StackItem Item = CFStack::ENTRY;
169
64
  switch(Opcode) {
170
64
  case R600::CF_PUSH_EG:
171
64
  case R600::CF_ALU_PUSH_BEFORE:
172
64
    if (!isWQM) {
173
64
      if (!ST->hasCaymanISA() &&
174
64
          
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH)55
)
175
45
        Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
176
19
                                             // See comment in
177
19
                                             // CFStack::getSubEntrySize()
178
19
      else if (CurrentEntries > 0 &&
179
19
               
ST->getGeneration() > AMDGPUSubtarget::EVERGREEN14
&&
180
19
               
!ST->hasCaymanISA()6
&&
181
19
               
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)0
)
182
0
        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
183
19
      else
184
19
        Item = CFStack::SUB_ENTRY;
185
64
    } else
186
0
      Item = CFStack::ENTRY;
187
64
    break;
188
64
  }
189
64
  BranchStack.push_back(Item);
190
64
  if (Item == CFStack::ENTRY)
191
0
    CurrentEntries++;
192
64
  else
193
64
    CurrentSubEntries += getSubEntrySize(Item);
194
64
  updateMaxStackSize();
195
64
}
196
197
20
void CFStack::pushLoop() {
198
20
  LoopStack.push_back(CFStack::ENTRY);
199
20
  CurrentEntries++;
200
20
  updateMaxStackSize();
201
20
}
202
203
64
void CFStack::popBranch() {
204
64
  CFStack::StackItem Top = BranchStack.back();
205
64
  if (Top == CFStack::ENTRY)
206
0
    CurrentEntries--;
207
64
  else
208
64
    CurrentSubEntries-= getSubEntrySize(Top);
209
64
  BranchStack.pop_back();
210
64
}
211
212
20
void CFStack::popLoop() {
213
20
  CurrentEntries--;
214
20
  LoopStack.pop_back();
215
20
}
216
217
class R600ControlFlowFinalizer : public MachineFunctionPass {
218
private:
219
  using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
220
221
  enum ControlFlowInstruction {
222
    CF_TC,
223
    CF_VC,
224
    CF_CALL_FS,
225
    CF_WHILE_LOOP,
226
    CF_END_LOOP,
227
    CF_LOOP_BREAK,
228
    CF_LOOP_CONTINUE,
229
    CF_JUMP,
230
    CF_ELSE,
231
    CF_POP,
232
    CF_END
233
  };
234
235
  const R600InstrInfo *TII = nullptr;
236
  const R600RegisterInfo *TRI = nullptr;
237
  unsigned MaxFetchInst;
238
  const R600Subtarget *ST = nullptr;
239
240
33.5k
  bool IsTrivialInst(MachineInstr &MI) const {
241
33.5k
    switch (MI.getOpcode()) {
242
33.5k
    case R600::KILL:
243
287
    case R600::RETURN:
244
287
      return true;
245
33.3k
    default:
246
33.3k
      return false;
247
33.5k
    }
248
33.5k
  }
249
250
3.48k
  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
251
3.48k
    unsigned Opcode = 0;
252
3.48k
    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
253
3.48k
    switch (CFI) {
254
3.48k
    case CF_TC:
255
1.30k
      Opcode = isEg ? 
R600::CF_TC_EG1.28k
:
R600::CF_TC_R60013
;
256
1.30k
      break;
257
3.48k
    case CF_VC:
258
0
      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
259
0
      break;
260
3.48k
    case CF_CALL_FS:
261
15
      Opcode = isEg ? 
R600::CF_CALL_FS_EG12
:
R600::CF_CALL_FS_R6003
;
262
15
      break;
263
3.48k
    case CF_WHILE_LOOP:
264
20
      Opcode = isEg ? R600::WHILE_LOOP_EG : 
R600::WHILE_LOOP_R6000
;
265
20
      break;
266
3.48k
    case CF_END_LOOP:
267
20
      Opcode = isEg ? R600::END_LOOP_EG : 
R600::END_LOOP_R6000
;
268
20
      break;
269
3.48k
    case CF_LOOP_BREAK:
270
20
      Opcode = isEg ? R600::LOOP_BREAK_EG : 
R600::LOOP_BREAK_R6000
;
271
20
      break;
272
3.48k
    case CF_LOOP_CONTINUE:
273
0
      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
274
0
      break;
275
3.48k
    case CF_JUMP:
276
64
      Opcode = isEg ? R600::CF_JUMP_EG : 
R600::CF_JUMP_R6000
;
277
64
      break;
278
3.48k
    case CF_ELSE:
279
3
      Opcode = isEg ? R600::CF_ELSE_EG : 
R600::CF_ELSE_R6000
;
280
3
      break;
281
3.48k
    case CF_POP:
282
40
      Opcode = isEg ? R600::POP_EG : 
R600::POP_R6000
;
283
40
      break;
284
3.48k
    case CF_END:
285
2.00k
      if (ST->hasCaymanISA()) {
286
312
        Opcode = R600::CF_END_CM;
287
312
        break;
288
312
      }
289
1.69k
      Opcode = isEg ? 
R600::CF_END_EG1.66k
:
R600::CF_END_R60028
;
290
1.69k
      break;
291
3.48k
    }
292
3.48k
    assert (Opcode && "No opcode selected");
293
3.48k
    return TII->get(Opcode);
294
3.48k
  }
295
296
  bool isCompatibleWithClause(const MachineInstr &MI,
297
2.00k
                              std::set<unsigned> &DstRegs) const {
298
2.00k
    unsigned DstMI, SrcMI;
299
2.00k
    for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
300
2.00k
                                          E = MI.operands_end();
301
14.4k
         I != E; 
++I12.4k
) {
302
12.4k
      const MachineOperand &MO = *I;
303
12.4k
      if (!MO.isReg())
304
8.39k
        continue;
305
4.01k
      if (MO.isDef()) {
306
2.00k
        unsigned Reg = MO.getReg();
307
2.00k
        if (R600::R600_Reg128RegClass.contains(Reg))
308
826
          DstMI = Reg;
309
1.18k
        else
310
1.18k
          DstMI = TRI->getMatchingSuperReg(Reg,
311
1.18k
              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
312
1.18k
              &R600::R600_Reg128RegClass);
313
2.00k
      }
314
4.01k
      if (MO.isUse()) {
315
2.00k
        unsigned Reg = MO.getReg();
316
2.00k
        if (R600::R600_Reg128RegClass.contains(Reg))
317
292
          SrcMI = Reg;
318
1.71k
        else
319
1.71k
          SrcMI = TRI->getMatchingSuperReg(Reg,
320
1.71k
              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
321
1.71k
              &R600::R600_Reg128RegClass);
322
2.00k
      }
323
4.01k
    }
324
2.00k
    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
325
1.99k
      DstRegs.insert(DstMI);
326
1.99k
      return true;
327
1.99k
    } else
328
17
      return false;
329
2.00k
  }
330
331
  ClauseFile
332
  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
333
1.30k
      const {
334
1.30k
    MachineBasicBlock::iterator ClauseHead = I;
335
1.30k
    std::vector<MachineInstr *> ClauseContent;
336
1.30k
    unsigned AluInstCount = 0;
337
1.30k
    bool IsTex = TII->usesTextureCache(*ClauseHead);
338
1.30k
    std::set<unsigned> DstRegs;
339
3.29k
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; 
++I1.99k
) {
340
3.29k
      if (IsTrivialInst(*I))
341
0
        continue;
342
3.29k
      if (AluInstCount >= MaxFetchInst)
343
0
        break;
344
3.29k
      if ((IsTex && !TII->usesTextureCache(*I)) ||
345
3.29k
          
(2.00k
!IsTex2.00k
&&
!TII->usesVertexCache(*I)0
))
346
1.28k
        break;
347
2.00k
      if (!isCompatibleWithClause(*I, DstRegs))
348
17
        break;
349
1.99k
      AluInstCount ++;
350
1.99k
      ClauseContent.push_back(&*I);
351
1.99k
    }
352
1.30k
    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
353
1.30k
        getHWInstrDesc(IsTex?CF_TC:
CF_VC0
))
354
1.30k
        .addImm(0) // ADDR
355
1.30k
        .addImm(AluInstCount - 1); // COUNT
356
1.30k
    return ClauseFile(MIb, std::move(ClauseContent));
357
1.30k
  }
358
359
50.5k
  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
360
50.5k
    static const unsigned LiteralRegs[] = {
361
50.5k
      R600::ALU_LITERAL_X,
362
50.5k
      R600::ALU_LITERAL_Y,
363
50.5k
      R600::ALU_LITERAL_Z,
364
50.5k
      R600::ALU_LITERAL_W
365
50.5k
    };
366
50.5k
    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
367
50.5k
        TII->getSrcs(MI);
368
97.8k
    for (const auto &Src:Srcs) {
369
97.8k
      if (Src.first->getReg() != R600::ALU_LITERAL_X)
370
76.7k
        continue;
371
21.1k
      int64_t Imm = Src.second;
372
21.1k
      std::vector<MachineOperand *>::iterator It =
373
21.1k
          llvm::find_if(Lits, [&](MachineOperand *val) {
374
10.4k
            return val->isImm() && (val->getImm() == Imm);
375
10.4k
          });
376
21.1k
377
21.1k
      // Get corresponding Operand
378
21.1k
      MachineOperand &Operand = MI.getOperand(
379
21.1k
          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
380
21.1k
381
21.1k
      if (It != Lits.end()) {
382
1.84k
        // Reuse existing literal reg
383
1.84k
        unsigned Index = It - Lits.begin();
384
1.84k
        Src.first->setReg(LiteralRegs[Index]);
385
19.2k
      } else {
386
19.2k
        // Allocate new literal reg
387
19.2k
        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
388
19.2k
        Src.first->setReg(LiteralRegs[Lits.size()]);
389
19.2k
        Lits.push_back(&Operand);
390
19.2k
      }
391
21.1k
    }
392
50.5k
  }
393
394
  MachineBasicBlock::iterator insertLiterals(
395
      MachineBasicBlock::iterator InsertPos,
396
0
      const std::vector<unsigned> &Literals) const {
397
0
    MachineBasicBlock *MBB = InsertPos->getParent();
398
0
    for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
399
0
      unsigned LiteralPair0 = Literals[i];
400
0
      unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
401
0
      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
402
0
          TII->get(R600::LITERALS))
403
0
          .addImm(LiteralPair0)
404
0
          .addImm(LiteralPair1);
405
0
    }
406
0
    return InsertPos;
407
0
  }
408
409
  ClauseFile
410
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
411
3.82k
      const {
412
3.82k
    MachineInstr &ClauseHead = *I;
413
3.82k
    std::vector<MachineInstr *> ClauseContent;
414
3.82k
    I++;
415
30.5k
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
416
30.3k
      if (IsTrivialInst(*I)) {
417
287
        ++I;
418
287
        continue;
419
287
      }
420
30.0k
      if (!I->isBundle() && 
!TII->isALUInstr(I->getOpcode())16.2k
)
421
3.53k
        break;
422
26.4k
      std::vector<MachineOperand *>Literals;
423
26.4k
      if (I->isBundle()) {
424
13.7k
        MachineInstr &DeleteMI = *I;
425
13.7k
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
426
51.5k
        while (++BI != E && BI->isBundledWithPred()) {
427
37.8k
          BI->unbundleFromPred();
428
735k
          for (MachineOperand &MO : BI->operands()) {
429
735k
            if (MO.isReg() && 
MO.isInternalRead()151k
)
430
9
              MO.setIsInternalRead(false);
431
735k
          }
432
37.8k
          getLiteral(*BI, Literals);
433
37.8k
          ClauseContent.push_back(&*BI);
434
37.8k
        }
435
13.7k
        I = BI;
436
13.7k
        DeleteMI.eraseFromParent();
437
13.7k
      } else {
438
12.7k
        getLiteral(*I, Literals);
439
12.7k
        ClauseContent.push_back(&*I);
440
12.7k
        I++;
441
12.7k
      }
442
40.3k
      for (unsigned i = 0, e = Literals.size(); i < e; 
i += 213.8k
) {
443
13.8k
        MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
444
13.8k
            TII->get(R600::LITERALS));
445
13.8k
        if (Literals[i]->isImm()) {
446
13.8k
            MILit.addImm(Literals[i]->getImm());
447
13.8k
        } else {
448
15
            MILit.addGlobalAddress(Literals[i]->getGlobal(),
449
15
                                   Literals[i]->getOffset());
450
15
        }
451
13.8k
        if (i + 1 < e) {
452
5.41k
          if (Literals[i + 1]->isImm()) {
453
5.41k
            MILit.addImm(Literals[i + 1]->getImm());
454
5.41k
          } else {
455
0
            MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
456
0
                                   Literals[i + 1]->getOffset());
457
0
          }
458
5.41k
        } else
459
8.43k
          MILit.addImm(0);
460
13.8k
        ClauseContent.push_back(MILit);
461
13.8k
      }
462
26.4k
    }
463
3.82k
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
464
3.82k
    ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
465
3.82k
    return ClauseFile(&ClauseHead, std::move(ClauseContent));
466
3.82k
  }
467
468
  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
469
                       const DebugLoc &DL, ClauseFile &Clause,
470
1.28k
                       unsigned &CfCount) {
471
1.28k
    CounterPropagateAddr(*Clause.first, CfCount);
472
1.28k
    MachineBasicBlock *BB = Clause.first->getParent();
473
1.28k
    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
474
3.23k
    for (unsigned i = 0, e = Clause.second.size(); i < e; 
++i1.95k
) {
475
1.95k
      BB->splice(InsertPos, BB, Clause.second[i]);
476
1.95k
    }
477
1.28k
    CfCount += 2 * Clause.second.size();
478
1.28k
  }
479
480
  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
481
3.43k
                     ClauseFile &Clause, unsigned &CfCount) {
482
3.43k
    Clause.first->getOperand(0).setImm(0);
483
3.43k
    CounterPropagateAddr(*Clause.first, CfCount);
484
3.43k
    MachineBasicBlock *BB = Clause.first->getParent();
485
3.43k
    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
486
52.0k
    for (unsigned i = 0, e = Clause.second.size(); i < e; 
++i48.6k
) {
487
48.6k
      BB->splice(InsertPos, BB, Clause.second[i]);
488
48.6k
    }
489
3.43k
    CfCount += Clause.second.size();
490
3.43k
  }
491
492
4.82k
  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
493
4.82k
    MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
494
4.82k
  }
495
  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
496
20
                            unsigned Addr) const {
497
40
    for (MachineInstr *MI : MIs) {
498
40
      CounterPropagateAddr(*MI, Addr);
499
40
    }
500
20
  }
501
502
public:
503
  static char ID;
504
505
280
  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
506
507
2.29k
  bool runOnMachineFunction(MachineFunction &MF) override {
508
2.29k
    ST = &MF.getSubtarget<R600Subtarget>();
509
2.29k
    MaxFetchInst = ST->getTexVTXClauseSize();
510
2.29k
    TII = ST->getInstrInfo();
511
2.29k
    TRI = ST->getRegisterInfo();
512
2.29k
513
2.29k
    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
514
2.29k
515
2.29k
    CFStack CFStack(ST, MF.getFunction().getCallingConv());
516
4.58k
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
517
2.29k
        ++MB) {
518
2.29k
      MachineBasicBlock &MBB = *MB;
519
2.29k
      unsigned CfCount = 0;
520
2.29k
      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
521
2.29k
      std::vector<MachineInstr * > IfThenElseStack;
522
2.29k
      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
523
15
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
524
15
            getHWInstrDesc(CF_CALL_FS));
525
15
        CfCount++;
526
15
      }
527
2.29k
      std::vector<ClauseFile> FetchClauses, AluClauses;
528
2.29k
      std::vector<MachineInstr *> LastAlu(1);
529
2.29k
      std::vector<MachineInstr *> ToPopAfter;
530
2.29k
531
2.29k
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
532
12.9k
          I != E;) {
533
10.6k
        if (TII->usesTextureCache(*I) || 
TII->usesVertexCache(*I)9.37k
) {
534
1.30k
          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
535
1.30k
          FetchClauses.push_back(MakeFetchClause(MBB, I));
536
1.30k
          CfCount++;
537
1.30k
          LastAlu.back() = nullptr;
538
1.30k
          continue;
539
1.30k
        }
540
9.37k
541
9.37k
        MachineBasicBlock::iterator MI = I;
542
9.37k
        if (MI->getOpcode() != R600::ENDIF)
543
9.31k
          LastAlu.back() = nullptr;
544
9.37k
        if (MI->getOpcode() == R600::CF_ALU)
545
3.76k
          LastAlu.back() = &*MI;
546
9.37k
        I++;
547
9.37k
        bool RequiresWorkAround =
548
9.37k
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
549
9.37k
        switch (MI->getOpcode()) {
550
9.37k
        case R600::CF_ALU_PUSH_BEFORE:
551
64
          if (RequiresWorkAround) {
552
1
            LLVM_DEBUG(dbgs()
553
1
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
554
1
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
555
1
                .addImm(CfCount + 1)
556
1
                .addImm(1);
557
1
            MI->setDesc(TII->get(R600::CF_ALU));
558
1
            CfCount++;
559
1
            CFStack.pushBranch(R600::CF_PUSH_EG);
560
1
          } else
561
63
            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
562
64
          LLVM_FALLTHROUGH;
563
3.82k
        case R600::CF_ALU:
564
3.82k
          I = MI;
565
3.82k
          AluClauses.push_back(MakeALUClause(MBB, I));
566
3.82k
          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
567
3.82k
          CfCount++;
568
3.82k
          break;
569
64
        case R600::WHILELOOP: {
570
20
          CFStack.pushLoop();
571
20
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
572
20
              getHWInstrDesc(CF_WHILE_LOOP))
573
20
              .addImm(1);
574
20
          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
575
20
              std::set<MachineInstr *>());
576
20
          Pair.second.insert(MIb);
577
20
          LoopStack.push_back(std::move(Pair));
578
20
          MI->eraseFromParent();
579
20
          CfCount++;
580
20
          break;
581
64
        }
582
64
        case R600::ENDLOOP: {
583
20
          CFStack.popLoop();
584
20
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
585
20
              std::move(LoopStack.back());
586
20
          LoopStack.pop_back();
587
20
          CounterPropagateAddr(Pair.second, CfCount);
588
20
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
589
20
              .addImm(Pair.first + 1);
590
20
          MI->eraseFromParent();
591
20
          CfCount++;
592
20
          break;
593
64
        }
594
64
        case R600::IF_PREDICATE_SET: {
595
64
          LastAlu.push_back(nullptr);
596
64
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
597
64
              getHWInstrDesc(CF_JUMP))
598
64
              .addImm(0)
599
64
              .addImm(0);
600
64
          IfThenElseStack.push_back(MIb);
601
64
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
602
64
          MI->eraseFromParent();
603
64
          CfCount++;
604
64
          break;
605
64
        }
606
64
        case R600::ELSE: {
607
3
          MachineInstr * JumpInst = IfThenElseStack.back();
608
3
          IfThenElseStack.pop_back();
609
3
          CounterPropagateAddr(*JumpInst, CfCount);
610
3
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
611
3
              getHWInstrDesc(CF_ELSE))
612
3
              .addImm(0)
613
3
              .addImm(0);
614
3
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
615
3
          IfThenElseStack.push_back(MIb);
616
3
          MI->eraseFromParent();
617
3
          CfCount++;
618
3
          break;
619
64
        }
620
64
        case R600::ENDIF: {
621
64
          CFStack.popBranch();
622
64
          if (LastAlu.back()) {
623
24
            ToPopAfter.push_back(LastAlu.back());
624
40
          } else {
625
40
            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
626
40
                getHWInstrDesc(CF_POP))
627
40
                .addImm(CfCount + 1)
628
40
                .addImm(1);
629
40
            (void)MIb;
630
40
            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
631
40
            CfCount++;
632
40
          }
633
64
634
64
          MachineInstr *IfOrElseInst = IfThenElseStack.back();
635
64
          IfThenElseStack.pop_back();
636
64
          CounterPropagateAddr(*IfOrElseInst, CfCount);
637
64
          IfOrElseInst->getOperand(1).setImm(1);
638
64
          LastAlu.pop_back();
639
64
          MI->eraseFromParent();
640
64
          break;
641
64
        }
642
64
        case R600::BREAK: {
643
20
          CfCount ++;
644
20
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
645
20
              getHWInstrDesc(CF_LOOP_BREAK))
646
20
              .addImm(0);
647
20
          LoopStack.back().second.insert(MIb);
648
20
          MI->eraseFromParent();
649
20
          break;
650
64
        }
651
64
        case R600::CONTINUE: {
652
0
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
653
0
              getHWInstrDesc(CF_LOOP_CONTINUE))
654
0
              .addImm(0);
655
0
          LoopStack.back().second.insert(MIb);
656
0
          MI->eraseFromParent();
657
0
          CfCount++;
658
0
          break;
659
64
        }
660
2.00k
        case R600::RETURN: {
661
2.00k
          DebugLoc DL = MBB.findDebugLoc(MI);
662
2.00k
          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
663
2.00k
          CfCount++;
664
2.00k
          if (CfCount % 2) {
665
1.65k
            BuildMI(MBB, I, DL, TII->get(R600::PAD));
666
1.65k
            CfCount++;
667
1.65k
          }
668
2.00k
          MI->eraseFromParent();
669
3.28k
          for (unsigned i = 0, e = FetchClauses.size(); i < e; 
i++1.28k
)
670
1.28k
            EmitFetchClause(I, DL, FetchClauses[i], CfCount);
671
5.44k
          for (unsigned i = 0, e = AluClauses.size(); i < e; 
i++3.43k
)
672
3.43k
            EmitALUClause(I, DL, AluClauses[i], CfCount);
673
2.00k
          break;
674
64
        }
675
3.35k
        default:
676
3.35k
          if (TII->isExport(MI->getOpcode())) {
677
3.33k
            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
678
3.33k
            CfCount++;
679
3.33k
          }
680
3.35k
          break;
681
9.37k
        }
682
9.37k
      }
683
2.31k
      
for (unsigned i = 0, e = ToPopAfter.size(); 2.29k
i < e;
++i24
) {
684
24
        MachineInstr *Alu = ToPopAfter[i];
685
24
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
686
24
            TII->get(R600::CF_ALU_POP_AFTER))
687
24
            .addImm(Alu->getOperand(0).getImm())
688
24
            .addImm(Alu->getOperand(1).getImm())
689
24
            .addImm(Alu->getOperand(2).getImm())
690
24
            .addImm(Alu->getOperand(3).getImm())
691
24
            .addImm(Alu->getOperand(4).getImm())
692
24
            .addImm(Alu->getOperand(5).getImm())
693
24
            .addImm(Alu->getOperand(6).getImm())
694
24
            .addImm(Alu->getOperand(7).getImm())
695
24
            .addImm(Alu->getOperand(8).getImm());
696
24
        Alu->eraseFromParent();
697
24
      }
698
2.29k
      MFI->CFStackSize = CFStack.MaxStackSize;
699
2.29k
    }
700
2.29k
701
2.29k
    return false;
702
2.29k
  }
703
704
2.57k
  StringRef getPassName() const override {
705
2.57k
    return "R600 Control Flow Finalizer Pass";
706
2.57k
  }
707
};
708
709
} // end anonymous namespace
710
711
101k
INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
712
101k
                     "R600 Control Flow Finalizer", false, false)
713
101k
INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
714
                    "R600 Control Flow Finalizer", false, false)
715
716
char R600ControlFlowFinalizer::ID = 0;
717
718
char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
719
720
280
FunctionPass *llvm::createR600ControlFlowFinalizer() {
721
280
  return new R600ControlFlowFinalizer();
722
280
}