Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//==-----------------------------------------------------------------------===//
9
10
#include "AMDGPU.h"
11
#include "AMDGPUSubtarget.h"
12
#include "R600InstrInfo.h"
13
#include "R600RegisterInfo.h"
14
#include "llvm/ADT/DepthFirstIterator.h"
15
#include "llvm/ADT/SCCIterator.h"
16
#include "llvm/ADT/SmallPtrSet.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/ADT/Statistic.h"
19
#include "llvm/ADT/StringRef.h"
20
#include "llvm/CodeGen/MachineBasicBlock.h"
21
#include "llvm/CodeGen/MachineDominators.h"
22
#include "llvm/CodeGen/MachineFunction.h"
23
#include "llvm/CodeGen/MachineFunctionPass.h"
24
#include "llvm/CodeGen/MachineInstr.h"
25
#include "llvm/CodeGen/MachineInstrBuilder.h"
26
#include "llvm/CodeGen/MachineJumpTableInfo.h"
27
#include "llvm/CodeGen/MachineLoopInfo.h"
28
#include "llvm/CodeGen/MachineOperand.h"
29
#include "llvm/CodeGen/MachinePostDominators.h"
30
#include "llvm/CodeGen/MachineRegisterInfo.h"
31
#include "llvm/CodeGen/MachineValueType.h"
32
#include "llvm/IR/DebugLoc.h"
33
#include "llvm/IR/LLVMContext.h"
34
#include "llvm/Pass.h"
35
#include "llvm/Support/Debug.h"
36
#include "llvm/Support/ErrorHandling.h"
37
#include "llvm/Support/raw_ostream.h"
38
#include <cassert>
39
#include <cstddef>
40
#include <deque>
41
#include <iterator>
42
#include <map>
43
#include <utility>
44
#include <vector>
45
46
using namespace llvm;
47
48
#define DEBUG_TYPE "structcfg"
49
50
#define DEFAULT_VEC_SLOTS 8
51
52
// TODO: move-begin.
53
54
//===----------------------------------------------------------------------===//
55
//
56
// Statistics for CFGStructurizer.
57
//
58
//===----------------------------------------------------------------------===//
59
60
STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
61
    "matched");
62
STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
63
    "matched");
64
STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
65
STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
66
67
namespace llvm {
68
69
void initializeAMDGPUCFGStructurizerPass(PassRegistry &);
70
71
} // end namespace llvm
72
73
namespace {
74
75
//===----------------------------------------------------------------------===//
76
//
77
// Miscellaneous utility for CFGStructurizer.
78
//
79
//===----------------------------------------------------------------------===//
80
81
#define SHOWNEWINSTR(i) \
82
190
  DEBUG(dbgs() << "New instr: " << *i << "\n");
83
84
5
#define SHOWNEWBLK(b, msg) \
85
5
DEBUG( \
86
5
  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
87
5
  dbgs() << "\n"; \
88
5
);
89
90
#define SHOWBLK_DETAIL(b, msg) \
91
DEBUG( \
92
  if (b) { \
93
  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
94
  b->print(dbgs()); \
95
  dbgs() << "\n"; \
96
  } \
97
);
98
99
2.17k
#define INVALIDSCCNUM -1
100
101
//===----------------------------------------------------------------------===//
102
//
103
// supporting data structure for CFGStructurizer
104
//
105
//===----------------------------------------------------------------------===//
106
107
class BlockInformation {
108
public:
109
  bool IsRetired = false;
110
  int SccNum = INVALIDSCCNUM;
111
112
2.18k
  BlockInformation() = default;
113
};
114
115
//===----------------------------------------------------------------------===//
116
//
117
// CFGStructurizer
118
//
119
//===----------------------------------------------------------------------===//
120
121
class AMDGPUCFGStructurizer : public MachineFunctionPass {
122
public:
123
  using MBBVector = SmallVector<MachineBasicBlock *, 32>;
124
  using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
125
  using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
126
127
  enum PathToKind {
128
    Not_SinglePath = 0,
129
    SinglePath_InPath = 1,
130
    SinglePath_NotInPath = 2
131
  };
132
133
  static char ID;
134
135
244
  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
136
244
    initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
137
244
  }
138
139
244
  StringRef getPassName() const override {
140
244
    return "AMDGPU Control Flow Graph structurizer Pass";
141
244
  }
142
143
244
  void getAnalysisUsage(AnalysisUsage &AU) const override {
144
244
    AU.addRequired<MachineDominatorTree>();
145
244
    AU.addRequired<MachinePostDominatorTree>();
146
244
    AU.addRequired<MachineLoopInfo>();
147
244
    MachineFunctionPass::getAnalysisUsage(AU);
148
244
  }
149
150
  /// Perform the CFG structurization
151
  bool run();
152
153
  /// Perform the CFG preparation
154
  /// This step will remove every unconditionnal/dead jump instructions and make
155
  /// sure all loops have an exit block
156
  bool prepare();
157
158
2.05k
  bool runOnMachineFunction(MachineFunction &MF) override {
159
2.05k
    TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
160
2.05k
    TRI = &TII->getRegisterInfo();
161
2.05k
    DEBUG(MF.dump(););
162
2.05k
    OrderedBlks.clear();
163
2.05k
    Visited.clear();
164
2.05k
    FuncRep = &MF;
165
2.05k
    MLI = &getAnalysis<MachineLoopInfo>();
166
2.05k
    DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
167
2.05k
    MDT = &getAnalysis<MachineDominatorTree>();
168
2.05k
    DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
169
2.05k
    PDT = &getAnalysis<MachinePostDominatorTree>();
170
2.05k
    DEBUG(PDT->print(dbgs()););
171
2.05k
    prepare();
172
2.05k
    run();
173
2.05k
    DEBUG(MF.dump(););
174
2.05k
    return true;
175
2.05k
  }
176
177
protected:
178
  MachineDominatorTree *MDT;
179
  MachinePostDominatorTree *PDT;
180
  MachineLoopInfo *MLI;
181
  const R600InstrInfo *TII = nullptr;
182
  const R600RegisterInfo *TRI = nullptr;
183
184
  // PRINT FUNCTIONS
185
  /// Print the ordered Blocks.
186
0
  void printOrderedBlocks() const {
187
0
    size_t i = 0;
188
0
    for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
189
0
        iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
190
0
      dbgs() << "BB" << (*iterBlk)->getNumber();
191
0
      dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
192
0
      if (i != 0 && i % 10 == 0) {
193
0
        dbgs() << "\n";
194
0
      } else {
195
0
        dbgs() << " ";
196
0
      }
197
0
    }
198
0
  }
199
200
0
  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
201
0
    for (MachineLoop::iterator iter = LoopInfo.begin(),
202
0
         iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
203
0
      (*iter)->print(dbgs(), 0);
204
0
    }
205
0
  }
206
207
  // UTILITY FUNCTIONS
208
  int getSCCNum(MachineBasicBlock *MBB) const;
209
  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
210
  bool hasBackEdge(MachineBasicBlock *MBB) const;
211
  bool isRetiredBlock(MachineBasicBlock *MBB) const;
212
  bool isActiveLoophead(MachineBasicBlock *MBB) const;
213
  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
214
      bool AllowSideEntry = true) const;
215
  int countActiveBlock(MBBVector::const_iterator It,
216
      MBBVector::const_iterator E) const;
217
  bool needMigrateBlock(MachineBasicBlock *MBB) const;
218
219
  // Utility Functions
220
  void reversePredicateSetter(MachineBasicBlock::iterator I,
221
                              MachineBasicBlock &MBB);
222
  /// Compute the reversed DFS post order of Blocks
223
  void orderBlocks(MachineFunction *MF);
224
225
  // Function originally from CFGStructTraits
226
  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
227
                      const DebugLoc &DL = DebugLoc());
228
  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
229
                                  const DebugLoc &DL = DebugLoc());
230
  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
231
  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
232
                              const DebugLoc &DL);
233
  void insertCondBranchBefore(MachineBasicBlock *MBB,
234
                              MachineBasicBlock::iterator I, int NewOpcode,
235
                              int RegNum, const DebugLoc &DL);
236
237
  static int getBranchNzeroOpcode(int OldOpcode);
238
  static int getBranchZeroOpcode(int OldOpcode);
239
  static int getContinueNzeroOpcode(int OldOpcode);
240
  static int getContinueZeroOpcode(int OldOpcode);
241
  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
242
  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
243
  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
244
      MachineInstr *MI);
245
  static bool isCondBranch(MachineInstr *MI);
246
  static bool isUncondBranch(MachineInstr *MI);
247
  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
248
  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
249
250
  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
251
  ///
252
  /// BB with backward-edge could have move instructions after the branch
253
  /// instruction.  Such move instruction "belong to" the loop backward-edge.
254
  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
255
256
  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
257
  static bool isReturnBlock(MachineBasicBlock *MBB);
258
  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
259
      MachineBasicBlock *SrcMBB);
260
  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
261
262
  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
263
  /// because the AMDGPU instruction is not recognized as terminator fix this
264
  /// and retire this routine
265
  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
266
      MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
267
268
  static void wrapup(MachineBasicBlock *MBB);
269
270
  int patternMatch(MachineBasicBlock *MBB);
271
  int patternMatchGroup(MachineBasicBlock *MBB);
272
  int serialPatternMatch(MachineBasicBlock *MBB);
273
  int ifPatternMatch(MachineBasicBlock *MBB);
274
  int loopendPatternMatch();
275
  int mergeLoop(MachineLoop *LoopRep);
276
277
  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
278
  /// the same loop with LoopLandInfo without explicitly keeping track of
279
  /// loopContBlks and loopBreakBlks, this is a method to get the information.
280
  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
281
      MachineBasicBlock *Src2MBB);
282
  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
283
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
284
  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
285
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
286
  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
287
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
288
      MachineBasicBlock **LandMBBPtr);
289
  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
290
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
291
      MachineBasicBlock *LandMBB, bool Detail = false);
292
  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
293
      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
294
  void mergeSerialBlock(MachineBasicBlock *DstMBB,
295
      MachineBasicBlock *SrcMBB);
296
297
  void mergeIfthenelseBlock(MachineInstr *BranchMI,
298
      MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
299
      MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
300
  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
301
      MachineBasicBlock *LandMBB);
302
  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
303
      MachineBasicBlock *LandMBB);
304
  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
305
      MachineBasicBlock *ContMBB);
306
307
  /// normalizeInfiniteLoopExit change
308
  ///   B1:
309
  ///        uncond_br LoopHeader
310
  ///
311
  /// to
312
  ///   B1:
313
  ///        cond_br 1 LoopHeader dummyExit
314
  /// and return the newly added dummy exit block
315
  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
316
  void removeUnconditionalBranch(MachineBasicBlock *MBB);
317
318
  /// Remove duplicate branches instructions in a block.
319
  /// For instance
320
  /// B0:
321
  ///    cond_br X B1 B2
322
  ///    cond_br X B1 B2
323
  /// is transformed to
324
  /// B0:
325
  ///    cond_br X B1 B2
326
  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
327
328
  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
329
  void removeSuccessor(MachineBasicBlock *MBB);
330
  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
331
      MachineBasicBlock *PredMBB);
332
  void migrateInstruction(MachineBasicBlock *SrcMBB,
333
      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
334
  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
335
  void retireBlock(MachineBasicBlock *MBB);
336
337
private:
338
  MBBInfoMap BlockInfoMap;
339
  LoopLandInfoMap LLInfoMap;
340
  std::map<MachineLoop *, bool> Visited;
341
  MachineFunction *FuncRep;
342
  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
343
};
344
345
} // end anonymous namespace
346
347
char AMDGPUCFGStructurizer::ID = 0;
348
349
2.41k
int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
350
2.41k
  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
351
2.41k
  if (It == BlockInfoMap.end())
352
0
    
return 0
INVALIDSCCNUM0
;
353
2.41k
  return (*It).second->SccNum;
354
2.41k
}
355
356
MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
357
0
    const {
358
0
  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
359
0
  if (It == LLInfoMap.end())
360
0
    return nullptr;
361
0
  return (*It).second;
362
0
}
363
364
46
bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
365
46
  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
366
46
  if (!LoopRep)
367
35
    return false;
368
11
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
369
11
  return MBB->isSuccessor(LoopHeader);
370
11
}
371
372
6.52k
bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
373
6.52k
  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
374
6.52k
  if (It == BlockInfoMap.end())
375
0
    return false;
376
6.52k
  return (*It).second->IsRetired;
377
6.52k
}
378
379
74
bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
380
74
  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
381
74
  while (
LoopRep && 74
LoopRep->getHeader() == MBB13
) {
382
0
    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
383
0
    if(!LoopLand)
384
0
      return true;
385
0
    
if (0
!isRetiredBlock(LoopLand)0
)
386
0
      return true;
387
0
    LoopRep = LoopRep->getParentLoop();
388
0
  }
389
74
  return false;
390
74
}
391
392
AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
393
    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
394
4
    bool AllowSideEntry) const {
395
4
  assert(DstMBB);
396
4
  if (SrcMBB == DstMBB)
397
0
    return SinglePath_InPath;
398
10
  
while (4
SrcMBB && 10
SrcMBB->succ_size() == 110
) {
399
10
    SrcMBB = *SrcMBB->succ_begin();
400
10
    if (SrcMBB == DstMBB)
401
4
      return SinglePath_InPath;
402
6
    
if (6
!AllowSideEntry && 6
SrcMBB->pred_size() > 10
)
403
0
      return Not_SinglePath;
404
10
  }
405
0
  
if (0
SrcMBB && 0
SrcMBB->succ_size()==00
)
406
0
    return SinglePath_NotInPath;
407
0
  return Not_SinglePath;
408
0
}
409
410
int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
411
4.21k
    MBBVector::const_iterator E) const {
412
4.21k
  int Count = 0;
413
8.56k
  while (
It != E8.56k
) {
414
4.35k
    if (!isRetiredBlock(*It))
415
4.33k
      ++Count;
416
4.35k
    ++It;
417
4.35k
  }
418
4.21k
  return Count;
419
4.21k
}
420
421
2
bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
422
2
  unsigned BlockSizeThreshold = 30;
423
2
  unsigned CloneInstrThreshold = 100;
424
2
  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
425
2
426
2
  if(!MultiplePreds)
427
1
    return false;
428
1
  unsigned BlkSize = MBB->size();
429
1
  return ((BlkSize > BlockSizeThreshold) &&
430
1
      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
431
2
}
432
433
void AMDGPUCFGStructurizer::reversePredicateSetter(
434
52
    MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
435
52
  assert(I.isValid() && "Expected valid iterator");
436
100
  for (;; 
--I100
) {
437
152
    if (I == MBB.end())
438
36
      continue;
439
116
    
if (116
I->getOpcode() == AMDGPU::PRED_X116
) {
440
52
      switch (I->getOperand(2).getImm()) {
441
14
      case AMDGPU::PRED_SETE_INT:
442
14
        I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
443
14
        return;
444
38
      case AMDGPU::PRED_SETNE_INT:
445
38
        I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
446
38
        return;
447
0
      case AMDGPU::PRED_SETE:
448
0
        I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
449
0
        return;
450
0
      case AMDGPU::PRED_SETNE:
451
0
        I->getOperand(2).setImm(AMDGPU::PRED_SETE);
452
0
        return;
453
0
      default:
454
0
        llvm_unreachable("PRED_X Opcode invalid!");
455
52
      }
456
52
    }
457
152
  }
458
52
}
459
460
void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
461
36
                                           int NewOpcode, const DebugLoc &DL) {
462
36
  MachineInstr *MI =
463
36
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
464
36
  MBB->push_back(MI);
465
36
  //assume the instruction doesn't take any reg operand ...
466
36
  SHOWNEWINSTR(MI);
467
36
}
468
469
MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
470
                                                       int NewOpcode,
471
16
                                                       const DebugLoc &DL) {
472
16
  MachineInstr *MI =
473
16
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
474
16
  if (MBB->begin() != MBB->end())
475
16
    MBB->insert(MBB->begin(), MI);
476
16
  else
477
0
    MBB->push_back(MI);
478
16
  SHOWNEWINSTR(MI);
479
16
  return MI;
480
16
}
481
482
MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
483
80
    MachineBasicBlock::iterator I, int NewOpcode) {
484
80
  MachineInstr *OldMI = &(*I);
485
80
  MachineBasicBlock *MBB = OldMI->getParent();
486
80
  MachineInstr *NewMBB =
487
80
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
488
80
  MBB->insert(I, NewMBB);
489
80
  //assume the instruction doesn't take any reg operand ...
490
80
  SHOWNEWINSTR(NewMBB);
491
80
  return NewMBB;
492
80
}
493
494
void AMDGPUCFGStructurizer::insertCondBranchBefore(
495
42
    MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
496
42
  MachineInstr *OldMI = &(*I);
497
42
  MachineBasicBlock *MBB = OldMI->getParent();
498
42
  MachineFunction *MF = MBB->getParent();
499
42
  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
500
42
  MBB->insert(I, NewMI);
501
42
  MachineInstrBuilder MIB(*MF, NewMI);
502
42
  MIB.addReg(OldMI->getOperand(1).getReg(), false);
503
42
  SHOWNEWINSTR(NewMI);
504
42
  //erase later oldInstr->eraseFromParent();
505
42
}
506
507
void AMDGPUCFGStructurizer::insertCondBranchBefore(
508
    MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
509
16
    int RegNum, const DebugLoc &DL) {
510
16
  MachineFunction *MF = blk->getParent();
511
16
  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
512
16
  //insert before
513
16
  blk->insert(I, NewInstr);
514
16
  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
515
16
  SHOWNEWINSTR(NewInstr);
516
16
}
517
518
42
int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
519
42
  switch(OldOpcode) {
520
42
  case AMDGPU::JUMP_COND:
521
42
  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
522
0
  case AMDGPU::BRANCH_COND_i32:
523
0
  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
524
0
  
default: 0
llvm_unreachable0
("internal error");
525
0
  }
526
0
  return -1;
527
0
}
528
529
0
int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
530
0
  switch(OldOpcode) {
531
0
  case AMDGPU::JUMP_COND:
532
0
  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
533
0
  case AMDGPU::BRANCH_COND_i32:
534
0
  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
535
0
  
default: 0
llvm_unreachable0
("internal error");
536
0
  }
537
0
  return -1;
538
0
}
539
540
0
int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
541
0
  switch(OldOpcode) {
542
0
  case AMDGPU::JUMP_COND:
543
0
  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
544
0
  
default: 0
llvm_unreachable0
("internal error");
545
0
  }
546
0
  return -1;
547
0
}
548
549
0
int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
550
0
  switch(OldOpcode) {
551
0
  case AMDGPU::JUMP_COND:
552
0
  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
553
0
  
default: 0
llvm_unreachable0
("internal error");
554
0
  }
555
0
  return -1;
556
0
}
557
558
109
MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
559
109
  return MI->getOperand(0).getMBB();
560
109
}
561
562
void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
563
1
    MachineBasicBlock *MBB) {
564
1
  MI->getOperand(0).setMBB(MBB);
565
1
}
566
567
MachineBasicBlock *
568
AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
569
46
    MachineInstr *MI) {
570
46
  assert(MBB->succ_size() == 2);
571
46
  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
572
46
  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
573
46
  MachineBasicBlock::succ_iterator Next = It;
574
46
  ++Next;
575
46
  return (*It == TrueBranch) ? 
*Next4
:
*It42
;
576
46
}
577
578
2.31k
bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
579
2.31k
  switch (MI->getOpcode()) {
580
122
    case AMDGPU::JUMP_COND:
581
122
    case AMDGPU::BRANCH_COND_i32:
582
122
    case AMDGPU::BRANCH_COND_f32: return true;
583
2.19k
  default:
584
2.19k
    return false;
585
0
  }
586
0
  return false;
587
0
}
588
589
2.25k
bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
590
2.25k
  switch (MI->getOpcode()) {
591
2
  case AMDGPU::JUMP:
592
2
  case AMDGPU::BRANCH:
593
2
    return true;
594
2.25k
  default:
595
2.25k
    return false;
596
0
  }
597
0
  return false;
598
0
}
599
600
16
DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
601
16
  //get DebugLoc from the first MachineBasicBlock instruction with debug info
602
16
  DebugLoc DL;
603
227
  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
604
211
      
++It211
) {
605
211
    MachineInstr *instr = &(*It);
606
211
    if (instr->getDebugLoc())
607
0
      DL = instr->getDebugLoc();
608
211
  }
609
16
  return DL;
610
16
}
611
612
MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
613
46
    MachineBasicBlock *MBB) {
614
46
  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
615
46
  MachineInstr *MI = &*It;
616
46
  if (
MI && 46
(isCondBranch(MI) || 46
isUncondBranch(MI)0
))
617
46
    return MI;
618
0
  return nullptr;
619
0
}
620
621
MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
622
2.21k
    MachineBasicBlock *MBB) {
623
2.21k
  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
624
2.27k
      
It != E2.27k
;
++It64
) {
625
2.27k
    // FIXME: Simplify
626
2.27k
    MachineInstr *MI = &*It;
627
2.27k
    if (
MI2.27k
) {
628
2.27k
      if (
isCondBranch(MI) || 2.27k
isUncondBranch(MI)2.19k
)
629
76
        return MI;
630
2.19k
      else 
if (2.19k
!TII->isMov(MI->getOpcode())2.19k
)
631
2.13k
        break;
632
2.27k
    }
633
2.27k
  }
634
2.13k
  return nullptr;
635
2.21k
}
636
637
2.18k
MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
638
2.18k
  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
639
2.18k
  if (
It != MBB->rend()2.18k
) {
640
2.18k
    MachineInstr *instr = &(*It);
641
2.18k
    if (instr->getOpcode() == AMDGPU::RETURN)
642
2.06k
      return instr;
643
117
  }
644
117
  return nullptr;
645
117
}
646
647
2.17k
bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
648
2.17k
  MachineInstr *MI = getReturnInstr(MBB);
649
2.17k
  bool IsReturn = (MBB->succ_size() == 0);
650
2.17k
  if (MI)
651
2.17k
    assert(IsReturn);
652
116
  else 
if (116
IsReturn116
)
653
3
    DEBUG(
654
2.17k
      dbgs() << "BB" << MBB->getNumber()
655
2.17k
             <<" is return block without RETURN instr\n";);
656
2.17k
  return  IsReturn;
657
2.17k
}
658
659
void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
660
75
    MachineBasicBlock *SrcMBB) {
661
75
  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
662
95
       iterEnd = SrcMBB->succ_end(); 
It != iterEnd95
;
++It20
)
663
20
    DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
664
75
}
665
666
1
MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
667
1
  MachineFunction *Func = MBB->getParent();
668
1
  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
669
1
  Func->push_back(NewMBB);  //insert to function
670
1
  for (const MachineInstr &It : *MBB)
671
130
    NewMBB->push_back(Func->CloneMachineInstr(&It));
672
1
  return NewMBB;
673
1
}
674
675
void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
676
    MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
677
1
    MachineBasicBlock *NewBlk) {
678
1
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
679
1
  if (
BranchMI && 1
isCondBranch(BranchMI)1
&&
680
1
      getTrueBranch(BranchMI) == OldMBB)
681
1
    setTrueBranch(BranchMI, NewBlk);
682
1
}
683
684
2.05k
void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
685
2.05k
  assert((!MBB->getParent()->getJumpTableInfo()
686
2.05k
          || MBB->getParent()->getJumpTableInfo()->isEmpty())
687
2.05k
         && "found a jump table");
688
2.05k
689
2.05k
   //collect continue right before endloop
690
2.05k
   SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
691
2.05k
   MachineBasicBlock::iterator Pre = MBB->begin();
692
2.05k
   MachineBasicBlock::iterator E = MBB->end();
693
2.05k
   MachineBasicBlock::iterator It = Pre;
694
59.7k
   while (
It != E59.7k
) {
695
57.6k
     if (Pre->getOpcode() == AMDGPU::CONTINUE
696
16
         && It->getOpcode() == AMDGPU::ENDLOOP)
697
16
       ContInstr.push_back(&*Pre);
698
57.6k
     Pre = It;
699
57.6k
     ++It;
700
57.6k
   }
701
2.05k
702
2.05k
   //delete continue right before endloop
703
2.07k
   for (unsigned i = 0; 
i < ContInstr.size()2.07k
;
++i16
)
704
16
      ContInstr[i]->eraseFromParent();
705
2.05k
706
2.05k
   // TODO to fix up jump table so later phase won't be confused.  if
707
2.05k
   // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
708
2.05k
   // there isn't such an interface yet.  alternatively, replace all the other
709
2.05k
   // blocks in the jump table with the entryBlk //}
710
2.05k
}
711
712
2.05k
bool AMDGPUCFGStructurizer::prepare() {
713
2.05k
  bool Changed = false;
714
2.05k
715
2.05k
  //FIXME: if not reducible flow graph, make it so ???
716
2.05k
717
2.05k
  DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
718
2.05k
719
2.05k
  orderBlocks(FuncRep);
720
2.05k
721
2.05k
  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
722
2.05k
723
2.05k
  // Add an ExitBlk to loop that don't have one
724
2.05k
  for (MachineLoopInfo::iterator It = MLI->begin(),
725
2.07k
       E = MLI->end(); 
It != E2.07k
;
++It14
) {
726
14
    MachineLoop *LoopRep = (*It);
727
14
    MBBVector ExitingMBBs;
728
14
    LoopRep->getExitingBlocks(ExitingMBBs);
729
14
730
14
    if (
ExitingMBBs.size() == 014
) {
731
0
      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
732
0
      if (DummyExitBlk)
733
0
        RetBlks.push_back(DummyExitBlk);
734
0
    }
735
14
  }
736
2.05k
737
2.05k
  // Remove unconditional branch instr.
738
2.05k
  // Add dummy exit block iff there are multiple returns.
739
2.05k
  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
740
4.23k
       It = OrderedBlks.begin(), E = OrderedBlks.end(); 
It != E4.23k
;
++It2.17k
) {
741
2.17k
    MachineBasicBlock *MBB = *It;
742
2.17k
    removeUnconditionalBranch(MBB);
743
2.17k
    removeRedundantConditionalBranch(MBB);
744
2.17k
    if (
isReturnBlock(MBB)2.17k
) {
745
2.06k
      RetBlks.push_back(MBB);
746
2.06k
    }
747
2.17k
    assert(MBB->succ_size() <= 2);
748
2.17k
  }
749
2.05k
750
2.05k
  if (
RetBlks.size() >= 22.05k
) {
751
4
    addDummyExitBlock(RetBlks);
752
4
    Changed = true;
753
4
  }
754
2.05k
755
2.05k
  return Changed;
756
2.05k
}
757
758
2.05k
bool AMDGPUCFGStructurizer::run() {
759
2.05k
  //Assume reducible CFG...
760
2.05k
  DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
761
2.05k
762
#ifdef STRESSTEST
763
  //Use the worse block ordering to test the algorithm.
764
  ReverseVector(orderedBlks);
765
#endif
766
767
2.05k
  DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
768
2.05k
  int NumIter = 0;
769
2.05k
  bool Finish = false;
770
2.05k
  MachineBasicBlock *MBB;
771
2.05k
  bool MakeProgress = false;
772
2.05k
  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
773
2.05k
                                        OrderedBlks.end());
774
2.05k
775
2.05k
  do {
776
2.05k
    ++NumIter;
777
2.05k
    DEBUG(
778
2.05k
      dbgs() << "numIter = " << NumIter
779
2.05k
             << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
780
2.05k
    );
781
2.05k
782
2.05k
    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
783
2.05k
        OrderedBlks.begin();
784
2.05k
    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
785
2.05k
        OrderedBlks.end();
786
2.05k
787
2.05k
    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
788
2.05k
        It;
789
2.05k
    MachineBasicBlock *SccBeginMBB = nullptr;
790
2.05k
    int SccNumBlk = 0;  // The number of active blocks, init to a
791
2.05k
                        // maximum possible number.
792
2.05k
    int SccNumIter;     // Number of iteration in this SCC.
793
2.05k
794
4.23k
    while (
It != E4.23k
) {
795
2.17k
      MBB = *It;
796
2.17k
797
2.17k
      if (
!SccBeginMBB2.17k
) {
798
2.15k
        SccBeginIter = It;
799
2.15k
        SccBeginMBB = MBB;
800
2.15k
        SccNumIter = 0;
801
2.15k
        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
802
2.15k
        DEBUG(
803
2.15k
              dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
804
2.15k
              dbgs() << "\n";
805
2.15k
        );
806
2.15k
      }
807
2.17k
808
2.17k
      if (!isRetiredBlock(MBB))
809
2.15k
        patternMatch(MBB);
810
2.17k
811
2.17k
      ++It;
812
2.17k
813
2.17k
      bool ContNextScc = true;
814
2.17k
      if (It == E
815
2.17k
          || 
getSCCNum(SccBeginMBB) != getSCCNum(*It)117
) {
816
2.15k
        // Just finish one scc.
817
2.15k
        ++SccNumIter;
818
2.15k
        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
819
2.15k
        if (
sccRemainedNumBlk != 1 && 2.15k
sccRemainedNumBlk >= SccNumBlk0
) {
820
0
          DEBUG(
821
0
            dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
822
0
                   << ", sccNumIter = " << SccNumIter;
823
0
            dbgs() << "doesn't make any progress\n";
824
0
          );
825
0
          ContNextScc = true;
826
2.15k
        } else 
if (2.15k
sccRemainedNumBlk != 1 && 2.15k
sccRemainedNumBlk < SccNumBlk0
) {
827
0
          SccNumBlk = sccRemainedNumBlk;
828
0
          It = SccBeginIter;
829
0
          ContNextScc = false;
830
0
          DEBUG(
831
0
            dbgs() << "repeat processing SCC" << getSCCNum(MBB)
832
0
                   << "sccNumIter = " << SccNumIter << '\n';
833
0
          );
834
2.15k
        } else {
835
2.15k
          // Finish the current scc.
836
2.15k
          ContNextScc = true;
837
2.15k
        }
838
2.17k
      } else {
839
20
        // Continue on next component in the current scc.
840
20
        ContNextScc = false;
841
20
      }
842
2.17k
843
2.17k
      if (ContNextScc)
844
2.15k
        SccBeginMBB = nullptr;
845
2.17k
    } //while, "one iteration" over the function.
846
2.05k
847
2.05k
    MachineBasicBlock *EntryMBB =
848
2.05k
        *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
849
2.05k
    if (
EntryMBB->succ_size() == 02.05k
) {
850
2.05k
      Finish = true;
851
2.05k
      DEBUG(
852
2.05k
        dbgs() << "Reduce to one block\n";
853
2.05k
      );
854
0
    } else {
855
0
      int NewnumRemainedBlk
856
0
        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
857
0
      // consider cloned blocks ??
858
0
      if (
NewnumRemainedBlk == 1 || 0
NewnumRemainedBlk < NumRemainedBlk0
) {
859
0
        MakeProgress = true;
860
0
        NumRemainedBlk = NewnumRemainedBlk;
861
0
      } else {
862
0
        MakeProgress = false;
863
0
        DEBUG(
864
0
          dbgs() << "No progress\n";
865
0
        );
866
0
      }
867
0
    }
868
2.05k
  } while (
!Finish && 2.05k
MakeProgress0
);
869
2.05k
870
2.05k
  // Misc wrap up to maintain the consistency of the Function representation.
871
2.05k
  wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
872
2.05k
873
2.05k
  // Detach retired Block, release memory.
874
2.05k
  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
875
4.24k
      
It != E4.24k
;
++It2.18k
) {
876
2.18k
    if (
(*It).second && 2.18k
(*It).second->IsRetired2.18k
) {
877
122
      assert(((*It).first)->getNumber() != -1);
878
122
      DEBUG(
879
122
        dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
880
122
      );
881
122
      (*It).first->eraseFromParent();  //Remove from the parent Function.
882
122
    }
883
2.18k
    delete (*It).second;
884
2.18k
  }
885
2.05k
  BlockInfoMap.clear();
886
2.05k
  LLInfoMap.clear();
887
2.05k
888
2.05k
  if (
!Finish2.05k
) {
889
0
    DEBUG(FuncRep->viewCFG());
890
0
    report_fatal_error("IRREDUCIBLE_CFG");
891
0
  }
892
2.05k
893
2.05k
  return true;
894
2.05k
}
895
896
2.05k
void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
897
2.05k
  int SccNum = 0;
898
2.05k
  MachineBasicBlock *MBB;
899
4.21k
  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
900
2.15k
       
++It, ++SccNum2.15k
) {
901
2.15k
    const std::vector<MachineBasicBlock *> &SccNext = *It;
902
2.15k
    for (std::vector<MachineBasicBlock *>::const_iterator
903
2.15k
         blockIter = SccNext.begin(), blockEnd = SccNext.end();
904
4.33k
         
blockIter != blockEnd4.33k
;
++blockIter2.17k
) {
905
2.17k
      MBB = *blockIter;
906
2.17k
      OrderedBlks.push_back(MBB);
907
2.17k
      recordSccnum(MBB, SccNum);
908
2.17k
    }
909
2.15k
  }
910
2.05k
911
2.05k
  // walk through all the block in func to check for unreachable
912
2.17k
  for (auto *MBB : nodes(MF)) {
913
2.17k
    SccNum = getSCCNum(MBB);
914
2.17k
    if (
SccNum == 2.17k
INVALIDSCCNUM2.17k
)
915
0
      dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
916
2.17k
  }
917
2.05k
}
918
919
2.15k
int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
920
2.15k
  int NumMatch = 0;
921
2.15k
  int CurMatch;
922
2.15k
923
2.15k
  DEBUG(
924
2.15k
        dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
925
2.15k
  );
926
2.15k
927
2.26k
  while ((CurMatch = patternMatchGroup(MBB)) > 0)
928
110
    NumMatch += CurMatch;
929
2.15k
930
2.15k
  DEBUG(
931
2.15k
        dbgs() << "End patternMatch BB" << MBB->getNumber()
932
2.15k
      << ", numMatch = " << NumMatch << "\n";
933
2.15k
  );
934
2.15k
935
2.15k
  return NumMatch;
936
2.15k
}
937
938
2.26k
int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
939
2.26k
  int NumMatch = 0;
940
2.26k
  NumMatch += loopendPatternMatch();
941
2.26k
  NumMatch += serialPatternMatch(MBB);
942
2.26k
  NumMatch += ifPatternMatch(MBB);
943
2.26k
  return NumMatch;
944
2.26k
}
945
946
2.39k
int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
947
2.39k
  if (MBB->succ_size() != 1)
948
2.19k
    return 0;
949
198
950
198
  MachineBasicBlock *childBlk = *MBB->succ_begin();
951
198
  if (
childBlk->pred_size() != 1 || 198
isActiveLoophead(childBlk)74
)
952
124
    return 0;
953
74
954
74
  mergeSerialBlock(MBB, childBlk);
955
74
  ++numSerialPatternMatch;
956
74
  return 1;
957
74
}
958
959
2.38k
int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
960
2.38k
  //two edges
961
2.38k
  if (MBB->succ_size() != 2)
962
2.34k
    return 0;
963
46
  
if (46
hasBackEdge(MBB)46
)
964
0
    return 0;
965
46
  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
966
46
  if (!BranchMI)
967
0
    return 0;
968
46
969
46
  assert(isCondBranch(BranchMI));
970
46
  int NumMatch = 0;
971
46
972
46
  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
973
46
  NumMatch += serialPatternMatch(TrueMBB);
974
46
  NumMatch += ifPatternMatch(TrueMBB);
975
46
  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
976
46
  NumMatch += serialPatternMatch(FalseMBB);
977
46
  NumMatch += ifPatternMatch(FalseMBB);
978
46
  MachineBasicBlock *LandBlk;
979
46
  int Cloned = 0;
980
46
981
46
  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
982
46
  // TODO: Simplify
983
46
  if (
TrueMBB->succ_size() == 1 && 46
FalseMBB->succ_size() == 118
984
46
    && 
*TrueMBB->succ_begin() == *FalseMBB->succ_begin()18
) {
985
6
    // Diamond pattern
986
6
    LandBlk = *TrueMBB->succ_begin();
987
46
  } else 
if (40
TrueMBB->succ_size() == 1 && 40
*TrueMBB->succ_begin() == FalseMBB12
) {
988
0
    // Triangle pattern, false is empty
989
0
    LandBlk = FalseMBB;
990
0
    FalseMBB = nullptr;
991
40
  } else 
if (40
FalseMBB->succ_size() == 1
992
40
             && 
*FalseMBB->succ_begin() == TrueMBB40
) {
993
36
    // Triangle pattern, true is empty
994
36
    // We reverse the predicate to make a triangle, empty false pattern;
995
36
    std::swap(TrueMBB, FalseMBB);
996
36
    reversePredicateSetter(MBB->end(), *MBB);
997
36
    LandBlk = FalseMBB;
998
36
    FalseMBB = nullptr;
999
40
  } else 
if (4
FalseMBB->succ_size() == 1
1000
4
             && 
isSameloopDetachedContbreak(TrueMBB, FalseMBB)4
) {
1001
0
    LandBlk = *FalseMBB->succ_begin();
1002
4
  } else 
if (4
TrueMBB->succ_size() == 1
1003
4
    && 
isSameloopDetachedContbreak(FalseMBB, TrueMBB)4
) {
1004
0
    LandBlk = *TrueMBB->succ_begin();
1005
4
  } else {
1006
4
    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
1007
4
  }
1008
42
1009
42
  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
1010
42
  // new BB created for landBlk==NULL may introduce new challenge to the
1011
42
  // reduction process.
1012
42
  
if (42
LandBlk &&
1013
42
      
((TrueMBB && 42
TrueMBB->pred_size() > 142
)
1014
42
      || 
(FalseMBB && 41
FalseMBB->pred_size() > 15
))) {
1015
1
     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
1016
1
  }
1017
42
1018
42
  if (
TrueMBB && 42
TrueMBB->pred_size() > 142
) {
1019
1
    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
1020
1
    ++Cloned;
1021
1
  }
1022
42
1023
42
  if (
FalseMBB && 42
FalseMBB->pred_size() > 16
) {
1024
0
    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1025
0
    ++Cloned;
1026
0
  }
1027
2.38k
1028
2.38k
  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1029
2.38k
1030
2.38k
  ++numIfPatternMatch;
1031
2.38k
1032
2.38k
  numClonedBlock += Cloned;
1033
2.38k
1034
2.38k
  return 1 + Cloned + NumMatch;
1035
2.38k
}
1036
1037
2.26k
int AMDGPUCFGStructurizer::loopendPatternMatch() {
1038
2.26k
  std::deque<MachineLoop *> NestedLoops;
1039
2.26k
  for (auto &It: *MLI)
1040
92
    for (MachineLoop *ML : depth_first(It))
1041
104
      NestedLoops.push_front(ML);
1042
2.26k
1043
2.26k
  if (NestedLoops.empty())
1044
2.17k
    return 0;
1045
92
1046
92
  // Process nested loop outside->inside (we did push_front),
1047
92
  // so "continue" to a outside loop won't be mistaken as "break"
1048
92
  // of the current loop.
1049
92
  int Num = 0;
1050
104
  for (MachineLoop *ExaminedLoop : NestedLoops) {
1051
104
    if (
ExaminedLoop->getNumBlocks() == 0 || 104
Visited[ExaminedLoop]26
)
1052
88
      continue;
1053
16
    
DEBUG16
(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1054
16
    int NumBreak = mergeLoop(ExaminedLoop);
1055
16
    if (NumBreak == -1)
1056
0
      break;
1057
16
    Num += NumBreak;
1058
16
  }
1059
2.26k
  return Num;
1060
2.26k
}
1061
1062
16
int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1063
16
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1064
16
  MBBVector ExitingMBBs;
1065
16
  LoopRep->getExitingBlocks(ExitingMBBs);
1066
16
  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1067
16
  DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
1068
16
  // We assume a single ExitBlk
1069
16
  MBBVector ExitBlks;
1070
16
  LoopRep->getExitBlocks(ExitBlks);
1071
16
  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
1072
32
  for (unsigned i = 0, e = ExitBlks.size(); 
i < e32
;
++i16
)
1073
16
    ExitBlkSet.insert(ExitBlks[i]);
1074
16
  assert(ExitBlkSet.size() == 1);
1075
16
  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1076
16
  assert(ExitBlk && "Loop has several exit block");
1077
16
  MBBVector LatchBlks;
1078
16
  for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
1079
32
    
if (32
LoopRep->contains(LB)32
)
1080
16
      LatchBlks.push_back(LB);
1081
16
1082
32
  for (unsigned i = 0, e = ExitingMBBs.size(); 
i < e32
;
++i16
)
1083
16
    mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1084
32
  for (unsigned i = 0, e = LatchBlks.size(); 
i < e32
;
++i16
)
1085
16
    settleLoopcontBlock(LatchBlks[i], LoopHeader);
1086
16
  int Match = 0;
1087
26
  do {
1088
26
    Match = 0;
1089
26
    Match += serialPatternMatch(LoopHeader);
1090
26
    Match += ifPatternMatch(LoopHeader);
1091
26
  } while (Match > 0);
1092
16
  mergeLooplandBlock(LoopHeader, ExitBlk);
1093
16
  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1094
16
  if (ParentLoop)
1095
2
    MLI->changeLoopFor(LoopHeader, ParentLoop);
1096
16
  else
1097
14
    MLI->removeBlock(LoopHeader);
1098
16
  Visited[LoopRep] = true;
1099
16
  return 1;
1100
16
}
1101
1102
bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1103
8
    MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1104
8
  if (
Src1MBB->succ_size() == 08
) {
1105
0
    MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1106
0
    if (
LoopRep&& 0
LoopRep == MLI->getLoopFor(Src2MBB)0
) {
1107
0
      MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1108
0
      if (
TheEntry0
) {
1109
0
        DEBUG(
1110
0
          dbgs() << "isLoopContBreakBlock yes src1 = BB"
1111
0
                 << Src1MBB->getNumber()
1112
0
                 << " src2 = BB" << Src2MBB->getNumber() << "\n";
1113
0
        );
1114
0
        return true;
1115
0
      }
1116
8
    }
1117
0
  }
1118
8
  return false;
1119
8
}
1120
1121
int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1122
4
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1123
4
  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1124
4
  if (
Num == 04
) {
1125
0
    DEBUG(
1126
0
      dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
1127
0
    );
1128
0
    Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1129
0
  }
1130
4
  return Num;
1131
4
}
1132
1133
int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1134
4
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1135
4
  int Num = 0;
1136
4
  MachineBasicBlock *DownBlk;
1137
4
1138
4
  //trueBlk could be the common post dominator
1139
4
  DownBlk = TrueMBB;
1140
4
1141
4
  DEBUG(
1142
4
    dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1143
4
           << " true = BB" << TrueMBB->getNumber()
1144
4
           << ", numSucc=" << TrueMBB->succ_size()
1145
4
           << " false = BB" << FalseMBB->getNumber() << "\n";
1146
4
  );
1147
4
1148
4
  while (
DownBlk4
) {
1149
4
    DEBUG(
1150
4
      dbgs() << "check down = BB" << DownBlk->getNumber();
1151
4
    );
1152
4
1153
4
    if (
singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath4
) {
1154
4
      DEBUG(
1155
4
        dbgs() << " working\n";
1156
4
      );
1157
4
1158
4
      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1159
4
      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1160
4
1161
4
      numClonedBlock += Num;
1162
4
      Num += serialPatternMatch(*HeadMBB->succ_begin());
1163
4
      Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
1164
4
      Num += ifPatternMatch(HeadMBB);
1165
4
      assert(Num > 0);
1166
4
1167
4
      break;
1168
4
    }
1169
0
    
DEBUG0
(
1170
0
      dbgs() << " not working\n";
1171
0
    );
1172
0
    DownBlk = (DownBlk->succ_size() == 1) ? 
(*DownBlk->succ_begin())0
:
nullptr0
;
1173
4
  } // walk down the postDomTree
1174
4
1175
4
  return Num;
1176
4
}
1177
1178
#ifndef NDEBUG
1179
void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1180
    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1181
    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1182
  dbgs() << "head = BB" << HeadMBB->getNumber()
1183
         << " size = " << HeadMBB->size();
1184
  if (Detail) {
1185
    dbgs() << "\n";
1186
    HeadMBB->print(dbgs());
1187
    dbgs() << "\n";
1188
  }
1189
1190
  if (TrueMBB) {
1191
    dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1192
           << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1193
    if (Detail) {
1194
      dbgs() << "\n";
1195
      TrueMBB->print(dbgs());
1196
      dbgs() << "\n";
1197
    }
1198
  }
1199
  if (FalseMBB) {
1200
    dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1201
           << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1202
    if (Detail) {
1203
      dbgs() << "\n";
1204
      FalseMBB->print(dbgs());
1205
      dbgs() << "\n";
1206
    }
1207
  }
1208
  if (LandMBB) {
1209
    dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1210
           << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1211
    if (Detail) {
1212
      dbgs() << "\n";
1213
      LandMBB->print(dbgs());
1214
      dbgs() << "\n";
1215
    }
1216
  }
1217
1218
  dbgs() << "\n";
1219
}
1220
#endif
1221
1222
int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1223
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1224
1
    MachineBasicBlock **LandMBBPtr) {
1225
1
  bool MigrateTrue = false;
1226
1
  bool MigrateFalse = false;
1227
1
1228
1
  MachineBasicBlock *LandBlk = *LandMBBPtr;
1229
1
1230
1
  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1231
1
         && (!FalseMBB || FalseMBB->succ_size() <= 1));
1232
1
1233
1
  if (TrueMBB == FalseMBB)
1234
0
    return 0;
1235
1
1236
1
  MigrateTrue = needMigrateBlock(TrueMBB);
1237
1
  MigrateFalse = needMigrateBlock(FalseMBB);
1238
1
1239
1
  if (
!MigrateTrue && 1
!MigrateFalse0
)
1240
0
    return 0;
1241
1
1242
1
  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1243
1
  // have more than one predecessors.  without doing this, its predecessor
1244
1
  // rather than headBlk will have undefined value in initReg.
1245
1
  
if (1
!MigrateTrue && 1
TrueMBB0
&&
TrueMBB->pred_size() > 10
)
1246
0
    MigrateTrue = true;
1247
1
  if (
!MigrateFalse && 1
FalseMBB1
&&
FalseMBB->pred_size() > 11
)
1248
0
    MigrateFalse = true;
1249
1
1250
1
  DEBUG(
1251
1
    dbgs() << "before improveSimpleJumpintoIf: ";
1252
1
    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1253
1
  );
1254
1
1255
1
  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1256
1
  //
1257
1
  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1258
1
  //      {initReg = 0; org falseBlk branch }
1259
1
  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1260
1
  //      => org landBlk
1261
1
  //      if landBlk->pred_size() > 2, put the about if-else inside
1262
1
  //      if (initReg !=2) {...}
1263
1
  //
1264
1
  // add initReg = initVal to headBlk
1265
1
1266
1
  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1267
1
  if (
!MigrateTrue || 1
!MigrateFalse1
) {
1268
1
    // XXX: We have an opportunity here to optimize the "branch into if" case
1269
1
    // here.  Branch into if looks like this:
1270
1
    //                        entry
1271
1
    //                       /     |
1272
1
    //           diamond_head       branch_from
1273
1
    //             /      \           |
1274
1
    // diamond_false        diamond_true
1275
1
    //             \      /
1276
1
    //               done
1277
1
    //
1278
1
    // The diamond_head block begins the "if" and the diamond_true block
1279
1
    // is the block being "branched into".
1280
1
    //
1281
1
    // If MigrateTrue is true, then TrueBB is the block being "branched into"
1282
1
    // and if MigrateFalse is true, then FalseBB is the block being
1283
1
    // "branched into"
1284
1
    //
1285
1
    // Here is the pseudo code for how I think the optimization should work:
1286
1
    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1287
1
    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1288
1
    // 3. Move the branch instruction from diamond_head into its own basic
1289
1
    //    block (new_block).
1290
1
    // 4. Add an unconditional branch from diamond_head to new_block
1291
1
    // 5. Replace the branch instruction in branch_from with an unconditional
1292
1
    //    branch to new_block.  If branch_from has multiple predecessors, then
1293
1
    //    we need to replace the True/False block in the branch
1294
1
    //    instruction instead of replacing it.
1295
1
    // 6. Change the condition of the branch instruction in new_block from
1296
1
    //    COND to (COND || GPR0)
1297
1
    //
1298
1
    // In order insert these MOV instruction, we will need to use the
1299
1
    // RegisterScavenger.  Usually liveness stops being tracked during
1300
1
    // the late machine optimization passes, however if we implement
1301
1
    // bool TargetRegisterInfo::requiresRegisterScavenging(
1302
1
    //                                                const MachineFunction &MF)
1303
1
    // and have it return true, liveness will be tracked correctly
1304
1
    // by generic optimization passes.  We will also need to make sure that
1305
1
    // all of our target-specific passes that run after regalloc and before
1306
1
    // the CFGStructurizer track liveness and we will need to modify this pass
1307
1
    // to correctly track liveness.
1308
1
    //
1309
1
    // After the above changes, the new CFG should look like this:
1310
1
    //                        entry
1311
1
    //                       /     |
1312
1
    //           diamond_head       branch_from
1313
1
    //                       \     /
1314
1
    //                      new_block
1315
1
    //                      /      |
1316
1
    //         diamond_false        diamond_true
1317
1
    //                      \      /
1318
1
    //                        done
1319
1
    //
1320
1
    // Without this optimization, we are forced to duplicate the diamond_true
1321
1
    // block and we will end up with a CFG like this:
1322
1
    //
1323
1
    //                        entry
1324
1
    //                       /     |
1325
1
    //           diamond_head       branch_from
1326
1
    //             /      \                   |
1327
1
    // diamond_false        diamond_true      diamond_true (duplicate)
1328
1
    //             \      /                   |
1329
1
    //               done --------------------|
1330
1
    //
1331
1
    // Duplicating diamond_true can be very costly especially if it has a
1332
1
    // lot of instructions.
1333
1
    return 0;
1334
1
  }
1335
0
1336
0
  int NumNewBlk = 0;
1337
0
1338
0
  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1339
0
1340
0
  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
1341
0
  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
1342
0
1343
0
  if (
LandBlkHasOtherPred0
) {
1344
0
    report_fatal_error("Extra register needed to handle CFG");
1345
0
    unsigned CmpResReg =
1346
0
      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1347
0
    report_fatal_error("Extra compare instruction needed to handle CFG");
1348
0
    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
1349
0
        CmpResReg, DebugLoc());
1350
0
  }
1351
0
1352
0
  // XXX: We are running this after RA, so creating virtual registers will
1353
0
  // cause an assertion failure in the PostRA scheduling pass.
1354
0
  unsigned InitReg =
1355
0
    HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1356
0
  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
1357
0
      DebugLoc());
1358
0
1359
0
  if (
MigrateTrue0
) {
1360
0
    migrateInstruction(TrueMBB, LandBlk, I);
1361
0
    // need to uncondionally insert the assignment to ensure a path from its
1362
0
    // predecessor rather than headBlk has valid value in initReg if
1363
0
    // (initVal != 1).
1364
0
    report_fatal_error("Extra register needed to handle CFG");
1365
0
  }
1366
0
  insertInstrBefore(I, AMDGPU::ELSE);
1367
0
1368
0
  if (
MigrateFalse0
) {
1369
0
    migrateInstruction(FalseMBB, LandBlk, I);
1370
0
    // need to uncondionally insert the assignment to ensure a path from its
1371
0
    // predecessor rather than headBlk has valid value in initReg if
1372
0
    // (initVal != 0)
1373
0
    report_fatal_error("Extra register needed to handle CFG");
1374
0
  }
1375
0
1376
0
  
if (0
LandBlkHasOtherPred0
) {
1377
0
    // add endif
1378
0
    insertInstrBefore(I, AMDGPU::ENDIF);
1379
0
1380
0
    // put initReg = 2 to other predecessors of landBlk
1381
0
    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
1382
0
         PE = LandBlk->pred_end(); 
PI != PE0
;
++PI0
) {
1383
0
      MachineBasicBlock *MBB = *PI;
1384
0
      if (
MBB != TrueMBB && 0
MBB != FalseMBB0
)
1385
0
        report_fatal_error("Extra register needed to handle CFG");
1386
0
    }
1387
0
  }
1388
0
  
DEBUG0
(
1389
0
    dbgs() << "result from improveSimpleJumpintoIf: ";
1390
0
    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
1391
0
  );
1392
0
1393
0
  // update landBlk
1394
0
  *LandMBBPtr = LandBlk;
1395
0
1396
0
  return NumNewBlk;
1397
1
}
1398
1399
void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1400
74
    MachineBasicBlock *SrcMBB) {
1401
74
  DEBUG(
1402
74
    dbgs() << "serialPattern BB" << DstMBB->getNumber()
1403
74
           << " <= BB" << SrcMBB->getNumber() << "\n";
1404
74
  );
1405
74
  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1406
74
1407
74
  DstMBB->removeSuccessor(SrcMBB, true);
1408
74
  cloneSuccessorList(DstMBB, SrcMBB);
1409
74
1410
74
  removeSuccessor(SrcMBB);
1411
74
  MLI->removeBlock(SrcMBB);
1412
74
  retireBlock(SrcMBB);
1413
74
}
1414
1415
void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1416
    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
1417
42
    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1418
42
  assert (TrueMBB);
1419
42
  DEBUG(
1420
42
    dbgs() << "ifPattern BB" << MBB->getNumber();
1421
42
    dbgs() << "{  ";
1422
42
    if (TrueMBB) {
1423
42
      dbgs() << "BB" << TrueMBB->getNumber();
1424
42
    }
1425
42
    dbgs() << "  } else ";
1426
42
    dbgs() << "{  ";
1427
42
    if (FalseMBB) {
1428
42
      dbgs() << "BB" << FalseMBB->getNumber();
1429
42
    }
1430
42
    dbgs() << "  }\n ";
1431
42
    dbgs() << "landBlock: ";
1432
42
    if (!LandMBB) {
1433
42
      dbgs() << "NULL";
1434
42
    } else {
1435
42
      dbgs() << "BB" << LandMBB->getNumber();
1436
42
    }
1437
42
    dbgs() << "\n";
1438
42
  );
1439
42
1440
42
  int OldOpcode = BranchMI->getOpcode();
1441
42
  DebugLoc BranchDL = BranchMI->getDebugLoc();
1442
42
1443
42
//    transform to
1444
42
//    if cond
1445
42
//       trueBlk
1446
42
//    else
1447
42
//       falseBlk
1448
42
//    endif
1449
42
//    landBlk
1450
42
1451
42
  MachineBasicBlock::iterator I = BranchMI;
1452
42
  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1453
42
      BranchDL);
1454
42
1455
42
  if (
TrueMBB42
) {
1456
42
    MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1457
42
    MBB->removeSuccessor(TrueMBB, true);
1458
42
    if (
LandMBB && 42
TrueMBB->succ_size()!=042
)
1459
42
      TrueMBB->removeSuccessor(LandMBB, true);
1460
42
    retireBlock(TrueMBB);
1461
42
    MLI->removeBlock(TrueMBB);
1462
42
  }
1463
42
1464
42
  if (
FalseMBB42
) {
1465
6
    insertInstrBefore(I, AMDGPU::ELSE);
1466
6
    MBB->splice(I, FalseMBB, FalseMBB->begin(),
1467
6
                   FalseMBB->end());
1468
6
    MBB->removeSuccessor(FalseMBB, true);
1469
6
    if (
LandMBB && 6
FalseMBB->succ_size() != 06
)
1470
6
      FalseMBB->removeSuccessor(LandMBB, true);
1471
6
    retireBlock(FalseMBB);
1472
6
    MLI->removeBlock(FalseMBB);
1473
6
  }
1474
42
  insertInstrBefore(I, AMDGPU::ENDIF);
1475
42
1476
42
  BranchMI->eraseFromParent();
1477
42
1478
42
  if (
LandMBB && 42
TrueMBB42
&&
FalseMBB42
)
1479
6
    MBB->addSuccessor(LandMBB);
1480
42
}
1481
1482
void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
1483
16
    MachineBasicBlock *LandMBB) {
1484
16
  DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
1485
16
               << " land = BB" << LandMBB->getNumber() << "\n";);
1486
16
1487
16
  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
1488
16
  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
1489
16
  DstBlk->replaceSuccessor(DstBlk, LandMBB);
1490
16
}
1491
1492
void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
1493
16
    MachineBasicBlock *LandMBB) {
1494
16
  DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
1495
16
               << " land = BB" << LandMBB->getNumber() << "\n";);
1496
16
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1497
16
  assert(BranchMI && isCondBranch(BranchMI));
1498
16
  DebugLoc DL = BranchMI->getDebugLoc();
1499
16
  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
1500
16
  MachineBasicBlock::iterator I = BranchMI;
1501
16
  if (TrueBranch != LandMBB)
1502
16
    reversePredicateSetter(I, *I->getParent());
1503
16
  insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
1504
16
  insertInstrBefore(I, AMDGPU::BREAK);
1505
16
  insertInstrBefore(I, AMDGPU::ENDIF);
1506
16
  //now branchInst can be erase safely
1507
16
  BranchMI->eraseFromParent();
1508
16
  //now take care of successors, retire blocks
1509
16
  ExitingMBB->removeSuccessor(LandMBB, true);
1510
16
}
1511
1512
void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1513
16
    MachineBasicBlock *ContMBB) {
1514
16
  DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1515
16
               << ContingMBB->getNumber()
1516
16
               << ", cont = BB" << ContMBB->getNumber() << "\n";);
1517
16
1518
16
  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1519
16
  if (
MI16
) {
1520
0
    assert(isCondBranch(MI));
1521
0
    MachineBasicBlock::iterator I = MI;
1522
0
    MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1523
0
    int OldOpcode = MI->getOpcode();
1524
0
    DebugLoc DL = MI->getDebugLoc();
1525
0
1526
0
    bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1527
0
1528
0
    if (
!UseContinueLogical0
) {
1529
0
      int BranchOpcode =
1530
0
          TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1531
0
          getBranchZeroOpcode(OldOpcode);
1532
0
      insertCondBranchBefore(I, BranchOpcode, DL);
1533
0
      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1534
0
      insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
1535
0
      insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
1536
0
    } else {
1537
0
      int BranchOpcode =
1538
0
          TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1539
0
          getContinueZeroOpcode(OldOpcode);
1540
0
      insertCondBranchBefore(I, BranchOpcode, DL);
1541
0
    }
1542
0
1543
0
    MI->eraseFromParent();
1544
16
  } else {
1545
16
    // if we've arrived here then we've already erased the branch instruction
1546
16
    // travel back up the basic block to see the last reference of our debug
1547
16
    // location we've just inserted that reference here so it should be
1548
16
    // representative insertEnd to ensure phi-moves, if exist, go before the
1549
16
    // continue-instr.
1550
16
    insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
1551
16
        getLastDebugLocInBB(ContingMBB));
1552
16
  }
1553
16
}
1554
1555
int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1556
8
    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1557
8
  int Cloned = 0;
1558
8
  assert(PreMBB->isSuccessor(SrcMBB));
1559
18
  while (
SrcMBB && 18
SrcMBB != DstMBB18
) {
1560
10
    assert(SrcMBB->succ_size() == 1);
1561
10
    if (
SrcMBB->pred_size() > 110
) {
1562
0
      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1563
0
      ++Cloned;
1564
0
    }
1565
10
1566
10
    PreMBB = SrcMBB;
1567
10
    SrcMBB = *SrcMBB->succ_begin();
1568
10
  }
1569
8
1570
8
  return Cloned;
1571
8
}
1572
1573
MachineBasicBlock *
1574
AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1575
1
    MachineBasicBlock *PredMBB) {
1576
1
  assert(PredMBB->isSuccessor(MBB) &&
1577
1
         "succBlk is not a prececessor of curBlk");
1578
1
1579
1
  MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions
1580
1
  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1581
1
  //srcBlk, oldBlk, newBlk
1582
1
1583
1
  PredMBB->replaceSuccessor(MBB, CloneMBB);
1584
1
1585
1
  // add all successor to cloneBlk
1586
1
  cloneSuccessorList(CloneMBB, MBB);
1587
1
1588
1
  numClonedInstr += MBB->size();
1589
1
1590
1
  DEBUG(
1591
1
    dbgs() << "Cloned block: " << "BB"
1592
1
           << MBB->getNumber() << "size " << MBB->size() << "\n";
1593
1
  );
1594
1
1595
1
  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1596
1
1597
1
  return CloneMBB;
1598
1
}
1599
1600
void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1601
0
    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
1602
0
  MachineBasicBlock::iterator SpliceEnd;
1603
0
  //look for the input branchinstr, not the AMDGPU branchinstr
1604
0
  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1605
0
  if (
!BranchMI0
) {
1606
0
    DEBUG(
1607
0
      dbgs() << "migrateInstruction don't see branch instr\n";
1608
0
    );
1609
0
    SpliceEnd = SrcMBB->end();
1610
0
  } else {
1611
0
    DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
1612
0
    SpliceEnd = BranchMI;
1613
0
  }
1614
0
  DEBUG(
1615
0
    dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
1616
0
      << "srcSize = " << SrcMBB->size() << "\n";
1617
0
  );
1618
0
1619
0
  //splice insert before insertPos
1620
0
  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1621
0
1622
0
  DEBUG(
1623
0
    dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
1624
0
      << "srcSize = " << SrcMBB->size() << '\n';
1625
0
  );
1626
0
}
1627
1628
MachineBasicBlock *
1629
0
AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1630
0
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1631
0
  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1632
0
1633
0
  if (
!LoopHeader || 0
!LoopLatch0
)
1634
0
    return nullptr;
1635
0
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1636
0
  // Is LoopRep an infinite loop ?
1637
0
  if (
!BranchMI || 0
!isUncondBranch(BranchMI)0
)
1638
0
    return nullptr;
1639
0
1640
0
  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1641
0
  FuncRep->push_back(DummyExitBlk);  //insert to function
1642
0
  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1643
0
  DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1644
0
  LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext();
1645
0
  Ctx.emitError("Extra register needed to handle CFG");
1646
0
  return nullptr;
1647
0
}
1648
1649
2.17k
void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1650
2.17k
  MachineInstr *BranchMI;
1651
2.17k
1652
2.17k
  // I saw two unconditional branch in one basic block in example
1653
2.17k
  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1654
2.17k
  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1655
2.17k
          && 
isUncondBranch(BranchMI)59
) {
1656
1
    DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1657
1
    BranchMI->eraseFromParent();
1658
1
  }
1659
2.17k
}
1660
1661
void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1662
2.17k
    MachineBasicBlock *MBB) {
1663
2.17k
  if (MBB->succ_size() != 2)
1664
2.11k
    return;
1665
58
  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1666
58
  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1667
58
  if (MBB1 != MBB2)
1668
58
    return;
1669
0
1670
0
  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1671
0
  assert(BranchMI && isCondBranch(BranchMI));
1672
0
  DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
1673
0
  BranchMI->eraseFromParent();
1674
0
  SHOWNEWBLK(MBB1, "Removing redundant successor");
1675
2.17k
  MBB->removeSuccessor(MBB1, true);
1676
2.17k
}
1677
1678
void AMDGPUCFGStructurizer::addDummyExitBlock(
1679
4
    SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
1680
4
  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1681
4
  FuncRep->push_back(DummyExitBlk);  //insert to function
1682
4
  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
1683
4
1684
4
  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
1685
12
       E = RetMBB.end(); 
It != E12
;
++It8
) {
1686
8
    MachineBasicBlock *MBB = *It;
1687
8
    MachineInstr *MI = getReturnInstr(MBB);
1688
8
    if (MI)
1689
7
      MI->eraseFromParent();
1690
8
    MBB->addSuccessor(DummyExitBlk);
1691
8
    DEBUG(
1692
8
      dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1693
8
             << " successors\n";
1694
8
    );
1695
8
  }
1696
4
  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1697
4
}
1698
1699
74
void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1700
93
  while (MBB->succ_size())
1701
19
    MBB->removeSuccessor(*MBB->succ_begin());
1702
74
}
1703
1704
void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1705
2.17k
    int SccNum) {
1706
2.17k
  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1707
2.17k
  if (!srcBlkInfo)
1708
2.17k
    srcBlkInfo = new BlockInformation();
1709
2.17k
  srcBlkInfo->SccNum = SccNum;
1710
2.17k
}
1711
1712
122
void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1713
122
  DEBUG(
1714
122
        dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
1715
122
  );
1716
122
1717
122
  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1718
122
1719
122
  if (!SrcBlkInfo)
1720
5
    SrcBlkInfo = new BlockInformation();
1721
122
1722
122
  SrcBlkInfo->IsRetired = true;
1723
122
  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
1724
122
         && "can't retire block yet");
1725
122
}
1726
1727
244
INITIALIZE_PASS_BEGIN244
(AMDGPUCFGStructurizer, "amdgpustructurizer",
1728
244
                      "AMDGPU CFG Structurizer", false, false)
1729
244
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
1730
244
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
1731
244
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
1732
244
INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1733
                      "AMDGPU CFG Structurizer", false, false)
1734
1735
244
FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
1736
244
  return new AMDGPUCFGStructurizer();
1737
244
}