Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//==-----------------------------------------------------------------------===//
8
9
#include "AMDGPU.h"
10
#include "AMDGPUSubtarget.h"
11
#include "R600InstrInfo.h"
12
#include "R600RegisterInfo.h"
13
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14
#include "llvm/ADT/DepthFirstIterator.h"
15
#include "llvm/ADT/SCCIterator.h"
16
#include "llvm/ADT/SmallPtrSet.h"
17
#include "llvm/ADT/SmallVector.h"
18
#include "llvm/ADT/Statistic.h"
19
#include "llvm/ADT/StringRef.h"
20
#include "llvm/CodeGen/MachineBasicBlock.h"
21
#include "llvm/CodeGen/MachineDominators.h"
22
#include "llvm/CodeGen/MachineFunction.h"
23
#include "llvm/CodeGen/MachineFunctionPass.h"
24
#include "llvm/CodeGen/MachineInstr.h"
25
#include "llvm/CodeGen/MachineInstrBuilder.h"
26
#include "llvm/CodeGen/MachineJumpTableInfo.h"
27
#include "llvm/CodeGen/MachineLoopInfo.h"
28
#include "llvm/CodeGen/MachineOperand.h"
29
#include "llvm/CodeGen/MachinePostDominators.h"
30
#include "llvm/CodeGen/MachineRegisterInfo.h"
31
#include "llvm/IR/DebugLoc.h"
32
#include "llvm/IR/LLVMContext.h"
33
#include "llvm/Pass.h"
34
#include "llvm/Support/Debug.h"
35
#include "llvm/Support/ErrorHandling.h"
36
#include "llvm/Support/MachineValueType.h"
37
#include "llvm/Support/raw_ostream.h"
38
#include <cassert>
39
#include <cstddef>
40
#include <deque>
41
#include <iterator>
42
#include <map>
43
#include <utility>
44
#include <vector>
45
46
using namespace llvm;
47
48
#define DEBUG_TYPE "structcfg"
49
50
#define DEFAULT_VEC_SLOTS 8
51
52
// TODO: move-begin.
53
54
//===----------------------------------------------------------------------===//
55
//
56
// Statistics for CFGStructurizer.
57
//
58
//===----------------------------------------------------------------------===//
59
60
STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
61
    "matched");
62
STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
63
    "matched");
64
STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
65
STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
66
67
namespace llvm {
68
69
void initializeAMDGPUCFGStructurizerPass(PassRegistry &);
70
71
} // end namespace llvm
72
73
namespace {
74
75
//===----------------------------------------------------------------------===//
76
//
77
// Miscellaneous utility for CFGStructurizer.
78
//
79
//===----------------------------------------------------------------------===//
80
81
212
#define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n");
82
83
#define SHOWNEWBLK(b, msg)                                                     \
84
2
  LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size();  \
85
2
             dbgs() << "\n";);
86
87
#define SHOWBLK_DETAIL(b, msg)                                                 \
88
  LLVM_DEBUG(if (b) {                                                          \
89
    dbgs() << msg << "BB" << b->getNumber() << "size " << b->size();           \
90
    b->print(dbgs());                                                          \
91
    dbgs() << "\n";                                                            \
92
  });
93
94
2.42k
#define INVALIDSCCNUM -1
95
96
//===----------------------------------------------------------------------===//
97
//
98
// supporting data structure for CFGStructurizer
99
//
100
//===----------------------------------------------------------------------===//
101
102
class BlockInformation {
103
public:
104
  bool IsRetired = false;
105
  int SccNum = INVALIDSCCNUM;
106
107
2.42k
  BlockInformation() = default;
108
};
109
110
//===----------------------------------------------------------------------===//
111
//
112
// CFGStructurizer
113
//
114
//===----------------------------------------------------------------------===//
115
116
class AMDGPUCFGStructurizer : public MachineFunctionPass {
117
public:
118
  using MBBVector = SmallVector<MachineBasicBlock *, 32>;
119
  using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
120
  using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
121
122
  enum PathToKind {
123
    Not_SinglePath = 0,
124
    SinglePath_InPath = 1,
125
    SinglePath_NotInPath = 2
126
  };
127
128
  static char ID;
129
130
280
  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
131
280
    initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
132
280
  }
133
134
2.57k
  StringRef getPassName() const override {
135
2.57k
    return "AMDGPU Control Flow Graph structurizer Pass";
136
2.57k
  }
137
138
280
  void getAnalysisUsage(AnalysisUsage &AU) const override {
139
280
    AU.addRequired<MachineDominatorTree>();
140
280
    AU.addRequired<MachinePostDominatorTree>();
141
280
    AU.addRequired<MachineLoopInfo>();
142
280
    MachineFunctionPass::getAnalysisUsage(AU);
143
280
  }
144
145
  /// Perform the CFG structurization
146
  bool run();
147
148
  /// Perform the CFG preparation
149
  /// This step will remove every unconditionnal/dead jump instructions and make
150
  /// sure all loops have an exit block
151
  bool prepare();
152
153
2.29k
  bool runOnMachineFunction(MachineFunction &MF) override {
154
2.29k
    TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
155
2.29k
    TRI = &TII->getRegisterInfo();
156
2.29k
    LLVM_DEBUG(MF.dump(););
157
2.29k
    OrderedBlks.clear();
158
2.29k
    Visited.clear();
159
2.29k
    FuncRep = &MF;
160
2.29k
    MLI = &getAnalysis<MachineLoopInfo>();
161
2.29k
    LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
162
2.29k
    MDT = &getAnalysis<MachineDominatorTree>();
163
2.29k
    LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr););
164
2.29k
    PDT = &getAnalysis<MachinePostDominatorTree>();
165
2.29k
    LLVM_DEBUG(PDT->print(dbgs()););
166
2.29k
    prepare();
167
2.29k
    run();
168
2.29k
    LLVM_DEBUG(MF.dump(););
169
2.29k
    return true;
170
2.29k
  }
171
172
protected:
173
  MachineDominatorTree *MDT;
174
  MachinePostDominatorTree *PDT;
175
  MachineLoopInfo *MLI;
176
  const R600InstrInfo *TII = nullptr;
177
  const R600RegisterInfo *TRI = nullptr;
178
179
  // PRINT FUNCTIONS
180
  /// Print the ordered Blocks.
181
0
  void printOrderedBlocks() const {
182
0
    size_t i = 0;
183
0
    for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
184
0
        iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
185
0
      dbgs() << "BB" << (*iterBlk)->getNumber();
186
0
      dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
187
0
      if (i != 0 && i % 10 == 0) {
188
0
        dbgs() << "\n";
189
0
      } else {
190
0
        dbgs() << " ";
191
0
      }
192
0
    }
193
0
  }
194
195
0
  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
196
0
    for (MachineLoop::iterator iter = LoopInfo.begin(),
197
0
         iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
198
0
      (*iter)->print(dbgs(), 0);
199
0
    }
200
0
  }
201
202
  // UTILITY FUNCTIONS
203
  int getSCCNum(MachineBasicBlock *MBB) const;
204
  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
205
  bool hasBackEdge(MachineBasicBlock *MBB) const;
206
  bool isRetiredBlock(MachineBasicBlock *MBB) const;
207
  bool isActiveLoophead(MachineBasicBlock *MBB) const;
208
  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
209
      bool AllowSideEntry = true) const;
210
  int countActiveBlock(MBBVector::const_iterator It,
211
      MBBVector::const_iterator E) const;
212
  bool needMigrateBlock(MachineBasicBlock *MBB) const;
213
214
  // Utility Functions
215
  void reversePredicateSetter(MachineBasicBlock::iterator I,
216
                              MachineBasicBlock &MBB);
217
  /// Compute the reversed DFS post order of Blocks
218
  void orderBlocks(MachineFunction *MF);
219
220
  // Function originally from CFGStructTraits
221
  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
222
                      const DebugLoc &DL = DebugLoc());
223
  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
224
                                  const DebugLoc &DL = DebugLoc());
225
  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
226
  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
227
                              const DebugLoc &DL);
228
  void insertCondBranchBefore(MachineBasicBlock *MBB,
229
                              MachineBasicBlock::iterator I, int NewOpcode,
230
                              int RegNum, const DebugLoc &DL);
231
232
  static int getBranchNzeroOpcode(int OldOpcode);
233
  static int getBranchZeroOpcode(int OldOpcode);
234
  static int getContinueNzeroOpcode(int OldOpcode);
235
  static int getContinueZeroOpcode(int OldOpcode);
236
  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
237
  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
238
  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
239
      MachineInstr *MI);
240
  static bool isCondBranch(MachineInstr *MI);
241
  static bool isUncondBranch(MachineInstr *MI);
242
  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
243
  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
244
245
  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
246
  ///
247
  /// BB with backward-edge could have move instructions after the branch
248
  /// instruction.  Such move instruction "belong to" the loop backward-edge.
249
  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
250
251
  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
252
  static bool isReturnBlock(MachineBasicBlock *MBB);
253
  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
254
      MachineBasicBlock *SrcMBB);
255
  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
256
257
  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
258
  /// because the AMDGPU instruction is not recognized as terminator fix this
259
  /// and retire this routine
260
  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
261
      MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
262
263
  static void wrapup(MachineBasicBlock *MBB);
264
265
  int patternMatch(MachineBasicBlock *MBB);
266
  int patternMatchGroup(MachineBasicBlock *MBB);
267
  int serialPatternMatch(MachineBasicBlock *MBB);
268
  int ifPatternMatch(MachineBasicBlock *MBB);
269
  int loopendPatternMatch();
270
  int mergeLoop(MachineLoop *LoopRep);
271
272
  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
273
  /// the same loop with LoopLandInfo without explicitly keeping track of
274
  /// loopContBlks and loopBreakBlks, this is a method to get the information.
275
  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
276
      MachineBasicBlock *Src2MBB);
277
  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
278
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
279
  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
280
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
281
  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
282
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
283
      MachineBasicBlock **LandMBBPtr);
284
  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
285
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
286
      MachineBasicBlock *LandMBB, bool Detail = false);
287
  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
288
      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
289
  void mergeSerialBlock(MachineBasicBlock *DstMBB,
290
      MachineBasicBlock *SrcMBB);
291
292
  void mergeIfthenelseBlock(MachineInstr *BranchMI,
293
      MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
294
      MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
295
  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
296
      MachineBasicBlock *LandMBB);
297
  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
298
      MachineBasicBlock *LandMBB);
299
  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
300
      MachineBasicBlock *ContMBB);
301
302
  /// normalizeInfiniteLoopExit change
303
  ///   B1:
304
  ///        uncond_br LoopHeader
305
  ///
306
  /// to
307
  ///   B1:
308
  ///        cond_br 1 LoopHeader dummyExit
309
  /// and return the newly added dummy exit block
310
  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
311
  void removeUnconditionalBranch(MachineBasicBlock *MBB);
312
313
  /// Remove duplicate branches instructions in a block.
314
  /// For instance
315
  /// B0:
316
  ///    cond_br X B1 B2
317
  ///    cond_br X B1 B2
318
  /// is transformed to
319
  /// B0:
320
  ///    cond_br X B1 B2
321
  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
322
323
  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
324
  void removeSuccessor(MachineBasicBlock *MBB);
325
  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
326
      MachineBasicBlock *PredMBB);
327
  void migrateInstruction(MachineBasicBlock *SrcMBB,
328
      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
329
  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
330
  void retireBlock(MachineBasicBlock *MBB);
331
332
private:
333
  MBBInfoMap BlockInfoMap;
334
  LoopLandInfoMap LLInfoMap;
335
  std::map<MachineLoop *, bool> Visited;
336
  MachineFunction *FuncRep;
337
  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
338
};
339
340
} // end anonymous namespace
341
342
char AMDGPUCFGStructurizer::ID = 0;
343
344
2.67k
int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
345
2.67k
  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
346
2.67k
  if (It == BlockInfoMap.end())
347
0
    return INVALIDSCCNUM;
348
2.67k
  return (*It).second->SccNum;
349
2.67k
}
350
351
MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
352
0
    const {
353
0
  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
354
0
  if (It == LLInfoMap.end())
355
0
    return nullptr;
356
0
  return (*It).second;
357
0
}
358
359
48
bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
360
48
  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
361
48
  if (!LoopRep)
362
37
    return false;
363
11
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
364
11
  return MBB->isSuccessor(LoopHeader);
365
11
}
366
367
7.26k
bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
368
7.26k
  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
369
7.26k
  if (It == BlockInfoMap.end())
370
0
    return false;
371
7.26k
  return (*It).second->IsRetired;
372
7.26k
}
373
374
84
bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
375
84
  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
376
84
  while (LoopRep && 
LoopRep->getHeader() == MBB13
) {
377
0
    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
378
0
    if(!LoopLand)
379
0
      return true;
380
0
    if (!isRetiredBlock(LoopLand))
381
0
      return true;
382
0
    LoopRep = LoopRep->getParentLoop();
383
0
  }
384
84
  return false;
385
84
}
386
387
AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
388
    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
389
4
    bool AllowSideEntry) const {
390
4
  assert(DstMBB);
391
4
  if (SrcMBB == DstMBB)
392
0
    return SinglePath_InPath;
393
10
  
while (4
SrcMBB && SrcMBB->succ_size() == 1) {
394
10
    SrcMBB = *SrcMBB->succ_begin();
395
10
    if (SrcMBB == DstMBB)
396
4
      return SinglePath_InPath;
397
6
    if (!AllowSideEntry && 
SrcMBB->pred_size() > 10
)
398
0
      return Not_SinglePath;
399
6
  }
400
4
  
if (0
SrcMBB0
&&
SrcMBB->succ_size()==00
)
401
0
    return SinglePath_NotInPath;
402
0
  return Not_SinglePath;
403
0
}
404
405
int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
406
4.69k
    MBBVector::const_iterator E) const {
407
4.69k
  int Count = 0;
408
9.53k
  while (It != E) {
409
4.84k
    if (!isRetiredBlock(*It))
410
4.82k
      ++Count;
411
4.84k
    ++It;
412
4.84k
  }
413
4.69k
  return Count;
414
4.69k
}
415
416
2
bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
417
2
  unsigned BlockSizeThreshold = 30;
418
2
  unsigned CloneInstrThreshold = 100;
419
2
  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
420
2
421
2
  if(!MultiplePreds)
422
1
    return false;
423
1
  unsigned BlkSize = MBB->size();
424
1
  return ((BlkSize > BlockSizeThreshold) &&
425
1
      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
426
1
}
427
428
void AMDGPUCFGStructurizer::reversePredicateSetter(
429
57
    MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
430
57
  assert(I.isValid() && "Expected valid iterator");
431
167
  for (;; 
--I110
) {
432
167
    if (I == MBB.end())
433
41
      continue;
434
126
    if (I->getOpcode() == R600::PRED_X) {
435
57
      switch (I->getOperand(2).getImm()) {
436
57
      case R600::PRED_SETE_INT:
437
15
        I->getOperand(2).setImm(R600::PRED_SETNE_INT);
438
15
        return;
439
57
      case R600::PRED_SETNE_INT:
440
42
        I->getOperand(2).setImm(R600::PRED_SETE_INT);
441
42
        return;
442
57
      case R600::PRED_SETE:
443
0
        I->getOperand(2).setImm(R600::PRED_SETNE);
444
0
        return;
445
57
      case R600::PRED_SETNE:
446
0
        I->getOperand(2).setImm(R600::PRED_SETE);
447
0
        return;
448
57
      default:
449
0
        llvm_unreachable("PRED_X Opcode invalid!");
450
57
      }
451
57
    }
452
126
  }
453
57
}
454
455
void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
456
41
                                           int NewOpcode, const DebugLoc &DL) {
457
41
  MachineInstr *MI =
458
41
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
459
41
  MBB->push_back(MI);
460
41
  //assume the instruction doesn't take any reg operand ...
461
41
  SHOWNEWINSTR(MI);
462
41
}
463
464
MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
465
                                                       int NewOpcode,
466
20
                                                       const DebugLoc &DL) {
467
20
  MachineInstr *MI =
468
20
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
469
20
  if (MBB->begin() != MBB->end())
470
20
    MBB->insert(MBB->begin(), MI);
471
0
  else
472
0
    MBB->push_back(MI);
473
20
  SHOWNEWINSTR(MI);
474
20
  return MI;
475
20
}
476
477
MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
478
87
    MachineBasicBlock::iterator I, int NewOpcode) {
479
87
  MachineInstr *OldMI = &(*I);
480
87
  MachineBasicBlock *MBB = OldMI->getParent();
481
87
  MachineInstr *NewMBB =
482
87
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
483
87
  MBB->insert(I, NewMBB);
484
87
  //assume the instruction doesn't take any reg operand ...
485
87
  SHOWNEWINSTR(NewMBB);
486
87
  return NewMBB;
487
87
}
488
489
void AMDGPUCFGStructurizer::insertCondBranchBefore(
490
44
    MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
491
44
  MachineInstr *OldMI = &(*I);
492
44
  MachineBasicBlock *MBB = OldMI->getParent();
493
44
  MachineFunction *MF = MBB->getParent();
494
44
  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
495
44
  MBB->insert(I, NewMI);
496
44
  MachineInstrBuilder MIB(*MF, NewMI);
497
44
  MIB.addReg(OldMI->getOperand(1).getReg(), false);
498
44
  SHOWNEWINSTR(NewMI);
499
44
  //erase later oldInstr->eraseFromParent();
500
44
}
501
502
void AMDGPUCFGStructurizer::insertCondBranchBefore(
503
    MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
504
20
    int RegNum, const DebugLoc &DL) {
505
20
  MachineFunction *MF = blk->getParent();
506
20
  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
507
20
  //insert before
508
20
  blk->insert(I, NewInstr);
509
20
  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
510
20
  SHOWNEWINSTR(NewInstr);
511
20
}
512
513
44
int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
514
44
  switch(OldOpcode) {
515
44
  case R600::JUMP_COND:
516
44
  case R600::JUMP: return R600::IF_PREDICATE_SET;
517
44
  case R600::BRANCH_COND_i32:
518
0
  case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
519
0
  default: llvm_unreachable("internal error");
520
0
  }
521
0
  return -1;
522
0
}
523
524
0
int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
525
0
  switch(OldOpcode) {
526
0
  case R600::JUMP_COND:
527
0
  case R600::JUMP: return R600::IF_PREDICATE_SET;
528
0
  case R600::BRANCH_COND_i32:
529
0
  case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
530
0
  default: llvm_unreachable("internal error");
531
0
  }
532
0
  return -1;
533
0
}
534
535
0
int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
536
0
  switch(OldOpcode) {
537
0
  case R600::JUMP_COND:
538
0
  case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
539
0
  default: llvm_unreachable("internal error");
540
0
  }
541
0
  return -1;
542
0
}
543
544
0
int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
545
0
  switch(OldOpcode) {
546
0
  case R600::JUMP_COND:
547
0
  case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
548
0
  default: llvm_unreachable("internal error");
549
0
  }
550
0
  return -1;
551
0
}
552
553
117
MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
554
117
  return MI->getOperand(0).getMBB();
555
117
}
556
557
void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
558
1
    MachineBasicBlock *MBB) {
559
1
  MI->getOperand(0).setMBB(MBB);
560
1
}
561
562
MachineBasicBlock *
563
AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
564
48
    MachineInstr *MI) {
565
48
  assert(MBB->succ_size() == 2);
566
48
  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
567
48
  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
568
48
  MachineBasicBlock::succ_iterator Next = It;
569
48
  ++Next;
570
48
  return (*It == TrueBranch) ? 
*Next1
:
*It47
;
571
48
}
572
573
2.58k
bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
574
2.58k
  switch (MI->getOpcode()) {
575
2.58k
    case R600::JUMP_COND:
576
134
    case R600::BRANCH_COND_i32:
577
134
    case R600::BRANCH_COND_f32: return true;
578
2.45k
  default:
579
2.45k
    return false;
580
0
  }
581
0
  return false;
582
0
}
583
584
2.52k
bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
585
2.52k
  switch (MI->getOpcode()) {
586
2.52k
  case R600::JUMP:
587
18
  case R600::BRANCH:
588
18
    return true;
589
2.50k
  default:
590
2.50k
    return false;
591
0
  }
592
0
  return false;
593
0
}
594
595
20
DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
596
20
  //get DebugLoc from the first MachineBasicBlock instruction with debug info
597
20
  DebugLoc DL;
598
293
  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
599
273
      ++It) {
600
273
    MachineInstr *instr = &(*It);
601
273
    if (instr->getDebugLoc())
602
0
      DL = instr->getDebugLoc();
603
273
  }
604
20
  return DL;
605
20
}
606
607
MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
608
48
    MachineBasicBlock *MBB) {
609
48
  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
610
48
  MachineInstr *MI = &*It;
611
48
  if (MI && (isCondBranch(MI) || 
isUncondBranch(MI)0
))
612
48
    return MI;
613
0
  return nullptr;
614
0
}
615
616
MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
617
2.47k
    MachineBasicBlock *MBB) {
618
2.47k
  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
619
2.53k
      It != E; 
++It69
) {
620
2.53k
    // FIXME: Simplify
621
2.53k
    MachineInstr *MI = &*It;
622
2.53k
    if (MI) {
623
2.53k
      if (isCondBranch(MI) || 
isUncondBranch(MI)2.45k
)
624
94
        return MI;
625
2.44k
      else if (!TII->isMov(MI->getOpcode()))
626
2.37k
        break;
627
2.53k
    }
628
2.53k
  }
629
2.47k
  
return nullptr2.37k
;
630
2.47k
}
631
632
2.42k
MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
633
2.42k
  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
634
2.42k
  if (It != MBB->rend()) {
635
2.41k
    MachineInstr *instr = &(*It);
636
2.41k
    if (instr->getOpcode() == R600::RETURN)
637
2.29k
      return instr;
638
132
  }
639
132
  return nullptr;
640
132
}
641
642
2.42k
bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
643
2.42k
  MachineInstr *MI = getReturnInstr(MBB);
644
2.42k
  bool IsReturn = (MBB->succ_size() == 0);
645
2.42k
  if (MI)
646
2.42k
    assert(IsReturn);
647
2.42k
  else 
if (131
IsReturn131
)
648
131
    
LLVM_DEBUG3
(dbgs() << "BB" << MBB->getNumber()
649
2.42k
                      << " is return block without RETURN instr\n";);
650
2.42k
  return  IsReturn;
651
2.42k
}
652
653
void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
654
85
    MachineBasicBlock *SrcMBB) {
655
85
  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
656
105
       iterEnd = SrcMBB->succ_end(); It != iterEnd; 
++It20
)
657
20
    DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
658
85
}
659
660
1
MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
661
1
  MachineFunction *Func = MBB->getParent();
662
1
  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
663
1
  Func->push_back(NewMBB);  //insert to function
664
1
  for (const MachineInstr &It : *MBB)
665
130
    NewMBB->push_back(Func->CloneMachineInstr(&It));
666
1
  return NewMBB;
667
1
}
668
669
void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
670
    MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
671
1
    MachineBasicBlock *NewBlk) {
672
1
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
673
1
  if (BranchMI && isCondBranch(BranchMI) &&
674
1
      getTrueBranch(BranchMI) == OldMBB)
675
1
    setTrueBranch(BranchMI, NewBlk);
676
1
}
677
678
2.29k
void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
679
2.29k
  assert((!MBB->getParent()->getJumpTableInfo()
680
2.29k
          || MBB->getParent()->getJumpTableInfo()->isEmpty())
681
2.29k
         && "found a jump table");
682
2.29k
683
2.29k
   //collect continue right before endloop
684
2.29k
   SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
685
2.29k
   MachineBasicBlock::iterator Pre = MBB->begin();
686
2.29k
   MachineBasicBlock::iterator E = MBB->end();
687
2.29k
   MachineBasicBlock::iterator It = Pre;
688
63.3k
   while (It != E) {
689
61.0k
     if (Pre->getOpcode() == R600::CONTINUE
690
61.0k
         && 
It->getOpcode() == R600::ENDLOOP20
)
691
20
       ContInstr.push_back(&*Pre);
692
61.0k
     Pre = It;
693
61.0k
     ++It;
694
61.0k
   }
695
2.29k
696
2.29k
   //delete continue right before endloop
697
2.31k
   for (unsigned i = 0; i < ContInstr.size(); 
++i20
)
698
20
      ContInstr[i]->eraseFromParent();
699
2.29k
700
2.29k
   // TODO to fix up jump table so later phase won't be confused.  if
701
2.29k
   // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
702
2.29k
   // there isn't such an interface yet.  alternatively, replace all the other
703
2.29k
   // blocks in the jump table with the entryBlk //}
704
2.29k
}
705
706
2.29k
bool AMDGPUCFGStructurizer::prepare() {
707
2.29k
  bool Changed = false;
708
2.29k
709
2.29k
  //FIXME: if not reducible flow graph, make it so ???
710
2.29k
711
2.29k
  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
712
2.29k
713
2.29k
  orderBlocks(FuncRep);
714
2.29k
715
2.29k
  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
716
2.29k
717
2.29k
  // Add an ExitBlk to loop that don't have one
718
2.29k
  for (MachineLoopInfo::iterator It = MLI->begin(),
719
2.30k
       E = MLI->end(); It != E; 
++It18
) {
720
18
    MachineLoop *LoopRep = (*It);
721
18
    MBBVector ExitingMBBs;
722
18
    LoopRep->getExitingBlocks(ExitingMBBs);
723
18
724
18
    if (ExitingMBBs.size() == 0) {
725
0
      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
726
0
      if (DummyExitBlk)
727
0
        RetBlks.push_back(DummyExitBlk);
728
0
    }
729
18
  }
730
2.29k
731
2.29k
  // Remove unconditional branch instr.
732
2.29k
  // Add dummy exit block iff there are multiple returns.
733
2.29k
  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
734
4.71k
       It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; 
++It2.42k
) {
735
2.42k
    MachineBasicBlock *MBB = *It;
736
2.42k
    removeUnconditionalBranch(MBB);
737
2.42k
    removeRedundantConditionalBranch(MBB);
738
2.42k
    if (isReturnBlock(MBB)) {
739
2.29k
      RetBlks.push_back(MBB);
740
2.29k
    }
741
2.42k
    assert(MBB->succ_size() <= 2);
742
2.42k
  }
743
2.29k
744
2.29k
  if (RetBlks.size() >= 2) {
745
1
    addDummyExitBlock(RetBlks);
746
1
    Changed = true;
747
1
  }
748
2.29k
749
2.29k
  return Changed;
750
2.29k
}
751
752
2.29k
bool AMDGPUCFGStructurizer::run() {
753
2.29k
  //Assume reducible CFG...
754
2.29k
  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
755
2.29k
756
#ifdef STRESSTEST
757
  //Use the worse block ordering to test the algorithm.
758
  ReverseVector(orderedBlks);
759
#endif
760
761
2.29k
  LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
762
2.29k
  int NumIter = 0;
763
2.29k
  bool Finish = false;
764
2.29k
  MachineBasicBlock *MBB;
765
2.29k
  bool MakeProgress = false;
766
2.29k
  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
767
2.29k
                                        OrderedBlks.end());
768
2.29k
769
2.29k
  do {
770
2.29k
    ++NumIter;
771
2.29k
    LLVM_DEBUG(dbgs() << "numIter = " << NumIter
772
2.29k
                      << ", numRemaintedBlk = " << NumRemainedBlk << "\n";);
773
2.29k
774
2.29k
    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
775
2.29k
        OrderedBlks.begin();
776
2.29k
    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
777
2.29k
        OrderedBlks.end();
778
2.29k
779
2.29k
    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
780
2.29k
        It;
781
2.29k
    MachineBasicBlock *SccBeginMBB = nullptr;
782
2.29k
    int SccNumBlk = 0;  // The number of active blocks, init to a
783
2.29k
                        // maximum possible number.
784
2.29k
    int SccNumIter;     // Number of iteration in this SCC.
785
2.29k
786
4.71k
    while (It != E) {
787
2.42k
      MBB = *It;
788
2.42k
789
2.42k
      if (!SccBeginMBB) {
790
2.40k
        SccBeginIter = It;
791
2.40k
        SccBeginMBB = MBB;
792
2.40k
        SccNumIter = 0;
793
2.40k
        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
794
2.40k
        LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
795
2.40k
                   dbgs() << "\n";);
796
2.40k
      }
797
2.42k
798
2.42k
      if (!isRetiredBlock(MBB))
799
2.40k
        patternMatch(MBB);
800
2.42k
801
2.42k
      ++It;
802
2.42k
803
2.42k
      bool ContNextScc = true;
804
2.42k
      if (It == E
805
2.42k
          || 
getSCCNum(SccBeginMBB) != getSCCNum(*It)129
) {
806
2.40k
        // Just finish one scc.
807
2.40k
        ++SccNumIter;
808
2.40k
        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
809
2.40k
        if (sccRemainedNumBlk != 1 && 
sccRemainedNumBlk >= SccNumBlk0
) {
810
0
          LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
811
0
                            << ", sccNumIter = " << SccNumIter;
812
0
                     dbgs() << "doesn't make any progress\n";);
813
0
          ContNextScc = true;
814
2.40k
        } else if (sccRemainedNumBlk != 1 && 
sccRemainedNumBlk < SccNumBlk0
) {
815
0
          SccNumBlk = sccRemainedNumBlk;
816
0
          It = SccBeginIter;
817
0
          ContNextScc = false;
818
0
          LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB)
819
0
                            << "sccNumIter = " << SccNumIter << '\n';);
820
2.40k
        } else {
821
2.40k
          // Finish the current scc.
822
2.40k
          ContNextScc = true;
823
2.40k
        }
824
2.40k
      } else {
825
20
        // Continue on next component in the current scc.
826
20
        ContNextScc = false;
827
20
      }
828
2.42k
829
2.42k
      if (ContNextScc)
830
2.40k
        SccBeginMBB = nullptr;
831
2.42k
    } //while, "one iteration" over the function.
832
2.29k
833
2.29k
    MachineBasicBlock *EntryMBB =
834
2.29k
        *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
835
2.29k
    if (EntryMBB->succ_size() == 0) {
836
2.29k
      Finish = true;
837
2.29k
      LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
838
2.29k
    } else {
839
0
      int NewnumRemainedBlk
840
0
        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
841
0
      // consider cloned blocks ??
842
0
      if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
843
0
        MakeProgress = true;
844
0
        NumRemainedBlk = NewnumRemainedBlk;
845
0
      } else {
846
0
        MakeProgress = false;
847
0
        LLVM_DEBUG(dbgs() << "No progress\n";);
848
0
      }
849
0
    }
850
2.29k
  } while (!Finish && 
MakeProgress0
);
851
2.29k
852
2.29k
  // Misc wrap up to maintain the consistency of the Function representation.
853
2.29k
  wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
854
2.29k
855
2.29k
  // Detach retired Block, release memory.
856
2.29k
  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
857
4.71k
      It != E; 
++It2.42k
) {
858
2.42k
    if ((*It).second && (*It).second->IsRetired) {
859
131
      assert(((*It).first)->getNumber() != -1);
860
131
      LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";);
861
131
      (*It).first->eraseFromParent();  //Remove from the parent Function.
862
131
    }
863
2.42k
    delete (*It).second;
864
2.42k
  }
865
2.29k
  BlockInfoMap.clear();
866
2.29k
  LLInfoMap.clear();
867
2.29k
868
2.29k
  if (!Finish) {
869
0
    LLVM_DEBUG(FuncRep->viewCFG());
870
0
    report_fatal_error("IRREDUCIBLE_CFG");
871
0
  }
872
2.29k
873
2.29k
  return true;
874
2.29k
}
875
876
2.29k
void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
877
2.29k
  int SccNum = 0;
878
2.29k
  MachineBasicBlock *MBB;
879
4.69k
  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
880
2.40k
       ++It, ++SccNum) {
881
2.40k
    const std::vector<MachineBasicBlock *> &SccNext = *It;
882
2.40k
    for (std::vector<MachineBasicBlock *>::const_iterator
883
2.40k
         blockIter = SccNext.begin(), blockEnd = SccNext.end();
884
4.82k
         blockIter != blockEnd; 
++blockIter2.42k
) {
885
2.42k
      MBB = *blockIter;
886
2.42k
      OrderedBlks.push_back(MBB);
887
2.42k
      recordSccnum(MBB, SccNum);
888
2.42k
    }
889
2.40k
  }
890
2.29k
891
2.29k
  // walk through all the block in func to check for unreachable
892
2.42k
  for (auto *MBB : nodes(MF)) {
893
2.42k
    SccNum = getSCCNum(MBB);
894
2.42k
    if (SccNum == INVALIDSCCNUM)
895
2.42k
      
dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"0
;
896
2.42k
  }
897
2.29k
}
898
899
2.40k
int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
900
2.40k
  int NumMatch = 0;
901
2.40k
  int CurMatch;
902
2.40k
903
2.40k
  LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";);
904
2.40k
905
2.52k
  while ((CurMatch = patternMatchGroup(MBB)) > 0)
906
126
    NumMatch += CurMatch;
907
2.40k
908
2.40k
  LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber()
909
2.40k
                    << ", numMatch = " << NumMatch << "\n";);
910
2.40k
911
2.40k
  return NumMatch;
912
2.40k
}
913
914
2.52k
int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
915
2.52k
  int NumMatch = 0;
916
2.52k
  NumMatch += loopendPatternMatch();
917
2.52k
  NumMatch += serialPatternMatch(MBB);
918
2.52k
  NumMatch += ifPatternMatch(MBB);
919
2.52k
  return NumMatch;
920
2.52k
}
921
922
2.66k
int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
923
2.66k
  if (MBB->succ_size() != 1)
924
2.45k
    return 0;
925
210
926
210
  MachineBasicBlock *childBlk = *MBB->succ_begin();
927
210
  if (childBlk->pred_size() != 1 || 
isActiveLoophead(childBlk)84
)
928
126
    return 0;
929
84
930
84
  mergeSerialBlock(MBB, childBlk);
931
84
  ++numSerialPatternMatch;
932
84
  return 1;
933
84
}
934
935
2.65k
int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
936
2.65k
  //two edges
937
2.65k
  if (MBB->succ_size() != 2)
938
2.60k
    return 0;
939
48
  if (hasBackEdge(MBB))
940
0
    return 0;
941
48
  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
942
48
  if (!BranchMI)
943
0
    return 0;
944
48
945
48
  assert(isCondBranch(BranchMI));
946
48
  int NumMatch = 0;
947
48
948
48
  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
949
48
  NumMatch += serialPatternMatch(TrueMBB);
950
48
  NumMatch += ifPatternMatch(TrueMBB);
951
48
  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
952
48
  NumMatch += serialPatternMatch(FalseMBB);
953
48
  NumMatch += ifPatternMatch(FalseMBB);
954
48
  MachineBasicBlock *LandBlk;
955
48
  int Cloned = 0;
956
48
957
48
  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
958
48
  // TODO: Simplify
959
48
  if (TrueMBB->succ_size() == 1 && 
FalseMBB->succ_size() == 115
960
48
    && 
*TrueMBB->succ_begin() == *FalseMBB->succ_begin()15
) {
961
3
    // Diamond pattern
962
3
    LandBlk = *TrueMBB->succ_begin();
963
45
  } else if (TrueMBB->succ_size() == 1 && 
*TrueMBB->succ_begin() == FalseMBB12
) {
964
0
    // Triangle pattern, false is empty
965
0
    LandBlk = FalseMBB;
966
0
    FalseMBB = nullptr;
967
45
  } else if (FalseMBB->succ_size() == 1
968
45
             && *FalseMBB->succ_begin() == TrueMBB) {
969
41
    // Triangle pattern, true is empty
970
41
    // We reverse the predicate to make a triangle, empty false pattern;
971
41
    std::swap(TrueMBB, FalseMBB);
972
41
    reversePredicateSetter(MBB->end(), *MBB);
973
41
    LandBlk = FalseMBB;
974
41
    FalseMBB = nullptr;
975
41
  } else 
if (4
FalseMBB->succ_size() == 14
976
4
             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
977
0
    LandBlk = *FalseMBB->succ_begin();
978
4
  } else if (TrueMBB->succ_size() == 1
979
4
    && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
980
0
    LandBlk = *TrueMBB->succ_begin();
981
4
  } else {
982
4
    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
983
4
  }
984
44
985
44
  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
986
44
  // new BB created for landBlk==NULL may introduce new challenge to the
987
44
  // reduction process.
988
44
  if (LandBlk &&
989
44
      ((TrueMBB && TrueMBB->pred_size() > 1)
990
44
      || 
(43
FalseMBB43
&&
FalseMBB->pred_size() > 12
))) {
991
1
     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
992
1
  }
993
44
994
44
  if (TrueMBB && TrueMBB->pred_size() > 1) {
995
1
    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
996
1
    ++Cloned;
997
1
  }
998
44
999
44
  if (FalseMBB && 
FalseMBB->pred_size() > 13
) {
1000
0
    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
1001
0
    ++Cloned;
1002
0
  }
1003
44
1004
44
  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
1005
44
1006
44
  ++numIfPatternMatch;
1007
44
1008
44
  numClonedBlock += Cloned;
1009
44
1010
44
  return 1 + Cloned + NumMatch;
1011
44
}
1012
1013
2.52k
int AMDGPUCFGStructurizer::loopendPatternMatch() {
1014
2.52k
  std::deque<MachineLoop *> NestedLoops;
1015
2.52k
  for (auto &It: *MLI)
1016
116
    for (MachineLoop *ML : depth_first(It))
1017
128
      NestedLoops.push_front(ML);
1018
2.52k
1019
2.52k
  if (NestedLoops.empty())
1020
2.41k
    return 0;
1021
116
1022
116
  // Process nested loop outside->inside (we did push_front),
1023
116
  // so "continue" to a outside loop won't be mistaken as "break"
1024
116
  // of the current loop.
1025
116
  int Num = 0;
1026
128
  for (MachineLoop *ExaminedLoop : NestedLoops) {
1027
128
    if (ExaminedLoop->getNumBlocks() == 0 || 
Visited[ExaminedLoop]30
)
1028
108
      continue;
1029
20
    LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1030
20
    int NumBreak = mergeLoop(ExaminedLoop);
1031
20
    if (NumBreak == -1)
1032
0
      break;
1033
20
    Num += NumBreak;
1034
20
  }
1035
116
  return Num;
1036
116
}
1037
1038
20
int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1039
20
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1040
20
  MBBVector ExitingMBBs;
1041
20
  LoopRep->getExitingBlocks(ExitingMBBs);
1042
20
  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1043
20
  LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size()
1044
20
                    << " exiting blocks\n";);
1045
20
  // We assume a single ExitBlk
1046
20
  MBBVector ExitBlks;
1047
20
  LoopRep->getExitBlocks(ExitBlks);
1048
20
  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
1049
40
  for (unsigned i = 0, e = ExitBlks.size(); i < e; 
++i20
)
1050
20
    ExitBlkSet.insert(ExitBlks[i]);
1051
20
  assert(ExitBlkSet.size() == 1);
1052
20
  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1053
20
  assert(ExitBlk && "Loop has several exit block");
1054
20
  MBBVector LatchBlks;
1055
20
  for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
1056
40
    if (LoopRep->contains(LB))
1057
20
      LatchBlks.push_back(LB);
1058
20
1059
40
  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; 
++i20
)
1060
20
    mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1061
40
  for (unsigned i = 0, e = LatchBlks.size(); i < e; 
++i20
)
1062
20
    settleLoopcontBlock(LatchBlks[i], LoopHeader);
1063
20
  int Match = 0;
1064
30
  do {
1065
30
    Match = 0;
1066
30
    Match += serialPatternMatch(LoopHeader);
1067
30
    Match += ifPatternMatch(LoopHeader);
1068
30
  } while (Match > 0);
1069
20
  mergeLooplandBlock(LoopHeader, ExitBlk);
1070
20
  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1071
20
  if (ParentLoop)
1072
2
    MLI->changeLoopFor(LoopHeader, ParentLoop);
1073
18
  else
1074
18
    MLI->removeBlock(LoopHeader);
1075
20
  Visited[LoopRep] = true;
1076
20
  return 1;
1077
20
}
1078
1079
bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1080
8
    MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1081
8
  if (Src1MBB->succ_size() == 0) {
1082
0
    MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1083
0
    if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1084
0
      MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1085
0
      if (TheEntry) {
1086
0
        LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB"
1087
0
                          << Src1MBB->getNumber() << " src2 = BB"
1088
0
                          << Src2MBB->getNumber() << "\n";);
1089
0
        return true;
1090
0
      }
1091
8
    }
1092
0
  }
1093
8
  return false;
1094
8
}
1095
1096
int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1097
4
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1098
4
  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1099
4
  if (Num == 0) {
1100
0
    LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk"
1101
0
                      << "\n";);
1102
0
    Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1103
0
  }
1104
4
  return Num;
1105
4
}
1106
1107
int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1108
4
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1109
4
  int Num = 0;
1110
4
  MachineBasicBlock *DownBlk;
1111
4
1112
4
  //trueBlk could be the common post dominator
1113
4
  DownBlk = TrueMBB;
1114
4
1115
4
  LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1116
4
                    << " true = BB" << TrueMBB->getNumber()
1117
4
                    << ", numSucc=" << TrueMBB->succ_size() << " false = BB"
1118
4
                    << FalseMBB->getNumber() << "\n";);
1119
4
1120
4
  while (DownBlk) {
1121
4
    LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber(););
1122
4
1123
4
    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1124
4
      LLVM_DEBUG(dbgs() << " working\n";);
1125
4
1126
4
      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1127
4
      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1128
4
1129
4
      numClonedBlock += Num;
1130
4
      Num += serialPatternMatch(*HeadMBB->succ_begin());
1131
4
      Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
1132
4
      Num += ifPatternMatch(HeadMBB);
1133
4
      assert(Num > 0);
1134
4
1135
4
      break;
1136
4
    }
1137
0
    LLVM_DEBUG(dbgs() << " not working\n";);
1138
0
    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
1139
0
  } // walk down the postDomTree
1140
4
1141
4
  return Num;
1142
4
}
1143
1144
#ifndef NDEBUG
1145
void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1146
    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1147
    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1148
  dbgs() << "head = BB" << HeadMBB->getNumber()
1149
         << " size = " << HeadMBB->size();
1150
  if (Detail) {
1151
    dbgs() << "\n";
1152
    HeadMBB->print(dbgs());
1153
    dbgs() << "\n";
1154
  }
1155
1156
  if (TrueMBB) {
1157
    dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1158
           << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1159
    if (Detail) {
1160
      dbgs() << "\n";
1161
      TrueMBB->print(dbgs());
1162
      dbgs() << "\n";
1163
    }
1164
  }
1165
  if (FalseMBB) {
1166
    dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1167
           << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1168
    if (Detail) {
1169
      dbgs() << "\n";
1170
      FalseMBB->print(dbgs());
1171
      dbgs() << "\n";
1172
    }
1173
  }
1174
  if (LandMBB) {
1175
    dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1176
           << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1177
    if (Detail) {
1178
      dbgs() << "\n";
1179
      LandMBB->print(dbgs());
1180
      dbgs() << "\n";
1181
    }
1182
  }
1183
1184
  dbgs() << "\n";
1185
}
1186
#endif
1187
1188
int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1189
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1190
1
    MachineBasicBlock **LandMBBPtr) {
1191
1
  bool MigrateTrue = false;
1192
1
  bool MigrateFalse = false;
1193
1
1194
1
  MachineBasicBlock *LandBlk = *LandMBBPtr;
1195
1
1196
1
  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1197
1
         && (!FalseMBB || FalseMBB->succ_size() <= 1));
1198
1
1199
1
  if (TrueMBB == FalseMBB)
1200
0
    return 0;
1201
1
1202
1
  MigrateTrue = needMigrateBlock(TrueMBB);
1203
1
  MigrateFalse = needMigrateBlock(FalseMBB);
1204
1
1205
1
  if (!MigrateTrue && 
!MigrateFalse0
)
1206
0
    return 0;
1207
1
1208
1
  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1209
1
  // have more than one predecessors.  without doing this, its predecessor
1210
1
  // rather than headBlk will have undefined value in initReg.
1211
1
  if (!MigrateTrue && 
TrueMBB0
&&
TrueMBB->pred_size() > 10
)
1212
0
    MigrateTrue = true;
1213
1
  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
1214
0
    MigrateFalse = true;
1215
1
1216
1
  LLVM_DEBUG(
1217
1
      dbgs() << "before improveSimpleJumpintoIf: ";
1218
1
      showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1219
1
1220
1
  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1221
1
  //
1222
1
  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1223
1
  //      {initReg = 0; org falseBlk branch }
1224
1
  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1225
1
  //      => org landBlk
1226
1
  //      if landBlk->pred_size() > 2, put the about if-else inside
1227
1
  //      if (initReg !=2) {...}
1228
1
  //
1229
1
  // add initReg = initVal to headBlk
1230
1
1231
1
  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1232
1
  if (!MigrateTrue || !MigrateFalse) {
1233
1
    // XXX: We have an opportunity here to optimize the "branch into if" case
1234
1
    // here.  Branch into if looks like this:
1235
1
    //                        entry
1236
1
    //                       /     |
1237
1
    //           diamond_head       branch_from
1238
1
    //             /      \           |
1239
1
    // diamond_false        diamond_true
1240
1
    //             \      /
1241
1
    //               done
1242
1
    //
1243
1
    // The diamond_head block begins the "if" and the diamond_true block
1244
1
    // is the block being "branched into".
1245
1
    //
1246
1
    // If MigrateTrue is true, then TrueBB is the block being "branched into"
1247
1
    // and if MigrateFalse is true, then FalseBB is the block being
1248
1
    // "branched into"
1249
1
    //
1250
1
    // Here is the pseudo code for how I think the optimization should work:
1251
1
    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1252
1
    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1253
1
    // 3. Move the branch instruction from diamond_head into its own basic
1254
1
    //    block (new_block).
1255
1
    // 4. Add an unconditional branch from diamond_head to new_block
1256
1
    // 5. Replace the branch instruction in branch_from with an unconditional
1257
1
    //    branch to new_block.  If branch_from has multiple predecessors, then
1258
1
    //    we need to replace the True/False block in the branch
1259
1
    //    instruction instead of replacing it.
1260
1
    // 6. Change the condition of the branch instruction in new_block from
1261
1
    //    COND to (COND || GPR0)
1262
1
    //
1263
1
    // In order insert these MOV instruction, we will need to use the
1264
1
    // RegisterScavenger.  Usually liveness stops being tracked during
1265
1
    // the late machine optimization passes, however if we implement
1266
1
    // bool TargetRegisterInfo::requiresRegisterScavenging(
1267
1
    //                                                const MachineFunction &MF)
1268
1
    // and have it return true, liveness will be tracked correctly
1269
1
    // by generic optimization passes.  We will also need to make sure that
1270
1
    // all of our target-specific passes that run after regalloc and before
1271
1
    // the CFGStructurizer track liveness and we will need to modify this pass
1272
1
    // to correctly track liveness.
1273
1
    //
1274
1
    // After the above changes, the new CFG should look like this:
1275
1
    //                        entry
1276
1
    //                       /     |
1277
1
    //           diamond_head       branch_from
1278
1
    //                       \     /
1279
1
    //                      new_block
1280
1
    //                      /      |
1281
1
    //         diamond_false        diamond_true
1282
1
    //                      \      /
1283
1
    //                        done
1284
1
    //
1285
1
    // Without this optimization, we are forced to duplicate the diamond_true
1286
1
    // block and we will end up with a CFG like this:
1287
1
    //
1288
1
    //                        entry
1289
1
    //                       /     |
1290
1
    //           diamond_head       branch_from
1291
1
    //             /      \                   |
1292
1
    // diamond_false        diamond_true      diamond_true (duplicate)
1293
1
    //             \      /                   |
1294
1
    //               done --------------------|
1295
1
    //
1296
1
    // Duplicating diamond_true can be very costly especially if it has a
1297
1
    // lot of instructions.
1298
1
    return 0;
1299
1
  }
1300
0
1301
0
  int NumNewBlk = 0;
1302
0
1303
0
  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1304
0
1305
0
  //insert R600::ENDIF to avoid special case "input landBlk == NULL"
1306
0
  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);
1307
0
1308
0
  if (LandBlkHasOtherPred) {
1309
0
    report_fatal_error("Extra register needed to handle CFG");
1310
0
    unsigned CmpResReg =
1311
0
      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1312
0
    report_fatal_error("Extra compare instruction needed to handle CFG");
1313
0
    insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
1314
0
        CmpResReg, DebugLoc());
1315
0
  }
1316
0
1317
0
  // XXX: We are running this after RA, so creating virtual registers will
1318
0
  // cause an assertion failure in the PostRA scheduling pass.
1319
0
  unsigned InitReg =
1320
0
    HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1321
0
  insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
1322
0
      DebugLoc());
1323
0
1324
0
  if (MigrateTrue) {
1325
0
    migrateInstruction(TrueMBB, LandBlk, I);
1326
0
    // need to uncondionally insert the assignment to ensure a path from its
1327
0
    // predecessor rather than headBlk has valid value in initReg if
1328
0
    // (initVal != 1).
1329
0
    report_fatal_error("Extra register needed to handle CFG");
1330
0
  }
1331
0
  insertInstrBefore(I, R600::ELSE);
1332
0
1333
0
  if (MigrateFalse) {
1334
0
    migrateInstruction(FalseMBB, LandBlk, I);
1335
0
    // need to uncondionally insert the assignment to ensure a path from its
1336
0
    // predecessor rather than headBlk has valid value in initReg if
1337
0
    // (initVal != 0)
1338
0
    report_fatal_error("Extra register needed to handle CFG");
1339
0
  }
1340
0
1341
0
  if (LandBlkHasOtherPred) {
1342
0
    // add endif
1343
0
    insertInstrBefore(I, R600::ENDIF);
1344
0
1345
0
    // put initReg = 2 to other predecessors of landBlk
1346
0
    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
1347
0
         PE = LandBlk->pred_end(); PI != PE; ++PI) {
1348
0
      MachineBasicBlock *MBB = *PI;
1349
0
      if (MBB != TrueMBB && MBB != FalseMBB)
1350
0
        report_fatal_error("Extra register needed to handle CFG");
1351
0
    }
1352
0
  }
1353
0
  LLVM_DEBUG(
1354
0
      dbgs() << "result from improveSimpleJumpintoIf: ";
1355
0
      showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1356
0
1357
0
  // update landBlk
1358
0
  *LandMBBPtr = LandBlk;
1359
0
1360
0
  return NumNewBlk;
1361
0
}
1362
1363
void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1364
84
    MachineBasicBlock *SrcMBB) {
1365
84
  LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
1366
84
                    << SrcMBB->getNumber() << "\n";);
1367
84
  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1368
84
1369
84
  DstMBB->removeSuccessor(SrcMBB, true);
1370
84
  cloneSuccessorList(DstMBB, SrcMBB);
1371
84
1372
84
  removeSuccessor(SrcMBB);
1373
84
  MLI->removeBlock(SrcMBB);
1374
84
  retireBlock(SrcMBB);
1375
84
}
1376
1377
void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1378
    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
1379
44
    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1380
44
  assert (TrueMBB);
1381
44
  LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{  ";
1382
44
             if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs()
1383
44
             << "  } else ";
1384
44
             dbgs() << "{  "; if (FalseMBB) {
1385
44
               dbgs() << "BB" << FalseMBB->getNumber();
1386
44
             } dbgs() << "  }\n ";
1387
44
             dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else {
1388
44
               dbgs() << "BB" << LandMBB->getNumber();
1389
44
             } dbgs() << "\n";);
1390
44
1391
44
  int OldOpcode = BranchMI->getOpcode();
1392
44
  DebugLoc BranchDL = BranchMI->getDebugLoc();
1393
44
1394
44
//    transform to
1395
44
//    if cond
1396
44
//       trueBlk
1397
44
//    else
1398
44
//       falseBlk
1399
44
//    endif
1400
44
//    landBlk
1401
44
1402
44
  MachineBasicBlock::iterator I = BranchMI;
1403
44
  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1404
44
      BranchDL);
1405
44
1406
44
  if (TrueMBB) {
1407
44
    MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1408
44
    MBB->removeSuccessor(TrueMBB, true);
1409
44
    if (LandMBB && TrueMBB->succ_size()!=0)
1410
44
      TrueMBB->removeSuccessor(LandMBB, true);
1411
44
    retireBlock(TrueMBB);
1412
44
    MLI->removeBlock(TrueMBB);
1413
44
  }
1414
44
1415
44
  if (FalseMBB) {
1416
3
    insertInstrBefore(I, R600::ELSE);
1417
3
    MBB->splice(I, FalseMBB, FalseMBB->begin(),
1418
3
                   FalseMBB->end());
1419
3
    MBB->removeSuccessor(FalseMBB, true);
1420
3
    if (LandMBB && FalseMBB->succ_size() != 0)
1421
3
      FalseMBB->removeSuccessor(LandMBB, true);
1422
3
    retireBlock(FalseMBB);
1423
3
    MLI->removeBlock(FalseMBB);
1424
3
  }
1425
44
  insertInstrBefore(I, R600::ENDIF);
1426
44
1427
44
  BranchMI->eraseFromParent();
1428
44
1429
44
  if (LandMBB && TrueMBB && FalseMBB)
1430
3
    MBB->addSuccessor(LandMBB);
1431
44
}
1432
1433
void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
1434
20
    MachineBasicBlock *LandMBB) {
1435
20
  LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
1436
20
                    << " land = BB" << LandMBB->getNumber() << "\n";);
1437
20
1438
20
  insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
1439
20
  insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
1440
20
  DstBlk->replaceSuccessor(DstBlk, LandMBB);
1441
20
}
1442
1443
void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
1444
20
    MachineBasicBlock *LandMBB) {
1445
20
  LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
1446
20
                    << ExitingMBB->getNumber() << " land = BB"
1447
20
                    << LandMBB->getNumber() << "\n";);
1448
20
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1449
20
  assert(BranchMI && isCondBranch(BranchMI));
1450
20
  DebugLoc DL = BranchMI->getDebugLoc();
1451
20
  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
1452
20
  MachineBasicBlock::iterator I = BranchMI;
1453
20
  if (TrueBranch != LandMBB)
1454
16
    reversePredicateSetter(I, *I->getParent());
1455
20
  insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
1456
20
  insertInstrBefore(I, R600::BREAK);
1457
20
  insertInstrBefore(I, R600::ENDIF);
1458
20
  //now branchInst can be erase safely
1459
20
  BranchMI->eraseFromParent();
1460
20
  //now take care of successors, retire blocks
1461
20
  ExitingMBB->removeSuccessor(LandMBB, true);
1462
20
}
1463
1464
void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1465
20
    MachineBasicBlock *ContMBB) {
1466
20
  LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1467
20
                    << ContingMBB->getNumber() << ", cont = BB"
1468
20
                    << ContMBB->getNumber() << "\n";);
1469
20
1470
20
  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1471
20
  if (MI) {
1472
0
    assert(isCondBranch(MI));
1473
0
    MachineBasicBlock::iterator I = MI;
1474
0
    MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1475
0
    int OldOpcode = MI->getOpcode();
1476
0
    DebugLoc DL = MI->getDebugLoc();
1477
0
1478
0
    bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1479
0
1480
0
    if (!UseContinueLogical) {
1481
0
      int BranchOpcode =
1482
0
          TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1483
0
          getBranchZeroOpcode(OldOpcode);
1484
0
      insertCondBranchBefore(I, BranchOpcode, DL);
1485
0
      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1486
0
      insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
1487
0
      insertInstrEnd(ContingMBB, R600::ENDIF, DL);
1488
0
    } else {
1489
0
      int BranchOpcode =
1490
0
          TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1491
0
          getContinueZeroOpcode(OldOpcode);
1492
0
      insertCondBranchBefore(I, BranchOpcode, DL);
1493
0
    }
1494
0
1495
0
    MI->eraseFromParent();
1496
20
  } else {
1497
20
    // if we've arrived here then we've already erased the branch instruction
1498
20
    // travel back up the basic block to see the last reference of our debug
1499
20
    // location we've just inserted that reference here so it should be
1500
20
    // representative insertEnd to ensure phi-moves, if exist, go before the
1501
20
    // continue-instr.
1502
20
    insertInstrEnd(ContingMBB, R600::CONTINUE,
1503
20
        getLastDebugLocInBB(ContingMBB));
1504
20
  }
1505
20
}
1506
1507
int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1508
8
    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1509
8
  int Cloned = 0;
1510
8
  assert(PreMBB->isSuccessor(SrcMBB));
1511
18
  while (SrcMBB && SrcMBB != DstMBB) {
1512
10
    assert(SrcMBB->succ_size() == 1);
1513
10
    if (SrcMBB->pred_size() > 1) {
1514
0
      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1515
0
      ++Cloned;
1516
0
    }
1517
10
1518
10
    PreMBB = SrcMBB;
1519
10
    SrcMBB = *SrcMBB->succ_begin();
1520
10
  }
1521
8
1522
8
  return Cloned;
1523
8
}
1524
1525
MachineBasicBlock *
1526
AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1527
1
    MachineBasicBlock *PredMBB) {
1528
1
  assert(PredMBB->isSuccessor(MBB) &&
1529
1
         "succBlk is not a prececessor of curBlk");
1530
1
1531
1
  MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions
1532
1
  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1533
1
  //srcBlk, oldBlk, newBlk
1534
1
1535
1
  PredMBB->replaceSuccessor(MBB, CloneMBB);
1536
1
1537
1
  // add all successor to cloneBlk
1538
1
  cloneSuccessorList(CloneMBB, MBB);
1539
1
1540
1
  numClonedInstr += MBB->size();
1541
1
1542
1
  LLVM_DEBUG(dbgs() << "Cloned block: "
1543
1
                    << "BB" << MBB->getNumber() << "size " << MBB->size()
1544
1
                    << "\n";);
1545
1
1546
1
  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1547
1
1548
1
  return CloneMBB;
1549
1
}
1550
1551
void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1552
0
    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
1553
0
  MachineBasicBlock::iterator SpliceEnd;
1554
0
  //look for the input branchinstr, not the AMDGPU branchinstr
1555
0
  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1556
0
  if (!BranchMI) {
1557
0
    LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";);
1558
0
    SpliceEnd = SrcMBB->end();
1559
0
  } else {
1560
0
    LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
1561
0
    SpliceEnd = BranchMI;
1562
0
  }
1563
0
  LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = "
1564
0
                    << DstMBB->size() << "srcSize = " << SrcMBB->size()
1565
0
                    << "\n";);
1566
0
1567
0
  //splice insert before insertPos
1568
0
  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1569
0
1570
0
  LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = "
1571
0
                    << DstMBB->size() << "srcSize = " << SrcMBB->size()
1572
0
                    << '\n';);
1573
0
}
1574
1575
MachineBasicBlock *
1576
0
AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1577
0
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1578
0
  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1579
0
1580
0
  if (!LoopHeader || !LoopLatch)
1581
0
    return nullptr;
1582
0
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1583
0
  // Is LoopRep an infinite loop ?
1584
0
  if (!BranchMI || !isUncondBranch(BranchMI))
1585
0
    return nullptr;
1586
0
1587
0
  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1588
0
  FuncRep->push_back(DummyExitBlk);  //insert to function
1589
0
  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1590
0
  LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1591
0
  LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext();
1592
0
  Ctx.emitError("Extra register needed to handle CFG");
1593
0
  return nullptr;
1594
0
}
1595
1596
2.42k
void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1597
2.42k
  MachineInstr *BranchMI;
1598
2.42k
1599
2.42k
  // I saw two unconditional branch in one basic block in example
1600
2.42k
  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1601
2.42k
  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1602
2.42k
          && 
isUncondBranch(BranchMI)73
) {
1603
9
    LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1604
9
    BranchMI->eraseFromParent();
1605
9
  }
1606
2.42k
}
1607
1608
void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1609
2.42k
    MachineBasicBlock *MBB) {
1610
2.42k
  if (MBB->succ_size() != 2)
1611
2.35k
    return;
1612
64
  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1613
64
  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1614
64
  if (MBB1 != MBB2)
1615
64
    return;
1616
0
1617
0
  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1618
0
  assert(BranchMI && isCondBranch(BranchMI));
1619
0
  LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
1620
0
  BranchMI->eraseFromParent();
1621
0
  SHOWNEWBLK(MBB1, "Removing redundant successor");
1622
0
  MBB->removeSuccessor(MBB1, true);
1623
0
}
1624
1625
void AMDGPUCFGStructurizer::addDummyExitBlock(
1626
1
    SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
1627
1
  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1628
1
  FuncRep->push_back(DummyExitBlk);  //insert to function
1629
1
  insertInstrEnd(DummyExitBlk, R600::RETURN);
1630
1
1631
1
  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
1632
3
       E = RetMBB.end(); It != E; 
++It2
) {
1633
2
    MachineBasicBlock *MBB = *It;
1634
2
    MachineInstr *MI = getReturnInstr(MBB);
1635
2
    if (MI)
1636
1
      MI->eraseFromParent();
1637
2
    MBB->addSuccessor(DummyExitBlk);
1638
2
    LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1639
2
                      << " successors\n";);
1640
2
  }
1641
1
  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1642
1
}
1643
1644
84
void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1645
103
  while (MBB->succ_size())
1646
19
    MBB->removeSuccessor(*MBB->succ_begin());
1647
84
}
1648
1649
void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1650
2.42k
    int SccNum) {
1651
2.42k
  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1652
2.42k
  if (!srcBlkInfo)
1653
2.42k
    srcBlkInfo = new BlockInformation();
1654
2.42k
  srcBlkInfo->SccNum = SccNum;
1655
2.42k
}
1656
1657
131
void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1658
131
  LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";);
1659
131
1660
131
  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1661
131
1662
131
  if (!SrcBlkInfo)
1663
2
    SrcBlkInfo = new BlockInformation();
1664
131
1665
131
  SrcBlkInfo->IsRetired = true;
1666
131
  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
1667
131
         && "can't retire block yet");
1668
131
}
1669
1670
280
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
1671
280
                      "AMDGPU CFG Structurizer", false, false)
1672
280
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
1673
280
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
1674
280
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
1675
280
INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1676
                      "AMDGPU CFG Structurizer", false, false)
1677
1678
280
FunctionPass *llvm::createAMDGPUCFGStructurizerPass() {
1679
280
  return new AMDGPUCFGStructurizer();
1680
280
}