Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp

 Line|  Count|Source
    1|       |//===- AMDGPUUnifyDivergentExitNodes.cpp ----------------------------------===//
    2|       |//
    3|       |// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
    4|       |// See https://llvm.org/LICENSE.txt for license information.
    5|       |// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
    6|       |//
    7|       |//===----------------------------------------------------------------------===//
    8|       |//
    9|       |// This is a variant of the UnifyDivergentExitNodes pass. Rather than ensuring
   10|       |// there is at most one ret and one unreachable instruction, it ensures there is
   11|       |// at most one divergent exiting block.
   12|       |//
   13|       |// StructurizeCFG can't deal with multi-exit regions formed by branches to
   14|       |// multiple return nodes. It is not desirable to structurize regions with
   15|       |// uniform branches, so unifying those to the same return block as divergent
   16|       |// branches inhibits use of scalar branching. It still can't deal with the case
   17|       |// where one branch goes to a return and the other to unreachable; in that case,
   18|       |// the unreachable is replaced with a return.
   19|       |//
   20|       |//===----------------------------------------------------------------------===//
   21|       |
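
As an illustration of the transformation described above (hand-written LLVM IR, not part of this report's data; the function and value names are invented): a kernel whose two returns are reached through a branch on the workitem id has two divergent exits, and the pass funnels both into a single return block.

    declare i32 @llvm.amdgcn.workitem.id.x()

    ; Before: two exit blocks, each reached by a divergent branch.
    define amdgpu_kernel void @two_exits(i32 %n) {
    entry:
      %tid = call i32 @llvm.amdgcn.workitem.id.x()
      %cond = icmp ult i32 %tid, %n      ; divergent: %tid differs per lane
      br i1 %cond, label %a, label %b
    a:
      ret void
    b:
      ret void
    }

    ; After: one divergent exit block, so StructurizeCFG sees a
    ; single-exit region.
    define amdgpu_kernel void @two_exits.unified(i32 %n) {
    entry:
      %tid = call i32 @llvm.amdgcn.workitem.id.x()
      %cond = icmp ult i32 %tid, %n
      br i1 %cond, label %a, label %b
    a:
      br label %UnifiedReturnBlock
    b:
      br label %UnifiedReturnBlock
    UnifiedReturnBlock:
      ret void
    }
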
   22|       |#include "AMDGPU.h"
   23|       |#include "llvm/ADT/ArrayRef.h"
   24|       |#include "llvm/ADT/SmallPtrSet.h"
   25|       |#include "llvm/ADT/SmallVector.h"
   26|       |#include "llvm/ADT/StringRef.h"
   27|       |#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
   28|       |#include "llvm/Analysis/PostDominators.h"
   29|       |#include "llvm/Analysis/TargetTransformInfo.h"
   30|       |#include "llvm/Transforms/Utils/Local.h"
   31|       |#include "llvm/IR/BasicBlock.h"
   32|       |#include "llvm/IR/CFG.h"
   33|       |#include "llvm/IR/Constants.h"
   34|       |#include "llvm/IR/Function.h"
   35|       |#include "llvm/IR/InstrTypes.h"
   36|       |#include "llvm/IR/Instructions.h"
   37|       |#include "llvm/IR/Intrinsics.h"
   38|       |#include "llvm/IR/Type.h"
   39|       |#include "llvm/Pass.h"
   40|       |#include "llvm/Support/Casting.h"
   41|       |#include "llvm/Transforms/Scalar.h"
   42|       |#include "llvm/Transforms/Utils.h"
   43|       |
   44|       |using namespace llvm;
   45|       |
   46|       |#define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
   47|       |
   48|       |namespace {
   49|       |
   50|       |class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
   51|       |public:
   52|       |  static char ID; // Pass identification, replacement for typeid
   53|       |
   54|  2.44k|  AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
   55|  2.44k|    initializeAMDGPUUnifyDivergentExitNodesPass(*PassRegistry::getPassRegistry());
   56|  2.44k|  }
   57|       |
   58|       |  // We can preserve non-critical-edgeness when we unify function exit nodes
   59|       |  void getAnalysisUsage(AnalysisUsage &AU) const override;
   60|       |  bool runOnFunction(Function &F) override;
   61|       |};
   62|       |
   63|       |} // end anonymous namespace
   64|       |
   65|       |char AMDGPUUnifyDivergentExitNodes::ID = 0;
   66|       |
   67|       |char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
   68|       |
   69|   101k|INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
   70|   101k|                      "Unify divergent function exit nodes", false, false)
   71|   101k|INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
   72|   101k|INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
   73|   101k|INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
   74|       |                    "Unify divergent function exit nodes", false, false)
   75|       |
   76|  2.42k|void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
   77|  2.42k|  // TODO: Preserve dominator tree.
   78|  2.42k|  AU.addRequired<PostDominatorTreeWrapperPass>();
   79|  2.42k|
   80|  2.42k|  AU.addRequired<LegacyDivergenceAnalysis>();
   81|  2.42k|
   82|  2.42k|  // No divergent values are changed, only blocks and branch edges.
   83|  2.42k|  AU.addPreserved<LegacyDivergenceAnalysis>();
   84|  2.42k|
   85|  2.42k|  // We preserve the non-critical-edgeness property
   86|  2.42k|  AU.addPreservedID(BreakCriticalEdgesID);
   87|  2.42k|
   88|  2.42k|  // This is a cluster of orthogonal Transforms
   89|  2.42k|  AU.addPreservedID(LowerSwitchID);
   90|  2.42k|  FunctionPass::getAnalysisUsage(AU);
   91|  2.42k|
   92|  2.42k|  AU.addRequired<TargetTransformInfoWrapperPass>();
   93|  2.42k|}
   94|       |
   95|       |/// \returns true if \p BB is reachable through only uniform branches.
   96|       |/// XXX - Is there a more efficient way to find this?
   97|       |static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA,
   98|    265|                               BasicBlock &BB) {
   99|    265|  SmallVector<BasicBlock *, 8> Stack;
  100|    265|  SmallPtrSet<BasicBlock *, 8> Visited;
  101|    265|
  102|    265|  for (BasicBlock *Pred : predecessors(&BB))
  103|    317|    Stack.push_back(Pred);
  104|    265|
  105|    489|  while (!Stack.empty()) {
  106|    372|    BasicBlock *Top = Stack.pop_back_val();
  107|    372|    if (!DA.isUniform(Top->getTerminator()))
  108|    148|      return false;
  109|    224|
  110|    224|    for (BasicBlock *Pred : predecessors(Top)) {
  111|    140|      if (Visited.insert(Pred).second)
  112|    111|        Stack.push_back(Pred);
  113|    140|    }
  114|    224|  }
  115|    265|
  116|    265|  return true;
  117|    265|}
  118|       |
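
isUniformlyReached walks backwards over all transitive predecessors of BB and reports true only if every terminator it encounters is uniform. A hand-written sketch of the distinction (illustrative IR, not from this report): a branch on a scalar kernel argument is the same in every lane, while a branch on the workitem id is not.

    declare i32 @llvm.amdgcn.workitem.id.x()

    define amdgpu_kernel void @reach(i32 %scalar_arg) {
    entry:
      %c.u = icmp eq i32 %scalar_arg, 0  ; uniform: same value in every lane
      br i1 %c.u, label %early.exit, label %body
    early.exit:                          ; uniformly reached; left alone
      ret void
    body:
      %tid = call i32 @llvm.amdgcn.workitem.id.x()
      %c.d = icmp eq i32 %tid, 0         ; divergent: differs per lane
      br i1 %c.d, label %div.exit, label %other.exit
    div.exit:                            ; divergently reached; gets unified
      ret void
    other.exit:                          ; divergently reached; gets unified
      ret void
    }
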
  119|       |static BasicBlock *unifyReturnBlockSet(Function &F,
  120|       |                                       ArrayRef<BasicBlock *> ReturningBlocks,
  121|       |                                       const TargetTransformInfo &TTI,
  122|     69|                                       StringRef Name) {
  123|     69|  // We need to insert a new basic block into the function, add a PHI node
  124|     69|  // (if the function returns a value), and convert all of the return
  125|     69|  // instructions into unconditional branches.
  126|     69|  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
  127|     69|
  128|     69|  PHINode *PN = nullptr;
  129|     69|  if (F.getReturnType()->isVoidTy()) {
  130|     60|    ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
  131|     60|  } else {
  132|      9|    // If the function doesn't return void, add a PHI node to the block.
  133|      9|    PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
  134|      9|                         "UnifiedRetVal");
  135|      9|    NewRetBlock->getInstList().push_back(PN);
  136|      9|    ReturnInst::Create(F.getContext(), PN, NewRetBlock);
  137|      9|  }
  138|     69|
  139|     69|  // Loop over all of the blocks, replacing the return instruction with an
  140|     69|  // unconditional branch.
  141|    140|  for (BasicBlock *BB : ReturningBlocks) {
  142|    140|    // Add an incoming element to the PHI node for every return instruction
  143|    140|    // that is merging into this new block.
  144|    140|    if (PN)
  145|     18|      PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
  146|    140|
  147|    140|    // Remove and delete the return inst.
  148|    140|    BB->getTerminator()->eraseFromParent();
  149|    140|    BranchInst::Create(NewRetBlock, BB);
  150|    140|  }
  151|     69|
  152|    140|  for (BasicBlock *BB : ReturningBlocks) {
  153|    140|    // Clean up a possible branch to an unconditional branch to the return.
  154|    140|    simplifyCFG(BB, TTI, {2});
  155|    140|  }
  156|     69|
  157|     69|  return NewRetBlock;
  158|     69|}
  159|       |
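
For a function with a non-void return type, unifyReturnBlockSet routes each block's return value through the "UnifiedRetVal" PHI built above. A hand-written sketch of the result (illustrative IR, not from this report; %x and %y stand for values that were previously returned directly from %a and %b):

    define float @select_exit(float %x, float %y, i1 %c) {
    entry:
      br i1 %c, label %a, label %b
    a:                                   ; previously: ret float %x
      br label %UnifiedReturnBlock
    b:                                   ; previously: ret float %y
      br label %UnifiedReturnBlock
    UnifiedReturnBlock:
      %UnifiedRetVal = phi float [ %x, %a ], [ %y, %b ]
      ret float %UnifiedRetVal
    }
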
  160|  25.2k|bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
  161|  25.2k|  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
  162|  25.2k|  if (PDT.getRoots().size() <= 1)
  163|  25.0k|    return false;
  164|    137|
  165|    137|  LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
  166|    137|
  167|    137|  // Loop over all of the blocks in a function, tracking all of the blocks that
  168|    137|  // return.
  169|    137|  SmallVector<BasicBlock *, 4> ReturningBlocks;
  170|    137|  SmallVector<BasicBlock *, 4> UnreachableBlocks;
  171|    137|
  172|    137|  // Dummy return block for infinite loop.
  173|    137|  BasicBlock *DummyReturnBB = nullptr;
  174|    137|
  175|    283|  for (BasicBlock *BB : PDT.getRoots()) {
  176|    283|    if (isa<ReturnInst>(BB->getTerminator())) {
  177|    187|      if (!isUniformlyReached(DA, *BB))
  178|     96|        ReturningBlocks.push_back(BB);
  179|    187|    } else if (isa<UnreachableInst>(BB->getTerminator())) {
  180|     78|      if (!isUniformlyReached(DA, *BB))
  181|     52|        UnreachableBlocks.push_back(BB);
  182|     78|    } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
  183|     18|
  184|     18|      ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
  185|     18|      if (DummyReturnBB == nullptr) {
  186|     16|        DummyReturnBB = BasicBlock::Create(F.getContext(),
  187|     16|                                           "DummyReturnBlock", &F);
  188|     16|        Type *RetTy = F.getReturnType();
  189|     16|        Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
  190|     16|        ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
  191|     16|        ReturningBlocks.push_back(DummyReturnBB);
  192|     16|      }
  193|     18|
  194|     18|      if (BI->isUnconditional()) {
  195|     15|        BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
  196|     15|        BI->eraseFromParent(); // Delete the unconditional branch.
  197|     15|        // Add a new conditional branch with a dummy edge to the return block.
  198|     15|        BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
  199|     15|      } else { // Conditional branch.
  200|      3|        // Create a new transition block to hold the conditional branch.
  201|      3|        BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
  202|      3|
  203|      3|        // Create a branch that will always branch to the transition block and
  204|      3|        // references DummyReturnBB.
  205|      3|        BB->getTerminator()->eraseFromParent();
  206|      3|        BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
  207|      3|      }
  208|     18|    }
  209|    283|  }
  210|    137|
  211|    137|  if (!UnreachableBlocks.empty()) {
  212|     43|    BasicBlock *UnreachableBlock = nullptr;
  213|     43|
  214|     43|    if (UnreachableBlocks.size() == 1) {
  215|     35|      UnreachableBlock = UnreachableBlocks.front();
  216|     35|    } else {
  217|      8|      UnreachableBlock = BasicBlock::Create(F.getContext(),
  218|      8|                                            "UnifiedUnreachableBlock", &F);
  219|      8|      new UnreachableInst(F.getContext(), UnreachableBlock);
  220|      8|
  221|     17|      for (BasicBlock *BB : UnreachableBlocks) {
  222|     17|        // Remove and delete the unreachable inst.
  223|     17|        BB->getTerminator()->eraseFromParent();
  224|     17|        BranchInst::Create(UnreachableBlock, BB);
  225|     17|      }
  226|      8|    }
  227|     43|
  228|     43|    if (!ReturningBlocks.empty()) {
  229|     37|      // Don't create a new unreachable inst if we have a return. The
  230|     37|      // structurizer/annotator can't handle the multiple exits.
  231|     37|
  232|     37|      Type *RetTy = F.getReturnType();
  233|     37|      Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
  234|     37|      // Remove and delete the unreachable inst.
  235|     37|      UnreachableBlock->getTerminator()->eraseFromParent();
  236|     37|
  237|     37|      Function *UnreachableIntrin =
  238|     37|        Intrinsic::getDeclaration(F.getParent(), Intrinsic::amdgcn_unreachable);
  239|     37|
  240|     37|      // Insert a call to an intrinsic tracking that this is an unreachable
  241|     37|      // point, in case we want to kill the active lanes or something later.
  242|     37|      CallInst::Create(UnreachableIntrin, {}, "", UnreachableBlock);
  243|     37|
  244|     37|      // Don't create a scalar trap. We would only want to trap if this code
  245|     37|      // were really reached, but a scalar trap would happen even if no lanes
  246|     37|      // actually reached here.
  247|     37|      ReturnInst::Create(F.getContext(), RetVal, UnreachableBlock);
  248|     37|      ReturningBlocks.push_back(UnreachableBlock);
  249|     37|    }
  250|     43|  }
  251|    137|
  252|    137|  // Now handle return blocks.
  253|    137|  if (ReturningBlocks.empty())
  254|     59|    return false; // No blocks return.
  255|     78|
  256|     78|  if (ReturningBlocks.size() == 1)
  257|      9|    return false; // Already has a single return block.
  258|     69|
  259|     69|  const TargetTransformInfo &TTI
  260|     69|    = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  261|     69|
  262|     69|  unifyReturnBlockSet(F, ReturningBlocks, TTI, "UnifiedReturnBlock");
  263|     69|  return true;
  264|     69|}
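
The BranchInst case in the loop above handles divergent infinite loops: they post-dominate nothing, so they show up as extra roots of the post-dominator tree even though they contain no return. The pass rewrites the back edge into a conditional branch on a constant true, adding a never-taken edge to "DummyReturnBlock" so the function regains a single post-dominating exit. A hand-written sketch of the unconditional-branch case (illustrative IR, not from this report):

    ; Before: the loop header ended in "br label %loop" and the function
    ; had no reachable return.
    define amdgpu_kernel void @spin() {
    entry:
      br label %loop
    loop:
      br i1 true, label %loop, label %DummyReturnBlock
    DummyReturnBlock:                    ; added by the pass
      ret void
    }
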