/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | /// \file |
10 | | /// This pass adds amdgpu.uniform metadata to IR values so this information |
11 | | /// can be used during instruction selection. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "AMDGPU.h" |
16 | | #include "llvm/ADT/SetVector.h" |
17 | | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" |
18 | | #include "llvm/Analysis/LoopInfo.h" |
19 | | #include "llvm/Analysis/MemoryDependenceAnalysis.h" |
20 | | #include "llvm/IR/IRBuilder.h" |
21 | | #include "llvm/IR/InstVisitor.h" |
22 | | #include "llvm/Support/Debug.h" |
23 | | #include "llvm/Support/raw_ostream.h" |
24 | | |
25 | | #define DEBUG_TYPE "amdgpu-annotate-uniform" |
26 | | |
27 | | using namespace llvm; |
28 | | |
namespace {

/// Legacy function pass that adds "amdgpu.uniform" metadata to values the
/// divergence analysis proves uniform, and "amdgpu.noclobber" metadata to
/// loads whose memory is not written to within the function, so this
/// information can be used during instruction selection (see \file header).
class AMDGPUAnnotateUniformValues : public FunctionPass,
                       public InstVisitor<AMDGPUAnnotateUniformValues> {
  LegacyDivergenceAnalysis *DA;   // Uniformity queries for branches/pointers.
  MemoryDependenceResults *MDR;   // Clobber detection for loaded memory.
  LoopInfo *LI;                   // Widens the clobber search to whole loops.
  // Cache of GEP clones created for non-Instruction pointers
  // (Argument/GlobalValue), keyed by the original pointer. Cleared per
  // function in runOnFunction.
  DenseMap<Value*, GetElementPtrInst*> noClobberClones;
  // True when the current function uses CallingConv::AMDGPU_KERNEL; the
  // intra-function clobber tracking is only applied in that case (see the
  // comment in visitLoadInst).
  bool isKernelFunc;

public:
  static char ID;
  AMDGPUAnnotateUniformValues() :
    FunctionPass(ID) { }
  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Annotate Uniform Values";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LegacyDivergenceAnalysis>();
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    // The pass only attaches metadata, so all analyses remain valid.
    AU.setPreservesAll();
  }

  void visitBranchInst(BranchInst &I);
  void visitLoadInst(LoadInst &I);
  bool isClobberedInFunction(LoadInst * Load);
};

} // End anonymous namespace
61 | | |
// Register the pass with the legacy pass manager and declare the analyses
// it depends on (matching getAnalysisUsage above).
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

// Pass identification token used by the pass manager.
char AMDGPUAnnotateUniformValues::ID = 0;
71 | | |
72 | 36.3k | static void setUniformMetadata(Instruction *I) { |
73 | 36.3k | I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {})); |
74 | 36.3k | } |
75 | 32.1k | static void setNoClobberMetadata(Instruction *I) { |
76 | 32.1k | I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {})); |
77 | 32.1k | } |
78 | | |
79 | 35.8k | static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) { |
80 | 35.8k | for (auto I : predecessors(Root)) |
81 | 837 | if (Set.insert(I)) |
82 | 664 | DFS(I, Set); |
83 | 35.8k | } |
84 | | |
/// Return true if the memory read by \p Load may be written to somewhere
/// in the current function, i.e. the load is NOT provably clobber-free.
/// Unknown dependencies are conservatively treated as clobbers.
bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
  // 1. get Loop for the Load->getparent();
  // 2. if it exists, collect all the BBs from the most outer
  // loop and check for the writes. If NOT - start DFS over all preds.
  // 3. Start DFS over all preds from the most outer loop header.
  SetVector<BasicBlock *> Checklist;
  BasicBlock *Start = Load->getParent();
  Checklist.insert(Start);
  const Value *Ptr = Load->getPointerOperand();
  const Loop *L = LI->getLoopFor(Start);
  if (L) {
    // Climb to the outermost loop containing the load: a store anywhere in
    // the loop may execute on a later iteration before the load, so the
    // entire outermost loop body must be checked.
    const Loop *P = L;
    do {
      L = P;
      P = P->getParentLoop();
    } while (P);
    Checklist.insert(L->block_begin(), L->block_end());
    Start = L->getHeader();
  }

  // Add every block that can reach Start.
  DFS(Start, Checklist);
  for (auto &BB : Checklist) {
    // When the load is not inside a loop, only instructions preceding it in
    // its own block can clobber it; everywhere else scan the whole block
    // (from BB->end() backwards).
    BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
      BasicBlock::iterator(Load) : BB->end();
    // 'true' = the queried access is a load (read-only).
    auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
                                           StartIt, BB, Load);
    if (Q.isClobber() || Q.isUnknown())
      return true;
  }
  return false;
}
116 | | |
117 | 3.15k | void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) { |
118 | 3.15k | if (DA->isUniform(&I)) |
119 | 2.33k | setUniformMetadata(I.getParent()->getTerminator()); |
120 | 3.15k | } |
121 | | |
122 | 41.7k | void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { |
123 | 41.7k | Value *Ptr = I.getPointerOperand(); |
124 | 41.7k | if (!DA->isUniform(Ptr)) |
125 | 5.72k | return; |
126 | 35.9k | auto isGlobalLoad = [&](LoadInst &Load)->bool { |
127 | 749 | return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; |
128 | 749 | }; |
129 | 35.9k | // We're tracking up to the Function boundaries |
130 | 35.9k | // We cannot go beyond because of FunctionPass restrictions |
131 | 35.9k | // Thus we can ensure that memory not clobbered for memory |
132 | 35.9k | // operations that live in kernel only. |
133 | 35.9k | bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I)35.1k ; |
134 | 35.9k | Instruction *PtrI = dyn_cast<Instruction>(Ptr); |
135 | 35.9k | if (!PtrI && NotClobbered2.05k && isGlobalLoad(I)749 ) { |
136 | 388 | if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)321 ) { |
137 | 82 | // Lookup for the existing GEP |
138 | 82 | if (noClobberClones.count(Ptr)) { |
139 | 0 | PtrI = noClobberClones[Ptr]; |
140 | 82 | } else { |
141 | 82 | // Create GEP of the Value |
142 | 82 | Function *F = I.getParent()->getParent(); |
143 | 82 | Value *Idx = Constant::getIntegerValue( |
144 | 82 | Type::getInt32Ty(Ptr->getContext()), APInt(64, 0)); |
145 | 82 | // Insert GEP at the entry to make it dominate all uses |
146 | 82 | PtrI = GetElementPtrInst::Create( |
147 | 82 | Ptr->getType()->getPointerElementType(), Ptr, |
148 | 82 | ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI()); |
149 | 82 | } |
150 | 82 | I.replaceUsesOfWith(Ptr, PtrI); |
151 | 82 | } |
152 | 388 | } |
153 | 35.9k | |
154 | 35.9k | if (PtrI) { |
155 | 34.0k | setUniformMetadata(PtrI); |
156 | 34.0k | if (NotClobbered) |
157 | 32.1k | setNoClobberMetadata(PtrI); |
158 | 34.0k | } |
159 | 35.9k | } |
160 | | |
// No per-module setup is needed; all state is (re)computed per function in
// runOnFunction. Returning false reports the module as unmodified.
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
  return false;
}
164 | | |
165 | 25.6k | bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { |
166 | 25.6k | if (skipFunction(F)) |
167 | 13 | return false; |
168 | 25.6k | |
169 | 25.6k | DA = &getAnalysis<LegacyDivergenceAnalysis>(); |
170 | 25.6k | MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); |
171 | 25.6k | LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
172 | 25.6k | isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL; |
173 | 25.6k | |
174 | 25.6k | visit(F); |
175 | 25.6k | noClobberClones.clear(); |
176 | 25.6k | return true; |
177 | 25.6k | } |
178 | | |
/// Factory used by the AMDGPU target machine to add this pass to the
/// codegen pipeline.
FunctionPass *
llvm::createAMDGPUAnnotateUniformValues() {
  return new AMDGPUAnnotateUniformValues();
}