//===-- NVPTXInferAddressSpaces.cpp - --------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// CUDA C/C++ includes memory space designation as variable type qualifiers
// (such as __global__ and __shared__). Knowing the space of a memory access
// allows CUDA compilers to emit faster PTX loads and stores. For example, a
// load from shared memory can be translated to `ld.shared`, which is roughly
// 10% faster than a generic `ld` on an NVIDIA Tesla K40c.
//
// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
// compilers must infer the memory space of an address expression from
// type-qualified variables.
//
// LLVM IR uses non-zero, so-called specific address spaces to represent memory
// spaces (e.g. addrspace(3) means shared memory). The Clang frontend places
// only type-qualified variables in specific address spaces, and then
// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
// (the so-called generic address space) for other instructions to use.
//
// For example, Clang translates the following CUDA code
//   __shared__ float a[10];
//   float v = a[i];
// to
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1 ; emits ld.f32
// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
// redirected to %0 (the generic version of @a).
//
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX codegen
// is able to emit ld.shared.f32 for %v.
//
// Address space inference works in two steps. First, it uses a data-flow
// analysis to infer as many generic pointers as possible to point to only one
// specific address space. In the above example, it can prove that %1 only
// points to addrspace(3). This algorithm was published in
//   CUDA: Compiling and optimizing for a GPU platform
//   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
//   ICCS 2012
//
// Then, address space inference replaces all refinable generic pointers with
// equivalent specific pointers.
//
// The major challenge of implementing this optimization is handling PHINodes,
// which may create loops in the data-flow graph. This brings two
// complications.
//
// First, the data-flow analysis in Step 1 needs to be circular. For example,
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
//   loop:
//     %y = phi [ %generic.input, %y2 ]
//     %y2 = getelementptr %y, 1
//     %v = load %y2
//     br ..., label %loop, ...
// proving %y specific requires proving both %generic.input and %y2 specific,
// but proving %y2 specific circles back to %y. To address this complication,
// the data-flow analysis operates on a lattice:
//   uninitialized > specific address spaces > generic.
// All address expressions (our implementation only considers phi, bitcast,
// addrspacecast, and getelementptr) start with the uninitialized address
// space. The monotone transfer function moves the address space of a pointer
// down a lattice path from uninitialized to specific and then to generic. A
// join operation of two different specific address spaces pushes the
// expression down to the generic address space. The analysis completes once it
// reaches a fixed point.
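//
// As a concrete sketch of the join on this lattice (in NVPTX, shared memory
// is addrspace(3) and global memory is addrspace(1)):
//   join(uninitialized, shared) = shared
//   join(shared, shared)        = shared
//   join(shared, global)        = generic  ; two different specific spaces
//   join(generic, x)            = generic  ; for any x
// joinAddressSpaces below implements exactly this table.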
//
// Second, IR rewriting in Step 2 also needs to be circular. For example,
// converting %y to addrspace(3) requires the compiler to know the converted
// %y2, but converting %y2 needs the converted %y. To address this complication,
// we break these cycles using "undef" placeholders. When converting an
// instruction `I` to a new address space, if its operand `Op` is not converted
// yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
// For instance, our algorithm first converts %y to
//   %y' = phi float addrspace(3)* [ %input, undef ]
// Then, it converts %y2 to
//   %y2' = getelementptr %y', 1
// Finally, it fixes the undef in %y' so that
//   %y' = phi float addrspace(3)* [ %input, %y2' ]
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "nvptx-infer-addrspace"

#include "NVPTX.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

namespace {
const unsigned ADDRESS_SPACE_UNINITIALIZED = (unsigned)-1;

using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;

/// \brief Infers the specific address space of every generic address
/// expression in a function and rewrites loads and stores accordingly.
class NVPTXInferAddressSpaces : public FunctionPass {
public:
  static char ID;

  NVPTXInferAddressSpaces() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
  }

  bool runOnFunction(Function &F) override;

private:
  // Returns the new address space of V if updated; otherwise, returns None.
  Optional<unsigned>
  updateAddressSpace(const Value &V,
                     const ValueToAddrSpaceMapTy &InferredAddrSpace);

  // Tries to infer the specific address space of each address expression in
  // Postorder.
  void inferAddressSpaces(const std::vector<Value *> &Postorder,
                          ValueToAddrSpaceMapTy *InferredAddrSpace);

  // Changes the generic address expressions in function F to point to specific
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
  // all generic address expressions in the use-def graph of function F.
  bool
  rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
                              const ValueToAddrSpaceMapTy &InferredAddrSpace,
                              Function *F);
};
} // end anonymous namespace

char NVPTXInferAddressSpaces::ID = 0;

namespace llvm {
void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
}

INITIALIZE_PASS(NVPTXInferAddressSpaces, "nvptx-infer-addrspace",
                "Infer address spaces", false, false)
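
// A usage sketch (assuming an opt binary built with the NVPTX backend): the
// registration above makes the pass runnable in isolation, e.g.
//   opt -nvptx-infer-addrspace -S input.ll -o output.ll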

// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, bitcast, addrspacecast, and
// getelementptr operators.
static bool isAddressExpression(const Value &V) {
  if (!isa<Operator>(V))
    return false;

  switch (cast<Operator>(V).getOpcode()) {
  case Instruction::PHI:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return true;
  default:
    return false;
  }
}

// Returns the pointer operands of V.
//
// Precondition: V is an address expression.
static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
  assert(isAddressExpression(V));
  const Operator &Op = cast<Operator>(V);
  switch (Op.getOpcode()) {
  case Instruction::PHI: {
    auto IncomingValues = cast<PHINode>(Op).incoming_values();
    return SmallVector<Value *, 2>(IncomingValues.begin(),
                                   IncomingValues.end());
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return {Op.getOperand(0)};
  default:
    llvm_unreachable("Unexpected instruction type.");
  }
}

// If V is an unvisited generic address expression, appends V to PostorderStack
// and marks it as visited.
static void appendsGenericAddressExpressionToPostorderStack(
    Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
    DenseSet<Value *> *Visited) {
  assert(V->getType()->isPointerTy());
  if (isAddressExpression(*V) &&
      V->getType()->getPointerAddressSpace() ==
          AddressSpace::ADDRESS_SPACE_GENERIC) {
    if (Visited->insert(V).second)
      PostorderStack->push_back(std::make_pair(V, false));
  }
}

// Returns all generic address expressions in function F. The elements are
// ordered in postorder.
static std::vector<Value *> collectGenericAddressExpressions(Function &F) {
  // This function implements a non-recursive postorder traversal of a partial
  // use-def graph of function F.
  std::vector<std::pair<Value *, bool>> PostorderStack;
  // The set of visited expressions.
  DenseSet<Value *> Visited;
  // We only explore address expressions that are reachable from loads and
  // stores for now because we aim at generating faster loads and stores.
  for (Instruction &I : instructions(F)) {
    if (isa<LoadInst>(I)) {
      appendsGenericAddressExpressionToPostorderStack(
          I.getOperand(0), &PostorderStack, &Visited);
    } else if (isa<StoreInst>(I)) {
      appendsGenericAddressExpressionToPostorderStack(
          I.getOperand(1), &PostorderStack, &Visited);
    }
  }

  std::vector<Value *> Postorder; // The resultant postorder.
  while (!PostorderStack.empty()) {
    // If the operands of the expression on the top are already explored,
    // adds that expression to the resultant postorder.
    if (PostorderStack.back().second) {
      Postorder.push_back(PostorderStack.back().first);
      PostorderStack.pop_back();
      continue;
    }
    // Otherwise, adds its operands to the stack and explores them.
    PostorderStack.back().second = true;
    for (Value *PtrOperand :
         getPointerOperands(*PostorderStack.back().first)) {
      appendsGenericAddressExpressionToPostorderStack(
          PtrOperand, &PostorderStack, &Visited);
    }
  }
  return Postorder;
}
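
// As an illustration with the file header's example: the load of %1 seeds the
// stack with %1 (a gep on %0, the addrspacecast of @a); the traversal then
// pushes the operand %0 and emits it before %1, so the resulting postorder is
// [%0, %1]. Operands always precede their users in the result.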

// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
// of OperandUse.get() in the new address space. If the clone is not ready yet,
// returns an undef in the new address space as a placeholder.
static Value *operandWithNewAddressSpaceOrCreateUndef(
    const Use &OperandUse, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Value *Operand = OperandUse.get();
  if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
    return NewOperand;

  UndefUsesToFix->push_back(&OperandUse);
  return UndefValue::get(
      Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace));
}

// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data-flow graph, an
// operand whose address space needs to be modified might not exist in
// ValueWithNewAddrSpace. In that case, this function uses undef as a
// placeholder operand and adds that operand use to UndefUsesToFix so that the
// caller can fix them later.
//
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns
// a Value* instead of an Instruction*.
static Value *cloneInstructionWithNewAddressSpace(
    Instruction *I, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Type *NewPtrType =
      I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (I->getOpcode() == Instruction::AddrSpaceCast) {
    Value *Src = I->getOperand(0);
    // Because `I` is generic, the source address space must be specific.
    // Therefore, the inferred address space must be the source space,
    // according to our algorithm.
    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    if (Src->getType() != NewPtrType)
      return new BitCastInst(Src, NewPtrType);
    return Src;
  }

  // Computes the converted pointer operands.
  SmallVector<Value *, 4> NewPointerOperands;
  for (const Use &OperandUse : I->operands()) {
    if (!OperandUse.get()->getType()->isPointerTy())
      NewPointerOperands.push_back(nullptr);
    else
      NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
          OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
  }

  switch (I->getOpcode()) {
  case Instruction::BitCast:
    return new BitCastInst(NewPointerOperands[0], NewPtrType);
  case Instruction::PHI: {
    assert(I->getType()->isPointerTy());
    PHINode *PHI = cast<PHINode>(I);
    PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
    for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
      unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
      NewPHI->addIncoming(NewPointerOperands[OperandNo],
                          PHI->getIncomingBlock(Index));
    }
    return NewPHI;
  }
  case Instruction::GetElementPtr: {
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), NewPointerOperands[0],
        SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
    NewGEP->setIsInBounds(GEP->isInBounds());
    return NewGEP;
  }
  default:
    llvm_unreachable("Unexpected opcode");
  }
}
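
// In the header's running example, cloning %y (a phi) before %y2 has been
// cloned produces %y' = phi float addrspace(3)* [ %input, undef ]; the use of
// undef is recorded in UndefUsesToFix so that the rewrite step can patch it
// once %y2' exists.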

// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
static Value *cloneConstantExprWithNewAddressSpace(
    ConstantExpr *CE, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace) {
  Type *TargetType =
      CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    // Because CE is generic, the source address space must be specific.
    // Therefore, the inferred address space must be the source space according
    // to our algorithm.
    assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
           NewAddrSpace);
    return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
  }

  // Computes the operands of the new constant expression.
  SmallVector<Constant *, 4> NewOperands;
  for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
    Constant *Operand = CE->getOperand(Index);
    // If the address space of `Operand` needs to be modified, the new operand
    // with the new address space should already be in ValueWithNewAddrSpace
    // because (1) the constant expressions we consider (i.e. addrspacecast,
    // bitcast, and getelementptr) do not incur cycles in the data-flow graph
    // and (2) this function is called on constant expressions in postorder.
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
      NewOperands.push_back(cast<Constant>(NewOperand));
    } else {
      // Otherwise, reuses the old operand.
      NewOperands.push_back(Operand);
    }
  }

  if (CE->getOpcode() == Instruction::GetElementPtr) {
    // Needs to specify the source type while constructing a getelementptr
    // constant expression.
    return CE->getWithOperands(
        NewOperands, TargetType, /*OnlyIfReduced=*/false,
        NewOperands[0]->getType()->getPointerElementType());
  }

  return CE->getWithOperands(NewOperands, TargetType);
}

// Returns a clone of the value `V`, with its operands replaced as specified in
// ValueWithNewAddrSpace. This function is called on every generic address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
static Value *
cloneValueWithNewAddressSpace(Value *V, unsigned NewAddrSpace,
                              const ValueToValueMapTy &ValueWithNewAddrSpace,
                              SmallVectorImpl<const Use *> *UndefUsesToFix) {
  // All values in Postorder are generic address expressions.
  assert(isAddressExpression(*V) &&
         V->getType()->getPointerAddressSpace() ==
             AddressSpace::ADDRESS_SPACE_GENERIC);

  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Value *NewV = cloneInstructionWithNewAddressSpace(
        I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
    if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
      if (NewI->getParent() == nullptr) {
        NewI->insertBefore(I);
        NewI->takeName(I);
      }
    }
    return NewV;
  }

  return cloneConstantExprWithNewAddressSpace(
      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
}

// Defines the join operation on the address space lattice (see the file header
// comments).
static unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) {
  if (AS1 == AddressSpace::ADDRESS_SPACE_GENERIC ||
      AS2 == AddressSpace::ADDRESS_SPACE_GENERIC)
    return AddressSpace::ADDRESS_SPACE_GENERIC;

  if (AS1 == ADDRESS_SPACE_UNINITIALIZED)
    return AS2;
  if (AS2 == ADDRESS_SPACE_UNINITIALIZED)
    return AS1;

  // The join of two different specific address spaces is generic.
  return AS1 == AS2 ? AS1 : (unsigned)AddressSpace::ADDRESS_SPACE_GENERIC;
}

bool NVPTXInferAddressSpaces::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // Collects all generic address expressions in postorder.
  std::vector<Value *> Postorder = collectGenericAddressExpressions(F);

  // Runs a data-flow analysis to refine the address spaces of every expression
  // in Postorder.
  ValueToAddrSpaceMapTy InferredAddrSpace;
  inferAddressSpaces(Postorder, &InferredAddrSpace);

  // Changes the address spaces of the generic address expressions that are
  // inferred to point to a specific address space.
  return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
}

void NVPTXInferAddressSpaces::inferAddressSpaces(
    const std::vector<Value *> &Postorder,
    ValueToAddrSpaceMapTy *InferredAddrSpace) {
  SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
  // Initially, all expressions are in the uninitialized address space.
  for (Value *V : Postorder)
    (*InferredAddrSpace)[V] = ADDRESS_SPACE_UNINITIALIZED;

  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();

    // Tries to update the address space of V according to the address spaces
    // of its operands.
    DEBUG(dbgs() << "Updating the address space of\n"
                 << "  " << *V << "\n");
    Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
    if (!NewAS.hasValue())
      continue;
    // If an update is made, adds the users of V to the worklist, because
    // their address spaces may need updating as well.
    DEBUG(dbgs() << "  to " << NewAS.getValue() << "\n");
    (*InferredAddrSpace)[V] = NewAS.getValue();

    for (Value *User : V->users()) {
      // Skip if User is already in the worklist.
      if (Worklist.count(User))
        continue;

      auto Pos = InferredAddrSpace->find(User);
      // Our algorithm only updates the address spaces of generic address
      // expressions, which are those in InferredAddrSpace.
      if (Pos == InferredAddrSpace->end())
        continue;

      // Function updateAddressSpace moves the address space down a lattice
      // path. Therefore, there is nothing to do if User is already inferred
      // as generic (the bottom element in the lattice).
      if (Pos->second == AddressSpace::ADDRESS_SPACE_GENERIC)
        continue;

      Worklist.insert(User);
    }
  }
}

Optional<unsigned> NVPTXInferAddressSpaces::updateAddressSpace(
    const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) {
  assert(InferredAddrSpace.count(&V));

  // The new inferred address space equals the join of the address spaces
  // of all its pointer operands.
  unsigned NewAS = ADDRESS_SPACE_UNINITIALIZED;
  for (Value *PtrOperand : getPointerOperands(V)) {
    unsigned OperandAS;
    if (InferredAddrSpace.count(PtrOperand))
      OperandAS = InferredAddrSpace.lookup(PtrOperand);
    else
      OperandAS = PtrOperand->getType()->getPointerAddressSpace();
    NewAS = joinAddressSpaces(NewAS, OperandAS);
    // join(generic, *) = generic. So we can break if NewAS is already generic.
    if (NewAS == AddressSpace::ADDRESS_SPACE_GENERIC)
      break;
  }

  unsigned OldAS = InferredAddrSpace.lookup(&V);
  assert(OldAS != AddressSpace::ADDRESS_SPACE_GENERIC);
  if (OldAS == NewAS)
    return None;
  return NewAS;
}
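
// For instance, in the file header's example, once %0 (the addrspacecast of
// @a) has been inferred as addrspace(3), the sole pointer operand of
// %1 = gep ... %0 ... contributes addrspace(3), so
// NewAS = join(uninitialized, addrspace(3)) = addrspace(3) and %1 moves down
// the lattice from uninitialized to addrspace(3).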

bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
    const std::vector<Value *> &Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) {
  // For each address expression to be modified, creates a clone of it with its
  // pointer operands converted to the new address space. Since the pointer
  // operands are converted, the clone is naturally in the new address space by
  // construction.
  ValueToValueMapTy ValueWithNewAddrSpace;
  SmallVector<const Use *, 32> UndefUsesToFix;
  for (Value *V : Postorder) {
    unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
    if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
      ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
          V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
    }
  }

  if (ValueWithNewAddrSpace.empty())
    return false;

  // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
  for (const Use *UndefUse : UndefUsesToFix) {
    User *V = UndefUse->getUser();
    User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
    unsigned OperandNo = UndefUse->getOperandNo();
    assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
    NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
  }

  // Replaces the uses of the old address expressions with the new ones.
  for (Value *V : Postorder) {
    Value *NewV = ValueWithNewAddrSpace.lookup(V);
    if (NewV == nullptr)
      continue;

    SmallVector<Use *, 4> Uses;
    for (Use &U : V->uses())
      Uses.push_back(&U);
    DEBUG(dbgs() << "Replacing the uses of " << *V << "\n  to\n  " << *NewV
                 << "\n");
    for (Use *U : Uses) {
      if (isa<LoadInst>(U->getUser()) ||
          (isa<StoreInst>(U->getUser()) && U->getOperandNo() == 1)) {
        // If V is used as the pointer operand of a load/store, sets the
        // pointer operand to NewV. This replacement does not change the
        // element type, so the resultant load/store is still valid.
        U->set(NewV);
      } else if (isa<Instruction>(U->getUser())) {
        // Otherwise, replaces the use with generic(NewV).
        // TODO: Some optimization opportunities are missed. For example, in
        //   %0 = icmp eq float* %p, %q
        // if both p and q are inferred to be shared, we can rewrite %0 as
        //   %0 = icmp eq float addrspace(3)* %new_p, %new_q
        // instead of the current
        //   %generic_p = addrspacecast float addrspace(3)* %new_p to float*
        //   %generic_q = addrspacecast float addrspace(3)* %new_q to float*
        //   %0 = icmp eq float* %generic_p, %generic_q
        if (Instruction *I = dyn_cast<Instruction>(V)) {
          BasicBlock::iterator InsertPos = std::next(I->getIterator());
          while (isa<PHINode>(InsertPos))
            ++InsertPos;
          U->set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
        } else {
          U->set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
                                                V->getType()));
        }
      }
    }
    if (V->use_empty())
      RecursivelyDeleteTriviallyDeadInstructions(V);
  }

  return true;
}

FunctionPass *llvm::createNVPTXInferAddressSpacesPass() {
  return new NVPTXInferAddressSpaces();
}