//===-- InferAddressSpaces.cpp - ------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// CUDA C/C++ includes memory space designation as variable type qualifiers
// (such as __global__ and __shared__). Knowing the space of a memory access
// allows CUDA compilers to emit faster PTX loads and stores. For example, a
// load from shared memory can be translated to `ld.shared` which is roughly
// 10% faster than a generic `ld` on an NVIDIA Tesla K40c.
//
// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
// compilers must infer the memory space of an address expression from
// type-qualified variables.
//
// LLVM IR uses non-zero (so-called specific) address spaces to represent
// memory spaces (e.g. addrspace(3) means shared memory). The Clang frontend
// places only type-qualified variables in specific address spaces, and then
// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
// (the so-called generic address space) for other instructions to use.
//
// For example, Clang translates the following CUDA code
//   __shared__ float a[10];
//   float v = a[i];
// to
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
//   %v = load float, float* %1 ; emits ld.f32
// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
// redirected to %0 (the generic version of @a).
//
// The optimization implemented in this file propagates specific address spaces
// from type-qualified variable declarations to their users. For example, it
// optimizes the above IR to
//   %1 = gep [10 x float], [10 x float] addrspace(3)* @a, i64 0, i64 %i
//   %v = load float, float addrspace(3)* %1 ; emits ld.shared.f32
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX
// codegen is able to emit ld.shared.f32 for %v.
//
// Address space inference works in two steps. First, it uses a data-flow
// analysis to infer as many generic pointers as possible to point to only one
// specific address space. In the above example, it can prove that %1 only
// points to addrspace(3). This algorithm was published in
//   CUDA: Compiling and optimizing for a GPU platform
//   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
//   ICCS 2012
//
// Then, address space inference replaces all refinable generic pointers with
// equivalent specific pointers.
//
// The major challenge of implementing this optimization is handling PHINodes,
// which may create loops in the data flow graph. This brings two complications.
//
// First, the data flow analysis in Step 1 needs to be circular. For example,
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
//   loop:
//     %y = phi [ %generic.input, %y2 ]
//     %y2 = getelementptr %y, 1
//     %v = load %y2
//     br ..., label %loop, ...
// proving %y specific requires proving both %generic.input and %y2 specific,
// but proving %y2 specific circles back to %y. To address this complication,
// the data flow analysis operates on a lattice:
//   uninitialized > specific address spaces > generic.
// All address expressions (our implementation considers phi, bitcast,
// addrspacecast, select, and getelementptr) start with the uninitialized
// address space. The monotone transfer function moves the address space of a
// pointer down a lattice path from uninitialized to specific and then to
// generic. A join operation of two different specific address spaces pushes
// the expression down to the generic address space. The analysis completes
// once it reaches a fixed point.
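//
// For instance, with NVPTX address space numbering (addrspace(0) is generic
// and addrspace(3) is shared), join(uninitialized, 3) = 3,
// join(3, 3) = 3, and join(3, 1) = 0: two distinct specific address spaces
// can only be reconciled as generic.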
//
// Second, IR rewriting in Step 2 also needs to be circular. For example,
// converting %y to addrspace(3) requires the compiler to know the converted
// %y2, but converting %y2 needs the converted %y. To address this complication,
// we break these cycles using "undef" placeholders. When converting an
// instruction `I` to a new address space, if its operand `Op` is not converted
// yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
// For instance, our algorithm first converts %y to
//   %y' = phi float addrspace(3)* [ %input, undef ]
// Then, it converts %y2 to
//   %y2' = getelementptr %y', 1
// Finally, it fixes the undef in %y' so that
//   %y' = phi float addrspace(3)* [ %input, %y2' ]
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

#define DEBUG_TYPE "infer-address-spaces"

using namespace llvm;

namespace {
static const unsigned UninitializedAddressSpace = ~0u;

using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;

/// \brief InferAddressSpaces
class InferAddressSpaces : public FunctionPass {
  /// Target specific address space whose uses should be replaced if
  /// possible.
  unsigned FlatAddrSpace;

public:
  static char ID;

  InferAddressSpaces() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  bool runOnFunction(Function &F) override;

private:
  // Returns the new address space of V if updated; otherwise, returns None.
  Optional<unsigned>
  updateAddressSpace(const Value &V,
                     const ValueToAddrSpaceMapTy &InferredAddrSpace) const;

  // Tries to infer the specific address space of each address expression in
  // Postorder.
  void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                          ValueToAddrSpaceMapTy *InferredAddrSpace) const;

  bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;

  // Changes the flat address expressions in function F to point to specific
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
  // all flat expressions in the use-def graph of function F.
  bool
  rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
                              const ValueToAddrSpaceMapTy &InferredAddrSpace,
                              Function *F) const;

  void appendsFlatAddressExpressionToPostorderStack(
      Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
      DenseSet<Value *> &Visited) const;

  bool rewriteIntrinsicOperands(IntrinsicInst *II,
                                Value *OldV, Value *NewV) const;
  void collectRewritableIntrinsicOperands(
      IntrinsicInst *II,
      std::vector<std::pair<Value *, bool>> &PostorderStack,
      DenseSet<Value *> &Visited) const;

  std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;

  Value *cloneValueWithNewAddressSpace(
      Value *V, unsigned NewAddrSpace,
      const ValueToValueMapTy &ValueWithNewAddrSpace,
      SmallVectorImpl<const Use *> *UndefUsesToFix) const;
  unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
};
} // end anonymous namespace

char InferAddressSpaces::ID = 0;

namespace llvm {
void initializeInferAddressSpacesPass(PassRegistry &);
}

INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                false, false)

// Returns true if V is an address expression.
// TODO: Currently, we consider only phi, bitcast, addrspacecast, select, and
// getelementptr operators.
static bool isAddressExpression(const Value &V) {
  if (!isa<Operator>(V))
    return false;

  switch (cast<Operator>(V).getOpcode()) {
  case Instruction::PHI:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    return true;
  default:
    return false;
  }
}

// Returns the pointer operands of V.
//
// Precondition: V is an address expression.
static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
  const Operator &Op = cast<Operator>(V);
  switch (Op.getOpcode()) {
  case Instruction::PHI: {
    auto IncomingValues = cast<PHINode>(Op).incoming_values();
    return SmallVector<Value *, 2>(IncomingValues.begin(),
                                   IncomingValues.end());
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
  case Instruction::GetElementPtr:
    return {Op.getOperand(0)};
  case Instruction::Select:
    return {Op.getOperand(1), Op.getOperand(2)};
  default:
    llvm_unreachable("Unexpected instruction type.");
  }
}

// TODO: Move logic to TTI?
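// For example (illustrative IR; exact overload suffixes depend on the pointee
// and result types), rewriting the pointer argument of
//   %sz = call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false)
// to an addrspace(3) pointer requires re-selecting the intrinsic overload:
//   %sz = call i64 @llvm.objectsize.i64.p3i8(i8 addrspace(3)* %p3, i1 false)
// which is what the Intrinsic::getDeclaration/setCalledFunction pair below
// does.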
bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
                                                  Value *OldV,
                                                  Value *NewV) const {
  Module *M = II->getParent()->getParent()->getParent();

  switch (II->getIntrinsicID()) {
  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec: {
    const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4));
    if (!IsVolatile || !IsVolatile->isZero())
      return false;

    LLVM_FALLTHROUGH;
  }
  case Intrinsic::objectsize: {
    Type *DestTy = II->getType();
    Type *SrcTy = NewV->getType();
    Function *NewDecl =
        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
    II->setArgOperand(0, NewV);
    II->setCalledFunction(NewDecl);
    return true;
  }
  default:
    return false;
  }
}

// TODO: Move logic to TTI?
void InferAddressSpaces::collectRewritableIntrinsicOperands(
    IntrinsicInst *II, std::vector<std::pair<Value *, bool>> &PostorderStack,
    DenseSet<Value *> &Visited) const {
  switch (II->getIntrinsicID()) {
  case Intrinsic::objectsize:
  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec:
    appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
                                                 PostorderStack, Visited);
    break;
  default:
    break;
  }
}

// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
    Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
    DenseSet<Value *> &Visited) const {
  assert(V->getType()->isPointerTy());

  // Generic addressing expressions may be hidden in nested constant
  // expressions.
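  // For example (illustrative IR), the flat gep below never appears as a
  // standalone instruction; it is folded into the operand of the store:
  //   store float 0.0, float* getelementptr inbounds ([10 x float],
  //     [10 x float]* addrspacecast ([10 x float] addrspace(3)* @a
  //     to [10 x float]*), i64 0, i64 1)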
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    // TODO: Look in non-address parts, like icmp operands.
    if (isAddressExpression(*CE) && Visited.insert(CE).second)
      PostorderStack.push_back(std::make_pair(CE, false));

    return;
  }

  if (isAddressExpression(*V) &&
      V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
    if (Visited.insert(V).second) {
      PostorderStack.push_back(std::make_pair(V, false));

      Operator *Op = cast<Operator>(V);
      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
          if (isAddressExpression(*CE) && Visited.insert(CE).second)
            PostorderStack.emplace_back(CE, false);
        }
      }
    }
  }
}

// Returns all flat address expressions in function F. The elements are ordered
// in postorder.
std::vector<WeakTrackingVH>
InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
  // This function implements a non-recursive postorder traversal of a partial
  // use-def graph of function F.
  std::vector<std::pair<Value *, bool>> PostorderStack;
  // The set of visited expressions.
  DenseSet<Value *> Visited;

  auto PushPtrOperand = [&](Value *Ptr) {
    appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack,
                                                 Visited);
  };

  // Look at operations that may be interesting to accelerate by moving to a
  // known address space. We aim at optimizing loads and stores, but pure
  // addressing calculations may also be faster.
  for (Instruction &I : instructions(F)) {
    if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
      if (!GEP->getType()->isVectorTy())
        PushPtrOperand(GEP->getPointerOperand());
    } else if (auto *LI = dyn_cast<LoadInst>(&I))
      PushPtrOperand(LI->getPointerOperand());
    else if (auto *SI = dyn_cast<StoreInst>(&I))
      PushPtrOperand(SI->getPointerOperand());
    else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
      PushPtrOperand(RMW->getPointerOperand());
    else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
      PushPtrOperand(CmpX->getPointerOperand());
    else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
      // For memset/memcpy/memmove, any pointer operand can be replaced.
      PushPtrOperand(MI->getRawDest());

      // Handle 2nd operand for memcpy/memmove.
      if (auto *MTI = dyn_cast<MemTransferInst>(MI))
        PushPtrOperand(MTI->getRawSource());
    } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
      collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
    else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
      // FIXME: Handle vectors of pointers
      if (Cmp->getOperand(0)->getType()->isPointerTy()) {
        PushPtrOperand(Cmp->getOperand(0));
        PushPtrOperand(Cmp->getOperand(1));
      }
    } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
      if (!ASC->getType()->isVectorTy())
        PushPtrOperand(ASC->getPointerOperand());
    }
  }

  std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
  while (!PostorderStack.empty()) {
    Value *TopVal = PostorderStack.back().first;
    // If the operands of the expression on the top are already explored,
    // adds that expression to the resultant postorder.
    if (PostorderStack.back().second) {
      if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace)
        Postorder.push_back(TopVal);
      PostorderStack.pop_back();
      continue;
    }
    // Otherwise, adds its operands to the stack and explores them.
    PostorderStack.back().second = true;
    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
      appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                   Visited);
    }
  }
  return Postorder;
}

// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
// of OperandUse.get() in the new address space. If the clone is not ready yet,
// returns an undef in the new address space as a placeholder.
static Value *operandWithNewAddressSpaceOrCreateUndef(
    const Use &OperandUse, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Value *Operand = OperandUse.get();

  Type *NewPtrTy =
      Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (Constant *C = dyn_cast<Constant>(Operand))
    return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);

  if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
    return NewOperand;

  UndefUsesToFix->push_back(&OperandUse);
  return UndefValue::get(NewPtrTy);
}

// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
// ValueWithNewAddrSpace. In that case, uses undef as a placeholder operand and
// adds that operand use to UndefUsesToFix so that the caller can fix them
// later.
//
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns a
// Value* instead of an Instruction*.
static Value *cloneInstructionWithNewAddressSpace(
    Instruction *I, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
  Type *NewPtrType =
      I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (I->getOpcode() == Instruction::AddrSpaceCast) {
    Value *Src = I->getOperand(0);
    // Because `I` is flat, the source address space must be specific.
    // Therefore, the inferred address space must be the source space, according
    // to our algorithm.
    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
    if (Src->getType() != NewPtrType)
      return new BitCastInst(Src, NewPtrType);
    return Src;
  }

  // Computes the converted pointer operands.
  SmallVector<Value *, 4> NewPointerOperands;
  for (const Use &OperandUse : I->operands()) {
    if (!OperandUse.get()->getType()->isPointerTy())
      NewPointerOperands.push_back(nullptr);
    else
      NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
          OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
  }

  switch (I->getOpcode()) {
  case Instruction::BitCast:
    return new BitCastInst(NewPointerOperands[0], NewPtrType);
  case Instruction::PHI: {
    assert(I->getType()->isPointerTy());
    PHINode *PHI = cast<PHINode>(I);
    PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
    for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
      unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
      NewPHI->addIncoming(NewPointerOperands[OperandNo],
                          PHI->getIncomingBlock(Index));
    }
    return NewPHI;
  }
  case Instruction::GetElementPtr: {
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), NewPointerOperands[0],
        SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
    NewGEP->setIsInBounds(GEP->isInBounds());
    return NewGEP;
  }
  case Instruction::Select: {
    assert(I->getType()->isPointerTy());
    return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
                              NewPointerOperands[2], "", nullptr, I);
  }
  default:
    llvm_unreachable("Unexpected opcode");
  }
}

// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
static Value *cloneConstantExprWithNewAddressSpace(
    ConstantExpr *CE, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace) {
  Type *TargetType =
      CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);

  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
    // Because CE is flat, the source address space must be specific.
    // Therefore, the inferred address space must be the source space according
    // to our algorithm.
    assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
           NewAddrSpace);
    return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
  }

  if (CE->getOpcode() == Instruction::BitCast) {
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(CE->getOperand(0)))
      return ConstantExpr::getBitCast(cast<Constant>(NewOperand), TargetType);
    return ConstantExpr::getAddrSpaceCast(CE, TargetType);
  }

  if (CE->getOpcode() == Instruction::Select) {
    Constant *Src0 = CE->getOperand(1);
    Constant *Src1 = CE->getOperand(2);
    if (Src0->getType()->getPointerAddressSpace() ==
        Src1->getType()->getPointerAddressSpace()) {

      return ConstantExpr::getSelect(
          CE->getOperand(0), ConstantExpr::getAddrSpaceCast(Src0, TargetType),
          ConstantExpr::getAddrSpaceCast(Src1, TargetType));
    }
  }

  // Computes the operands of the new constant expression.
  bool IsNew = false;
  SmallVector<Constant *, 4> NewOperands;
  for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
    Constant *Operand = CE->getOperand(Index);
    // If the address space of `Operand` needs to be modified, the new operand
    // with the new address space should already be in ValueWithNewAddrSpace
    // because (1) the constant expressions we consider (i.e. addrspacecast,
    // bitcast, and getelementptr) do not incur cycles in the data flow graph
    // and (2) this function is called on constant expressions in postorder.
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
      IsNew = true;
      NewOperands.push_back(cast<Constant>(NewOperand));
    } else {
      // Otherwise, reuses the old operand.
      NewOperands.push_back(Operand);
    }
  }

  // If !IsNew, we will replace the Value with itself. However, replaced values
  // are assumed to be wrapped in an addrspacecast later, so drop it now.
  if (!IsNew)
    return nullptr;

  if (CE->getOpcode() == Instruction::GetElementPtr) {
    // Needs to specify the source type while constructing a getelementptr
    // constant expression.
    return CE->getWithOperands(
        NewOperands, TargetType, /*OnlyIfReduced=*/false,
        NewOperands[0]->getType()->getPointerElementType());
  }

  return CE->getWithOperands(NewOperands, TargetType);
}

// Returns a clone of the value `V`, with its operands replaced as specified in
// ValueWithNewAddrSpace. This function is called on every flat address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
    Value *V, unsigned NewAddrSpace,
    const ValueToValueMapTy &ValueWithNewAddrSpace,
    SmallVectorImpl<const Use *> *UndefUsesToFix) const {
  // All values in Postorder are flat address expressions.
  assert(isAddressExpression(*V) &&
         V->getType()->getPointerAddressSpace() == FlatAddrSpace);

  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Value *NewV = cloneInstructionWithNewAddressSpace(
        I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
    if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
      if (NewI->getParent() == nullptr) {
        NewI->insertBefore(I);
        NewI->takeName(I);
      }
    }
    return NewV;
  }

  return cloneConstantExprWithNewAddressSpace(
      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
}

// Defines the join operation on the address space lattice (see the file header
// comments).
unsigned InferAddressSpaces::joinAddressSpaces(unsigned AS1,
                                               unsigned AS2) const {
  if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
    return FlatAddrSpace;

  if (AS1 == UninitializedAddressSpace)
    return AS2;
  if (AS2 == UninitializedAddressSpace)
    return AS1;

  // The join of two different specific address spaces is flat.
  return (AS1 == AS2) ? AS1 : FlatAddrSpace;
}

bool InferAddressSpaces::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  const TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  FlatAddrSpace = TTI.getFlatAddressSpace();
  if (FlatAddrSpace == UninitializedAddressSpace)
    return false;

  // Collects all flat address expressions in postorder.
  std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);

  // Runs a data-flow analysis to refine the address spaces of every expression
  // in Postorder.
  ValueToAddrSpaceMapTy InferredAddrSpace;
  inferAddressSpaces(Postorder, &InferredAddrSpace);

  // Changes the address spaces of the flat address expressions that are
  // inferred to point to a specific address space.
  return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
}

// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpaces::inferAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    ValueToAddrSpaceMapTy *InferredAddrSpace) const {
  SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
  // Initially, all expressions are in the uninitialized address space.
  for (Value *V : Postorder)
    (*InferredAddrSpace)[V] = UninitializedAddressSpace;

  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();

    // Tries to update the address space of V according to the address spaces
    // of its operands.
    DEBUG(dbgs() << "Updating the address space of\n  " << *V << '\n');
    Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
    if (!NewAS.hasValue())
      continue;
    // If any update is made, adds the users of V to the worklist because
    // their address spaces may also change.
    DEBUG(dbgs() << "  to " << NewAS.getValue() << '\n');
    (*InferredAddrSpace)[V] = NewAS.getValue();

    for (Value *User : V->users()) {
      // Skip if User is already in the worklist.
      if (Worklist.count(User))
        continue;

      auto Pos = InferredAddrSpace->find(User);
      // Our algorithm only updates the address spaces of flat address
      // expressions, which are those in InferredAddrSpace.
      if (Pos == InferredAddrSpace->end())
        continue;

      // Function updateAddressSpace moves the address space down a lattice
      // path. Therefore, nothing to do if User is already inferred as flat (the
      // bottom element in the lattice).
      if (Pos->second == FlatAddrSpace)
        continue;

      Worklist.insert(User);
    }
  }
}

Optional<unsigned> InferAddressSpaces::updateAddressSpace(
    const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
  assert(InferredAddrSpace.count(&V));

  // The new inferred address space equals the join of the address spaces
  // of all its pointer operands.
  unsigned NewAS = UninitializedAddressSpace;

  const Operator &Op = cast<Operator>(V);
  if (Op.getOpcode() == Instruction::Select) {
    Value *Src0 = Op.getOperand(1);
    Value *Src1 = Op.getOperand(2);

    auto I = InferredAddrSpace.find(Src0);
    unsigned Src0AS = (I != InferredAddrSpace.end()) ?
      I->second : Src0->getType()->getPointerAddressSpace();

    auto J = InferredAddrSpace.find(Src1);
    unsigned Src1AS = (J != InferredAddrSpace.end()) ?
      J->second : Src1->getType()->getPointerAddressSpace();

    auto *C0 = dyn_cast<Constant>(Src0);
    auto *C1 = dyn_cast<Constant>(Src1);

    // If one of the inputs is a constant, we may be able to do a constant
    // addrspacecast of it. Defer inferring the address space until the input
    // address space is known.
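    //
    // For instance (illustrative IR), in
    //   %s = select i1 %c, float* %p, float* null
    // null can be addrspacecast to whichever space %p ends up in, so we wait
    // for %p's address space instead of immediately joining with flat.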
    if ((C1 && Src0AS == UninitializedAddressSpace) ||
        (C0 && Src1AS == UninitializedAddressSpace))
      return None;

    if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS))
      NewAS = Src1AS;
    else if (C1 && isSafeToCastConstAddrSpace(C1, Src0AS))
      NewAS = Src0AS;
    else
      NewAS = joinAddressSpaces(Src0AS, Src1AS);
  } else {
    for (Value *PtrOperand : getPointerOperands(V)) {
      auto I = InferredAddrSpace.find(PtrOperand);
      unsigned OperandAS = I != InferredAddrSpace.end() ?
        I->second : PtrOperand->getType()->getPointerAddressSpace();

      // join(flat, *) = flat. So we can break if NewAS is already flat.
      NewAS = joinAddressSpaces(NewAS, OperandAS);
      if (NewAS == FlatAddrSpace)
        break;
    }
  }

  unsigned OldAS = InferredAddrSpace.lookup(&V);
  assert(OldAS != FlatAddrSpace);
  if (OldAS == NewAS)
    return None;
  return NewAS;
}

/// Returns true if \p U is the pointer operand of a memory instruction with
/// a single pointer operand that can have its address space changed by simply
/// mutating the use to a new value.
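///
/// For example (illustrative IR), the load below qualifies because its single
/// pointer operand can be mutated in place, leaving the instruction intact:
///   %v = load float, float* %p               ; before U.set(NewV)
///   %v = load float, float addrspace(3)* %p3 ; after U.set(NewV)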
static bool isSimplePointerUseValidToReplace(Use &U) {
  User *Inst = U.getUser();
  unsigned OpNo = U.getOperandNo();

  if (auto *LI = dyn_cast<LoadInst>(Inst))
    return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile();

  if (auto *SI = dyn_cast<StoreInst>(Inst))
    return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile();

  if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
    return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile();

  if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
           !CmpX->isVolatile();
  }

  return false;
}

/// Update memory intrinsic uses that require more complex processing than
/// simple memory instructions. These require re-mangling and may have multiple
/// pointer operands.
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
                                     Value *NewV) {
  IRBuilder<> B(MI);
  MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
  MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
  MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);

  if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
    B.CreateMemSet(NewV, MSI->getValue(),
                   MSI->getLength(), MSI->getAlignment(),
                   false, // isVolatile
                   TBAA, ScopeMD, NoAliasMD);
  } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
    Value *Src = MTI->getRawSource();
    Value *Dest = MTI->getRawDest();

    // Be careful in case this is a self-to-self copy.
    if (Src == OldV)
      Src = NewV;

    if (Dest == OldV)
      Dest = NewV;

    if (isa<MemCpyInst>(MTI)) {
      MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
      B.CreateMemCpy(Dest, Src, MTI->getLength(),
                     MTI->getAlignment(),
                     false, // isVolatile
                     TBAA, TBAAStruct, ScopeMD, NoAliasMD);
    } else {
      assert(isa<MemMoveInst>(MTI));
      B.CreateMemMove(Dest, Src, MTI->getLength(),
                      MTI->getAlignment(),
                      false, // isVolatile
                      TBAA, ScopeMD, NoAliasMD);
    }
  } else
    llvm_unreachable("unhandled MemIntrinsic");

  MI->eraseFromParent();
  return true;
}

// Returns true if it is OK to change the address space of constant \p C with
// a ConstantExpr addrspacecast.
bool InferAddressSpaces::isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const {
  assert(NewAS != UninitializedAddressSpace);

  unsigned SrcAS = C->getType()->getPointerAddressSpace();
  if (SrcAS == NewAS || isa<UndefValue>(C))
    return true;

  // Prevent illegal casts between different non-flat address spaces.
  if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace)
    return false;

  if (isa<ConstantPointerNull>(C))
    return true;

  if (auto *Op = dyn_cast<Operator>(C)) {
    // If we already have a constant addrspacecast, it should be safe to cast it
    // off.
    if (Op->getOpcode() == Instruction::AddrSpaceCast)
      return isSafeToCastConstAddrSpace(cast<Constant>(Op->getOperand(0)), NewAS);

    if (Op->getOpcode() == Instruction::IntToPtr &&
        Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
      return true;
  }

  return false;
}

static Value::use_iterator skipToNextUser(Value::use_iterator I,
                                          Value::use_iterator End) {
  User *CurUser = I->getUser();
  ++I;

  while (I != End && I->getUser() == CurUser)
    ++I;

  return I;
}

bool InferAddressSpaces::rewriteWithNewAddressSpaces(
    ArrayRef<WeakTrackingVH> Postorder,
    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
  // For each address expression to be modified, creates a clone of it with its
  // pointer operands converted to the new address space. Since the pointer
  // operands are converted, the clone is naturally in the new address space by
  // construction.
  ValueToValueMapTy ValueWithNewAddrSpace;
  SmallVector<const Use *, 32> UndefUsesToFix;
  for (Value *V : Postorder) {
    unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
    if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
      ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
          V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
    }
  }

  if (ValueWithNewAddrSpace.empty())
    return false;

  // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
  for (const Use *UndefUse : UndefUsesToFix) {
    User *V = UndefUse->getUser();
    User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
    unsigned OperandNo = UndefUse->getOperandNo();
    assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
    NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
  }

  SmallVector<Instruction *, 16> DeadInstructions;

  // Replaces the uses of the old address expressions with the new ones.
  for (const WeakTrackingVH &WVH : Postorder) {
    assert(WVH && "value was unexpectedly deleted");
    Value *V = WVH;
    Value *NewV = ValueWithNewAddrSpace.lookup(V);
    if (NewV == nullptr)
      continue;

    DEBUG(dbgs() << "Replacing the uses of " << *V
                 << "\n  with\n  " << *NewV << '\n');

    if (Constant *C = dyn_cast<Constant>(V)) {
      Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
                                                         C->getType());
      if (C != Replace) {
        DEBUG(dbgs() << "Inserting replacement const cast: "
              << Replace << ": " << *Replace << '\n');
        C->replaceAllUsesWith(Replace);
        V = Replace;
      }
    }

    Value::use_iterator I, E, Next;
    for (I = V->use_begin(), E = V->use_end(); I != E; ) {
      Use &U = *I;

      // Some users may see the same pointer operand in multiple operands. Skip
      // to the next instruction.
      I = skipToNextUser(I, E);

      if (isSimplePointerUseValidToReplace(U)) {
        // If V is used as the pointer operand of a compatible memory operation,
        // sets the pointer operand to NewV. This replacement does not change
        // the element type, so the resultant load/store is still valid.
        U.set(NewV);
        continue;
      }

      User *CurUser = U.getUser();
      // Handle more complex cases like intrinsics that need to be remangled.
      if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
        if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
          continue;
      }

      if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
        if (rewriteIntrinsicOperands(II, V, NewV))
          continue;
      }

      if (isa<Instruction>(CurUser)) {
        if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CurUser)) {
          // If we can infer that both pointers are in the same addrspace,
          // transform e.g.
          //   %cmp = icmp eq float* %p, %q
          // into
          //   %cmp = icmp eq float addrspace(3)* %new_p, %new_q

          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
          int SrcIdx = U.getOperandNo();
          int OtherIdx = (SrcIdx == 0) ? 1 : 0;
          Value *OtherSrc = Cmp->getOperand(OtherIdx);

          if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(OtherSrc)) {
            if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
              Cmp->setOperand(OtherIdx, OtherNewV);
              Cmp->setOperand(SrcIdx, NewV);
              continue;
            }
          }

          // Even if the type mismatches, we can cast the constant.
          if (auto *KOtherSrc = dyn_cast<Constant>(OtherSrc)) {
            if (isSafeToCastConstAddrSpace(KOtherSrc, NewAS)) {
              Cmp->setOperand(SrcIdx, NewV);
              Cmp->setOperand(OtherIdx,
                ConstantExpr::getAddrSpaceCast(KOtherSrc, NewV->getType()));
              continue;
            }
          }
        }

        if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
          if (ASC->getDestAddressSpace() == NewAS) {
            ASC->replaceAllUsesWith(NewV);
            DeadInstructions.push_back(ASC);
            continue;
          }
        }

        // Otherwise, replaces the use with flat(NewV).
        if (Instruction *I = dyn_cast<Instruction>(V)) {
          BasicBlock::iterator InsertPos = std::next(I->getIterator());
          while (isa<PHINode>(InsertPos))
            ++InsertPos;
          U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
        } else {
          U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
                                               V->getType()));
        }
      }
    }

    if (V->use_empty()) {
      if (Instruction *I = dyn_cast<Instruction>(V))
        DeadInstructions.push_back(I);
    }
  }

  for (Instruction *I : DeadInstructions)
    RecursivelyDeleteTriviallyDeadInstructions(I);

  return true;
}

FunctionPass *llvm::createInferAddressSpacesPass() {
  return new InferAddressSpaces();
}
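
// A minimal usage sketch (illustrative, not part of this file): clients
// typically schedule the pass through the legacy pass manager, e.g.
//
//   legacy::PassManager PM;
//   PM.add(createInferAddressSpacesPass());
//   PM.run(M); // M is an llvm::Module whose target defines a flat space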