Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
Line
Count
Source
1
//===- InferAddressSpaces.cpp - Infer address spaces ----------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// CUDA C/C++ includes memory space designation as variable type qualifiers (such
10
// as __global__ and __shared__). Knowing the space of a memory access allows
11
// CUDA compilers to emit faster PTX loads and stores. For example, a load from
12
// shared memory can be translated to `ld.shared` which is roughly 10% faster
13
// than a generic `ld` on an NVIDIA Tesla K40c.
14
//
15
// Unfortunately, type qualifiers only apply to variable declarations, so CUDA
16
// compilers must infer the memory space of an address expression from
17
// type-qualified variables.
18
//
19
// LLVM IR uses non-zero (so-called specific) address spaces to represent memory
20
// spaces (e.g. addrspace(3) means shared memory). The Clang frontend
21
// places only type-qualified variables in specific address spaces, and then
22
// conservatively `addrspacecast`s each type-qualified variable to addrspace(0)
23
// (the so-called generic address space) for other instructions to use.
24
//
25
// For example, Clang translates the following CUDA code
26
//   __shared__ float a[10];
27
//   float v = a[i];
28
// to
29
//   %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
30
//   %1 = gep [10 x float], [10 x float]* %0, i64 0, i64 %i
31
//   %v = load float, float* %1 ; emits ld.f32
32
// @a is in addrspace(3) since it's type-qualified, but its use from %1 is
33
// redirected to %0 (the generic version of @a).
34
//
35
// The optimization implemented in this file propagates specific address spaces
36
// from type-qualified variable declarations to their users. For example, it
37
// optimizes the above IR to
38
//   %1 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
39
//   %v = load float addrspace(3)* %1 ; emits ld.shared.f32
40
// propagating the addrspace(3) from @a to %1. As a result, the NVPTX
41
// codegen is able to emit ld.shared.f32 for %v.
42
//
43
// Address space inference works in two steps. First, it uses a data-flow
44
// analysis to infer as many generic pointers as possible to point to only one
45
// specific address space. In the above example, it can prove that %1 only
46
// points to addrspace(3). This algorithm was published in
47
//   CUDA: Compiling and optimizing for a GPU platform
48
//   Chakrabarti, Grover, Aarts, Kong, Kudlur, Lin, Marathe, Murphy, Wang
49
//   ICCS 2012
50
//
51
// Then, address space inference replaces all refinable generic pointers with
52
// equivalent specific pointers.
53
//
54
// The major challenge of implementing this optimization is handling PHINodes,
55
// which may create loops in the data flow graph. This brings two complications.
56
//
57
// First, the data flow analysis in Step 1 needs to be circular. For example,
58
//     %generic.input = addrspacecast float addrspace(3)* %input to float*
59
//   loop:
60
//     %y = phi [ %generic.input, %y2 ]
61
//     %y2 = getelementptr %y, 1
62
//     %v = load %y2
63
//     br ..., label %loop, ...
64
// proving %y specific requires proving both %generic.input and %y2 specific,
65
// but proving %y2 specific circles back to %y. To address this complication,
66
// the data flow analysis operates on a lattice:
67
//   uninitialized > specific address spaces > generic.
68
// All address expressions (our implementation considers only phi, select, bitcast,
69
// addrspacecast, and getelementptr) start with the uninitialized address space.
70
// The monotone transfer function moves the address space of a pointer down a
71
// lattice path from uninitialized to specific and then to generic. A join
72
// operation of two different specific address spaces pushes the expression down
73
// to the generic address space. The analysis completes once it reaches a fixed
74
// point.
75
//
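// For illustration only (this example mirrors joinAddressSpaces below and is
// not part of the original algorithm description), the join operation behaves
// as follows:
//   join(uninitialized, AS)            = AS
//   join(AS, AS)                       = AS
//   join(AS1, AS2) where AS1 != AS2    = generic
//   join(generic, AS)                  = generic
//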
76
// Second, IR rewriting in Step 2 also needs to be circular. For example,
77
// converting %y to addrspace(3) requires the compiler to know the converted
78
// %y2, but converting %y2 needs the converted %y. To address this complication,
79
// we break these cycles using "undef" placeholders. When converting an
80
// instruction `I` to a new address space, if its operand `Op` is not converted
81
// yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
82
// For instance, our algorithm first converts %y to
83
//   %y' = phi float addrspace(3)* [ %input, undef ]
84
// Then, it converts %y2 to
85
//   %y2' = getelementptr %y', 1
86
// Finally, it fixes the undef in %y' so that
87
//   %y' = phi float addrspace(3)* [ %input, %y2' ]
88
//
89
//===----------------------------------------------------------------------===//
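// A minimal way to exercise this pass in isolation (a sketch; the exact triple
// and flag spelling are assumptions, not taken from this file) is to run it
// through opt on a target that defines a flat address space, for example:
//   opt -S -mtriple=amdgcn-- -infer-address-spaces input.ll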
90
91
#include "llvm/ADT/ArrayRef.h"
92
#include "llvm/ADT/DenseMap.h"
93
#include "llvm/ADT/DenseSet.h"
94
#include "llvm/ADT/None.h"
95
#include "llvm/ADT/Optional.h"
96
#include "llvm/ADT/SetVector.h"
97
#include "llvm/ADT/SmallVector.h"
98
#include "llvm/Analysis/TargetTransformInfo.h"
99
#include "llvm/Transforms/Utils/Local.h"
100
#include "llvm/IR/BasicBlock.h"
101
#include "llvm/IR/Constant.h"
102
#include "llvm/IR/Constants.h"
103
#include "llvm/IR/Function.h"
104
#include "llvm/IR/IRBuilder.h"
105
#include "llvm/IR/InstIterator.h"
106
#include "llvm/IR/Instruction.h"
107
#include "llvm/IR/Instructions.h"
108
#include "llvm/IR/IntrinsicInst.h"
109
#include "llvm/IR/Intrinsics.h"
110
#include "llvm/IR/LLVMContext.h"
111
#include "llvm/IR/Operator.h"
112
#include "llvm/IR/Type.h"
113
#include "llvm/IR/Use.h"
114
#include "llvm/IR/User.h"
115
#include "llvm/IR/Value.h"
116
#include "llvm/IR/ValueHandle.h"
117
#include "llvm/Pass.h"
118
#include "llvm/Support/Casting.h"
119
#include "llvm/Support/Compiler.h"
120
#include "llvm/Support/Debug.h"
121
#include "llvm/Support/ErrorHandling.h"
122
#include "llvm/Support/raw_ostream.h"
123
#include "llvm/Transforms/Scalar.h"
124
#include "llvm/Transforms/Utils/ValueMapper.h"
125
#include <cassert>
126
#include <iterator>
127
#include <limits>
128
#include <utility>
129
#include <vector>
130
131
#define DEBUG_TYPE "infer-address-spaces"
132
133
using namespace llvm;
134
135
static const unsigned UninitializedAddressSpace =
136
    std::numeric_limits<unsigned>::max();
137
138
namespace {
139
140
using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
141
142
/// InferAddressSpaces
143
class InferAddressSpaces : public FunctionPass {
144
  /// Target-specific address space whose uses should be replaced if
145
  /// possible.
146
  unsigned FlatAddrSpace;
147
148
public:
149
  static char ID;
150
151
  InferAddressSpaces() :
152
17
    FunctionPass(ID), FlatAddrSpace(UninitializedAddressSpace) {}
153
3.00k
  InferAddressSpaces(unsigned AS) : FunctionPass(ID), FlatAddrSpace(AS) {}
154
155
2.99k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
156
2.99k
    AU.setPreservesCFG();
157
2.99k
    AU.addRequired<TargetTransformInfoWrapperPass>();
158
2.99k
  }
159
160
  bool runOnFunction(Function &F) override;
161
162
private:
163
  // Returns the new address space of V if updated; otherwise, returns None.
164
  Optional<unsigned>
165
  updateAddressSpace(const Value &V,
166
                     const ValueToAddrSpaceMapTy &InferredAddrSpace) const;
167
168
  // Tries to infer the specific address space of each address expression in
169
  // Postorder.
170
  void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
171
                          ValueToAddrSpaceMapTy *InferredAddrSpace) const;
172
173
  bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
174
175
  // Changes the flat address expressions in function F to point to specific
176
  // address spaces if InferredAddrSpace says so. Postorder is the postorder of
177
  // all flat expressions in the use-def graph of function F.
178
  bool rewriteWithNewAddressSpaces(
179
      const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
180
      const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;
181
182
  void appendsFlatAddressExpressionToPostorderStack(
183
    Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
184
    DenseSet<Value *> &Visited) const;
185
186
  bool rewriteIntrinsicOperands(IntrinsicInst *II,
187
                                Value *OldV, Value *NewV) const;
188
  void collectRewritableIntrinsicOperands(
189
    IntrinsicInst *II,
190
    std::vector<std::pair<Value *, bool>> &PostorderStack,
191
    DenseSet<Value *> &Visited) const;
192
193
  std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
194
195
  Value *cloneValueWithNewAddressSpace(
196
    Value *V, unsigned NewAddrSpace,
197
    const ValueToValueMapTy &ValueWithNewAddrSpace,
198
    SmallVectorImpl<const Use *> *UndefUsesToFix) const;
199
  unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
200
};
201
202
} // end anonymous namespace
203
204
char InferAddressSpaces::ID = 0;
205
206
namespace llvm {
207
208
void initializeInferAddressSpacesPass(PassRegistry &);
209
210
} // end namespace llvm
211
212
INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
213
                false, false)
214
215
// Returns true if V is an address expression.
216
// TODO: Currently, we consider only phi, select, bitcast, addrspacecast, and
217
// getelementptr operators.
218
61.3k
static bool isAddressExpression(const Value &V) {
219
61.3k
  if (!isa<Operator>(V))
220
41.6k
    return false;
221
19.7k
222
19.7k
  const Operator &Op = cast<Operator>(V);
223
19.7k
  switch (Op.getOpcode()) {
224
19.7k
  case Instruction::PHI:
225
50
    assert(Op.getType()->isPointerTy());
226
50
    return true;
227
19.7k
  case Instruction::BitCast:
228
18.0k
  case Instruction::AddrSpaceCast:
229
18.0k
  case Instruction::GetElementPtr:
230
18.0k
    return true;
231
18.0k
  case Instruction::Select:
232
31
    return Op.getType()->isPointerTy();
233
18.0k
  default:
234
1.62k
    return false;
235
19.7k
  }
236
19.7k
}
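// For example (illustrative IR values, not taken from this file),
// isAddressExpression returns true for
//   %p = getelementptr float, float* %q, i64 %i
//   %c = addrspacecast float addrspace(3)* @s to float*
// and false for a load such as
//   %v = load float, float* %p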
237
238
// Returns the pointer operands of V.
239
//
240
// Precondition: V is an address expression.
241
4.00k
static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
242
4.00k
  const Operator &Op = cast<Operator>(V);
243
4.00k
  switch (Op.getOpcode()) {
244
4.00k
  case Instruction::PHI: {
245
50
    auto IncomingValues = cast<PHINode>(Op).incoming_values();
246
50
    return SmallVector<Value *, 2>(IncomingValues.begin(),
247
50
                                   IncomingValues.end());
248
4.00k
  }
249
4.00k
  case Instruction::BitCast:
250
3.93k
  case Instruction::AddrSpaceCast:
251
3.93k
  case Instruction::GetElementPtr:
252
3.93k
    return {Op.getOperand(0)};
253
3.93k
  case Instruction::Select:
254
22
    return {Op.getOperand(1), Op.getOperand(2)};
255
3.93k
  default:
256
0
    llvm_unreachable("Unexpected instruction type.");
257
4.00k
  }
258
4.00k
}
259
260
// TODO: Move logic to TTI?
261
bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
262
                                                  Value *OldV,
263
17
                                                  Value *NewV) const {
264
17
  Module *M = II->getParent()->getParent()->getParent();
265
17
266
17
  switch (II->getIntrinsicID()) {
267
17
  case Intrinsic::amdgcn_atomic_inc:
268
13
  case Intrinsic::amdgcn_atomic_dec:
269
13
  case Intrinsic::amdgcn_ds_fadd:
270
13
  case Intrinsic::amdgcn_ds_fmin:
271
13
  case Intrinsic::amdgcn_ds_fmax: {
272
13
    const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4));
273
13
    if (!IsVolatile || !IsVolatile->isZero())
274
5
      return false;
275
8
276
8
    LLVM_FALLTHROUGH;
277
8
  }
278
10
  case Intrinsic::objectsize: {
279
10
    Type *DestTy = II->getType();
280
10
    Type *SrcTy = NewV->getType();
281
10
    Function *NewDecl =
282
10
        Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
283
10
    II->setArgOperand(0, NewV);
284
10
    II->setCalledFunction(NewDecl);
285
10
    return true;
286
8
  }
287
8
  default:
288
2
    return false;
289
17
  }
290
17
}
291
292
// TODO: Move logic to TTI?
293
void InferAddressSpaces::collectRewritableIntrinsicOperands(
294
    IntrinsicInst *II, std::vector<std::pair<Value *, bool>> &PostorderStack,
295
13.1k
    DenseSet<Value *> &Visited) const {
296
13.1k
  switch (II->getIntrinsicID()) {
297
13.1k
  case Intrinsic::objectsize:
298
299
  case Intrinsic::amdgcn_atomic_inc:
299
299
  case Intrinsic::amdgcn_atomic_dec:
300
299
  case Intrinsic::amdgcn_ds_fadd:
301
299
  case Intrinsic::amdgcn_ds_fmin:
302
299
  case Intrinsic::amdgcn_ds_fmax:
303
299
    appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
304
299
                                                 PostorderStack, Visited);
305
299
    break;
306
12.8k
  default:
307
12.8k
    break;
308
13.1k
  }
309
13.1k
}
310
311
// Helper for collectFlatAddressExpressions below:
312
// If V is an unvisited flat address expression, appends V to PostorderStack
313
// and marks it as visited.
314
void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
315
    Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
316
61.3k
    DenseSet<Value *> &Visited) const {
317
61.3k
  assert(V->getType()->isPointerTy());
318
61.3k
319
61.3k
  // Generic addressing expressions may be hidden in nested constant
320
61.3k
  // expressions.
321
61.3k
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
322
306
    // TODO: Look in non-address parts, like icmp operands.
323
306
    if (isAddressExpression(*CE) && Visited.insert(CE).second)
324
159
      PostorderStack.push_back(std::make_pair(CE, false));
325
306
326
306
    return;
327
306
  }
328
61.0k
329
61.0k
  if (isAddressExpression(*V) &&
330
61.0k
      V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
331
2.15k
    if (Visited.insert(V).second) {
332
1.83k
      PostorderStack.push_back(std::make_pair(V, false));
333
1.83k
334
1.83k
      Operator *Op = cast<Operator>(V);
335
5.19k
      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
336
3.35k
        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
337
23
          if (isAddressExpression(*CE) && Visited.insert(CE).second)
338
7
            PostorderStack.emplace_back(CE, false);
339
23
        }
340
3.35k
      }
341
1.83k
    }
342
2.15k
  }
343
61.0k
}
344
345
// Returns all flat address expressions in function F. The elements are
346
// ordered in postorder.
347
std::vector<WeakTrackingVH>
348
24.3k
InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
349
24.3k
  // This function implements a non-recursive postorder traversal of a partial
350
24.3k
  // use-def graph of function F.
351
24.3k
  std::vector<std::pair<Value *, bool>> PostorderStack;
352
24.3k
  // The set of visited expressions.
353
24.3k
  DenseSet<Value *> Visited;
354
24.3k
355
59.0k
  auto PushPtrOperand = [&](Value *Ptr) {
356
59.0k
    appendsFlatAddressExpressionToPostorderStack(Ptr, PostorderStack,
357
59.0k
                                                 Visited);
358
59.0k
  };
359
24.3k
360
24.3k
  // Look at operations that may be interesting to accelerate by moving to a
361
24.3k
  // known address space. We aim mainly at loads and stores, but pure
362
24.3k
  // addressing calculations may also be faster.
363
155k
  for (Instruction &I : instructions(F)) {
364
155k
    if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
365
15.1k
      if (!GEP->getType()->isVectorTy())
366
15.1k
        PushPtrOperand(GEP->getPointerOperand());
367
140k
    } else if (auto *LI = dyn_cast<LoadInst>(&I))
368
18.3k
      PushPtrOperand(LI->getPointerOperand());
369
121k
    else if (auto *SI = dyn_cast<StoreInst>(&I))
370
22.0k
      PushPtrOperand(SI->getPointerOperand());
371
99.7k
    else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
372
1.80k
      PushPtrOperand(RMW->getPointerOperand());
373
97.9k
    else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
374
774
      PushPtrOperand(CmpX->getPointerOperand());
375
97.1k
    else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
376
110
      // For memset/memcpy/memmove, any pointer operand can be replaced.
377
110
      PushPtrOperand(MI->getRawDest());
378
110
379
110
      // Handle 2nd operand for memcpy/memmove.
380
110
      if (auto *MTI = dyn_cast<MemTransferInst>(MI))
381
100
        PushPtrOperand(MTI->getRawSource());
382
97.0k
    } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
383
13.1k
      collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
384
83.9k
    else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
385
4.00k
      // FIXME: Handle vectors of pointers
386
4.00k
      if (Cmp->getOperand(0)->getType()->isPointerTy()) {
387
73
        PushPtrOperand(Cmp->getOperand(0));
388
73
        PushPtrOperand(Cmp->getOperand(1));
389
73
      }
390
79.9k
    } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
391
569
      if (!ASC->getType()->isVectorTy())
392
564
        PushPtrOperand(ASC->getPointerOperand());
393
569
    }
394
155k
  }
395
24.3k
396
24.3k
  std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
397
28.3k
  while (!PostorderStack.empty()) {
398
4.00k
    Value *TopVal = PostorderStack.back().first;
399
4.00k
    // If the operands of the expression on the top are already explored,
400
4.00k
    // adds that expression to the resultant postorder.
401
4.00k
    if (PostorderStack.back().second) {
402
2.00k
      if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace)
403
1.93k
        Postorder.push_back(TopVal);
404
2.00k
      PostorderStack.pop_back();
405
2.00k
      continue;
406
2.00k
    }
407
2.00k
    // Otherwise, adds its operands to the stack and explores them.
408
2.00k
    PostorderStack.back().second = true;
409
2.04k
    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
410
2.04k
      appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
411
2.04k
                                                   Visited);
412
2.04k
    }
413
2.00k
  }
414
24.3k
  return Postorder;
415
24.3k
}
416
417
// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
418
// of OperandUse.get() in the new address space. If the clone is not ready yet,
419
// returns an undef in the new address space as a placeholder.
420
static Value *operandWithNewAddressSpaceOrCreateUndef(
421
    const Use &OperandUse, unsigned NewAddrSpace,
422
    const ValueToValueMapTy &ValueWithNewAddrSpace,
423
138
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
424
138
  Value *Operand = OperandUse.get();
425
138
426
138
  Type *NewPtrTy =
427
138
      Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
428
138
429
138
  if (Constant *C = dyn_cast<Constant>(Operand))
430
18
    return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);
431
120
432
120
  if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
433
44
    return NewOperand;
434
76
435
76
  UndefUsesToFix->push_back(&OperandUse);
436
76
  return UndefValue::get(NewPtrTy);
437
76
}
438
439
// Returns a clone of `I` with its operands converted to those specified in
440
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
441
// operand whose address space needs to be modified might not exist in
442
// ValueWithNewAddrSpace. In that case, uses undef as a placeholder operand and
443
// adds that operand use to UndefUsesToFix so that the caller can fix them later.
444
//
445
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
446
// from a pointer whose type already matches. Therefore, this function returns a
447
// Value* instead of an Instruction*.
448
static Value *cloneInstructionWithNewAddressSpace(
449
    Instruction *I, unsigned NewAddrSpace,
450
    const ValueToValueMapTy &ValueWithNewAddrSpace,
451
426
    SmallVectorImpl<const Use *> *UndefUsesToFix) {
452
426
  Type *NewPtrType =
453
426
      I->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
454
426
455
426
  if (I->getOpcode() == Instruction::AddrSpaceCast) {
456
309
    Value *Src = I->getOperand(0);
457
309
    // Because `I` is flat, the source address space must be specific.
458
309
    // Therefore, the inferred address space must be the source space, according
459
309
    // to our algorithm.
460
309
    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
461
309
    if (Src->getType() != NewPtrType)
462
10
      return new BitCastInst(Src, NewPtrType);
463
299
    return Src;
464
299
  }
465
117
466
117
  // Computes the converted pointer operands.
467
117
  SmallVector<Value *, 4> NewPointerOperands;
468
250
  for (const Use &OperandUse : I->operands()) {
469
250
    if (!OperandUse.get()->getType()->isPointerTy())
470
112
      NewPointerOperands.push_back(nullptr);
471
138
    else
472
138
      NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
473
138
                                     OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
474
250
  }
475
117
476
117
  switch (I->getOpcode()) {
477
117
  case Instruction::BitCast:
478
15
    return new BitCastInst(NewPointerOperands[0], NewPtrType);
479
117
  case Instruction::PHI: {
480
11
    assert(I->getType()->isPointerTy());
481
11
    PHINode *PHI = cast<PHINode>(I);
482
11
    PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
483
33
    for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
484
22
      unsigned OperandNo = PHINode::getOperandNumForIncomingValue(Index);
485
22
      NewPHI->addIncoming(NewPointerOperands[OperandNo],
486
22
                          PHI->getIncomingBlock(Index));
487
22
    }
488
11
    return NewPHI;
489
117
  }
490
117
  case Instruction::GetElementPtr: {
491
81
    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
492
81
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
493
81
        GEP->getSourceElementType(), NewPointerOperands[0],
494
81
        SmallVector<Value *, 4>(GEP->idx_begin(), GEP->idx_end()));
495
81
    NewGEP->setIsInBounds(GEP->isInBounds());
496
81
    return NewGEP;
497
117
  }
498
117
  case Instruction::Select:
499
10
    assert(I->getType()->isPointerTy());
500
10
    return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
501
10
                              NewPointerOperands[2], "", nullptr, I);
502
117
  default:
503
0
    llvm_unreachable("Unexpected opcode");
504
117
  }
505
117
}
506
507
// Similar to cloneInstructionWithNewAddressSpace, returns a clone of the
508
// constant expression `CE` with its operands replaced as specified in
509
// ValueWithNewAddrSpace.
510
static Value *cloneConstantExprWithNewAddressSpace(
511
  ConstantExpr *CE, unsigned NewAddrSpace,
512
98
  const ValueToValueMapTy &ValueWithNewAddrSpace) {
513
98
  Type *TargetType =
514
98
    CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
515
98
516
98
  if (CE->getOpcode() == Instruction::AddrSpaceCast) {
517
70
    // Because CE is flat, the source address space must be specific.
518
70
    // Therefore, the inferred address space must be the source space according
519
70
    // to our algorithm.
520
70
    assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
521
70
           NewAddrSpace);
522
70
    return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
523
70
  }
524
28
525
28
  if (CE->getOpcode() == Instruction::BitCast) {
526
9
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(CE->getOperand(0)))
527
8
      return ConstantExpr::getBitCast(cast<Constant>(NewOperand), TargetType);
528
1
    return ConstantExpr::getAddrSpaceCast(CE, TargetType);
529
1
  }
530
19
531
19
  if (CE->getOpcode() == Instruction::Select) {
532
2
    Constant *Src0 = CE->getOperand(1);
533
2
    Constant *Src1 = CE->getOperand(2);
534
2
    if (Src0->getType()->getPointerAddressSpace() ==
535
2
        Src1->getType()->getPointerAddressSpace()) {
536
2
537
2
      return ConstantExpr::getSelect(
538
2
          CE->getOperand(0), ConstantExpr::getAddrSpaceCast(Src0, TargetType),
539
2
          ConstantExpr::getAddrSpaceCast(Src1, TargetType));
540
2
    }
541
17
  }
542
17
543
17
  // Computes the operands of the new constant expression.
544
17
  bool IsNew = false;
545
17
  SmallVector<Constant *, 4> NewOperands;
546
68
  for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
547
51
    Constant *Operand = CE->getOperand(Index);
548
51
    // If the address space of `Operand` needs to be modified, the new operand
549
51
    // with the new address space should already be in ValueWithNewAddrSpace
550
51
    // because (1) the constant expressions we consider (i.e. addrspacecast,
551
51
    // bitcast, and getelementptr) do not incur cycles in the data flow graph
552
51
    // and (2) this function is called on constant expressions in postorder.
553
51
    if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand)) {
554
15
      IsNew = true;
555
15
      NewOperands.push_back(cast<Constant>(NewOperand));
556
15
      continue;
557
15
    }
558
36
    if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
559
2
      if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
560
2
              CExpr, NewAddrSpace, ValueWithNewAddrSpace)) {
561
2
        IsNew = true;
562
2
        NewOperands.push_back(cast<Constant>(NewOperand));
563
2
        continue;
564
2
      }
565
34
    // Otherwise, reuses the old operand.
566
34
    NewOperands.push_back(Operand);
567
34
  }
568
17
569
17
  // If !IsNew, we will replace the Value with itself. However, replaced values
570
17
  // are assumed to be wrapped in an addrspacecast later, so drop it now.
571
17
  if (!IsNew)
572
0
    return nullptr;
573
17
574
17
  if (CE->getOpcode() == Instruction::GetElementPtr) {
575
17
    // Needs to specify the source type while constructing a getelementptr
576
17
    // constant expression.
577
17
    return CE->getWithOperands(
578
17
      NewOperands, TargetType, /*OnlyIfReduced=*/false,
579
17
      NewOperands[0]->getType()->getPointerElementType());
580
17
  }
581
0
582
0
  return CE->getWithOperands(NewOperands, TargetType);
583
0
}
584
585
// Returns a clone of the value `V`, with its operands replaced as specified in
586
// ValueWithNewAddrSpace. This function is called on every flat address
587
// expression whose address space needs to be modified, in postorder.
588
//
589
// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
590
Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
591
  Value *V, unsigned NewAddrSpace,
592
  const ValueToValueMapTy &ValueWithNewAddrSpace,
593
522
  SmallVectorImpl<const Use *> *UndefUsesToFix) const {
594
522
  // All values in Postorder are flat address expressions.
595
522
  assert(isAddressExpression(*V) &&
596
522
         V->getType()->getPointerAddressSpace() == FlatAddrSpace);
597
522
598
522
  if (Instruction *I = dyn_cast<Instruction>(V)) {
599
426
    Value *NewV = cloneInstructionWithNewAddressSpace(
600
426
      I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
601
426
    if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
602
281
      if (NewI->getParent() == nullptr) {
603
127
        NewI->insertBefore(I);
604
127
        NewI->takeName(I);
605
127
      }
606
281
    }
607
426
    return NewV;
608
426
  }
609
96
610
96
  return cloneConstantExprWithNewAddressSpace(
611
96
    cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
612
96
}
613
614
// Defines the join operation on the address space lattice (see the file header
615
// comments).
616
unsigned InferAddressSpaces::joinAddressSpaces(unsigned AS1,
617
2.05k
                                               unsigned AS2) const {
618
2.05k
  if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
619
1.40k
    return FlatAddrSpace;
620
653
621
653
  if (AS1 == UninitializedAddressSpace)
622
610
    return AS2;
623
43
  if (AS2 == UninitializedAddressSpace)
624
18
    return AS1;
625
25
626
25
  // The join of two different specific address spaces is flat.
627
25
  return (AS1 == AS2) ? AS1 : FlatAddrSpace;
628
25
}
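// For illustration (an assumption for the example only: the AMDGPU convention
// where addrspace(0) is the flat space and addrspace(1)/addrspace(3) are the
// global/LDS spaces):
//   joinAddressSpaces(3, 3)                         == 3  // same specific AS
//   joinAddressSpaces(1, 3)                         == 0  // differ -> flat
//   joinAddressSpaces(UninitializedAddressSpace, 3) == 3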
629
630
29.1k
bool InferAddressSpaces::runOnFunction(Function &F) {
631
29.1k
  if (skipFunction(F))
632
8
    return false;
633
29.1k
634
29.1k
  const TargetTransformInfo &TTI =
635
29.1k
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
636
29.1k
637
29.1k
  if (FlatAddrSpace == UninitializedAddressSpace) {
638
7.12k
    FlatAddrSpace = TTI.getFlatAddressSpace();
639
7.12k
    if (FlatAddrSpace == UninitializedAddressSpace)
640
4.83k
      return false;
641
24.3k
  }
642
24.3k
643
24.3k
  // Collects all flat address expressions in postorder.
644
24.3k
  std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);
645
24.3k
646
24.3k
  // Runs a data-flow analysis to refine the address spaces of every expression
647
24.3k
  // in Postorder.
648
24.3k
  ValueToAddrSpaceMapTy InferredAddrSpace;
649
24.3k
  inferAddressSpaces(Postorder, &InferredAddrSpace);
650
24.3k
651
24.3k
  // Changes the address spaces of the flat address expressions that are inferred
652
24.3k
  // to point to a specific address space.
653
24.3k
  return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F);
654
24.3k
}
655
656
// Constants need to be tracked through RAUW to handle cases with nested
657
// constant expressions, so wrap values in WeakTrackingVH.
658
void InferAddressSpaces::inferAddressSpaces(
659
    ArrayRef<WeakTrackingVH> Postorder,
660
24.3k
    ValueToAddrSpaceMapTy *InferredAddrSpace) const {
661
24.3k
  SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
662
24.3k
  // Initially, all expressions are in the uninitialized address space.
663
24.3k
  for (Value *V : Postorder)
664
1.93k
    (*InferredAddrSpace)[V] = UninitializedAddressSpace;
665
24.3k
666
26.4k
  while (!Worklist.empty()) {
667
2.06k
    Value *V = Worklist.pop_back_val();
668
2.06k
669
2.06k
    // Tries to update the address space of the stack top according to the
670
2.06k
    // address spaces of its operands.
671
2.06k
    LLVM_DEBUG(dbgs() << "Updating the address space of\n  " << *V << '\n');
672
2.06k
    Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
673
2.06k
    if (!NewAS.hasValue())
674
116
      continue;
675
1.94k
    // If any update is made, adds the users of V to the worklist because
676
1.94k
    // their address spaces may need to be updated as well.
677
1.94k
    LLVM_DEBUG(dbgs() << "  to " << NewAS.getValue() << '\n');
678
1.94k
    (*InferredAddrSpace)[V] = NewAS.getValue();
679
1.94k
680
2.13k
    for (Value *User : V->users()) {
681
2.13k
      // Skip if User is already in the worklist.
682
2.13k
      if (Worklist.count(User))
683
238
        continue;
684
1.89k
685
1.89k
      auto Pos = InferredAddrSpace->find(User);
686
1.89k
      // Our algorithm only updates the address spaces of flat address
687
1.89k
      // expressions, which are those in InferredAddrSpace.
688
1.89k
      if (Pos == InferredAddrSpace->end())
689
1.77k
        continue;
690
123
691
123
      // Function updateAddressSpace moves the address space down a lattice
692
123
      // path. Therefore, nothing to do if User is already inferred as flat (the
693
123
      // bottom element in the lattice).
694
123
      if (Pos->second == FlatAddrSpace)
695
1
        continue;
696
122
697
122
      Worklist.insert(User);
698
122
    }
699
1.94k
  }
700
24.3k
}
701
702
Optional<unsigned> InferAddressSpaces::updateAddressSpace(
703
2.06k
    const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
704
2.06k
  assert(InferredAddrSpace.count(&V));
705
2.06k
706
2.06k
  // The new inferred address space equals the join of the address spaces
707
2.06k
  // of all its pointer operands.
708
2.06k
  unsigned NewAS = UninitializedAddressSpace;
709
2.06k
710
2.06k
  const Operator &Op = cast<Operator>(V);
711
2.06k
  if (Op.getOpcode() == Instruction::Select) {
712
59
    Value *Src0 = Op.getOperand(1);
713
59
    Value *Src1 = Op.getOperand(2);
714
59
715
59
    auto I = InferredAddrSpace.find(Src0);
716
59
    unsigned Src0AS = (I != InferredAddrSpace.end()) ?
717
53
      I->second : Src0->getType()->getPointerAddressSpace();
718
59
719
59
    auto J = InferredAddrSpace.find(Src1);
720
59
    unsigned Src1AS = (J != InferredAddrSpace.end()) ?
721
51
      J->second : Src1->getType()->getPointerAddressSpace();
722
59
723
59
    auto *C0 = dyn_cast<Constant>(Src0);
724
59
    auto *C1 = dyn_cast<Constant>(Src1);
725
59
726
59
    // If one of the inputs is a constant, we may be able to do a constant
727
59
    // addrspacecast of it. Defer inferring the address space until the input
728
59
    // address space is known.
729
59
    if ((C1 && Src0AS == UninitializedAddressSpace) ||
730
59
        (C0 && Src1AS == UninitializedAddressSpace))
731
26
      return None;
732
33
733
33
    if (C0 && 
isSafeToCastConstAddrSpace(C0, Src1AS)12
)
734
4
      NewAS = Src1AS;
735
29
    else if (C1 && 
isSafeToCastConstAddrSpace(C1, Src0AS)18
)
736
8
      NewAS = Src0AS;
737
21
    else
738
21
      NewAS = joinAddressSpaces(Src0AS, Src1AS);
739
2.00k
  } else {
740
2.03k
    for (Value *PtrOperand : getPointerOperands(V)) {
741
2.03k
      auto I = InferredAddrSpace.find(PtrOperand);
742
2.03k
      unsigned OperandAS = I != InferredAddrSpace.end() ?
743
1.60k
        I->second : PtrOperand->getType()->getPointerAddressSpace();
744
2.03k
745
2.03k
      // join(flat, *) = flat. So we can break if NewAS is already flat.
746
2.03k
      NewAS = joinAddressSpaces(NewAS, OperandAS);
747
2.03k
      if (NewAS == FlatAddrSpace)
748
1.40k
        break;
749
2.03k
    }
750
2.00k
  }
751
2.06k
752
2.06k
  unsigned OldAS = InferredAddrSpace.lookup(&V);
753
2.03k
  assert(OldAS != FlatAddrSpace);
754
2.03k
  if (OldAS == NewAS)
755
90
    return None;
756
1.94k
  return NewAS;
757
1.94k
}
758
759
/// Returns true if \p U is the pointer operand of a memory instruction with
760
/// a single pointer operand that can have its address space changed by simply
761
/// mutating the use to a new value. If the memory instruction is volatile,
762
/// return true only if the target allows the memory instruction to be volatile
763
/// in the new address space.
764
static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
765
669
                                             Use &U, unsigned AddrSpace) {
766
669
  User *Inst = U.getUser();
767
669
  unsigned OpNo = U.getOperandNo();
768
669
  bool VolatileIsAllowed = false;
769
669
  if (auto *I = dyn_cast<Instruction>(Inst))
770
595
    VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace);
771
669
772
669
  if (auto *LI = dyn_cast<LoadInst>(Inst))
773
164
    return OpNo == LoadInst::getPointerOperandIndex() &&
774
164
           (VolatileIsAllowed || !LI->isVolatile());
775
505
776
505
  if (auto *SI = dyn_cast<StoreInst>(Inst))
777
177
    return OpNo == StoreInst::getPointerOperandIndex() &&
778
177
           (VolatileIsAllowed || !SI->isVolatile());
779
328
780
328
  if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
781
4
    return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
782
4
           (VolatileIsAllowed || !RMW->isVolatile());
783
324
784
324
  if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst))
785
4
    return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
786
4
           (VolatileIsAllowed || !CmpX->isVolatile());
787
320
788
320
  return false;
789
320
}
790
791
/// Update memory intrinsic uses that require more complex processing than
792
/// simple memory instructions. These require re-mangling and may have multiple
793
/// pointer operands.
794
static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
795
17
                                     Value *NewV) {
796
17
  IRBuilder<> B(MI);
797
17
  MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
798
17
  MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
799
17
  MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
800
17
801
17
  if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
802
4
    B.CreateMemSet(NewV, MSI->getValue(),
803
4
                   MSI->getLength(), MSI->getDestAlignment(),
804
4
                   false, // isVolatile
805
4
                   TBAA, ScopeMD, NoAliasMD);
806
13
  } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
807
13
    Value *Src = MTI->getRawSource();
808
13
    Value *Dest = MTI->getRawDest();
809
13
810
13
    // Be careful in case this is a self-to-self copy.
811
13
    if (Src == OldV)
812
9
      Src = NewV;
813
13
814
13
    if (Dest == OldV)
815
5
      Dest = NewV;
816
13
817
13
    if (isa<MemCpyInst>(MTI)) {
818
12
      MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
819
12
      B.CreateMemCpy(Dest, MTI->getDestAlignment(),
820
12
                     Src, MTI->getSourceAlignment(),
821
12
                     MTI->getLength(),
822
12
                     false, // isVolatile
823
12
                     TBAA, TBAAStruct, ScopeMD, NoAliasMD);
824
12
    } else {
825
1
      assert(isa<MemMoveInst>(MTI));
826
1
      B.CreateMemMove(Dest, MTI->getDestAlignment(),
827
1
                      Src, MTI->getSourceAlignment(),
828
1
                      MTI->getLength(),
829
1
                      false, // isVolatile
830
1
                      TBAA, ScopeMD, NoAliasMD);
831
1
    }
832
13
  } else
833
13
    llvm_unreachable("unhandled MemIntrinsic");
834
17
835
17
  MI->eraseFromParent();
836
17
  return true;
837
17
}
838
839
// Returns true if it is OK to change the address space of constant \p C with
840
// a ConstantExpr addrspacecast.
841
71
bool InferAddressSpaces::isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const {
842
71
  assert(NewAS != UninitializedAddressSpace);
843
71
844
71
  unsigned SrcAS = C->getType()->getPointerAddressSpace();
845
71
  if (SrcAS == NewAS || isa<UndefValue>(C))
846
11
    return true;
847
60
848
60
  // Prevent illegal casts between different non-flat address spaces.
849
60
  if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace)
850
24
    return false;
851
36
852
36
  if (isa<ConstantPointerNull>(C))
853
4
    return true;
854
32
855
32
  if (auto *Op = dyn_cast<Operator>(C)) {
856
32
    // If we already have a constant addrspacecast, it should be safe to cast it
857
32
    // off.
858
32
    if (Op->getOpcode() == Instruction::AddrSpaceCast)
859
29
      return isSafeToCastConstAddrSpace(cast<Constant>(Op->getOperand(0)), NewAS);
860
3
861
3
    if (Op->getOpcode() == Instruction::IntToPtr &&
862
3
        Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
863
3
      return true;
864
0
  }
865
0
866
0
  return false;
867
0
}
868
869
static Value::use_iterator skipToNextUser(Value::use_iterator I,
870
669
                                          Value::use_iterator End) {
871
669
  User *CurUser = I->getUser();
872
669
  ++I;
873
669
874
671
  while (I != End && I->getUser() == CurUser)
875
2
    ++I;
876
669
877
669
  return I;
878
669
}
879
880
bool InferAddressSpaces::rewriteWithNewAddressSpaces(
881
    const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
882
24.3k
    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
883
24.3k
  // For each address expression to be modified, creates a clone of it with its
884
24.3k
  // pointer operands converted to the new address space. Since the pointer
885
24.3k
  // operands are converted, the clone is naturally in the new address space by
886
24.3k
  // construction.
887
24.3k
  ValueToValueMapTy ValueWithNewAddrSpace;
888
24.3k
  SmallVector<const Use *, 32> UndefUsesToFix;
889
24.3k
  for (Value* V : Postorder) {
890
1.93k
    unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
891
1.93k
    if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
892
522
      ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
893
522
        V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
894
522
    }
895
1.93k
  }
896
24.3k
897
24.3k
  if (ValueWithNewAddrSpace.empty())
898
24.0k
    return false;
899
250
900
250
  // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
901
250
  for (const Use *UndefUse : UndefUsesToFix) {
902
76
    User *V = UndefUse->getUser();
903
76
    User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
904
76
    unsigned OperandNo = UndefUse->getOperandNo();
905
76
    assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
906
76
    NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
907
76
  }
908
250
909
250
  SmallVector<Instruction *, 16> DeadInstructions;
910
250
911
250
  // Replaces the uses of the old address expressions with the new ones.
912
534
  for (const WeakTrackingVH &WVH : Postorder) {
913
534
    assert(WVH && "value was unexpectedly deleted");
914
534
    Value *V = WVH;
915
534
    Value *NewV = ValueWithNewAddrSpace.lookup(V);
916
534
    if (NewV == nullptr)
917
12
      continue;
918
522
919
522
    LLVM_DEBUG(dbgs() << "Replacing the uses of " << *V << "\n  with\n  "
920
522
                      << *NewV << '\n');
921
522
922
522
    if (Constant *C = dyn_cast<Constant>(V)) {
923
96
      Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
924
96
                                                         C->getType());
925
96
      if (C != Replace) {
926
19
        LLVM_DEBUG(dbgs() << "Inserting replacement const cast: " << Replace
927
19
                          << ": " << *Replace << '\n');
928
19
        C->replaceAllUsesWith(Replace);
929
19
        V = Replace;
930
19
      }
931
96
    }
932
522
933
522
    Value::use_iterator I, E, Next;
934
1.19k
    for (I = V->use_begin(), E = V->use_end(); I != E; ) {
935
669
      Use &U = *I;
936
669
937
669
      // Some users may see the same pointer operand in multiple operands. Skip
938
669
      // to the next instruction.
939
669
      I = skipToNextUser(I, E);
940
669
941
669
      if (isSimplePointerUseValidToReplace(
942
669
              TTI, U, V->getType()->getPointerAddressSpace())) {
943
316
        // If V is used as the pointer operand of a compatible memory operation,
944
316
        // sets the pointer operand to NewV. This replacement does not change
945
316
        // the element type, so the resultant load/store is still valid.
946
316
        U.set(NewV);
947
316
        continue;
948
316
      }
949
353
950
353
      User *CurUser = U.getUser();
951
353
      // Handle more complex cases like intrinsic that need to be remangled.
952
353
      if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
953
19
        if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
954
17
          continue;
955
336
      }
956
336
957
336
      if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
958
17
        if (rewriteIntrinsicOperands(II, V, NewV))
959
10
          continue;
960
326
      }
961
326
962
326
      if (isa<Instruction>(CurUser)) {
963
252
        if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CurUser)) {
964
34
          // If we can infer that both pointers are in the same addrspace,
965
34
          // transform e.g.
966
34
          //   %cmp = icmp eq float* %p, %q
967
34
          // into
968
34
          //   %cmp = icmp eq float addrspace(3)* %new_p, %new_q
969
34
970
34
          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
971
34
          int SrcIdx = U.getOperandNo();
972
34
          int OtherIdx = (SrcIdx == 0) ? 1 : 0;
973
34
          Value *OtherSrc = Cmp->getOperand(OtherIdx);
974
34
975
34
          if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(OtherSrc)) {
976
17
            if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
977
7
              Cmp->setOperand(OtherIdx, OtherNewV);
978
7
              Cmp->setOperand(SrcIdx, NewV);
979
7
              continue;
980
7
            }
981
27
          }
982
27
983
27
          // Even if the type mismatches, we can cast the constant.
984
27
          if (auto *KOtherSrc = dyn_cast<Constant>(OtherSrc)) {
985
12
            if (isSafeToCastConstAddrSpace(KOtherSrc, NewAS)) {
986
6
              Cmp->setOperand(SrcIdx, NewV);
987
6
              Cmp->setOperand(OtherIdx,
988
6
                ConstantExpr::getAddrSpaceCast(KOtherSrc, NewV->getType()));
989
6
              continue;
990
6
            }
991
239
          }
992
27
        }
993
239
994
239
        if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
995
9
          unsigned NewAS = NewV->getType()->getPointerAddressSpace();
996
9
          if (ASC->getDestAddressSpace() == NewAS) {
997
9
            if (ASC->getType()->getPointerElementType() !=
998
9
                NewV->getType()->getPointerElementType()) {
999
1
              NewV = CastInst::Create(Instruction::BitCast, NewV,
1000
1
                                      ASC->getType(), "", ASC);
1001
1
            }
1002
9
            ASC->replaceAllUsesWith(NewV);
1003
9
            DeadInstructions.push_back(ASC);
1004
9
            continue;
1005
9
          }
1006
230
        }
1007
230
1008
230
        // Otherwise, replaces the use with flat(NewV).
1009
230
        if (Instruction *Inst = dyn_cast<Instruction>(V)) {
1010
185
          // Don't create a copy of the original addrspacecast.
1011
185
          if (U == V && isa<AddrSpaceCastInst>(V))
1012
137
            continue;
1013
48
1014
48
          BasicBlock::iterator InsertPos = std::next(Inst->getIterator());
1015
48
          while (isa<PHINode>(InsertPos))
1016
0
            ++InsertPos;
1017
48
          U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
1018
48
        } else {
1019
45
          U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
1020
45
                                               V->getType()));
1021
45
        }
1022
230
      }
1023
326
    }
1024
522
1025
522
    if (V->use_empty()) {
1026
351
      if (Instruction *I = dyn_cast<Instruction>(V))
1027
302
        DeadInstructions.push_back(I);
1028
351
    }
1029
522
  }
1030
250
1031
250
  for (Instruction *I : DeadInstructions)
1032
311
    RecursivelyDeleteTriviallyDeadInstructions(I);
1033
250
1034
250
  return true;
1035
250
}
1036
1037
3.00k
FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
1038
3.00k
  return new InferAddressSpaces(AddressSpace);
1039
3.00k
}
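// For illustration (a sketch of how a GPU backend typically schedules this
// pass; the TargetPassConfig subclass and the FlatAS constant are assumptions,
// not part of this file):
//   void MyGPUPassConfig::addIRPasses() {
//     // FlatAS: the target's flat address space number.
//     addPass(createInferAddressSpacesPass(FlatAS));
//     TargetPassConfig::addIRPasses();
//   }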