Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
Line | Count | Source
1
//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements an analysis that determines, for a given memory
10
// operation, what preceding memory operations it depends on.  It builds on
11
// alias analysis information, and tries to provide a lazy, caching interface to
12
// a common kind of alias information query.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
17
#include "llvm/ADT/DenseMap.h"
18
#include "llvm/ADT/STLExtras.h"
19
#include "llvm/ADT/SmallPtrSet.h"
20
#include "llvm/ADT/SmallVector.h"
21
#include "llvm/ADT/Statistic.h"
22
#include "llvm/Analysis/AliasAnalysis.h"
23
#include "llvm/Analysis/AssumptionCache.h"
24
#include "llvm/Analysis/MemoryBuiltins.h"
25
#include "llvm/Analysis/MemoryLocation.h"
26
#include "llvm/Analysis/OrderedBasicBlock.h"
27
#include "llvm/Analysis/PHITransAddr.h"
28
#include "llvm/Analysis/PhiValues.h"
29
#include "llvm/Analysis/TargetLibraryInfo.h"
30
#include "llvm/Analysis/ValueTracking.h"
31
#include "llvm/IR/Attributes.h"
32
#include "llvm/IR/BasicBlock.h"
33
#include "llvm/IR/Constants.h"
34
#include "llvm/IR/DataLayout.h"
35
#include "llvm/IR/DerivedTypes.h"
36
#include "llvm/IR/Dominators.h"
37
#include "llvm/IR/Function.h"
38
#include "llvm/IR/InstrTypes.h"
39
#include "llvm/IR/Instruction.h"
40
#include "llvm/IR/Instructions.h"
41
#include "llvm/IR/IntrinsicInst.h"
42
#include "llvm/IR/LLVMContext.h"
43
#include "llvm/IR/Metadata.h"
44
#include "llvm/IR/Module.h"
45
#include "llvm/IR/PredIteratorCache.h"
46
#include "llvm/IR/Type.h"
47
#include "llvm/IR/Use.h"
48
#include "llvm/IR/User.h"
49
#include "llvm/IR/Value.h"
50
#include "llvm/Pass.h"
51
#include "llvm/Support/AtomicOrdering.h"
52
#include "llvm/Support/Casting.h"
53
#include "llvm/Support/CommandLine.h"
54
#include "llvm/Support/Compiler.h"
55
#include "llvm/Support/Debug.h"
56
#include "llvm/Support/MathExtras.h"
57
#include <algorithm>
58
#include <cassert>
59
#include <cstdint>
60
#include <iterator>
61
#include <utility>
62
63
using namespace llvm;
64
65
#define DEBUG_TYPE "memdep"
66
67
STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
68
STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
69
STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
70
71
STATISTIC(NumCacheNonLocalPtr,
72
          "Number of fully cached non-local ptr responses");
73
STATISTIC(NumCacheDirtyNonLocalPtr,
74
          "Number of cached, but dirty, non-local ptr responses");
75
STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
76
STATISTIC(NumCacheCompleteNonLocalPtr,
77
          "Number of block queries that were completely cached");
78
79
// Limit for the number of instructions to scan in a block.
80
81
static cl::opt<unsigned> BlockScanLimit(
82
    "memdep-block-scan-limit", cl::Hidden, cl::init(100),
83
    cl::desc("The number of instructions to scan in a block in memory "
84
             "dependency analysis (default = 100)"));
85
86
static cl::opt<unsigned>
87
    BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
88
                     cl::desc("The number of blocks to scan during memory "
89
                              "dependency analysis (default = 1000)"));
90
91
// Limit on the number of memdep results to process.
92
static const unsigned int NumResultsLimit = 100;
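Note: memdep-block-scan-limit and memdep-block-number-limit are declared as cl::opt flags above, so both scan caps can be raised or lowered from the command line (for example -memdep-block-scan-limit=200 when running opt, or via -mllvm in a clang invocation) without rebuilding; NumResultsLimit, by contrast, is a plain constant and is not user-tunable.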
93
94
/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
95
///
96
/// If the set becomes empty, remove Inst's entry.
97
template <typename KeyTy>
98
static void
99
RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
100
161k
                     Instruction *Inst, KeyTy Val) {
101
161k
  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
102
161k
      ReverseMap.find(Inst);
103
161k
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
104
161k
  bool Found = InstIt->second.erase(Val);
105
161k
  assert(Found && "Invalid reverse map!");
106
161k
  (void)Found;
107
161k
  if (InstIt->second.empty())
108
84.8k
    ReverseMap.erase(InstIt);
109
161k
}
MemoryDependenceAnalysis.cpp:void RemoveFromReverseMap<llvm::Instruction*>(llvm::DenseMap<llvm::Instruction*, llvm::SmallPtrSet<llvm::Instruction*, 4u>, llvm::DenseMapInfo<llvm::Instruction*>, llvm::detail::DenseMapPair<llvm::Instruction*, llvm::SmallPtrSet<llvm::Instruction*, 4u> > >&, llvm::Instruction*, llvm::Instruction*)
Line | Count | Source
100
22.7k
                     Instruction *Inst, KeyTy Val) {
101
22.7k
  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
102
22.7k
      ReverseMap.find(Inst);
103
22.7k
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
104
22.7k
  bool Found = InstIt->second.erase(Val);
105
22.7k
  assert(Found && "Invalid reverse map!");
106
22.7k
  (void)Found;
107
22.7k
  if (InstIt->second.empty())
108
19.9k
    ReverseMap.erase(InstIt);
109
22.7k
}
MemoryDependenceAnalysis.cpp:void RemoveFromReverseMap<llvm::PointerIntPair<llvm::Value const*, 1u, bool, llvm::PointerLikeTypeTraits<llvm::Value const*>, llvm::PointerIntPairInfo<llvm::Value const*, 1u, llvm::PointerLikeTypeTraits<llvm::Value const*> > > >(llvm::DenseMap<llvm::Instruction*, llvm::SmallPtrSet<llvm::PointerIntPair<llvm::Value const*, 1u, bool, llvm::PointerLikeTypeTraits<llvm::Value const*>, llvm::PointerIntPairInfo<llvm::Value const*, 1u, llvm::PointerLikeTypeTraits<llvm::Value const*> > >, 4u>, llvm::DenseMapInfo<llvm::Instruction*>, llvm::detail::DenseMapPair<llvm::Instruction*, llvm::SmallPtrSet<llvm::PointerIntPair<llvm::Value const*, 1u, bool, llvm::PointerLikeTypeTraits<llvm::Value const*>, llvm::PointerIntPairInfo<llvm::Value const*, 1u, llvm::PointerLikeTypeTraits<llvm::Value const*> > >, 4u> > >&, llvm::Instruction*, llvm::PointerIntPair<llvm::Value const*, 1u, bool, llvm::PointerLikeTypeTraits<llvm::Value const*>, llvm::PointerIntPairInfo<llvm::Value const*, 1u, llvm::PointerLikeTypeTraits<llvm::Value const*> > >)
Line | Count | Source
100
138k
                     Instruction *Inst, KeyTy Val) {
101
138k
  typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
102
138k
      ReverseMap.find(Inst);
103
138k
  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
104
138k
  bool Found = InstIt->second.erase(Val);
105
138k
  assert(Found && "Invalid reverse map!");
106
138k
  (void)Found;
107
138k
  if (InstIt->second.empty())
108
64.8k
    ReverseMap.erase(InstIt);
109
138k
}
Unexecuted instantiation: MemoryDependenceAnalysis.cpp:void RemoveFromReverseMap<llvm::Value const*>(llvm::DenseMap<llvm::Instruction*, llvm::SmallPtrSet<llvm::Value const*, 4u>, llvm::DenseMapInfo<llvm::Instruction*>, llvm::detail::DenseMapPair<llvm::Instruction*, llvm::SmallPtrSet<llvm::Value const*, 4u> > >&, llvm::Instruction*, llvm::Value const*)
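The erase-and-prune invariant that RemoveFromReverseMap maintains (drop one value from the key's set, and drop the key itself once its set becomes empty) does not depend on the LLVM container types. A minimal standalone sketch of the same pattern, using only the standard library and deliberately generic key/value types, might look like this:

#include <cassert>
#include <set>
#include <unordered_map>

// Sketch only: erase V from K's set and remove the whole entry once the set
// is empty, mirroring the invariant RemoveFromReverseMap enforces above.
template <typename Key, typename Val>
void removeFromReverseMap(std::unordered_map<Key, std::set<Val>> &ReverseMap,
                          const Key &K, const Val &V) {
  auto It = ReverseMap.find(K);
  assert(It != ReverseMap.end() && "Reverse map out of sync?");
  bool Found = It->second.erase(V) > 0;
  assert(Found && "Invalid reverse map!");
  (void)Found;
  if (It->second.empty())
    ReverseMap.erase(It); // keep the map free of empty sets
}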
110
111
/// If the given instruction references a specific memory location, fill in Loc
112
/// with the details, otherwise set Loc.Ptr to null.
113
///
114
/// Returns a ModRefInfo value describing the general behavior of the
115
/// instruction.
116
static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
117
3.33M
                              const TargetLibraryInfo &TLI) {
118
3.33M
  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
119
1.67M
    if (LI->isUnordered()) {
120
1.67M
      Loc = MemoryLocation::get(LI);
121
1.67M
      return ModRefInfo::Ref;
122
1.67M
    }
123
1
    if (LI->getOrdering() == AtomicOrdering::Monotonic) {
124
0
      Loc = MemoryLocation::get(LI);
125
0
      return ModRefInfo::ModRef;
126
0
    }
127
1
    Loc = MemoryLocation();
128
1
    return ModRefInfo::ModRef;
129
1
  }
130
1.66M
131
1.66M
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
132
1.22M
    if (SI->isUnordered()) {
133
1.20M
      Loc = MemoryLocation::get(SI);
134
1.20M
      return ModRefInfo::Mod;
135
1.20M
    }
136
17.0k
    if (SI->getOrdering() == AtomicOrdering::Monotonic) {
137
4
      Loc = MemoryLocation::get(SI);
138
4
      return ModRefInfo::ModRef;
139
4
    }
140
17.0k
    Loc = MemoryLocation();
141
17.0k
    return ModRefInfo::ModRef;
142
17.0k
  }
143
437k
144
437k
  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
145
0
    Loc = MemoryLocation::get(V);
146
0
    return ModRefInfo::ModRef;
147
0
  }
148
437k
149
437k
  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
150
258
    // calls to free() deallocate the entire structure
151
258
    Loc = MemoryLocation(CI->getArgOperand(0));
152
258
    return ModRefInfo::Mod;
153
258
  }
154
437k
155
437k
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
156
187k
    switch (II->getIntrinsicID()) {
157
187k
    case Intrinsic::lifetime_start:
158
83.9k
    case Intrinsic::lifetime_end:
159
83.9k
    case Intrinsic::invariant_start:
160
83.9k
      Loc = MemoryLocation::getForArgument(II, 1, TLI);
161
83.9k
      // These intrinsics don't really modify the memory, but returning Mod
162
83.9k
      // will allow them to be handled conservatively.
163
83.9k
      return ModRefInfo::Mod;
164
83.9k
    case Intrinsic::invariant_end:
165
0
      Loc = MemoryLocation::getForArgument(II, 2, TLI);
166
0
      // These intrinsics don't really modify the memory, but returning Mod
167
0
      // will allow them to be handled conservatively.
168
0
      return ModRefInfo::Mod;
169
103k
    default:
170
103k
      break;
171
353k
    }
172
353k
  }
173
353k
174
353k
  // Otherwise, just do the coarse-grained thing that always works.
175
353k
  if (Inst->mayWriteToMemory())
176
114k
    return ModRefInfo::ModRef;
177
238k
  if (Inst->mayReadFromMemory())
178
2.60k
    return ModRefInfo::Ref;
179
235k
  return ModRefInfo::NoModRef;
180
235k
}
181
182
/// Private helper for finding the local dependencies of a call site.
183
MemDepResult MemoryDependenceResults::getCallDependencyFrom(
184
    CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
185
80.4k
    BasicBlock *BB) {
186
80.4k
  unsigned Limit = BlockScanLimit;
187
80.4k
188
80.4k
  // Walk backwards through the block, looking for dependencies.
189
392k
  while (ScanIt != BB->begin()) {
190
356k
    Instruction *Inst = &*--ScanIt;
191
356k
    // Debug intrinsics don't cause dependences and should not affect Limit
192
356k
    if (isa<DbgInfoIntrinsic>(Inst))
193
2
      continue;
194
356k
195
356k
    // Limit the amount of scanning we do so we don't end up with quadratic
196
356k
    // running time on extreme testcases.
197
356k
    --Limit;
198
356k
    if (!Limit)
199
138
      return MemDepResult::getUnknown();
200
356k
201
356k
    // If this inst is a memory op, get the pointer it accessed
202
356k
    MemoryLocation Loc;
203
356k
    ModRefInfo MR = GetLocation(Inst, Loc, TLI);
204
356k
    if (Loc.Ptr) {
205
81.5k
      // A simple instruction.
206
81.5k
      if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
207
22.8k
        return MemDepResult::getClobber(Inst);
208
58.6k
      continue;
209
58.6k
    }
210
274k
211
274k
    if (auto *CallB = dyn_cast<CallBase>(Inst)) {
212
39.1k
      // If these two calls do not interfere, look past it.
213
39.1k
      if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
214
18.2k
        // If the two calls are the same, return Inst as a Def, so that
215
18.2k
        // Call can be found redundant and eliminated.
216
18.2k
        if (isReadOnlyCall && !isModSet(MR) &&
217
18.2k
            Call->isIdenticalToWhenDefined(CallB))
218
179
          return MemDepResult::getDef(Inst);
219
18.0k
220
18.0k
        // Otherwise if the two calls don't interact (e.g. CallB is readnone)
221
18.0k
        // keep scanning.
222
18.0k
        continue;
223
18.0k
      } else
224
20.9k
        return MemDepResult::getClobber(Inst);
225
235k
    }
226
235k
227
235k
    // If we could not obtain a pointer for the instruction and the instruction
228
235k
    // touches memory then assume that this is a dependency.
229
235k
    if (isModOrRefSet(MR))
230
13
      return MemDepResult::getClobber(Inst);
231
235k
  }
232
80.4k
233
80.4k
  // No dependence found.  If this is the entry block of the function, it is
234
80.4k
  // unknown, otherwise it is non-local.
235
80.4k
  
  if (BB != &BB->getParent()->getEntryBlock())
236
23.4k
    return MemDepResult::getNonLocal();
237
12.9k
  return MemDepResult::getNonFuncLocal();
238
12.9k
}
239
240
unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
241
    const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
242
3.21k
    const LoadInst *LI) {
243
3.21k
  // We can only extend simple integer loads.
244
3.21k
  if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
245
3.21k
    return 0;
246
0
247
0
  // Load widening is hostile to ThreadSanitizer: it may cause false positives
248
0
  // or make the reports more cryptic (access sizes are wrong).
249
0
  if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
250
0
    return 0;
251
0
252
0
  const DataLayout &DL = LI->getModule()->getDataLayout();
253
0
254
0
  // Get the base of this load.
255
0
  int64_t LIOffs = 0;
256
0
  const Value *LIBase =
257
0
      GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
258
0
259
0
  // If the two pointers are not based on the same pointer, we can't tell that
260
0
  // they are related.
261
0
  if (LIBase != MemLocBase)
262
0
    return 0;
263
0
264
0
  // Okay, the two values are based on the same pointer, but returned as
265
0
  // no-alias.  This happens when we have things like two byte loads at "P+1"
266
0
  // and "P+3".  Check to see if increasing the size of the "LI" load up to its
267
0
  // alignment (or the largest native integer type) will allow us to load all
268
0
  // the bits required by MemLoc.
269
0
270
0
  // If MemLoc is before LI, then no widening of LI will help us out.
271
0
  if (MemLocOffs < LIOffs)
272
0
    return 0;
273
0
274
0
  // Get the alignment of the load in bytes.  We assume that it is safe to load
275
0
  // any legal integer up to this size without a problem.  For example, if we're
276
0
  // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
277
0
  // widen it up to an i32 load.  If it is known 2-byte aligned, we can widen it
278
0
  // to i16.
279
0
  unsigned LoadAlign = LI->getAlignment();
280
0
281
0
  int64_t MemLocEnd = MemLocOffs + MemLocSize;
282
0
283
0
  // If no amount of rounding up will let MemLoc fit into LI, then bail out.
284
0
  if (LIOffs + LoadAlign < MemLocEnd)
285
0
    return 0;
286
0
287
0
  // This is the size of the load to try.  Start with the next larger power of
288
0
  // two.
289
0
  unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
290
0
  NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
291
0
292
0
  while (true) {
293
0
    // If this load size is bigger than our known alignment or would not fit
294
0
    // into a native integer register, then we fail.
295
0
    if (NewLoadByteSize > LoadAlign ||
296
0
        !DL.fitsInLegalInteger(NewLoadByteSize * 8))
297
0
      return 0;
298
0
299
0
    if (LIOffs + NewLoadByteSize > MemLocEnd &&
300
0
        (LI->getParent()->getParent()->hasFnAttribute(
301
0
             Attribute::SanitizeAddress) ||
302
0
         LI->getParent()->getParent()->hasFnAttribute(
303
0
             Attribute::SanitizeHWAddress)))
304
0
      // We will be reading past the location accessed by the original program.
305
0
      // While this is safe in a regular build, Address Safety analysis tools
306
0
      // may start reporting false warnings. So, don't do widening.
307
0
      return 0;
308
0
309
0
    // If a load of this width would include all of MemLoc, then we succeed.
310
0
    if (LIOffs + NewLoadByteSize >= MemLocEnd)
311
0
      return NewLoadByteSize;
312
0
313
0
    NewLoadByteSize <<= 1;
314
0
  }
315
0
}
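A worked instance of the arithmetic above may help (a hedged re-derivation, not output from the function): take an i8 load of P that is known 4-byte aligned (LIOffs = 0, byte size 1) and a MemLoc covering the single byte at P+3 (MemLocOffs = 3, MemLocSize = 1), so MemLocEnd = 4. The candidate width starts at the next power of two above 1, i.e. 2; since 0 + 2 < 4 it doubles to 4; 4 does not exceed the alignment, fits a legal integer on typical targets, and 0 + 4 >= 4, so a widened i32 load would cover MemLoc. The standalone sketch below reproduces just that rounding logic; MaxLegalBytes stands in for the DataLayout::fitsInLegalInteger query, and the alias and sanitizer checks are omitted on purpose.

#include <cstdint>

// Sketch of the widening arithmetic only; alias, type and sanitizer checks
// from the real function are intentionally left out.
static unsigned widenedLoadSize(int64_t LIOffs, unsigned LoadByteSize,
                                unsigned LoadAlign, int64_t MemLocOffs,
                                unsigned MemLocSize, unsigned MaxLegalBytes) {
  if (MemLocOffs < LIOffs)
    return 0;                              // MemLoc starts before the load
  int64_t MemLocEnd = MemLocOffs + MemLocSize;
  if (LIOffs + LoadAlign < MemLocEnd)
    return 0;                              // no amount of rounding can reach it
  unsigned NewSize = 1;
  while (NewSize <= LoadByteSize)          // next power of two > LoadByteSize
    NewSize <<= 1;
  for (;;) {
    if (NewSize > LoadAlign || NewSize > MaxLegalBytes)
      return 0;                            // too wide to be safe or legal
    if (LIOffs + NewSize >= MemLocEnd)
      return NewSize;                      // this width covers MemLoc
    NewSize <<= 1;
  }
}
// e.g. widenedLoadSize(0, 1, 4, 3, 1, 8) == 4, matching the i8 -> i32 case.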
316
317
2.07M
static bool isVolatile(Instruction *Inst) {
318
2.07M
  if (auto *LI = dyn_cast<LoadInst>(Inst))
319
2.05M
    return LI->isVolatile();
320
15.4k
  if (auto *SI = dyn_cast<StoreInst>(Inst))
321
15.3k
    return SI->isVolatile();
322
106
  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Inst))
323
0
    return AI->isVolatile();
324
106
  return false;
325
106
}
326
327
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
328
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
329
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
330
9.37M
    OrderedBasicBlock *OBB) {
331
9.37M
  MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
332
9.37M
  if (QueryInst != nullptr) {
333
9.23M
    if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
334
8.00M
      InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
335
8.00M
336
8.00M
      if (InvariantGroupDependency.isDef())
337
17
        return InvariantGroupDependency;
338
9.37M
    }
339
9.23M
  }
340
9.37M
  MemDepResult SimpleDep = getSimplePointerDependencyFrom(
341
9.37M
      MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, OBB);
342
9.37M
  if (SimpleDep.isDef())
343
560k
    return SimpleDep;
344
8.81M
  // Non-local invariant group dependency indicates there is non local Def
345
8.81M
  // (it only returns nonLocal if it finds nonLocal def), which is better than
346
8.81M
  // local clobber and everything else.
347
8.81M
  if (InvariantGroupDependency.isNonLocal())
348
13
    return InvariantGroupDependency;
349
8.81M
350
8.81M
  assert(InvariantGroupDependency.isUnknown() &&
351
8.81M
         "InvariantGroupDependency should be only unknown at this point");
352
8.81M
  return SimpleDep;
353
8.81M
}
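In short, the merge order implemented above is: a local invariant.group Def wins outright; otherwise a Def from the plain scan wins; otherwise a non-local invariant.group answer (which getInvariantGroupPointerDependency only produces when it has already recorded a non-local Def) is preferred over whatever clobber or non-local result the plain scan returned; only then does the simple result stand.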
354
355
MemDepResult
356
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
357
8.00M
                                                            BasicBlock *BB) {
358
8.00M
359
8.00M
  if (!LI->getMetadata(LLVMContext::MD_invariant_group))
360
8.00M
    return MemDepResult::getUnknown();
361
62
362
62
  // Take the ptr operand after all casts and geps 0. This way we can search
363
62
  // the cast graph downward only.
364
62
  Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
365
62
366
62
  // It is not safe to walk the use list of a global value, because function
367
62
  // passes aren't allowed to look outside their functions.
368
62
  // FIXME: this could be fixed by filtering instructions from outside
369
62
  // of the current function.
370
62
  if (isa<GlobalValue>(LoadOperand))
371
3
    return MemDepResult::getUnknown();
372
59
373
59
  // Queue to process all pointers that are equivalent to load operand.
374
59
  SmallVector<const Value *, 8> LoadOperandsQueue;
375
59
  LoadOperandsQueue.push_back(LoadOperand);
376
59
377
59
  Instruction *ClosestDependency = nullptr;
378
59
  // Order of instructions in the uses list is unpredictable. In order to always
379
59
  // get the same result, we will look for the closest dominance.
380
59
  auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
381
30
    assert(Other && "Must call it with not null instruction");
382
30
    if (Best == nullptr || DT.dominates(Best, Other))
383
30
      return Other;
384
0
    return Best;
385
0
  };
386
59
387
59
  // FIXME: This loop is O(N^2) because dominates can be O(n) and in the worst case
388
59
  // we will see all the instructions. This should be fixed in MSSA.
389
180
  while (!LoadOperandsQueue.empty()) {
390
121
    const Value *Ptr = LoadOperandsQueue.pop_back_val();
391
121
    assert(Ptr && !isa<GlobalValue>(Ptr) &&
392
121
           "Null or GlobalValue should not be inserted");
393
121
394
322
    for (const Use &Us : Ptr->uses()) {
395
322
      auto *U = dyn_cast<Instruction>(Us.getUser());
396
322
      if (!U || U == LI || !DT.dominates(U, LI))
397
149
        continue;
398
173
399
173
      // Bitcast or gep with zeros are using Ptr. Add to queue to check its
400
173
      // users.      U = bitcast Ptr
401
173
      if (isa<BitCastInst>(U)) {
402
60
        LoadOperandsQueue.push_back(U);
403
60
        continue;
404
60
      }
405
113
      // Gep with zeros is equivalent to bitcast.
406
113
      // FIXME: we are not sure if some bitcast should be canonicalized to gep 0
407
113
      // or gep 0 to bitcast because of SROA, so there are 2 forms. When
408
113
      // typeless pointers are ready, both cases will be gone
409
113
      // (and this BFS also won't be needed).
410
113
      if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
411
2
        if (GEP->hasAllZeroIndices()) {
412
2
          LoadOperandsQueue.push_back(U);
413
2
          continue;
414
2
        }
415
111
416
111
      // If we hit load/store with the same invariant.group metadata (and the
417
111
      // same pointer operand) we can assume that the value pointed to by the
418
111
      // pointer operand didn't change.
419
111
      if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
420
111
          U->getMetadata(LLVMContext::MD_invariant_group) != nullptr)
421
30
        ClosestDependency = GetClosestDependency(ClosestDependency, U);
422
111
    }
423
121
  }
424
59
425
59
  if (!ClosestDependency)
426
29
    return MemDepResult::getUnknown();
427
30
  if (ClosestDependency->getParent() == BB)
428
17
    return MemDepResult::getDef(ClosestDependency);
429
13
  // Def(U) can't be returned here because it is non-local. If a local
430
13
  // dependency is not found, return nonLocal, counting on the
431
13
  // user to call getNonLocalPointerDependency, which will return the cached
432
13
  // result.
433
13
  NonLocalDefsCache.try_emplace(
434
13
      LI, NonLocalDepResult(ClosestDependency->getParent(),
435
13
                            MemDepResult::getDef(ClosestDependency), nullptr));
436
13
  ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
437
13
  return MemDepResult::getNonLocal();
438
13
}
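The walk above is an ordinary worklist traversal: pop a pointer, look at its users, re-queue users that merely re-spell the same address (bitcast, all-zero GEP), and record users that are invariant.group loads or stores. A minimal standalone sketch of that control flow, with placeholder predicates standing in for those two tests, might be:

#include <functional>
#include <vector>

// Generic worklist walk; the predicates are placeholders for the
// bitcast/zero-GEP test and the !invariant.group load/store test above.
template <typename Node>
void worklistWalk(Node Start,
                  const std::function<std::vector<Node>(Node)> &UsersOf,
                  const std::function<bool(Node)> &IsSameAddress,
                  const std::function<bool(Node)> &IsCandidateDep,
                  const std::function<void(Node)> &Record) {
  std::vector<Node> Worklist{Start};
  while (!Worklist.empty()) {
    Node N = Worklist.back();
    Worklist.pop_back();
    for (Node U : UsersOf(N)) {
      if (IsSameAddress(U))      // e.g. bitcast or GEP with all-zero indices
        Worklist.push_back(U);   // same address: keep searching its users
      else if (IsCandidateDep(U))
        Record(U);               // e.g. load/store tagged !invariant.group
    }
  }
}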
439
440
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
441
    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
442
    BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
443
9.37M
    OrderedBasicBlock *OBB) {
444
9.37M
  bool isInvariantLoad = false;
445
9.37M
446
9.37M
  unsigned DefaultLimit = BlockScanLimit;
447
9.37M
  if (!Limit)
448
9.29M
    Limit = &DefaultLimit;
449
9.37M
450
9.37M
  // We must be careful with atomic accesses, as they may allow another thread
451
9.37M
  //   to touch this location, clobbering it. We are conservative: if the
452
9.37M
  //   QueryInst is not a simple (non-atomic) memory access, we automatically
453
9.37M
  //   return getClobber.
454
9.37M
  // If it is simple, we know based on the results of
455
9.37M
  // "Compiler testing via a theory of sound optimisations in the C11/C++11
456
9.37M
  //   memory model" in PLDI 2013, that a non-atomic location can only be
457
9.37M
  //   clobbered between a pair of a release and an acquire action, with no
458
9.37M
  //   access to the location in between.
459
9.37M
  // Here is an example for giving the general intuition behind this rule.
460
9.37M
  // In the following code:
461
9.37M
  //   store x 0;
462
9.37M
  //   release action; [1]
463
9.37M
  //   acquire action; [4]
464
9.37M
  //   %val = load x;
465
9.37M
  // It is unsafe to replace %val by 0 because another thread may be running:
466
9.37M
  //   acquire action; [2]
467
9.37M
  //   store x 42;
468
9.37M
  //   release action; [3]
469
9.37M
  // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
470
9.37M
  // being 42. A key property of this program however is that if either
471
9.37M
  // 1 or 4 were missing, there would be a race between the store of 42 and
472
9.37M
  // either the store of 0 or the load (making the whole program racy).
473
9.37M
  // The paper mentioned above shows that the same property is respected
474
9.37M
  // by every program that can detect any optimization of that kind: either
475
9.37M
  // it is racy (undefined) or there is a release followed by an acquire
476
9.37M
  // between the pair of accesses under consideration.
477
9.37M
478
9.37M
  // If the load is invariant, we "know" that it doesn't alias *any* write. We
479
9.37M
  // do want to respect mustalias results since defs are useful for value
480
9.37M
  // forwarding, but any mayalias write can be assumed to be noalias.
481
9.37M
  // Arguably, this logic should be pushed inside AliasAnalysis itself.
482
9.37M
  if (isLoad && QueryInst) {
483
8.00M
    LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
484
8.00M
    if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
485
24.4k
      isInvariantLoad = true;
486
8.00M
  }
487
9.37M
488
9.37M
  const DataLayout &DL = BB->getModule()->getDataLayout();
489
9.37M
490
9.37M
  // If the caller did not provide an ordered basic block,
491
9.37M
  // create one to lazily compute and cache instruction
492
9.37M
  // positions inside a BB. This is used to provide fast queries for relative
493
9.37M
  // position between two instructions in a BB and can be used by
494
9.37M
  // AliasAnalysis::callCapturesBefore.
495
9.37M
  OrderedBasicBlock OBBTmp(BB);
496
9.37M
  if (!OBB)
497
8.14M
    OBB = &OBBTmp;
498
9.37M
499
9.37M
  // Return "true" if and only if the instruction I is either a non-simple
500
9.37M
  // load or a non-simple store.
501
9.37M
  auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
502
15.7k
    if (auto *LI = dyn_cast<LoadInst>(I))
503
5.63k
      return !LI->isSimple();
504
10.0k
    if (auto *SI = dyn_cast<StoreInst>(I))
505
8.67k
      return !SI->isSimple();
506
1.40k
    return false;
507
1.40k
  };
508
9.37M
509
9.37M
  // Return "true" if I is not a load and not a store, but it does access
510
9.37M
  // memory.
511
9.37M
  auto isOtherMemAccess = [](Instruction *I) -> bool {
512
15.6k
    return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
513
15.6k
  };
514
9.37M
515
9.37M
  // Walk backwards through the basic block, looking for dependencies.
516
63.0M
  while (ScanIt != BB->begin()) {
517
56.5M
    Instruction *Inst = &*--ScanIt;
518
56.5M
519
56.5M
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
520
788k
      // Debug intrinsics don't (and can't) cause dependencies.
521
788k
      if (isa<DbgInfoIntrinsic>(II))
522
91
        continue;
523
56.5M
524
56.5M
    // Limit the amount of scanning we do so we don't end up with quadratic
525
56.5M
    // running time on extreme testcases.
526
56.5M
    --*Limit;
527
56.5M
    if (!*Limit)
528
83.7k
      return MemDepResult::getUnknown();
529
56.4M
530
56.4M
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
531
788k
      // If we reach a lifetime begin or end marker, then the query ends here
532
788k
      // because the value is undefined.
533
788k
      if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
534
303k
        // FIXME: This only considers queries directly on the invariant-tagged
535
303k
        // pointer, not on query pointers that are indexed off of them.  It'd
536
303k
        // be nice to handle that at some point (the right approach is to use
537
303k
        // GetPointerBaseWithConstantOffset).
538
303k
        if (AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc))
539
31.0k
          return MemDepResult::getDef(II);
540
272k
        continue;
541
272k
      }
542
788k
    }
543
56.1M
544
56.1M
    // Values depend on loads if the pointers are must aliased.  This means
545
56.1M
    // that a load depends on another must aliased load from the same value.
546
56.1M
    // One exception is atomic loads: a value can depend on an atomic load that
547
56.1M
    // it does not alias with when this atomic load indicates that another
548
56.1M
    // thread may be accessing the location.
549
56.1M
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
550
7.66M
      // While volatile accesses cannot be eliminated, they do not have to clobber
551
7.66M
      // non-aliasing locations, as normal accesses, for example, can be safely
552
7.66M
      // reordered with volatile accesses.
553
7.66M
      if (LI->isVolatile()) {
554
24.5k
        if (!QueryInst)
555
18
          // Original QueryInst *may* be volatile
556
18
          return MemDepResult::getClobber(LI);
557
24.5k
        if (isVolatile(QueryInst))
558
1.41k
          // Ordering required if QueryInst is itself volatile
559
1.41k
          return MemDepResult::getClobber(LI);
560
7.66M
        // Otherwise, volatile doesn't imply any special ordering
561
7.66M
      }
562
7.66M
563
7.66M
      // Atomic loads have complications involved.
564
7.66M
      // A Monotonic (or higher) load is OK if the query inst is itself not
565
7.66M
      // atomic.
566
7.66M
      // FIXME: This is overly conservative.
567
7.66M
      if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
568
1.36k
        if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
569
1.36k
            
            isOtherMemAccess(QueryInst))
570
3
          return MemDepResult::getClobber(LI);
571
1.36k
        if (LI->getOrdering() != AtomicOrdering::Monotonic)
572
1.36k
          return MemDepResult::getClobber(LI);
573
7.66M
      }
574
7.66M
575
7.66M
      MemoryLocation LoadLoc = MemoryLocation::get(LI);
576
7.66M
577
7.66M
      // If we found a pointer, check if it could be the same as our pointer.
578
7.66M
      AliasResult R = AA.alias(LoadLoc, MemLoc);
579
7.66M
580
7.66M
      if (isLoad) {
581
6.87M
        if (R == NoAlias)
582
5.17M
          continue;
583
1.70M
584
1.70M
        // Must aliased loads are defs of each other.
585
1.70M
        if (R == MustAlias)
586
128k
          return MemDepResult::getDef(Inst);
587
1.57M
588
#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads
589
      // in terms of clobbering loads, but since it does this by looking
590
      // at the clobbering load directly, it doesn't know about any
591
      // phi translation that may have happened along the way.
592
593
        // If we have a partial alias, then return this as a clobber for the
594
        // client to handle.
595
        if (R == PartialAlias)
596
          return MemDepResult::getClobber(Inst);
597
#endif
598
599
1.57M
        // Loads that merely may-alias do not clobber one another, so a
600
1.57M
        // may-aliasing load creates no dependence here; keep scanning.
601
1.57M
        continue;
602
1.57M
      }
603
788k
604
788k
      // Stores don't depend on other no-aliased accesses.
605
788k
      if (R == NoAlias)
606
493k
        continue;
607
295k
608
295k
      // Stores don't alias loads from read-only memory.
609
295k
      if (AA.pointsToConstantMemory(LoadLoc))
610
2.97k
        continue;
611
292k
612
292k
      // Stores depend on may/must aliased loads.
613
292k
      return MemDepResult::getDef(Inst);
614
292k
    }
615
48.4M
616
48.4M
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
617
7.88M
      // Atomic stores have complications involved.
618
7.88M
      // A Monotonic store is OK if the query inst is itself not atomic.
619
7.88M
      // FIXME: This is overly conservative.
620
7.88M
      if (!SI->isUnordered() && SI->isAtomic()) {
621
1.42k
        if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
622
1.42k
            
            isOtherMemAccess(QueryInst))
623
1.39k
          return MemDepResult::getClobber(SI);
624
34
        if (SI->getOrdering() != AtomicOrdering::Monotonic)
625
32
          return MemDepResult::getClobber(SI);
626
7.88M
      }
627
7.88M
628
7.88M
      // FIXME: this is overly conservative.
629
7.88M
      // While volatile accesses cannot be eliminated, they do not have to clobber
630
7.88M
      // non-aliasing locations, as normal accesses can for example be reordered
631
7.88M
      // with volatile accesses.
632
7.88M
      if (SI->isVolatile())
633
12.9k
        if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
634
12.9k
            
            isOtherMemAccess(QueryInst))
635
46
          return MemDepResult::getClobber(SI);
636
7.88M
637
7.88M
      // If alias analysis can tell that this store is guaranteed to not modify
638
7.88M
      // the query pointer, ignore it.  Use getModRefInfo to handle cases where
639
7.88M
      // the query pointer points to constant memory etc.
640
7.88M
      if (!isModOrRefSet(AA.getModRefInfo(SI, MemLoc)))
641
7.39M
        continue;
642
490k
643
490k
      // Ok, this store might clobber the query pointer.  Check to see if it is
644
490k
      // a must alias: in this case, we want to return this as a def.
645
490k
      // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
646
490k
      MemoryLocation StoreLoc = MemoryLocation::get(SI);
647
490k
648
490k
      // If we found a pointer, check if it could be the same as our pointer.
649
490k
      AliasResult R = AA.alias(StoreLoc, MemLoc);
650
490k
651
490k
      if (R == NoAlias)
652
2
        continue;
653
490k
      if (R == MustAlias)
654
72.7k
        return MemDepResult::getDef(Inst);
655
417k
      if (isInvariantLoad)
656
11
        continue;
657
417k
      return MemDepResult::getClobber(Inst);
658
417k
    }
659
40.6M
660
40.6M
    // If this is an allocation, and if we know that the accessed pointer is to
661
40.6M
    // the allocation, return Def.  This means that there is no dependence and
662
40.6M
    // the access can be optimized based on that.  For example, a load could
663
40.6M
    // turn into undef.  Note that we can bypass the allocation itself when
664
40.6M
    // looking for a clobber in many cases; that's an alias property and is
665
40.6M
    // handled by BasicAA.
666
40.6M
    if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, &TLI)) {
667
338k
      const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
668
338k
      if (AccessPtr == Inst || AA.isMustAlias(Inst, AccessPtr))
669
35.4k
        return MemDepResult::getDef(Inst);
670
40.5M
    }
671
40.5M
672
40.5M
    if (isInvariantLoad)
673
116k
      continue;
674
40.4M
675
40.4M
    // A release fence requires that all stores complete before it, but does
676
40.4M
    // not prevent the reordering of following loads or stores 'before' the
677
40.4M
    // fence.  As a result, we look past it when finding a dependency for
678
40.4M
    // loads.  DSE uses this to find preceding stores to delete and thus we
679
40.4M
    // can't bypass the fence if the query instruction is a store.
680
40.4M
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
681
2.65k
      if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
682
22
        continue;
683
40.4M
684
40.4M
    // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
685
40.4M
    ModRefInfo MR = AA.getModRefInfo(Inst, MemLoc);
686
40.4M
    // If necessary, perform additional analysis.
687
40.4M
    if (isModAndRefSet(MR))
688
1.74M
      MR = AA.callCapturesBefore(Inst, MemLoc, &DT, OBB);
689
40.4M
    switch (clearMust(MR)) {
690
40.4M
    case ModRefInfo::NoModRef:
691
38.6M
      // If the call has no effect on the queried pointer, just ignore it.
692
38.6M
      continue;
693
40.4M
    case ModRefInfo::Mod:
694
32.2k
      return MemDepResult::getClobber(Inst);
695
40.4M
    case ModRefInfo::Ref:
696
62.7k
      // If the call is known to never store to the pointer, and if this is a
697
62.7k
      // load query, we can safely ignore it (scan past it).
698
62.7k
      if (isLoad)
699
54.1k
        continue;
700
8.54k
      LLVM_FALLTHROUGH;
701
1.74M
    default:
702
1.74M
      // Otherwise, there is a potential dependence.  Return a clobber.
703
1.74M
      return MemDepResult::getClobber(Inst);
704
40.4M
    }
705
40.4M
  }
706
9.37M
707
9.37M
  // No dependence found.  If this is the entry block of the function, it is
708
9.37M
  // unknown, otherwise it is non-local.
709
9.37M
  
  if (BB != &BB->getParent()->getEntryBlock())
710
5.93M
    return MemDepResult::getNonLocal();
711
587k
  return MemDepResult::getNonFuncLocal();
712
587k
}
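The release/acquire argument quoted near the top of this function (the PLDI 2013 result) maps onto ordinary C++ atomics. The sketch below illustrates the source-level situation described in that comment and is not code from this file: t1 plays the role of the querying thread (store 0, release [1], acquire [4], reload), t2 the interfering one (acquire [2], store 42, release [3]). Because 1 synchronizes with 2 and 3 with 4, the program is race-free and t1 legitimately observes 42, so forwarding the earlier 0 to the final load would be unsound; in the paper's abstract formulation, dropping either the release [1] or the acquire [4] instead makes the program racy.

#include <atomic>
#include <thread>

int x = 0;
std::atomic<bool> f1{false}, f2{false};

void t1() {                                        // the querying thread
  x = 0;
  f1.store(true, std::memory_order_release);       // [1] release
  while (!f2.load(std::memory_order_acquire)) {}   // [4] acquire
  int v = x;                                       // observes 42, not 0
  (void)v;
}

void t2() {                                        // the interfering thread
  while (!f1.load(std::memory_order_acquire)) {}   // [2] acquire
  x = 42;
  f2.store(true, std::memory_order_release);       // [3] release
}

int main() {
  std::thread a(t1), b(t2);
  a.join();
  b.join();
}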
713
714
MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst,
715
4.54M
                                                    OrderedBasicBlock *OBB) {
716
4.54M
  Instruction *ScanPos = QueryInst;
717
4.54M
718
4.54M
  // Check for a cached result
719
4.54M
  MemDepResult &LocalCache = LocalDeps[QueryInst];
720
4.54M
721
4.54M
  // If the cached entry is non-dirty, just return it.  Note that this depends
722
4.54M
  // on MemDepResult's default constructing to 'dirty'.
723
4.54M
  if (!LocalCache.isDirty())
724
1.22M
    return LocalCache;
725
3.32M
726
3.32M
  // Otherwise, if we have a dirty entry, we know we can start the scan at that
727
3.32M
  // instruction, which may save us some work.
728
3.32M
  if (Instruction *Inst = LocalCache.getInst()) {
729
7.43k
    ScanPos = Inst;
730
7.43k
731
7.43k
    RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
732
7.43k
  }
733
3.32M
734
3.32M
  BasicBlock *QueryParent = QueryInst->getParent();
735
3.32M
736
3.32M
  // Do the scan.
737
3.32M
  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
738
341k
    // No dependence found. If this is the entry block of the function, it is
739
341k
    // unknown, otherwise it is non-local.
740
341k
    if (QueryParent != &QueryParent->getParent()->getEntryBlock())
741
295k
      LocalCache = MemDepResult::getNonLocal();
742
46.4k
    else
743
46.4k
      LocalCache = MemDepResult::getNonFuncLocal();
744
2.97M
  } else {
745
2.97M
    MemoryLocation MemLoc;
746
2.97M
    ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
747
2.97M
    if (MemLoc.Ptr) {
748
2.88M
      // If we can do a pointer scan, make it happen.
749
2.88M
      bool isLoad = !isModSet(MR);
750
2.88M
      if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
751
73.1k
        isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
752
2.88M
753
2.88M
      LocalCache =
754
2.88M
          getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
755
2.88M
                                   QueryParent, QueryInst, nullptr, OBB);
756
2.88M
    } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
757
78.3k
      bool isReadOnly = AA.onlyReadsMemory(QueryCall);
758
78.3k
      LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
759
78.3k
                                         ScanPos->getIterator(), QueryParent);
760
78.3k
    } else
761
17.0k
      // Non-memory instruction.
762
17.0k
      LocalCache = MemDepResult::getUnknown();
763
2.97M
  }
764
3.32M
765
3.32M
  // Remember the result!
766
3.32M
  if (Instruction *I = LocalCache.getInst())
767
1.04M
    ReverseLocalDeps[I].insert(QueryInst);
768
3.32M
769
3.32M
  return LocalCache;
770
3.32M
}
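For orientation, this is the entry point a client pass calls for a local query; a hedged usage sketch follows (MD is assumed to be a MemoryDependenceResults reference already obtained elsewhere, and I the memory instruction being asked about):

// Hypothetical client-side query; MD and I are assumed to already exist.
MemDepResult Dep = MD.getDependency(&I);
if (Dep.isDef()) {
  Instruction *DefInst = Dep.getInst();  // exact defining access in this block
  (void)DefInst;
} else if (Dep.isClobber()) {
  // Some instruction may write the location; Dep.getInst() names it.
} else if (Dep.isNonLocal()) {
  // No local answer; follow up with getNonLocalPointerDependency or
  // getNonLocalCallDependency to walk the predecessors.
}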
771
772
#ifndef NDEBUG
773
/// This method is used when -debug is specified to verify that cache arrays
774
/// are properly kept sorted.
775
static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
776
                         int Count = -1) {
777
  if (Count == -1)
778
    Count = Cache.size();
779
  assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
780
         "Cache isn't sorted!");
781
}
782
#endif
783
784
const MemoryDependenceResults::NonLocalDepInfo &
785
940
MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
786
940
  assert(getDependency(QueryCall).isNonLocal() &&
787
940
         "getNonLocalCallDependency should only be used on calls with "
788
940
         "non-local deps!");
789
940
  PerInstNLInfo &CacheP = NonLocalDeps[QueryCall];
790
940
  NonLocalDepInfo &Cache = CacheP.first;
791
940
792
940
  // This is the set of blocks that need to be recomputed.  In the cached case,
793
940
  // this can happen due to instructions being deleted etc. In the uncached
794
940
  // case, this starts out as the set of predecessors we care about.
795
940
  SmallVector<BasicBlock *, 32> DirtyBlocks;
796
940
797
940
  if (!Cache.empty()) {
798
433
    // Okay, we have a cache entry.  If we know it is not dirty, just return it
799
433
    // with no computation.
800
433
    if (!CacheP.second) {
801
432
      ++NumCacheNonLocal;
802
432
      return Cache;
803
432
    }
804
1
805
1
    // If we already have a partially computed set of results, scan them to
806
1
    // determine what is dirty, seeding our initial DirtyBlocks worklist.
807
1
    for (auto &Entry : Cache)
808
1
      if (Entry.getResult().isDirty())
809
1
        DirtyBlocks.push_back(Entry.getBB());
810
1
811
1
    // Sort the cache so that we can do fast binary search lookups below.
812
1
    llvm::sort(Cache);
813
1
814
1
    ++NumCacheDirtyNonLocal;
815
1
    // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
816
1
    //     << Cache.size() << " cached: " << *QueryInst;
817
507
  } else {
818
507
    // Seed DirtyBlocks with each of the preds of QueryInst's block.
819
507
    BasicBlock *QueryBB = QueryCall->getParent();
820
507
    for (BasicBlock *Pred : PredCache.get(QueryBB))
821
689
      DirtyBlocks.push_back(Pred);
822
507
    ++NumUncacheNonLocal;
823
507
  }
824
940
825
940
  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
826
940
  bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
827
508
828
508
  SmallPtrSet<BasicBlock *, 32> Visited;
829
508
830
508
  unsigned NumSortedEntries = Cache.size();
831
508
  LLVM_DEBUG(AssertSorted(Cache));
832
508
833
508
  // Iterate while we still have blocks to update.
834
2.82k
  while (!DirtyBlocks.empty()) {
835
2.31k
    BasicBlock *DirtyBB = DirtyBlocks.back();
836
2.31k
    DirtyBlocks.pop_back();
837
2.31k
838
2.31k
    // Already processed this block?
839
2.31k
    if (!Visited.insert(DirtyBB).second)
840
179
      continue;
841
2.14k
842
2.14k
    // Do a binary search to see if we already have an entry for this block in
843
2.14k
    // the cache set.  If so, find it.
844
2.14k
    LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
845
2.14k
    NonLocalDepInfo::iterator Entry =
846
2.14k
        std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
847
2.14k
                         NonLocalDepEntry(DirtyBB));
848
2.14k
    if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
849
1
      --Entry;
850
2.14k
851
2.14k
    NonLocalDepEntry *ExistingResult = nullptr;
852
2.14k
    if (Entry != Cache.begin() + NumSortedEntries &&
853
2.14k
        
        Entry->getBB() == DirtyBB) {
854
1
      // If we already have an entry, and if it isn't already dirty, the block
855
1
      // is done.
856
1
      if (!Entry->getResult().isDirty())
857
0
        continue;
858
1
859
1
      // Otherwise, remember this slot so we can update the value.
860
1
      ExistingResult = &*Entry;
861
1
    }
862
2.14k
863
2.14k
    // If the dirty entry has a pointer, start scanning from it so we don't have
864
2.14k
    // to rescan the entire block.
865
2.14k
    BasicBlock::iterator ScanPos = DirtyBB->end();
866
2.14k
    if (ExistingResult) {
867
1
      if (Instruction *Inst = ExistingResult->getResult().getInst()) {
868
1
        ScanPos = Inst->getIterator();
869
1
        // We're removing QueryInst's use of Inst.
870
1
        RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
871
1
                                            QueryCall);
872
1
      }
873
1
    }
874
2.14k
875
2.14k
    // Find out if this block has a local dependency for QueryInst.
876
2.14k
    MemDepResult Dep;
877
2.14k
878
2.14k
    if (ScanPos != DirtyBB->begin()) {
879
2.13k
      Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
880
2.13k
    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
881
1
      // No dependence found.  If this is the entry block of the function, it is
882
1
      // non-func-local, otherwise it is non-local.
883
1
      Dep = MemDepResult::getNonLocal();
884
1
    } else {
885
0
      Dep = MemDepResult::getNonFuncLocal();
886
0
    }
887
2.14k
888
2.14k
    // If we had a dirty entry for the block, update it.  Otherwise, just add
889
2.14k
    // a new entry.
890
2.14k
    if (ExistingResult)
891
1
      ExistingResult->setResult(Dep);
892
2.13k
    else
893
2.13k
      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
894
2.14k
895
2.14k
    // If the block has a dependency (i.e. it isn't completely transparent to
896
2.14k
    // the value), remember the association!
897
2.14k
    if (!Dep.isNonLocal()) {
898
1.05k
      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
899
1.05k
      // update this when we remove instructions.
900
1.05k
      if (Instruction *Inst = Dep.getInst())
901
999
        ReverseNonLocalDeps[Inst].insert(QueryCall);
902
1.08k
    } else {
903
1.08k
904
1.08k
      // If the block *is* completely transparent to the load, we need to check
905
1.08k
      // the predecessors of this block.  Add them to our worklist.
906
1.08k
      for (BasicBlock *Pred : PredCache.get(DirtyBB))
907
1.62k
        DirtyBlocks.push_back(Pred);
908
1.08k
    }
909
2.14k
  }
910
508
911
508
  return Cache;
912
940
}
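Note on the caching discipline above: a cached per-block entry turns dirty when the instruction it referred to is later erased (that is what the reverse maps are for), so a repeat query rescans only the dirty blocks; the already-clean portion of Cache is kept sorted so std::upper_bound can locate each block's slot, each block is processed at most once thanks to the Visited set, and transparent blocks push their predecessors onto the worklist.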
913
914
void MemoryDependenceResults::getNonLocalPointerDependency(
915
2.04M
    Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
916
2.04M
  const MemoryLocation Loc = MemoryLocation::get(QueryInst);
917
2.04M
  bool isLoad = isa<LoadInst>(QueryInst);
918
2.04M
  BasicBlock *FromBB = QueryInst->getParent();
919
2.04M
  assert(FromBB);
920
2.04M
921
2.04M
  assert(Loc.Ptr->getType()->isPointerTy() &&
922
2.04M
         "Can't get pointer deps of a non-pointer!");
923
2.04M
  Result.clear();
924
2.04M
  {
925
2.04M
    // Check if there is cached Def with invariant.group.
926
2.04M
    auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
927
2.04M
    if (NonLocalDefIt != NonLocalDefsCache.end()) {
928
5
      Result.push_back(NonLocalDefIt->second);
929
5
      ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
930
5
          .erase(QueryInst);
931
5
      NonLocalDefsCache.erase(NonLocalDefIt);
932
5
      return;
933
5
    }
934
2.04M
  }
935
2.04M
  // This routine does not expect to deal with volatile instructions.
936
2.04M
  // Doing so would require piping the QueryInst all the way through.
937
2.04M
  // TODO: volatiles can't be elided, but they can be reordered with other
938
2.04M
  // non-volatile accesses.
939
2.04M
940
2.04M
  // We currently give up on any instruction which is ordered, but we do handle
941
2.04M
  // atomic instructions which are unordered.
942
2.04M
  // TODO: Handle ordered instructions
943
2.04M
  auto isOrdered = [](Instruction *Inst) {
944
2.04M
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
945
2.04M
      return !LI->isUnordered();
946
2.04M
    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
947
0
      return !SI->isUnordered();
948
0
    }
949
0
    return false;
950
0
  };
951
2.04M
  if (isVolatile(QueryInst) || isOrdered(QueryInst)) {
952
0
    Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
953
0
                                       const_cast<Value *>(Loc.Ptr)));
954
0
    return;
955
0
  }
956
2.04M
  const DataLayout &DL = FromBB->getModule()->getDataLayout();
957
2.04M
  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
958
2.04M
959
2.04M
  // This is the set of blocks we've inspected, and the pointer we consider in
960
2.04M
  // each block.  Because of critical edges, we currently bail out if querying
961
2.04M
  // a block with multiple different pointers.  This can happen during PHI
962
2.04M
  // translation.
963
2.04M
  DenseMap<BasicBlock *, Value *> Visited;
964
2.04M
  if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
965
2.04M
                                   Result, Visited, true))
966
2.02M
    return;
967
18.5k
  Result.clear();
968
18.5k
  Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
969
18.5k
                                     const_cast<Value *>(Loc.Ptr)));
970
18.5k
}
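A hedged sketch of the same query from the caller's side (MD and LI are assumed to exist; LI must be a simple, non-volatile load, matching the checks above):

// Hypothetical caller of the non-local pointer query.
SmallVector<NonLocalDepResult, 16> Deps;
MD.getNonLocalPointerDependency(LI, Deps);
for (const NonLocalDepResult &D : Deps) {
  BasicBlock *BB = D.getBB();        // block this answer applies to
  MemDepResult R = D.getResult();    // Def/Clobber/Unknown within that block
  (void)BB; (void)R;
}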
971
972
/// Compute the memdep value for BB with Pointer/PointeeSize using either
973
/// cached information in Cache or by doing a lookup (which may use dirty cache
974
/// info if available).
975
///
976
/// If we do a lookup, add the result to the cache.
977
MemDepResult MemoryDependenceResults::GetNonLocalInfoForBlock(
978
    Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
979
14.0M
    BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
980
14.0M
981
14.0M
  // Do a binary search to see if we already have an entry for this block in
982
14.0M
  // the cache set.  If so, find it.
983
14.0M
  NonLocalDepInfo::iterator Entry = std::upper_bound(
984
14.0M
      Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
985
14.0M
  if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
986
7.73M
    --Entry;
987
14.0M
988
14.0M
  NonLocalDepEntry *ExistingResult = nullptr;
989
14.0M
  if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
990
7.73M
    ExistingResult = &*Entry;
991
14.0M
992
14.0M
  // If we have a cached entry, and it is non-dirty, use it as the value for
993
14.0M
  // this dependency.
994
14.0M
  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
995
7.73M
    ++NumCacheNonLocalPtr;
996
7.73M
    return ExistingResult->getResult();
997
7.73M
  }
998
6.31M
999
6.31M
  // Otherwise, we have to scan for the value.  If we have a dirty cache
1000
6.31M
  // entry, start scanning from its position, otherwise we scan from the end
1001
6.31M
  // of the block.
1002
6.31M
  BasicBlock::iterator ScanPos = BB->end();
1003
6.31M
  if (ExistingResult && ExistingResult->getResult().getInst()) {
1004
1.28k
    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
1005
1.28k
           "Instruction invalidated?");
1006
1.28k
    ++NumCacheDirtyNonLocalPtr;
1007
1.28k
    ScanPos = ExistingResult->getResult().getInst()->getIterator();
1008
1.28k
1009
1.28k
    // Eliminating the dirty entry from 'Cache', so update the reverse info.
1010
1.28k
    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1011
1.28k
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
1012
6.30M
  } else {
1013
6.30M
    ++NumUncacheNonLocalPtr;
1014
6.30M
  }
1015
6.31M
1016
6.31M
  // Scan the block for the dependency.
1017
6.31M
  MemDepResult Dep =
1018
6.31M
      getPointerDependencyFrom(Loc, isLoad, ScanPos, BB, QueryInst);
1019
6.31M
1020
6.31M
  // If we had a dirty entry for the block, update it.  Otherwise, just add
1021
6.31M
  // a new entry.
1022
6.31M
  if (ExistingResult)
1023
1.28k
    ExistingResult->setResult(Dep);
1024
6.30M
  else
1025
6.30M
    Cache->push_back(NonLocalDepEntry(BB, Dep));
1026
6.31M
1027
6.31M
  // If the block has a dependency (i.e. it isn't completely transparent to
1028
6.31M
  // the value), remember the reverse association because we just added it
1029
6.31M
  // to Cache!
1030
6.31M
  if (!Dep.isDef() && !Dep.isClobber())
1031
4.65M
    return Dep;
1032
1.65M
1033
1.65M
  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
1034
1.65M
  // update MemDep when we remove instructions.
1035
1.65M
  Instruction *Inst = Dep.getInst();
1036
1.65M
  assert(Inst && "Didn't depend on anything?");
1037
1.65M
  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
1038
1.65M
  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
1039
1.65M
  return Dep;
1040
1.65M
}
1041
1042
/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
1043
/// array that are already properly ordered.
1044
///
1045
/// This is optimized for the case when only a few entries are added.
1046
static void
1047
SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
1048
3.80M
                         unsigned NumSortedEntries) {
1049
3.80M
  switch (Cache.size() - NumSortedEntries) {
1050
3.80M
  case 0:
1051
2.51M
    // done, no new entries.
1052
2.51M
    break;
1053
3.80M
  case 2: {
1054
163k
    // Two new entries, insert the last one into place.
1055
163k
    NonLocalDepEntry Val = Cache.back();
1056
163k
    Cache.pop_back();
1057
163k
    MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1058
163k
        std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
1059
163k
    Cache.insert(Entry, Val);
1060
163k
    LLVM_FALLTHROUGH;
1061
163k
  }
1062
793k
  case 1:
1063
793k
    // One new entry, Just insert the new value at the appropriate position.
1064
793k
    if (Cache.size() != 1) {
1065
371k
      NonLocalDepEntry Val = Cache.back();
1066
371k
      Cache.pop_back();
1067
371k
      MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1068
371k
          std::upper_bound(Cache.begin(), Cache.end(), Val);
1069
371k
      Cache.insert(Entry, Val);
1070
371k
    }
1071
793k
    break;
1072
497k
  default:
1073
497k
    // Added many values, do a full scale sort.
1074
497k
    llvm::sort(Cache);
1075
497k
    break;
1076
3.80M
  }
1077
3.80M
}
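The special cases above exist to avoid a full re-sort when only one or two entries were appended past the already-sorted prefix. The same idea in standalone form (standard library only, generic element type; a sketch, not the LLVM implementation):

#include <algorithm>
#include <cstddef>
#include <vector>

// Cache[0..NumSorted) is sorted; a few new elements sit at the end.  Move
// each straggler into place with a binary search, or fall back to a full
// sort when many were added.
template <typename T>
void resortWithSortedPrefix(std::vector<T> &Cache, std::size_t NumSorted) {
  std::size_t NumNew = Cache.size() - NumSorted;
  if (NumNew == 0)
    return;                                   // nothing to do
  if (NumNew > 2) {
    std::sort(Cache.begin(), Cache.end());    // many new entries: full sort
    return;
  }
  while (NumNew--) {
    T Val = Cache.back();
    Cache.pop_back();
    auto SortedEnd = Cache.end() - static_cast<std::ptrdiff_t>(NumNew);
    Cache.insert(std::upper_bound(Cache.begin(), SortedEnd, Val), Val);
  }
}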
1078
1079
/// Perform a dependency query based on pointer/pointeesize starting at the end
1080
/// of StartBB.
1081
///
1082
/// Add any clobber/def results to the results vector and keep track of which
1083
/// blocks are visited in 'Visited'.
1084
///
1085
/// This has special behavior for the first block queries (when SkipFirstBlock
1086
/// is true).  In this special case, it ignores the contents of the specified
1087
/// block and starts returning dependence info for its predecessors.
1088
///
1089
/// This function returns true on success, or false to indicate that it could
1090
/// not compute dependence information for some reason.  This should be treated
1091
/// as a clobber dependence on the first instruction in the predecessor block.
1092
bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1093
    Instruction *QueryInst, const PHITransAddr &Pointer,
1094
    const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1095
    SmallVectorImpl<NonLocalDepResult> &Result,
1096
3.68M
    DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock) {
1097
3.68M
  // Look up the cached info for Pointer.
1098
3.68M
  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1099
3.68M
1100
3.68M
  // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1101
3.68M
  // CacheKey, this value will be inserted as the associated value. Otherwise,
1102
3.68M
  // it'll be ignored, and we'll have to check to see if the cached size and
1103
3.68M
  // aa tags are consistent with the current query.
1104
3.68M
  NonLocalPointerInfo InitialNLPI;
1105
3.68M
  InitialNLPI.Size = Loc.Size;
1106
3.68M
  InitialNLPI.AATags = Loc.AATags;
1107
3.68M
1108
3.68M
  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1109
3.68M
  // already have one.
1110
3.68M
  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1111
3.68M
      NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1112
3.68M
  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1113
3.68M
1114
3.68M
  // If we already have a cache entry for this CacheKey, we may need to do some
1115
3.68M
  // work to reconcile the cache entry and the current query.
1116
3.68M
  if (!Pair.second) {
1117
2.59M
    if (CacheInfo->Size != Loc.Size) {
1118
0
      bool ThrowOutEverything;
1119
0
      if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
1120
0
        // FIXME: We may be able to do better in the face of results with mixed
1121
0
        // precision. We don't appear to get them in practice, though, so just
1122
0
        // be conservative.
1123
0
        ThrowOutEverything =
1124
0
            CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
1125
0
            CacheInfo->Size.getValue() < Loc.Size.getValue();
1126
0
      } else {
1127
0
        // For our purposes, unknown size > all others.
1128
0
        ThrowOutEverything = !Loc.Size.hasValue();
1129
0
      }
1130
0
1131
0
      if (ThrowOutEverything) {
1132
0
        // The query's Size is greater than the cached one. Throw out the
1133
0
        // cached data and proceed with the query at the greater size.
1134
0
        CacheInfo->Pair = BBSkipFirstBlockPair();
1135
0
        CacheInfo->Size = Loc.Size;
1136
0
        for (auto &Entry : CacheInfo->NonLocalDeps)
1137
0
          if (Instruction *Inst = Entry.getResult().getInst())
1138
0
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1139
0
        CacheInfo->NonLocalDeps.clear();
1140
0
      } else {
1141
0
        // This query's Size is less than the cached one. Conservatively restart
1142
0
        // the query using the greater size.
1143
0
        return getNonLocalPointerDepFromBB(
1144
0
            QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
1145
0
            StartBB, Result, Visited, SkipFirstBlock);
1146
0
      }
1147
2.59M
    }
1148
2.59M
1149
2.59M
    // If the query's AATags are inconsistent with the cached one,
1150
2.59M
    // conservatively throw out the cached data and restart the query with
1151
2.59M
    // no tag if needed.
1152
2.59M
    if (CacheInfo->AATags != Loc.AATags) {
1153
13.4k
      if (CacheInfo->AATags) {
1154
2.63k
        CacheInfo->Pair = BBSkipFirstBlockPair();
1155
2.63k
        CacheInfo->AATags = AAMDNodes();
1156
2.63k
        for (auto &Entry : CacheInfo->NonLocalDeps)
1157
38.4k
          if (Instruction *Inst = Entry.getResult().getInst())
1158
12.0k
            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1159
2.63k
        CacheInfo->NonLocalDeps.clear();
1160
2.63k
      }
1161
13.4k
      if (Loc.AATags)
1162
12.4k
        return getNonLocalPointerDepFromBB(
1163
12.4k
            QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1164
12.4k
            Visited, SkipFirstBlock);
1165
3.66M
    }
1166
2.59M
  }
1167
3.66M
1168
3.66M
  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1169
3.66M
1170
3.66M
  // If we have valid cached information for exactly the block we are
1171
3.66M
  // investigating, just return it with no recomputation.
1172
3.66M
  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1173
77.3k
    // We have a fully cached result for this query, so we can just return the
1174
77.3k
    // cached results and populate the visited set.  However, we have to verify
1175
77.3k
    // that we don't already have conflicting results for these blocks.  Check
1176
77.3k
    // to ensure that if a block in the results set is in the visited set that
1177
77.3k
    // it was for the same pointer query.
1178
77.3k
    if (!Visited.empty()) {
1179
99.6k
      for (auto &Entry : *Cache) {
1180
99.6k
        DenseMap<BasicBlock *, Value *>::iterator VI =
1181
99.6k
            Visited.find(Entry.getBB());
1182
99.6k
        if (VI == Visited.end() || VI->second == Pointer.getAddr())
1183
99.5k
          continue;
1184
24
1185
24
        // We have a pointer mismatch in a block.  Just return false, saying
1186
24
        // that something was clobbered in this result.  We could also do a
1187
24
        // non-fully cached query, but there is little point in doing this.
1188
24
        return false;
1189
24
      }
1190
22.6k
    }
1191
77.3k
1192
77.3k
    Value *Addr = Pointer.getAddr();
1193
610k
    for (auto &Entry : *Cache) {
1194
610k
      Visited.insert(std::make_pair(Entry.getBB(), Addr));
1195
610k
      if (Entry.getResult().isNonLocal()) {
1196
444k
        continue;
1197
444k
      }
1198
166k
1199
166k
      if (DT.isReachableFromEntry(Entry.getBB())) {
1200
166k
        Result.push_back(
1201
166k
            NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1202
166k
      }
1203
166k
    }
1204
77.3k
    ++NumCacheCompleteNonLocalPtr;
1205
77.3k
    return true;
1206
3.59M
  }
1207
3.59M
1208
3.59M
  // Otherwise, either this is a new block, a block with an invalid cache
1209
3.59M
  // pointer or one that we're about to invalidate by putting more info into it
1210
3.59M
  // than its valid cache info.  If empty, the result will be valid cache info,
1211
3.59M
  // otherwise it isn't.
1212
3.59M
  if (Cache->empty())
1213
1.87M
    CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1214
1.71M
  else
1215
1.71M
    CacheInfo->Pair = BBSkipFirstBlockPair();
1216
3.59M
1217
3.59M
  SmallVector<BasicBlock *, 32> Worklist;
1218
3.59M
  Worklist.push_back(StartBB);
1219
3.59M
1220
3.59M
  // PredList used inside loop.
1221
3.59M
  SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1222
3.59M
1223
3.59M
  // Keep track of the entries that we know are sorted.  Previously cached
1224
3.59M
  // entries will all be sorted.  The entries we add we only sort on demand (we
1225
3.59M
  // don't insert every element into its sorted position).  We know that we
1226
3.59M
  // won't get any reuse from currently inserted values, because we don't
1227
3.59M
  // revisit blocks after we insert info for them.
1228
3.59M
  unsigned NumSortedEntries = Cache->size();
1229
3.59M
  unsigned WorklistEntries = BlockNumberLimit;
1230
3.59M
  bool GotWorklistLimit = false;
1231
3.59M
  LLVM_DEBUG(AssertSorted(*Cache));
1232
3.59M
1233
19.6M
  while (!Worklist.empty()) {
1234
16.0M
    BasicBlock *BB = Worklist.pop_back_val();
1235
16.0M
1236
16.0M
    // If we do process a large number of blocks, it becomes very expensive and
1237
16.0M
    // likely it isn't worth worrying about.
1238
16.0M
    if (Result.size() > NumResultsLimit) {
1239
2.50k
      Worklist.clear();
1240
2.50k
      // Sort it now (if needed) so that recursive invocations of
1241
2.50k
      // getNonLocalPointerDepFromBB and other routines that could reuse the
1242
2.50k
      // cache value will only see properly sorted cache arrays.
1243
2.50k
      if (Cache && NumSortedEntries != Cache->size()) {
1244
797
        SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1245
797
      }
1246
2.50k
      // Since we bail out, the "Cache" set won't contain all of the
1247
2.50k
      // results for the query.  This is ok (we can still use it to accelerate
1248
2.50k
      // specific block queries) but we can't do the fastpath "return all
1249
2.50k
      // results from the set".  Clear out the indicator for this.
1250
2.50k
      CacheInfo->Pair = BBSkipFirstBlockPair();
1251
2.50k
      return false;
1252
2.50k
    }
1253
16.0M
1254
16.0M
    // Skip the first block if we have it.
1255
16.0M
    if (!SkipFirstBlock) {
1256
14.0M
      // Analyze the dependency of *Pointer in FromBB.  See if we already have
1257
14.0M
      // been here.
1258
14.0M
      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1259
14.0M
1260
14.0M
      // Get the dependency info for Pointer in BB.  If we have cached
1261
14.0M
      // information, we will use it, otherwise we compute it.
1262
14.0M
      LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1263
14.0M
      MemDepResult Dep = GetNonLocalInfoForBlock(QueryInst, Loc, isLoad, BB,
1264
14.0M
                                                 Cache, NumSortedEntries);
1265
14.0M
1266
14.0M
      // If we got a Def or Clobber, add this to the list of results.
1267
14.0M
      if (!Dep.isNonLocal()) {
1268
4.11M
        if (DT.isReachableFromEntry(BB)) {
1269
4.11M
          Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1270
4.11M
          continue;
1271
4.11M
        }
1272
11.9M
      }
1273
14.0M
    }
1274
11.9M
1275
11.9M
    // If 'Pointer' is an instruction defined in this block, then we need to do
1276
11.9M
    // phi translation to change it into a value live in the predecessor block.
1277
11.9M
    // If not, we just add the predecessors to the worklist and scan them with
1278
11.9M
    // the same Pointer.
1279
11.9M
    if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
1280
10.0M
      SkipFirstBlock = false;
1281
10.0M
      SmallVector<BasicBlock *, 16> NewBlocks;
1282
15.9M
      for (BasicBlock *Pred : PredCache.get(BB)) {
1283
15.9M
        // Verify that we haven't looked at this block yet.
1284
15.9M
        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1285
15.9M
            Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1286
15.9M
        if (InsertRes.second) {
1287
12.5M
          // First time we've looked at *PI.
1288
12.5M
          NewBlocks.push_back(Pred);
1289
12.5M
          continue;
1290
12.5M
        }
1291
3.33M
1292
3.33M
        // If we have seen this block before, but it was with a different
1293
3.33M
        // pointer then we have a phi translation failure and we have to treat
1294
3.33M
        // this as a clobber.
1295
3.33M
        if (InsertRes.first->second != Pointer.getAddr()) {
1296
8.47k
          // Make sure to clean up the Visited map before continuing on to
1297
8.47k
          // PredTranslationFailure.
1298
11.8k
          for (unsigned i = 0; i < NewBlocks.size(); i++)
1299
3.35k
            Visited.erase(NewBlocks[i]);
1300
8.47k
          goto PredTranslationFailure;
1301
8.47k
        }
1302
3.33M
      }
1303
10.0M
      if (NewBlocks.size() > WorklistEntries) {
1304
493
        // Make sure to clean up the Visited map before continuing on to
1305
493
        // PredTranslationFailure.
1306
1.21k
        for (unsigned i = 0; i < NewBlocks.size(); i++)
1307
718
          Visited.erase(NewBlocks[i]);
1308
493
        GotWorklistLimit = true;
1309
493
        goto PredTranslationFailure;
1310
493
      }
1311
10.0M
      WorklistEntries -= NewBlocks.size();
1312
10.0M
      Worklist.append(NewBlocks.begin(), NewBlocks.end());
1313
10.0M
      continue;
1314
10.0M
    }
1315
1.91M
1316
1.91M
    // We do need to do phi translation. If we know ahead of time we can't phi
1317
1.91M
    // translate this value, don't even try.
1318
1.91M
    if (!Pointer.IsPotentiallyPHITranslatable())
1319
26.5k
      goto PredTranslationFailure;
1320
1.88M
1321
1.88M
    // We may have added values to the cache list before this PHI translation.
1322
1.88M
    // If so, we haven't done anything to ensure that the cache remains sorted.
1323
1.88M
    // Sort it now (if needed) so that recursive invocations of
1324
1.88M
    // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1325
1.88M
    // value will only see properly sorted cache arrays.
1326
1.88M
    if (Cache && NumSortedEntries != Cache->size()) {
1327
232k
      SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1328
232k
      NumSortedEntries = Cache->size();
1329
232k
    }
1330
1.88M
    Cache = nullptr;
1331
1.88M
1332
1.88M
    PredList.clear();
1333
2.96M
    for (BasicBlock *Pred : PredCache.get(BB)) {
1334
2.96M
      PredList.push_back(std::make_pair(Pred, Pointer));
1335
2.96M
1336
2.96M
      // Get the PHI translated pointer in this predecessor.  This can fail if
1337
2.96M
      // not translatable, in which case the getAddr() returns null.
1338
2.96M
      PHITransAddr &PredPointer = PredList.back().second;
1339
2.96M
      PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
1340
2.96M
      Value *PredPtrVal = PredPointer.getAddr();
1341
2.96M
1342
2.96M
      // Check to see if we have already visited this pred block with another
1343
2.96M
      // pointer.  If so, we can't do this lookup.  This failure can occur
1344
2.96M
      // with PHI translation when a critical edge exists and the PHI node in
1345
2.96M
      // the successor translates to a pointer value different than the
1346
2.96M
      // pointer the block was first analyzed with.
1347
2.96M
      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1348
2.96M
          Visited.insert(std::make_pair(Pred, PredPtrVal));
1349
2.96M
1350
2.96M
      if (!InsertRes.second) {
1351
34.1k
        // We found the pred; take it off the list of preds to visit.
1352
34.1k
        PredList.pop_back();
1353
34.1k
1354
34.1k
        // If the predecessor was visited with PredPtr, then we already did
1355
34.1k
        // the analysis and can ignore it.
1356
34.1k
        if (InsertRes.first->second == PredPtrVal)
1357
25.4k
          continue;
1358
8.75k
1359
8.75k
        // Otherwise, the block was previously analyzed with a different
1360
8.75k
        // pointer.  We can't represent the result of this case, so we just
1361
8.75k
        // treat this as a phi translation failure.
1362
8.75k
1363
8.75k
        // Make sure to clean up the Visited map before continuing on to
1364
8.75k
        // PredTranslationFailure.
1365
9.92k
        for (unsigned i = 0, n = PredList.size(); i < n; ++i)
1366
1.17k
          Visited.erase(PredList[i].first);
1367
8.75k
1368
8.75k
        goto PredTranslationFailure;
1369
8.75k
      }
1370
2.96M
    }
1371
1.88M
1372
1.88M
    // Actually process results here; this needs to be a separate loop to avoid
1373
1.88M
    // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1374
1.88M
    // any results for.  (getNonLocalPointerDepFromBB will modify our
1375
1.88M
    // datastructures in ways the code after the PredTranslationFailure label
1376
1.88M
    // doesn't expect.)
1377
4.80M
    for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
1378
2.93M
      BasicBlock *Pred = PredList[i].first;
1379
2.93M
      PHITransAddr &PredPointer = PredList[i].second;
1380
2.93M
      Value *PredPtrVal = PredPointer.getAddr();
1381
2.93M
1382
2.93M
      bool CanTranslate = true;
1383
2.93M
      // If PHI translation was unable to find an available pointer in this
1384
2.93M
      // predecessor, then we have to assume that the pointer is clobbered in
1385
2.93M
      // that predecessor.  We can still do PRE of the load, which would insert
1386
2.93M
      // a computation of the pointer in this predecessor.
1387
2.93M
      if (!PredPtrVal)
1388
1.30M
        CanTranslate = false;
1389
2.93M
1390
2.93M
      // FIXME: it is entirely possible that PHI translating will end up with
1391
2.93M
      // the same value.  Consider PHI translating something like:
1392
2.93M
      // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
1393
2.93M
      // to recurse here, pedantically speaking.
1394
2.93M
1395
2.93M
      // If getNonLocalPointerDepFromBB fails here, that means the cached
1396
2.93M
      // result conflicted with the Visited list; we have to conservatively
1397
2.93M
      // assume it is unknown, but this also does not block PRE of the load.
1398
2.93M
      if (!CanTranslate ||
1399
2.93M
          !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1400
1.62M
                                      Loc.getWithNewPtr(PredPtrVal), isLoad,
1401
1.62M
                                      Pred, Result, Visited)) {
1402
1.30M
        // Add the entry to the Result list.
1403
1.30M
        NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1404
1.30M
        Result.push_back(Entry);
1405
1.30M
1406
1.30M
        // Since we had a phi translation failure, the cache for CacheKey won't
1407
1.30M
        // include all of the entries that we need to immediately satisfy future
1408
1.30M
        // queries.  Mark this in NonLocalPointerDeps by setting the
1409
1.30M
        // BBSkipFirstBlockPair pointer to null.  This requires reuse of the
1410
1.30M
        // cached value to do more work but not miss the phi trans failure.
1411
1.30M
        NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1412
1.30M
        NLPI.Pair = BBSkipFirstBlockPair();
1413
1.30M
        continue;
1414
1.30M
      }
1415
2.93M
    }
1416
1.87M
1417
1.87M
    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1418
1.87M
    CacheInfo = &NonLocalPointerDeps[CacheKey];
1419
1.87M
    Cache = &CacheInfo->NonLocalDeps;
1420
1.87M
    NumSortedEntries = Cache->size();
1421
1.87M
1422
1.87M
    // Since we did phi translation, the "Cache" set won't contain all of the
1423
1.87M
    // results for the query.  This is ok (we can still use it to accelerate
1424
1.87M
    // specific block queries) but we can't do the fastpath "return all
1425
1.87M
    // results from the set"  Clear out the indicator for this.
1426
1.87M
    CacheInfo->Pair = BBSkipFirstBlockPair();
1427
1.87M
    SkipFirstBlock = false;
1428
1.87M
    continue;
1429
44.2k
1430
44.2k
  PredTranslationFailure:
1431
44.2k
    // The following code is "failure"; we can't produce a sane translation
1432
44.2k
    // for the given block.  It assumes that we haven't modified any of
1433
44.2k
    // our datastructures while processing the current block.
1434
44.2k
1435
44.2k
    if (!Cache) {
1436
8.75k
      // Refresh the CacheInfo/Cache pointer if it got invalidated.
1437
8.75k
      CacheInfo = &NonLocalPointerDeps[CacheKey];
1438
8.75k
      Cache = &CacheInfo->NonLocalDeps;
1439
8.75k
      NumSortedEntries = Cache->size();
1440
8.75k
    }
1441
44.2k
1442
44.2k
    // Since we failed phi translation, the "Cache" set won't contain all of the
1443
44.2k
    // results for the query.  This is ok (we can still use it to accelerate
1444
44.2k
    // specific block queries) but we can't do the fastpath "return all
1445
44.2k
    // results from the set".  Clear out the indicator for this.
1446
44.2k
    CacheInfo->Pair = BBSkipFirstBlockPair();
1447
44.2k
1448
44.2k
    // If *nothing* works, mark the pointer as unknown.
1449
44.2k
    //
1450
44.2k
    // If this is the magic first block, return this as a clobber of the whole
1451
44.2k
    // incoming value.  Since we can't phi translate to one of the predecessors,
1452
44.2k
    // we have to bail out.
1453
44.2k
    if (SkipFirstBlock)
1454
16.9k
      return false;
1455
27.3k
1456
27.3k
    bool foundBlock = false;
1457
56.2k
    for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1458
56.2k
      if (I.getBB() != BB)
1459
28.9k
        continue;
1460
27.3k
1461
27.3k
      assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1462
27.3k
              !DT.isReachableFromEntry(BB)) &&
1463
27.3k
             "Should only be here with transparent block");
1464
27.3k
      foundBlock = true;
1465
27.3k
      I.setResult(MemDepResult::getUnknown());
1466
27.3k
      Result.push_back(
1467
27.3k
          NonLocalDepResult(I.getBB(), I.getResult(), Pointer.getAddr()));
1468
27.3k
      break;
1469
27.3k
    }
1470
27.3k
    (void)foundBlock; (void)GotWorklistLimit;
1471
27.3k
    assert((foundBlock || GotWorklistLimit) && "Current block not in cache?");
1472
27.3k
  }
1473
3.59M
1474
3.59M
  // Okay, we're done now.  If we added new values to the cache, re-sort it.
1475
3.59M
  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1476
3.57M
  LLVM_DEBUG(AssertSorted(*Cache));
1477
3.57M
  return true;
1478
3.59M
}
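
getNonLocalPointerDepFromBB is, at its core, a worklist walk backwards over CFG predecessors in which the Visited map records which pointer value each block was analyzed with; reaching a block again with a different (phi-translated) pointer is reported as failure, as the header comment above describes. The following is a stripped-down sketch of that traversal pattern using made-up types and names (Block, walkPreds), not the LLVM interfaces.

#include <map>
#include <vector>

struct Block { std::vector<Block *> Preds; };   // toy CFG node

// Walk predecessors of Start, tagging each block with the pointer value it was
// analyzed with.  Returns false when a block is reached again with a different
// pointer, mirroring the "treat as clobber" failure mode described above.
bool walkPreds(Block *Start, int PtrVal, std::map<Block *, int> &Visited) {
  std::vector<Block *> Worklist{Start};
  while (!Worklist.empty()) {
    Block *BB = Worklist.back();
    Worklist.pop_back();
    for (Block *Pred : BB->Preds) {
      auto Ins = Visited.insert({Pred, PtrVal});
      if (Ins.second) {                         // first visit: keep walking
        Worklist.push_back(Pred);
        continue;
      }
      if (Ins.first->second != PtrVal)          // seen with another pointer
        return false;
    }
  }
  return true;
}
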
1479
1480
/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1481
void MemoryDependenceResults::RemoveCachedNonLocalPointerDependencies(
1482
1.43M
    ValueIsLoadPair P) {
1483
1.43M
1484
1.43M
  // Most of the time this cache is empty.
1485
1.43M
  if (!NonLocalDefsCache.empty()) {
1486
1
    auto it = NonLocalDefsCache.find(P.getPointer());
1487
1
    if (it != NonLocalDefsCache.end()) {
1488
0
      RemoveFromReverseMap(ReverseNonLocalDefsCache,
1489
0
                           it->second.getResult().getInst(), P.getPointer());
1490
0
      NonLocalDefsCache.erase(it);
1491
0
    }
1492
1
1493
1
    if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1494
1
      auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1495
1
      if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1496
1
        for (const auto &entry : toRemoveIt->second)
1497
1
          NonLocalDefsCache.erase(entry);
1498
1
        ReverseNonLocalDefsCache.erase(toRemoveIt);
1499
1
      }
1500
1
    }
1501
1
  }
1502
1.43M
1503
1.43M
  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1504
1.43M
  if (It == NonLocalPointerDeps.end())
1505
1.38M
    return;
1506
56.2k
1507
56.2k
  // Remove all of the entries in the BB->val map.  This involves removing
1508
56.2k
  // instructions from the reverse map.
1509
56.2k
  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1510
56.2k
1511
467k
  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
1512
411k
    Instruction *Target = PInfo[i].getResult().getInst();
1513
411k
    if (!Target)
1514
285k
      continue; // Ignore non-local dep results.
1515
125k
    assert(Target->getParent() == PInfo[i].getBB());
1516
125k
1517
125k
    // Eliminating the dirty entry from 'Cache', so update the reverse info.
1518
125k
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1519
125k
  }
1520
56.2k
1521
56.2k
  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1522
56.2k
  NonLocalPointerDeps.erase(It);
1523
56.2k
}
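
RemoveCachedNonLocalPointerDependencies illustrates the two-sided bookkeeping used throughout this file: a forward cache keyed by (pointer, is-load) and a reverse map from instructions back to the cache keys that mention them, so that dropping a key also scrubs every reverse entry. Here is a hedged sketch of that pattern with invented names (ForwardCache, ReverseMap, removeKey); it is not the real data-structure layout.

#include <map>
#include <set>
#include <string>
#include <vector>

using Key = std::string;
std::map<Key, std::vector<int>> ForwardCache; // key -> cached results
std::map<int, std::set<Key>> ReverseMap;      // result -> keys that mention it

// Drop K from the forward cache and remove K from every reverse-map bucket
// that still points at it, keeping the two maps consistent.
void removeKey(const Key &K) {
  auto It = ForwardCache.find(K);
  if (It == ForwardCache.end())
    return;
  for (int Result : It->second) {
    auto RI = ReverseMap.find(Result);
    if (RI == ReverseMap.end())
      continue;
    RI->second.erase(K);
    if (RI->second.empty())
      ReverseMap.erase(RI);                   // drop empty buckets
  }
  ForwardCache.erase(It);
}
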
1524
1525
1.38M
void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1526
1.38M
  // If Ptr isn't really a pointer, just ignore it.
1527
1.38M
  if (!Ptr->getType()->isPointerTy())
1528
827k
    return;
1529
562k
  // Flush store info for the pointer.
1530
562k
  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1531
562k
  // Flush load info for the pointer.
1532
562k
  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1533
562k
  // Invalidate phis that use the pointer.
1534
562k
  PV.invalidateValue(Ptr);
1535
562k
}
1536
1537
39.3k
void MemoryDependenceResults::invalidateCachedPredecessors() {
1538
39.3k
  PredCache.clear();
1539
39.3k
}
1540
1541
365k
void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1542
365k
  // Walk through the Non-local dependencies, removing this one as the value
1543
365k
  // for any cached queries.
1544
365k
  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
1545
365k
  if (NLDI != NonLocalDeps.end()) {
1546
5
    NonLocalDepInfo &BlockMap = NLDI->second.first;
1547
5
    for (auto &Entry : BlockMap)
1548
7
      if (Instruction *Inst = Entry.getResult().getInst())
1549
5
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1550
5
    NonLocalDeps.erase(NLDI);
1551
5
  }
1552
365k
1553
365k
  // If we have a cached local dependence query for this instruction, remove it.
1554
365k
  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1555
365k
  if (LocalDepEntry != LocalDeps.end()) {
1556
82.1k
    // Remove us from DepInst's reverse set now that the local dep info is gone.
1557
82.1k
    if (Instruction *Inst = LocalDepEntry->second.getInst())
1558
15.2k
      RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1559
82.1k
1560
82.1k
    // Remove this local dependency info.
1561
82.1k
    LocalDeps.erase(LocalDepEntry);
1562
82.1k
  }
1563
365k
1564
365k
  // If we have any cached pointer dependencies on this instruction, remove
1565
365k
  // them.  If the instruction has non-pointer type, then it can't be a pointer
1566
365k
  // base.
1567
365k
1568
365k
  // Remove it from both the load info and the store info.  The instruction
1569
365k
  // can't be in either of these maps if it is non-pointer.
1570
365k
  if (RemInst->getType()->isPointerTy()) {
1571
157k
    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1572
157k
    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1573
157k
  }
1574
365k
1575
365k
  // Loop over all of the things that depend on the instruction we're removing.
1576
365k
  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1577
365k
1578
365k
  // If we find RemInst as a clobber or Def in any of the maps for other values,
1579
365k
  // we need to replace its entry with a dirty version of the instruction after
1580
365k
  // it.  If RemInst is a terminator, we use a null dirty value.
1581
365k
  //
1582
365k
  // Using a dirty version of the instruction after RemInst saves having to scan
1583
365k
  // the entire block to get to this point.
1584
365k
  MemDepResult NewDirtyVal;
1585
365k
  if (!RemInst->isTerminator())
1586
365k
    NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1587
365k
1588
365k
  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1589
365k
  if (ReverseDepIt != ReverseLocalDeps.end()) {
1590
8.26k
    // RemInst can't be the terminator if it has local stuff depending on it.
1591
8.26k
    assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1592
8.26k
           "Nothing can locally depend on a terminator");
1593
8.26k
1594
9.83k
    for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1595
9.83k
      assert(InstDependingOnRemInst != RemInst &&
1596
9.83k
             "Already removed our local dep info");
1597
9.83k
1598
9.83k
      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1599
9.83k
1600
9.83k
      // Make sure to remember that new things depend on NewDepInst.
1601
9.83k
      assert(NewDirtyVal.getInst() &&
1602
9.83k
             "There is no way something else can have "
1603
9.83k
             "a local dep on this if it is a terminator!");
1604
9.83k
      ReverseDepsToAdd.push_back(
1605
9.83k
          std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1606
9.83k
    }
1607
8.26k
1608
8.26k
    ReverseLocalDeps.erase(ReverseDepIt);
1609
8.26k
1610
8.26k
    // Add new reverse deps after scanning the set, to avoid invalidating the
1611
8.26k
    // 'ReverseDeps' reference.
1612
18.1k
    while (!ReverseDepsToAdd.empty()) {
1613
9.83k
      ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1614
9.83k
          ReverseDepsToAdd.back().second);
1615
9.83k
      ReverseDepsToAdd.pop_back();
1616
9.83k
    }
1617
8.26k
  }
1618
365k
1619
365k
  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1620
365k
  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1621
1
    for (Instruction *I : ReverseDepIt->second) {
1622
1
      assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1623
1
1624
1
      PerInstNLInfo &INLD = NonLocalDeps[I];
1625
1
      // The information is now dirty!
1626
1
      INLD.second = true;
1627
1
1628
1
      for (auto &Entry : INLD.first) {
1629
1
        if (Entry.getResult().getInst() != RemInst)
1630
0
          continue;
1631
1
1632
1
        // Convert to a dirty entry for the subsequent instruction.
1633
1
        Entry.setResult(NewDirtyVal);
1634
1
1635
1
        if (Instruction *NextI = NewDirtyVal.getInst())
1636
1
          ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1637
1
      }
1638
1
    }
1639
1
1640
1
    ReverseNonLocalDeps.erase(ReverseDepIt);
1641
1
1642
1
    // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
1643
2
    while (!ReverseDepsToAdd.empty()) {
1644
1
      ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1645
1
          ReverseDepsToAdd.back().second);
1646
1
      ReverseDepsToAdd.pop_back();
1647
1
    }
1648
1
  }
1649
365k
1650
365k
  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1651
365k
  // value in the NonLocalPointerDeps info.
1652
365k
  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1653
365k
      ReverseNonLocalPtrDeps.find(RemInst);
1654
365k
  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1655
10.4k
    SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1656
10.4k
        ReversePtrDepsToAdd;
1657
10.4k
1658
11.1k
    for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1659
11.1k
      assert(P.getPointer() != RemInst &&
1660
11.1k
             "Already removed NonLocalPointerDeps info for RemInst");
1661
11.1k
1662
11.1k
      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;
1663
11.1k
1664
11.1k
      // The cache is not valid for any specific block anymore.
1665
11.1k
      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();
1666
11.1k
1667
11.1k
      // Update any entries for RemInst to use the instruction after it.
1668
139k
      for (auto &Entry : NLPDI) {
1669
139k
        if (Entry.getResult().getInst() != RemInst)
1670
127k
          continue;
1671
11.1k
1672
11.1k
        // Convert to a dirty entry for the subsequent instruction.
1673
11.1k
        Entry.setResult(NewDirtyVal);
1674
11.1k
1675
11.1k
        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1676
11.1k
          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1677
11.1k
      }
1678
11.1k
1679
11.1k
      // Re-sort the NonLocalDepInfo.  Changing the dirty entry to its
1680
11.1k
      // subsequent value may invalidate the sortedness.
1681
11.1k
      llvm::sort(NLPDI);
1682
11.1k
    }
1683
10.4k
1684
10.4k
    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1685
10.4k
1686
21.6k
    while (!ReversePtrDepsToAdd.empty()) {
1687
11.1k
      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1688
11.1k
          ReversePtrDepsToAdd.back().second);
1689
11.1k
      ReversePtrDepsToAdd.pop_back();
1690
11.1k
    }
1691
10.4k
  }
1692
365k
1693
365k
  // Invalidate phis that use the removed instruction.
1694
365k
  PV.invalidateValue(RemInst);
1695
365k
1696
365k
  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
1697
365k
  LLVM_DEBUG(verifyRemoved(RemInst));
1698
365k
}
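
removeInstruction relies on the "dirty entry" trick: rather than rescanning whole blocks when an instruction disappears, every cached result that named it is redirected to the instruction immediately after it and marked dirty, so the next query resumes scanning from that point. Below is a toy sketch of just the redirection step, with hypothetical names (markDirtyOnRemoval, DepCache) and instructions modeled as a plain list; it is not the LLVM data model.

#include <iterator>
#include <list>
#include <map>

struct Dep { int *Inst; bool Dirty; };        // toy "dependence" record

// When Removed is erased, any cached dependence on it is redirected to the
// following instruction and flagged dirty so it gets re-resolved lazily on
// the next query.
void markDirtyOnRemoval(std::list<int> &Insts, std::list<int>::iterator Removed,
                        std::map<int *, Dep> &DepCache) {
  int *Next = nullptr;
  if (std::next(Removed) != Insts.end())
    Next = &*std::next(Removed);              // instruction after the removed one
  for (auto &Entry : DepCache)
    if (Entry.second.Inst == &*Removed)
      Entry.second = {Next, /*Dirty=*/true};
  Insts.erase(Removed);
}
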
1699
1700
/// Verify that the specified instruction does not occur in our internal data
1701
/// structures.
1702
///
1703
/// This function verifies by asserting in debug builds.
1704
0
void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1705
#ifndef NDEBUG
1706
  for (const auto &DepKV : LocalDeps) {
1707
    assert(DepKV.first != D && "Inst occurs in data structures");
1708
    assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1709
  }
1710
1711
  for (const auto &DepKV : NonLocalPointerDeps) {
1712
    assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1713
    for (const auto &Entry : DepKV.second.NonLocalDeps)
1714
      assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1715
  }
1716
1717
  for (const auto &DepKV : NonLocalDeps) {
1718
    assert(DepKV.first != D && "Inst occurs in data structures");
1719
    const PerInstNLInfo &INLD = DepKV.second;
1720
    for (const auto &Entry : INLD.first)
1721
      assert(Entry.getResult().getInst() != D &&
1722
             "Inst occurs in data structures");
1723
  }
1724
1725
  for (const auto &DepKV : ReverseLocalDeps) {
1726
    assert(DepKV.first != D && "Inst occurs in data structures");
1727
    for (Instruction *Inst : DepKV.second)
1728
      assert(Inst != D && "Inst occurs in data structures");
1729
  }
1730
1731
  for (const auto &DepKV : ReverseNonLocalDeps) {
1732
    assert(DepKV.first != D && "Inst occurs in data structures");
1733
    for (Instruction *Inst : DepKV.second)
1734
      assert(Inst != D && "Inst occurs in data structures");
1735
  }
1736
1737
  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1738
    assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1739
1740
    for (ValueIsLoadPair P : DepKV.second)
1741
      assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1742
             "Inst occurs in ReverseNonLocalPtrDeps map");
1743
  }
1744
#endif
1745
}
1746
1747
AnalysisKey MemoryDependenceAnalysis::Key;
1748
1749
MemoryDependenceResults
1750
1.39k
MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1751
1.39k
  auto &AA = AM.getResult<AAManager>(F);
1752
1.39k
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
1753
1.39k
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1754
1.39k
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1755
1.39k
  auto &PV = AM.getResult<PhiValuesAnalysis>(F);
1756
1.39k
  return MemoryDependenceResults(AA, AC, TLI, DT, PV);
1757
1.39k
}
1758
1759
char MemoryDependenceWrapperPass::ID = 0;
1760
1761
101k
INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1762
101k
                      "Memory Dependence Analysis", false, true)
1763
101k
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1764
101k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1765
101k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1766
101k
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1767
101k
INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
1768
101k
INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1769
                    "Memory Dependence Analysis", false, true)
1770
1771
42.4k
MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
1772
42.4k
  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
1773
42.4k
}
1774
1775
42.4k
MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1776
1777
1.42M
void MemoryDependenceWrapperPass::releaseMemory() {
1778
1.42M
  MemDep.reset();
1779
1.42M
}
1780
1781
42.4k
void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1782
42.4k
  AU.setPreservesAll();
1783
42.4k
  AU.addRequired<AssumptionCacheTracker>();
1784
42.4k
  AU.addRequired<DominatorTreeWrapperPass>();
1785
42.4k
  AU.addRequired<PhiValuesWrapperPass>();
1786
42.4k
  AU.addRequiredTransitive<AAResultsWrapperPass>();
1787
42.4k
  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1788
42.4k
}
1789
1790
bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1791
1.07k
                               FunctionAnalysisManager::Invalidator &Inv) {
1792
1.07k
  // Check whether our analysis is preserved.
1793
1.07k
  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1794
1.07k
  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1795
968
    // If not, give up now.
1796
968
    return true;
1797
104
1798
104
  // Check whether the analyses we depend on became invalid for any reason.
1799
104
  if (Inv.invalidate<AAManager>(F, PA) ||
1800
104
      Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1801
104
      Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
1802
104
      Inv.invalidate<PhiValuesAnalysis>(F, PA))
1803
104
    return true;
1804
0
1805
0
  // Otherwise this analysis result remains valid.
1806
0
  return false;
1807
0
}
1808
1809
616k
unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1810
616k
  return BlockScanLimit;
1811
616k
}
1812
1813
1.42M
bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1814
1.42M
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1815
1.42M
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1816
1.42M
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1817
1.42M
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1818
1.42M
  auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
1819
1.42M
  MemDep.emplace(AA, AC, TLI, DT, PV);
1820
1.42M
  return false;
1821
1.42M
}