Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Analysis/MemorySSA.cpp
Line | Count | Source
1
//===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file implements the MemorySSA class.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Analysis/MemorySSA.h"
15
#include "llvm/ADT/DenseMap.h"
16
#include "llvm/ADT/DenseMapInfo.h"
17
#include "llvm/ADT/DenseSet.h"
18
#include "llvm/ADT/DepthFirstIterator.h"
19
#include "llvm/ADT/Hashing.h"
20
#include "llvm/ADT/None.h"
21
#include "llvm/ADT/Optional.h"
22
#include "llvm/ADT/STLExtras.h"
23
#include "llvm/ADT/SmallPtrSet.h"
24
#include "llvm/ADT/SmallVector.h"
25
#include "llvm/ADT/iterator.h"
26
#include "llvm/ADT/iterator_range.h"
27
#include "llvm/Analysis/AliasAnalysis.h"
28
#include "llvm/Analysis/IteratedDominanceFrontier.h"
29
#include "llvm/Analysis/MemoryLocation.h"
30
#include "llvm/IR/AssemblyAnnotationWriter.h"
31
#include "llvm/IR/BasicBlock.h"
32
#include "llvm/IR/CallSite.h"
33
#include "llvm/IR/Dominators.h"
34
#include "llvm/IR/Function.h"
35
#include "llvm/IR/Instruction.h"
36
#include "llvm/IR/Instructions.h"
37
#include "llvm/IR/IntrinsicInst.h"
38
#include "llvm/IR/Intrinsics.h"
39
#include "llvm/IR/LLVMContext.h"
40
#include "llvm/IR/PassManager.h"
41
#include "llvm/IR/Use.h"
42
#include "llvm/Pass.h"
43
#include "llvm/Support/AtomicOrdering.h"
44
#include "llvm/Support/Casting.h"
45
#include "llvm/Support/CommandLine.h"
46
#include "llvm/Support/Compiler.h"
47
#include "llvm/Support/Debug.h"
48
#include "llvm/Support/ErrorHandling.h"
49
#include "llvm/Support/FormattedStream.h"
50
#include "llvm/Support/raw_ostream.h"
51
#include <algorithm>
52
#include <cassert>
53
#include <iterator>
54
#include <memory>
55
#include <utility>
56
57
using namespace llvm;
58
59
#define DEBUG_TYPE "memoryssa"
60
61
41.6k
INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
62
41.6k
                      true)
63
41.6k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
64
41.6k
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
65
41.6k
INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
66
                    true)
67
68
7.91k
INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
69
7.91k
                      "Memory SSA Printer", false, false)
70
7.91k
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
71
7.91k
INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
72
                    "Memory SSA Printer", false, false)
73
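// The INITIALIZE_PASS_* blocks above register the legacy-pass-manager
// wrappers. A minimal sketch of a client pass that requests the analysis
// registered here (not part of MemorySSA.cpp; "ExamplePass" is hypothetical):
namespace {
struct ExamplePass : public FunctionPass {
  static char ID;
  ExamplePass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MemorySSAWrapperPass>(); // have MemorySSA built for us
    AU.setPreservesAll();                   // we only read the analysis
  }

  bool runOnFunction(Function &F) override {
    MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
    MSSA.verifyMemorySSA();                 // sanity-check the built form
    return false;
  }
};
char ExamplePass::ID = 0;
} // end anonymous namespace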
74
static cl::opt<unsigned> MaxCheckLimit(
75
    "memssa-check-limit", cl::Hidden, cl::init(100),
76
    cl::desc("The maximum number of stores/phis MemorySSA"
77
             "will consider trying to walk past (default = 100)"));
78
79
static cl::opt<bool>
80
    VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
81
                    cl::desc("Verify MemorySSA in legacy printer pass."));
82
83
namespace llvm {
84
85
/// \brief An assembly annotator class to print Memory SSA information in
86
/// comments.
87
class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
88
  friend class MemorySSA;
89
90
  const MemorySSA *MSSA;
91
92
public:
93
82
  MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {}
94
95
  void emitBasicBlockStartAnnot(const BasicBlock *BB,
96
228
                                formatted_raw_ostream &OS) override {
97
228
    if (MemoryAccess *MA = MSSA->getMemoryAccess(BB))
98
69
      OS << "; " << *MA << "\n";
99
228
  }
100
101
  void emitInstructionAnnot(const Instruction *I,
102
724
                            formatted_raw_ostream &OS) override {
103
724
    if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
104
370
      OS << "; " << *MA << "\n";
105
724
  }
106
};
107
108
} // end namespace llvm
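// Sketch of how the annotator above is driven (this mirrors what
// MemorySSA::print does; the free function itself is illustrative only):
static void printFunctionWithMemorySSA(Function &F, const MemorySSA &MSSA) {
  MemorySSAAnnotatedWriter Writer(&MSSA);
  // Function::print accepts an AssemblyAnnotationWriter, so each basic block
  // and instruction is emitted with its "; MemoryPhi/MemoryDef/MemoryUse"
  // comment from the two hooks above.
  F.print(errs(), &Writer);
}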
109
110
namespace {
111
112
/// Our current alias analysis API differentiates heavily between calls and
113
/// non-calls, and functions called on one usually assert on the other.
114
/// This class encapsulates the distinction to simplify other code that wants
115
/// "Memory affecting instructions and related data" to use as a key.
116
/// For example, this class is used as a densemap key in the use optimizer.
117
class MemoryLocOrCall {
118
public:
119
  bool IsCall = false;
120
121
  MemoryLocOrCall() = default;
122
  MemoryLocOrCall(MemoryUseOrDef *MUD)
123
2.23M
      : MemoryLocOrCall(MUD->getMemoryInst()) {}
124
  MemoryLocOrCall(const MemoryUseOrDef *MUD)
125
1
      : MemoryLocOrCall(MUD->getMemoryInst()) {}
126
127
2.23M
  MemoryLocOrCall(Instruction *Inst) {
128
2.23M
    if (ImmutableCallSite(Inst)) {
129
59.0k
      IsCall = true;
130
59.0k
      CS = ImmutableCallSite(Inst);
131
2.23M
    } else {
132
2.17M
      IsCall = false;
133
2.17M
      // There is no such thing as a memorylocation for a fence inst, and it is
134
2.17M
      // unique in that regard.
135
2.17M
      if (!isa<FenceInst>(Inst))
136
2.17M
        Loc = MemoryLocation::get(Inst);
137
2.17M
    }
138
2.23M
  }
139
140
7.83M
  explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {}
141
142
60.0k
  ImmutableCallSite getCS() const {
143
60.0k
    assert(IsCall);
144
60.0k
    return CS;
145
60.0k
  }
146
147
5.55M
  MemoryLocation getLoc() const {
148
5.55M
    assert(!IsCall);
149
5.55M
    return Loc;
150
5.55M
  }
151
152
30.4M
  bool operator==(const MemoryLocOrCall &Other) const {
153
30.4M
    if (IsCall != Other.IsCall)
154
98.9k
      return false;
155
30.3M
156
30.3M
    
    if (IsCall)
157
30.9k
      return CS.getCalledValue() == Other.CS.getCalledValue();
158
30.2M
    return Loc == Other.Loc;
159
30.2M
  }
160
161
private:
162
  union {
163
    ImmutableCallSite CS;
164
    MemoryLocation Loc;
165
  };
166
};
167
168
} // end anonymous namespace
169
170
namespace llvm {
171
172
template <> struct DenseMapInfo<MemoryLocOrCall> {
173
4.98M
  static inline MemoryLocOrCall getEmptyKey() {
174
4.98M
    return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey());
175
4.98M
  }
176
177
2.85M
  static inline MemoryLocOrCall getTombstoneKey() {
178
2.85M
    return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey());
179
2.85M
  }
180
181
2.56M
  static unsigned getHashValue(const MemoryLocOrCall &MLOC) {
182
2.56M
    if (MLOC.IsCall)
183
60.0k
      return hash_combine(MLOC.IsCall,
184
60.0k
                          DenseMapInfo<const Value *>::getHashValue(
185
60.0k
                              MLOC.getCS().getCalledValue()));
186
2.50M
    return hash_combine(
187
2.50M
        MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc()));
188
2.50M
  }
189
190
30.4M
  static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) {
191
30.4M
    return LHS == RHS;
192
30.4M
  }
193
};
194
195
enum class Reorderability { Always, IfNoAlias, Never };
196
197
} // end namespace llvm
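// The specialization above is what lets MemoryLocOrCall act as a DenseMap key;
// the use optimizer's per-location state (LocStackInfo, further down in this
// file) relies on it. A small illustrative sketch (helper not in this file):
static unsigned
countDistinctUseKeys(const SmallVectorImpl<MemoryUseOrDef *> &Accesses) {
  DenseMap<MemoryLocOrCall, unsigned> Seen;
  for (MemoryUseOrDef *MUD : Accesses)
    ++Seen[MemoryLocOrCall(MUD)]; // calls hash by callee, the rest by location
  return Seen.size();
}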
198
199
/// This does one-way checks to see if Use could theoretically be hoisted above
200
/// MayClobber. This will not check the other way around.
201
///
202
/// This assumes that, for the purposes of MemorySSA, Use comes directly after
203
/// MayClobber, with no potentially clobbering operations in between them.
204
/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.)
205
static Reorderability getLoadReorderability(const LoadInst *Use,
206
2.53k
                                            const LoadInst *MayClobber) {
207
2.53k
  bool VolatileUse = Use->isVolatile();
208
2.53k
  bool VolatileClobber = MayClobber->isVolatile();
209
2.53k
  // Volatile operations may never be reordered with other volatile operations.
210
2.53k
  if (VolatileUse && VolatileClobber)
211
0
    return Reorderability::Never;
212
2.53k
213
2.53k
  // The lang ref allows reordering of volatile and non-volatile operations.
214
2.53k
  // Whether an aliasing nonvolatile load and volatile load can be reordered,
215
2.53k
  // though, is ambiguous. Because it may not be best to exploit this ambiguity,
216
2.53k
  // we only allow volatile/non-volatile reordering if the volatile and
217
2.53k
  // non-volatile operations don't alias.
218
2.53k
  
  Reorderability Result = VolatileUse || VolatileClobber
219
2.52k
                              ? Reorderability::IfNoAlias
220
10
                              : Reorderability::Always;
221
2.53k
222
2.53k
  // If a load is seq_cst, it cannot be moved above other loads. If its ordering
223
2.53k
  // is weaker, it can be moved above other loads. We just need to be sure that
224
2.53k
  // MayClobber isn't an acquire load, because loads can't be moved above
225
2.53k
  // acquire loads.
226
2.53k
  //
227
2.53k
  // Note that this explicitly *does* allow the free reordering of monotonic (or
228
2.53k
  // weaker) loads of the same address.
229
2.53k
  bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent;
230
2.53k
  bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(),
231
2.53k
                                                     AtomicOrdering::Acquire);
232
2.53k
  if (SeqCstUse || MayClobberIsAcquire)
233
12
    return Reorderability::Never;
234
2.52k
  return Result;
235
2.52k
}
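// How the three-valued answer is consumed: the caller in this file
// (instructionClobbersQuery, below) maps it to a clobber decision roughly as
// follows. Shown only to make the contract explicit; this helper is not part
// of MemorySSA.cpp.
static bool loadClobbersLoad(const LoadInst *Use, const LoadInst *MayClobber,
                             const MemoryLocation &UseLoc, AliasAnalysis &AA) {
  switch (getLoadReorderability(Use, MayClobber)) {
  case Reorderability::Always:
    return false; // freely reorderable, so MayClobber is not a clobber
  case Reorderability::Never:
    return true;  // volatile/atomic ordering constraint: treat as a clobber
  case Reorderability::IfNoAlias:
    return !AA.isNoAlias(UseLoc, MemoryLocation::get(MayClobber));
  }
  llvm_unreachable("covered switch");
}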
236
237
static bool instructionClobbersQuery(MemoryDef *MD,
238
                                     const MemoryLocation &UseLoc,
239
                                     const Instruction *UseInst,
240
4.41M
                                     AliasAnalysis &AA) {
241
4.41M
  Instruction *DefInst = MD->getMemoryInst();
242
4.41M
  assert(DefInst && "Defining instruction not actually an instruction");
243
4.41M
  ImmutableCallSite UseCS(UseInst);
244
4.41M
245
4.41M
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
246
187k
    // These intrinsics will show up as affecting memory, but they are just
247
187k
    // markers.
248
187k
    switch (II->getIntrinsicID()) {
249
94.1k
    case Intrinsic::lifetime_start:
250
94.1k
      if (UseCS)
251
404
        return false;
252
93.7k
      return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), UseLoc);
253
47.2k
    case Intrinsic::lifetime_end:
254
47.2k
    case Intrinsic::invariant_start:
255
47.2k
    case Intrinsic::invariant_end:
256
47.2k
    case Intrinsic::assume:
257
47.2k
      return false;
258
46.4k
    default:
259
46.4k
      break;
260
4.27M
    }
261
4.27M
  }
262
4.27M
263
4.27M
  
  if (UseCS) {
264
21.1k
    ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
265
21.1k
    return I != MRI_NoModRef;
266
21.1k
  }
267
4.25M
268
4.25M
  
  if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) {
269
2.53k
    if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) {
270
2.53k
      switch (getLoadReorderability(UseLoad, DefLoad)) {
271
0
      case Reorderability::Always:
272
0
        return false;
273
12
      case Reorderability::Never:
274
12
        return true;
275
2.52k
      case Reorderability::IfNoAlias:
276
2.52k
        return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad));
277
4.25M
      }
278
4.25M
    }
279
2.53k
  }
280
4.25M
281
4.25M
  return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod;
282
4.25M
}
283
284
static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU,
285
                                     const MemoryLocOrCall &UseMLOC,
286
1.54M
                                     AliasAnalysis &AA) {
287
1.54M
  // FIXME: This is a temporary hack to allow a single instructionClobbersQuery
288
1.54M
  // to exist while MemoryLocOrCall is pushed through places.
289
1.54M
  if (UseMLOC.IsCall)
290
16.9k
    return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(),
291
16.9k
                                    AA);
292
1.52M
  return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(),
293
1.52M
                                  AA);
294
1.52M
}
295
296
// Return true when MD may alias MU, return false otherwise.
297
bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
298
1
                                        AliasAnalysis &AA) {
299
1
  return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA);
300
1
}
301
302
namespace {
303
304
struct UpwardsMemoryQuery {
305
  // True if our original query started off as a call
306
  bool IsCall = false;
307
  // The pointer location we started the query with. This will be empty if
308
  // IsCall is true.
309
  MemoryLocation StartingLoc;
310
  // This is the instruction we were querying about.
311
  const Instruction *Inst = nullptr;
312
  // The MemoryAccess we actually got called with, used to test local domination
313
  const MemoryAccess *OriginalAccess = nullptr;
314
315
2
  UpwardsMemoryQuery() = default;
316
317
  UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
318
910k
      : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
319
910k
    if (!IsCall)
320
907k
      StartingLoc = MemoryLocation::get(Inst);
321
910k
  }
322
};
323
324
} // end anonymous namespace
325
326
static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
327
1.52M
                           AliasAnalysis &AA) {
328
1.52M
  Instruction *Inst = MD->getMemoryInst();
329
1.52M
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
330
82.0k
    switch (II->getIntrinsicID()) {
331
10.7k
    case Intrinsic::lifetime_end:
332
10.7k
      return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
333
71.3k
    default:
334
71.3k
      return false;
335
1.44M
    }
336
1.44M
  }
337
1.44M
  return false;
338
1.44M
}
339
340
static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
341
3.16M
                                                   const Instruction *I) {
342
3.16M
  // If the memory can't be changed, then loads of the memory can't be
343
3.16M
  // clobbered.
344
3.16M
  //
345
3.16M
  // FIXME: We should handle invariant groups, as well. It's a bit harder,
346
3.16M
  // because we need to pay close attention to invariant group barriers.
347
3.09M
  return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
348
3.09M
                              AA.pointsToConstantMemory(cast<LoadInst>(I)->
349
3.09M
                                                          getPointerOperand()));
350
3.16M
}
351
352
/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
353
/// in between `Start` and `ClobberAt` can clobber `Start`.
354
///
355
/// This is meant to be as simple and self-contained as possible. Because it
356
/// uses no cache, etc., it can be relatively expensive.
357
///
358
/// \param Start     The MemoryAccess that we want to walk from.
359
/// \param ClobberAt A clobber for Start.
360
/// \param StartLoc  The MemoryLocation for Start.
361
/// \param MSSA      The MemorySSA instance that Start and ClobberAt belong to.
362
/// \param Query     The UpwardsMemoryQuery we used for our search.
363
/// \param AA        The AliasAnalysis we used for our search.
364
static void LLVM_ATTRIBUTE_UNUSED
365
checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
366
                   const MemoryLocation &StartLoc, const MemorySSA &MSSA,
367
0
                   const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
368
0
  assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
369
0
370
0
  if (MSSA.isLiveOnEntryDef(Start)) {
371
0
    assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
372
0
           "liveOnEntry must clobber itself");
373
0
    return;
374
0
  }
375
0
376
0
  bool FoundClobber = false;
377
0
  DenseSet<MemoryAccessPair> VisitedPhis;
378
0
  SmallVector<MemoryAccessPair, 8> Worklist;
379
0
  Worklist.emplace_back(Start, StartLoc);
380
0
  // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
381
0
  // is found, complain.
382
0
  while (!Worklist.empty()) {
383
0
    MemoryAccessPair MAP = Worklist.pop_back_val();
384
0
    // All we care about is that nothing from Start to ClobberAt clobbers Start.
385
0
    // We learn nothing from revisiting nodes.
386
0
    if (!VisitedPhis.insert(MAP).second)
387
0
      continue;
388
0
389
0
    for (MemoryAccess *MA : def_chain(MAP.first)) {
390
0
      if (MA == ClobberAt) {
391
0
        if (auto *MD = dyn_cast<MemoryDef>(MA)) {
392
0
          // instructionClobbersQuery isn't essentially free, so don't use `|=`,
393
0
          // since it won't let us short-circuit.
394
0
          //
395
0
          // Also, note that this can't be hoisted out of the `Worklist` loop,
396
0
          // since MD may only act as a clobber for 1 of N MemoryLocations.
397
0
          FoundClobber =
398
0
              FoundClobber || MSSA.isLiveOnEntryDef(MD) ||
399
0
              instructionClobbersQuery(MD, MAP.second, Query.Inst, AA);
400
0
        }
401
0
        break;
402
0
      }
403
0
404
0
      // We should never hit liveOnEntry, unless it's the clobber.
405
0
      assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");
406
0
407
0
      if (auto *MD = dyn_cast<MemoryDef>(MA)) {
408
0
        (void)MD;
409
0
        assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) &&
410
0
               "Found clobber before reaching ClobberAt!");
411
0
        continue;
412
0
      }
413
0
414
0
      assert(isa<MemoryPhi>(MA));
415
0
      Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end());
416
0
    }
417
0
  }
418
0
419
0
  // If ClobberAt is a MemoryPhi, we can assume something above it acted as a
420
0
  // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point.
421
0
  assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
422
0
         "ClobberAt never acted as a clobber");
423
0
}
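// Note: checkClobberSanity is deliberately unused in normal builds (see the
// LLVM_ATTRIBUTE_UNUSED above). It is only exercised when LLVM is configured
// with expensive checks; the guarded call, quoted from findClobber() later in
// this file, is:
//
//   #ifdef EXPENSIVE_CHECKS
//       checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
//   #endif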
424
425
namespace {
426
427
/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
428
/// in one class.
429
class ClobberWalker {
430
  /// Save a few bytes by using unsigned instead of size_t.
431
  using ListIndex = unsigned;
432
433
  /// Represents a span of contiguous MemoryDefs, potentially ending in a
434
  /// MemoryPhi.
435
  struct DefPath {
436
    MemoryLocation Loc;
437
    // Note that, because we always walk in reverse, Last will always dominate
438
    // First. Also note that First and Last are inclusive.
439
    MemoryAccess *First;
440
    MemoryAccess *Last;
441
    Optional<ListIndex> Previous;
442
443
    DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last,
444
            Optional<ListIndex> Previous)
445
6.74M
        : Loc(Loc), First(First), Last(Last), Previous(Previous) {}
446
447
    DefPath(const MemoryLocation &Loc, MemoryAccess *Init,
448
            Optional<ListIndex> Previous)
449
4.92M
        : DefPath(Loc, Init, Init, Previous) {}
450
  };
451
452
  const MemorySSA &MSSA;
453
  AliasAnalysis &AA;
454
  DominatorTree &DT;
455
  UpwardsMemoryQuery *Query;
456
457
  // Phi optimization bookkeeping
458
  SmallVector<DefPath, 32> Paths;
459
  DenseSet<ConstMemoryAccessPair> VisitedPhis;
460
461
  /// Find the nearest def or phi that `From` can legally be optimized to.
462
1.10M
  const MemoryAccess *getWalkTarget(const MemoryPhi *From) const {
463
1.10M
    assert(From->getNumOperands() && "Phi with no operands?");
464
1.10M
465
1.10M
    BasicBlock *BB = From->getBlock();
466
1.10M
    MemoryAccess *Result = MSSA.getLiveOnEntryDef();
467
1.10M
    DomTreeNode *Node = DT.getNode(BB);
468
2.34M
    while ((Node = Node->getIDom())) {
469
2.17M
      auto *Defs = MSSA.getBlockDefs(Node->getBlock());
470
2.17M
      if (Defs)
471
929k
        return &*Defs->rbegin();
472
2.17M
    }
473
171k
    return Result;
474
1.10M
  }
475
476
  /// Result of calling walkToPhiOrClobber.
477
  struct UpwardsWalkResult {
478
    /// The "Result" of the walk. Either a clobber, the last thing we walked, or
479
    /// both.
480
    MemoryAccess *Result;
481
    bool IsKnownClobber;
482
  };
483
484
  /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last.
485
  /// This will update Desc.Last as it walks. It will (optionally) also stop at
486
  /// StopAt.
487
  ///
488
  /// This does not test for whether StopAt is a clobber
489
  UpwardsWalkResult
490
  walkToPhiOrClobber(DefPath &Desc,
491
3.45M
                     const MemoryAccess *StopAt = nullptr) const {
492
3.45M
    assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
493
3.45M
494
5.44M
    for (MemoryAccess *Current : def_chain(Desc.Last)) {
495
5.44M
      Desc.Last = Current;
496
5.44M
      if (Current == StopAt)
497
465k
        return {Current, false};
498
4.98M
499
4.98M
      
      if (auto *MD = dyn_cast<MemoryDef>(Current))
500
2.97M
        
        if (MSSA.isLiveOnEntryDef(MD) ||
501
2.87M
            instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA))
502
975k
          return {MD, true};
503
2.00M
    }
504
2.00M
505
3.45M
    assert(isa<MemoryPhi>(Desc.Last) &&
506
2.00M
           "Ended at a non-clobber that's not a phi?");
507
2.00M
    return {Desc.Last, false};
508
2.00M
  }
509
510
  void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
511
2.00M
                   ListIndex PriorNode) {
512
2.00M
    auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}),
513
2.00M
                                 upward_defs_end());
514
4.92M
    for (const MemoryAccessPair &P : UpwardDefs) {
515
4.92M
      PausedSearches.push_back(Paths.size());
516
4.92M
      Paths.emplace_back(P.second, P.first, PriorNode);
517
4.92M
    }
518
2.00M
  }
519
520
  /// Represents a search that terminated after finding a clobber. This clobber
521
  /// may or may not be present in the path of defs from LastNode..SearchStart,
522
  /// since it may have been retrieved from cache.
523
  struct TerminatedPath {
524
    MemoryAccess *Clobber;
525
    ListIndex LastNode;
526
  };
527
528
  /// Get an access that keeps us from optimizing to the given phi.
529
  ///
530
  /// PausedSearches is an array of indices into the Paths array. Its incoming
531
  /// value is the indices of searches that stopped at the last phi optimization
532
  /// target. It's left in an unspecified state.
533
  ///
534
  /// If this returns None, NewPaused is a vector of searches that terminated
535
  /// at StopWhere. Otherwise, NewPaused is left in an unspecified state.
536
  Optional<TerminatedPath>
537
  getBlockingAccess(const MemoryAccess *StopWhere,
538
                    SmallVectorImpl<ListIndex> &PausedSearches,
539
                    SmallVectorImpl<ListIndex> &NewPaused,
540
1.10M
                    SmallVectorImpl<TerminatedPath> &Terminated) {
541
1.10M
    assert(!PausedSearches.empty() && "No searches to continue?");
542
1.10M
543
1.10M
    // BFS vs DFS really doesn't make a difference here, so just do a DFS with
544
1.10M
    // PausedSearches as our stack.
545
3.30M
    while (!PausedSearches.empty()) {
546
3.00M
      ListIndex PathIndex = PausedSearches.pop_back_val();
547
3.00M
      DefPath &Node = Paths[PathIndex];
548
3.00M
549
3.00M
      // If we've already visited this path with this MemoryLocation, we don't
550
3.00M
      // need to do so again.
551
3.00M
      //
552
3.00M
      // NOTE: That we just drop these paths on the ground makes caching
553
3.00M
      // behavior sporadic. e.g. given a diamond:
554
3.00M
      //  A
555
3.00M
      // B C
556
3.00M
      //  D
557
3.00M
      //
558
3.00M
      // ...If we walk D, B, A, C, we'll only cache the result of phi
559
3.00M
      // optimization for A, B, and D; C will be skipped because it dies here.
560
3.00M
      // This arguably isn't the worst thing ever, since:
561
3.00M
      //   - We generally query things in a top-down order, so if we got below D
562
3.00M
      //     without needing cache entries for {C, MemLoc}, then chances are
563
3.00M
      //     that those cache entries would end up ultimately unused.
564
3.00M
      //   - We still cache things for A, so C only needs to walk up a bit.
565
3.00M
      // If this behavior becomes problematic, we can fix without a ton of extra
566
3.00M
      // work.
567
3.00M
      if (!VisitedPhis.insert({Node.Last, Node.Loc}).second)
568
904k
        continue;
569
2.10M
570
2.10M
      UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere);
571
2.10M
      if (Res.IsKnownClobber) {
572
803k
        assert(Res.Result != StopWhere);
573
803k
        // If this wasn't a cache hit, we hit a clobber when walking. That's a
574
803k
        // failure.
575
803k
        TerminatedPath Term{Res.Result, PathIndex};
576
803k
        if (!MSSA.dominates(Res.Result, StopWhere))
577
803k
          return Term;
578
0
579
0
        // Otherwise, it's a valid thing to potentially optimize to.
580
0
        Terminated.push_back(Term);
581
0
        continue;
582
0
      }
583
1.30M
584
1.30M
      
      if (Res.Result == StopWhere) {
585
465k
        // We've hit our target. Save this path off for if we want to continue
586
465k
        // walking.
587
465k
        NewPaused.push_back(PathIndex);
588
465k
        continue;
589
465k
      }
590
835k
591
1.30M
      assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber");
592
835k
      addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex);
593
835k
    }
594
1.10M
595
297k
    return None;
596
1.10M
  }
597
598
  template <typename T, typename Walker>
599
  struct generic_def_path_iterator
600
      : public iterator_facade_base<generic_def_path_iterator<T, Walker>,
601
                                    std::forward_iterator_tag, T *> {
602
803k
    generic_def_path_iterator() = default;
603
803k
    generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
604
605
2.78M
    T &operator*() const { return curNode(); }
606
607
1.17M
    generic_def_path_iterator &operator++() {
608
1.17M
      N = curNode().Previous;
609
1.17M
      return *this;
610
1.17M
    }
611
612
1.97M
    bool operator==(const generic_def_path_iterator &O) const {
613
1.97M
      if (N.hasValue() != O.N.hasValue())
614
1.97M
        return false;
615
0
      
      return !N.hasValue() || *N == *O.N;
616
1.97M
    }
617
618
  private:
619
3.95M
    T &curNode() const { return W->Paths[*N]; }
620
621
    Walker *W = nullptr;
622
    Optional<ListIndex> N = None;
623
  };
624
625
  using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>;
626
  using const_def_path_iterator =
627
      generic_def_path_iterator<const DefPath, const ClobberWalker>;
628
629
803k
  iterator_range<def_path_iterator> def_path(ListIndex From) {
630
803k
    return make_range(def_path_iterator(this, From), def_path_iterator());
631
803k
  }
632
633
0
  iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const {
634
0
    return make_range(const_def_path_iterator(this, From),
635
0
                      const_def_path_iterator());
636
0
  }
637
638
  struct OptznResult {
639
    /// The path that contains our result.
640
    TerminatedPath PrimaryClobber;
641
    /// The paths that we can legally cache back from, but that aren't
642
    /// necessarily the result of the Phi optimization.
643
    SmallVector<TerminatedPath, 4> OtherClobbers;
644
  };
645
646
2.78M
  ListIndex defPathIndex(const DefPath &N) const {
647
2.78M
    // The assert looks nicer if we don't need to do &N
648
2.78M
    const DefPath *NP = &N;
649
2.78M
    assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() &&
650
2.78M
           "Out of bounds DefPath!");
651
2.78M
    return NP - &Paths.front();
652
2.78M
  }
653
654
  /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths
655
  /// that act as legal clobbers. Note that this won't return *all* clobbers.
656
  ///
657
  /// Phi optimization algorithm tl;dr:
658
  ///   - Find the earliest def/phi, A, we can optimize to
659
  ///   - Find if all paths from the starting memory access ultimately reach A
660
  ///     - If not, optimization isn't possible.
661
  ///     - Otherwise, walk from A to another clobber or phi, A'.
662
  ///       - If A' is a def, we're done.
663
  ///       - If A' is a phi, try to optimize it.
664
  ///
665
  /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path
666
  /// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
667
  OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
668
906k
                             const MemoryLocation &Loc) {
669
906k
    assert(Paths.empty() && VisitedPhis.empty() &&
670
906k
           "Reset the optimization state.");
671
906k
672
906k
    Paths.emplace_back(Loc, Start, Phi, None);
673
906k
    // Stores how many "valid" optimization nodes we had prior to calling
674
906k
    // addSearches/getBlockingAccess. Necessary for caching if we had a blocker.
675
906k
    auto PriorPathsSize = Paths.size();
676
906k
677
906k
    SmallVector<ListIndex, 16> PausedSearches;
678
906k
    SmallVector<ListIndex, 8> NewPaused;
679
906k
    SmallVector<TerminatedPath, 4> TerminatedPaths;
680
906k
681
906k
    addSearches(Phi, PausedSearches, 0);
682
906k
683
906k
    // Moves the TerminatedPath with the "most dominated" Clobber to the end of
684
906k
    // Paths.
685
102k
    auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) {
686
102k
      assert(!Paths.empty() && "Need a path to move");
687
102k
      auto Dom = Paths.begin();
688
168k
      for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I)
689
65.2k
        
        if (!MSSA.dominates(I->Clobber, Dom->Clobber))
690
0
          Dom = I;
691
102k
      auto Last = Paths.end() - 1;
692
102k
      if (Last != Dom)
693
61.7k
        std::iter_swap(Last, Dom);
694
102k
    };
695
906k
696
906k
    MemoryPhi *Current = Phi;
697
1.10M
    while (true) {
698
1.10M
      assert(!MSSA.isLiveOnEntryDef(Current) &&
699
1.10M
             "liveOnEntry wasn't treated as a clobber?");
700
1.10M
701
1.10M
      const auto *Target = getWalkTarget(Current);
702
1.10M
      // If a TerminatedPath doesn't dominate Target, then it wasn't a legal
703
1.10M
      // optimization for the prior phi.
704
1.10M
      assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) {
705
1.10M
        return MSSA.dominates(P.Clobber, Target);
706
1.10M
      }));
707
1.10M
708
1.10M
      // FIXME: This is broken, because the Blocker may be reported to be
709
1.10M
      // liveOnEntry, and we'll happily wait for that to disappear (read: never)
710
1.10M
      // For the moment, this is fine, since we do nothing with blocker info.
711
1.10M
      if (Optional<TerminatedPath> Blocker = getBlockingAccess(
712
803k
              Target, PausedSearches, NewPaused, TerminatedPaths)) {
713
803k
714
803k
        // Find the node we started at. We can't search based on N->Last, since
715
803k
        // we may have gone around a loop with a different MemoryLocation.
716
1.97M
        auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) {
717
1.97M
          return defPathIndex(N) < PriorPathsSize;
718
1.97M
        });
719
803k
        assert(Iter != def_path_iterator());
720
803k
721
803k
        DefPath &CurNode = *Iter;
722
803k
        assert(CurNode.Last == Current);
723
803k
724
803k
        // Two things:
725
803k
        // A. We can't reliably cache all of NewPaused back. Consider a case
726
803k
        //    where we have two paths in NewPaused; one of which can't optimize
727
803k
        //    above this phi, whereas the other can. If we cache the second path
728
803k
        //    back, we'll end up with suboptimal cache entries. We can handle
729
803k
        //    cases like this a bit better when we either try to find all
730
803k
        //    clobbers that block phi optimization, or when our cache starts
731
803k
        //    supporting unfinished searches.
732
803k
        // B. We can't reliably cache TerminatedPaths back here without doing
733
803k
        //    extra checks; consider a case like:
734
803k
        //       T
735
803k
        //      / \
736
803k
        //     D   C
737
803k
        //      \ /
738
803k
        //       S
739
803k
        //    Where T is our target, C is a node with a clobber on it, D is a
740
803k
        //    diamond (with a clobber *only* on the left or right node, N), and
741
803k
        //    S is our start. Say we walk to D, through the node opposite N
742
803k
        //    (read: ignoring the clobber), and see a cache entry in the top
743
803k
        //    node of D. That cache entry gets put into TerminatedPaths. We then
744
803k
        //    walk up to C (N is later in our worklist), find the clobber, and
745
803k
        //    quit. If we append TerminatedPaths to OtherClobbers, we'll cache
746
803k
        //    the bottom part of D to the cached clobber, ignoring the clobber
747
803k
        //    in N. Again, this problem goes away if we start tracking all
748
803k
        //    blockers for a given phi optimization.
749
803k
        TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)};
750
803k
        return {Result, {}};
751
803k
      }
752
297k
753
297k
      // If there's nothing left to search, then all paths led to valid clobbers
754
297k
      // that we got from our cache; pick the nearest to the start, and allow
755
297k
      // the rest to be cached back.
756
297k
      
      if (NewPaused.empty()) {
757
0
        MoveDominatedPathToEnd(TerminatedPaths);
758
0
        TerminatedPath Result = TerminatedPaths.pop_back_val();
759
0
        return {Result, std::move(TerminatedPaths)};
760
0
      }
761
297k
762
297k
      MemoryAccess *DefChainEnd = nullptr;
763
297k
      SmallVector<TerminatedPath, 4> Clobbers;
764
434k
      for (ListIndex Paused : NewPaused) {
765
434k
        UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]);
766
434k
        if (WR.IsKnownClobber)
767
168k
          Clobbers.push_back({WR.Result, Paused});
768
434k
        else
769
434k
          // Micro-opt: If we hit the end of the chain, save it.
770
266k
          DefChainEnd = WR.Result;
771
434k
      }
772
297k
773
297k
      if (!TerminatedPaths.empty()) {
774
0
        // If we couldn't find the dominating phi/liveOnEntry in the above loop,
775
0
        // do it now.
776
0
        if (!DefChainEnd)
777
0
          for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target)))
778
0
            DefChainEnd = MA;
779
0
780
0
        // If any of the terminated paths don't dominate the phi we'll try to
781
0
        // optimize, we need to figure out what they are and quit.
782
0
        const BasicBlock *ChainBB = DefChainEnd->getBlock();
783
0
        for (const TerminatedPath &TP : TerminatedPaths) {
784
0
          // Because we know that DefChainEnd is as "high" as we can go, we
785
0
          // don't need local dominance checks; BB dominance is sufficient.
786
0
          if (DT.dominates(ChainBB, TP.Clobber->getBlock()))
787
0
            Clobbers.push_back(TP);
788
0
        }
789
0
      }
790
297k
791
297k
      // If we have clobbers in the def chain, find the one closest to Current
792
297k
      // and quit.
793
297k
      if (!Clobbers.empty()) {
794
102k
        MoveDominatedPathToEnd(Clobbers);
795
102k
        TerminatedPath Result = Clobbers.pop_back_val();
796
102k
        return {Result, std::move(Clobbers)};
797
102k
      }
798
194k
799
297k
      assert(all_of(NewPaused,
800
194k
                    [&](ListIndex I) { return Paths[I].Last == DefChainEnd; }));
801
194k
802
194k
      // Because liveOnEntry is a clobber, this must be a phi.
803
194k
      auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd);
804
194k
805
194k
      PriorPathsSize = Paths.size();
806
194k
      PausedSearches.clear();
807
194k
      for (ListIndex I : NewPaused)
808
266k
        addSearches(DefChainPhi, PausedSearches, I);
809
1.10M
      NewPaused.clear();
810
1.10M
811
1.10M
      Current = DefChainPhi;
812
1.10M
    }
813
906k
  }
814
815
906k
  void verifyOptResult(const OptznResult &R) const {
816
906k
    assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) {
817
906k
      return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber);
818
906k
    }));
819
906k
  }
820
821
906k
  void resetPhiOptznState() {
822
906k
    Paths.clear();
823
906k
    VisitedPhis.clear();
824
906k
  }
825
826
public:
827
  ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT)
828
516k
      : MSSA(MSSA), AA(AA), DT(DT) {}
829
830
532k
  void reset() {}
831
832
  /// Finds the nearest clobber for the given query, optimizing phis if
833
  /// possible.
834
910k
  MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) {
835
910k
    Query = &Q;
836
910k
837
910k
    MemoryAccess *Current = Start;
838
910k
    // This walker pretends uses don't exist. If we're handed one, silently grab
839
910k
    // its def. (This has the nice side-effect of ensuring we never cache uses)
840
910k
    if (auto *MU = dyn_cast<MemoryUse>(Start))
841
0
      Current = MU->getDefiningAccess();
842
910k
843
910k
    DefPath FirstDesc(Q.StartingLoc, Current, Current, None);
844
910k
    // Fast path for the overly-common case (no crazy phi optimization
845
910k
    // necessary)
846
910k
    UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc);
847
910k
    MemoryAccess *Result;
848
910k
    if (WalkResult.IsKnownClobber) {
849
4.17k
      Result = WalkResult.Result;
850
910k
    } else {
851
906k
      OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last),
852
906k
                                          Current, Q.StartingLoc);
853
906k
      verifyOptResult(OptRes);
854
906k
      resetPhiOptznState();
855
906k
      Result = OptRes.PrimaryClobber.Clobber;
856
906k
    }
857
910k
858
#ifdef EXPENSIVE_CHECKS
859
    checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
860
#endif
861
    return Result;
862
910k
  }
863
864
93
  void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); }
865
};
866
867
struct RenamePassData {
868
  DomTreeNode *DTN;
869
  DomTreeNode::const_iterator ChildIt;
870
  MemoryAccess *IncomingVal;
871
872
  RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
873
                 MemoryAccess *M)
874
3.72M
      : DTN(D), ChildIt(It), IncomingVal(M) {}
875
876
0
  void swap(RenamePassData &RHS) {
877
0
    std::swap(DTN, RHS.DTN);
878
0
    std::swap(ChildIt, RHS.ChildIt);
879
0
    std::swap(IncomingVal, RHS.IncomingVal);
880
0
  }
881
};
882
883
} // end anonymous namespace
884
885
namespace llvm {
886
887
/// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It no
888
/// longer does caching on its own,
889
/// but the name has been retained for the moment.
890
class MemorySSA::CachingWalker final : public MemorySSAWalker {
891
  ClobberWalker Walker;
892
  bool AutoResetWalker = true;
893
894
  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
895
  void verifyRemoved(MemoryAccess *);
896
897
public:
898
  CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
899
516k
  ~CachingWalker() override = default;
900
901
  using MemorySSAWalker::getClobberingMemoryAccess;
902
903
  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
904
  MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
905
                                          const MemoryLocation &) override;
906
  void invalidateInfo(MemoryAccess *) override;
907
908
  /// Whether we call resetClobberWalker() after each time we *actually* walk to
909
  /// answer a clobber query.
910
1.03M
  void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; }
911
912
  /// Drop the walker's persistent data structures.
913
532k
  void resetClobberWalker() { Walker.reset(); }
914
915
93
  void verify(const MemorySSA *MSSA) override {
916
93
    MemorySSAWalker::verify(MSSA);
917
93
    Walker.verify(MSSA);
918
93
  }
919
};
920
921
} // end namespace llvm
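// Typical client pattern for the walker declared above (sketch only; the
// helper and its surrounding transform are assumed, not taken from this file):
static const MemoryAccess *findClobberFor(Instruction &I, MemorySSA &MSSA) {
  MemoryAccess *MA = MSSA.getMemoryAccess(&I);
  if (!MA)
    return nullptr; // I neither reads nor writes memory
  // getWalker() hands back the CachingWalker defined above; it walks upward
  // past non-aliasing MemoryDefs and returns the nearest real clobber
  // (possibly the liveOnEntry def).
  return MSSA.getWalker()->getClobberingMemoryAccess(MA);
}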
922
923
void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal,
924
3.72M
                                    bool RenameAllUses) {
925
3.72M
  // Pass through values to our successors
926
5.02M
  for (const BasicBlock *S : successors(BB)) {
927
5.02M
    auto It = PerBlockAccesses.find(S);
928
5.02M
    // Rename the phi nodes in our successor block
929
5.02M
    if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
930
2.54M
      continue;
931
2.47M
    AccessList *Accesses = It->second.get();
932
2.47M
    auto *Phi = cast<MemoryPhi>(&Accesses->front());
933
2.47M
    if (RenameAllUses) {
934
2
      int PhiIndex = Phi->getBasicBlockIndex(BB);
935
2
      assert(PhiIndex != -1 && "Incomplete phi during partial rename");
936
2
      Phi->setIncomingValue(PhiIndex, IncomingVal);
937
2
    } else
938
2.47M
      Phi->addIncoming(IncomingVal, BB);
939
5.02M
  }
940
3.72M
}
941
942
/// \brief Rename a single basic block into MemorySSA form.
943
/// Uses the standard SSA renaming algorithm.
944
/// \returns The new incoming value.
945
MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
946
3.72M
                                     bool RenameAllUses) {
947
3.72M
  auto It = PerBlockAccesses.find(BB);
948
3.72M
  // Skip most processing if the list is empty.
949
3.72M
  if (It != PerBlockAccesses.end()) {
950
2.78M
    AccessList *Accesses = It->second.get();
951
7.33M
    for (MemoryAccess &L : *Accesses) {
952
7.33M
      if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) {
953
6.37M
        if (MUD->getDefiningAccess() == nullptr || RenameAllUses)
954
6.37M
          MUD->setDefiningAccess(IncomingVal);
955
6.37M
        if (isa<MemoryDef>(&L))
956
4.12M
          IncomingVal = &L;
957
7.33M
      } else {
958
958k
        IncomingVal = &L;
959
958k
      }
960
7.33M
    }
961
2.78M
  }
962
3.72M
  return IncomingVal;
963
3.72M
}
964
965
/// \brief This is the standard SSA renaming algorithm.
966
///
967
/// We walk the dominator tree in preorder, renaming accesses, and then filling
968
/// in phi nodes in our successors.
969
void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
970
                           SmallPtrSetImpl<BasicBlock *> &Visited,
971
516k
                           bool SkipVisited, bool RenameAllUses) {
972
516k
  SmallVector<RenamePassData, 32> WorkStack;
973
516k
  // Skip everything if we already renamed this block and we are skipping.
974
516k
  // Note: You can't sink this into the if, because we need it to occur
975
516k
  // regardless of whether we skip blocks or not.
976
516k
  bool AlreadyVisited = !Visited.insert(Root->getBlock()).second;
977
516k
  if (SkipVisited && AlreadyVisited)
978
0
    return;
979
516k
980
516k
  IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses);
981
516k
  renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses);
982
516k
  WorkStack.push_back({Root, Root->begin(), IncomingVal});
983
516k
984
7.45M
  while (!WorkStack.empty()) {
985
6.93M
    DomTreeNode *Node = WorkStack.back().DTN;
986
6.93M
    DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt;
987
6.93M
    IncomingVal = WorkStack.back().IncomingVal;
988
6.93M
989
6.93M
    if (ChildIt == Node->end()) {
990
3.72M
      WorkStack.pop_back();
991
6.93M
    } else {
992
3.21M
      DomTreeNode *Child = *ChildIt;
993
3.21M
      ++WorkStack.back().ChildIt;
994
3.21M
      BasicBlock *BB = Child->getBlock();
995
3.21M
      // Note: You can't sink this into the if, because we need it to occur
996
3.21M
      // regardless of whether we skip blocks or not.
997
3.21M
      AlreadyVisited = !Visited.insert(BB).second;
998
3.21M
      if (SkipVisited && AlreadyVisited) {
999
0
        // We already visited this during our renaming, which can happen when
1000
0
        // being asked to rename multiple blocks. Figure out the incoming val,
1001
0
        // which is the last def.
1002
0
        // Incoming value can only change if there is a block def, and in that
1003
0
        // case, it's the last block def in the list.
1004
0
        if (auto *BlockDefs = getWritableBlockDefs(BB))
1005
0
          IncomingVal = &*BlockDefs->rbegin();
1006
0
      } else
1007
3.21M
        IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses);
1008
3.21M
      renameSuccessorPhis(BB, IncomingVal, RenameAllUses);
1009
3.21M
      WorkStack.push_back({Child, Child->begin(), IncomingVal});
1010
3.21M
    }
1011
6.93M
  }
1012
516k
}
1013
1014
/// \brief This handles unreachable block accesses by deleting phi nodes in
1015
/// unreachable blocks, and marking all other unreachable MemoryAccess's as
1016
/// being uses of the live on entry definition.
1017
5.11k
void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
1018
5.11k
  assert(!DT->isReachableFromEntry(BB) &&
1019
5.11k
         "Reachable block found while handling unreachable blocks");
1020
5.11k
1021
5.11k
  // Make sure phi nodes in our reachable successors end up with a
1022
5.11k
  // LiveOnEntryDef for our incoming edge, even though our block is forward
1023
5.11k
  // unreachable.  We could just disconnect these blocks from the CFG fully,
1024
5.11k
  // but we do not right now.
1025
4.38k
  for (const BasicBlock *S : successors(BB)) {
1026
4.38k
    if (!DT->isReachableFromEntry(S))
1027
3.25k
      continue;
1028
1.12k
    auto It = PerBlockAccesses.find(S);
1029
1.12k
    // Rename the phi nodes in our successor block
1030
1.12k
    if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
1031
779
      continue;
1032
346
    AccessList *Accesses = It->second.get();
1033
346
    auto *Phi = cast<MemoryPhi>(&Accesses->front());
1034
346
    Phi->addIncoming(LiveOnEntryDef.get(), BB);
1035
346
  }
1036
5.11k
1037
5.11k
  auto It = PerBlockAccesses.find(BB);
1038
5.11k
  if (It == PerBlockAccesses.end())
1039
2.34k
    return;
1040
2.77k
1041
2.77k
  auto &Accesses = It->second;
1042
5.94k
  for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) {
1043
3.17k
    auto Next = std::next(AI);
1044
3.17k
    // If we have a phi, just remove it. We are going to replace all
1045
3.17k
    // users with live on entry.
1046
3.17k
    if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI))
1047
3.17k
      UseOrDef->setDefiningAccess(LiveOnEntryDef.get());
1048
3.17k
    else
1049
0
      Accesses->erase(AI);
1050
3.17k
    AI = Next;
1051
3.17k
  }
1052
5.11k
}
1053
1054
MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
1055
    : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
1056
516k
      NextID(INVALID_MEMORYACCESS_ID) {
1057
516k
  buildMemorySSA();
1058
516k
}
1059
1060
516k
MemorySSA::~MemorySSA() {
1061
516k
  // Drop all our references
1062
516k
  for (const auto &Pair : PerBlockAccesses)
1063
2.77M
    for (MemoryAccess &MA : *Pair.second)
1064
7.24M
      MA.dropAllReferences();
1065
516k
}
1066
1067
3.39M
MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
1068
3.39M
  auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
1069
3.39M
1070
3.39M
  if (Res.second)
1071
2.78M
    Res.first->second = llvm::make_unique<AccessList>();
1072
3.39M
  return Res.first->second.get();
1073
3.39M
}
1074
1075
2.79M
MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) {
1076
2.79M
  auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr));
1077
2.79M
1078
2.79M
  if (Res.second)
1079
2.32M
    Res.first->second = llvm::make_unique<DefsList>();
1080
2.79M
  return Res.first->second.get();
1081
2.79M
}
1082
1083
namespace llvm {
1084
1085
/// This class is a batch walker of all MemoryUse's in the program, and points
1086
/// their defining access at the thing that actually clobbers them.  Because it
1087
/// is a batch walker that touches everything, it does not operate like the
1088
/// other walkers.  This walker is basically performing a top-down SSA renaming
1089
/// pass, where the version stack is used as the cache.  This enables it to be
1090
/// significantly more time and memory efficient than using the regular walker,
1091
/// which is walking bottom-up.
1092
class MemorySSA::OptimizeUses {
1093
public:
1094
  OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA,
1095
               DominatorTree *DT)
1096
516k
      : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) {
1097
516k
    Walker = MSSA->getWalker();
1098
516k
  }
1099
1100
  void optimizeUses();
1101
1102
private:
1103
  /// This represents where a given memorylocation is in the stack.
1104
  struct MemlocStackInfo {
1105
    // This essentially is keeping track of versions of the stack. Whenever
1106
    // the stack changes due to pushes or pops, these versions increase.
1107
    unsigned long StackEpoch;
1108
    unsigned long PopEpoch;
1109
    // This is the lower bound of places on the stack to check. It is equal to
1110
    // the place the last stack walk ended.
1111
    // Note: Correctness depends on this being initialized to 0, which densemap
1112
    // does
1113
    unsigned long LowerBound;
1114
    const BasicBlock *LowerBoundBlock;
1115
    // This is where the last walk for this memory location ended.
1116
    unsigned long LastKill;
1117
    bool LastKillValid;
1118
  };
1119
1120
  void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
1121
                           SmallVectorImpl<MemoryAccess *> &,
1122
                           DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
1123
1124
  MemorySSA *MSSA;
1125
  MemorySSAWalker *Walker;
1126
  AliasAnalysis *AA;
1127
  DominatorTree *DT;
1128
};
1129
1130
} // end namespace llvm
1131
1132
/// Optimize the uses in a given block. This is basically the SSA renaming
1133
/// algorithm, with one caveat: We are able to use a single stack for all
1134
/// MemoryUses.  This is because the set of *possible* reaching MemoryDefs is
1135
/// the same for every MemoryUse.  The *actual* clobbering MemoryDef is just
1136
/// going to be some position in that stack of possible ones.
1137
///
1138
/// We track the stack positions that each MemoryLocation needs
1139
/// to check, and last ended at.  This is because we only want to check the
1140
/// things that changed since last time.  The same MemoryLocation should
1141
/// get clobbered by the same store (getModRefInfo does not use invariantness or
1142
/// things like this, and if they start, we can modify MemoryLocOrCall to
1143
/// include relevant data)
1144
void MemorySSA::OptimizeUses::optimizeUsesInBlock(
1145
    const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
1146
    SmallVectorImpl<MemoryAccess *> &VersionStack,
1147
3.72M
    DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {
1148
3.72M
1149
3.72M
  /// If no accesses, nothing to do.
1150
3.72M
  MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
1151
3.72M
  if (Accesses == nullptr)
1152
948k
    return;
1153
2.78M
1154
2.78M
  // Pop everything that doesn't dominate the current block off the stack,
1155
2.78M
  // increment the PopEpoch to account for this.
1156
4.42M
  
  while (true) {
1157
4.42M
    assert(
1158
4.42M
        !VersionStack.empty() &&
1159
4.42M
        "Version stack should have liveOnEntry sentinel dominating everything");
1160
4.42M
    BasicBlock *BackBlock = VersionStack.back()->getBlock();
1161
4.42M
    if (DT->dominates(BackBlock, BB))
1162
2.78M
      break;
1163
4.88M
    
    while (VersionStack.back()->getBlock() == BackBlock)
1164
3.23M
      VersionStack.pop_back();
1165
4.42M
    ++PopEpoch;
1166
4.42M
  }
1167
2.78M
1168
7.33M
  for (MemoryAccess &MA : *Accesses) {
1169
7.33M
    auto *MU = dyn_cast<MemoryUse>(&MA);
1170
7.33M
    if (!MU) {
1171
5.08M
      VersionStack.push_back(&MA);
1172
5.08M
      ++StackEpoch;
1173
5.08M
      continue;
1174
5.08M
    }
1175
2.25M
1176
2.25M
    
    if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
1177
15.3k
      MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true);
1178
15.3k
      continue;
1179
15.3k
    }
1180
2.23M
1181
2.23M
    MemoryLocOrCall UseMLOC(MU);
1182
2.23M
    auto &LocInfo = LocStackInfo[UseMLOC];
1183
2.23M
    // If the pop epoch changed, it means we've removed stuff from top of
1184
2.23M
    // stack due to changing blocks. We may have to reset the lower bound or
1185
2.23M
    // last kill info.
1186
2.23M
    if (LocInfo.PopEpoch != PopEpoch) {
1187
2.09M
      LocInfo.PopEpoch = PopEpoch;
1188
2.09M
      LocInfo.StackEpoch = StackEpoch;
1189
2.09M
      // If the lower bound was in something that no longer dominates us, we
1190
2.09M
      // have to reset it.
1191
2.09M
      // We can't simply track stack size, because the stack may have had
1192
2.09M
      // pushes/pops in the meantime.
1193
2.09M
      // XXX: This is non-optimal, but only is slower cases with heavily
1194
2.09M
      // branching dominator trees.  To get the optimal number of queries would
1195
2.09M
      // be to make lowerbound and lastkill a per-loc stack, and pop it until
1196
2.09M
      // the top of that stack dominates us.  This does not seem worth it ATM.
1197
2.09M
      // A much cheaper optimization would be to always explore the deepest
1198
2.09M
      // branch of the dominator tree first. This will guarantee this resets on
1199
2.09M
      // the smallest set of blocks.
1200
2.09M
      if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
1201
2.09M
          
          !DT->dominates(LocInfo.LowerBoundBlock, BB)) {
1202
115k
        // Reset the lower bound of things to check.
1203
115k
        // TODO: Some day we should be able to reset to last kill, rather than
1204
115k
        // 0.
1205
115k
        LocInfo.LowerBound = 0;
1206
115k
        LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
1207
115k
        LocInfo.LastKillValid = false;
1208
115k
      }
1209
2.23M
    } else if (LocInfo.StackEpoch != StackEpoch) {
1210
114k
      // If all that has changed is the StackEpoch, we only have to check the
1211
114k
      // new things on the stack, because we've checked everything before.  In
1212
114k
      // this case, the lower bound of things to check remains the same.
1213
114k
      LocInfo.PopEpoch = PopEpoch;
1214
114k
      LocInfo.StackEpoch = StackEpoch;
1215
114k
    }
1216
2.23M
    if (!LocInfo.LastKillValid) {
1217
1.96M
      LocInfo.LastKill = VersionStack.size() - 1;
1218
1.96M
      LocInfo.LastKillValid = true;
1219
1.96M
    }
1220
2.23M
1221
2.23M
    // At this point, we should have corrected last kill and LowerBound to be
1222
2.23M
    // in bounds.
1223
2.23M
    assert(LocInfo.LowerBound < VersionStack.size() &&
1224
2.23M
           "Lower bound out of range");
1225
2.23M
    assert(LocInfo.LastKill < VersionStack.size() &&
1226
2.23M
           "Last kill info out of range");
1227
2.23M
    // In any case, the new upper bound is the top of the stack.
1228
2.23M
    unsigned long UpperBound = VersionStack.size() - 1;
1229
2.23M
1230
2.23M
    if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) {
1231
12.6k
      DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("
1232
12.6k
                   << *(MU->getMemoryInst()) << ")"
1233
12.6k
                   << " because there are " << UpperBound - LocInfo.LowerBound
1234
12.6k
                   << " stores to disambiguate\n");
1235
12.6k
      // Because we did not walk, LastKill is no longer valid, as this may
1236
12.6k
      // have been a kill.
1237
12.6k
      LocInfo.LastKillValid = false;
1238
12.6k
      continue;
1239
12.6k
    }
1240
2.22M
    bool FoundClobberResult = false;
1241
3.07M
    while (UpperBound > LocInfo.LowerBound) {
1242
2.43M
      if (isa<MemoryPhi>(VersionStack[UpperBound])) {
1243
894k
        // For phis, use the walker, see where we ended up, go there
1244
894k
        Instruction *UseInst = MU->getMemoryInst();
1245
894k
        MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst);
1246
894k
        // We are guaranteed to find it or something is wrong
1247
1.35M
        while (
VersionStack[UpperBound] != Result1.35M
) {
1248
460k
          assert(UpperBound != 0);
1249
460k
          --UpperBound;
1250
460k
        }
1251
894k
        FoundClobberResult = true;
1252
894k
        break;
1253
894k
      }
1254
1.54M
1255
1.54M
      MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]);
1256
1.54M
      // If the lifetime of the pointer ends at this instruction, it's live on
1257
1.54M
      // entry.
1258
1.54M
      if (
!UseMLOC.IsCall && 1.54M
lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)1.52M
) {
1259
3
        // Reset UpperBound to liveOnEntryDef's place in the stack
1260
3
        UpperBound = 0;
1261
3
        FoundClobberResult = true;
1262
3
        break;
1263
3
      }
1264
1.54M
      
if (1.54M
instructionClobbersQuery(MD, MU, UseMLOC, *AA)1.54M
) {
1265
690k
        FoundClobberResult = true;
1266
690k
        break;
1267
690k
      }
1268
852k
      --UpperBound;
1269
852k
    }
1270
2.22M
    // At the end of this loop, UpperBound is either a clobber, or lower bound.
1271
2.22M
    // PHI walking may cause it to be < LowerBound, and in fact, < LastKill.
1272
2.22M
    if (
FoundClobberResult || 2.22M
UpperBound < LocInfo.LastKill640k
) {
1273
1.61M
      MU->setDefiningAccess(VersionStack[UpperBound], true);
1274
1.61M
      // Our last kill is now wherever the walk stopped.
1275
1.61M
      LocInfo.LastKill = UpperBound;
1276
2.22M
    } else {
1277
609k
      // Otherwise, we checked all the new ones, and now we know we can get to
1278
609k
      // LastKill.
1279
609k
      MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true);
1280
609k
    }
1281
7.33M
    LocInfo.LowerBound = VersionStack.size() - 1;
1282
7.33M
    LocInfo.LowerBoundBlock = BB;
1283
7.33M
  }
1284
3.72M
}
1285
1286
/// Optimize uses to point to their actual clobbering definitions.
1287
516k
void MemorySSA::OptimizeUses::optimizeUses() {
1288
516k
  SmallVector<MemoryAccess *, 16> VersionStack;
1289
516k
  DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo;
1290
516k
  VersionStack.push_back(MSSA->getLiveOnEntryDef());
1291
516k
1292
516k
  unsigned long StackEpoch = 1;
1293
516k
  unsigned long PopEpoch = 1;
1294
516k
  // We perform a non-recursive top-down dominator tree walk.
1295
516k
  for (const auto *DomNode : depth_first(DT->getRootNode()))
1296
3.72M
    optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack,
1297
3.72M
                        LocStackInfo);
1298
516k
}
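The traversal above leans on LLVM's depth_first adaptor over DomTreeNode rather than explicit recursion. A minimal standalone sketch of the same pattern, assuming only a valid DominatorTree (the helper name is illustrative, not part of this file):

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"

// Visit every reachable block in dominator-tree preorder, no recursion needed.
static void visitDomTreePreorder(llvm::DominatorTree &DT) {
  for (auto *DomNode : llvm::depth_first(DT.getRootNode())) {
    llvm::BasicBlock *BB = DomNode->getBlock();
    (void)BB; // Per-block work goes here; optimizeUses() calls
              // optimizeUsesInBlock at exactly this point.
  }
}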
1299
1300
void MemorySSA::placePHINodes(
1301
    const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks,
1302
516k
    const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) {
1303
516k
  // Determine where our MemoryPhis should go
1304
516k
  ForwardIDFCalculator IDFs(*DT);
1305
516k
  IDFs.setDefiningBlocks(DefiningBlocks);
1306
516k
  SmallVector<BasicBlock *, 32> IDFBlocks;
1307
516k
  IDFs.calculate(IDFBlocks);
1308
516k
1309
516k
  std::sort(IDFBlocks.begin(), IDFBlocks.end(),
1310
3.94M
            [&BBNumbers](const BasicBlock *A, const BasicBlock *B) {
1311
3.94M
              return BBNumbers.lookup(A) < BBNumbers.lookup(B);
1312
3.94M
            });
1313
516k
1314
516k
  // Now place MemoryPhi nodes.
1315
516k
  for (auto &BB : IDFBlocks)
1316
958k
    createMemoryPhi(BB);
1317
516k
}
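As a side note, the IDF machinery used here also works standalone: the blocks the calculator returns for a set of defining blocks are exactly the blocks that need a phi. A minimal sketch under that assumption (helper name illustrative, not part of this file):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"

// Compute the blocks that would need a MemoryPhi for the given defining blocks.
static void phiBlocksFor(llvm::DominatorTree &DT,
                         const llvm::SmallPtrSetImpl<llvm::BasicBlock *> &Defs,
                         llvm::SmallVectorImpl<llvm::BasicBlock *> &Out) {
  llvm::ForwardIDFCalculator IDFs(DT);
  IDFs.setDefiningBlocks(Defs);
  IDFs.calculate(Out); // Out is the iterated dominance frontier of Defs.
}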
1318
1319
516k
void MemorySSA::buildMemorySSA() {
1320
516k
  // We create an access to represent "live on entry", for things like
1321
516k
  // arguments or users of globals, where the memory they use is defined before
1322
516k
  // the beginning of the function. We do not actually insert it into the IR.
1323
516k
  // We do not define a live on exit for the immediate uses, and thus our
1324
516k
  // semantics do *not* imply that something with no immediate uses can simply
1325
516k
  // be removed.
1326
516k
  BasicBlock &StartingPoint = F.getEntryBlock();
1327
516k
  LiveOnEntryDef =
1328
516k
      llvm::make_unique<MemoryDef>(F.getContext(), nullptr, nullptr,
1329
516k
                                   &StartingPoint, NextID++);
1330
516k
  DenseMap<const BasicBlock *, unsigned int> BBNumbers;
1331
516k
  unsigned NextBBNum = 0;
1332
516k
1333
516k
  // We maintain lists of memory accesses per-block, trading memory for time. We
1334
516k
  // could just look up the memory access for every possible instruction in the
1335
516k
  // stream.
1336
516k
  SmallPtrSet<BasicBlock *, 32> DefiningBlocks;
1337
516k
  // Go through each block, figure out where defs occur, and chain together all
1338
516k
  // the accesses.
1339
3.73M
  for (BasicBlock &B : F) {
1340
3.73M
    BBNumbers[&B] = NextBBNum++;
1341
3.73M
    bool InsertIntoDef = false;
1342
3.73M
    AccessList *Accesses = nullptr;
1343
3.73M
    DefsList *Defs = nullptr;
1344
19.2M
    for (Instruction &I : B) {
1345
19.2M
      MemoryUseOrDef *MUD = createNewAccess(&I);
1346
19.2M
      if (!MUD)
1347
12.8M
        continue;
1348
6.37M
1349
6.37M
      
if (6.37M
!Accesses6.37M
)
1350
2.43M
        Accesses = getOrCreateAccessList(&B);
1351
6.37M
      Accesses->push_back(MUD);
1352
6.37M
      if (
isa<MemoryDef>(MUD)6.37M
) {
1353
4.12M
        InsertIntoDef = true;
1354
4.12M
        if (!Defs)
1355
1.83M
          Defs = getOrCreateDefsList(&B);
1356
4.12M
        Defs->push_back(*MUD);
1357
4.12M
      }
1358
19.2M
    }
1359
3.73M
    if (InsertIntoDef)
1360
1.83M
      DefiningBlocks.insert(&B);
1361
3.73M
  }
1362
516k
  placePHINodes(DefiningBlocks, BBNumbers);
1363
516k
1364
516k
  // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
1365
516k
  // filled in with all blocks.
1366
516k
  SmallPtrSet<BasicBlock *, 16> Visited;
1367
516k
  renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
1368
516k
1369
516k
  CachingWalker *Walker = getWalkerImpl();
1370
516k
1371
516k
  // We're doing a batch of updates; don't drop useful caches between them.
1372
516k
  Walker->setAutoResetWalker(false);
1373
516k
  OptimizeUses(this, Walker, AA, DT).optimizeUses();
1374
516k
  Walker->setAutoResetWalker(true);
1375
516k
  Walker->resetClobberWalker();
1376
516k
1377
516k
  // Mark the uses in unreachable blocks as live on entry, so that they go
1378
516k
  // somewhere.
1379
516k
  for (auto &BB : F)
1380
3.73M
    
if (3.73M
!Visited.count(&BB)3.73M
)
1381
5.11k
      markUnreachableAsLiveOnEntry(&BB);
1382
516k
}
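Once built, the result is mostly consumed through getMemoryAccess() and the defining-access chain. A minimal sketch of that consumption pattern, assuming an already-built MemorySSA (helper name illustrative):

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"

// Print the chain of accesses from I's MemoryUseOrDef up to liveOnEntry or the
// first MemoryPhi encountered.
static void printDefChain(llvm::MemorySSA &MSSA, llvm::Instruction &I) {
  llvm::MemoryUseOrDef *MUD = MSSA.getMemoryAccess(&I);
  if (!MUD)
    return; // I neither reads nor writes memory.
  llvm::MemoryAccess *MA = MUD->getDefiningAccess();
  while (MA) {
    MA->print(llvm::errs());
    llvm::errs() << "\n";
    if (MSSA.isLiveOnEntryDef(MA) || llvm::isa<llvm::MemoryPhi>(MA))
      break; // Stop at the entry state or the first merge point.
    MA = llvm::cast<llvm::MemoryUseOrDef>(MA)->getDefiningAccess();
  }
}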
1383
1384
897k
MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }
1385
1386
1.41M
MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
1387
1.41M
  if (Walker)
1388
897k
    return Walker.get();
1389
516k
1390
516k
  Walker = llvm::make_unique<CachingWalker>(this, AA, DT);
1391
516k
  return Walker.get();
1392
516k
}
1393
1394
// This is a helper function used by the creation routines. It places NewAccess
1395
// into the access and defs lists for a given basic block, at the given
1396
// insertion point.
1397
void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess,
1398
                                        const BasicBlock *BB,
1399
958k
                                        InsertionPlace Point) {
1400
958k
  auto *Accesses = getOrCreateAccessList(BB);
1401
958k
  if (
Point == Beginning958k
) {
1402
958k
    // If it's a phi node, it goes first, otherwise, it goes after any phi
1403
958k
    // nodes.
1404
958k
    if (
isa<MemoryPhi>(NewAccess)958k
) {
1405
958k
      Accesses->push_front(NewAccess);
1406
958k
      auto *Defs = getOrCreateDefsList(BB);
1407
958k
      Defs->push_front(*NewAccess);
1408
958k
    } else {
1409
10
      auto AI = find_if_not(
1410
3
          *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
1411
10
      Accesses->insert(AI, NewAccess);
1412
10
      if (
!isa<MemoryUse>(NewAccess)10
) {
1413
4
        auto *Defs = getOrCreateDefsList(BB);
1414
4
        auto DI = find_if_not(
1415
0
            *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
1416
4
        Defs->insert(DI, *NewAccess);
1417
4
      }
1418
10
    }
1419
958k
  } else {
1420
53
    Accesses->push_back(NewAccess);
1421
53
    if (
!isa<MemoryUse>(NewAccess)53
) {
1422
12
      auto *Defs = getOrCreateDefsList(BB);
1423
12
      Defs->push_back(*NewAccess);
1424
12
    }
1425
53
  }
1426
958k
  BlockNumberingValid.erase(BB);
1427
958k
}
1428
1429
void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
1430
6
                                      AccessList::iterator InsertPt) {
1431
6
  auto *Accesses = getWritableBlockAccesses(BB);
1432
6
  bool WasEnd = InsertPt == Accesses->end();
1433
6
  Accesses->insert(AccessList::iterator(InsertPt), What);
1434
6
  if (
!isa<MemoryUse>(What)6
) {
1435
6
    auto *Defs = getOrCreateDefsList(BB);
1436
6
    // If we got asked to insert at the end, we have an easy job, just shove it
1437
6
    // at the end. If we got asked to insert before an existing def, we also get
1438
6
    // an iterator. If we got asked to insert before a use, we have to hunt for
1439
6
    // the next def.
1440
6
    if (
WasEnd6
) {
1441
3
      Defs->push_back(*What);
1442
6
    } else 
if (3
isa<MemoryDef>(InsertPt)3
) {
1443
2
      Defs->insert(InsertPt->getDefsIterator(), *What);
1444
3
    } else {
1445
2
      while (
InsertPt != Accesses->end() && 2
!isa<MemoryDef>(InsertPt)1
)
1446
1
        ++InsertPt;
1447
1
      // Either we found a def, or we are inserting at the end
1448
1
      if (InsertPt == Accesses->end())
1449
1
        Defs->push_back(*What);
1450
1
      else
1451
0
        Defs->insert(InsertPt->getDefsIterator(), *What);
1452
3
    }
1453
6
  }
1454
6
  BlockNumberingValid.erase(BB);
1455
6
}
1456
1457
// Move What before Where in the IR.  The end result is that What will belong to
1458
// the right lists and have the right Block set, but will not otherwise be
1459
// correct. It will not have the right defining access, and if it is a def,
1460
// things below it will not properly be updated.
1461
void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
1462
4
                       AccessList::iterator Where) {
1463
4
  // Keep it in the lookup tables, remove from the lists
1464
4
  removeFromLists(What, false);
1465
4
  What->setBlock(BB);
1466
4
  insertIntoListsBefore(What, BB, Where);
1467
4
}
1468
1469
void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
1470
52
                       InsertionPlace Point) {
1471
52
  removeFromLists(What, false);
1472
52
  What->setBlock(BB);
1473
52
  insertIntoListsForBlock(What, BB, Point);
1474
52
}
1475
1476
958k
MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
1477
958k
  assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
1478
958k
  MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
1479
958k
  // Phis are always placed at the front of the block.
1480
958k
  insertIntoListsForBlock(Phi, BB, Beginning);
1481
958k
  ValueToMemoryAccess[BB] = Phi;
1482
958k
  return Phi;
1483
958k
}
1484
1485
MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
1486
13
                                               MemoryAccess *Definition) {
1487
13
  assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
1488
13
  MemoryUseOrDef *NewAccess = createNewAccess(I);
1489
13
  assert(
1490
13
      NewAccess != nullptr &&
1491
13
      "Tried to create a memory access for a non-memory touching instruction");
1492
13
  NewAccess->setDefiningAccess(Definition);
1493
13
  return NewAccess;
1494
13
}
1495
1496
// Return true if the instruction has ordering constraints.
1497
// Note specifically that this only considers stores and loads
1498
// because others are still considered ModRef by getModRefInfo.
1499
15.1M
static inline bool isOrdered(const Instruction *I) {
1500
15.1M
  if (auto *
SI15.1M
= dyn_cast<StoreInst>(I)) {
1501
0
    if (!SI->isUnordered())
1502
0
      return true;
1503
15.1M
  } else 
if (auto *15.1M
LI15.1M
= dyn_cast<LoadInst>(I)) {
1504
2.20M
    if (!LI->isUnordered())
1505
6.70k
      return true;
1506
15.1M
  }
1507
15.1M
  return false;
1508
15.1M
}
1509
1510
/// \brief Helper function to create new memory accesses
1511
19.2M
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
1512
19.2M
  // The assume intrinsic has a control dependency which we model by claiming
1513
19.2M
  // that it writes arbitrarily. Ignore that fake memory dependency here.
1514
19.2M
  // FIXME: Replace this special casing with a more accurate modelling of
1515
19.2M
  // assume's control dependency.
1516
19.2M
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
1517
225k
    
if (225k
II->getIntrinsicID() == Intrinsic::assume225k
)
1518
73
      return nullptr;
1519
19.2M
1520
19.2M
  // Find out what effect this instruction has on memory.
1521
19.2M
  ModRefInfo ModRef = AA->getModRefInfo(I, None);
1522
19.2M
  // The isOrdered check is used to ensure that volatiles end up as defs
1523
19.2M
  // (atomics end up as ModRef right now anyway).  Until we separate the
1524
19.2M
  // ordering chain from the memory chain, this enables people to see at least
1525
19.2M
  // some relative ordering to volatiles.  Note that getClobberingMemoryAccess
1526
19.2M
  // will still give an answer that bypasses other volatile loads.  TODO:
1527
19.2M
  // Separate memory aliasing and ordering into two different chains so that we
1528
19.2M
  // can precisely represent both "what memory will this read/write/is clobbered
1529
19.2M
  // by" and "what instructions can I move this past".
1530
15.1M
  bool Def = bool(ModRef & MRI_Mod) || isOrdered(I);
1531
19.2M
  bool Use = bool(ModRef & MRI_Ref);
1532
19.2M
1533
19.2M
  // It's possible for an instruction to not modify memory at all. During
1534
19.2M
  // construction, we ignore them.
1535
19.2M
  if (
!Def && 19.2M
!Use15.1M
)
1536
12.8M
    return nullptr;
1537
6.37M
1538
19.2M
  assert((Def || Use) &&
1539
6.37M
         "Trying to create a memory access with a non-memory instruction");
1540
6.37M
1541
6.37M
  MemoryUseOrDef *MUD;
1542
6.37M
  if (Def)
1543
4.12M
    MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
1544
6.37M
  else
1545
2.25M
    MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
1546
19.2M
  ValueToMemoryAccess[I] = MUD;
1547
19.2M
  return MUD;
1548
19.2M
}
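For illustration, the Def/Use rule above can be restated as a standalone predicate: an instruction becomes a MemoryDef if it may modify memory or carries ordering constraints, and a MemoryUse if it only reads. This sketch reuses the pre-2018 MRI_Mod/MRI_Ref enumerators seen in this file and is not part of MemorySSA:

#include "llvm/ADT/None.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

// Mirrors the classification above; "Def" takes priority over "Use".
static bool wouldBeMemoryDef(llvm::AliasAnalysis &AA,
                             const llvm::Instruction *I) {
  llvm::ModRefInfo ModRef = AA.getModRefInfo(I, llvm::None);
  bool Ordered = false;
  if (auto *LI = llvm::dyn_cast<llvm::LoadInst>(I))
    Ordered = !LI->isUnordered(); // volatile or atomic load
  else if (auto *SI = llvm::dyn_cast<llvm::StoreInst>(I))
    Ordered = !SI->isUnordered(); // volatile or atomic store
  return bool(ModRef & llvm::MRI_Mod) || Ordered;
}

static bool wouldBeMemoryUse(llvm::AliasAnalysis &AA,
                             const llvm::Instruction *I) {
  return !wouldBeMemoryDef(AA, I) &&
         bool(AA.getModRefInfo(I, llvm::None) & llvm::MRI_Ref);
}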
1549
1550
/// \brief Returns true if \p Replacer dominates \p Replacee .
1551
bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
1552
0
                             const MemoryAccess *Replacee) const {
1553
0
  if (isa<MemoryUseOrDef>(Replacee))
1554
0
    return DT->dominates(Replacer->getBlock(), Replacee->getBlock());
1555
0
  const auto *MP = cast<MemoryPhi>(Replacee);
1556
0
  // For a phi node, the use occurs in the predecessor block of the phi node.
1557
0
  // Since Replacee may occur multiple times in the phi node, we have to check each
1558
0
  // operand to ensure Replacer dominates each operand where Replacee occurs.
1559
0
  for (const Use &Arg : MP->operands()) {
1560
0
    if (Arg.get() != Replacee &&
1561
0
        !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg)))
1562
0
      return false;
1563
0
  }
1564
0
  return true;
1565
0
}
1566
1567
/// \brief Properly remove \p MA from all of MemorySSA's lookup tables.
1568
96.4k
void MemorySSA::removeFromLookups(MemoryAccess *MA) {
1569
96.4k
  assert(MA->use_empty() &&
1570
96.4k
         "Trying to remove memory access that still has uses");
1571
96.4k
  BlockNumbering.erase(MA);
1572
96.4k
  if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA))
1573
96.3k
    MUD->setDefiningAccess(nullptr);
1574
96.4k
  // Invalidate our walker's cache if necessary
1575
96.4k
  if (!isa<MemoryUse>(MA))
1576
12.2k
    Walker->invalidateInfo(MA);
1577
96.4k
  // The call below to erase will destroy MA, so we can't change the order we
1578
96.4k
  // are doing things here
1579
96.4k
  Value *MemoryInst;
1580
96.4k
  if (MemoryUseOrDef *
MUD96.4k
= dyn_cast<MemoryUseOrDef>(MA)) {
1581
96.3k
    MemoryInst = MUD->getMemoryInst();
1582
96.4k
  } else {
1583
67
    MemoryInst = MA->getBlock();
1584
67
  }
1585
96.4k
  auto VMA = ValueToMemoryAccess.find(MemoryInst);
1586
96.4k
  if (VMA->second == MA)
1587
96.4k
    ValueToMemoryAccess.erase(VMA);
1588
96.4k
}
1589
1590
/// \brief Properly remove \p MA from all of MemorySSA's lists.
1591
///
1592
/// Because of the way the intrusive list and use lists work, it is important to
1593
/// do removal in the right order.
1594
/// ShouldDelete defaults to true, and will cause the memory access to also be
1595
/// deleted, not just removed.
1596
96.4k
void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
1597
96.4k
  // The access list owns the reference, so we erase it from the non-owning list
1598
96.4k
  // first.
1599
96.4k
  if (
!isa<MemoryUse>(MA)96.4k
) {
1600
12.2k
    auto DefsIt = PerBlockDefs.find(MA->getBlock());
1601
12.2k
    std::unique_ptr<DefsList> &Defs = DefsIt->second;
1602
12.2k
    Defs->remove(*MA);
1603
12.2k
    if (Defs->empty())
1604
159
      PerBlockDefs.erase(DefsIt);
1605
12.2k
  }
1606
96.4k
1607
96.4k
  // The erase call here will delete it. If we don't want it deleted, we call
1608
96.4k
  // remove instead.
1609
96.4k
  auto AccessIt = PerBlockAccesses.find(MA->getBlock());
1610
96.4k
  std::unique_ptr<AccessList> &Accesses = AccessIt->second;
1611
96.4k
  if (ShouldDelete)
1612
96.4k
    Accesses->erase(MA);
1613
96.4k
  else
1614
56
    Accesses->remove(MA);
1615
96.4k
1616
96.4k
  if (Accesses->empty())
1617
8.96k
    PerBlockAccesses.erase(AccessIt);
1618
96.4k
}
1619
1620
82
void MemorySSA::print(raw_ostream &OS) const {
1621
82
  MemorySSAAnnotatedWriter Writer(this);
1622
82
  F.print(OS, &Writer);
1623
82
}
1624
1625
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1626
LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
1627
#endif
1628
1629
93
void MemorySSA::verifyMemorySSA() const {
1630
93
  verifyDefUses(F);
1631
93
  verifyDomination(F);
1632
93
  verifyOrdering(F);
1633
93
  Walker->verify(this);
1634
93
}
1635
1636
/// \brief Verify that the order and existence of MemoryAccesses matches the
1637
/// order and existence of memory affecting instructions.
1638
93
void MemorySSA::verifyOrdering(Function &F) const {
1639
93
  // Walk all the blocks, comparing what the lookups think and what the access
1640
93
  // lists think, as well as the order in the blocks vs the order in the access
1641
93
  // lists.
1642
93
  SmallVector<MemoryAccess *, 32> ActualAccesses;
1643
93
  SmallVector<MemoryAccess *, 32> ActualDefs;
1644
284
  for (BasicBlock &B : F) {
1645
284
    const AccessList *AL = getBlockAccesses(&B);
1646
284
    const auto *DL = getBlockDefs(&B);
1647
284
    MemoryAccess *Phi = getMemoryAccess(&B);
1648
284
    if (
Phi284
) {
1649
81
      ActualAccesses.push_back(Phi);
1650
81
      ActualDefs.push_back(Phi);
1651
81
    }
1652
284
1653
802
    for (Instruction &I : B) {
1654
802
      MemoryAccess *MA = getMemoryAccess(&I);
1655
802
      assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&
1656
802
             "We have memory affecting instructions "
1657
802
             "in this block but they are not in the "
1658
802
             "access list or defs list");
1659
802
      if (
MA802
) {
1660
402
        ActualAccesses.push_back(MA);
1661
402
        if (isa<MemoryDef>(MA))
1662
234
          ActualDefs.push_back(MA);
1663
402
      }
1664
802
    }
1665
284
    // Either we hit the assert, really have no accesses, or we have both
1666
284
    // accesses and an access list.
1667
284
    // Same with defs.
1668
284
    if (
!AL && 284
!DL72
)
1669
72
      continue;
1670
284
    assert(AL->size() == ActualAccesses.size() &&
1671
212
           "We don't have the same number of accesses in the block as on the "
1672
212
           "access list");
1673
212
    assert((DL || ActualDefs.size() == 0) &&
1674
212
           "Either we should have a defs list, or we should have no defs");
1675
212
    assert((!DL || DL->size() == ActualDefs.size()) &&
1676
212
           "We don't have the same number of defs in the block as on the "
1677
212
           "def list");
1678
212
    auto ALI = AL->begin();
1679
212
    auto AAI = ActualAccesses.begin();
1680
695
    while (
ALI != AL->end() && 695
AAI != ActualAccesses.end()483
) {
1681
483
      assert(&*ALI == *AAI && "Not the same accesses in the same order");
1682
483
      ++ALI;
1683
483
      ++AAI;
1684
483
    }
1685
212
    ActualAccesses.clear();
1686
212
    if (
DL212
) {
1687
205
      auto DLI = DL->begin();
1688
205
      auto ADI = ActualDefs.begin();
1689
520
      while (
DLI != DL->end() && 520
ADI != ActualDefs.end()315
) {
1690
315
        assert(&*DLI == *ADI && "Not the same defs in the same order");
1691
315
        ++DLI;
1692
315
        ++ADI;
1693
315
      }
1694
205
    }
1695
284
    ActualDefs.clear();
1696
284
  }
1697
93
}
1698
1699
/// \brief Verify the domination properties of MemorySSA by checking that each
1700
/// definition dominates all of its uses.
1701
93
void MemorySSA::verifyDomination(Function &F) const {
1702
#ifndef NDEBUG
1703
  for (BasicBlock &B : F) {
1704
    // Phi nodes are attached to basic blocks
1705
    if (MemoryPhi *MP = getMemoryAccess(&B))
1706
      for (const Use &U : MP->uses())
1707
        assert(dominates(MP, U) && "Memory PHI does not dominate its uses");
1708
1709
    for (Instruction &I : B) {
1710
      MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
1711
      if (!MD)
1712
        continue;
1713
1714
      for (const Use &U : MD->uses())
1715
        assert(dominates(MD, U) && "Memory Def does not dominate its uses");
1716
    }
1717
  }
1718
#endif
1719
}
1720
1721
/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
1722
/// appears in the use list of \p Def.
1723
585
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
1724
#ifndef NDEBUG
1725
  // The live on entry use may cause us to get a NULL def here
1726
  if (!Def)
1727
    assert(isLiveOnEntryDef(Use) &&
1728
           "Null def but use not point to live on entry def");
1729
  else
1730
    assert(is_contained(Def->users(), Use) &&
1731
           "Did not find use in def's use list");
1732
#endif
1733
}
1734
1735
/// \brief Verify the immediate use information, by walking all the memory
1736
/// accesses and verifying that, for each use, it appears in the
1737
/// appropriate def's use list
1738
93
void MemorySSA::verifyDefUses(Function &F) const {
1739
284
  for (BasicBlock &B : F) {
1740
284
    // Phi nodes are attached to basic blocks
1741
284
    if (MemoryPhi *
Phi284
= getMemoryAccess(&B)) {
1742
81
      assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
1743
81
                                          pred_begin(&B), pred_end(&B))) &&
1744
81
             "Incomplete MemoryPhi Node");
1745
264
      for (unsigned I = 0, E = Phi->getNumIncomingValues(); 
I != E264
;
++I183
)
1746
183
        verifyUseInDefs(Phi->getIncomingValue(I), Phi);
1747
81
    }
1748
284
1749
802
    for (Instruction &I : B) {
1750
802
      if (MemoryUseOrDef *
MA802
= getMemoryAccess(&I)) {
1751
402
        verifyUseInDefs(MA->getDefiningAccess(), MA);
1752
402
      }
1753
802
    }
1754
284
  }
1755
93
}
1756
1757
2.30M
MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
1758
2.30M
  return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
1759
2.30M
}
1760
1761
2.32k
MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
1762
2.32k
  return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
1763
2.32k
}
1764
1765
/// Perform a local numbering on blocks so that instruction ordering can be
1766
/// determined in constant time.
1767
/// TODO: We currently just number in order.  If we numbered by N, we could
1768
/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
1769
/// log2(N) sequences of mixed before and after) without needing to invalidate
1770
/// the numbering.
1771
62.3k
void MemorySSA::renumberBlock(const BasicBlock *B) const {
1772
62.3k
  // The pre-increment ensures the numbers really start at 1.
1773
62.3k
  unsigned long CurrentNumber = 0;
1774
62.3k
  const AccessList *AL = getBlockAccesses(B);
1775
62.3k
  assert(AL != nullptr && "Asking to renumber an empty block");
1776
62.3k
  for (const auto &I : *AL)
1777
538k
    BlockNumbering[&I] = ++CurrentNumber;
1778
62.3k
  BlockNumberingValid.insert(B);
1779
62.3k
}
1780
1781
/// \brief Determine, for two memory accesses in the same block,
1782
/// whether \p Dominator dominates \p Dominatee.
1783
/// \returns True if \p Dominator dominates \p Dominatee.
1784
bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
1785
96.9k
                                 const MemoryAccess *Dominatee) const {
1786
96.9k
  const BasicBlock *DominatorBlock = Dominator->getBlock();
1787
96.9k
1788
96.9k
  assert((DominatorBlock == Dominatee->getBlock()) &&
1789
96.9k
         "Asking for local domination when accesses are in different blocks!");
1790
96.9k
  // A node dominates itself.
1791
96.9k
  if (Dominatee == Dominator)
1792
0
    return true;
1793
96.9k
1794
96.9k
  // When Dominatee is defined on function entry, it is not dominated by another
1795
96.9k
  // memory access.
1796
96.9k
  
if (96.9k
isLiveOnEntryDef(Dominatee)96.9k
)
1797
0
    return false;
1798
96.9k
1799
96.9k
  // When Dominator is defined on function entry, it dominates the other memory
1800
96.9k
  // access.
1801
96.9k
  
if (96.9k
isLiveOnEntryDef(Dominator)96.9k
)
1802
6.75k
    return true;
1803
90.2k
1804
90.2k
  
if (90.2k
!BlockNumberingValid.count(DominatorBlock)90.2k
)
1805
62.3k
    renumberBlock(DominatorBlock);
1806
96.9k
1807
96.9k
  unsigned long DominatorNum = BlockNumbering.lookup(Dominator);
1808
96.9k
  // All numbers start with 1
1809
96.9k
  assert(DominatorNum != 0 && "Block was not numbered properly");
1810
96.9k
  unsigned long DominateeNum = BlockNumbering.lookup(Dominatee);
1811
96.9k
  assert(DominateeNum != 0 && "Block was not numbered properly");
1812
96.9k
  return DominatorNum < DominateeNum;
1813
96.9k
}
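The scheme above is a generic trick: number a block's items once, lazily, and ordering queries within that block become two lookups. A small self-contained sketch, independent of the LLVM types used here (names are illustrative):

#include <unordered_map>
#include <vector>

// Lazy per-block numbering: O(n) to (re)number, O(1) per ordering query.
struct LocalNumbering {
  std::unordered_map<const void *, unsigned long> Number;

  void renumber(const std::vector<const void *> &BlockItems) {
    Number.clear();
    unsigned long Current = 0;
    for (const void *Item : BlockItems)
      Number[Item] = ++Current; // Pre-increment: numbers really start at 1.
  }

  // Precondition: renumber() already ran for the block holding A and B.
  bool comesBefore(const void *A, const void *B) const {
    return Number.at(A) < Number.at(B);
  }
};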
1814
1815
bool MemorySSA::dominates(const MemoryAccess *Dominator,
1816
1.24M
                          const MemoryAccess *Dominatee) const {
1817
1.24M
  if (Dominator == Dominatee)
1818
80.9k
    return true;
1819
1.16M
1820
1.16M
  
if (1.16M
isLiveOnEntryDef(Dominatee)1.16M
)
1821
125k
    return false;
1822
1.04M
1823
1.04M
  
if (1.04M
Dominator->getBlock() != Dominatee->getBlock()1.04M
)
1824
945k
    return DT->dominates(Dominator->getBlock(), Dominatee->getBlock());
1825
96.9k
  return locallyDominates(Dominator, Dominatee);
1826
96.9k
}
1827
1828
bool MemorySSA::dominates(const MemoryAccess *Dominator,
1829
0
                          const Use &Dominatee) const {
1830
0
  if (MemoryPhi *
MP0
= dyn_cast<MemoryPhi>(Dominatee.getUser())) {
1831
0
    BasicBlock *UseBB = MP->getIncomingBlock(Dominatee);
1832
0
    // The def must dominate the incoming block of the phi.
1833
0
    if (UseBB != Dominator->getBlock())
1834
0
      return DT->dominates(Dominator->getBlock(), UseBB);
1835
0
    // If the UseBB and the DefBB are the same, compare locally.
1836
0
    return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee));
1837
0
  }
1838
0
  // If it's not a PHI node use, the normal dominates can already handle it.
1839
0
  return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
1840
0
}
1841
1842
const static char LiveOnEntryStr[] = "liveOnEntry";
1843
1844
439
void MemoryAccess::print(raw_ostream &OS) const {
1845
439
  switch (getValueID()) {
1846
69
  case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
1847
216
  case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS);
1848
154
  case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS);
1849
0
  }
1850
0
  
llvm_unreachable0
("invalid value id");
1851
0
}
1852
1853
216
void MemoryDef::print(raw_ostream &OS) const {
1854
216
  MemoryAccess *UO = getDefiningAccess();
1855
216
1856
216
  OS << getID() << " = MemoryDef(";
1857
216
  if (
UO && 216
UO->getID()216
)
1858
144
    OS << UO->getID();
1859
216
  else
1860
72
    OS << LiveOnEntryStr;
1861
216
  OS << ')';
1862
216
}
1863
1864
69
void MemoryPhi::print(raw_ostream &OS) const {
1865
69
  bool First = true;
1866
69
  OS << getID() << " = MemoryPhi(";
1867
159
  for (const auto &Op : operands()) {
1868
159
    BasicBlock *BB = getIncomingBlock(Op);
1869
159
    MemoryAccess *MA = cast<MemoryAccess>(Op);
1870
159
    if (!First)
1871
90
      OS << ',';
1872
159
    else
1873
69
      First = false;
1874
159
1875
159
    OS << '{';
1876
159
    if (BB->hasName())
1877
145
      OS << BB->getName();
1878
159
    else
1879
14
      BB->printAsOperand(OS, false);
1880
159
    OS << ',';
1881
159
    if (unsigned ID = MA->getID())
1882
143
      OS << ID;
1883
159
    else
1884
16
      OS << LiveOnEntryStr;
1885
159
    OS << '}';
1886
159
  }
1887
69
  OS << ')';
1888
69
}
1889
1890
154
void MemoryUse::print(raw_ostream &OS) const {
1891
154
  MemoryAccess *UO = getDefiningAccess();
1892
154
  OS << "MemoryUse(";
1893
154
  if (
UO && 154
UO->getID()154
)
1894
128
    OS << UO->getID();
1895
154
  else
1896
26
    OS << LiveOnEntryStr;
1897
154
  OS << ')';
1898
154
}
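Taken together, the three print routines above produce annotations of the form "2 = MemoryDef(liveOnEntry)" for a def whose nearest clobber is the entry state, "MemoryUse(2)" for a use of that def, and "3 = MemoryPhi({if.then,2},{if.else,liveOnEntry})" for a merge point; unnamed blocks fall back to printAsOperand. The numeric IDs and block names shown here are only examples and depend on creation order.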
1899
1900
0
void MemoryAccess::dump() const {
1901
0
// Cannot completely remove virtual function even in release mode.
1902
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1903
  print(dbgs());
1904
  dbgs() << "\n";
1905
#endif
1906
}
1907
1908
char MemorySSAPrinterLegacyPass::ID = 0;
1909
1910
20
MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
1911
20
  initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
1912
20
}
1913
1914
20
void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
1915
20
  AU.setPreservesAll();
1916
20
  AU.addRequired<MemorySSAWrapperPass>();
1917
20
}
1918
1919
45
bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
1920
45
  auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
1921
45
  MSSA.print(dbgs());
1922
45
  if (VerifyMemorySSA)
1923
44
    MSSA.verifyMemorySSA();
1924
45
  return false;
1925
45
}
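As a usage note, this legacy printer is normally driven from opt via the pass's registered argument (conventionally something like opt -print-memoryssa -disable-output input.ll with the legacy pass manager); the exact flag spelling is an assumption here and may vary by release. When the VerifyMemorySSA option checked above is enabled, the same run also exercises verifyMemorySSA().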
1926
1927
AnalysisKey MemorySSAAnalysis::Key;
1928
1929
MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
1930
158
                                                 FunctionAnalysisManager &AM) {
1931
158
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1932
158
  auto &AA = AM.getResult<AAManager>(F);
1933
158
  return MemorySSAAnalysis::Result(llvm::make_unique<MemorySSA>(F, &AA, &DT));
1934
158
}
1935
1936
PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
1937
36
                                            FunctionAnalysisManager &AM) {
1938
36
  OS << "MemorySSA for function: " << F.getName() << "\n";
1939
36
  AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS);
1940
36
1941
36
  return PreservedAnalyses::all();
1942
36
}
1943
1944
PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
1945
33
                                             FunctionAnalysisManager &AM) {
1946
33
  AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA();
1947
33
1948
33
  return PreservedAnalyses::all();
1949
33
}
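For completeness, a consumer pass under the new pass manager obtains the analysis exactly as the printer and verifier above do. A minimal sketch; MyMSSAConsumerPass is a hypothetical name, not part of LLVM:

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

struct MyMSSAConsumerPass : llvm::PassInfoMixin<MyMSSAConsumerPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &AM) {
    llvm::MemorySSA &MSSA = AM.getResult<llvm::MemorySSAAnalysis>(F).getMSSA();
    MSSA.verifyMemorySSA(); // Stand-in for real work against MSSA.
    return llvm::PreservedAnalyses::all();
  }
};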
1950
1951
char MemorySSAWrapperPass::ID = 0;
1952
1953
17.4k
MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) {
1954
17.4k
  initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry());
1955
17.4k
}
1956
1957
516k
void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); }
1958
1959
17.4k
void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1960
17.4k
  AU.setPreservesAll();
1961
17.4k
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
1962
17.4k
  AU.addRequiredTransitive<AAResultsWrapperPass>();
1963
17.4k
}
1964
1965
516k
bool MemorySSAWrapperPass::runOnFunction(Function &F) {
1966
516k
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1967
516k
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1968
516k
  MSSA.reset(new MemorySSA(F, &AA, &DT));
1969
516k
  return false;
1970
516k
}
1971
1972
0
void MemorySSAWrapperPass::verifyAnalysis() const { MSSA->verifyMemorySSA(); }
1973
1974
1
void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const {
1975
1
  MSSA->print(OS);
1976
1
}
1977
1978
516k
MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
1979
1980
MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
1981
                                        DominatorTree *D)
1982
516k
    : MemorySSAWalker(M), Walker(*M, *A, *D) {}
1983
1984
12.2k
void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
1985
12.2k
  if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
1986
12.1k
    MUD->resetOptimized();
1987
12.2k
}
1988
1989
/// \brief Walk the use-def chains starting at \p MA and find
1990
/// the MemoryAccess that actually clobbers Loc.
1991
///
1992
/// \returns our clobbering memory access
1993
MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
1994
910k
    MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
1995
910k
  MemoryAccess *New = Walker.findClobber(StartingAccess, Q);
1996
#ifdef EXPENSIVE_CHECKS
1997
  MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q);
1998
  assert(NewNoCache == New && "Cache made us hand back a different result?");
1999
  (void)NewNoCache;
2000
#endif
2001
910k
  if (AutoResetWalker)
2002
15.8k
    resetClobberWalker();
2003
910k
  return New;
2004
910k
}
2005
2006
MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
2007
2
    MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
2008
2
  if (isa<MemoryPhi>(StartingAccess))
2009
0
    return StartingAccess;
2010
2
2011
2
  auto *StartingUseOrDef = cast<MemoryUseOrDef>(StartingAccess);
2012
2
  if (MSSA->isLiveOnEntryDef(StartingUseOrDef))
2013
0
    return StartingUseOrDef;
2014
2
2015
2
  Instruction *I = StartingUseOrDef->getMemoryInst();
2016
2
2017
2
  // Conservatively, fences are always clobbers, so don't perform the walk if we
2018
2
  // hit a fence.
2019
2
  if (
!ImmutableCallSite(I) && 2
I->isFenceLike()2
)
2020
0
    return StartingUseOrDef;
2021
2
2022
2
  UpwardsMemoryQuery Q;
2023
2
  Q.OriginalAccess = StartingUseOrDef;
2024
2
  Q.StartingLoc = Loc;
2025
2
  Q.Inst = I;
2026
2
  Q.IsCall = false;
2027
2
2028
2
  // Unlike the other function, do not walk to the def of a def, because we are
2029
2
  // handed something we already believe is the clobbering access.
2030
2
  MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
2031
0
                                     ? StartingUseOrDef->getDefiningAccess()
2032
2
                                     : StartingUseOrDef;
2033
2
2034
2
  MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
2035
2
  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
2036
2
  DEBUG(dbgs() << *StartingUseOrDef << "\n");
2037
2
  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
2038
2
  DEBUG(dbgs() << *Clobber << "\n");
2039
2
  return Clobber;
2040
2
}
2041
2042
MemoryAccess *
2043
1.27M
MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
2044
1.27M
  auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
2045
1.27M
  // If this is a MemoryPhi, we can't do anything.
2046
1.27M
  if (!StartingAccess)
2047
0
    return MA;
2048
1.27M
2049
1.27M
  // If this is an already optimized use or def, return the optimized result.
2050
1.27M
  // Note: Currently, we do not store the optimized def result because we'd need
2051
1.27M
  // a separate field, since we can't use it as the defining access.
2052
1.27M
  
if (auto *1.27M
MUD1.27M
= dyn_cast<MemoryUseOrDef>(StartingAccess))
2053
1.27M
    
if (1.27M
MUD->isOptimized()1.27M
)
2054
365k
      return MUD->getOptimized();
2055
910k
2056
910k
  const Instruction *I = StartingAccess->getMemoryInst();
2057
910k
  UpwardsMemoryQuery Q(I, StartingAccess);
2058
910k
  // We can't sanely do anything with a fences, they conservatively
2059
910k
  // clobber all memory, and have no locations to get pointers from to
2060
910k
  // try to disambiguate.
2061
910k
  if (
!Q.IsCall && 910k
I->isFenceLike()907k
)
2062
0
    return StartingAccess;
2063
910k
2064
910k
  
if (910k
isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)910k
) {
2065
1
    MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
2066
1
    if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
2067
1
      MUD->setOptimized(LiveOnEntry);
2068
1
    return LiveOnEntry;
2069
1
  }
2070
910k
2071
910k
  // Start with the thing we already think clobbers this location
2072
910k
  MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
2073
910k
2074
910k
  // At this point, DefiningAccess may be the live on entry def.
2075
910k
  // If it is, we will not get a better result.
2076
910k
  if (MSSA->isLiveOnEntryDef(DefiningAccess))
2077
8
    return DefiningAccess;
2078
910k
2079
910k
  MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
2080
910k
  DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
2081
910k
  DEBUG(dbgs() << *DefiningAccess << "\n");
2082
910k
  DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
2083
910k
  DEBUG(dbgs() << *Result << "\n");
2084
910k
  if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
2085
910k
    MUD->setOptimized(Result);
2086
1.27M
2087
1.27M
  return Result;
2088
1.27M
}
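Putting the pieces together, a typical client query goes through the walker returned by getWalker(). A minimal sketch, assuming a built MemorySSA and a memory-touching instruction (helper name illustrative):

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Instruction.h"

static llvm::MemoryAccess *nearestClobber(llvm::MemorySSA &MSSA,
                                          llvm::Instruction &I) {
  llvm::MemorySSAWalker *Walker = MSSA.getWalker();
  // Walks up from I's access to the closest MemoryDef or MemoryPhi that may
  // clobber it; the caching walker above stores the result on the access
  // when it can (setOptimized).
  return Walker->getClobberingMemoryAccess(&I);
}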
2089
2090
MemoryAccess *
2091
0
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
2092
0
  if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
2093
0
    return Use->getDefiningAccess();
2094
0
  return MA;
2095
0
}
2096
2097
MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
2098
0
    MemoryAccess *StartingAccess, const MemoryLocation &) {
2099
0
  if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess))
2100
0
    return Use->getDefiningAccess();
2101
0
  return StartingAccess;
2102
0
}
2103
2104
0
void MemoryPhi::deleteMe(DerivedUser *Self) {
2105
0
  delete static_cast<MemoryPhi *>(Self);
2106
0
}
2107
2108
0
void MemoryDef::deleteMe(DerivedUser *Self) {
2109
0
  delete static_cast<MemoryDef *>(Self);
2110
0
}
2111
2112
0
void MemoryUse::deleteMe(DerivedUser *Self) {
2113
0
  delete static_cast<MemoryUse *>(Self);
2114
0
}