Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs various transformations related to eliminating memcpy
// calls, or transforming sets of stores into memset's.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "memcpyopt"

STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy,   "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet,    "Number of memcpys converted to memset");

static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
                                  bool &VariableIdxFound,
                                  const DataLayout &DL) {
  // Skip over the first indices.
  gep_type_iterator GTI = gep_type_begin(GEP);
  for (unsigned i = 1; i != Idx; ++i, ++GTI)
    /*skip along*/;

  // Compute the offset implied by the rest of the indices.
  int64_t Offset = 0;
  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (!OpC)
      return VariableIdxFound = true;
    if (OpC->isZero()) continue;  // No offset.

    // Handle struct indices, which add their field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
      continue;
    }

    // Otherwise, we have a sequential type like an array or vector.  Multiply
    // the index by the ElementSize.
    uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
    Offset += Size*OpC->getSExtValue();
  }

  return Offset;
}

/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and
/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2
/// might be &A[40]. In this case offset would be -8.
static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
                            const DataLayout &DL) {
  Ptr1 = Ptr1->stripPointerCasts();
  Ptr2 = Ptr2->stripPointerCasts();

  // Handle the trivial case first.
  if (Ptr1 == Ptr2) {
    Offset = 0;
    return true;
  }

  GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
  GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);

  bool VariableIdxFound = false;

  // If one pointer is a GEP and the other isn't, then see if the GEP is a
  // constant offset from the base, as in "P" and "gep P, 1".
  if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
    Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL);
    return !VariableIdxFound;
  }

  if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
    Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL);
    return !VariableIdxFound;
  }

  // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
  // base.  After that base, they may have some number of common (and
  // potentially variable) indices.  After that, each may have some constant
  // offset, which determines their offset from each other.  We handle no other
  // cases at this point.
  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
    return false;

  // Skip any common indices and track the GEP types.
  unsigned Idx = 1;
  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
    if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
      break;

  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL);
  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL);
  if (VariableIdxFound) return false;

  Offset = Offset2-Offset1;
  return true;
}
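
// Illustrative example (hypothetical IR, assuming a DataLayout where i32 is
// 4 bytes): given
//   %p1 = getelementptr i32, i32* %A, i64 5
//   %p2 = getelementptr i32, i32* %A, i64 2
// both GEPs share the base %A, so IsPointerOffset(%p1, %p2, Offset, DL)
// succeeds with Offset = -12, following the same &A[42]/&A[40] convention as
// the comment above.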

namespace {

/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
///   store 0 -> P+1
///   store 0 -> P+0
///   store 0 -> P+3
///   store 0 -> P+2
/// which sometimes happens with stores to arrays of structs etc.  When we see
/// the first store, we make a range [1, 2).  The second store extends the range
/// to [0, 2).  The third makes a new range [2, 3).  The fourth store joins the
/// two ranges into [0, 3) which is memset'able.
struct MemsetRange {
  // Start/End - A semi range that describes the span that this range covers.
  // The range is closed at the start and open at the end: [Start, End).
  int64_t Start, End;

  /// StartPtr - The getelementptr instruction that points to the start of the
  /// range.
  Value *StartPtr;

  /// Alignment - The known alignment of the first store.
  unsigned Alignment;

  /// TheStores - The actual stores that make up this range.
  SmallVector<Instruction*, 16> TheStores;

  bool isProfitableToUseMemset(const DataLayout &DL) const;
};

} // end anonymous namespace

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
  // If we found more than 4 stores to merge or 16 bytes, use memset.
  if (TheStores.size() >= 4 || End-Start >= 16) return true;

  // If there is nothing to merge, don't do anything.
  if (TheStores.size() < 2) return false;

  // If any of the stores are a memset, then it is always good to extend the
  // memset.
  for (Instruction *SI : TheStores)
    if (!isa<StoreInst>(SI))
      return true;

  // Assume that the code generator is capable of merging pairs of stores
  // together if it wants to.
  if (TheStores.size() == 2) return false;

  // If we have fewer than 8 stores, it can still be worthwhile to do this.
  // For example, merging 4 i8 stores into an i32 store is useful almost always.
  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
  // memset will be split into 2 32-bit stores anyway) and doing so can
  // pessimize the llvm optimizer.
  //
  // Since we don't have perfect knowledge here, make some assumptions: assume
  // the maximum GPR width is the same size as the largest legal integer
  // size. If so, check to see whether we will end up actually reducing the
  // number of stores used.
  unsigned Bytes = unsigned(End-Start);
  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
  if (MaxIntSize == 0)
    MaxIntSize = 1;
  unsigned NumPointerStores = Bytes / MaxIntSize;

  // Assume the remaining bytes if any are done a byte at a time.
  unsigned NumByteStores = Bytes % MaxIntSize;

  // If we will reduce the # stores (according to this heuristic), do the
  // transformation.  This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
  // etc.
  return TheStores.size() > NumPointerStores+NumByteStores;
}
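
// Illustrative worked example of the heuristic above (hypothetical numbers):
// a range built from three adjacent stores (one i32 plus two i16s) spans 8
// bytes.  On a target whose largest legal integer type is i64,
// NumPointerStores = 8/8 = 1 and NumByteStores = 8%8 = 0, so 3 > 1 holds and
// the range is lowered to a single 8-byte memset.  On a target whose largest
// legal integer is i32, the figures are 8/4 = 2 and 8%4 = 0, and 3 > 2 still
// holds.  A range of exactly two stores never reaches this point because of
// the TheStores.size() == 2 check earlier.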

namespace {

class MemsetRanges {
  using range_iterator = SmallVectorImpl<MemsetRange>::iterator;

  /// A sorted list of the memset ranges.
  SmallVector<MemsetRange, 8> Ranges;

  const DataLayout &DL;

public:
  MemsetRanges(const DataLayout &DL) : DL(DL) {}

  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;

  const_iterator begin() const { return Ranges.begin(); }
  const_iterator end() const { return Ranges.end(); }
  bool empty() const { return Ranges.empty(); }

  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      addStore(OffsetFromFirst, SI);
    else
      addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
  }

  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
    int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());

    addRange(OffsetFromFirst, StoreSize,
             SI->getPointerOperand(), SI->getAlignment(), SI);
  }

  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
    int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
  }

  void addRange(int64_t Start, int64_t Size, Value *Ptr,
                unsigned Alignment, Instruction *Inst);
};

} // end anonymous namespace

/// Add a new store to the MemsetRanges data structure.  This adds a
/// new range for the specified store at the specified offset, merging into
/// existing ranges as appropriate.
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                            unsigned Alignment, Instruction *Inst) {
  int64_t End = Start+Size;

  range_iterator I = partition_point(
      Ranges, [=](const MemsetRange &O) { return O.End < Start; });

  // We now know that I == E, in which case we didn't find anything to merge
  // with, or that Start <= I->End.  If End < I->Start or I == E, then we need
  // to insert a new range.  Handle this now.
  if (I == Ranges.end() || End < I->Start) {
    MemsetRange &R = *Ranges.insert(I, MemsetRange());
    R.Start        = Start;
    R.End          = End;
    R.StartPtr     = Ptr;
    R.Alignment    = Alignment;
    R.TheStores.push_back(Inst);
    return;
  }

  // This store overlaps with I, add it.
  I->TheStores.push_back(Inst);

  // At this point, we may have an interval that completely contains our store.
  // If so, just add it to the interval and return.
  if (I->Start <= Start && I->End >= End)
    return;

  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
  // but is not entirely contained within the range.

  // See if the range extends the start of the range.  In this case, it couldn't
  // possibly cause it to join the prior range, because otherwise we would have
  // stopped on *it*.
  if (Start < I->Start) {
    I->Start = Start;
    I->StartPtr = Ptr;
    I->Alignment = Alignment;
  }

  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
  // is in or right at the end of I), and that End >= I->Start.  Extend I out to
  // End.
  if (End > I->End) {
    I->End = End;
    range_iterator NextI = I;
    while (++NextI != Ranges.end() && End >= NextI->Start) {
      // Merge the range in.
      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
      if (NextI->End > I->End)
        I->End = NextI->End;
      Ranges.erase(NextI);
      NextI = I;
    }
  }
}

//===----------------------------------------------------------------------===//
//                         MemCpyOptLegacyPass Pass
//===----------------------------------------------------------------------===//

namespace {

class MemCpyOptLegacyPass : public FunctionPass {
  MemCpyOptPass Impl;

public:
  static char ID; // Pass identification, replacement for typeid

  MemCpyOptLegacyPass() : FunctionPass(ID) {
    initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

private:
  // This transformation requires dominator postdominator info
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<MemoryDependenceWrapperPass>();
  }
};

} // end anonymous namespace

char MemCpyOptLegacyPass::ID = 0;

/// The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }

INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
                    false, false)

/// When scanning forward over instructions, we look for some other patterns to
/// fold away. In particular, this looks for stores to neighboring locations of
/// memory. If it sees enough consecutive ones, it attempts to merge them
/// together into a memcpy/memset.
Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
                                                 Value *StartPtr,
                                                 Value *ByteVal) {
  const DataLayout &DL = StartInst->getModule()->getDataLayout();

  // Okay, so we now have a single store that can be splatable.  Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemsetRanges Ranges(DL);

  BasicBlock::iterator BI(StartInst);
  for (++BI; !BI->isTerminator(); ++BI) {
    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple()) break;

      // Check to see if this stored value is of the same byte-splattable value.
      Value *StoredByte = isBytewiseValue(NextStore->getOperand(0), DL);
      if (isa<UndefValue>(ByteVal) && StoredByte)
        ByteVal = StoredByte;
      if (ByteVal != StoredByte)
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset,
                           DL))
        break;

      Ranges.addStore(Offset, NextStore);
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
          !isa<ConstantInt>(MSI->getLength()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL))
        break;

      Ranges.addMemSet(Offset, MSI);
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return nullptr;

  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addInst(0, StartInst);

  // If we create any memsets, we put it right before the first instruction that
  // isn't part of the memset block.  This ensures that the memset is dominated
  // by any addressing instruction needed by the start of the block.
  IRBuilder<> Builder(&*BI);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  Instruction *AMemSet = nullptr;
  for (const MemsetRange &Range : Ranges) {
    if (Range.TheStores.size() == 1) continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(DL))
      continue;

    // Otherwise, we do want to transform this!  Create a new memset.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    // Determine alignment
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      Type *EltType =
        cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = DL.getABITypeAlignment(EltType);
    }

    AMemSet =
      Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);

    LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
                                                   : Range.TheStores) dbgs()
                                              << *SI << '\n';
               dbgs() << "With: " << *AMemSet << '\n');

    if (!Range.TheStores.empty())
      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    // Zap all the stores.
    for (Instruction *SI : Range.TheStores) {
      MD->removeInstruction(SI);
      SI->eraseFromParent();
    }
    ++NumMemSetInfer;
  }

  return AMemSet;
}
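
// Illustrative IR sketch of the merge performed above (hypothetical values):
//
//   store i8 0, i8* %p
//   %p1 = getelementptr i8, i8* %p, i64 1
//   store i8 0, i8* %p1
//   %p2 = getelementptr i8, i8* %p, i64 2
//   store i8 0, i8* %p2
//   %p3 = getelementptr i8, i8* %p, i64 3
//   store i8 0, i8* %p3
//
// becomes, once the [0, 4) range is judged profitable (four stores merge):
//
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4, i1 false)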

static unsigned findStoreAlignment(const DataLayout &DL, const StoreInst *SI) {
  unsigned StoreAlign = SI->getAlignment();
  if (!StoreAlign)
    StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
  return StoreAlign;
}

static unsigned findLoadAlignment(const DataLayout &DL, const LoadInst *LI) {
  unsigned LoadAlign = LI->getAlignment();
  if (!LoadAlign)
    LoadAlign = DL.getABITypeAlignment(LI->getType());
  return LoadAlign;
}

static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
                                     const LoadInst *LI) {
  unsigned StoreAlign = findStoreAlignment(DL, SI);
  unsigned LoadAlign = findLoadAlignment(DL, LI);
  return MinAlign(StoreAlign, LoadAlign);
}

// This method tries to lift a store instruction before position P.
// It will lift the store and its argument, plus anything else that
// may alias with these.
// The method returns true if it was successful.
static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
                   const LoadInst *LI) {
  // If the store aliases this position, bail out early.
  MemoryLocation StoreLoc = MemoryLocation::get(SI);
  if (isModOrRefSet(AA.getModRefInfo(P, StoreLoc)))
    return false;

  // Keep track of the arguments of all instructions we plan to lift
  // so we can make sure to lift them as well if appropriate.
  DenseSet<Instruction*> Args;
  if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
    if (Ptr->getParent() == SI->getParent())
      Args.insert(Ptr);

  // Instructions to lift before P.
  SmallVector<Instruction*, 8> ToLift;

  // Memory locations of lifted instructions.
  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};

  // Lifted calls.
  SmallVector<const CallBase *, 8> Calls;

  const MemoryLocation LoadLoc = MemoryLocation::get(LI);

  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
    auto *C = &*I;

    bool MayAlias = isModOrRefSet(AA.getModRefInfo(C, None));

    bool NeedLift = false;
    if (Args.erase(C))
      NeedLift = true;
    else if (MayAlias) {
      NeedLift = llvm::any_of(MemLocs, [C, &AA](const MemoryLocation &ML) {
        return isModOrRefSet(AA.getModRefInfo(C, ML));
      });

      if (!NeedLift)
        NeedLift = llvm::any_of(Calls, [C, &AA](const CallBase *Call) {
          return isModOrRefSet(AA.getModRefInfo(C, Call));
        });
    }

    if (!NeedLift)
      continue;

    if (MayAlias) {
      // Since LI is implicitly moved downwards past the lifted instructions,
      // none of them may modify its source.
      if (isModSet(AA.getModRefInfo(C, LoadLoc)))
        return false;
      else if (const auto *Call = dyn_cast<CallBase>(C)) {
        // If we can't lift this before P, it's game over.
        if (isModOrRefSet(AA.getModRefInfo(P, Call)))
          return false;

        Calls.push_back(Call);
      } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
        // If we can't lift this before P, it's game over.
        auto ML = MemoryLocation::get(C);
        if (isModOrRefSet(AA.getModRefInfo(P, ML)))
          return false;

        MemLocs.push_back(ML);
      } else
        // We don't know how to lift this instruction.
        return false;
    }

    ToLift.push_back(C);
    for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
      if (auto *A = dyn_cast<Instruction>(C->getOperand(k)))
        if (A->getParent() == SI->getParent())
          Args.insert(A);
  }

  // We made it, so lift the instructions we collected.
  for (auto *I : llvm::reverse(ToLift)) {
    LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
    I->moveBefore(P);
  }

  return true;
}
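
// Illustrative sketch of why lifting matters (hypothetical IR):
//
//   %v = load %struct.S, %struct.S* %src
//   store i32 1, i32* %q        ; may write %src, so promotion at the final
//                               ; store position is blocked
//   store %struct.S %v, %struct.S* %dst
//
// processStore (below) picks P = the clobbering i32 store and calls moveUp to
// hoist the aggregate store (and anything feeding its address) above P, so the
// promoted memcpy/memmove can be emitted at that earlier position instead.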

bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (!SI->isSimple()) return false;

  // Avoid merging nontemporal stores since the resulting
  // memcpy/memset would not be able to preserve the nontemporal hint.
  // In theory we could teach how to propagate the !nontemporal metadata to
  // memset calls. However, that change would force the backend to
  // conservatively expand !nontemporal memset calls back to sequences of
  // store instructions (effectively undoing the merging).
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  const DataLayout &DL = SI->getModule()->getDataLayout();

  // Load to store forwarding can be interpreted as memcpy.
  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
    if (LI->isSimple() && LI->hasOneUse() &&
        LI->getParent() == SI->getParent()) {

      auto *T = LI->getType();
      if (T->isAggregateType()) {
        AliasAnalysis &AA = LookupAliasAnalysis();
        MemoryLocation LoadLoc = MemoryLocation::get(LI);

        // We use alias analysis to check if an instruction may store to
        // the memory we load from in between the load and the store. If
        // such an instruction is found, we try to promote there instead
        // of at the store position.
        Instruction *P = SI;
        for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
          if (isModSet(AA.getModRefInfo(&I, LoadLoc))) {
            P = &I;
            break;
          }
        }

        // We found an instruction that may write to the loaded memory.
        // We can try to promote at this position instead of the store
        // position if nothing aliases the store memory after this and the
        // store destination is not in the range.
        if (P && P != SI) {
          if (!moveUp(AA, SI, P, LI))
            P = nullptr;
        }

        // If a valid insertion position is found, then we can promote
        // the load/store pair to a memcpy.
        if (P) {
          // If we load from memory that may alias the memory we store to,
          // memmove must be used to preserve semantics. If not, memcpy can
          // be used.
          bool UseMemMove = false;
          if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
            UseMemMove = true;

          uint64_t Size = DL.getTypeStoreSize(T);

          IRBuilder<> Builder(P);
          Instruction *M;
          if (UseMemMove)
            M = Builder.CreateMemMove(
                SI->getPointerOperand(), findStoreAlignment(DL, SI),
                LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
          else
            M = Builder.CreateMemCpy(
                SI->getPointerOperand(), findStoreAlignment(DL, SI),
                LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);

          LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
                            << *M << "\n");

          MD->removeInstruction(SI);
          SI->eraseFromParent();
          MD->removeInstruction(LI);
          LI->eraseFromParent();
          ++NumMemCpyInstr;

          // Make sure we do not invalidate the iterator.
          BBI = M->getIterator();
          return true;
        }
      }

      // Detect cases where we're performing call slot forwarding, but
      // happen to be using a load-store pair to implement it, rather than
      // a memcpy.
      MemDepResult ldep = MD->getDependency(LI);
      CallInst *C = nullptr;
      if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
        C = dyn_cast<CallInst>(ldep.getInst());

      if (C) {
        // Check that nothing touches the dest of the "copy" between
        // the call and the store.
        Value *CpyDest = SI->getPointerOperand()->stripPointerCasts();
        bool CpyDestIsLocal = isa<AllocaInst>(CpyDest);
        AliasAnalysis &AA = LookupAliasAnalysis();
        MemoryLocation StoreLoc = MemoryLocation::get(SI);
        for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
             I != E; --I) {
          if (isModOrRefSet(AA.getModRefInfo(&*I, StoreLoc))) {
            C = nullptr;
            break;
          }
          // The store to dest may never happen if an exception can be thrown
          // between the load and the store.
          if (I->mayThrow() && !CpyDestIsLocal) {
            C = nullptr;
            break;
          }
        }
      }

      if (C) {
        bool changed = performCallSlotOptzn(
            LI, SI->getPointerOperand()->stripPointerCasts(),
            LI->getPointerOperand()->stripPointerCasts(),
            DL.getTypeStoreSize(SI->getOperand(0)->getType()),
            findCommonAlignment(DL, SI, LI), C);
        if (changed) {
          MD->removeInstruction(SI);
          SI->eraseFromParent();
          MD->removeInstruction(LI);
          LI->eraseFromParent();
          ++NumMemCpyInstr;
          return true;
        }
      }
    }
  }

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that is memset'able a
  // byte at a time, like "0" or "-1" of any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  auto *V = SI->getOperand(0);
  if (Value *ByteVal = isBytewiseValue(V, DL)) {
    if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                              ByteVal)) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }

    // If we have an aggregate, we try to promote it to memset regardless
    // of opportunity for merging as it can expose optimization opportunities
    // in subsequent passes.
    auto *T = V->getType();
    if (T->isAggregateType()) {
      uint64_t Size = DL.getTypeStoreSize(T);
      unsigned Align = SI->getAlignment();
      if (!Align)
        Align = DL.getABITypeAlignment(T);
      IRBuilder<> Builder(SI);
      auto *M =
          Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, Align);

      LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");

      MD->removeInstruction(SI);
      SI->eraseFromParent();
      NumMemSetInfer++;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }

  return false;
}
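
// Illustrative IR sketch of the aggregate load/store promotion above
// (hypothetical types and names, assuming %struct.S is 8 bytes):
//
//   %v = load %struct.S, %struct.S* %src, align 4
//   store %struct.S %v, %struct.S* %dst, align 4
//
// becomes, when %src and %dst are known not to alias,
//
//   %0 = bitcast %struct.S* %dst to i8*
//   %1 = bitcast %struct.S* %src to i8*
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 8, i1 false)
//
// and a memmove is emitted instead when the two locations may alias.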

bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
  // See if there is another memset or store neighboring this memset which
  // allows us to widen out the memset to do a single larger store.
  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
    if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
                                              MSI->getValue())) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }
  return false;
}

/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
                                         Value *cpySrc, uint64_t cpyLen,
                                         unsigned cpyAlign, CallInst *C) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.

  // Lifetime marks shouldn't be operated on.
  if (Function *F = C->getCalledFunction())
    if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
      return false;

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  CallSite CS(C);

  // Require that src be an alloca.  This simplifies the reasoning considerably.
  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  const DataLayout &DL = cpy->getModule()->getDataLayout();
  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
                     srcArraySize->getZExtValue();

  if (cpyLen < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
                        destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
    // The store to dest may never happen if the call can throw.
    if (C->mayThrow())
      return false;

    if (A->getDereferenceableBytes() < srcSize) {
      // If the destination is an sret parameter then only accesses that are
      // outside of the returned struct type can trap.
      if (!A->hasStructRetAttr())
        return false;

      Type *StructTy = cast<PointerType>(A->getType())->getElementType();
      if (!StructTy->isSized()) {
        // The call may never return and hence the copy-instruction may never
        // be executed, and therefore it's not safe to say "the destination
        // has at least <cpyLen> bytes, as implied by the copy-instruction".
        return false;
      }

      uint64_t destSize = DL.getTypeAllocSize(StructTy);
      if (destSize < srcSize)
        return false;
    }
  } else {
    return false;
  }

  // Check that dest points to memory that is at least as aligned as src.
  unsigned srcAlign = srcAlloca->getAlignment();
  if (!srcAlign)
    srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType());
  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
  // If dest is not aligned enough and we can't increase its alignment then
  // bail out.
  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
    return false;

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
                                   srcAlloca->user_end());
  while (!srcUseList.empty()) {
    User *U = srcUseList.pop_back_val();

    if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
      for (User *UU : U->users())
        srcUseList.push_back(UU);
      continue;
    }
    if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
      if (!G->hasAllZeroIndices())
        return false;

      for (User *UU : U->users())
        srcUseList.push_back(UU);
      continue;
    }
    if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
      if (IT->isLifetimeStartOrEnd())
        continue;

    if (U != C && U != cpy)
      return false;
  }

  // Check that src isn't captured by the called function since the
  // transformation can cause aliasing issues in that case.
  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
    if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i))
      return false;

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree &DT = LookupDomTree();
  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis &AA = LookupAliasAnalysis();
  ModRefInfo MR = AA.getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
  // If necessary, perform additional analysis.
  if (isModOrRefSet(MR))
    MR = AA.callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), &DT);
  if (isModOrRefSet(MR))
    return false;

  // We can't create address space casts here because we don't know if they're
  // safe for the target.
  if (cpySrc->getType()->getPointerAddressSpace() !=
      cpyDest->getType()->getPointerAddressSpace())
    return false;
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i)->stripPointerCasts() == cpySrc &&
        cpySrc->getType()->getPointerAddressSpace() !=
        CS.getArgument(i)->getType()->getPointerAddressSpace())
      return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
      Value *Dest = cpySrc->getType() == cpyDest->getType() ?  cpyDest
        : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                      cpyDest->getName(), C);
      changedArgument = true;
      if (CS.getArgument(i)->getType() == Dest->getType())
        CS.setArgument(i, Dest);
      else
        CS.setArgument(i, CastInst::CreatePointerCast(Dest,
                          CS.getArgument(i)->getType(), Dest->getName(), C));
    }

  if (!changedArgument)
    return false;

  // If the destination wasn't sufficiently aligned then increase its alignment.
  if (!isDestSufficientlyAligned) {
    assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
    cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
  }

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MD->removeInstruction(C);

  // Update AA metadata
  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
  // handled here, but combineMetadata doesn't support them yet
  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                         LLVMContext::MD_noalias,
                         LLVMContext::MD_invariant_group,
                         LLVMContext::MD_access_group};
  combineMetadata(C, cpy, KnownIDs, true);

  // Remove the memcpy.
  MD->removeInstruction(cpy);
  ++NumMemCpyInstr;

  return true;
}
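
// Illustrative C-level sketch of the call slot optimization above
// (hypothetical function and variable names):
//
//   struct S tmp;                      // %tmp = alloca %struct.S
//   make_s(&tmp);                      // call void @make_s(%struct.S* %tmp)
//   memcpy(&dst, &tmp, sizeof tmp);    // memcpy dst <- tmp
//
// When tmp is used only by the call and the memcpy, and dst is dereferenceable
// and at least as large and as aligned as tmp, the call is rewritten to write
// into dst directly and the memcpy is dropped:
//
//   make_s(&dst);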

/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                                                  MemCpyInst *MDep) {
  // We can only transform memcpys where the dest of one is the source of the
  // other.
  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
    return false;

  // If dep instruction is reading from our current input, then it is a noop
  // transfer and substituting the input won't change this instruction.  Just
  // ignore the input and let someone else zap MDep.  This handles cases like:
  //    memcpy(a <- a)
  //    memcpy(b <- a)
  if (M->getSource() == MDep->getSource())
    return false;

  // Second, the length of the memcpy's must be the same, or the preceding one
  // must be larger than the following one.
  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
    return false;

  AliasAnalysis &AA = LookupAliasAnalysis();

  // Verify that the copied-from memory doesn't change in between the two
  // transfers.  For example, in:
  //    memcpy(a <- b)
  //    *b = 42;
  //    memcpy(c <- a)
  // It would be invalid to transform the second memcpy into memcpy(c <- b).
  //
  // TODO: If the code between M and MDep is transparent to the destination "c",
  // then we could still perform the xform by moving M up to the first memcpy.
  //
  // NOTE: This is conservative, it will stop on any read from the source loc,
  // not just the defining memcpy.
  MemDepResult SourceDep =
      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
                                   M->getIterator(), M->getParent());
  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
    return false;

  // If the dest of the second might alias the source of the first, then the
  // source and dest might overlap.  We still want to eliminate the intermediate
  // value, but we have to generate a memmove instead of memcpy.
  bool UseMemMove = false;
  if (!AA.isNoAlias(MemoryLocation::getForDest(M),
                    MemoryLocation::getForSource(MDep)))
    UseMemMove = true;

  // If all checks passed, then we can transform M.
  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
                    << *MDep << '\n' << *M << '\n');

  // TODO: Is this worth it if we're creating a less aligned memcpy? For
  // example we could be moving from movaps -> movq on x86.
  IRBuilder<> Builder(M);
  if (UseMemMove)
    Builder.CreateMemMove(M->getRawDest(), M->getDestAlignment(),
                          MDep->getRawSource(), MDep->getSourceAlignment(),
                          M->getLength(), M->isVolatile());
  else
    Builder.CreateMemCpy(M->getRawDest(), M->getDestAlignment(),
                         MDep->getRawSource(), MDep->getSourceAlignment(),
                         M->getLength(), M->isVolatile());

  // Remove the instruction we're replacing.
  MD->removeInstruction(M);
  M->eraseFromParent();
  ++NumMemCpyInstr;
  return true;
}
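
// Illustrative sketch of the forwarding above (hypothetical names, same
// notation as the comments in this function):
//
//   memcpy(b <- a, 16)
//   memcpy(c <- b, 16)
//
// becomes
//
//   memcpy(b <- a, 16)
//   memcpy(c <- a, 16)    // or a memmove when c may alias a
//
// which leaves the first memcpy dead if b has no other uses, so a later DSE
// run can remove it.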
1079
1080
/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1081
/// \p MemSet.  Try to simplify \p MemSet to only set the trailing bytes that
1082
/// weren't copied over by \p MemCpy.
1083
///
1084
/// In other words, transform:
1085
/// \code
1086
///   memset(dst, c, dst_size);
1087
///   memcpy(dst, src, src_size);
1088
/// \endcode
1089
/// into:
1090
/// \code
1091
///   memcpy(dst, src, src_size);
1092
///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1093
/// \endcode
1094
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1095
1.41k
                                                  MemSetInst *MemSet) {
1096
1.41k
  // We can only transform memset/memcpy with the same destination.
1097
1.41k
  if (MemSet->getDest() != MemCpy->getDest())
1098
316
    return false;
1099
1.09k
1100
1.09k
  // Check that there are no other dependencies on the memset destination.
1101
1.09k
  MemDepResult DstDepInfo =
1102
1.09k
      MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
1103
1.09k
                                   MemCpy->getIterator(), MemCpy->getParent());
1104
1.09k
  if (DstDepInfo.getInst() != MemSet)
1105
1.05k
    return false;
1106
37
1107
37
  // Use the same i8* dest as the memcpy, killing the memset dest if different.
1108
37
  Value *Dest = MemCpy->getRawDest();
1109
37
  Value *DestSize = MemSet->getLength();
1110
37
  Value *SrcSize = MemCpy->getLength();
1111
37
1112
37
  // By default, create an unaligned memset.
1113
37
  unsigned Align = 1;
1114
37
  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1115
37
  // of the sum.
1116
37
  const unsigned DestAlign =
1117
37
      std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
1118
37
  if (DestAlign > 1)
1119
30
    if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1120
30
      Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
1121
37
1122
37
  IRBuilder<> Builder(MemCpy);
1123
37
1124
37
  // If the sizes have different types, zext the smaller one.
1125
37
  if (DestSize->getType() != SrcSize->getType()) {
1126
4
    if (DestSize->getType()->getIntegerBitWidth() >
1127
4
        SrcSize->getType()->getIntegerBitWidth())
1128
2
      SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1129
2
    else
1130
2
      DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1131
4
  }
1132
37
1133
37
  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1134
37
  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1135
37
  Value *MemsetLen = Builder.CreateSelect(
1136
37
      Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1137
37
  Builder.CreateMemSet(
1138
37
      Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
1139
37
                        SrcSize),
1140
37
      MemSet->getOperand(1), MemsetLen, Align);
1141
37
1142
37
  MD->removeInstruction(MemSet);
1143
37
  MemSet->eraseFromParent();
1144
37
  return true;
1145
37
}
1146
1147
/// Determine whether the instruction has undefined content for the given Size,
1148
/// either because it was freshly alloca'd or started its lifetime.
1149
175
static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
1150
175
  if (isa<AllocaInst>(I))
1151
3
    return true;
1152
172
1153
172
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
1154
152
    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1155
152
      if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1156
152
        if (LTSize->getZExtValue() >= Size->getZExtValue())
1157
150
          return true;
1158
22
1159
22
  return false;
1160
22
}
1161
1162
/// Transform memcpy to memset when its source was just memset.
1163
/// In other words, turn:
1164
/// \code
1165
///   memset(dst1, c, dst1_size);
1166
///   memcpy(dst2, dst1, dst2_size);
1167
/// \endcode
1168
/// into:
1169
/// \code
1170
///   memset(dst1, c, dst1_size);
1171
///   memset(dst2, c, dst2_size);
1172
/// \endcode
1173
/// When dst2_size <= dst1_size.
1174
///
1175
/// The \p MemCpy must have a Constant length.
1176
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1177
113
                                               MemSetInst *MemSet) {
1178
113
  AliasAnalysis &AA = LookupAliasAnalysis();
1179
113
1180
113
  // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1181
113
  // memcpying from the same address. Otherwise it is hard to reason about.
1182
113
  if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1183
57
    return false;
1184
56
1185
56
  // A known memset size is required.
1186
56
  ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
1187
56
  if (!MemSetSize)
1188
1
    return false;
1189
55
1190
55
  // Make sure the memcpy doesn't read any more than what the memset wrote.
1191
55
  // Don't worry about sizes larger than i64.
1192
55
  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
1193
55
  if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
1194
12
    // If the memcpy is larger than the memset, but the memory was undef prior
1195
12
    // to the memset, we can just ignore the tail. Technically we're only
1196
12
    // interested in the bytes from MemSetSize..CopySize here, but as we can't
1197
12
    // easily represent this location, we use the full 0..CopySize range.
1198
12
    MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1199
12
    MemDepResult DepInfo = MD->getPointerDependencyFrom(
1200
12
        MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
1201
12
    if (DepInfo.isDef() && 
hasUndefContents(DepInfo.getInst(), CopySize)6
)
1202
4
      CopySize = MemSetSize;
1203
8
    else
1204
8
      return false;
1205
47
  }
1206
47
1207
47
  IRBuilder<> Builder(MemCpy);
1208
47
  Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1209
47
                       CopySize, MemCpy->getDestAlignment());
1210
47
  return true;
1211
47
}
1212
1213
/// Perform simplification of memcpy's.  If we have memcpy A
1214
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1215
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1216
/// circumstances). This allows later passes to remove the first memcpy
1217
/// altogether.
1218
23.6k
bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
1219
23.6k
  // We can only optimize non-volatile memcpy's.
1220
23.6k
  if (M->isVolatile()) 
return false1
;
1221
23.6k
1222
23.6k
  // If the source and destination of the memcpy are the same, then zap it.
1223
23.6k
  if (M->getSource() == M->getDest()) {
1224
6
    MD->removeInstruction(M);
1225
6
    M->eraseFromParent();
1226
6
    return false;
1227
6
  }
1228
23.6k
1229
23.6k
  // If copying from a constant, try to turn the memcpy into a memset.
1230
23.6k
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
1231
2.19k
    if (GV->isConstant() && 
GV->hasDefinitiveInitializer()2.03k
)
1232
2.03k
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1233
12
                                           M->getModule()->getDataLayout())) {
1234
12
        IRBuilder<> Builder(M);
1235
12
        Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1236
12
                             M->getDestAlignment(), false);
1237
12
        MD->removeInstruction(M);
1238
12
        M->eraseFromParent();
1239
12
        ++NumCpyToSet;
1240
12
        return true;
1241
12
      }
1242
23.6k
1243
23.6k
  MemDepResult DepInfo = MD->getDependency(M);
1244
23.6k
1245
23.6k
  // Try to turn a partially redundant memset + memcpy into
1246
23.6k
  // memcpy + smaller memset.  We don't need the memcpy size for this.
1247
23.6k
  if (DepInfo.isClobber())
1248
13.7k
    if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
1249
1.41k
      if (processMemSetMemCpyDependence(M, MDep))
1250
37
        return true;
1251
23.6k
1252
23.6k
  // The optimizations after this point require the memcpy size.
1253
23.6k
  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
1254
23.6k
  if (!CopySize) 
return false2.19k
;
1255
21.4k
1256
21.4k
  // There are four possible optimizations we can do for memcpy:
1257
21.4k
  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
1258
21.4k
  //   b) call-memcpy xform for return slot optimization.
1259
21.4k
  //   c) memcpy from freshly alloca'd space or space that has just started its
1260
21.4k
  //      lifetime copies undefined data, and we can therefore eliminate the
1261
21.4k
  //      memcpy in favor of the data that was already at the destination.
1262
21.4k
  //   d) memcpy from a just-memset'd source can be turned into memset.
1263
21.4k
  if (DepInfo.isClobber()) {
1264
13.4k
    if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
1265
8.01k
      // FIXME: Can we pass in either of dest/src alignment here instead
1266
8.01k
      // of conservatively taking the minimum?
1267
8.01k
      unsigned Align = MinAlign(M->getDestAlignment(), M->getSourceAlignment());
1268
8.01k
      if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
1269
8.01k
                               CopySize->getZExtValue(), Align,
1270
8.01k
                               C)) {
1271
129
        MD->removeInstruction(M);
1272
129
        M->eraseFromParent();
1273
129
        return true;
1274
129
      }
1275
21.3k
    }
1276
13.4k
  }
1277
21.3k
1278
21.3k
  MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
1279
21.3k
  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
1280
21.3k
      SrcLoc, true, M->getIterator(), M->getParent());
1281
21.3k
1282
21.3k
  if (SrcDepInfo.isClobber()) {
1283
10.3k
    if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
1284
1.93k
      return processMemCpyMemCpyDependence(M, MDep);
1285
10.9k
  } else if (SrcDepInfo.isDef()) {
1286
169
    if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
1287
149
      MD->removeInstruction(M);
1288
149
      M->eraseFromParent();
1289
149
      ++NumMemCpyInstr;
1290
149
      return true;
1291
149
    }
1292
19.2k
  }
1293
19.2k
1294
19.2k
  if (SrcDepInfo.isClobber())
1295
8.39k
    if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
1296
113
      if (performMemCpyToMemSetOptzn(M, MDep)) {
1297
47
        MD->removeInstruction(M);
1298
47
        M->eraseFromParent();
1299
47
        ++NumCpyToSet;
1300
47
        return true;
1301
47
      }
1302
19.1k
1303
19.1k
  return false;
1304
19.1k
}
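One of the cases processMemCpy handles above is a copy whose source is a constant global with a bytewise-repeating initializer; a hedged C-level sketch of that case (identifiers and sizes are illustrative):

    #include <cstring>

    static const char Zeroes[32] = {};   // constant global, every byte is 0

    void before(char *dst) {
      std::memcpy(dst, Zeroes, sizeof(Zeroes));  // copy from the constant initializer
    }

    void after(char *dst) {
      std::memset(dst, 0, 32);                   // rewritten to a memset (counted by NumCpyToSet)
    }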
1305
1306
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1307
/// not to alias.
1308
1.03k
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1309
1.03k
  AliasAnalysis &AA = LookupAliasAnalysis();
1310
1.03k
1311
1.03k
  if (!TLI->has(LibFunc_memmove))
1312
0
    return false;
1313
1.03k
1314
1.03k
  // See if the pointers alias.
1315
1.03k
  if (!AA.isNoAlias(MemoryLocation::getForDest(M),
1316
1.03k
                    MemoryLocation::getForSource(M)))
1317
998
    return false;
1318
32
1319
32
  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1320
32
                    << "\n");
1321
32
1322
32
  // If not, then we know we can transform this.
1323
32
  Type *ArgTys[3] = { M->getRawDest()->getType(),
1324
32
                      M->getRawSource()->getType(),
1325
32
                      M->getLength()->getType() };
1326
32
  M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1327
32
                                                 Intrinsic::memcpy, ArgTys));
1328
32
1329
32
  // MemDep may have overly conservative information about this instruction; just
1330
32
  // conservatively flush it from the cache.
1331
32
  MD->removeInstruction(M);
1332
32
1333
32
  ++NumMoveToCpy;
1334
32
  return true;
1335
32
}
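A rough sketch of the pattern processMemMove targets, assuming two locals that alias analysis can prove distinct (everything here is illustrative):

    #include <cstring>

    void example() {
      char a[16] = {};
      char b[16] = "data";
      // a and b are distinct locals, so AA reports NoAlias for the two locations;
      // the pass then retargets the intrinsic call from llvm.memmove to llvm.memcpy.
      std::memmove(a, b, sizeof(a));
    }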
1336
1337
/// This is called on every byval argument in call sites.
1338
24
bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
1339
24
  const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
1340
24
  // Find out what feeds this byval argument.
1341
24
  Value *ByValArg = CS.getArgument(ArgNo);
1342
24
  Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
1343
24
  uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
1344
24
  MemDepResult DepInfo = MD->getPointerDependencyFrom(
1345
24
      MemoryLocation(ByValArg, LocationSize::precise(ByValSize)), true,
1346
24
      CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
1347
24
  if (!DepInfo.isClobber())
1348
5
    return false;
1349
19
1350
19
  // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
1351
19
  // a memcpy, see if we can byval from the source of the memcpy instead of the
1352
19
  // result.
1353
19
  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
1354
19
  if (!MDep || MDep->isVolatile() ||
1355
19
      ByValArg->stripPointerCasts() != MDep->getDest())
1356
14
    return false;
1357
5
1358
5
  // The length of the memcpy must be greater than or equal to the size of the byval.
1359
5
  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1360
5
  if (!C1 || C1->getValue().getZExtValue() < ByValSize)
1361
0
    return false;
1362
5
1363
5
  // Get the alignment of the byval.  If the call doesn't specify the alignment,
1364
5
  // then it is some target-specific value that we can't know.
1365
5
  unsigned ByValAlign = CS.getParamAlignment(ArgNo);
1366
5
  if (ByValAlign == 0) return false;
1367
3
1368
3
  // If it is greater than the memcpy, then we check to see if we can force the
1369
3
  // source of the memcpy to the alignment we need.  If we fail, we bail out.
1370
3
  AssumptionCache &AC = LookupAssumptionCache();
1371
3
  DominatorTree &DT = LookupDomTree();
1372
3
  if (MDep->getSourceAlignment() < ByValAlign &&
1373
3
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
1374
1
                                 CS.getInstruction(), &AC, &DT) < ByValAlign)
1375
0
    return false;
1376
3
1377
3
  // The address space of the memcpy source must match that of the byval argument.
1378
3
  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1379
3
      ByValArg->getType()->getPointerAddressSpace())
1380
1
    return false;
1381
2
1382
2
  // Verify that the copied-from memory doesn't change between the memcpy and
1383
2
  // the byval call.
1384
2
  //    memcpy(a <- b)
1385
2
  //    *b = 42;
1386
2
  //    foo(*a)
1387
2
  // It would be invalid to transform the second memcpy into foo(*b).
1388
2
  //
1389
2
  // NOTE: This is conservative, it will stop on any read from the source loc,
1390
2
  // not just the defining memcpy.
1391
2
  MemDepResult SourceDep = MD->getPointerDependencyFrom(
1392
2
      MemoryLocation::getForSource(MDep), false,
1393
2
      CS.getInstruction()->getIterator(), MDep->getParent());
1394
2
  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
1395
0
    return false;
1396
2
1397
2
  Value *TmpCast = MDep->getSource();
1398
2
  if (MDep->getSource()->getType() != ByValArg->getType())
1399
0
    TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1400
0
                              "tmpcast", CS.getInstruction());
1401
2
1402
2
  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1403
2
                    << "  " << *MDep << "\n"
1404
2
                    << "  " << *CS.getInstruction() << "\n");
1405
2
1406
2
  // Otherwise we're good!  Update the byval argument.
1407
2
  CS.setArgument(ArgNo, TmpCast);
1408
2
  ++NumMemCpyInstr;
1409
2
  return true;
1410
2
}
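A hedged source-level sketch of the situation processByValArgument looks for; the struct and function names are made up, and the memcpy in question is the one the front end emits to build the byval temporary:

    struct Big { int data[64]; };
    void callee(Big b);          // typically lowered with a byval pointer argument

    void caller(const Big &src) {
      callee(src);               // front end emits memcpy(byval_tmp <- src) for the argument
    }
    // When src cannot change between that memcpy and the call, the pass points the
    // byval argument at src itself, leaving the temporary's memcpy dead for later cleanup.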
1411
1412
/// Executes one iteration of MemCpyOptPass.
1413
351k
bool MemCpyOptPass::iterateOnFunction(Function &F) {
1414
351k
  bool MadeChange = false;
1415
351k
1416
351k
  DominatorTree &DT = LookupDomTree();
1417
351k
1418
351k
  // Walk all instructions in the function.
1419
2.39M
  for (BasicBlock &BB : F) {
1420
2.39M
    // Skip unreachable blocks. For example processStore assumes that an
1421
2.39M
    // instruction in a BB can't be dominated by a later instruction in the
1422
2.39M
    // same BB (which is a scenario that can happen for an unreachable BB that
1423
2.39M
    // has itself as a predecessor).
1424
2.39M
    if (!DT.isReachableFromEntry(&BB))
1425
2
      continue;
1426
2.39M
1427
15.0M
    for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1428
12.6M
      // Avoid invalidating the iterator.
1429
12.6M
      Instruction *I = &*BI++;
1430
12.6M
1431
12.6M
      bool RepeatInstruction = false;
1432
12.6M
1433
12.6M
      if (StoreInst *SI = dyn_cast<StoreInst>(I))
1434
1.05M
        MadeChange |= processStore(SI, BI);
1435
11.5M
      else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
1436
44.9k
        RepeatInstruction = processMemSet(M, BI);
1437
11.5M
      else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
1438
23.6k
        RepeatInstruction = processMemCpy(M);
1439
11.5M
      else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
1440
1.03k
        RepeatInstruction = processMemMove(M);
1441
11.5M
      else if (auto CS = CallSite(I)) {
1442
4.83M
        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
1443
3.31M
          if (CS.isByValArgument(i))
1444
24
            MadeChange |= processByValArgument(CS, i);
1445
1.51M
      }
1446
12.6M
1447
12.6M
      // Reprocess the instruction if desired.
1448
12.6M
      if (RepeatInstruction) {
1449
933
        if (BI != BB.begin())
1450
929
          --BI;
1451
933
        MadeChange = true;
1452
933
      }
1453
12.6M
    }
1454
2.39M
  }
1455
351k
1456
351k
  return MadeChange;
1457
351k
}
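The loop above advances the iterator before visiting each instruction so that erasing the current one cannot invalidate it, and steps back one slot when an instruction should be revisited. The same idiom in a self-contained form, with std::list standing in for the instruction list (purely an analogy, not code from this pass):

    #include <list>

    void eraseEvens(std::list<int> &L) {
      for (auto It = L.begin(), E = L.end(); It != E;) {
        auto Cur = It++;          // advance first, as the pass does with BI
        if (*Cur % 2 == 0)
          L.erase(Cur);           // erasing Cur leaves It (and E) valid
      }
    }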
1458
1459
1.17k
PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1460
1.17k
  auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
1461
1.17k
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1462
1.17k
1463
1.17k
  auto LookupAliasAnalysis = [&]() -> AliasAnalysis & {
1464
0
    return AM.getResult<AAManager>(F);
1465
0
  };
1466
1.17k
  auto LookupAssumptionCache = [&]() -> AssumptionCache & {
1467
0
    return AM.getResult<AssumptionAnalysis>(F);
1468
0
  };
1469
1.17k
  auto LookupDomTree = [&]() -> DominatorTree & {
1470
638
    return AM.getResult<DominatorTreeAnalysis>(F);
1471
638
  };
1472
1.17k
1473
1.17k
  bool MadeChange = runImpl(F, &MD, &TLI, LookupAliasAnalysis,
1474
1.17k
                            LookupAssumptionCache, LookupDomTree);
1475
1.17k
  if (!MadeChange)
1476
1.15k
    return PreservedAnalyses::all();
1477
21
1478
21
  PreservedAnalyses PA;
1479
21
  PA.preserveSet<CFGAnalyses>();
1480
21
  PA.preserve<GlobalsAA>();
1481
21
  PA.preserve<MemoryDependenceAnalysis>();
1482
21
  return PA;
1483
21
}
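A minimal sketch, not taken from this file, of how the new-pass-manager entry point above might be driven in isolation; it uses the usual PassBuilder registration boilerplate so that MemoryDependenceAnalysis, TargetLibraryAnalysis, and the other analyses the pass queries are available:

    #include "llvm/Passes/PassBuilder.h"
    #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
    using namespace llvm;

    void runMemCpyOpt(Function &F) {
      LoopAnalysisManager LAM;
      FunctionAnalysisManager FAM;
      CGSCCAnalysisManager CGAM;
      ModuleAnalysisManager MAM;
      PassBuilder PB;
      PB.registerModuleAnalyses(MAM);
      PB.registerCGSCCAnalyses(CGAM);
      PB.registerFunctionAnalyses(FAM);
      PB.registerLoopAnalyses(LAM);
      PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

      FunctionPassManager FPM;
      FPM.addPass(MemCpyOptPass());
      FPM.run(F, FAM);
    }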
1484
1485
bool MemCpyOptPass::runImpl(
1486
    Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_,
1487
    std::function<AliasAnalysis &()> LookupAliasAnalysis_,
1488
    std::function<AssumptionCache &()> LookupAssumptionCache_,
1489
466k
    std::function<DominatorTree &()> LookupDomTree_) {
1490
466k
  bool MadeChange = false;
1491
466k
  MD = MD_;
1492
466k
  TLI = TLI_;
1493
466k
  LookupAliasAnalysis = std::move(LookupAliasAnalysis_);
1494
466k
  LookupAssumptionCache = std::move(LookupAssumptionCache_);
1495
466k
  LookupDomTree = std::move(LookupDomTree_);
1496
466k
1497
466k
  // If we don't have at least memset and memcpy, there is little point in doing
1498
466k
  // anything here.  These are required by a freestanding implementation, so if
1499
466k
  // even they are disabled, there is no point in trying hard.
1500
466k
  if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy))
1501
120k
    return false;
1502
346k
1503
351k
  while (true) {
1504
351k
    if (!iterateOnFunction(F))
1505
346k
      break;
1506
5.44k
    MadeChange = true;
1507
5.44k
  }
1508
346k
1509
346k
  MD = nullptr;
1510
346k
  return MadeChange;
1511
346k
}
1512
1513
/// This is the main transformation entry point for a function.
1514
465k
bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1515
465k
  if (skipFunction(F))
1516
44
    return false;
1517
465k
1518
465k
  auto *MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
1519
465k
  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1520
465k
1521
465k
  auto LookupAliasAnalysis = [this]() -> AliasAnalysis & {
1522
11.1k
    return getAnalysis<AAResultsWrapperPass>().getAAResults();
1523
11.1k
  };
1524
465k
  auto LookupAssumptionCache = [this, &F]() -> AssumptionCache & {
1525
3
    return getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1526
3
  };
1527
465k
  auto LookupDomTree = [this]() -> DominatorTree & {
1528
351k
    return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1529
351k
  };
1530
465k
1531
465k
  return Impl.runImpl(F, MD, TLI, LookupAliasAnalysis, LookupAssumptionCache,
1532
465k
                      LookupDomTree);
1533
465k
}