Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
Line
Count
Source
1
//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the visit functions for load, store and alloca.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "InstCombineInternal.h"
14
#include "llvm/ADT/MapVector.h"
15
#include "llvm/ADT/SmallString.h"
16
#include "llvm/ADT/Statistic.h"
17
#include "llvm/Analysis/Loads.h"
18
#include "llvm/Transforms/Utils/Local.h"
19
#include "llvm/IR/ConstantRange.h"
20
#include "llvm/IR/DataLayout.h"
21
#include "llvm/IR/DebugInfoMetadata.h"
22
#include "llvm/IR/IntrinsicInst.h"
23
#include "llvm/IR/LLVMContext.h"
24
#include "llvm/IR/MDBuilder.h"
25
#include "llvm/IR/PatternMatch.h"
26
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
27
using namespace llvm;
28
using namespace PatternMatch;
29
30
#define DEBUG_TYPE "instcombine"
31
32
STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
33
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34
35
/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
36
/// some part of a constant global variable.  This intentionally only accepts
37
/// constant expressions because we can't rewrite arbitrary instructions.
38
19.6k
static bool pointsToConstantGlobal(Value *V) {
39
19.6k
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
40
5.07k
    return GV->isConstant();
41
14.5k
42
14.5k
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
43
40
    if (CE->getOpcode() == Instruction::BitCast ||
44
40
        CE->getOpcode() == Instruction::AddrSpaceCast ||
45
40
        CE->getOpcode() == Instruction::GetElementPtr)
46
40
      return pointsToConstantGlobal(CE->getOperand(0));
47
14.5k
  }
48
14.5k
  return false;
49
14.5k
}
50
51
/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
52
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
53
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
54
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
55
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
56
/// the alloca, and if the source pointer is a pointer to a constant global, we
57
/// can optimize this.
58
static bool
59
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
60
1.07M
                               SmallVectorImpl<Instruction *> &ToDelete) {
61
1.07M
  // We track lifetime intrinsics as we encounter them.  If we decide to go
62
1.07M
  // ahead and replace the value with the global, this lets the caller quickly
63
1.07M
  // eliminate the markers.
64
1.07M
65
1.07M
  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
66
1.07M
  ValuesToInspect.emplace_back(V, false);
67
1.52M
  while (!ValuesToInspect.empty()) {
68
1.52M
    auto ValuePair = ValuesToInspect.pop_back_val();
69
1.52M
    const bool IsOffset = ValuePair.second;
70
7.48M
    for (auto &U : ValuePair.first->uses()) {
71
7.48M
      auto *I = cast<Instruction>(U.getUser());
72
7.48M
73
7.48M
      if (auto *LI = dyn_cast<LoadInst>(I)) {
74
194k
        // Ignore non-volatile loads, they are always ok.
75
194k
        if (!LI->isSimple()) return false;
76
193k
        continue;
77
193k
      }
78
7.29M
79
7.29M
      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
80
425k
        // If uses of the bitcast are ok, we are ok.
81
425k
        ValuesToInspect.emplace_back(I, IsOffset);
82
425k
        continue;
83
425k
      }
84
6.86M
      if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
85
5.39M
        // If the GEP has all zero indices, it doesn't offset the pointer. If it
86
5.39M
        // has any non-zero index, it does.
87
5.39M
        ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
88
5.39M
        continue;
89
5.39M
      }
90
1.46M
91
1.46M
      if (auto *Call = dyn_cast<CallBase>(I)) {
92
1.20M
        // If this is the function being called then we treat it like a load and
93
1.20M
        // ignore it.
94
1.20M
        if (Call->isCallee(&U))
95
0
          continue;
96
1.20M
97
1.20M
        unsigned DataOpNo = Call->getDataOperandNo(&U);
98
1.20M
        bool IsArgOperand = Call->isArgOperand(&U);
99
1.20M
100
1.20M
        // Inalloca arguments are clobbered by the call.
101
1.20M
        if (IsArgOperand && Call->isInAllocaArgument(DataOpNo))
102
22
          return false;
103
1.20M
104
1.20M
        // If this is a readonly/readnone call site, then we know it is just a
105
1.20M
        // load (but one that potentially returns the value itself), so we can
106
1.20M
        // ignore it if we know that the value isn't captured.
107
1.20M
        if (Call->onlyReadsMemory() &&
108
1.20M
            (Call->use_empty() || Call->doesNotCapture(DataOpNo)))
109
2.12k
          continue;
110
1.20M
111
1.20M
        // If this is being passed as a byval argument, the caller is making a
112
1.20M
        // copy, so it is only a read of the alloca.
113
1.20M
        if (IsArgOperand && Call->isByValArgument(DataOpNo))
114
5.77k
          continue;
115
1.46M
      }
116
1.46M
117
1.46M
      // Lifetime intrinsics can be handled by the caller.
118
1.46M
      if (I->isLifetimeStartOrEnd()) {
119
364k
        assert(I->use_empty() && "Lifetime markers have no result to use!");
120
364k
        ToDelete.push_back(I);
121
364k
        continue;
122
364k
      }
123
1.09M
124
1.09M
      // If this is isn't our memcpy/memmove, reject it as something we can't
125
1.09M
      // handle.
126
1.09M
      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
127
1.09M
      if (!MI)
128
1.05M
        return false;
129
43.6k
130
43.6k
      // If the transfer is using the alloca as a source of the transfer, then
131
43.6k
      // ignore it since it is a load (unless the transfer is volatile).
132
43.6k
      if (U.getOperandNo() == 1) {
133
23.8k
        if (MI->isVolatile()) return false;
134
23.8k
        continue;
135
23.8k
      }
136
19.8k
137
19.8k
      // If we already have seen a copy, reject the second one.
138
19.8k
      if (TheCopy) return false;
139
19.8k
140
19.8k
      // If the pointer has been offset from the start of the alloca, we can't
141
19.8k
      // safely handle this.
142
19.8k
      if (IsOffset) return false;
143
19.5k
144
19.5k
      // If the memintrinsic isn't using the alloca as the dest, reject it.
145
19.5k
      if (U.getOperandNo() != 0) return false;
146
19.5k
147
19.5k
      // If the source of the memcpy/move is not a constant global, reject it.
148
19.5k
      if (!pointsToConstantGlobal(MI->getSource()))
149
14.7k
        return false;
150
4.84k
151
4.84k
      // Otherwise, the transform is safe.  Remember the copy instruction.
152
4.84k
      TheCopy = MI;
153
4.84k
    }
154
1.52M
  }
155
1.07M
  return true;
156
1.07M
}
157
158
/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
159
/// modified by a copy from a constant global.  If we can prove this, we can
160
/// replace any uses of the alloca with uses of the global directly.
161
static MemTransferInst *
162
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
163
1.07M
                               SmallVectorImpl<Instruction *> &ToDelete) {
164
1.07M
  MemTransferInst *TheCopy = nullptr;
165
1.07M
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
166
9.00k
    return TheCopy;
167
1.07M
  return nullptr;
168
1.07M
}
169
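For orientation, a hand-written IR sketch of the pattern the two helpers above look for (the names @cst and %buf are illustrative, not taken from this file or from the coverage run): an alloca whose only write is a memcpy from a constant global, so visitAllocaInst can later forward the reads to the global itself.

  @cst = private unnamed_addr constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
  %buf = alloca [4 x i32], align 4
  %p = bitcast [4 x i32]* %buf to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* bitcast ([4 x i32]* @cst to i8*), i64 16, i1 false)
  ; every remaining use of %buf is a read, so %buf can be replaced by @cst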
170
/// Returns true if V is dereferenceable for the size of the alloca.
171
static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
172
1.45k
                                           const DataLayout &DL) {
173
1.45k
  if (AI->isArrayAllocation())
174
0
    return false;
175
1.45k
  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
176
1.45k
  if (!AllocaSize)
177
0
    return false;
178
1.45k
  return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
179
1.45k
                                            APInt(64, AllocaSize), DL);
180
1.45k
}
181
182
1.07M
static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
183
1.07M
  // Check for array size of 1 (scalar allocation).
184
1.07M
  if (!AI.isArrayAllocation()) {
185
1.07M
    // i32 1 is the canonical array size for scalar allocations.
186
1.07M
    if (AI.getArraySize()->getType()->isIntegerTy(32))
187
1.07M
      return nullptr;
188
56
189
56
    // Canonicalize it.
190
56
    Value *V = IC.Builder.getInt32(1);
191
56
    AI.setOperand(0, V);
192
56
    return &AI;
193
56
  }
194
2.74k
195
2.74k
  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
196
2.74k
  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
197
575
    if (C->getValue().getActiveBits() <= 64) {
198
574
      Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
199
574
      AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
200
574
      New->setAlignment(AI.getAlignment());
201
574
202
574
      // Scan to the end of the allocation instructions, to skip over a block of
203
574
      // allocas if possible...also skip interleaved debug info
204
574
      //
205
574
      BasicBlock::iterator It(New);
206
1.88k
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
207
1.31k
        ++It;
208
574
209
574
      // Now that I is pointing to the first non-allocation-inst in the block,
210
574
      // insert our getelementptr instruction...
211
574
      //
212
574
      Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
213
574
      Value *NullIdx = Constant::getNullValue(IdxTy);
214
574
      Value *Idx[2] = {NullIdx, NullIdx};
215
574
      Instruction *GEP = GetElementPtrInst::CreateInBounds(
216
574
          NewTy, New, Idx, New->getName() + ".sub");
217
574
      IC.InsertNewInstBefore(GEP, *It);
218
574
219
574
      // Now make everything use the getelementptr instead of the original
220
574
      // allocation.
221
574
      return IC.replaceInstUsesWith(AI, GEP);
222
574
    }
223
2.17k
  }
224
2.17k
225
2.17k
  if (isa<UndefValue>(AI.getArraySize()))
226
0
    return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
227
2.17k
228
2.17k
  // Ensure that the alloca array size argument has type intptr_t, so that
229
2.17k
  // any casting is exposed early.
230
2.17k
  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
231
2.17k
  if (AI.getArraySize()->getType() != IntPtrTy) {
232
13
    Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
233
13
    AI.setOperand(0, V);
234
13
    return &AI;
235
13
  }
236
2.15k
237
2.15k
  return nullptr;
238
2.15k
}
239
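A minimal before/after sketch of the array-size canonicalization above (hypothetical IR, not from the coverage run):

  ; before
  %a = alloca i32, i32 8, align 4
  ; after: the constant count is folded into the allocated type, and users go
  ; through a zero-index GEP
  %a1 = alloca [8 x i32], align 4
  %a1.sub = getelementptr inbounds [8 x i32], [8 x i32]* %a1, i64 0, i64 0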
240
namespace {
241
// If I and V are pointers in different address space, it is not allowed to
242
// use replaceAllUsesWith since I and V have different types. A
243
// non-target-specific transformation should not use addrspacecast on V since
244
// the two address spaces may be disjoint depending on the target.
245
//
246
// This class chases down uses of the old pointer until reaching the load
247
// instructions, then replaces the old pointer in the load instructions with
248
// the new pointer. If during the chasing it sees bitcast or GEP, it will
249
// create new bitcast or GEP with the new pointer and use them in the load
250
// instruction.
251
class PointerReplacer {
252
public:
253
13
  PointerReplacer(InstCombiner &IC) : IC(IC) {}
254
  void replacePointer(Instruction &I, Value *V);
255
256
private:
257
  void findLoadAndReplace(Instruction &I);
258
  void replace(Instruction *I);
259
  Value *getReplacement(Value *I);
260
261
  SmallVector<Instruction *, 4> Path;
262
  MapVector<Value *, Value *> WorkMap;
263
  InstCombiner &IC;
264
};
265
} // end anonymous namespace
266
267
33
void PointerReplacer::findLoadAndReplace(Instruction &I) {
268
38
  for (auto U : I.users()) {
269
38
    auto *Inst = dyn_cast<Instruction>(&*U);
270
38
    if (!Inst)
271
0
      return;
272
38
    LLVM_DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
273
38
    if (isa<LoadInst>(Inst)) {
274
5
      for (auto P : Path)
275
8
        replace(P);
276
5
      replace(Inst);
277
33
    } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
278
20
      Path.push_back(Inst);
279
20
      findLoadAndReplace(*Inst);
280
20
      Path.pop_back();
281
20
    } else {
282
13
      return;
283
13
    }
284
38
  }
285
33
}
286
287
26
Value *PointerReplacer::getReplacement(Value *V) {
288
26
  auto Loc = WorkMap.find(V);
289
26
  if (Loc != WorkMap.end())
290
13
    return Loc->second;
291
13
  return nullptr;
292
13
}
293
294
13
void PointerReplacer::replace(Instruction *I) {
295
13
  if (getReplacement(I))
296
0
    return;
297
13
298
13
  if (auto *LT = dyn_cast<LoadInst>(I)) {
299
5
    auto *V = getReplacement(LT->getPointerOperand());
300
5
    assert(V && "Operand not replaced");
301
5
    auto *NewI = new LoadInst(I->getType(), V);
302
5
    NewI->takeName(LT);
303
5
    IC.InsertNewInstWith(NewI, *LT);
304
5
    IC.replaceInstUsesWith(*LT, NewI);
305
5
    WorkMap[LT] = NewI;
306
8
  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
307
6
    auto *V = getReplacement(GEP->getPointerOperand());
308
6
    assert(V && "Operand not replaced");
309
6
    SmallVector<Value *, 8> Indices;
310
6
    Indices.append(GEP->idx_begin(), GEP->idx_end());
311
6
    auto *NewI = GetElementPtrInst::Create(
312
6
        V->getType()->getPointerElementType(), V, Indices);
313
6
    IC.InsertNewInstWith(NewI, *GEP);
314
6
    NewI->takeName(GEP);
315
6
    WorkMap[GEP] = NewI;
316
6
  } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
317
2
    auto *V = getReplacement(BC->getOperand(0));
318
2
    assert(V && "Operand not replaced");
319
2
    auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
320
2
                                  V->getType()->getPointerAddressSpace());
321
2
    auto *NewI = new BitCastInst(V, NewT);
322
2
    IC.InsertNewInstWith(NewI, *BC);
323
2
    NewI->takeName(BC);
324
2
    WorkMap[BC] = NewI;
325
2
  } else {
326
0
    llvm_unreachable("should never reach here");
327
0
  }
328
13
}
329
330
13
void PointerReplacer::replacePointer(Instruction &I, Value *V) {
331
#ifndef NDEBUG
332
  auto *PT = cast<PointerType>(I.getType());
333
  auto *NT = cast<PointerType>(V->getType());
334
  assert(PT != NT && PT->getElementType() == NT->getElementType() &&
335
         "Invalid usage");
336
#endif
337
  WorkMap[&I] = V;
338
13
  findLoadAndReplace(I);
339
13
}
340
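A rough sketch of the case PointerReplacer exists for (address-space numbers and names are made up for illustration): the replacement pointer lives in a different address space than the alloca, so instead of replaceAllUsesWith the GEP/bitcast/load chain is rebuilt on the new pointer.

  ; before: %buf is an alloca in addrspace(5), only initialized by a copy from @cst in addrspace(4)
  %gep = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %buf, i32 0, i32 %i
  %v   = load float, float addrspace(5)* %gep
  ; after: a parallel GEP is created on @cst and the load is re-issued through it
  %gep.new = getelementptr inbounds [4 x float], [4 x float] addrspace(4)* @cst, i32 0, i32 %i
  %v       = load float, float addrspace(4)* %gep.new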
341
1.07M
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
342
1.07M
  if (auto *I = simplifyAllocaArraySize(*this, AI))
343
643
    return I;
344
1.07M
345
1.07M
  if (AI.getAllocatedType()->isSized()) {
346
1.07M
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
347
1.07M
    if (AI.getAlignment() == 0)
348
1.86k
      AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
349
1.07M
350
1.07M
    // Move all alloca's of zero byte objects to the entry block and merge them
351
1.07M
    // together.  Note that we only do this for alloca's, because malloc should
352
1.07M
    // allocate and return a unique pointer, even for a zero byte allocation.
353
1.07M
    if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
354
109
      // For a zero sized alloca there is no point in doing an array allocation.
355
109
      // This is helpful if the array size is a complicated expression not used
356
109
      // elsewhere.
357
109
      if (AI.isArrayAllocation()) {
358
3
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
359
3
        return &AI;
360
3
      }
361
106
362
106
      // Get the first instruction in the entry block.
363
106
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
364
106
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
365
106
      if (FirstInst != &AI) {
366
13
        // If the entry block doesn't start with a zero-size alloca then move
367
13
        // this one to the start of the entry block.  There is no problem with
368
13
        // dominance as the array size was forced to a constant earlier already.
369
13
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
370
13
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
371
13
            DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
372
2
          AI.moveBefore(FirstInst);
373
2
          return &AI;
374
2
        }
375
11
376
11
        // If the alignment of the entry block alloca is 0 (unspecified),
377
11
        // assign it the preferred alignment.
378
11
        if (EntryAI->getAlignment() == 0)
379
0
          EntryAI->setAlignment(
380
0
              DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
381
11
        // Replace this zero-sized alloca with the one at the start of the entry
382
11
        // block after ensuring that the address will be aligned enough for both
383
11
        // types.
384
11
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
385
11
                                     AI.getAlignment());
386
11
        EntryAI->setAlignment(MaxAlign);
387
11
        if (AI.getType() != EntryAI->getType())
388
6
          return new BitCastInst(EntryAI, AI.getType());
389
5
        return replaceInstUsesWith(AI, EntryAI);
390
5
      }
391
106
    }
392
1.07M
  }
393
1.07M
394
1.07M
  if (AI.getAlignment()) {
395
1.07M
    // Check to see if this allocation is only modified by a memcpy/memmove from
396
1.07M
    // a constant global whose alignment is equal to or exceeds that of the
397
1.07M
    // allocation.  If this is the case, we can change all users to use
398
1.07M
    // the constant global instead.  This is commonly produced by the CFE by
399
1.07M
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
400
1.07M
    // is only subsequently read.
401
1.07M
    SmallVector<Instruction *, 4> ToDelete;
402
1.07M
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
403
1.45k
      unsigned SourceAlign = getOrEnforceKnownAlignment(
404
1.45k
          Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
405
1.45k
      if (AI.getAlignment() <= SourceAlign &&
406
1.45k
          isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
407
1.44k
        LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
408
1.44k
        LLVM_DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
409
4.26k
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
410
2.82k
          eraseInstFromFunction(*ToDelete[i]);
411
1.44k
        Constant *TheSrc = cast<Constant>(Copy->getSource());
412
1.44k
        auto *SrcTy = TheSrc->getType();
413
1.44k
        auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
414
1.44k
                                        SrcTy->getPointerAddressSpace());
415
1.44k
        Constant *Cast =
416
1.44k
            ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
417
1.44k
        if (AI.getType()->getPointerAddressSpace() ==
418
1.44k
            SrcTy->getPointerAddressSpace()) {
419
1.43k
          Instruction *NewI = replaceInstUsesWith(AI, Cast);
420
1.43k
          eraseInstFromFunction(*Copy);
421
1.43k
          ++NumGlobalCopies;
422
1.43k
          return NewI;
423
1.43k
        } else {
424
13
          PointerReplacer PtrReplacer(*this);
425
13
          PtrReplacer.replacePointer(AI, Cast);
426
13
          ++NumGlobalCopies;
427
13
        }
428
1.44k
      }
429
1.45k
    }
430
1.07M
  }
431
1.07M
432
1.07M
  // At last, use the generic allocation site handler to aggressively remove
433
1.07M
  // unused allocas.
434
1.07M
  return visitAllocSite(AI);
435
1.07M
}
436
437
// Are we allowed to form an atomic load or store of this type?
438
10
static bool isSupportedAtomicType(Type *Ty) {
439
10
  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
440
10
}
441
442
/// Helper to combine a load to a new type.
443
///
444
/// This just does the work of combining a load to a new type. It handles
445
/// metadata, etc., and returns the new instruction. The \c NewTy should be the
446
/// loaded *value* type. This will convert it to a pointer, cast the operand to
447
/// that pointer type, load it, etc.
448
///
449
/// Note that this will create all of the instructions with whatever insert
450
/// point the \c InstCombiner currently is using.
451
static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
452
93.3k
                                      const Twine &Suffix = "") {
453
93.3k
  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
454
93.3k
         "can't fold an atomic load to requested type");
455
93.3k
456
93.3k
  Value *Ptr = LI.getPointerOperand();
457
93.3k
  unsigned AS = LI.getPointerAddressSpace();
458
93.3k
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
459
93.3k
  LI.getAllMetadata(MD);
460
93.3k
461
93.3k
  Value *NewPtr = nullptr;
462
93.3k
  if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
463
93.3k
        NewPtr->getType()->getPointerElementType() == NewTy &&
464
93.3k
        NewPtr->getType()->getPointerAddressSpace() == AS))
465
91.1k
    NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
466
93.3k
467
93.3k
  LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
468
93.3k
      NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
469
93.3k
  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
470
93.3k
  MDBuilder MDB(NewLoad->getContext());
471
93.3k
  for (const auto &MDPair : MD) {
472
68.0k
    unsigned ID = MDPair.first;
473
68.0k
    MDNode *N = MDPair.second;
474
68.0k
    // Note, essentially every kind of metadata should be preserved here! This
475
68.0k
    // routine is supposed to clone a load instruction changing *only its type*.
476
68.0k
    // The only metadata it makes sense to drop is metadata which is invalidated
477
68.0k
    // when the pointer type changes. This should essentially never be the case
478
68.0k
    // in LLVM, but we explicitly switch over only known metadata to be
479
68.0k
    // conservatively correct. If you are adding metadata to LLVM which pertains
480
68.0k
    // to loads, you almost certainly want to add it here.
481
68.0k
    switch (ID) {
482
68.0k
    case LLVMContext::MD_dbg:
483
68.0k
    case LLVMContext::MD_tbaa:
484
68.0k
    case LLVMContext::MD_prof:
485
68.0k
    case LLVMContext::MD_fpmath:
486
68.0k
    case LLVMContext::MD_tbaa_struct:
487
68.0k
    case LLVMContext::MD_invariant_load:
488
68.0k
    case LLVMContext::MD_alias_scope:
489
68.0k
    case LLVMContext::MD_noalias:
490
68.0k
    case LLVMContext::MD_nontemporal:
491
68.0k
    case LLVMContext::MD_mem_parallel_loop_access:
492
68.0k
    case LLVMContext::MD_access_group:
493
68.0k
      // All of these directly apply.
494
68.0k
      NewLoad->setMetadata(ID, N);
495
68.0k
      break;
496
68.0k
497
68.0k
    case LLVMContext::MD_nonnull:
498
3
      copyNonnullMetadata(LI, N, *NewLoad);
499
3
      break;
500
68.0k
    case LLVMContext::MD_align:
501
3
    case LLVMContext::MD_dereferenceable:
502
3
    case LLVMContext::MD_dereferenceable_or_null:
503
3
      // These only directly apply if the new type is also a pointer.
504
3
      if (NewTy->isPointerTy())
505
3
        NewLoad->setMetadata(ID, N);
506
3
      break;
507
3
    case LLVMContext::MD_range:
508
3
      copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
509
3
      break;
510
68.0k
    }
511
68.0k
  }
512
93.3k
  return NewLoad;
513
93.3k
}
514
515
/// Combine a store to a new type.
516
///
517
/// Returns the newly created store instruction.
518
38.4k
static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
519
38.4k
  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
520
38.4k
         "can't fold an atomic store of requested type");
521
38.4k
522
38.4k
  Value *Ptr = SI.getPointerOperand();
523
38.4k
  unsigned AS = SI.getPointerAddressSpace();
524
38.4k
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
525
38.4k
  SI.getAllMetadata(MD);
526
38.4k
527
38.4k
  StoreInst *NewStore = IC.Builder.CreateAlignedStore(
528
38.4k
      V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
529
38.4k
      SI.getAlignment(), SI.isVolatile());
530
38.4k
  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
531
40.5k
  for (const auto &MDPair : MD) {
532
40.5k
    unsigned ID = MDPair.first;
533
40.5k
    MDNode *N = MDPair.second;
534
40.5k
    // Note, essentially every kind of metadata should be preserved here! This
535
40.5k
    // routine is supposed to clone a store instruction changing *only its
536
40.5k
    // type*. The only metadata it makes sense to drop is metadata which is
537
40.5k
    // invalidated when the pointer type changes. This should essentially
538
40.5k
    // never be the case in LLVM, but we explicitly switch over only known
539
40.5k
    // metadata to be conservatively correct. If you are adding metadata to
540
40.5k
    // LLVM which pertains to stores, you almost certainly want to add it
541
40.5k
    // here.
542
40.5k
    switch (ID) {
543
40.5k
    case LLVMContext::MD_dbg:
544
40.5k
    case LLVMContext::MD_tbaa:
545
40.5k
    case LLVMContext::MD_prof:
546
40.5k
    case LLVMContext::MD_fpmath:
547
40.5k
    case LLVMContext::MD_tbaa_struct:
548
40.5k
    case LLVMContext::MD_alias_scope:
549
40.5k
    case LLVMContext::MD_noalias:
550
40.5k
    case LLVMContext::MD_nontemporal:
551
40.5k
    case LLVMContext::MD_mem_parallel_loop_access:
552
40.5k
    case LLVMContext::MD_access_group:
553
40.5k
      // All of these directly apply.
554
40.5k
      NewStore->setMetadata(ID, N);
555
40.5k
      break;
556
40.5k
    case LLVMContext::MD_invariant_load:
557
0
    case LLVMContext::MD_nonnull:
558
0
    case LLVMContext::MD_range:
559
0
    case LLVMContext::MD_align:
560
0
    case LLVMContext::MD_dereferenceable:
561
0
    case LLVMContext::MD_dereferenceable_or_null:
562
0
      // These don't apply for stores.
563
0
      break;
564
40.5k
    }
565
40.5k
  }
566
38.4k
567
38.4k
  return NewStore;
568
38.4k
}
569
570
/// Returns true if instruction represent minmax pattern like:
571
///   select ((cmp load V1, load V2), V1, V2).
572
8.26M
static bool isMinMaxWithLoads(Value *V) {
573
8.26M
  assert(V->getType()->isPointerTy() && "Expected pointer type.");
574
8.26M
  // Ignore possible ty* to ixx* bitcast.
575
8.26M
  V = peekThroughBitcast(V);
576
8.26M
  // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
577
8.26M
  // pattern.
578
8.26M
  CmpInst::Predicate Pred;
579
8.26M
  Instruction *L1;
580
8.26M
  Instruction *L2;
581
8.26M
  Value *LHS;
582
8.26M
  Value *RHS;
583
8.26M
  if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
584
8.26M
                         m_Value(LHS), m_Value(RHS))))
585
8.26M
    return false;
586
1.64k
  return (match(L1, m_Load(m_Specific(LHS))) &&
587
1.64k
          match(L2, m_Load(m_Specific(RHS)))) ||
588
1.64k
         (match(L1, m_Load(m_Specific(RHS))) &&
589
1.22k
          match(L2, m_Load(m_Specific(LHS))));
590
1.64k
}
591
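The shape this predicate matches, as a hand-written sketch (%p1 and %p2 are illustrative pointers): the compare is between the two loaded values, while the select picks between the pointers themselves.

  %v1 = load i32, i32* %p1
  %v2 = load i32, i32* %p2
  %c  = icmp slt i32 %v1, %v2
  %m  = select i1 %c, i32* %p1, i32* %p2   ; address of the smaller value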
592
/// Combine loads to match the type of their uses' value after looking
593
/// through intervening bitcasts.
594
///
595
/// The core idea here is that if the result of a load is used in an operation,
596
/// we should load the type most conducive to that operation. For example, when
597
/// loading an integer and converting that immediately to a pointer, we should
598
/// instead directly load a pointer.
599
///
600
/// However, this routine must never change the width of a load or the number of
601
/// loads as that would introduce a semantic change. This combine is expected to
602
/// be a semantic no-op which just allows loads to more closely model the types
603
/// of their consuming operations.
604
///
605
/// Currently, we also refuse to change the precise type used for an atomic load
606
/// or a volatile load. This is debatable, and might be reasonable to change
607
/// later. However, it is risky in case some backend or other part of LLVM is
608
/// relying on the exact type loaded to select appropriate atomic operations.
609
18.7M
static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
610
18.7M
  // FIXME: We could probably with some care handle both volatile and ordered
611
18.7M
  // atomic loads here but it isn't clear that this is important.
612
18.7M
  if (!LI.isUnordered())
613
188k
    return nullptr;
614
18.5M
615
18.5M
  if (LI.use_empty())
616
0
    return nullptr;
617
18.5M
618
18.5M
  // swifterror values can't be bitcasted.
619
18.5M
  if (LI.getPointerOperand()->isSwiftError())
620
2
    return nullptr;
621
18.5M
622
18.5M
  Type *Ty = LI.getType();
623
18.5M
  const DataLayout &DL = IC.getDataLayout();
624
18.5M
625
18.5M
  // Try to canonicalize loads which are only ever stored to operate over
626
18.5M
  // integers instead of any other type. We only do this when the loaded type
627
18.5M
  // is sized and has a size exactly the same as its store size and the store
628
18.5M
  // size is a legal integer type.
629
18.5M
  // Do not perform canonicalization if minmax pattern is found (to avoid
630
18.5M
  // infinite loop).
631
18.5M
  if (!Ty->isIntegerTy() && Ty->isSized() &&
632
18.5M
      DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
633
18.5M
      DL.typeSizeEqualsStoreSize(Ty) &&
634
18.5M
      !DL.isNonIntegralPointerType(Ty) &&
635
18.5M
      !isMinMaxWithLoads(
636
7.55M
          peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
637
7.60M
    if (all_of(LI.users(), [&LI](User *U) {
638
7.60M
          auto *SI = dyn_cast<StoreInst>(U);
639
7.60M
          return SI && SI->getPointerOperand() != &LI &&
640
7.60M
                 !SI->getPointerOperand()->isSwiftError();
641
7.60M
        })) {
642
20.0k
      LoadInst *NewLoad = combineLoadToNewType(
643
20.0k
          IC, LI,
644
20.0k
          Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
645
20.0k
      // Replace all the stores with stores of the newly loaded value.
646
40.3k
      for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
647
20.3k
        auto *SI = cast<StoreInst>(*UI++);
648
20.3k
        IC.Builder.SetInsertPoint(SI);
649
20.3k
        combineStoreToNewValue(IC, *SI, NewLoad);
650
20.3k
        IC.eraseInstFromFunction(*SI);
651
20.3k
      }
652
20.0k
      assert(LI.use_empty() && "Failed to remove all users of the load!");
653
20.0k
      // Return the old load so the combiner can delete it safely.
654
20.0k
      return &LI;
655
20.0k
    }
656
18.5M
  }
657
18.5M
658
18.5M
  // Fold away bit casts of the loaded value by loading the desired type.
659
18.5M
  // We can do this for BitCastInsts as well as casts from and to pointer types,
660
18.5M
  // as long as those are noops (i.e., the source or dest type have the same
661
18.5M
  // bitwidth as the target's pointers).
662
18.5M
  if (LI.hasOneUse())
663
13.4M
    if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
664
1.22M
      if (CI->isNoopCast(DL))
665
73.2k
        if (!LI.isAtomic() || 
isSupportedAtomicType(CI->getDestTy())6
) {
666
73.2k
          LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
667
73.2k
          CI->replaceAllUsesWith(NewLoad);
668
73.2k
          IC.eraseInstFromFunction(*CI);
669
73.2k
          return &LI;
670
73.2k
        }
671
18.4M
672
18.4M
  // FIXME: We should also canonicalize loads of vectors when their elements are
673
18.4M
  // cast to other types.
674
18.4M
  return nullptr;
675
18.4M
}
676
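A minimal sketch of the no-op-cast fold performed above (hypothetical IR; the routine also handles the load-only-ever-stored case by switching to an integer load):

  ; before: the loaded value is only consumed through a no-op bitcast
  %i = load i64, i64* %p, align 8
  %d = bitcast i64 %i to double
  ; after: load the type the user actually wants
  %pd = bitcast i64* %p to double*
  %d  = load double, double* %pd, align 8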
677
18.6M
static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
678
18.6M
  // FIXME: We could probably with some care handle both volatile and atomic
679
18.6M
  // stores here but it isn't clear that this is important.
680
18.6M
  if (!LI.isSimple())
681
188k
    return nullptr;
682
18.4M
683
18.4M
  Type *T = LI.getType();
684
18.4M
  if (!T->isAggregateType())
685
18.4M
    return nullptr;
686
55
687
55
  StringRef Name = LI.getName();
688
55
  assert(LI.getAlignment() && "Alignment must be set at this point");
689
55
690
55
  if (auto *ST = dyn_cast<StructType>(T)) {
691
45
    // If the struct only has one element, we unpack.
692
45
    auto NumElements = ST->getNumElements();
693
45
    if (NumElements == 1) {
694
8
      LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
695
8
                                               ".unpack");
696
8
      AAMDNodes AAMD;
697
8
      LI.getAAMetadata(AAMD);
698
8
      NewLoad->setAAMetadata(AAMD);
699
8
      return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
700
8
        UndefValue::get(T), NewLoad, 0, Name));
701
8
    }
702
37
703
37
    // We don't want to break loads with padding here as we'd loose
704
37
    // the knowledge that padding exists for the rest of the pipeline.
705
37
    const DataLayout &DL = IC.getDataLayout();
706
37
    auto *SL = DL.getStructLayout(ST);
707
37
    if (SL->hasPadding())
708
22
      return nullptr;
709
15
710
15
    auto Align = LI.getAlignment();
711
15
    if (!Align)
712
0
      Align = DL.getABITypeAlignment(ST);
713
15
714
15
    auto *Addr = LI.getPointerOperand();
715
15
    auto *IdxType = Type::getInt32Ty(T->getContext());
716
15
    auto *Zero = ConstantInt::get(IdxType, 0);
717
15
718
15
    Value *V = UndefValue::get(T);
719
54
    for (unsigned i = 0; i < NumElements; i++) {
720
39
      Value *Indices[2] = {
721
39
        Zero,
722
39
        ConstantInt::get(IdxType, i),
723
39
      };
724
39
      auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
725
39
                                               Name + ".elt");
726
39
      auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
727
39
      auto *L = IC.Builder.CreateAlignedLoad(ST->getElementType(i), Ptr,
728
39
                                             EltAlign, Name + ".unpack");
729
39
      // Propagate AA metadata. It'll still be valid on the narrowed load.
730
39
      AAMDNodes AAMD;
731
39
      LI.getAAMetadata(AAMD);
732
39
      L->setAAMetadata(AAMD);
733
39
      V = IC.Builder.CreateInsertValue(V, L, i);
734
39
    }
735
15
736
15
    V->setName(Name);
737
15
    return IC.replaceInstUsesWith(LI, V);
738
15
  }
739
10
740
10
  if (auto *AT = dyn_cast<ArrayType>(T)) {
741
8
    auto *ET = AT->getElementType();
742
8
    auto NumElements = AT->getNumElements();
743
8
    if (NumElements == 1) {
744
2
      LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
745
2
      AAMDNodes AAMD;
746
2
      LI.getAAMetadata(AAMD);
747
2
      NewLoad->setAAMetadata(AAMD);
748
2
      return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
749
2
        UndefValue::get(T), NewLoad, 0, Name));
750
2
    }
751
6
752
6
    // Bail out if the array is too large. Ideally we would like to optimize
753
6
    // arrays of arbitrary size but this has a terrible impact on compile time.
754
6
    // The threshold here is chosen arbitrarily, maybe needs a little bit of
755
6
    // tuning.
756
6
    if (NumElements > IC.MaxArraySizeForCombine)
757
1
      return nullptr;
758
5
759
5
    const DataLayout &DL = IC.getDataLayout();
760
5
    auto EltSize = DL.getTypeAllocSize(ET);
761
5
    auto Align = LI.getAlignment();
762
5
    if (!Align)
763
0
      Align = DL.getABITypeAlignment(T);
764
5
765
5
    auto *Addr = LI.getPointerOperand();
766
5
    auto *IdxType = Type::getInt64Ty(T->getContext());
767
5
    auto *Zero = ConstantInt::get(IdxType, 0);
768
5
769
5
    Value *V = UndefValue::get(T);
770
5
    uint64_t Offset = 0;
771
15
    for (uint64_t i = 0; i < NumElements; i++) {
772
10
      Value *Indices[2] = {
773
10
        Zero,
774
10
        ConstantInt::get(IdxType, i),
775
10
      };
776
10
      auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
777
10
                                               Name + ".elt");
778
10
      auto *L = IC.Builder.CreateAlignedLoad(
779
10
          AT->getElementType(), Ptr, MinAlign(Align, Offset), Name + ".unpack");
780
10
      AAMDNodes AAMD;
781
10
      LI.getAAMetadata(AAMD);
782
10
      L->setAAMetadata(AAMD);
783
10
      V = IC.Builder.CreateInsertValue(V, L, i);
784
10
      Offset += EltSize;
785
10
    }
786
5
787
5
    V->setName(Name);
788
5
    return IC.replaceInstUsesWith(LI, V);
789
5
  }
790
2
791
2
  return nullptr;
792
2
}
793
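A sketch of the struct unpacking above for a padding-free two-field struct (hypothetical IR and names):

  ; before
  %agg = load { i32, i32 }, { i32, i32 }* %p, align 4
  ; after: one load per field, reassembled with insertvalue
  %p.elt0 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 0
  %e0     = load i32, i32* %p.elt0, align 4
  %v0     = insertvalue { i32, i32 } undef, i32 %e0, 0
  %p.elt1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 1
  %e1     = load i32, i32* %p.elt1, align 4
  %agg    = insertvalue { i32, i32 } %v0, i32 %e1, 1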
794
// If we can determine that all possible objects pointed to by the provided
795
// pointer value are, not only dereferenceable, but also definitively less than
796
// or equal to the provided maximum size, then return true. Otherwise, return
797
// false (constant global values and allocas fall into this category).
798
//
799
// FIXME: This should probably live in ValueTracking (or similar).
800
static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
801
2.87M
                                     const DataLayout &DL) {
802
2.87M
  SmallPtrSet<Value *, 4> Visited;
803
2.87M
  SmallVector<Value *, 4> Worklist(1, V);
804
2.87M
805
3.06M
  do {
806
3.06M
    Value *P = Worklist.pop_back_val();
807
3.06M
    P = P->stripPointerCasts();
808
3.06M
809
3.06M
    if (!Visited.insert(P).second)
810
12.6k
      continue;
811
3.04M
812
3.04M
    if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
813
6.93k
      Worklist.push_back(SI->getTrueValue());
814
6.93k
      Worklist.push_back(SI->getFalseValue());
815
6.93k
      continue;
816
6.93k
    }
817
3.04M
818
3.04M
    if (PHINode *PN = dyn_cast<PHINode>(P)) {
819
162k
      for (Value *IncValue : PN->incoming_values())
820
394k
        Worklist.push_back(IncValue);
821
162k
      continue;
822
162k
    }
823
2.87M
824
2.87M
    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
825
1
      if (GA->isInterposable())
826
1
        return false;
827
0
      Worklist.push_back(GA->getAliasee());
828
0
      continue;
829
0
    }
830
2.87M
831
2.87M
    // If we know how big this object is, and it is less than MaxSize, continue
832
2.87M
    // searching. Otherwise, return false.
833
2.87M
    if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
834
148k
      if (!AI->getAllocatedType()->isSized())
835
0
        return false;
836
148k
837
148k
      ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
838
148k
      if (!CS)
839
3.68k
        return false;
840
144k
841
144k
      uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
842
144k
      // Make sure that, even if the multiplication below would wrap as an
843
144k
      // uint64_t, we still do the right thing.
844
144k
      if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
845
144k
        return false;
846
177
      continue;
847
177
    }
848
2.73M
849
2.73M
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
850
413k
      if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
851
329k
        return false;
852
84.9k
853
84.9k
      uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
854
84.9k
      if (InitSize > MaxSize)
855
84.8k
        return false;
856
84
      continue;
857
84
    }
858
2.31M
859
2.31M
    return false;
860
2.31M
  } while (!Worklist.empty());
861
2.87M
862
2.87M
  return true;
863
2.87M
}
864
865
// If we're indexing into an object of a known size, and the outer index is
866
// not a constant, but having any value but zero would lead to undefined
867
// behavior, replace it with zero.
868
//
869
// For example, if we have:
870
// @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
871
// ...
872
// %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
873
// ... = load i32* %arrayidx, align 4
874
// Then we know that we can replace %x in the GEP with i64 0.
875
//
876
// FIXME: We could fold any GEP index to zero that would cause UB if it were
877
// not zero. Currently, we only handle the first such index. Also, we could
878
// also search through non-zero constant indices if we kept track of the
879
// offsets those indices implied.
880
static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
881
20.3M
                                     Instruction *MemI, unsigned &Idx) {
882
20.3M
  if (GEPI->getNumOperands() < 2)
883
0
    return false;
884
20.3M
885
20.3M
  // Find the first non-zero index of a GEP. If all indices are zero, return
886
20.3M
  // one past the last index.
887
20.3M
  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
888
20.3M
    unsigned I = 1;
889
44.4M
    for (unsigned IE = GEPI->getNumOperands(); I != IE; 
++I24.0M
) {
890
41.7M
      Value *V = GEPI->getOperand(I);
891
41.7M
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
892
38.8M
        if (CI->isZero())
893
24.0M
          continue;
894
17.6M
895
17.6M
      break;
896
17.6M
    }
897
20.3M
898
20.3M
    return I;
899
20.3M
  };
900
20.3M
901
20.3M
  // Skip through initial 'zero' indices, and find the corresponding pointer
902
20.3M
  // type. See if the next index is not a constant.
903
20.3M
  Idx = FirstNZIdx(GEPI);
904
20.3M
  if (Idx == GEPI->getNumOperands())
905
2.67M
    return false;
906
17.6M
  if (isa<Constant>(GEPI->getOperand(Idx)))
907
14.8M
    return false;
908
2.88M
909
2.88M
  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
910
2.88M
  Type *AllocTy =
911
2.88M
    GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
912
2.88M
  if (!AllocTy || !AllocTy->isSized())
913
0
    return false;
914
2.88M
  const DataLayout &DL = IC.getDataLayout();
915
2.88M
  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
916
2.88M
917
2.88M
  // If there are more indices after the one we might replace with a zero, make
918
2.88M
  // sure they're all non-negative. If any of them are negative, the overall
919
2.88M
  // address being computed might be before the base address determined by the
920
2.88M
  // first non-zero index.
921
2.88M
  auto IsAllNonNegative = [&]() {
922
378
    for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; 
++i150
) {
923
150
      KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
924
150
      if (Known.isNonNegative())
925
150
        continue;
926
0
      return false;
927
0
    }
928
228
929
228
    return true;
930
228
  };
931
2.88M
932
2.88M
  // FIXME: If the GEP is not inbounds, and there are extra indices after the
933
2.88M
  // one we'll replace, those could cause the address computation to wrap
934
2.88M
  // (rendering the IsAllNonNegative() check below insufficient). We can do
935
2.88M
  // better, ignoring zero indices (and other indices we can prove small
936
2.88M
  // enough not to wrap).
937
2.88M
  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
938
1.87k
    return false;
939
2.87M
940
2.87M
  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
941
2.87M
  // also known to be dereferenceable.
942
2.87M
  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
943
2.87M
         
IsAllNonNegative()228
;
944
2.87M
}
945
946
// If we're indexing into an object with a variable index for the memory
947
// access, but the object has only one element, we can assume that the index
948
// will always be zero. If we replace the GEP, return it.
949
template <typename T>
950
static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
951
31.5M
                                          T &MemI) {
952
31.5M
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
953
20.3M
    unsigned Idx;
954
20.3M
    if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
955
228
      Instruction *NewGEPI = GEPI->clone();
956
228
      NewGEPI->setOperand(Idx,
957
228
        ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
958
228
      NewGEPI->insertBefore(GEPI);
959
228
      MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
960
228
      return NewGEPI;
961
228
    }
962
31.5M
  }
963
31.5M
964
31.5M
  return nullptr;
965
31.5M
}
InstCombineLoadStoreAlloca.cpp:llvm::Instruction* replaceGEPIdxWithZero<llvm::LoadInst>(llvm::InstCombiner&, llvm::Value*, llvm::LoadInst&)
Line
Count
Source
951
18.6M
                                          T &MemI) {
952
18.6M
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
953
12.4M
    unsigned Idx;
954
12.4M
    if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
955
107
      Instruction *NewGEPI = GEPI->clone();
956
107
      NewGEPI->setOperand(Idx,
957
107
        ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
958
107
      NewGEPI->insertBefore(GEPI);
959
107
      MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
960
107
      return NewGEPI;
961
107
    }
962
18.6M
  }
963
18.6M
964
18.6M
  return nullptr;
965
18.6M
}
InstCombineLoadStoreAlloca.cpp:llvm::Instruction* replaceGEPIdxWithZero<llvm::StoreInst>(llvm::InstCombiner&, llvm::Value*, llvm::StoreInst&)
Line
Count
Source
951
12.9M
                                          T &MemI) {
952
12.9M
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
953
7.90M
    unsigned Idx;
954
7.90M
    if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
955
121
      Instruction *NewGEPI = GEPI->clone();
956
121
      NewGEPI->setOperand(Idx,
957
121
        ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
958
121
      NewGEPI->insertBefore(GEPI);
959
121
      MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
960
121
      return NewGEPI;
961
121
    }
962
12.9M
  }
963
12.9M
964
12.9M
  return nullptr;
965
12.9M
}
966
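Continuing the @f.a example from the comment before canReplaceGEPIdxWithZero, the rewritten access would look like this (sketch only; any non-zero %x would be undefined behavior, so 0 is the only legal value):

  %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @f.a, i64 0, i64 0
  %v = load i32, i32* %arrayidx, align 4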
967
12.7M
static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
968
12.7M
  if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
969
4.35k
    return false;
970
12.7M
971
12.7M
  auto *Ptr = SI.getPointerOperand();
972
12.7M
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
973
7.83M
    Ptr = GEPI->getOperand(0);
974
12.7M
  return (isa<ConstantPointerNull>(Ptr) &&
975
12.7M
          
!NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace())113
);
976
12.7M
}
977
978
18.4M
static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
979
18.4M
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
980
12.3M
    const Value *GEPI0 = GEPI->getOperand(0);
981
12.3M
    if (isa<ConstantPointerNull>(GEPI0) &&
982
12.3M
        
!NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace())17
)
983
13
      return true;
984
18.4M
  }
985
18.4M
  if (isa<UndefValue>(Op) ||
986
18.4M
      (isa<ConstantPointerNull>(Op) &&
987
18.4M
       !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
988
33
    return true;
989
18.4M
  return false;
990
18.4M
}
991
992
18.7M
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
993
18.7M
  Value *Op = LI.getOperand(0);
994
18.7M
995
18.7M
  // Try to canonicalize the loaded type.
996
18.7M
  if (Instruction *Res = combineLoadToOperationType(*this, LI))
997
93.2k
    return Res;
998
18.6M
999
18.6M
  // Attempt to improve the alignment.
1000
18.6M
  unsigned KnownAlign = getOrEnforceKnownAlignment(
1001
18.6M
      Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT);
1002
18.6M
  unsigned LoadAlign = LI.getAlignment();
1003
18.6M
  unsigned EffectiveLoadAlign =
1004
18.6M
      LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
1005
18.6M
1006
18.6M
  if (KnownAlign > EffectiveLoadAlign)
1007
9.47k
    LI.setAlignment(KnownAlign);
1008
18.6M
  else if (LoadAlign == 0)
1009
74.1k
    LI.setAlignment(EffectiveLoadAlign);
1010
18.6M
1011
18.6M
  // Replace GEP indices if possible.
1012
18.6M
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
1013
107
      Worklist.Add(NewGEPI);
1014
107
      return &LI;
1015
107
  }
1016
18.6M
1017
18.6M
  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1018
30
    return Res;
1019
18.6M
1020
18.6M
  // Do really simple store-to-load forwarding and load CSE, to catch cases
1021
18.6M
  // where there are several consecutive memory accesses to the same location,
1022
18.6M
  // separated by a few arithmetic operations.
1023
18.6M
  BasicBlock::iterator BBI(LI);
1024
18.6M
  bool IsLoadCSE = false;
1025
18.6M
  if (Value *AvailableVal = FindAvailableLoadedValue(
1026
16.9k
          &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1027
16.9k
    if (IsLoadCSE)
1028
9.27k
      combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI, false);
1029
16.9k
1030
16.9k
    return replaceInstUsesWith(
1031
16.9k
        LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1032
16.9k
                                           LI.getName() + ".cast"));
1033
16.9k
  }
1034
18.6M
1035
18.6M
  // None of the following transforms are legal for volatile/ordered atomic
1036
18.6M
  // loads.  Most of them do apply for unordered atomics.
1037
18.6M
  if (!LI.isUnordered()) return nullptr;
1038
18.4M
1039
18.4M
  // load(gep null, ...) -> unreachable
1040
18.4M
  // load null/undef -> unreachable
1041
18.4M
  // TODO: Consider a target hook for valid address spaces for this xforms.
1042
18.4M
  if (canSimplifyNullLoadOrGEP(LI, Op)) {
1043
46
    // Insert a new store to null instruction before the load to indicate
1044
46
    // that this code is not reachable.  We do this instead of inserting
1045
46
    // an unreachable instruction directly because we cannot modify the
1046
46
    // CFG.
1047
46
    StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
1048
46
                                  Constant::getNullValue(Op->getType()), &LI);
1049
46
    SI->setDebugLoc(LI.getDebugLoc());
1050
46
    return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
1051
46
  }
1052
18.4M
1053
18.4M
  if (Op->hasOneUse()) {
1054
10.7M
    // Change select and PHI nodes to select values instead of addresses: this
1055
10.7M
    // helps alias analysis out a lot, allows many others simplifications, and
1056
10.7M
    // exposes redundancy in the code.
1057
10.7M
    //
1058
10.7M
    // Note that we cannot do the transformation unless we know that the
1059
10.7M
    // introduced loads cannot trap!  Something like this is valid as long as
1060
10.7M
    // the condition is always false: load (select bool %C, int* null, int* %G),
1061
10.7M
    // but it would not be valid if we transformed it to load from null
1062
10.7M
    // unconditionally.
1063
10.7M
    //
1064
10.7M
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1065
8.70k
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
1066
8.70k
      unsigned Align = LI.getAlignment();
1067
8.70k
      if (isSafeToLoadUnconditionally(SI->getOperand(1), LI.getType(), Align,
1068
8.70k
                                      DL, SI) &&
1069
8.70k
          isSafeToLoadUnconditionally(SI->getOperand(2), LI.getType(), Align,
1070
512
                                      DL, SI)) {
1071
122
        LoadInst *V1 =
1072
122
            Builder.CreateLoad(LI.getType(), SI->getOperand(1),
1073
122
                               SI->getOperand(1)->getName() + ".val");
1074
122
        LoadInst *V2 =
1075
122
            Builder.CreateLoad(LI.getType(), SI->getOperand(2),
1076
122
                               SI->getOperand(2)->getName() + ".val");
1077
122
        assert(LI.isUnordered() && "implied by above");
1078
122
        V1->setAlignment(Align);
1079
122
        V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1080
122
        V2->setAlignment(Align);
1081
122
        V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1082
122
        return SelectInst::Create(SI->getCondition(), V1, V2);
1083
122
      }
1084
8.58k
1085
8.58k
      // load (select (cond, null, P)) -> load P
1086
8.58k
      if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1087
8.58k
          !NullPointerIsDefined(SI->getFunction(),
1088
9
                                LI.getPointerAddressSpace())) {
1089
0
        LI.setOperand(0, SI->getOperand(2));
1090
0
        return &LI;
1091
0
      }
1092
8.58k
1093
8.58k
      // load (select (cond, P, null)) -> load P
1094
8.58k
      if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1095
8.58k
          !NullPointerIsDefined(SI->getFunction(),
1096
3
                                LI.getPointerAddressSpace())) {
1097
1
        LI.setOperand(0, SI->getOperand(1));
1098
1
        return &LI;
1099
1
      }
1100
18.4M
    }
1101
10.7M
  }
1102
18.4M
  return nullptr;
1103
18.4M
}
1104
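A sketch of the select-of-addresses rewrite done near the end of visitLoadInst (hypothetical IR; it only fires when both %a and %b are known safe to load from unconditionally):

  ; before
  %p = select i1 %c, i32* %a, i32* %b
  %v = load i32, i32* %p, align 4
  ; after: select the loaded values instead of the addresses
  %a.val = load i32, i32* %a, align 4
  %b.val = load i32, i32* %b, align 4
  %v = select i1 %c, i32 %a.val, i32 %b.val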
1105
/// Look for extractelement/insertvalue sequence that acts like a bitcast.
1106
///
1107
/// \returns underlying value that was "cast", or nullptr otherwise.
1108
///
1109
/// For example, if we have:
1110
///
1111
///     %E0 = extractelement <2 x double> %U, i32 0
1112
///     %V0 = insertvalue [2 x double] undef, double %E0, 0
1113
///     %E1 = extractelement <2 x double> %U, i32 1
1114
///     %V1 = insertvalue [2 x double] %V0, double %E1, 1
1115
///
1116
/// and the layout of a <2 x double> is isomorphic to a [2 x double],
1117
/// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1118
/// Note that %U may contain non-undef values where %V1 has undef.
1119
12.7M
static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
1120
12.7M
  Value *U = nullptr;
1121
12.7M
  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1122
20
    auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1123
20
    if (!E)
1124
8
      return nullptr;
1125
12
    auto *W = E->getVectorOperand();
1126
12
    if (!U)
1127
4
      U = W;
1128
8
    else if (U != W)
1129
0
      return nullptr;
1130
12
    auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1131
12
    if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1132
0
      return nullptr;
1133
12
    V = IV->getAggregateOperand();
1134
12
  }
1135
12.7M
  
if (12.7M
!isa<UndefValue>(V)12.7M
||
!U844
)
1136
12.7M
    return nullptr;
1137
4
1138
4
  auto *UT = cast<VectorType>(U->getType());
1139
4
  auto *VT = V->getType();
1140
4
  // Check that types UT and VT are bitwise isomorphic.
1141
4
  const auto &DL = IC.getDataLayout();
1142
4
  if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1143
0
    return nullptr;
1144
0
  }
1145
4
  if (auto *AT = dyn_cast<ArrayType>(VT)) {
1146
3
    if (AT->getNumElements() != UT->getNumElements())
1147
0
      return nullptr;
1148
1
  } else {
1149
1
    auto *ST = cast<StructType>(VT);
1150
1
    if (ST->getNumElements() != UT->getNumElements())
1151
0
      return nullptr;
1152
4
    for (const auto *EltT : ST->elements()) {
1153
4
      if (EltT != UT->getElementType())
1154
0
        return nullptr;
1155
4
    }
1156
1
  }
1157
4
  return U;
1158
4
}
1159
1160
/// Combine stores to match the type of value being stored.
1161
///
1162
/// The core idea here is that the memory does not have any intrinsic type and
1163
/// where we can we should match the type of a store to the type of value being
1164
/// stored.
1165
///
1166
/// However, this routine must never change the width of a store or the number of
1167
/// stores as that would introduce a semantic change. This combine is expected to
1168
/// be a semantic no-op which just allows stores to more closely model the types
1169
/// of their incoming values.
1170
///
1171
/// Currently, we also refuse to change the precise type used for an atomic or
1172
/// volatile store. This is debatable, and might be reasonable to change later.
1173
/// However, it is risky in case some backend or other part of LLVM is relying
1174
/// on the exact type stored to select appropriate atomic operations.
1175
///
1176
/// \returns true if the store was successfully combined away. This indicates
1177
/// the caller must erase the store instruction. We have to let the caller erase
1178
/// the store instruction as otherwise there is no way to signal whether it was
1179
/// combined or not: IC.EraseInstFromFunction returns a null pointer.
1180
12.9M
static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
1181
12.9M
  // FIXME: We could probably with some care handle both volatile and ordered
1182
12.9M
  // atomic stores here but it isn't clear that this is important.
1183
12.9M
  if (!SI.isUnordered())
1184
196k
    return false;
1185
12.7M
1186
12.7M
  // swifterror values can't be bitcasted.
1187
12.7M
  if (SI.getPointerOperand()->isSwiftError())
1188
9
    return false;
1189
12.7M
1190
12.7M
  Value *V = SI.getValueOperand();
1191
12.7M
1192
12.7M
  // Fold away bit casts of the stored value by storing the original type.
1193
12.7M
  if (auto *BC = dyn_cast<BitCastInst>(V)) {
1194
18.0k
    V = BC->getOperand(0);
1195
18.0k
    if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1196
18.0k
      combineStoreToNewValue(IC, SI, V);
1197
18.0k
      return true;
1198
18.0k
    }
1199
12.7M
  }
1200
12.7M
1201
12.7M
  if (Value *U = likeBitCastFromVector(IC, V))
1202
4
    if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1203
4
      combineStoreToNewValue(IC, SI, U);
1204
4
      return true;
1205
4
    }
1206
12.7M
1207
12.7M
  // FIXME: We should also canonicalize stores of vectors when their elements
1208
12.7M
  // are cast to other types.
1209
12.7M
  return false;
1210
12.7M
}
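// A minimal sketch of the bitcast fold above (%f and %p are illustrative
// names, not from this file's tests):
//
//     %b = bitcast float %f to i32
//     store i32 %b, i32* %p
//
// is rewritten, roughly, to store the original value through a recast pointer:
//
//     %p.cast = bitcast i32* %p to float*
//     store float %f, float* %p.cast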
1211
1212
12.9M
static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
1213
12.9M
  // FIXME: We could probably with some care handle both volatile and atomic
1214
12.9M
  // stores here but it isn't clear that this is important.
1215
12.9M
  if (!SI.isSimple())
1216
196k
    return false;
1217
12.7M
1218
12.7M
  Value *V = SI.getValueOperand();
1219
12.7M
  Type *T = V->getType();
1220
12.7M
1221
12.7M
  if (!T->isAggregateType())
1222
12.7M
    return false;
1223
30
1224
30
  if (auto *ST = dyn_cast<StructType>(T)) {
1225
21
    // If the struct only has one element, we unpack.
1226
21
    unsigned Count = ST->getNumElements();
1227
21
    if (Count == 1) {
1228
8
      V = IC.Builder.CreateExtractValue(V, 0);
1229
8
      combineStoreToNewValue(IC, SI, V);
1230
8
      return true;
1231
8
    }
1232
13
1233
13
    // We don't want to break stores with padding here as we'd lose
1234
13
    // the knowledge that padding exists for the rest of the pipeline.
1235
13
    const DataLayout &DL = IC.getDataLayout();
1236
13
    auto *SL = DL.getStructLayout(ST);
1237
13
    if (SL->hasPadding())
1238
0
      return false;
1239
13
1240
13
    auto Align = SI.getAlignment();
1241
13
    if (!Align)
1242
0
      Align = DL.getABITypeAlignment(ST);
1243
13
1244
13
    SmallString<16> EltName = V->getName();
1245
13
    EltName += ".elt";
1246
13
    auto *Addr = SI.getPointerOperand();
1247
13
    SmallString<16> AddrName = Addr->getName();
1248
13
    AddrName += ".repack";
1249
13
1250
13
    auto *IdxType = Type::getInt32Ty(ST->getContext());
1251
13
    auto *Zero = ConstantInt::get(IdxType, 0);
1252
48
    for (unsigned i = 0; i < Count; i++) {
1253
35
      Value *Indices[2] = {
1254
35
        Zero,
1255
35
        ConstantInt::get(IdxType, i),
1256
35
      };
1257
35
      auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
1258
35
                                               AddrName);
1259
35
      auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1260
35
      auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
1261
35
      llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1262
35
      AAMDNodes AAMD;
1263
35
      SI.getAAMetadata(AAMD);
1264
35
      NS->setAAMetadata(AAMD);
1265
35
    }
1266
13
1267
13
    return true;
1268
13
  }
1269
9
1270
9
  if (auto *AT = dyn_cast<ArrayType>(T)) {
1271
9
    // If the array only has one element, we unpack.
1272
9
    auto NumElements = AT->getNumElements();
1273
9
    if (NumElements == 1) {
1274
3
      V = IC.Builder.CreateExtractValue(V, 0);
1275
3
      combineStoreToNewValue(IC, SI, V);
1276
3
      return true;
1277
3
    }
1278
6
1279
6
    // Bail out if the array is too large. Ideally we would like to optimize
1280
6
    // arrays of arbitrary size but this has a terrible impact on compile time.
1281
6
    // The threshold here is chosen arbitrarily, maybe needs a little bit of
1282
6
    // tuning.
1283
6
    if (NumElements > IC.MaxArraySizeForCombine)
1284
1
      return false;
1285
5
1286
5
    const DataLayout &DL = IC.getDataLayout();
1287
5
    auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1288
5
    auto Align = SI.getAlignment();
1289
5
    if (!Align)
1290
0
      Align = DL.getABITypeAlignment(T);
1291
5
1292
5
    SmallString<16> EltName = V->getName();
1293
5
    EltName += ".elt";
1294
5
    auto *Addr = SI.getPointerOperand();
1295
5
    SmallString<16> AddrName = Addr->getName();
1296
5
    AddrName += ".repack";
1297
5
1298
5
    auto *IdxType = Type::getInt64Ty(T->getContext());
1299
5
    auto *Zero = ConstantInt::get(IdxType, 0);
1300
5
1301
5
    uint64_t Offset = 0;
1302
23
    for (uint64_t i = 0; i < NumElements; i++) {
1303
18
      Value *Indices[2] = {
1304
18
        Zero,
1305
18
        ConstantInt::get(IdxType, i),
1306
18
      };
1307
18
      auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
1308
18
                                               AddrName);
1309
18
      auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1310
18
      auto EltAlign = MinAlign(Align, Offset);
1311
18
      Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1312
18
      AAMDNodes AAMD;
1313
18
      SI.getAAMetadata(AAMD);
1314
18
      NS->setAAMetadata(AAMD);
1315
18
      Offset += EltSize;
1316
18
    }
1317
5
1318
5
    return true;
1319
5
  }
1320
0
1321
0
  return false;
1322
0
}
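// Sketch of the struct case above, assuming a padding-free pair of i32s
// (%agg and %p are illustrative names):
//
//     store { i32, i32 } %agg, { i32, i32 }* %p
//
// is expanded, roughly, into one store per element:
//
//     %p.repack = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 0
//     %agg.elt = extractvalue { i32, i32 } %agg, 0
//     store i32 %agg.elt, i32* %p.repack
//     %p.repack1 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %p, i32 0, i32 1
//     %agg.elt2 = extractvalue { i32, i32 } %agg, 1
//     store i32 %agg.elt2, i32* %p.repack1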
1323
1324
/// equivalentAddressValues - Test if A and B will obviously have the same
1325
/// value. This includes recognizing that %t0 and %t1 will have the same
1326
/// value in code like this:
1327
///   %t0 = getelementptr \@a, 0, 3
1328
///   store i32 0, i32* %t0
1329
///   %t1 = getelementptr \@a, 0, 3
1330
///   %t2 = load i32* %t1
1331
///
1332
6.94M
static bool equivalentAddressValues(Value *A, Value *B) {
1333
6.94M
  // Test if the values are trivially equivalent.
1334
6.94M
  if (A == B) return true;
1335
6.94M
1336
6.94M
  // Test if the values come from identical arithmetic instructions.
1337
6.94M
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1338
6.94M
  // it's only used to compare two uses within the same basic block, which
1339
6.94M
  // means that they'll always either have the same value or one of them
1340
6.94M
  // will have an undefined value.
1341
6.94M
  if (isa<BinaryOperator>(A) ||
1342
6.94M
      isa<CastInst>(A) ||
1343
6.94M
      isa<PHINode>(A) ||
1344
6.94M
      isa<GetElementPtrInst>(A))
1345
6.43M
    if (Instruction *BI = dyn_cast<Instruction>(B))
1346
6.26M
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1347
1.57k
        return true;
1348
6.94M
1349
6.94M
  // Otherwise they may not be equivalent.
1350
6.94M
  return false;
1351
6.94M
}
1352
1353
/// Converts store (bitcast (load (bitcast (select ...)))) to
1354
/// store (load (select ...)), where select is minmax:
1355
/// select ((cmp load V1, load V2), V1, V2).
1356
static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
1357
12.9M
                                                StoreInst &SI) {
1358
12.9M
  // bitcast?
1359
12.9M
  if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1360
10.0M
    return false;
1361
2.84M
  // load? integer?
1362
2.84M
  Value *LoadAddr;
1363
2.84M
  if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1364
2.06M
    return false;
1365
780k
  auto *LI = cast<LoadInst>(SI.getValueOperand());
1366
780k
  if (!LI->getType()->isIntegerTy())
1367
66.5k
    return false;
1368
714k
  if (!isMinMaxWithLoads(LoadAddr))
1369
714k
    return false;
1370
27
1371
28
  if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
1372
28
        auto *SI = dyn_cast<StoreInst>(U);
1373
28
        return SI && SI->getPointerOperand() != LI &&
1374
28
               peekThroughBitcast(SI->getPointerOperand()) != LoadAddr &&
1375
28
               !SI->getPointerOperand()->isSwiftError();
1376
28
      }))
1377
0
    return false;
1378
27
1379
27
  IC.Builder.SetInsertPoint(LI);
1380
27
  LoadInst *NewLI = combineLoadToNewType(
1381
27
      IC, *LI, LoadAddr->getType()->getPointerElementType());
1382
27
  // Replace all the stores with stores of the newly loaded value.
1383
28
  for (auto *UI : LI->users()) {
1384
28
    auto *USI = cast<StoreInst>(UI);
1385
28
    IC.Builder.SetInsertPoint(USI);
1386
28
    combineStoreToNewValue(IC, *USI, NewLI);
1387
28
  }
1388
27
  IC.replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
1389
27
  IC.eraseInstFromFunction(*LI);
1390
27
  return true;
1391
27
}
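// Illustrative sketch of the min/max pattern handled above (pointers %a, %b,
// %dst and the float element type are assumptions for the example):
//
//     %av   = load float, float* %a
//     %bv   = load float, float* %b
//     %cmp  = fcmp olt float %av, %bv
//     %sel  = select i1 %cmp, float* %a, float* %b
//     %seli = bitcast float* %sel to i32*
//     %v    = load i32, i32* %seli
//     %dsti = bitcast float* %dst to i32*
//     store i32 %v, i32* %dsti
//
// is rewritten, roughly, to load and store at the selected element type:
//
//     %v2 = load float, float* %sel
//     store float %v2, float* %dst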
1392
1393
12.9M
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
1394
12.9M
  Value *Val = SI.getOperand(0);
1395
12.9M
  Value *Ptr = SI.getOperand(1);
1396
12.9M
1397
12.9M
  // Try to canonicalize the stored type.
1398
12.9M
  if (combineStoreToValueType(*this, SI))
1399
18.0k
    return eraseInstFromFunction(SI);
1400
12.9M
1401
12.9M
  // Attempt to improve the alignment.
1402
12.9M
  unsigned KnownAlign = getOrEnforceKnownAlignment(
1403
12.9M
      Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT);
1404
12.9M
  unsigned StoreAlign = SI.getAlignment();
1405
12.9M
  unsigned EffectiveStoreAlign =
1406
12.9M
      StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
1407
12.9M
1408
12.9M
  if (KnownAlign > EffectiveStoreAlign)
1409
63.7k
    SI.setAlignment(KnownAlign);
1410
12.8M
  else if (StoreAlign == 0)
1411
74.0k
    SI.setAlignment(EffectiveStoreAlign);
1412
12.9M
1413
12.9M
  // Try to canonicalize the stored type.
1414
12.9M
  if (unpackStoreToAggregate(*this, SI))
1415
29
    return eraseInstFromFunction(SI);
1416
12.9M
1417
12.9M
  if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
1418
27
    return eraseInstFromFunction(SI);
1419
12.9M
1420
12.9M
  // Replace GEP indices if possible.
1421
12.9M
  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1422
121
      Worklist.Add(NewGEPI);
1423
121
      return &SI;
1424
121
  }
1425
12.9M
1426
12.9M
  // Don't hack volatile/ordered stores.
1427
12.9M
  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1428
12.9M
  if (!SI.isUnordered()) return nullptr;
1429
12.7M
1430
12.7M
  // If the RHS is an alloca with a single use, zapify the store, making the
1431
12.7M
  // alloca dead.
1432
12.7M
  if (Ptr->hasOneUse()) {
1433
7.90M
    if (isa<AllocaInst>(Ptr))
1434
0
      return eraseInstFromFunction(SI);
1435
7.90M
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1436
5.22M
      if (isa<AllocaInst>(GEP->getOperand(0))) {
1437
2.65M
        if (GEP->getOperand(0)->hasOneUse())
1438
0
          return eraseInstFromFunction(SI);
1439
12.7M
      }
1440
5.22M
    }
1441
7.90M
  }
1442
12.7M
1443
12.7M
  // If we have a store to a location which is known constant, we can conclude
1444
12.7M
  // that the store must be storing the constant value (else the memory
1445
12.7M
  // wouldn't be constant), and this must be a noop.
1446
12.7M
  if (AA->pointsToConstantMemory(Ptr))
1447
1
    return eraseInstFromFunction(SI);
1448
12.7M
1449
12.7M
  // Do really simple DSE, to catch cases where there are several consecutive
1450
12.7M
  // stores to the same location, separated by a few arithmetic operations. This
1451
12.7M
  // situation often occurs with bitfield accesses.
1452
12.7M
  BasicBlock::iterator BBI(SI);
1453
27.8M
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && 
ScanInsts25.7M
;
1454
25.5M
       
--ScanInsts15.1M
) {
1455
25.5M
    --BBI;
1456
25.5M
    // Don't count debug info directives, lest they affect codegen,
1457
25.5M
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
1458
25.5M
    if (isa<DbgInfoIntrinsic>(BBI) ||
1459
25.5M
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1460
2.46M
      ScanInsts++;
1461
2.46M
      continue;
1462
2.46M
    }
1463
23.0M
1464
23.0M
    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1465
5.36M
      // Prev store isn't volatile, and stores to the same location?
1466
5.36M
      if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
1467
5.35M
                                                        SI.getOperand(1))) {
1468
2.05k
        ++NumDeadStore;
1469
2.05k
        ++BBI;
1470
2.05k
        eraseInstFromFunction(*PrevSI);
1471
2.05k
        continue;
1472
2.05k
      }
1473
5.35M
      break;
1474
5.35M
    }
1475
17.7M
1476
17.7M
    // If this is a load, we have to stop.  However, if the value being stored
1478
17.7M
    // was just loaded from the very pointer we are storing to, then *this*
1479
17.7M
    // store is dead (X = load P; store X -> P).
1479
17.7M
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1480
3.27M
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1481
84
        assert(SI.isUnordered() && "can't eliminate ordering operation");
1482
84
        return eraseInstFromFunction(SI);
1483
84
      }
1484
3.27M
1485
3.27M
      // Otherwise, this is a load from some other location.  Stores before it
1486
3.27M
      // may not be dead.
1487
3.27M
      break;
1488
3.27M
    }
1489
14.4M
1490
14.4M
    // Don't skip over loads, throws or things that can modify memory.
1491
14.4M
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1492
1.72M
      break;
1493
14.4M
  }
1494
12.7M
1495
12.7M
  // store X, null    -> turns into 'unreachable' in SimplifyCFG
1496
12.7M
  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1497
12.7M
  if (canSimplifyNullStoreOrGEP(SI)) {
1498
113
    if (!isa<UndefValue>(Val)) {
1499
14
      SI.setOperand(0, UndefValue::get(Val->getType()));
1500
14
      if (Instruction *U = dyn_cast<Instruction>(Val))
1501
8
        Worklist.Add(U);  // Dropped a use.
1502
14
    }
1503
113
    return nullptr;  // Do not modify these!
1504
113
  }
1505
12.7M
1506
12.7M
  // store undef, Ptr -> noop
1507
12.7M
  if (isa<UndefValue>(Val))
1508
741
    return eraseInstFromFunction(SI);
1509
12.7M
1510
12.7M
  // If this store is the second-to-last instruction in the basic block
1511
12.7M
  // (excluding debug info and bitcasts of pointers) and if the block ends with
1512
12.7M
  // an unconditional branch, try to move the store to the successor block.
1513
12.7M
  BBI = SI.getIterator();
1514
13.1M
  do {
1515
13.1M
    ++BBI;
1516
13.1M
  } while (isa<DbgInfoIntrinsic>(BBI) ||
1517
13.1M
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
1518
12.7M
1519
12.7M
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
1520
1.67M
    if (BI->isUnconditional())
1521
1.59M
      mergeStoreIntoSuccessor(SI);
1522
12.7M
1523
12.7M
  return nullptr;
1524
12.7M
}
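// A small sketch of the local dead-store elimination performed above
// (%p and the i32 values are illustrative):
//
//     store i32 %a, i32* %p
//     %b = add i32 %a, 1
//     store i32 %b, i32* %p
//
// Scanning back a few instructions from the second store finds the earlier
// store to the same address with nothing in between that reads or writes
// memory, so that earlier store is erased as dead.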
1525
1526
/// Try to transform:
1527
///   if () { *P = v1; } else { *P = v2 }
1528
/// or:
1529
///   *P = v1; if () { *P = v2; }
1530
/// into a phi node with a store in the successor.
1531
1.59M
bool InstCombiner::mergeStoreIntoSuccessor(StoreInst &SI) {
1532
1.59M
  assert(SI.isUnordered() &&
1533
1.59M
         "This code has not been audited for volatile or ordered store case.");
1534
1.59M
1535
1.59M
  // Check if the successor block has exactly 2 incoming edges.
1536
1.59M
  BasicBlock *StoreBB = SI.getParent();
1537
1.59M
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1538
1.59M
  if (!DestBB->hasNPredecessors(2))
1539
566k
    return false;
1540
1.02M
1541
1.02M
  // Capture the other block (the block that doesn't contain our store).
1542
1.02M
  pred_iterator PredIter = pred_begin(DestBB);
1543
1.02M
  if (*PredIter == StoreBB)
1544
719k
    ++PredIter;
1545
1.02M
  BasicBlock *OtherBB = *PredIter;
1546
1.02M
1547
1.02M
  // Bail out if all of the relevant blocks aren't distinct. This can happen,
1548
1.02M
  // for example, if SI is in an infinite loop.
1549
1.02M
  if (StoreBB == DestBB || OtherBB == DestBB)
1550
24.1k
    return false;
1551
1.00M
1552
1.00M
  // Verify that the other block ends in a branch and is not otherwise empty.
1553
1.00M
  BasicBlock::iterator BBI(OtherBB->getTerminator());
1554
1.00M
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1555
1.00M
  if (!OtherBr || BBI == OtherBB->begin())
1556
31.7k
    return false;
1557
970k
1558
970k
  // If the other block ends in an unconditional branch, check for the 'if then
1559
970k
  // else' case. There is an instruction before the branch.
1560
970k
  StoreInst *OtherStore = nullptr;
1561
970k
  if (OtherBr->isUnconditional()) {
1562
259k
    --BBI;
1563
259k
    // Skip over debugging info.
1564
263k
    while (isa<DbgInfoIntrinsic>(BBI) ||
1565
263k
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1566
4.67k
      if (BBI==OtherBB->begin())
1567
735
        return false;
1568
3.94k
      --BBI;
1569
3.94k
    }
1570
259k
    // If this isn't a store, isn't a store to the same location, or is not the
1571
259k
    // right kind of store, bail out.
1572
259k
    OtherStore = dyn_cast<StoreInst>(BBI);
1573
259k
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1574
259k
        !SI.isSameOperationAs(OtherStore))
1575
257k
      return false;
1576
710k
  } else {
1577
710k
    // Otherwise, the other block ended with a conditional branch. If one of the
1578
710k
    // destinations is StoreBB, then we have the if/then case.
1579
710k
    if (OtherBr->getSuccessor(0) != StoreBB &&
1580
710k
        OtherBr->getSuccessor(1) != StoreBB)
1581
208k
      return false;
1582
501k
1583
501k
    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
1584
501k
    // if/then triangle. See if there is a store to the same ptr as SI that
1585
501k
    // lives in OtherBB.
1586
1.50M
    for (;; --BBI) {
1587
1.50M
      // Check to see if we find the matching store.
1588
1.50M
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1589
93.4k
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1590
93.4k
            !SI.isSameOperationAs(OtherStore))
1591
92.1k
          return false;
1592
1.35k
        break;
1593
1.35k
      }
1594
1.41M
      // If we find something that may be using or overwriting the stored
1595
1.41M
      // value, or if we run out of instructions, we can't do the transform.
1596
1.41M
      if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1597
1.41M
          BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1598
408k
        return false;
1599
1.41M
    }
1600
501k
1601
501k
    // In order to eliminate the store in OtherBr, we have to make sure nothing
1602
501k
    // reads or overwrites the stored value in StoreBB.
1603
501k
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1604
2.26k
      // FIXME: This should really be AA driven.
1605
2.26k
      if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1606
1.16k
        return false;
1607
2.26k
    }
1608
1.35k
  }
1609
970k
1610
970k
  // Insert a PHI node now if we need it.
1611
970k
  Value *MergedVal = OtherStore->getOperand(0);
1612
1.53k
  // The debug locations of the original instructions might differ. Merge them.
1613
1.53k
  DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
1614
1.53k
                                                     OtherStore->getDebugLoc());
1615
1.53k
  if (MergedVal != SI.getOperand(0)) {
1616
1.44k
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1617
1.44k
    PN->addIncoming(SI.getOperand(0), SI.getParent());
1618
1.44k
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1619
1.44k
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
1620
1.44k
    PN->setDebugLoc(MergedLoc);
1621
1.44k
  }
1622
1.53k
1623
1.53k
  // Advance to a place where it is safe to insert the new store and insert it.
1624
1.53k
  BBI = DestBB->getFirstInsertionPt();
1625
1.53k
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
1626
1.53k
                                   SI.isVolatile(), SI.getAlignment(),
1627
1.53k
                                   SI.getOrdering(), SI.getSyncScopeID());
1628
1.53k
  InsertNewInstBefore(NewSI, *BBI);
1629
1.53k
  NewSI->setDebugLoc(MergedLoc);
1630
1.53k
1631
1.53k
  // If the two stores had AA tags, merge them.
1632
1.53k
  AAMDNodes AATags;
1633
1.53k
  SI.getAAMetadata(AATags);
1634
1.53k
  if (AATags) {
1635
1.38k
    OtherStore->getAAMetadata(AATags, /* Merge = */ true);
1636
1.38k
    NewSI->setAAMetadata(AATags);
1637
1.38k
  }
1638
1.53k
1639
1.53k
  // Nuke the old stores.
1640
1.53k
  eraseInstFromFunction(SI);
1641
1.53k
  eraseInstFromFunction(*OtherStore);
1642
1.53k
  return true;
1643
970k
}
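// Sketch of the if/then/else case handled above (block and value names are
// illustrative assumptions):
//
// then:
//   store i32 %v1, i32* %p
//   br label %end
// else:
//   store i32 %v2, i32* %p
//   br label %end
// end:
//   ...
//
// becomes, roughly, a single merged store in the successor:
//
// end:
//   %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
//   store i32 %storemerge, i32* %p
//   ...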