Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/IPO/GlobalOpt.cpp
//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken.  If obviously true, it marks read/write globals as constant, deletes
// variables only stored to, etc.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>

using namespace llvm;

#define DEBUG_TYPE "globalopt"

STATISTIC(NumMarked    , "Number of globals marked constant");
STATISTIC(NumUnnamed   , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA       , "Number of aggregate globals broken into scalars");
STATISTIC(NumHeapSRA   , "Number of heap objects SRA'd");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted   , "Number of globals deleted");
STATISTIC(NumGlobUses  , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool  , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns   , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved   , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");

/// Is this global variable possibly used by a leak checker as a root?  If so,
/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
  // A global variable is a root if it is a pointer, or could plausibly contain
  // a pointer.  There are two challenges; one is that we could have a struct
  // that has an inner member which is a pointer.  We recurse through the type
  // to detect these (up to a point).  The other is that we may actually be a
  // union of a pointer and another type, and so our LLVM type is an integer
  // which gets converted into a pointer, or our type is an [i8 x #] with a
  // pointer potentially contained here.

  if (GV->hasPrivateLinkage())
    return false;

  SmallVector<Type *, 4> Types;
  Types.push_back(GV->getValueType());

  unsigned Limit = 20;
  do {
    Type *Ty = Types.pop_back_val();
    switch (Ty->getTypeID()) {
      default: break;
      case Type::PointerTyID: return true;
      case Type::ArrayTyID:
      case Type::VectorTyID: {
        SequentialType *STy = cast<SequentialType>(Ty);
        Types.push_back(STy->getElementType());
        break;
      }
      case Type::StructTyID: {
        StructType *STy = cast<StructType>(Ty);
        if (STy->isOpaque()) return true;
        for (StructType::element_iterator I = STy->element_begin(),
                 E = STy->element_end(); I != E; ++I) {
          Type *InnerTy = *I;
          if (isa<PointerType>(InnerTy)) return true;
          if (isa<CompositeType>(InnerTy))
            Types.push_back(InnerTy);
        }
        break;
      }
    }
    if (--Limit == 0) return true;
  } while (!Types.empty());
  return false;
}
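
// Illustrative example (hypothetical IR, added for exposition): for
//   @g = global { i32, i8* } zeroinitializer
// the type walk above reaches the inner i8* and reports the global as a
// root, while @h = global [4 x i32] zeroinitializer contains no pointer
// type and is not treated as a root.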

/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
  do {
    if (isa<Constant>(V))
      return true;
    if (!V->hasOneUse())
      return false;
    if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
        isa<GlobalValue>(V))
      return false;
    if (isAllocationFn(V, TLI))
      return true;

    Instruction *I = cast<Instruction>(V);
    if (I->mayHaveSideEffects())
      return false;
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
      if (!GEP->hasAllConstantIndices())
        return false;
    } else if (I->getNumOperands() != 1) {
      return false;
    }

    V = I->getOperand(0);
  } while (1);
}

/// This GV is a pointer root.  Loop over all users of the global and clean up
/// any that obviously don't assign the global a value that isn't dynamically
/// allocated.
static bool CleanupPointerRootUsers(GlobalVariable *GV,
                                    const TargetLibraryInfo *TLI) {
  // A brief explanation of leak checkers.  The goal is to find bugs where
  // pointers are forgotten, causing an accumulating growth in memory
  // usage over time.  The common strategy for leak checkers is to whitelist the
  // memory pointed to by globals at exit.  This is popular because it also
  // solves another problem where the main thread of a C++ program may shut down
  // before other threads that are still expecting to use those globals.  To
  // handle that case, we expect the program may create a singleton and never
  // destroy it.

  bool Changed = false;

  // If Dead[n].first is the only use of a malloc result, we can delete its
  // chain of computation and the store to the global in Dead[n].second.
  SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;

  // Constants can't be pointers to dynamically allocated memory.
  for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
       UI != E;) {
    User *U = *UI++;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      Value *V = SI->getValueOperand();
      if (isa<Constant>(V)) {
        Changed = true;
        SI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, SI));
      }
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
      if (isa<Constant>(MSI->getValue())) {
        Changed = true;
        MSI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MSI));
      }
    } else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
      GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
      if (MemSrc && MemSrc->isConstant()) {
        Changed = true;
        MTI->eraseFromParent();
      } else if (Instruction *I = dyn_cast<Instruction>(MemSrc)) {
        if (I->hasOneUse())
          Dead.push_back(std::make_pair(I, MTI));
      }
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (Constant *C = dyn_cast<Constant>(U)) {
      if (isSafeToDestroyConstant(C)) {
        C->destroyConstant();
        // This could have invalidated UI, start over from scratch.
        Dead.clear();
        CleanupPointerRootUsers(GV, TLI);
        return true;
      }
    }
  }

  for (int i = 0, e = Dead.size(); i != e; ++i) {
    if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
      Dead[i].second->eraseFromParent();
      Instruction *I = Dead[i].first;
      do {
        if (isAllocationFn(I, TLI))
          break;
        Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
        if (!J)
          break;
        I->eraseFromParent();
        I = J;
      } while (1);
      I->eraseFromParent();
    }
  }

  return Changed;
}
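
// Illustrative example (hypothetical IR, added for exposition): for a global
// that is only ever written,
//   %p = call i8* @malloc(i64 4)
//   store i8* %p, i8** @g
// the store is queued in Dead, IsSafeComputationToRemove accepts the
// single-use malloc chain, and both the store and the malloc are erased.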

/// We just marked GV constant.  Loop over all users of the global, cleaning up
/// the obvious ones.  This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft.  This returns true if it made a change.
static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
                                       const DataLayout &DL,
                                       TargetLibraryInfo *TLI) {
  bool Changed = false;
  // Note that we need to use a weak value handle for the worklist items. When
  // we delete a constant array, we may also be holding pointer to one of its
  // elements (or an element of one of its elements if we're dealing with an
  // array of arrays) in the worklist.
  SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end());
  while (!WorkList.empty()) {
    Value *UV = WorkList.pop_back_val();
    if (!UV)
      continue;

    User *U = cast<User>(UV);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (Init) {
        // Replace the load with the initializer.
        LI->replaceAllUsesWith(Init);
        LI->eraseFromParent();
        Changed = true;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // Store must be unreachable or storing Init into the global.
      SI->eraseFromParent();
      Changed = true;
    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
      if (CE->getOpcode() == Instruction::GetElementPtr) {
        Constant *SubInit = nullptr;
        if (Init)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
        Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
      } else if ((CE->getOpcode() == Instruction::BitCast &&
                  CE->getType()->isPointerTy()) ||
                 CE->getOpcode() == Instruction::AddrSpaceCast) {
        // Pointer cast, delete any stores and memsets to the global.
        Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
      }

      if (CE->use_empty()) {
        CE->destroyConstant();
        Changed = true;
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // Do not transform "gepinst (gep constexpr (GV))" here, because forming
      // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
      // and will invalidate our notion of what Init is.
      Constant *SubInit = nullptr;
      if (!isa<ConstantExpr>(GEP->getOperand(0))) {
        ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
            ConstantFoldInstruction(GEP, DL, TLI));
        if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
          SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);

        // If the initializer is an all-null value and we have an inbounds GEP,
        // we already know what the result of any load from that GEP is.
        // TODO: Handle splats.
        if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
          SubInit = Constant::getNullValue(GEP->getResultElementType());
      }
      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI);

      if (GEP->use_empty()) {
        GEP->eraseFromParent();
        Changed = true;
      }
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
      if (MI->getRawDest() == V) {
        MI->eraseFromParent();
        Changed = true;
      }

    } else if (Constant *C = dyn_cast<Constant>(U)) {
      // If we have a chain of dead constantexprs or other things dangling from
      // us, and if they are all dead, nuke them without remorse.
      if (isSafeToDestroyConstant(C)) {
        C->destroyConstant();
        CleanupConstantGlobalUsers(V, Init, DL, TLI);
        return true;
      }
    }
  }
  return Changed;
}
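
// Illustrative example (hypothetical IR, added for exposition): once @g is
// marked constant with initializer i32 42, a user such as
//   %v = load i32, i32* @g
// is replaced above by the constant 42 and the dead load is erased; any
// remaining store to @g must be unreachable, so it is deleted as well.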

/// Return true if the specified instruction is a safe user of a derived
/// expression from a global that we want to SROA.
static bool isSafeSROAElementUse(Value *V) {
  // We might have a dead and dangling constant hanging off of here.
  if (Constant *C = dyn_cast<Constant>(V))
    return isSafeToDestroyConstant(C);

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Loads are ok.
  if (isa<LoadInst>(I)) return true;

  // Stores *to* the pointer are ok.
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getOperand(0) != V;

  // Otherwise, it must be a GEP.
  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
  if (!GEPI) return false;

  if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
      !cast<Constant>(GEPI->getOperand(1))->isNullValue())
    return false;

  for (User *U : GEPI->users())
    if (!isSafeSROAElementUse(U))
      return false;
  return true;
}


/// U is a direct user of the specified global value.  Look at it and its uses
/// and decide whether it is safe to SROA this global.
static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
  // The user of the global must be a GEP Inst or a ConstantExpr GEP.
  if (!isa<GetElementPtrInst>(U) &&
      (!isa<ConstantExpr>(U) ||
       cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr))
    return false;

  // Check to see if this ConstantExpr GEP is SRA'able.  In particular, we
  // don't like < 3 operand CE's, and we don't like non-constant integer
  // indices.  This enforces that all uses are 'gep GV, 0, C, ...' for some
  // value of C.
  if (U->getNumOperands() < 3 || !isa<Constant>(U->getOperand(1)) ||
      !cast<Constant>(U->getOperand(1))->isNullValue() ||
      !isa<ConstantInt>(U->getOperand(2)))
    return false;

  gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U);
  ++GEPI;  // Skip over the pointer index.

  // If this is a use of an array allocation, do a bit more checking for sanity.
  if (GEPI.isSequential()) {
    ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));

    // Check to make sure that index falls within the array.  If not,
    // something funny is going on, so we won't do the optimization.
    //
    if (GEPI.isBoundedSequential() &&
        Idx->getZExtValue() >= GEPI.getSequentialNumElements())
      return false;

    // We cannot scalar repl this level of the array unless any array
    // sub-indices are in-range constants.  In particular, consider:
    // A[0][i].  We cannot know that the user isn't doing invalid things like
    // allowing i to index an out-of-range subscript that accesses A[1].
    //
    // Scalar replacing *just* the outer index of the array is probably not
    // going to be a win anyway, so just give up.
    for (++GEPI; // Skip array index.
         GEPI != E;
         ++GEPI) {
      if (GEPI.isStruct())
        continue;

      ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
      if (!IdxVal ||
          (GEPI.isBoundedSequential() &&
           IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
        return false;
    }
  }

  return llvm::all_of(U->users(),
                      [](User *UU) { return isSafeSROAElementUse(UU); });
}
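
// Illustrative example (hypothetical IR, added for exposition): the only
// users this predicate accepts are GEPs of the shape
//   getelementptr { i32, i32 }, { i32, i32 }* @g, i32 0, i32 1
// i.e. a leading zero index plus a constant element index, so every use can
// later be redirected to exactly one of the split globals.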

/// Look at all uses of the global and decide whether it is safe for us to
/// perform this transformation.
static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
  for (User *U : GV->users())
    if (!IsUserOfGlobalSafeForSRA(U, GV))
      return false;

  return true;
}

/// Copy over the debug info for a variable to its SRA replacements.
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
                                 uint64_t FragmentOffsetInBits,
                                 uint64_t FragmentSizeInBits,
                                 unsigned NumElements) {
  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);
  for (auto *GVE : GVs) {
    DIVariable *Var = GVE->getVariable();
    DIExpression *Expr = GVE->getExpression();
    if (NumElements > 1)
      Expr = DIExpression::createFragmentExpression(Expr, FragmentOffsetInBits,
                                                    FragmentSizeInBits);
    auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
    NGV->addDebugInfo(NGVE);
  }
}


/// Perform scalar replacement of aggregates on the specified global variable.
/// This opens the door for other optimizations by exposing the behavior of the
/// program in a more fine-grained way.  We have determined that this
/// transformation is safe already.  We return the first global variable we
/// insert so that the caller can reprocess it.
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
  // Make sure this global only has simple uses that we can SRA.
  if (!GlobalUsersSafeToSRA(GV))
    return nullptr;

  assert(GV->hasLocalLinkage());
  Constant *Init = GV->getInitializer();
  Type *Ty = Init->getType();

  std::vector<GlobalVariable*> NewGlobals;
  Module::GlobalListType &Globals = GV->getParent()->getGlobalList();

  // Get the alignment of the global, either explicit or target-specific.
  unsigned StartAlignment = GV->getAlignment();
  if (StartAlignment == 0)
    StartAlignment = DL.getABITypeAlignment(GV->getType());

  if (StructType *STy = dyn_cast<StructType>(Ty)) {
    uint64_t FragmentOffset = 0;
    unsigned NumElements = STy->getNumElements();
    NewGlobals.reserve(NumElements);
    const StructLayout &Layout = *DL.getStructLayout(STy);
    for (unsigned i = 0, e = NumElements; i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");
      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->getThreadLocalMode(),
                                              GV->getType()->getAddressSpace());
      NGV->setExternallyInitialized(GV->isExternallyInitialized());
      NGV->copyAttributesFrom(GV);
      Globals.push_back(NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      uint64_t FieldOffset = Layout.getElementOffset(i);
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset);
      if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i)))
        NGV->setAlignment(NewAlign);

      // Copy over the debug info for the variable.
      FragmentOffset = alignTo(FragmentOffset, NewAlign);
      uint64_t Size = DL.getTypeSizeInBits(NGV->getValueType());
      transferSRADebugInfo(GV, NGV, FragmentOffset, Size, NumElements);
      FragmentOffset += Size;
    }
  } else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
    unsigned NumElements = STy->getNumElements();
    if (NumElements > 16 && GV->hasNUsesOrMore(16))
      return nullptr; // It's not worth it.
    NewGlobals.reserve(NumElements);
    auto ElTy = STy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(ElTy);
    unsigned EltAlign = DL.getABITypeAlignment(ElTy);
    uint64_t FragmentSizeInBits = DL.getTypeSizeInBits(ElTy);
    for (unsigned i = 0, e = NumElements; i != e; ++i) {
      Constant *In = Init->getAggregateElement(i);
      assert(In && "Couldn't get element of initializer?");

      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
                                               GlobalVariable::InternalLinkage,
                                               In, GV->getName()+"."+Twine(i),
                                               GV->getThreadLocalMode(),
                                              GV->getType()->getAddressSpace());
      NGV->setExternallyInitialized(GV->isExternallyInitialized());
      NGV->copyAttributesFrom(GV);
      Globals.push_back(NGV);
      NewGlobals.push_back(NGV);

      // Calculate the known alignment of the field.  If the original aggregate
      // had 256 byte alignment for example, something might depend on that:
      // propagate info to each field.
      unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i);
      if (NewAlign > EltAlign)
        NGV->setAlignment(NewAlign);
      transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits,
                           NumElements);
    }
  }

  if (NewGlobals.empty())
    return nullptr;

  DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");

  Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));

  // Loop over all of the uses of the global, replacing the constantexpr geps,
  // with smaller constantexpr geps or direct references.
  while (!GV->use_empty()) {
    User *GEP = GV->user_back();
    assert(((isa<ConstantExpr>(GEP) &&
             cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)||
            isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!");

    // Ignore the first operand, which has to be zero or else the program is
    // quite broken (undefined).  Get the 2nd operand, which is the structure
    // or array index.
    unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
    if (Val >= NewGlobals.size()) Val = 0; // Out of bound array access.

    Value *NewPtr = NewGlobals[Val];
    Type *NewTy = NewGlobals[Val]->getValueType();

    // Form a shorter GEP if needed.
    if (GEP->getNumOperands() > 3) {
      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GEP)) {
        SmallVector<Constant*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = CE->getNumOperands(); i != e; ++i)
          Idxs.push_back(CE->getOperand(i));
        NewPtr =
            ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs);
      } else {
        GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP);
        SmallVector<Value*, 8> Idxs;
        Idxs.push_back(NullInt);
        for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)
          Idxs.push_back(GEPI->getOperand(i));
        NewPtr = GetElementPtrInst::Create(
            NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI);
      }
    }
    GEP->replaceAllUsesWith(NewPtr);

    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP))
      GEPI->eraseFromParent();
    else
      cast<ConstantExpr>(GEP)->destroyConstant();
  }

  // Delete the old global, now that it is dead.
  Globals.erase(GV);
  ++NumSRA;

  // Loop over the new globals array deleting any globals that are obviously
  // dead.  This can arise due to scalarization of a structure or an array that
  // has elements that are dead.
  unsigned FirstGlobal = 0;
  for (unsigned i = 0, e = NewGlobals.size(); i != e; ++i)
    if (NewGlobals[i]->use_empty()) {
      Globals.erase(NewGlobals[i]);
      if (FirstGlobal == i) ++FirstGlobal;
    }

  return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
}
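
// Illustrative example (added for exposition): given
//   @g = internal global { i32, i32 } zeroinitializer
// SRAGlobal creates @g.0 and @g.1 (one internal global per element, named
// with GV->getName()+"."+i), rewrites each "gep @g, 0, i, ..." to point at
// @g.i, and erases any split global that ends up unused.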

/// Return true if all users of the specified value will trap if the value is
/// dynamically null.  PHIs keeps track of any phi nodes we've seen to avoid
/// reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
                                        SmallPtrSetImpl<const PHINode*> &PHIs) {
  for (const User *U : V->users())
    if (isa<LoadInst>(U)) {
      // Will trap.
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Storing the value.
      }
    } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
      if (CI->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
      if (II->getCalledValue() != V) {
        //cerr << "NONTRAPPING USE: " << *U;
        return false;  // Not calling the ptr
      }
    } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
    } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
      // If we've already seen this phi node, ignore it, it has already been
      // checked.
      if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
        return false;
    } else if (isa<ICmpInst>(U) &&
               isa<ConstantPointerNull>(U->getOperand(1))) {
      // Ignore icmp X, null
    } else {
      //cerr << "NONTRAPPING USE: " << *U;
      return false;
    }

  return true;
}

/// Return true if all uses of any loads from GV will trap if the loaded value
/// is null.  Note that this also permits comparisons of the loaded value
/// against null, as a special case.
static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
  for (const User *U : GV->users())
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      SmallPtrSet<const PHINode*, 8> PHIs;
      if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
        return false;
    } else if (isa<StoreInst>(U)) {
      // Ignore stores to the global.
    } else {
      // We don't know or understand this user, bail out.
      //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
      return false;
    }
  return true;
}

static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
  bool Changed = false;
  for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
    Instruction *I = cast<Instruction>(*UI++);
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      LI->setOperand(0, NewV);
      Changed = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      if (SI->getOperand(1) == V) {
        SI->setOperand(1, NewV);
        Changed = true;
      }
    } else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      CallSite CS(I);
      if (CS.getCalledValue() == V) {
        // Calling through the pointer!  Turn into a direct call, but be careful
        // that the pointer is not also being passed as an argument.
        CS.setCalledFunction(NewV);
        Changed = true;
        bool PassedAsArg = false;
        for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
          if (CS.getArgument(i) == V) {
            PassedAsArg = true;
            CS.setArgument(i, NewV);
          }

        if (PassedAsArg) {
          // Being passed as an argument also.  Be careful to not invalidate UI!
          UI = V->user_begin();
        }
      }
    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(CI,
                                ConstantExpr::getCast(CI->getOpcode(),
                                                      NewV, CI->getType()));
      if (CI->use_empty()) {
        Changed = true;
        CI->eraseFromParent();
      }
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
      // Should handle GEP here.
      SmallVector<Constant*, 8> Idxs;
      Idxs.reserve(GEPI->getNumOperands()-1);
      for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
           i != e; ++i)
        if (Constant *C = dyn_cast<Constant>(*i))
          Idxs.push_back(C);
        else
          break;
      if (Idxs.size() == GEPI->getNumOperands()-1)
        Changed |= OptimizeAwayTrappingUsesOfValue(
            GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs));
      if (GEPI->use_empty()) {
        Changed = true;
        GEPI->eraseFromParent();
      }
    }
  }

  return Changed;
}


/// The specified global has only one non-null value stored into it.  If there
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// value, so we can optimize away the load.
static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
                                            const DataLayout &DL,
                                            TargetLibraryInfo *TLI) {
  bool Changed = false;

  // Keep track of whether we are able to remove all the uses of the global
  // other than the store that defines it.
  bool AllNonStoreUsesGone = true;

  // Replace all uses of loads with uses of uses of the stored value.
  for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){
    User *GlobalUser = *GUI++;
    if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
      // If we were able to delete all uses of the loads
      if (LI->use_empty()) {
        LI->eraseFromParent();
        Changed = true;
      } else {
        AllNonStoreUsesGone = false;
      }
    } else if (isa<StoreInst>(GlobalUser)) {
      // Ignore the store that stores "LV" to the global.
      assert(GlobalUser->getOperand(1) == GV &&
             "Must be storing *to* the global");
    } else {
      AllNonStoreUsesGone = false;

      // If we get here we could have other crazy uses that are transitively
      // loaded.
      assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
              isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
              isa<BitCastInst>(GlobalUser) ||
              isa<GetElementPtrInst>(GlobalUser)) &&
             "Only expect load and stores!");
    }
  }

  if (Changed) {
    DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n");
    ++NumGlobUses;
  }

  // If we nuked all of the loads, then none of the stores are needed either,
  // nor is the global.
  if (AllNonStoreUsesGone) {
    if (isLeakCheckerRoot(GV)) {
      Changed |= CleanupPointerRootUsers(GV, TLI);
    } else {
      Changed = true;
      CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
    }
    if (GV->use_empty()) {
      DEBUG(dbgs() << "  *** GLOBAL NOW DEAD!\n");
      Changed = true;
      GV->eraseFromParent();
      ++NumDeleted;
    }
  }
  return Changed;
}
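
// Illustrative example (hypothetical IR, added for exposition): if the only
// non-null value ever stored to @fptr is @callee, then
//   %f = load void ()*, void ()** @fptr
//   call void %f()
// becomes a direct "call void @callee()" above, because a null pointer
// would have trapped at the call anyway.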

/// Walk the use list of V, constant folding all of the instructions that are
/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
                                TargetLibraryInfo *TLI) {
  for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
    if (Instruction *I = dyn_cast<Instruction>(*UI++))
      if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
        I->replaceAllUsesWith(NewC);

        // Advance UI to the next non-I use to avoid invalidating it!
        // Instructions could multiply use V.
        while (UI != E && *UI == I)
          ++UI;
        if (isInstructionTriviallyDead(I, TLI))
          I->eraseFromParent();
      }
}

/// This function takes the specified global variable, and transforms the
/// program as if it always contained the result of the specified malloc.
/// Because it is always the result of the specified malloc, there is no reason
/// to actually DO the malloc.  Instead, turn the malloc into a global, and
/// rewrite any loads of GV as uses of the new global.
static GlobalVariable *
OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
                              ConstantInt *NElements, const DataLayout &DL,
                              TargetLibraryInfo *TLI) {
  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');

  Type *GlobalType;
  if (NElements->getZExtValue() == 1)
    GlobalType = AllocTy;
  else
    // If we have an array allocation, the global variable is of an array.
    GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());

  // Create the new global variable.  The contents of the malloc'd memory is
  // undefined, so initialize with an undef value.
  GlobalVariable *NewGV = new GlobalVariable(
      *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
      UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
      GV->getThreadLocalMode());

  // If there are bitcast users of the malloc (which is typical, usually we have
  // a malloc + bitcast) then replace them with uses of the new global.  Update
  // other users to use the global as well.
  BitCastInst *TheBC = nullptr;
  while (!CI->use_empty()) {
    Instruction *User = cast<Instruction>(CI->user_back());
    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (BCI->getType() == NewGV->getType()) {
        BCI->replaceAllUsesWith(NewGV);
        BCI->eraseFromParent();
      } else {
        BCI->setOperand(0, NewGV);
      }
    } else {
      if (!TheBC)
        TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
      User->replaceUsesOfWith(CI, TheBC);
    }
  }

  Constant *RepValue = NewGV;
  if (NewGV->getType() != GV->getValueType())
    RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType());

  // If there is a comparison against null, we will insert a global bool to
  // keep track of whether the global was initialized yet or not.
  GlobalVariable *InitBool =
    new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
                       GlobalValue::InternalLinkage,
                       ConstantInt::getFalse(GV->getContext()),
                       GV->getName()+".init", GV->getThreadLocalMode());
  bool InitBoolUsed = false;

  // Loop over all uses of GV, processing them in turn.
  while (!GV->use_empty()) {
    if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
      // The global is initialized when the store to it occurs.
      new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
                    SI->getOrdering(), SI->getSyncScopeID(), SI);
      SI->eraseFromParent();
      continue;
    }

    LoadInst *LI = cast<LoadInst>(GV->user_back());
    while (!LI->use_empty()) {
      Use &LoadUse = *LI->use_begin();
      ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
      if (!ICI) {
        LoadUse = RepValue;
        continue;
      }

      // Replace the cmp X, 0 with a use of the bool value.
      // Sink the load to where the compare was, if atomic rules allow us to.
      Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
                               LI->getOrdering(), LI->getSyncScopeID(),
                               LI->isUnordered() ? (Instruction*)ICI : LI);
      InitBoolUsed = true;
      switch (ICI->getPredicate()) {
      default: llvm_unreachable("Unknown ICmp Predicate!");
      case ICmpInst::ICMP_ULT:
      case ICmpInst::ICMP_SLT:   // X < null -> always false
        LV = ConstantInt::getFalse(GV->getContext());
        break;
      case ICmpInst::ICMP_ULE:
      case ICmpInst::ICMP_SLE:
      case ICmpInst::ICMP_EQ:
        LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
        break;
      case ICmpInst::ICMP_NE:
      case ICmpInst::ICMP_UGE:
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_UGT:
      case ICmpInst::ICMP_SGT:
        break;  // no change.
      }
      ICI->replaceAllUsesWith(LV);
      ICI->eraseFromParent();
    }
    LI->eraseFromParent();
  }

  // If the initialization boolean was used, insert it, otherwise delete it.
  if (!InitBoolUsed) {
    while (!InitBool->use_empty())  // Delete initializations
      cast<StoreInst>(InitBool->user_back())->eraseFromParent();
    delete InitBool;
  } else
    GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);

  // Now the GV is dead, nuke it and the malloc..
  GV->eraseFromParent();
  CI->eraseFromParent();

  // To further other optimizations, loop over all users of NewGV and try to
  // constant prop them.  This will promote GEP instructions with constant
  // indices into GEP constant-exprs, which will allow global-opt to hack on it.
  ConstantPropUsersOf(NewGV, DL, TLI);
  if (RepValue != NewGV)
    ConstantPropUsersOf(RepValue, DL, TLI);

  return NewGV;
}
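
// Illustrative example (hypothetical IR, added for exposition): for
//   %m = call i8* @malloc(i64 8)
//   store i8* %m, i8** @g
// the malloc is replaced by a new global @g.body holding undef, and any
// "icmp eq/ne %load, null" tests are answered through the @g.init boolean
// instead of the pointer value.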

/// Scan the use-list of V checking to make sure that there are no complex uses
/// of V.  We permit simple things like dereferencing the pointer, but not
/// storing through the address, unless it is to the specified global.
static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
                                                      const GlobalVariable *GV,
                                        SmallPtrSetImpl<const PHINode*> &PHIs) {
  for (const User *U : V->users()) {
    const Instruction *Inst = cast<Instruction>(U);

    if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
      continue; // Fine, ignore.
    }

    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
        return false;  // Storing the pointer itself... bad.
      continue; // Otherwise, storing through it, or storing into GV... fine.
    }

    // Must index into the array and into the struct.
    if (isa<GetElementPtrInst>(Inst) && Inst->getNumOperands() >= 3) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs))
        return false;
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(Inst)) {
      // PHIs are ok if all uses are ok.  Don't infinitely recurse through PHI
      // cycles.
      if (PHIs.insert(PN).second)
        if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs))
          return false;
      continue;
    }

    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
      if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
        return false;
      continue;
    }

    return false;
  }
  return true;
}

/// The Alloc pointer is stored into GV somewhere.  Transform all uses of the
/// allocation into loads from the global and uses of the resultant pointer.
/// Further, delete the store into GV.  This assumes that these values pass the
/// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate.
static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
                                          GlobalVariable *GV) {
  while (!Alloc->use_empty()) {
    Instruction *U = cast<Instruction>(*Alloc->user_begin());
    Instruction *InsertPt = U;
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      // If this is the store of the allocation into the global, remove it.
      if (SI->getOperand(1) == GV) {
        SI->eraseFromParent();
        continue;
      }
    } else if (PHINode *PN = dyn_cast<PHINode>(U)) {
      // Insert the load in the corresponding predecessor, not right before the
      // PHI.
      InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator();
    } else if (isa<BitCastInst>(U)) {
      // Must be bitcast between the malloc and store to initialize the global.
      ReplaceUsesOfMallocWithGlobal(U, GV);
      U->eraseFromParent();
      continue;
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      // If this is a "GEP bitcast" and the user is a store to the global, then
      // just process it as a bitcast.
      if (GEPI->hasAllZeroIndices() && GEPI->hasOneUse())
        if (StoreInst *SI = dyn_cast<StoreInst>(GEPI->user_back()))
          if (SI->getOperand(1) == GV) {
            // Must be bitcast GEP between the malloc and store to initialize
            // the global.
            ReplaceUsesOfMallocWithGlobal(GEPI, GV);
            GEPI->eraseFromParent();
            continue;
          }
    }

    // Insert a load from the global, and use it instead of the malloc.
    Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt);
    U->replaceUsesOfWith(Alloc, NL);
  }
}

/// Verify that all uses of V (a load, or a phi of a load) are simple enough to
/// perform heap SRA on.  This permits GEP's that index through the array and
/// struct field, icmps of null, and PHIs.
static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs,
                        SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) {
  // We permit two users of the load: setcc comparing against the null
  // pointer, and a getelementptr of a specific form.
  for (const User *U : V->users()) {
    const Instruction *UI = cast<Instruction>(U);

    // Comparison against null is ok.
    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UI)) {
      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
        return false;
      continue;
    }

    // getelementptr is also ok, but only a simple form.
    if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(UI)) {
      // Must index into the array and into the struct.
      if (GEPI->getNumOperands() < 3)
        return false;

      // Otherwise the GEP is ok.
      continue;
    }

    if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
      if (!LoadUsingPHIsPerLoad.insert(PN).second)
        // This means some phi nodes are dependent on each other.
        // Avoid infinite looping!
        return false;
      if (!LoadUsingPHIs.insert(PN).second)
        // If we have already analyzed this PHI, then it is safe.
        continue;

      // Make sure all uses of the PHI are simple enough to transform.
      if (!LoadUsesSimpleEnoughForHeapSRA(PN,
                                          LoadUsingPHIs, LoadUsingPHIsPerLoad))
        return false;

      continue;
    }

    // Otherwise we don't know what this is, not ok.
    return false;
  }

  return true;
}


/// If all users of values loaded from GV are simple enough to perform HeapSRA,
/// return true.
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
                                                    Instruction *StoredVal) {
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
  SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
  for (const User *U : GV->users())
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
                                          LoadUsingPHIsPerLoad))
        return false;
      LoadUsingPHIsPerLoad.clear();
    }

  // If we reach here, we know that all uses of the loads and transitive uses
  // (through PHI nodes) are simple enough to transform.  However, we don't know
  // that all inputs to the PHI nodes are in the same equivalence sets.
  // Check to verify that all operands of the PHIs are either PHIs that can be
  // transformed, loads from GV, or MI itself.
  for (const PHINode *PN : LoadUsingPHIs) {
    for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
      Value *InVal = PN->getIncomingValue(op);

      // PHI of the stored value itself is ok.
      if (InVal == StoredVal) continue;

      if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
        // One of the PHIs in our set is (optimistically) ok.
        if (LoadUsingPHIs.count(InPN))
          continue;
        return false;
      }

      // Load from GV is ok.
      if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
        if (LI->getOperand(0) == GV)
          continue;

      // UNDEF? NULL?

      // Anything else is rejected.
      return false;
    }
  }

  return true;
}

static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];

  if (FieldNo >= FieldVals.size())
    FieldVals.resize(FieldNo+1);

  // If we already have this value, just reuse the previously scalarized
  // version.
  if (Value *FieldVal = FieldVals[FieldNo])
    return FieldVal;

  // Depending on what instruction this is, we have several cases.
  Value *Result;
  if (LoadInst *LI = dyn_cast<LoadInst>(V)) {
    // This is a scalarized version of the load from the global.  Just create
    // a new Load of the scalarized global.
    Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
                                           InsertedScalarizedValues,
                                           PHIsToRewrite),
                          LI->getName()+".f"+Twine(FieldNo), LI);
  } else {
    PHINode *PN = cast<PHINode>(V);
    // PN's type is pointer to struct.  Make a new PHI of pointer to struct
    // field.

    PointerType *PTy = cast<PointerType>(PN->getType());
    StructType *ST = cast<StructType>(PTy->getElementType());

    unsigned AS = PTy->getAddressSpace();
    PHINode *NewPN =
      PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
                     PN->getNumIncomingValues(),
                     PN->getName()+".f"+Twine(FieldNo), PN);
    Result = NewPN;
    PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
  }

  return FieldVals[FieldNo] = Result;
}
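
// Note (added for exposition): during heap SRoA of a malloc'd array of
// { i32, i32* }, GetHeapSROAValue lazily materializes, per field, either a
// load of the corresponding @g.f<N> field global or a field-wise PHI, and
// caches the result in InsertedScalarizedValues so each (value, field) pair
// is scalarized only once.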

/// Given a load instruction and a value derived from the load, rewrite the
/// derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
             DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  // If this is a comparison against null, handle it.
  if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
    assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
    // If we have a setcc of the loaded pointer, we can use a setcc of any
    // field.
    Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
                                   InsertedScalarizedValues, PHIsToRewrite);

    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
                              Constant::getNullValue(NPtr->getType()),
                              SCI->getName());
    SCI->replaceAllUsesWith(New);
    SCI->eraseFromParent();
    return;
  }

  // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...'
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(LoadUser)) {
    assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2))
           && "Unexpected GEPI!");

    // Load the pointer for this field.
    unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
    Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
                                     InsertedScalarizedValues, PHIsToRewrite);

    // Create the new GEP idx vector.
    SmallVector<Value*, 8> GEPIdx;
    GEPIdx.push_back(GEPI->getOperand(1));
    GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end());

    Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(),
                                             NewPtr, GEPIdx,
                                             GEPI->getName(), GEPI);
    GEPI->replaceAllUsesWith(NGEPI);
    GEPI->eraseFromParent();
    return;
  }

  // Recursively transform the users of PHI nodes.  This will lazily create the
  // PHIs that are needed for individual elements.  Keep track of what PHIs we
  // see in InsertedScalarizedValues so that we don't get infinite loops (very
  // antisocial).  If the PHI is already in InsertedScalarizedValues, it has
  // already been seen first by another load, so its uses have already been
  // processed.
  PHINode *PN = cast<PHINode>(LoadUser);
  if (!InsertedScalarizedValues.insert(std::make_pair(PN,
                                              std::vector<Value*>())).second)
    return;

  // If this is the first time we've seen this PHI, recursively process all
  // users.
  for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }
}
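
// For illustration (hypothetical IR, names ours): a null check on the loaded
// pointer such as
//   %p = load %pair*, %pair** @G
//   %c = icmp eq %pair* %p, null
// is rewritten by the ICmpInst case above into a test of the first field's
// pointer instead:
//   %p.f0 = load i32*, i32** @G.f0
//   %c = icmp eq i32* %p.f0, null
// Any field is equally valid for the test, since PerformHeapAllocSRoA below
// either allocates all fields together or nulls them all out together.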

/// We are performing Heap SRoA on a global.  Ptr is a value loaded from the
/// global.  Eliminate all uses of Ptr, making them use FieldGlobals instead.
/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
               DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
  for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);
    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
  }

  if (Load->use_empty()) {
    Load->eraseFromParent();
    InsertedScalarizedValues.erase(Load);
  }
}

/// CI is an allocation of an array of structures.  Break it up into multiple
/// allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
                                            Value *NElems, const DataLayout &DL,
                                            const TargetLibraryInfo *TLI) {
  DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
  Type *MAT = getMallocAllocatedType(CI, TLI);
  StructType *STy = cast<StructType>(MAT);

  // There is guaranteed to be at least one use of the malloc (storing
  // it into GV).  If there are other uses, change them to be uses of
  // the global to simplify later code.  This also deletes the store
  // into GV.
  ReplaceUsesOfMallocWithGlobal(CI, GV);

  // Okay, at this point, there are no users of the malloc.  Insert N
  // new mallocs at the same place as CI, and N globals.
  std::vector<Value*> FieldGlobals;
  std::vector<Value*> FieldMallocs;

  SmallVector<OperandBundleDef, 1> OpBundles;
  CI->getOperandBundlesAsDefs(OpBundles);

  unsigned AS = GV->getType()->getPointerAddressSpace();
  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;
       ++FieldNo) {
    Type *FieldTy = STy->getElementType(FieldNo);
    PointerType *PFieldTy = PointerType::get(FieldTy, AS);

    GlobalVariable *NGV = new GlobalVariable(
        *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage,
        Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo),
        nullptr, GV->getThreadLocalMode());
    NGV->copyAttributesFrom(GV);
    FieldGlobals.push_back(NGV);

    unsigned TypeSize = DL.getTypeAllocSize(FieldTy);
    if (StructType *ST = dyn_cast<StructType>(FieldTy))
      TypeSize = DL.getStructLayout(ST)->getSizeInBytes();
    Type *IntPtrTy = DL.getIntPtrType(CI->getType());
    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
                                        ConstantInt::get(IntPtrTy, TypeSize),
                                        NElems, OpBundles, nullptr,
                                        CI->getName() + ".f" + Twine(FieldNo));
    FieldMallocs.push_back(NMI);
    new StoreInst(NMI, NGV, CI);
  }

  // The tricky aspect of this transformation is handling the case when malloc
  // fails.  In the original code, malloc failing would set the result pointer
  // of malloc to null.  In this case, some mallocs could succeed and others
  // could fail.  As such, we emit code that looks like this:
  //    F0 = malloc(field0)
  //    F1 = malloc(field1)
  //    F2 = malloc(field2)
  //    if (F0 == 0 || F1 == 0 || F2 == 0) {
  //      if (F0) { free(F0); F0 = 0; }
  //      if (F1) { free(F1); F1 = 0; }
  //      if (F2) { free(F2); F2 = 0; }
  //    }
  // The malloc can also fail if its argument is too large.
  Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
  Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
                                  ConstantZero, "isneg");
  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
                             Constant::getNullValue(FieldMallocs[i]->getType()),
                               "isnull");
    RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
  }

  // Split the basic block at the old malloc.
  BasicBlock *OrigBB = CI->getParent();
  BasicBlock *ContBB =
      OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont");

  // Create the block to check the first condition.  Put all these blocks at the
  // end of the function as they are unlikely to be executed.
  BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
                                                "malloc_ret_null",
                                                OrigBB->getParent());

  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond
  // branch on RunningOr.
  OrigBB->getTerminator()->eraseFromParent();
  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB);

  // Within the NullPtrBlock, we need to emit a comparison and branch for each
  // pointer, because some may be null while others are not.
  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock);
    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
                              Constant::getNullValue(GVVal->getType()));
    BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
                                               OrigBB->getParent());
    BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
                                               OrigBB->getParent());
    Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
                                         Cmp, NullPtrBlock);

    // Fill in FreeBlock.
    CallInst::CreateFree(GVVal, OpBundles, BI);
    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i],
                  FreeBlock);
    BranchInst::Create(NextBlock, FreeBlock);

    NullPtrBlock = NextBlock;
  }

  BranchInst::Create(ContBB, NullPtrBlock);

  // CI is no longer needed, remove it.
  CI->eraseFromParent();

  /// As we process loads, if we can't immediately update all uses of the load,
  /// keep track of what scalarized loads are inserted for a given load.
  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues;
  InsertedScalarizedValues[GV] = FieldGlobals;

  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite;

  // Okay, the malloc site is completely handled.  All of the uses of GV are now
  // loads, and all uses of those loads are simple.  Rewrite them to use loads
  // of the per-field globals instead.
  for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) {
    Instruction *User = cast<Instruction>(*UI++);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
      continue;
    }

    // Must be a store of null.
    StoreInst *SI = cast<StoreInst>(User);
    assert(isa<ConstantPointerNull>(SI->getOperand(0)) &&
           "Unexpected heap-sra user!");

    // Insert a store of null into each global.
    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {
      Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType();
      Constant *Null = Constant::getNullValue(ValTy);
      new StoreInst(Null, FieldGlobals[i], SI);
    }
    // Erase the original store.
    SI->eraseFromParent();
  }

  // While we have PHIs that are interesting to rewrite, do it.
  while (!PHIsToRewrite.empty()) {
    PHINode *PN = PHIsToRewrite.back().first;
    unsigned FieldNo = PHIsToRewrite.back().second;
    PHIsToRewrite.pop_back();
    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]);
    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi");

    // Add all the incoming values.  This can materialize more phis.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      Value *InVal = PN->getIncomingValue(i);
      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
                               PHIsToRewrite);
      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
    }
  }

  // Drop all inter-phi links and any loads that made it this far.
  for (DenseMap<Value*, std::vector<Value*> >::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->dropAllReferences();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->dropAllReferences();
  }

  // Delete all the phis and loads now that inter-references are dead.
  for (DenseMap<Value*, std::vector<Value*> >::iterator
       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end();
       I != E; ++I) {
    if (PHINode *PN = dyn_cast<PHINode>(I->first))
      PN->eraseFromParent();
    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first))
      LI->eraseFromParent();
  }

  // The old global is now dead, remove it.
  GV->eraseFromParent();

  ++NumHeapSRA;
  return cast<GlobalVariable>(FieldGlobals[0]);
}
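
// Sketch of the overall effect (hypothetical IR): with
//   %pair = type { i32, float }
//   @G = internal global %pair* null
// and a store of 'malloc(sizeof(%pair) * n)' into @G, the code above emits
// one global and one malloc per field:
//   @G.f0 = internal global i32* null     ; names follow the ".f" Twine above
//   @G.f1 = internal global float* null
// together with the "isneg"/"isnull" checks and the "free_it"/"next" cleanup
// blocks described in the malloc-failure comment, and then rewrites every
// load of @G field-by-field.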

/// This function is called when we see a pointer global variable with a single
/// value stored into it that is a malloc or cast of malloc.
static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
                                               Type *AllocTy,
                                               AtomicOrdering Ordering,
                                               const DataLayout &DL,
                                               TargetLibraryInfo *TLI) {
  // If this is a malloc of an abstract type, don't touch it.
  if (!AllocTy->isSized())
    return false;

  // We can't optimize this global unless all uses of it are *known* to be
  // of the malloc value, not of the null initializer value (consider a use
  // that compares the global's value against zero to see if the malloc has
  // been reached).  To do this, we check to see if all uses of the global
  // would trap if the global were null: this proves that they must all
  // happen after the malloc.
  if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
    return false;

  // We can't optimize this if the malloc itself is used in a complex way,
  // for example, being stored into multiple globals.  This allows the
  // malloc to be stored into the specified global, loaded, icmp'd, and
  // GEP'd.  These are all things we could transform to using the global
  // for.
  SmallPtrSet<const PHINode*, 8> PHIs;
  if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
    return false;

  // If we have a global that is only initialized with a fixed size malloc,
  // transform the program to use global memory instead of malloc'd memory.
  // This eliminates dynamic allocation, avoids an indirection accessing the
  // data, and exposes the resultant global to further GlobalOpt.
  // We cannot optimize the malloc if we cannot determine malloc array size.
  Value *NElems = getMallocArraySize(CI, DL, TLI, true);
  if (!NElems)
    return false;

  if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
    // Restrict this transformation to only working on small allocations
    // (2048 bytes currently), as we don't want to introduce a 16M global or
    // something.
    if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) {
      OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI);
      return true;
    }

  // If the allocation is an array of structures, consider transforming this
  // into multiple malloc'd arrays, one for each field.  This is basically
  // SRoA for malloc'd memory.

  if (Ordering != AtomicOrdering::NotAtomic)
    return false;

  // If this is an allocation of a fixed size array of structs, analyze as a
  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100
  if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
    if (ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
      AllocTy = AT->getElementType();

  StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
  if (!AllocSTy)
    return false;

  // If the structure has an unreasonable number of fields, leave it
  // alone.
  if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
      AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {

    // If this is a fixed size array, transform the Malloc to be an alloc of
    // structs.  malloc [100 x struct],1 -> malloc struct, 100
    if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
      Type *IntPtrTy = DL.getIntPtrType(CI->getType());
      unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes();
      Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
      Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
      SmallVector<OperandBundleDef, 1> OpBundles;
      CI->getOperandBundlesAsDefs(OpBundles);
      Instruction *Malloc =
          CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements,
                                 OpBundles, nullptr, CI->getName());
      Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
      CI->replaceAllUsesWith(Cast);
      CI->eraseFromParent();
      if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc))
        CI = cast<CallInst>(BCI->getOperand(0));
      else
        CI = cast<CallInst>(Malloc);
    }

    PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL,
                         TLI);
    return true;
  }

  return false;
}
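
// Worked example for the 2048-byte cap above (numbers ours): on a 64-bit
// target, 'malloc(sizeof(i32) * 100)' stored once into a null global is 400
// bytes, so OptimizeGlobalAddressOfMalloc folds it into a [100 x i32]
// global.  An 8192-byte request fails the cap and is transformed only if it
// is a small array of structs, via the heap-SRoA path that follows.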

// Try to optimize globals based on the knowledge that only one value (besides
// its initializer) is ever stored to the global.
static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
                                     AtomicOrdering Ordering,
                                     const DataLayout &DL,
                                     TargetLibraryInfo *TLI) {
  // Ignore no-op GEPs and bitcasts.
  StoredOnceVal = StoredOnceVal->stripPointerCasts();

  // If we are dealing with a pointer global that is initialized to null and
  // only has one (non-null) value stored into it, then we can optimize any
  // users of the loaded value (often calls and loads) that would trap if the
  // value was null.
  if (GV->getInitializer()->getType()->isPointerTy() &&
      GV->getInitializer()->isNullValue()) {
    if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
      if (GV->getInitializer()->getType() != SOVC->getType())
        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());

      // Optimize away any trapping uses of the loaded value.
      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI))
        return true;
    } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
      Type *MallocType = getMallocAllocatedType(CI, TLI);
      if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
                                                           Ordering, DL, TLI))
        return true;
    }
  }

  return false;
}

/// At this point, we have learned that the only two values ever stored into GV
/// are its initializer and OtherVal.  See if we can shrink the global into a
/// boolean and select between the two values whenever it is used.  This exposes
/// the values to other scalar optimizations.
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
  Type *GVElType = GV->getValueType();

  // If GVElType is already i1, it is already shrunk.  If the type of the GV is
  // an FP value, pointer or vector, don't do this optimization because a select
  // between them is very expensive and unlikely to lead to later
  // simplification.  In these cases, we typically end up with "cond ? v1 : v2"
  // where v1 and v2 both require constant pool loads, a big loss.
  if (GVElType == Type::getInt1Ty(GV->getContext()) ||
      GVElType->isFloatingPointTy() ||
      GVElType->isPointerTy() || GVElType->isVectorTy())
    return false;

  // Walk the use list of the global seeing if all the uses are loads or
  // stores.  If there is anything else, bail out.
  for (User *U : GV->users())
    if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
      return false;

  DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");

  // Create the new global, initializing it to false.
  GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
                                             false,
                                             GlobalValue::InternalLinkage,
                                        ConstantInt::getFalse(GV->getContext()),
                                             GV->getName()+".b",
                                             GV->getThreadLocalMode(),
                                             GV->getType()->getAddressSpace());
  NewGV->copyAttributesFrom(GV);
  GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);

  Constant *InitVal = GV->getInitializer();
  assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
         "No reason to shrink to bool!");

  SmallVector<DIGlobalVariableExpression *, 1> GVs;
  GV->getDebugInfo(GVs);

  // If initialized to zero and storing one into the global, we can use a cast
  // instead of a select to synthesize the desired value.
  bool IsOneZero = false;
  bool EmitOneOrZero = true;
  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
    IsOneZero = InitVal->isNullValue() && CI->isOne();

    if (ConstantInt *CIInit = dyn_cast<ConstantInt>(GV->getInitializer())) {
      uint64_t ValInit = CIInit->getZExtValue();
      uint64_t ValOther = CI->getZExtValue();
      uint64_t ValMinus = ValOther - ValInit;

      for (auto *GVe : GVs) {
        DIGlobalVariable *DGV = GVe->getVariable();
        DIExpression *E = GVe->getExpression();

        // It is expected that the address of the optimized global variable is
        // on top of the stack.  After optimization, the value of that variable
        // will be either 0 for the initial value or 1 for the other value.
        // The following expression should return a constant integer value
        // depending on the value at the global object's address:
        // val * (ValOther - ValInit) + ValInit:
        // DW_OP_deref DW_OP_constu <ValMinus>
        // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
        E = DIExpression::get(NewGV->getContext(),
                             {dwarf::DW_OP_deref,
                              dwarf::DW_OP_constu,
                              ValMinus,
                              dwarf::DW_OP_mul,
                              dwarf::DW_OP_constu,
                              ValInit,
                              dwarf::DW_OP_plus,
                              dwarf::DW_OP_stack_value});
        DIGlobalVariableExpression *DGVE =
          DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
        NewGV->addDebugInfo(DGVE);
      }
      EmitOneOrZero = false;
    }
  }

  if (EmitOneOrZero) {
    // FIXME: This only emits the address for the debugger; the location will
    // only ever hold 0 or 1.
    for (auto *GV : GVs)
      NewGV->addDebugInfo(GV);
  }

  while (!GV->use_empty()) {
    Instruction *UI = cast<Instruction>(GV->user_back());
    if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      // Change the store into a boolean store.
      bool StoringOther = SI->getOperand(0) == OtherVal;
      // Only do this if we weren't storing a loaded value.
      Value *StoreVal;
      if (StoringOther || SI->getOperand(0) == InitVal) {
        StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
                                    StoringOther);
      } else {
        // Otherwise, we are storing a previously loaded copy.  To do this,
        // change the copy from copying the original value to just copying the
        // bool.
        Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));

        // If we've already replaced the input, StoredVal will be a cast or
        // select instruction.  If not, it will be a load of the original
        // global.
        if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
          assert(LI->getOperand(0) == GV && "Not a copy!");
          // Insert a new load, to preserve the saved value.
          StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
                                  LI->getOrdering(), LI->getSyncScopeID(), LI);
        } else {
          assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
                 "This is not a form that we understand!");
          StoreVal = StoredVal->getOperand(0);
          assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
        }
      }
      new StoreInst(StoreVal, NewGV, false, 0,
                    SI->getOrdering(), SI->getSyncScopeID(), SI);
    } else {
      // Change the load into a load of bool then a select.
      LoadInst *LI = cast<LoadInst>(UI);
      LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
      Value *NSI;
      if (IsOneZero)
        NSI = new ZExtInst(NLI, LI->getType(), "", LI);
      else
        NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
      NSI->takeName(LI);
      LI->replaceAllUsesWith(NSI);
    }
    UI->eraseFromParent();
  }

  // Retain the name of the old global variable. People who are debugging their
  // programs may expect these variables to be named the same.
  NewGV->takeName(GV);
  GV->eraseFromParent();
  return true;
}
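
// Sketch of the resulting IR for an i32 global @G with InitVal 0 and
// OtherVal 42 (hypothetical example, names ours):
//   @G.b = internal global i1 false
//   store i1 true, i1* @G.b            ; was: store i32 42, i32* @G
//   %b = load i1, i1* @G.b
//   %v = select i1 %b, i32 42, i32 0   ; was: load i32, i32* @G
// When InitVal is 0 and OtherVal is 1 (IsOneZero), the select becomes a
// plain zext of the loaded bit instead.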

static bool deleteIfDead(GlobalValue &GV,
                         SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
  GV.removeDeadConstantUsers();

  if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
    return false;

  if (const Comdat *C = GV.getComdat())
    if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
      return false;

  bool Dead;
  if (auto *F = dyn_cast<Function>(&GV))
    Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
  else
    Dead = GV.use_empty();
  if (!Dead)
    return false;

  DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
  GV.eraseFromParent();
  ++NumDeleted;
  return true;
}

static bool isPointerValueDeadOnEntryToFunction(
    const Function *F, GlobalValue *GV,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  // Find all uses of GV. We expect them all to be in F, and if we can't
  // identify any of the uses we bail out.
  //
  // On each of these uses, identify if the memory that GV points to is
  // used/required/live at the start of the function. If it is not, for example
  // if the first thing the function does is store to the GV, the GV can
  // possibly be demoted.
  //
  // We don't do an exhaustive search for memory operations - simply look
  // through bitcasts as they're quite common and benign.
  const DataLayout &DL = GV->getParent()->getDataLayout();
  SmallVector<LoadInst *, 4> Loads;
  SmallVector<StoreInst *, 4> Stores;
  for (auto *U : GV->users()) {
    if (Operator::getOpcode(U) == Instruction::BitCast) {
      for (auto *UU : U->users()) {
        if (auto *LI = dyn_cast<LoadInst>(UU))
          Loads.push_back(LI);
        else if (auto *SI = dyn_cast<StoreInst>(UU))
          Stores.push_back(SI);
        else
          return false;
      }
      continue;
    }

    Instruction *I = dyn_cast<Instruction>(U);
    if (!I)
      return false;
    assert(I->getParent()->getParent() == F);

    if (auto *LI = dyn_cast<LoadInst>(I))
      Loads.push_back(LI);
    else if (auto *SI = dyn_cast<StoreInst>(I))
      Stores.push_back(SI);
    else
      return false;
  }

  // We have identified all uses of GV into loads and stores. Now check if all
  // of them are known not to depend on the value of the global at the function
  // entry point. We do this by ensuring that every load is dominated by at
  // least one store.
  auto &DT = LookupDomTree(*const_cast<Function *>(F));

  // The below check is quadratic. Check we're not going to do too many tests.
  // FIXME: Even though this will always have worst-case quadratic time, we
  // could put effort into minimizing the average time by putting stores that
  // have been shown to dominate at least one load at the beginning of the
  // Stores array, making subsequent dominance checks more likely to succeed
  // early.
  //
  // The threshold here is fairly large because global->local demotion is a
  // very powerful optimization should it fire.
  const unsigned Threshold = 100;
  if (Loads.size() * Stores.size() > Threshold)
    return false;

  for (auto *L : Loads) {
    auto *LTy = L->getType();
    if (none_of(Stores, [&](const StoreInst *S) {
          auto *STy = S->getValueOperand()->getType();
          // The load is only dominated by the store if DomTree says so
          // and the number of bits loaded in L is less than or equal to
          // the number of bits stored in S.
          return DT.dominates(S, L) &&
                 DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy);
        }))
      return false;
  }
  // All loads have known dependences inside F, so the global can be localized.
  return true;
}
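
// For intuition (hypothetical C, not from a test case): given
//   static int g;
//   void f() { g = compute(); use(g); }
// every load of g inside f is dominated by a store of at least as many bits,
// so g's memory is dead on entry to f and processInternalGlobal below may
// demote g to an alloca in f's entry block.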

/// C may have non-instruction users. Can all of those users be turned into
/// instructions?
static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
  // We don't do this exhaustively. The most common pattern that we really need
  // to care about is a constant GEP or constant bitcast - so just looking
  // through one single ConstantExpr.
  //
  // The set of constants that this function returns true for must be able to
  // be handled by makeAllConstantUsesInstructions.
  for (auto *U : C->users()) {
    if (isa<Instruction>(U))
      continue;
    if (!isa<ConstantExpr>(U))
      // Non instruction, non-constantexpr user; cannot convert this.
      return false;
    for (auto *UU : U->users())
      if (!isa<Instruction>(UU))
        // A constantexpr used by another constant. We don't try and recurse any
        // further but just bail out at this point.
        return false;
  }

  return true;
}

/// C may have non-instruction users, and
/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
/// non-instruction users to instructions.
static void makeAllConstantUsesInstructions(Constant *C) {
  SmallVector<ConstantExpr*,4> Users;
  for (auto *U : C->users()) {
    if (isa<ConstantExpr>(U))
      Users.push_back(cast<ConstantExpr>(U));
    else
      // We should never get here; allNonInstructionUsersCanBeMadeInstructions
      // should not have returned true for C.
      assert(
          isa<Instruction>(U) &&
          "Can't transform non-constantexpr non-instruction to instruction!");
  }

  SmallVector<Value*,4> UUsers;
  for (auto *U : Users) {
    UUsers.clear();
    for (auto *UU : U->users())
      UUsers.push_back(UU);
    for (auto *UU : UUsers) {
      Instruction *UI = cast<Instruction>(UU);
      Instruction *NewU = U->getAsInstruction();
      NewU->insertBefore(UI);
      UI->replaceUsesOfWith(U, NewU);
    }
    // We've replaced all the uses, so destroy the constant. (destroyConstant
    // will update value handles and metadata.)
    U->destroyConstant();
  }
}
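
// For illustration (hypothetical IR): a constant-expression user such as
//   %v = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @G,
//                                               i64 0, i64 1)
// is rewritten so the GEP becomes a real instruction just before its user:
//   %gep = getelementptr inbounds [4 x i32], [4 x i32]* @G, i64 0, i64 1
//   %v = load i32, i32* %gep
// which lets the alloca substitution in processInternalGlobal below replace
// all uses of @G uniformly.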

/// Analyze the specified global variable and optimize
/// it if possible.  If we make a change, return true.
static bool processInternalGlobal(
    GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI,
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
  auto &DL = GV->getParent()->getDataLayout();
  // If this is a first class global and has only one accessing function and
  // this function is non-recursive, we replace the global with a local alloca
  // in this function.
  //
  // NOTE: It doesn't make sense to promote non-single-value types since we
  // are just replacing static memory to stack memory.
  //
  // If the global is in different address space, don't bring it to stack.
  if (!GS.HasMultipleAccessingFunctions &&
      GS.AccessingFunction &&
      GV->getValueType()->isSingleValueType() &&
      GV->getType()->getAddressSpace() == 0 &&
      !GV->isExternallyInitialized() &&
      allNonInstructionUsersCanBeMadeInstructions(GV) &&
      GS.AccessingFunction->doesNotRecurse() &&
      isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
                                          LookupDomTree)) {
    const DataLayout &DL = GV->getParent()->getDataLayout();

    DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
    Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
                                                   ->getEntryBlock().begin());
    Type *ElemTy = GV->getValueType();
    // FIXME: Pass Global's alignment when globals have alignment
    AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
                                        GV->getName(), &FirstI);
    if (!isa<UndefValue>(GV->getInitializer()))
      new StoreInst(GV->getInitializer(), Alloca, &FirstI);

    makeAllConstantUsesInstructions(GV);

    GV->replaceAllUsesWith(Alloca);
    GV->eraseFromParent();
    ++NumLocalized;
    return true;
  }

  // If the global is never loaded (but may be stored to), it is dead.
  // Delete it now.
  if (!GS.IsLoaded) {
    DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");

    bool Changed;
    if (isLeakCheckerRoot(GV)) {
      // Delete any constant stores to the global.
      Changed = CleanupPointerRootUsers(GV, TLI);
    } else {
      // Delete any stores we can find to the global.  We may not be able to
      // make it completely dead though.
      Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);
    }

    // If the global is dead now, delete it.
    if (GV->use_empty()) {
      GV->eraseFromParent();
      ++NumDeleted;
      Changed = true;
    }
    return Changed;

  }
  if (GS.StoredType <= GlobalStatus::InitializerStored) {
    DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
    GV->setConstant(true);

    // Clean up any obviously simplifiable users now.
    CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);

    // If the global is dead now, just nuke it.
    if (GV->use_empty()) {
      DEBUG(dbgs() << "   *** Marking constant allowed us to simplify "
            << "all users and delete global!\n");
      GV->eraseFromParent();
      ++NumDeleted;
      return true;
    }

    // Fall through to the next check; see if we can optimize further.
    ++NumMarked;
  }
  if (!GV->getInitializer()->getType()->isSingleValueType()) {
    const DataLayout &DL = GV->getParent()->getDataLayout();
    if (SRAGlobal(GV, DL))
      return true;
  }
  if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
    // If the initial value for the global was an undef value, and if only
    // one other value was stored into it, we can just change the
    // initializer to be the stored value, then delete all stores to the
    // global.  This allows us to mark it constant.
    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
      if (isa<UndefValue>(GV->getInitializer())) {
        // Change the initial value here.
        GV->setInitializer(SOVConstant);

        // Clean up any obviously simplifiable users now.
        CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI);

        if (GV->use_empty()) {
          DEBUG(dbgs() << "   *** Substituting initializer allowed us to "
                       << "simplify all users and delete global!\n");
          GV->eraseFromParent();
          ++NumDeleted;
        }
        ++NumSubstitute;
        return true;
      }

    // Try to optimize globals based on the knowledge that only one value
    // (besides its initializer) is ever stored to the global.
    if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI))
      return true;

    // Otherwise, if the global was not a boolean, we can shrink it to be a
    // boolean.
    if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
      if (GS.Ordering == AtomicOrdering::NotAtomic) {
        if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
          ++NumShrunkToBool;
          return true;
        }
      }
    }
  }

  return false;
}
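
// Taken together, the steps above are attempted in order: demote the global
// to an alloca in its single accessing function, delete never-loaded
// globals, mark write-free globals constant, SRA aggregate globals,
// substitute a once-stored value into an undef initializer, apply the
// once-stored optimizations, and finally shrink two-valued globals to i1.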

/// Analyze the specified global variable and optimize it if possible.  If we
/// make a change, return true.
static bool
processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI,
              function_ref<DominatorTree &(Function &)> LookupDomTree) {
  if (GV.getName().startswith("llvm."))
    return false;

  GlobalStatus GS;

  if (GlobalStatus::analyzeGlobal(&GV, GS))
    return false;

  bool Changed = false;
  if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
    auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
                                               : GlobalValue::UnnamedAddr::Local;
    if (NewUnnamedAddr != GV.getUnnamedAddr()) {
      GV.setUnnamedAddr(NewUnnamedAddr);
      NumUnnamed++;
      Changed = true;
    }
  }

  // Do more involved optimizations if the global is internal.
  if (!GV.hasLocalLinkage())
    return Changed;

  auto *GVar = dyn_cast<GlobalVariable>(&GV);
  if (!GVar)
    return Changed;

  if (GVar->isConstant() || !GVar->hasInitializer())
    return Changed;

  return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || Changed;
}

/// Walk all of the direct calls of the specified function, changing them to
/// FastCC.
static void ChangeCalleesToFastCall(Function *F) {
  for (User *U : F->users()) {
    if (isa<BlockAddress>(U))
      continue;
    CallSite CS(cast<Instruction>(U));
    CS.setCallingConv(CallingConv::Fast);
  }
}
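
// For illustration (hypothetical IR): together with the caller in
// OptimizeFunctions below, an internal function's convention changes from
//   define internal i32 @f(i32 %x) { ... }
//   %r = call i32 @f(i32 %v)
// to
//   define internal fastcc i32 @f(i32 %x) { ... }
//   %r = call fastcc i32 @f(i32 %v)
// Callee and call sites must agree on the convention, which is why both are
// rewritten.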

static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
  // There can be at most one attribute set with a nest attribute.
  unsigned NestIndex;
  if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
    return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
  return Attrs;
}

static void RemoveNestAttribute(Function *F) {
  F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
  for (User *U : F->users()) {
    if (isa<BlockAddress>(U))
      continue;
    CallSite CS(cast<Instruction>(U));
    CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
  }
}

/// Return true if this is a calling convention that we'd like to change.  The
/// idea here is that we don't want to mess with the convention if the user
/// explicitly requested something with performance implications like coldcc,
/// GHC, or anyregcc.
static bool isProfitableToMakeFastCC(Function *F) {
  CallingConv::ID CC = F->getCallingConv();
  // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
  return CC == CallingConv::C || CC == CallingConv::X86_ThisCall;
}

static bool
OptimizeFunctions(Module &M, TargetLibraryInfo *TLI,
                  function_ref<DominatorTree &(Function &)> LookupDomTree,
                  SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
  bool Changed = false;
  // Optimize functions.
  for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
    Function *F = &*FI++;
    // Functions without names cannot be referenced outside this module.
    if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
      F->setLinkage(GlobalValue::InternalLinkage);

    if (deleteIfDead(*F, NotDiscardableComdats)) {
      Changed = true;
      continue;
    }

    // LLVM's definition of dominance allows instructions that are cyclic
    // in unreachable blocks, e.g.:
    // %pat = select i1 %condition, @global, i16* %pat
    // because any instruction dominates an instruction in a block that's
    // not reachable from entry.
    // So, remove unreachable blocks from the function, because a) there's
    // no point in analyzing them and b) GlobalOpt should otherwise grow
    // some more complicated logic to break these cycles.
    // Removing unreachable blocks might invalidate the dominator so we
    // recalculate it.
    if (!F->isDeclaration()) {
      if (removeUnreachableBlocks(*F)) {
        auto &DT = LookupDomTree(*F);
        DT.recalculate(*F);
        Changed = true;
      }
    }

    Changed |= processGlobal(*F, TLI, LookupDomTree);

    if (!F->hasLocalLinkage())
      continue;
    if (isProfitableToMakeFastCC(F) && !F->isVarArg() &&
        !F->hasAddressTaken()) {
      // If this function has a calling convention worth changing, is not a
      // varargs function, and is only called directly, promote it to use the
      // Fast calling convention.
      F->setCallingConv(CallingConv::Fast);
      ChangeCalleesToFastCall(F);
      ++NumFastCallFns;
      Changed = true;
    }

    if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
        !F->hasAddressTaken()) {
      // The function is not used by a trampoline intrinsic, so it is safe
      // to remove the 'nest' attribute.
      RemoveNestAttribute(F);
      ++NumNestRemoved;
      Changed = true;
    }
  }
  return Changed;
}

static bool
OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
                   function_ref<DominatorTree &(Function &)> LookupDomTree,
                   SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
  bool Changed = false;

  for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
       GVI != E; ) {
    GlobalVariable *GV = &*GVI++;
    // Global variables without names cannot be referenced outside this module.
    if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
      GV->setLinkage(GlobalValue::InternalLinkage);
    // Simplify the initializer.
    if (GV->hasInitializer())
      if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
        auto &DL = M.getDataLayout();
        Constant *New = ConstantFoldConstant(C, DL, TLI);
        if (New && New != C)
          GV->setInitializer(New);
      }

    if (deleteIfDead(*GV, NotDiscardableComdats)) {
      Changed = true;
      continue;
    }

    Changed |= processGlobal(*GV, TLI, LookupDomTree);
  }
  return Changed;
}

/// Evaluate a piece of a constantexpr store into a global initializer.  This
/// returns 'Init' modified to reflect 'Val' stored into it.  At this point, the
/// GEP operands of Addr [0, OpNo) have been stepped into.
static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
                                   ConstantExpr *Addr, unsigned OpNo) {
  // Base case of the recursion.
  if (OpNo == Addr->getNumOperands()) {
    assert(Val->getType() == Init->getType() && "Type mismatch!");
    return Val;
  }

  SmallVector<Constant*, 32> Elts;
  if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
    // Break up the constant into its elements.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
      Elts.push_back(Init->getAggregateElement(i));

    // Replace the element that we are supposed to.
    ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
    unsigned Idx = CU->getZExtValue();
    assert(Idx < STy->getNumElements() && "Struct index out of range!");
    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);

    // Return the modified struct.
    return ConstantStruct::get(STy, Elts);
  }

  ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
  SequentialType *InitTy = cast<SequentialType>(Init->getType());
  uint64_t NumElts = InitTy->getNumElements();

  // Break up the array into elements.
  for (uint64_t i = 0, e = NumElts; i != e; ++i)
    Elts.push_back(Init->getAggregateElement(i));

  assert(CI->getZExtValue() < NumElts);
  Elts[CI->getZExtValue()] =
    EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);

  if (Init->getType()->isArrayTy())
    return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
  return ConstantVector::get(Elts);
}
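
// Worked example (hypothetical constants): committing i32 7 through
//   getelementptr ({ i32, [2 x i32] }, { i32, [2 x i32] }* @G,
//                  i32 0, i32 1, i32 0)
// starts at operand 2 (see CommitValueTo below): index 1 selects the struct
// field, index 0 the array element, and the initializer is rebuilt as
//   { i32 <old>, [2 x i32] [i32 7, i32 <old>] }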

/// We have decided that Addr (which satisfies the predicate
/// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen.
static void CommitValueTo(Constant *Val, Constant *Addr) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    assert(GV->hasInitializer());
    GV->setInitializer(Val);
    return;
  }

  ConstantExpr *CE = cast<ConstantExpr>(Addr);
  GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
  GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
}

/// Evaluate static constructors in the function, if we can.  Return true if we
/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
                                      TargetLibraryInfo *TLI) {
  // Call the function.
  Evaluator Eval(DL, TLI);
  Constant *RetValDummy;
  bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
                                           SmallVector<Constant*, 0>());

  if (EvalSuccess) {
    ++NumCtorsEvaluated;

    // We succeeded at evaluation: commit the result.
    DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
          << F->getName() << "' to " << Eval.getMutatedMemory().size()
          << " stores.\n");
    for (const auto &I : Eval.getMutatedMemory())
      CommitValueTo(I.second, I.first);
    for (GlobalVariable *GV : Eval.getInvariants())
      GV->setConstant(true);
  }

  return EvalSuccess;
}
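
// For intuition (hypothetical C++): a dynamic initializer such as
//   struct S { int x; S() { x = 42; } };
//   static S s;
// can often be evaluated here at compile time; the store of 42 is committed
// into s's initializer via CommitValueTo above, and the evaluated ctor can
// then be dropped from llvm.global_ctors by the pass driver.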

static int compareNames(Constant *const *A, Constant *const *B) {
  Value *AStripped = (*A)->stripPointerCastsNoFollowAliases();
  Value *BStripped = (*B)->stripPointerCastsNoFollowAliases();
  return AStripped->getName().compare(BStripped->getName());
}

static void setUsedInitializer(GlobalVariable &V,
                               const SmallPtrSet<GlobalValue *, 8> &Init) {
  if (Init.empty()) {
    V.eraseFromParent();
    return;
  }

  // Type of pointer to the array of pointers.
  PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);

  SmallVector<llvm::Constant *, 8> UsedArray;
  for (GlobalValue *GV : Init) {
    Constant *Cast
      = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
    UsedArray.push_back(Cast);
  }
  // Sort to get deterministic order.
  array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
  ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());

  Module *M = V.getParent();
  V.removeFromParent();
  GlobalVariable *NV =
      new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage,
                         llvm::ConstantArray::get(ATy, UsedArray), "");
  NV->takeName(&V);
  NV->setSection("llvm.metadata");
  delete &V;
}
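
// The rebuilt variable has the usual shape of llvm.used (example IR, names
// hypothetical):
//   @llvm.used = appending global [2 x i8*]
//       [i8* bitcast (void ()* @f to i8*), i8* bitcast (i32* @g to i8*)],
//       section "llvm.metadata"
// Entries are sorted by stripped name so the output is deterministic.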

namespace {
/// An easy to access representation of llvm.used and llvm.compiler.used.
class LLVMUsed {
  SmallPtrSet<GlobalValue *, 8> Used;
  SmallPtrSet<GlobalValue *, 8> CompilerUsed;
  GlobalVariable *UsedV;
  GlobalVariable *CompilerUsedV;

public:
  LLVMUsed(Module &M) {
    UsedV = collectUsedGlobalVariables(M, Used, false);
    CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true);
  }
  typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator;
  typedef iterator_range<iterator> used_iterator_range;
  iterator usedBegin() { return Used.begin(); }
  iterator usedEnd() { return Used.end(); }
  used_iterator_range used() {
    return used_iterator_range(usedBegin(), usedEnd());
  }
  iterator compilerUsedBegin() { return CompilerUsed.begin(); }
  iterator compilerUsedEnd() { return CompilerUsed.end(); }
  used_iterator_range compilerUsed() {
    return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
  }
  bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
  bool compilerUsedCount(GlobalValue *GV) const {
    return CompilerUsed.count(GV);
  }
  bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
  bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
  bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
  bool compilerUsedInsert(GlobalValue *GV) {
    return CompilerUsed.insert(GV).second;
  }

  void syncVariablesAndSets() {
    if (UsedV)
      setUsedInitializer(*UsedV, Used);
    if (CompilerUsedV)
      setUsedInitializer(*CompilerUsedV, CompilerUsed);
  }
};
}

static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
  if (GA.use_empty()) // No use at all.
    return false;

  assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
         "We should have removed the duplicated "
         "element from llvm.compiler.used");
  if (!GA.hasOneUse())
    // Strictly more than one use. So at least one is not in llvm.used and
    // llvm.compiler.used.
    return true;

  // Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
  return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
}

static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
                                               const LLVMUsed &U) {
  unsigned N = 2;
  assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
         "We should have removed the duplicated "
         "element from llvm.compiler.used");
  if (U.usedCount(&V) || U.compilerUsedCount(&V))
    ++N;
  return V.hasNUsesOrMore(N);
}

static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
  if (!GA.hasLocalLinkage())
    return true;

  return U.usedCount(&GA) || U.compilerUsedCount(&GA);
}
2376
2377
static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
2378
355
                             bool &RenameTarget) {
2379
355
  RenameTarget = false;
2380
355
  bool Ret = false;
2381
355
  if (hasUseOtherThanLLVMUsed(GA, U))
2382
13
    Ret = true;
2383
355
2384
355
  // If the alias is externally visible, we may still be able to simplify it.
2385
355
  if (!mayHaveOtherReferences(GA, U))
2386
3
    return Ret;
2387
352
2388
352
  // If the aliasee has internal linkage, give it the name and linkage
2389
352
  // of the alias, and delete the alias.  This turns:
2390
352
  //   define internal ... @f(...)
2391
352
  //   @a = alias ... @f
2392
352
  // into:
2393
352
  //   define ... @a(...)
2394
352
  Constant *Aliasee = GA.getAliasee();
2395
352
  GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
2396
352
  if (!Target->hasLocalLinkage())
2397
39
    return Ret;
2398
313
2399
313
  // Do not perform the transform if multiple aliases potentially target the
2400
313
  // aliasee. This check also ensures that it is safe to replace the section
2401
313
  // and other attributes of the aliasee with those of the alias.
2402
313
  if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
2403
19
    return Ret;
2404
294
2405
294
  RenameTarget = true;
2406
294
  return true;
2407
294
}
2408
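The alias pattern hasUsesToReplace looks for is typically produced by the GNU alias attribute. A minimal compilable sketch, with hypothetical names and a GCC/Clang ELF toolchain assumed:

  extern "C" {
  static void impl(void) {}                       // define internal void @impl()
  void api(void) __attribute__((alias("impl")));  // @api = alias void (), void ()* @impl
  }

Here @impl has local linkage and @api is the only thing referring to it, so the RenameTarget path applies.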
2409
static bool
2410
OptimizeGlobalAliases(Module &M,
2411
38.0k
                      SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
2412
38.0k
  bool Changed = false;
2413
38.0k
  LLVMUsed Used(M);
2414
38.0k
2415
38.0k
  for (GlobalValue *GV : Used.used())
2416
924
    Used.compilerUsedErase(GV);
2417
38.0k
2418
38.0k
  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
2419
38.4k
       I != E;) {
2420
400
    GlobalAlias *J = &*I++;
2421
400
2422
400
    // Aliases without names cannot be referenced outside this module.
2423
400
    if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
2424
0
      J->setLinkage(GlobalValue::InternalLinkage);
2425
400
2426
400
    if (deleteIfDead(*J, NotDiscardableComdats)) {
2427
5
      Changed = true;
2428
5
      continue;
2429
5
    }
2430
395
2431
395
    // If the aliasee may change at link time, nothing can be done - bail out.
2432
395
    if (J->isInterposable())
2433
6
      continue;
2434
389
2435
389
    Constant *Aliasee = J->getAliasee();
2436
389
    GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
2437
389
    // We can't trivially replace the alias with the aliasee if the aliasee is
2438
389
    // non-trivial in some way.
2439
389
    // TODO: Try to handle non-zero GEPs of local aliasees.
2440
389
    if (!Target)
2441
34
      continue;
2442
355
    Target->removeDeadConstantUsers();
2443
355
2444
355
    // Make all users of the alias use the aliasee instead.
2445
355
    bool RenameTarget;
2446
355
    if (!hasUsesToReplace(*J, Used, RenameTarget))
2447
49
      continue;
2448
306
2449
306
    J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
2450
306
    ++NumAliasesResolved;
2451
306
    Changed = true;
2452
306
2453
306
    if (RenameTarget) {
2454
294
      // Give the aliasee the name, linkage and other attributes of the alias.
2455
294
      Target->takeName(&*J);
2456
294
      Target->setLinkage(J->getLinkage());
2457
294
      Target->setVisibility(J->getVisibility());
2458
294
      Target->setDLLStorageClass(J->getDLLStorageClass());
2459
294
2460
294
      if (Used.usedErase(&*J))
2461
289
        Used.usedInsert(Target);
2462
294
2463
294
      if (Used.compilerUsedErase(&*J))
2464
2
        Used.compilerUsedInsert(Target);
2465
306
    } else if (mayHaveOtherReferences(*J, Used))
2466
9
      continue;
2467
297
2468
297
    // Delete the alias.
2469
297
    M.getAliasList().erase(J);
2470
297
    ++NumAliasesRemoved;
2471
297
    Changed = true;
2472
297
  }
2473
38.0k
2474
38.0k
  Used.syncVariablesAndSets();
2475
38.0k
2476
38.0k
  return Changed;
2477
38.0k
}
2478
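End to end, the rename path above performs the rewrite sketched in the comments of hasUsesToReplace. On the hypothetical IR from the previous sketch:

  ; before
  define internal void @impl() { ret void }
  @api = alias void (), void ()* @impl

  ; after: uses of @api now refer to the function directly, the alias is
  ; erased, and the aliasee inherits the alias's name, linkage and visibility
  define void @api() { ret void }

Entries in llvm.used/llvm.compiler.used are moved from the alias to the renamed target, so syncVariablesAndSets() writes back consistent arrays.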
2479
38.0k
static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
2480
38.0k
  LibFunc F = LibFunc_cxa_atexit;
2481
38.0k
  if (!TLI->has(F))
2482
5.63k
    return nullptr;
2483
32.4k
2484
32.4k
  Function *Fn = M.getFunction(TLI->getName(F));
2485
32.4k
  if (!Fn)
2486
32.3k
    return nullptr;
2487
65
2488
65
  // Make sure that the function has the correct prototype.
2489
65
  if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
2490
0
    return nullptr;
2491
65
2492
65
  return Fn;
2493
65
}
2494
2495
/// Returns whether the given function is an empty C++ destructor and can
2496
/// therefore be eliminated.
2497
/// Note that we assume that other optimization passes have already simplified
2498
/// the code so we only look for a function with a single basic block, where
2499
/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and
2500
/// other side-effect free instructions.
2501
static bool cxxDtorIsEmpty(const Function &Fn,
2502
1.64k
                           SmallPtrSet<const Function *, 8> &CalledFunctions) {
2503
1.64k
  // FIXME: We could eliminate C++ destructors if they're readonly/readnone and
2504
1.64k
  // nounwind, but that doesn't seem worth doing.
2505
1.64k
  if (Fn.isDeclaration())
2506
24
    return false;
2507
1.61k
2508
1.61k
  if (++Fn.begin() != Fn.end())
2509
420
    return false;
2510
1.19k
2511
1.19k
  const BasicBlock &EntryBlock = Fn.getEntryBlock();
2512
1.19k
  for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end();
2513
3.04k
       I != E; ++I) {
2514
3.04k
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
2515
1.18k
      // Ignore debug intrinsics.
2516
1.18k
      if (isa<DbgInfoIntrinsic>(CI))
2517
0
        continue;
2518
1.18k
2519
1.18k
      const Function *CalledFn = CI->getCalledFunction();
2520
1.18k
2521
1.18k
      if (!CalledFn)
2522
0
        return false;
2523
1.18k
2524
1.18k
      SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions);
2525
1.18k
2526
1.18k
      // Don't treat recursive functions as empty.
2527
1.18k
      if (!NewCalledFunctions.insert(CalledFn).second)
2528
0
        return false;
2529
1.18k
2530
1.18k
      if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
2531
1.17k
        return false;
2532
1.86k
    } else if (isa<ReturnInst>(*I))
2533
15
      return true; // We're done.
2534
1.84k
    else if (I->mayHaveSideEffects())
2535
10
      return false; // Destructor with side effects, bail.
2536
3.04k
  }
2537
1.19k
2538
0
  return false;
2539
1.64k
}
2540
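In source terms, cxxDtorIsEmpty accepts destructors whose bodies reduce to a plain return (possibly through calls to other empty destructors) and rejects anything with side effects. A minimal sketch with hypothetical types:

  #include <cstdio>
  struct Empty {
    ~Empty() {}                    // single basic block ending in 'ret': empty
  };
  struct Noisy {
    ~Noisy() { std::puts("bye"); } // call with side effects: not empty
  };
  Empty E1;  // registration of ~Empty via __cxa_atexit is removable
  Noisy N1;  // registration of ~Noisy must stay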
2541
65
static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
2542
65
  /// Itanium C++ ABI p3.3.5:
2543
65
  ///
2544
65
  ///   After constructing a global (or local static) object, that will require
2545
65
  ///   destruction on exit, a termination function is registered as follows:
2546
65
  ///
2547
65
  ///   extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
2548
65
  ///
2549
65
  ///   This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
2550
65
  ///   call f(p) when DSO d is unloaded, before all such termination calls
2551
65
  ///   registered before this one. It returns zero if registration is
2552
65
  ///   successful, nonzero on failure.
2553
65
2554
65
  // This pass will look for calls to __cxa_atexit where the function is trivial
2555
65
  // and remove them.
2556
65
  bool Changed = false;
2557
65
2558
65
  for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
2559
525
       I != E;) {
2560
460
    // We're only interested in calls. Theoretically, we could handle invoke
2561
460
    // instructions as well, but neither llvm-gcc nor clang generate invokes
2562
460
    // to __cxa_atexit.
2563
460
    CallInst *CI = dyn_cast<CallInst>(*I++);
2564
460
    if (!CI)
2565
0
      continue;
2566
460
2567
460
    Function *DtorFn =
2568
460
      dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
2569
460
    if (!DtorFn)
2570
0
      continue;
2571
460
2572
460
    SmallPtrSet<const Function *, 8> CalledFunctions;
2573
460
    if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions))
2574
454
      continue;
2575
6
2576
6
    // Just remove the call.
2577
6
    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
2578
6
    CI->eraseFromParent();
2579
6
2580
6
    ++NumCXXDtorsRemoved;
2581
6
2582
6
    Changed |= true;
2583
6
  }
2584
65
2585
65
  return Changed;
2586
65
}
2587
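Concretely, the registration removed here looks roughly like the following in the typed-pointer IR of this era (hypothetical names, continuing the Empty sketch above); stripPointerCasts() peels the bitcast to reach the destructor, and the call is replaced by zero and erased:

  %r = call i32 @__cxa_atexit(
           void (i8*)* bitcast (void (%struct.Empty*)* @_ZN5EmptyD1Ev to void (i8*)*),
           i8* bitcast (%struct.Empty* @E1 to i8*),
           i8* @__dso_handle)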
2588
static bool optimizeGlobalsInModule(
2589
    Module &M, const DataLayout &DL, TargetLibraryInfo *TLI,
2590
18.0k
    function_ref<DominatorTree &(Function &)> LookupDomTree) {
2591
18.0k
  SmallSet<const Comdat *, 8> NotDiscardableComdats;
2592
18.0k
  bool Changed = false;
2593
18.0k
  bool LocalChange = true;
2594
56.1k
    while (LocalChange) {
2595
38.0k
    LocalChange = false;
2596
38.0k
2597
38.0k
    NotDiscardableComdats.clear();
2598
38.0k
    for (const GlobalVariable &GV : M.globals())
2599
1.99M
      if (const Comdat *C = GV.getComdat())
2600
297
        if (!GV.isDiscardableIfUnused() || !GV.use_empty())
2601
295
          NotDiscardableComdats.insert(C);
2602
38.0k
    for (Function &F : M)
2603
3.64M
      if (const Comdat *C = F.getComdat())
2604
3.46k
        if (!F.isDefTriviallyDead())
2605
663
          NotDiscardableComdats.insert(C);
2606
38.0k
    for (GlobalAlias &GA : M.aliases())
2607
400
      if (const Comdat *C = GA.getComdat())
2608
61
        if (!GA.isDiscardableIfUnused() || !GA.use_empty())
2609
60
          NotDiscardableComdats.insert(C);
2610
38.0k
2611
38.0k
    // Delete functions that are trivially dead, ccc -> fastcc
2612
38.0k
    LocalChange |=
2613
38.0k
        OptimizeFunctions(M, TLI, LookupDomTree, NotDiscardableComdats);
2614
38.0k
2615
38.0k
    // Optimize global_ctors list.
2616
307
    LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
2617
307
      return EvaluateStaticConstructor(F, DL, TLI);
2618
307
    });
2619
38.0k
2620
38.0k
    // Optimize non-address-taken globals.
2621
38.0k
    LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree,
2622
38.0k
                                      NotDiscardableComdats);
2623
38.0k
2624
38.0k
    // Resolve aliases, when possible.
2625
38.0k
    LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
2626
38.0k
2627
38.0k
    // Try to remove trivial global destructors if they are not removed
2628
38.0k
    // already.
2629
38.0k
    Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
2630
38.0k
    if (CXAAtExitFn)
2631
65
      LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
2632
38.0k
2633
38.0k
    Changed |= LocalChange;
2634
38.0k
  }
2635
18.0k
2636
18.0k
  // TODO: Move all global ctors functions to the end of the module for code
2637
18.0k
  // layout.
2638
18.0k
2639
18.0k
  return Changed;
2640
18.0k
}
2641
2642
121
PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
2643
121
    auto &DL = M.getDataLayout();
2644
121
    auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
2645
121
    auto &FAM =
2646
121
        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2647
0
    auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{
2648
0
      return FAM.getResult<DominatorTreeAnalysis>(F);
2649
0
    };
2650
121
    if (!optimizeGlobalsInModule(M, DL, &TLI, LookupDomTree))
2651
83
      return PreservedAnalyses::all();
2652
38
    return PreservedAnalyses::none();
2653
38
}
2654
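A minimal driver sketch for this new-pass-manager entry point (standard PassBuilder boilerplate of the period; error handling omitted, an llvm::Module M assumed in scope). The textual equivalent is `opt -passes=globalopt in.ll -S`.

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/IPO/GlobalOpt.h"
  using namespace llvm;

  void runGlobalOpt(Module &M) {
    PassBuilder PB;
    LoopAnalysisManager LAM;
    FunctionAnalysisManager FAM;
    CGSCCAnalysisManager CGAM;
    ModuleAnalysisManager MAM;
    // Register all analyses and wire the proxies between the managers.
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

    ModulePassManager MPM;
    MPM.addPass(GlobalOptPass());
    MPM.run(M, MAM);  // invokes GlobalOptPass::run above
  }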
2655
namespace {
2656
struct GlobalOptLegacyPass : public ModulePass {
2657
  static char ID; // Pass identification, replacement for typeid
2658
17.9k
  GlobalOptLegacyPass() : ModulePass(ID) {
2659
17.9k
    initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
2660
17.9k
  }
2661
2662
17.9k
  bool runOnModule(Module &M) override {
2663
17.9k
    if (skipModule(M))
2664
2
      return false;
2665
17.9k
2666
17.9k
    auto &DL = M.getDataLayout();
2667
17.9k
    auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
2668
7.08k
    auto LookupDomTree = [this](Function &F) -> DominatorTree & {
2669
7.08k
      return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
2670
7.08k
    };
2671
17.9k
    return optimizeGlobalsInModule(M, DL, TLI, LookupDomTree);
2672
17.9k
  }
2673
2674
17.9k
  void getAnalysisUsage(AnalysisUsage &AU) const override {
2675
17.9k
    AU.addRequired<TargetLibraryInfoWrapperPass>();
2676
17.9k
    AU.addRequired<DominatorTreeWrapperPass>();
2677
17.9k
  }
2678
};
2679
}
2680
2681
char GlobalOptLegacyPass::ID = 0;
2682
25.1k
INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
2683
25.1k
                      "Global Variable Optimizer", false, false)
2684
25.1k
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
2685
25.1k
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2686
25.1k
INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
2687
                    "Global Variable Optimizer", false, false)
2688
2689
17.8k
ModulePass *llvm::createGlobalOptimizerPass() {
2690
17.8k
  return new GlobalOptLegacyPass();
2691
17.8k
}
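A matching usage sketch for the legacy entry point (an llvm::Module M assumed in scope; the legacy pass manager schedules the TargetLibraryInfo and DominatorTree analyses declared in getAnalysisUsage automatically):

  #include "llvm/IR/LegacyPassManager.h"
  #include "llvm/Transforms/IPO.h"

  void runLegacyGlobalOpt(llvm::Module &M) {
    llvm::legacy::PassManager PM;
    PM.add(llvm::createGlobalOptimizerPass());  // GlobalOptLegacyPass above
    PM.run(M);                                  // calls runOnModule
  }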