/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/IPO/GlobalOpt.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- GlobalOpt.cpp - Optimize Global Variables --------------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass transforms simple global variables that never have their address |
11 | | // taken. If obviously true, it marks read/write globals as constant, deletes |
12 | | // variables only stored to, etc. |
13 | | // |
14 | | //===----------------------------------------------------------------------===// |
15 | | |
16 | | #include "llvm/Transforms/IPO/GlobalOpt.h" |
17 | | #include "llvm/ADT/DenseMap.h" |
18 | | #include "llvm/ADT/STLExtras.h" |
19 | | #include "llvm/ADT/SmallPtrSet.h" |
20 | | #include "llvm/ADT/SmallSet.h" |
21 | | #include "llvm/ADT/SmallVector.h" |
22 | | #include "llvm/ADT/Statistic.h" |
23 | | #include "llvm/Analysis/ConstantFolding.h" |
24 | | #include "llvm/Analysis/MemoryBuiltins.h" |
25 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
26 | | #include "llvm/IR/CallSite.h" |
27 | | #include "llvm/IR/CallingConv.h" |
28 | | #include "llvm/IR/Constants.h" |
29 | | #include "llvm/IR/DataLayout.h" |
30 | | #include "llvm/IR/DebugInfoMetadata.h" |
31 | | #include "llvm/IR/DerivedTypes.h" |
32 | | #include "llvm/IR/Dominators.h" |
33 | | #include "llvm/IR/GetElementPtrTypeIterator.h" |
34 | | #include "llvm/IR/Instructions.h" |
35 | | #include "llvm/IR/IntrinsicInst.h" |
36 | | #include "llvm/IR/Module.h" |
37 | | #include "llvm/IR/Operator.h" |
38 | | #include "llvm/IR/ValueHandle.h" |
39 | | #include "llvm/IR/DebugInfoMetadata.h" |
40 | | #include "llvm/Pass.h" |
41 | | #include "llvm/Support/Debug.h" |
42 | | #include "llvm/Support/ErrorHandling.h" |
43 | | #include "llvm/Support/MathExtras.h" |
44 | | #include "llvm/Support/raw_ostream.h" |
45 | | #include "llvm/Transforms/IPO.h" |
46 | | #include "llvm/Transforms/Utils/CtorUtils.h" |
47 | | #include "llvm/Transforms/Utils/Evaluator.h" |
48 | | #include "llvm/Transforms/Utils/GlobalStatus.h" |
49 | | #include "llvm/Transforms/Utils/Local.h" |
50 | | #include <algorithm> |
51 | | using namespace llvm; |
52 | | |
53 | | #define DEBUG_TYPE "globalopt" |
54 | | |
55 | | STATISTIC(NumMarked , "Number of globals marked constant"); |
56 | | STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); |
57 | | STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); |
58 | | STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); |
59 | | STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); |
60 | | STATISTIC(NumDeleted , "Number of globals deleted"); |
61 | | STATISTIC(NumGlobUses , "Number of global uses devirtualized"); |
62 | | STATISTIC(NumLocalized , "Number of globals localized"); |
63 | | STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); |
64 | | STATISTIC(NumFastCallFns , "Number of functions converted to fastcc"); |
65 | | STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); |
66 | | STATISTIC(NumNestRemoved , "Number of nest attributes removed"); |
67 | | STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); |
68 | | STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); |
69 | | STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); |
70 | | |
71 | | /// Is this global variable possibly used by a leak checker as a root? If so, |
72 | | /// we might not really want to eliminate the stores to it. |
73 | 110 | static bool isLeakCheckerRoot(GlobalVariable *GV) { |
74 | 110 | // A global variable is a root if it is a pointer, or could plausibly contain |
75 | 110 | // a pointer. There are two challenges; one is that we could have a struct |
76 | 110 | // the has an inner member which is a pointer. We recurse through the type to |
77 | 110 | // detect these (up to a point). The other is that we may actually be a union |
78 | 110 | // of a pointer and another type, and so our LLVM type is an integer which |
79 | 110 | // gets converted into a pointer, or our type is an [i8 x #] with a pointer |
80 | 110 | // potentially contained here. |
81 | 110 | |
82 | 110 | if (GV->hasPrivateLinkage()) |
83 | 0 | return false; |
84 | 110 | |
85 | 110 | SmallVector<Type *, 4> Types; |
86 | 110 | Types.push_back(GV->getValueType()); |
87 | 110 | |
88 | 110 | unsigned Limit = 20; |
89 | 135 | do { |
90 | 135 | Type *Ty = Types.pop_back_val(); |
91 | 135 | switch (Ty->getTypeID()) { |
92 | 72 | default: break; |
93 | 30 | case Type::PointerTyID: return true; |
94 | 23 | case Type::ArrayTyID: |
95 | 23 | case Type::VectorTyID: { |
96 | 23 | SequentialType *STy = cast<SequentialType>(Ty); |
97 | 23 | Types.push_back(STy->getElementType()); |
98 | 23 | break; |
99 | 23 | } |
100 | 10 | case Type::StructTyID: { |
101 | 10 | StructType *STy = cast<StructType>(Ty); |
102 | 10 | if (STy->isOpaque()10 ) return true0 ; |
103 | 10 | for (StructType::element_iterator I = STy->element_begin(), |
104 | 14 | E = STy->element_end(); I != E14 ; ++I4 ) { |
105 | 12 | Type *InnerTy = *I; |
106 | 12 | if (isa<PointerType>(InnerTy)12 ) return true8 ; |
107 | 4 | if (4 isa<CompositeType>(InnerTy)4 ) |
108 | 2 | Types.push_back(InnerTy); |
109 | 12 | } |
110 | 2 | break; |
111 | 97 | } |
112 | 97 | } |
113 | 97 | if (97 --Limit == 097 ) return true0 ; |
114 | 97 | } while (!Types.empty()); |
115 | 72 | return false; |
116 | 110 | } |
117 | | |
118 | | /// Given a value that is stored to a global but never read, determine whether |
119 | | /// it's safe to remove the store and the chain of computation that feeds the |
120 | | /// store. |
121 | 7 | static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { |
122 | 10 | do { |
123 | 10 | if (isa<Constant>(V)) |
124 | 1 | return true; |
125 | 9 | if (9 !V->hasOneUse()9 ) |
126 | 0 | return false; |
127 | 9 | if (9 isa<LoadInst>(V) || 9 isa<InvokeInst>(V)9 || isa<Argument>(V)6 || |
128 | 6 | isa<GlobalValue>(V)) |
129 | 3 | return false; |
130 | 6 | if (6 isAllocationFn(V, TLI)6 ) |
131 | 3 | return true; |
132 | 3 | |
133 | 3 | Instruction *I = cast<Instruction>(V); |
134 | 3 | if (I->mayHaveSideEffects()) |
135 | 0 | return false; |
136 | 3 | if (GetElementPtrInst *3 GEP3 = dyn_cast<GetElementPtrInst>(I)) { |
137 | 0 | if (!GEP->hasAllConstantIndices()) |
138 | 0 | return false; |
139 | 3 | } else if (3 I->getNumOperands() != 13 ) { |
140 | 0 | return false; |
141 | 0 | } |
142 | 3 | |
143 | 3 | V = I->getOperand(0); |
144 | 7 | } while (1); |
145 | 7 | } |
146 | | |
147 | | /// This GV is a pointer root. Loop over all users of the global and clean up |
148 | | /// any that obviously don't assign the global a value that isn't dynamically |
149 | | /// allocated. |
150 | | static bool CleanupPointerRootUsers(GlobalVariable *GV, |
151 | 38 | const TargetLibraryInfo *TLI) { |
152 | 38 | // A brief explanation of leak checkers. The goal is to find bugs where |
153 | 38 | // pointers are forgotten, causing an accumulating growth in memory |
154 | 38 | // usage over time. The common strategy for leak checkers is to whitelist the |
155 | 38 | // memory pointed to by globals at exit. This is popular because it also |
156 | 38 | // solves another problem where the main thread of a C++ program may shut down |
157 | 38 | // before other threads that are still expecting to use those globals. To |
158 | 38 | // handle that case, we expect the program may create a singleton and never |
159 | 38 | // destroy it. |
160 | 38 | |
161 | 38 | bool Changed = false; |
162 | 38 | |
163 | 38 | // If Dead[n].first is the only use of a malloc result, we can delete its |
164 | 38 | // chain of computation and the store to the global in Dead[n].second. |
165 | 38 | SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead; |
166 | 38 | |
167 | 38 | // Constants can't be pointers to dynamically allocated memory. |
168 | 38 | for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end(); |
169 | 87 | UI != E87 ;) { |
170 | 49 | User *U = *UI++; |
171 | 49 | if (StoreInst *SI49 = dyn_cast<StoreInst>(U)) { |
172 | 35 | Value *V = SI->getValueOperand(); |
173 | 35 | if (isa<Constant>(V)35 ) { |
174 | 9 | Changed = true; |
175 | 9 | SI->eraseFromParent(); |
176 | 35 | } else if (Instruction *26 I26 = dyn_cast<Instruction>(V)) { |
177 | 23 | if (I->hasOneUse()) |
178 | 7 | Dead.push_back(std::make_pair(I, SI)); |
179 | 26 | } |
180 | 49 | } else if (MemSetInst *14 MSI14 = dyn_cast<MemSetInst>(U)) { |
181 | 0 | if (isa<Constant>(MSI->getValue())0 ) { |
182 | 0 | Changed = true; |
183 | 0 | MSI->eraseFromParent(); |
184 | 0 | } else if (Instruction *0 I0 = dyn_cast<Instruction>(MSI->getValue())) { |
185 | 0 | if (I->hasOneUse()) |
186 | 0 | Dead.push_back(std::make_pair(I, MSI)); |
187 | 0 | } |
188 | 14 | } else if (MemTransferInst *14 MTI14 = dyn_cast<MemTransferInst>(U)) { |
189 | 0 | GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource()); |
190 | 0 | if (MemSrc && 0 MemSrc->isConstant()0 ) { |
191 | 0 | Changed = true; |
192 | 0 | MTI->eraseFromParent(); |
193 | 0 | } else if (Instruction *0 I0 = dyn_cast<Instruction>(MemSrc)) { |
194 | 0 | if (I->hasOneUse()) |
195 | 0 | Dead.push_back(std::make_pair(I, MTI)); |
196 | 0 | } |
197 | 14 | } else if (ConstantExpr *14 CE14 = dyn_cast<ConstantExpr>(U)) { |
198 | 8 | if (CE->use_empty()8 ) { |
199 | 0 | CE->destroyConstant(); |
200 | 0 | Changed = true; |
201 | 0 | } |
202 | 14 | } else if (Constant *6 C6 = dyn_cast<Constant>(U)) { |
203 | 0 | if (isSafeToDestroyConstant(C)0 ) { |
204 | 0 | C->destroyConstant(); |
205 | 0 | // This could have invalidated UI, start over from scratch. |
206 | 0 | Dead.clear(); |
207 | 0 | CleanupPointerRootUsers(GV, TLI); |
208 | 0 | return true; |
209 | 0 | } |
210 | 14 | } |
211 | 49 | } |
212 | 38 | |
213 | 45 | for (int i = 0, e = Dead.size(); 38 i != e45 ; ++i7 ) { |
214 | 7 | if (IsSafeComputationToRemove(Dead[i].first, TLI)7 ) { |
215 | 4 | Dead[i].second->eraseFromParent(); |
216 | 4 | Instruction *I = Dead[i].first; |
217 | 6 | do { |
218 | 6 | if (isAllocationFn(I, TLI)) |
219 | 3 | break; |
220 | 3 | Instruction *J = dyn_cast<Instruction>(I->getOperand(0)); |
221 | 3 | if (!J) |
222 | 1 | break; |
223 | 2 | I->eraseFromParent(); |
224 | 2 | I = J; |
225 | 4 | } while (1); |
226 | 4 | I->eraseFromParent(); |
227 | 4 | } |
228 | 7 | } |
229 | 38 | |
230 | 38 | return Changed; |
231 | 38 | } |
232 | | |
233 | | /// We just marked GV constant. Loop over all users of the global, cleaning up |
234 | | /// the obvious ones. This is largely just a quick scan over the use list to |
235 | | /// clean up the easy and obvious cruft. This returns true if it made a change. |
236 | | static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, |
237 | | const DataLayout &DL, |
238 | 2.63k | TargetLibraryInfo *TLI) { |
239 | 2.63k | bool Changed = false; |
240 | 2.63k | // Note that we need to use a weak value handle for the worklist items. When |
241 | 2.63k | // we delete a constant array, we may also be holding pointer to one of its |
242 | 2.63k | // elements (or an element of one of its elements if we're dealing with an |
243 | 2.63k | // array of arrays) in the worklist. |
244 | 2.63k | SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end()); |
245 | 6.80k | while (!WorkList.empty()6.80k ) { |
246 | 4.17k | Value *UV = WorkList.pop_back_val(); |
247 | 4.17k | if (!UV) |
248 | 5 | continue; |
249 | 4.17k | |
250 | 4.17k | User *U = cast<User>(UV); |
251 | 4.17k | |
252 | 4.17k | if (LoadInst *LI4.17k = dyn_cast<LoadInst>(U)) { |
253 | 1.88k | if (Init1.88k ) { |
254 | 89 | // Replace the load with the initializer. |
255 | 89 | LI->replaceAllUsesWith(Init); |
256 | 89 | LI->eraseFromParent(); |
257 | 89 | Changed = true; |
258 | 89 | } |
259 | 4.17k | } else if (StoreInst *2.28k SI2.28k = dyn_cast<StoreInst>(U)) { |
260 | 108 | // Store must be unreachable or storing Init into the global. |
261 | 108 | SI->eraseFromParent(); |
262 | 108 | Changed = true; |
263 | 2.28k | } else if (ConstantExpr *2.17k CE2.17k = dyn_cast<ConstantExpr>(U)) { |
264 | 105 | if (CE->getOpcode() == Instruction::GetElementPtr105 ) { |
265 | 93 | Constant *SubInit = nullptr; |
266 | 93 | if (Init) |
267 | 92 | SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); |
268 | 93 | Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI); |
269 | 105 | } else if (12 (CE->getOpcode() == Instruction::BitCast && |
270 | 11 | CE->getType()->isPointerTy()) || |
271 | 12 | CE->getOpcode() == Instruction::AddrSpaceCast1 ) { |
272 | 12 | // Pointer cast, delete any stores and memsets to the global. |
273 | 12 | Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI); |
274 | 12 | } |
275 | 105 | |
276 | 105 | if (CE->use_empty()105 ) { |
277 | 52 | CE->destroyConstant(); |
278 | 52 | Changed = true; |
279 | 52 | } |
280 | 2.17k | } else if (GetElementPtrInst *2.07k GEP2.07k = dyn_cast<GetElementPtrInst>(U)) { |
281 | 1.96k | // Do not transform "gepinst (gep constexpr (GV))" here, because forming |
282 | 1.96k | // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold |
283 | 1.96k | // and will invalidate our notion of what Init is. |
284 | 1.96k | Constant *SubInit = nullptr; |
285 | 1.96k | if (!isa<ConstantExpr>(GEP->getOperand(0))1.96k ) { |
286 | 1.93k | ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>( |
287 | 1.93k | ConstantFoldInstruction(GEP, DL, TLI)); |
288 | 1.93k | if (Init && 1.93k CE1.59k && CE->getOpcode() == Instruction::GetElementPtr0 ) |
289 | 0 | SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); |
290 | 1.93k | |
291 | 1.93k | // If the initializer is an all-null value and we have an inbounds GEP, |
292 | 1.93k | // we already know what the result of any load from that GEP is. |
293 | 1.93k | // TODO: Handle splats. |
294 | 1.93k | if (Init && 1.93k isa<ConstantAggregateZero>(Init)1.59k && GEP->isInBounds()43 ) |
295 | 42 | SubInit = Constant::getNullValue(GEP->getResultElementType()); |
296 | 1.93k | } |
297 | 1.96k | Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, TLI); |
298 | 1.96k | |
299 | 1.96k | if (GEP->use_empty()1.96k ) { |
300 | 44 | GEP->eraseFromParent(); |
301 | 44 | Changed = true; |
302 | 44 | } |
303 | 2.07k | } else if (MemIntrinsic *113 MI113 = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv |
304 | 21 | if (MI->getRawDest() == V21 ) { |
305 | 6 | MI->eraseFromParent(); |
306 | 6 | Changed = true; |
307 | 6 | } |
308 | 21 | |
309 | 113 | } else if (Constant *92 C92 = dyn_cast<Constant>(U)) { |
310 | 10 | // If we have a chain of dead constantexprs or other things dangling from |
311 | 10 | // us, and if they are all dead, nuke them without remorse. |
312 | 10 | if (isSafeToDestroyConstant(C)10 ) { |
313 | 10 | C->destroyConstant(); |
314 | 10 | CleanupConstantGlobalUsers(V, Init, DL, TLI); |
315 | 10 | return true; |
316 | 10 | } |
317 | 2.28k | } |
318 | 4.17k | } |
319 | 2.62k | return Changed; |
320 | 2.63k | } |
321 | | |
322 | | /// Return true if the specified instruction is a safe user of a derived |
323 | | /// expression from a global that we want to SROA. |
324 | 18.7k | static bool isSafeSROAElementUse(Value *V) { |
325 | 18.7k | // We might have a dead and dangling constant hanging off of here. |
326 | 18.7k | if (Constant *C = dyn_cast<Constant>(V)) |
327 | 40 | return isSafeToDestroyConstant(C); |
328 | 18.7k | |
329 | 18.7k | Instruction *I = dyn_cast<Instruction>(V); |
330 | 18.7k | if (!I18.7k ) return false0 ; |
331 | 18.7k | |
332 | 18.7k | // Loads are ok. |
333 | 18.7k | if (18.7k isa<LoadInst>(I)18.7k ) return true17.9k ; |
334 | 818 | |
335 | 818 | // Stores *to* the pointer are ok. |
336 | 818 | if (StoreInst *818 SI818 = dyn_cast<StoreInst>(I)) |
337 | 531 | return SI->getOperand(0) != V; |
338 | 287 | |
339 | 287 | // Otherwise, it must be a GEP. |
340 | 287 | GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); |
341 | 287 | if (!GEPI287 ) return false85 ; |
342 | 202 | |
343 | 202 | if (202 GEPI->getNumOperands() < 3 || 202 !isa<Constant>(GEPI->getOperand(1))190 || |
344 | 190 | !cast<Constant>(GEPI->getOperand(1))->isNullValue()) |
345 | 13 | return false; |
346 | 189 | |
347 | 189 | for (User *U : GEPI->users()) |
348 | 401 | if (401 !isSafeSROAElementUse(U)401 ) |
349 | 4 | return false; |
350 | 185 | return true; |
351 | 185 | } |
352 | | |
353 | | |
354 | | /// U is a direct user of the specified global value. Look at it and its uses |
355 | | /// and decide whether it is safe to SROA this global. |
356 | 26.7k | static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { |
357 | 26.7k | // The user of the global must be a GEP Inst or a ConstantExpr GEP. |
358 | 26.7k | if (!isa<GetElementPtrInst>(U) && |
359 | 25.6k | (!isa<ConstantExpr>(U) || |
360 | 25.6k | cast<ConstantExpr>(U)->getOpcode() != Instruction::GetElementPtr)) |
361 | 8.35k | return false; |
362 | 18.4k | |
363 | 18.4k | // Check to see if this ConstantExpr GEP is SRA'able. In particular, we |
364 | 18.4k | // don't like < 3 operand CE's, and we don't like non-constant integer |
365 | 18.4k | // indices. This enforces that all uses are 'gep GV, 0, C, ...' for some |
366 | 18.4k | // value of C. |
367 | 18.4k | if (18.4k U->getNumOperands() < 3 || 18.4k !isa<Constant>(U->getOperand(1))18.4k || |
368 | 18.4k | !cast<Constant>(U->getOperand(1))->isNullValue() || |
369 | 18.4k | !isa<ConstantInt>(U->getOperand(2))) |
370 | 1.14k | return false; |
371 | 17.2k | |
372 | 17.2k | gep_type_iterator GEPI = gep_type_begin(U), E = gep_type_end(U); |
373 | 17.2k | ++GEPI; // Skip over the pointer index. |
374 | 17.2k | |
375 | 17.2k | // If this is a use of an array allocation, do a bit more checking for sanity. |
376 | 17.2k | if (GEPI.isSequential()17.2k ) { |
377 | 514 | ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2)); |
378 | 514 | |
379 | 514 | // Check to make sure that index falls within the array. If not, |
380 | 514 | // something funny is going on, so we won't do the optimization. |
381 | 514 | // |
382 | 514 | if (GEPI.isBoundedSequential() && |
383 | 514 | Idx->getZExtValue() >= GEPI.getSequentialNumElements()) |
384 | 0 | return false; |
385 | 514 | |
386 | 514 | // We cannot scalar repl this level of the array unless any array |
387 | 514 | // sub-indices are in-range constants. In particular, consider: |
388 | 514 | // A[0][i]. We cannot know that the user isn't doing invalid things like |
389 | 514 | // allowing i to index an out-of-range subscript that accesses A[1]. |
390 | 514 | // |
391 | 514 | // Scalar replacing *just* the outer index of the array is probably not |
392 | 514 | // going to be a win anyway, so just give up. |
393 | 514 | for (++GEPI; // Skip array index. |
394 | 628 | GEPI != E; |
395 | 514 | ++GEPI114 ) { |
396 | 118 | if (GEPI.isStruct()) |
397 | 57 | continue; |
398 | 61 | |
399 | 61 | ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand()); |
400 | 61 | if (!IdxVal || |
401 | 57 | (GEPI.isBoundedSequential() && |
402 | 57 | IdxVal->getZExtValue() >= GEPI.getSequentialNumElements())) |
403 | 4 | return false; |
404 | 118 | } |
405 | 514 | } |
406 | 17.2k | |
407 | 17.2k | return llvm::all_of(U->users(), |
408 | 18.3k | [](User *UU) { return isSafeSROAElementUse(UU); }); |
409 | 26.7k | } |
410 | | |
411 | | /// Look at all uses of the global and decide whether it is safe for us to |
412 | | /// perform this transformation. |
413 | 9.66k | static bool GlobalUsersSafeToSRA(GlobalValue *GV) { |
414 | 9.66k | for (User *U : GV->users()) |
415 | 26.7k | if (26.7k !IsUserOfGlobalSafeForSRA(U, GV)26.7k ) |
416 | 9.60k | return false; |
417 | 64 | |
418 | 64 | return true; |
419 | 64 | } |
420 | | |
421 | | /// Copy over the debug info for a variable to its SRA replacements. |
422 | | static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, |
423 | | uint64_t FragmentOffsetInBits, |
424 | | uint64_t FragmentSizeInBits, |
425 | 266 | unsigned NumElements) { |
426 | 266 | SmallVector<DIGlobalVariableExpression *, 1> GVs; |
427 | 266 | GV->getDebugInfo(GVs); |
428 | 9 | for (auto *GVE : GVs) { |
429 | 9 | DIVariable *Var = GVE->getVariable(); |
430 | 9 | DIExpression *Expr = GVE->getExpression(); |
431 | 9 | if (NumElements > 1) |
432 | 8 | Expr = DIExpression::createFragmentExpression(Expr, FragmentOffsetInBits, |
433 | 8 | FragmentSizeInBits); |
434 | 9 | auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr); |
435 | 9 | NGV->addDebugInfo(NGVE); |
436 | 9 | } |
437 | 266 | } |
438 | | |
439 | | |
440 | | /// Perform scalar replacement of aggregates on the specified global variable. |
441 | | /// This opens the door for other optimizations by exposing the behavior of the |
442 | | /// program in a more fine-grained way. We have determined that this |
443 | | /// transformation is safe already. We return the first global variable we |
444 | | /// insert so that the caller can reprocess it. |
445 | 9.66k | static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { |
446 | 9.66k | // Make sure this global only has simple uses that we can SRA. |
447 | 9.66k | if (!GlobalUsersSafeToSRA(GV)) |
448 | 9.60k | return nullptr; |
449 | 64 | |
450 | 9.66k | assert(GV->hasLocalLinkage()); |
451 | 64 | Constant *Init = GV->getInitializer(); |
452 | 64 | Type *Ty = Init->getType(); |
453 | 64 | |
454 | 64 | std::vector<GlobalVariable*> NewGlobals; |
455 | 64 | Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); |
456 | 64 | |
457 | 64 | // Get the alignment of the global, either explicit or target-specific. |
458 | 64 | unsigned StartAlignment = GV->getAlignment(); |
459 | 64 | if (StartAlignment == 0) |
460 | 7 | StartAlignment = DL.getABITypeAlignment(GV->getType()); |
461 | 64 | |
462 | 64 | if (StructType *STy64 = dyn_cast<StructType>(Ty)) { |
463 | 24 | uint64_t FragmentOffset = 0; |
464 | 24 | unsigned NumElements = STy->getNumElements(); |
465 | 24 | NewGlobals.reserve(NumElements); |
466 | 24 | const StructLayout &Layout = *DL.getStructLayout(STy); |
467 | 105 | for (unsigned i = 0, e = NumElements; i != e105 ; ++i81 ) { |
468 | 81 | Constant *In = Init->getAggregateElement(i); |
469 | 81 | assert(In && "Couldn't get element of initializer?"); |
470 | 81 | GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, |
471 | 81 | GlobalVariable::InternalLinkage, |
472 | 81 | In, GV->getName()+"."+Twine(i), |
473 | 81 | GV->getThreadLocalMode(), |
474 | 81 | GV->getType()->getAddressSpace()); |
475 | 81 | NGV->setExternallyInitialized(GV->isExternallyInitialized()); |
476 | 81 | NGV->copyAttributesFrom(GV); |
477 | 81 | Globals.push_back(NGV); |
478 | 81 | NewGlobals.push_back(NGV); |
479 | 81 | |
480 | 81 | // Calculate the known alignment of the field. If the original aggregate |
481 | 81 | // had 256 byte alignment for example, something might depend on that: |
482 | 81 | // propagate info to each field. |
483 | 81 | uint64_t FieldOffset = Layout.getElementOffset(i); |
484 | 81 | unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); |
485 | 81 | if (NewAlign > DL.getABITypeAlignment(STy->getElementType(i))) |
486 | 20 | NGV->setAlignment(NewAlign); |
487 | 81 | |
488 | 81 | // Copy over the debug info for the variable. |
489 | 81 | FragmentOffset = alignTo(FragmentOffset, NewAlign); |
490 | 81 | uint64_t Size = DL.getTypeSizeInBits(NGV->getValueType()); |
491 | 81 | transferSRADebugInfo(GV, NGV, FragmentOffset, Size, NumElements); |
492 | 81 | FragmentOffset += Size; |
493 | 81 | } |
494 | 64 | } else if (SequentialType *40 STy40 = dyn_cast<SequentialType>(Ty)) { |
495 | 40 | unsigned NumElements = STy->getNumElements(); |
496 | 40 | if (NumElements > 16 && 40 GV->hasNUsesOrMore(16)0 ) |
497 | 0 | return nullptr; // It's not worth it. |
498 | 40 | NewGlobals.reserve(NumElements); |
499 | 40 | auto ElTy = STy->getElementType(); |
500 | 40 | uint64_t EltSize = DL.getTypeAllocSize(ElTy); |
501 | 40 | unsigned EltAlign = DL.getABITypeAlignment(ElTy); |
502 | 40 | uint64_t FragmentSizeInBits = DL.getTypeSizeInBits(ElTy); |
503 | 225 | for (unsigned i = 0, e = NumElements; i != e225 ; ++i185 ) { |
504 | 185 | Constant *In = Init->getAggregateElement(i); |
505 | 185 | assert(In && "Couldn't get element of initializer?"); |
506 | 185 | |
507 | 185 | GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false, |
508 | 185 | GlobalVariable::InternalLinkage, |
509 | 185 | In, GV->getName()+"."+Twine(i), |
510 | 185 | GV->getThreadLocalMode(), |
511 | 185 | GV->getType()->getAddressSpace()); |
512 | 185 | NGV->setExternallyInitialized(GV->isExternallyInitialized()); |
513 | 185 | NGV->copyAttributesFrom(GV); |
514 | 185 | Globals.push_back(NGV); |
515 | 185 | NewGlobals.push_back(NGV); |
516 | 185 | |
517 | 185 | // Calculate the known alignment of the field. If the original aggregate |
518 | 185 | // had 256 byte alignment for example, something might depend on that: |
519 | 185 | // propagate info to each field. |
520 | 185 | unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); |
521 | 185 | if (NewAlign > EltAlign) |
522 | 10 | NGV->setAlignment(NewAlign); |
523 | 185 | transferSRADebugInfo(GV, NGV, FragmentSizeInBits * i, FragmentSizeInBits, |
524 | 185 | NumElements); |
525 | 185 | } |
526 | 40 | } |
527 | 64 | |
528 | 64 | if (64 NewGlobals.empty()64 ) |
529 | 0 | return nullptr; |
530 | 64 | |
531 | 64 | DEBUG64 (dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); |
532 | 64 | |
533 | 64 | Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); |
534 | 64 | |
535 | 64 | // Loop over all of the uses of the global, replacing the constantexpr geps, |
536 | 64 | // with smaller constantexpr geps or direct references. |
537 | 326 | while (!GV->use_empty()326 ) { |
538 | 262 | User *GEP = GV->user_back(); |
539 | 262 | assert(((isa<ConstantExpr>(GEP) && |
540 | 262 | cast<ConstantExpr>(GEP)->getOpcode()==Instruction::GetElementPtr)|| |
541 | 262 | isa<GetElementPtrInst>(GEP)) && "NonGEP CE's are not SRAable!"); |
542 | 262 | |
543 | 262 | // Ignore the 1th operand, which has to be zero or else the program is quite |
544 | 262 | // broken (undefined). Get the 2nd operand, which is the structure or array |
545 | 262 | // index. |
546 | 262 | unsigned Val = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue(); |
547 | 262 | if (Val >= NewGlobals.size()262 ) Val = 00 ; // Out of bound array access. |
548 | 262 | |
549 | 262 | Value *NewPtr = NewGlobals[Val]; |
550 | 262 | Type *NewTy = NewGlobals[Val]->getValueType(); |
551 | 262 | |
552 | 262 | // Form a shorter GEP if needed. |
553 | 262 | if (GEP->getNumOperands() > 3262 ) { |
554 | 23 | if (ConstantExpr *CE23 = dyn_cast<ConstantExpr>(GEP)) { |
555 | 21 | SmallVector<Constant*, 8> Idxs; |
556 | 21 | Idxs.push_back(NullInt); |
557 | 42 | for (unsigned i = 3, e = CE->getNumOperands(); i != e42 ; ++i21 ) |
558 | 21 | Idxs.push_back(CE->getOperand(i)); |
559 | 21 | NewPtr = |
560 | 21 | ConstantExpr::getGetElementPtr(NewTy, cast<Constant>(NewPtr), Idxs); |
561 | 23 | } else { |
562 | 2 | GetElementPtrInst *GEPI = cast<GetElementPtrInst>(GEP); |
563 | 2 | SmallVector<Value*, 8> Idxs; |
564 | 2 | Idxs.push_back(NullInt); |
565 | 4 | for (unsigned i = 3, e = GEPI->getNumOperands(); i != e4 ; ++i2 ) |
566 | 2 | Idxs.push_back(GEPI->getOperand(i)); |
567 | 2 | NewPtr = GetElementPtrInst::Create( |
568 | 2 | NewTy, NewPtr, Idxs, GEPI->getName() + "." + Twine(Val), GEPI); |
569 | 2 | } |
570 | 23 | } |
571 | 262 | GEP->replaceAllUsesWith(NewPtr); |
572 | 262 | |
573 | 262 | if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(GEP)) |
574 | 2 | GEPI->eraseFromParent(); |
575 | 262 | else |
576 | 260 | cast<ConstantExpr>(GEP)->destroyConstant(); |
577 | 262 | } |
578 | 64 | |
579 | 64 | // Delete the old global, now that it is dead. |
580 | 64 | Globals.erase(GV); |
581 | 64 | ++NumSRA; |
582 | 64 | |
583 | 64 | // Loop over the new globals array deleting any globals that are obviously |
584 | 64 | // dead. This can arise due to scalarization of a structure or an array that |
585 | 64 | // has elements that are dead. |
586 | 64 | unsigned FirstGlobal = 0; |
587 | 330 | for (unsigned i = 0, e = NewGlobals.size(); i != e330 ; ++i266 ) |
588 | 266 | if (266 NewGlobals[i]->use_empty()266 ) { |
589 | 21 | Globals.erase(NewGlobals[i]); |
590 | 21 | if (FirstGlobal == i21 ) ++FirstGlobal0 ; |
591 | 266 | } |
592 | 64 | |
593 | 64 | return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal]64 : nullptr0 ; |
594 | 9.66k | } |
595 | | |
596 | | /// Return true if all users of the specified value will trap if the value is |
597 | | /// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid |
598 | | /// reprocessing them. |
599 | | static bool AllUsesOfValueWillTrapIfNull(const Value *V, |
600 | 2.80k | SmallPtrSetImpl<const PHINode*> &PHIs) { |
601 | 2.80k | for (const User *U : V->users()) |
602 | 3.12k | if (3.12k isa<LoadInst>(U)3.12k ) { |
603 | 870 | // Will trap. |
604 | 3.12k | } else if (const StoreInst *2.25k SI2.25k = dyn_cast<StoreInst>(U)) { |
605 | 528 | if (SI->getOperand(0) == V528 ) { |
606 | 0 | //cerr << "NONTRAPPING USE: " << *U; |
607 | 0 | return false; // Storing the value. |
608 | 0 | } |
609 | 1.72k | } else if (const CallInst *1.72k CI1.72k = dyn_cast<CallInst>(U)) { |
610 | 164 | if (CI->getCalledValue() != V164 ) { |
611 | 164 | //cerr << "NONTRAPPING USE: " << *U; |
612 | 164 | return false; // Not calling the ptr |
613 | 164 | } |
614 | 1.56k | } else if (const InvokeInst *1.56k II1.56k = dyn_cast<InvokeInst>(U)) { |
615 | 0 | if (II->getCalledValue() != V0 ) { |
616 | 0 | //cerr << "NONTRAPPING USE: " << *U; |
617 | 0 | return false; // Not calling the ptr |
618 | 0 | } |
619 | 1.56k | } else if (const BitCastInst *1.56k CI1.56k = dyn_cast<BitCastInst>(U)) { |
620 | 120 | if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)120 ) return false120 ; |
621 | 1.44k | } else if (const GetElementPtrInst *1.44k GEPI1.44k = dyn_cast<GetElementPtrInst>(U)) { |
622 | 1.31k | if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)1.31k ) return false4 ; |
623 | 124 | } else if (const PHINode *124 PN124 = dyn_cast<PHINode>(U)) { |
624 | 48 | // If we've already seen this phi node, ignore it, it has already been |
625 | 48 | // checked. |
626 | 48 | if (PHIs.insert(PN).second && 48 !AllUsesOfValueWillTrapIfNull(PN, PHIs)40 ) |
627 | 0 | return false; |
628 | 76 | } else if (76 isa<ICmpInst>(U) && |
629 | 76 | isa<ConstantPointerNull>(U->getOperand(1))76 ) { |
630 | 76 | // Ignore icmp X, null |
631 | 76 | } else { |
632 | 0 | //cerr << "NONTRAPPING USE: " << *U; |
633 | 0 | return false; |
634 | 0 | } |
635 | 2.51k | |
636 | 2.51k | return true; |
637 | 2.51k | } |
638 | | |
639 | | /// Return true if all uses of any loads from GV will trap if the loaded value |
640 | | /// is null. Note that this also permits comparisons of the loaded value |
641 | | /// against null, as a special case. |
642 | 279 | static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { |
643 | 279 | for (const User *U : GV->users()) |
644 | 1.62k | if (const LoadInst *1.62k LI1.62k = dyn_cast<LoadInst>(U)) { |
645 | 1.32k | SmallPtrSet<const PHINode*, 8> PHIs; |
646 | 1.32k | if (!AllUsesOfValueWillTrapIfNull(LI, PHIs)) |
647 | 164 | return false; |
648 | 299 | } else if (299 isa<StoreInst>(U)299 ) { |
649 | 299 | // Ignore stores to the global. |
650 | 299 | } else { |
651 | 0 | // We don't know or understand this user, bail out. |
652 | 0 | //cerr << "UNKNOWN USER OF GLOBAL!: " << *U; |
653 | 0 | return false; |
654 | 0 | } |
655 | 115 | return true; |
656 | 115 | } |
657 | | |
658 | 66 | static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { |
659 | 66 | bool Changed = false; |
660 | 131 | for (auto UI = V->user_begin(), E = V->user_end(); UI != E131 ; ) { |
661 | 65 | Instruction *I = cast<Instruction>(*UI++); |
662 | 65 | if (LoadInst *LI65 = dyn_cast<LoadInst>(I)) { |
663 | 1 | LI->setOperand(0, NewV); |
664 | 1 | Changed = true; |
665 | 65 | } else if (StoreInst *64 SI64 = dyn_cast<StoreInst>(I)) { |
666 | 0 | if (SI->getOperand(1) == V0 ) { |
667 | 0 | SI->setOperand(1, NewV); |
668 | 0 | Changed = true; |
669 | 0 | } |
670 | 64 | } else if (64 isa<CallInst>(I) || 64 isa<InvokeInst>(I)58 ) { |
671 | 6 | CallSite CS(I); |
672 | 6 | if (CS.getCalledValue() == V6 ) { |
673 | 6 | // Calling through the pointer! Turn into a direct call, but be careful |
674 | 6 | // that the pointer is not also being passed as an argument. |
675 | 6 | CS.setCalledFunction(NewV); |
676 | 6 | Changed = true; |
677 | 6 | bool PassedAsArg = false; |
678 | 22 | for (unsigned i = 0, e = CS.arg_size(); i != e22 ; ++i16 ) |
679 | 16 | if (16 CS.getArgument(i) == V16 ) { |
680 | 0 | PassedAsArg = true; |
681 | 0 | CS.setArgument(i, NewV); |
682 | 0 | } |
683 | 6 | |
684 | 6 | if (PassedAsArg6 ) { |
685 | 0 | // Being passed as an argument also. Be careful to not invalidate UI! |
686 | 0 | UI = V->user_begin(); |
687 | 0 | } |
688 | 6 | } |
689 | 64 | } else if (CastInst *58 CI58 = dyn_cast<CastInst>(I)) { |
690 | 0 | Changed |= OptimizeAwayTrappingUsesOfValue(CI, |
691 | 0 | ConstantExpr::getCast(CI->getOpcode(), |
692 | 0 | NewV, CI->getType())); |
693 | 0 | if (CI->use_empty()0 ) { |
694 | 0 | Changed = true; |
695 | 0 | CI->eraseFromParent(); |
696 | 0 | } |
697 | 58 | } else if (GetElementPtrInst *58 GEPI58 = dyn_cast<GetElementPtrInst>(I)) { |
698 | 40 | // Should handle GEP here. |
699 | 40 | SmallVector<Constant*, 8> Idxs; |
700 | 40 | Idxs.reserve(GEPI->getNumOperands()-1); |
701 | 40 | for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end(); |
702 | 40 | i != e40 ; ++i0 ) |
703 | 40 | if (Constant *40 C40 = dyn_cast<Constant>(*i)) |
704 | 0 | Idxs.push_back(C); |
705 | 40 | else |
706 | 40 | break; |
707 | 40 | if (Idxs.size() == GEPI->getNumOperands()-1) |
708 | 0 | Changed |= OptimizeAwayTrappingUsesOfValue( |
709 | 0 | GEPI, ConstantExpr::getGetElementPtr(nullptr, NewV, Idxs)); |
710 | 40 | if (GEPI->use_empty()40 ) { |
711 | 0 | Changed = true; |
712 | 0 | GEPI->eraseFromParent(); |
713 | 0 | } |
714 | 64 | } |
715 | 65 | } |
716 | 66 | |
717 | 66 | return Changed; |
718 | 66 | } |
719 | | |
720 | | |
721 | | /// The specified global has only one non-null value stored into it. If there |
722 | | /// are uses of the loaded value that would trap if the loaded value is |
723 | | /// dynamically null, then we know that they cannot be reachable with a null |
724 | | /// optimize away the load. |
725 | | static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, |
726 | | const DataLayout &DL, |
727 | 26 | TargetLibraryInfo *TLI) { |
728 | 26 | bool Changed = false; |
729 | 26 | |
730 | 26 | // Keep track of whether we are able to remove all the uses of the global |
731 | 26 | // other than the store that defines it. |
732 | 26 | bool AllNonStoreUsesGone = true; |
733 | 26 | |
734 | 26 | // Replace all uses of loads with uses of uses of the stored value. |
735 | 129 | for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E129 ;){ |
736 | 103 | User *GlobalUser = *GUI++; |
737 | 103 | if (LoadInst *LI103 = dyn_cast<LoadInst>(GlobalUser)) { |
738 | 66 | Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); |
739 | 66 | // If we were able to delete all uses of the loads |
740 | 66 | if (LI->use_empty()66 ) { |
741 | 8 | LI->eraseFromParent(); |
742 | 8 | Changed = true; |
743 | 66 | } else { |
744 | 58 | AllNonStoreUsesGone = false; |
745 | 58 | } |
746 | 103 | } else if (37 isa<StoreInst>(GlobalUser)37 ) { |
747 | 28 | // Ignore the store that stores "LV" to the global. |
748 | 28 | assert(GlobalUser->getOperand(1) == GV && |
749 | 28 | "Must be storing *to* the global"); |
750 | 37 | } else { |
751 | 9 | AllNonStoreUsesGone = false; |
752 | 9 | |
753 | 9 | // If we get here we could have other crazy uses that are transitively |
754 | 9 | // loaded. |
755 | 9 | assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || |
756 | 9 | isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || |
757 | 9 | isa<BitCastInst>(GlobalUser) || |
758 | 9 | isa<GetElementPtrInst>(GlobalUser)) && |
759 | 9 | "Only expect load and stores!"); |
760 | 9 | } |
761 | 103 | } |
762 | 26 | |
763 | 26 | if (Changed26 ) { |
764 | 7 | DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n"); |
765 | 7 | ++NumGlobUses; |
766 | 7 | } |
767 | 26 | |
768 | 26 | // If we nuked all of the loads, then none of the stores are needed either, |
769 | 26 | // nor is the global. |
770 | 26 | if (AllNonStoreUsesGone26 ) { |
771 | 4 | if (isLeakCheckerRoot(GV)4 ) { |
772 | 4 | Changed |= CleanupPointerRootUsers(GV, TLI); |
773 | 4 | } else { |
774 | 0 | Changed = true; |
775 | 0 | CleanupConstantGlobalUsers(GV, nullptr, DL, TLI); |
776 | 0 | } |
777 | 4 | if (GV->use_empty()4 ) { |
778 | 4 | DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); |
779 | 4 | Changed = true; |
780 | 4 | GV->eraseFromParent(); |
781 | 4 | ++NumDeleted; |
782 | 4 | } |
783 | 4 | } |
784 | 26 | return Changed; |
785 | 26 | } |
786 | | |
787 | | /// Walk the use list of V, constant folding all of the instructions that are |
788 | | /// foldable. |
789 | | static void ConstantPropUsersOf(Value *V, const DataLayout &DL, |
790 | 7 | TargetLibraryInfo *TLI) { |
791 | 17 | for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) |
792 | 10 | if (Instruction *10 I10 = dyn_cast<Instruction>(*UI++)) |
793 | 7 | if (Constant *7 NewC7 = ConstantFoldInstruction(I, DL, TLI)) { |
794 | 4 | I->replaceAllUsesWith(NewC); |
795 | 4 | |
796 | 4 | // Advance UI to the next non-I use to avoid invalidating it! |
797 | 4 | // Instructions could multiply use V. |
798 | 4 | while (UI != E && 4 *UI == I0 ) |
799 | 0 | ++UI; |
800 | 4 | if (isInstructionTriviallyDead(I, TLI)) |
801 | 4 | I->eraseFromParent(); |
802 | 10 | } |
803 | 7 | } |
804 | | |
805 | | /// This function takes the specified global variable, and transforms the |
806 | | /// program as if it always contained the result of the specified malloc. |
807 | | /// Because it is always the result of the specified malloc, there is no reason |
808 | | /// to actually DO the malloc. Instead, turn the malloc into a global, and any |
809 | | /// loads of GV as uses of the new global. |
810 | | static GlobalVariable * |
811 | | OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, |
812 | | ConstantInt *NElements, const DataLayout &DL, |
813 | 4 | TargetLibraryInfo *TLI) { |
814 | 4 | DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); |
815 | 4 | |
816 | 4 | Type *GlobalType; |
817 | 4 | if (NElements->getZExtValue() == 1) |
818 | 1 | GlobalType = AllocTy; |
819 | 4 | else |
820 | 4 | // If we have an array allocation, the global variable is of an array. |
821 | 3 | GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); |
822 | 4 | |
823 | 4 | // Create the new global variable. The contents of the malloc'd memory is |
824 | 4 | // undefined, so initialize with an undef value. |
825 | 4 | GlobalVariable *NewGV = new GlobalVariable( |
826 | 4 | *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, |
827 | 4 | UndefValue::get(GlobalType), GV->getName() + ".body", nullptr, |
828 | 4 | GV->getThreadLocalMode()); |
829 | 4 | |
830 | 4 | // If there are bitcast users of the malloc (which is typical, usually we have |
831 | 4 | // a malloc + bitcast) then replace them with uses of the new global. Update |
832 | 4 | // other users to use the global as well. |
833 | 4 | BitCastInst *TheBC = nullptr; |
834 | 8 | while (!CI->use_empty()8 ) { |
835 | 4 | Instruction *User = cast<Instruction>(CI->user_back()); |
836 | 4 | if (BitCastInst *BCI4 = dyn_cast<BitCastInst>(User)) { |
837 | 4 | if (BCI->getType() == NewGV->getType()4 ) { |
838 | 1 | BCI->replaceAllUsesWith(NewGV); |
839 | 1 | BCI->eraseFromParent(); |
840 | 4 | } else { |
841 | 3 | BCI->setOperand(0, NewGV); |
842 | 3 | } |
843 | 0 | } else { |
844 | 0 | if (!TheBC) |
845 | 0 | TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); |
846 | 0 | User->replaceUsesOfWith(CI, TheBC); |
847 | 0 | } |
848 | 4 | } |
849 | 4 | |
850 | 4 | Constant *RepValue = NewGV; |
851 | 4 | if (NewGV->getType() != GV->getValueType()) |
852 | 3 | RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType()); |
853 | 4 | |
854 | 4 | // If there is a comparison against null, we will insert a global bool to |
855 | 4 | // keep track of whether the global was initialized yet or not. |
856 | 4 | GlobalVariable *InitBool = |
857 | 4 | new GlobalVariable(Type::getInt1Ty(GV->getContext()), false, |
858 | 4 | GlobalValue::InternalLinkage, |
859 | 4 | ConstantInt::getFalse(GV->getContext()), |
860 | 4 | GV->getName()+".init", GV->getThreadLocalMode()); |
861 | 4 | bool InitBoolUsed = false; |
862 | 4 | |
863 | 4 | // Loop over all uses of GV, processing them in turn. |
864 | 13 | while (!GV->use_empty()13 ) { |
865 | 9 | if (StoreInst *SI9 = dyn_cast<StoreInst>(GV->user_back())) { |
866 | 4 | // The global is initialized when the store to it occurs. |
867 | 4 | new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0, |
868 | 4 | SI->getOrdering(), SI->getSyncScopeID(), SI); |
869 | 4 | SI->eraseFromParent(); |
870 | 4 | continue; |
871 | 4 | } |
872 | 5 | |
873 | 5 | LoadInst *LI = cast<LoadInst>(GV->user_back()); |
874 | 9 | while (!LI->use_empty()9 ) { |
875 | 4 | Use &LoadUse = *LI->use_begin(); |
876 | 4 | ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser()); |
877 | 4 | if (!ICI4 ) { |
878 | 4 | LoadUse = RepValue; |
879 | 4 | continue; |
880 | 4 | } |
881 | 0 |
|
882 | 0 | // Replace the cmp X, 0 with a use of the bool value. |
883 | 0 | // Sink the load to where the compare was, if atomic rules allow us to. |
884 | 0 | Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0, |
885 | 0 | LI->getOrdering(), LI->getSyncScopeID(), |
886 | 0 | LI->isUnordered() ? (Instruction*)ICI0 : LI0 ); |
887 | 0 | InitBoolUsed = true; |
888 | 0 | switch (ICI->getPredicate()) { |
889 | 0 | default: 0 llvm_unreachable0 ("Unknown ICmp Predicate!"); |
890 | 0 | case ICmpInst::ICMP_ULT: |
891 | 0 | case ICmpInst::ICMP_SLT: // X < null -> always false |
892 | 0 | LV = ConstantInt::getFalse(GV->getContext()); |
893 | 0 | break; |
894 | 0 | case ICmpInst::ICMP_ULE: |
895 | 0 | case ICmpInst::ICMP_SLE: |
896 | 0 | case ICmpInst::ICMP_EQ: |
897 | 0 | LV = BinaryOperator::CreateNot(LV, "notinit", ICI); |
898 | 0 | break; |
899 | 0 | case ICmpInst::ICMP_NE: |
900 | 0 | case ICmpInst::ICMP_UGE: |
901 | 0 | case ICmpInst::ICMP_SGE: |
902 | 0 | case ICmpInst::ICMP_UGT: |
903 | 0 | case ICmpInst::ICMP_SGT: |
904 | 0 | break; // no change. |
905 | 0 | } |
906 | 0 | ICI->replaceAllUsesWith(LV); |
907 | 0 | ICI->eraseFromParent(); |
908 | 0 | } |
909 | 5 | LI->eraseFromParent(); |
910 | 5 | } |
911 | 4 | |
912 | 4 | // If the initialization boolean was used, insert it, otherwise delete it. |
913 | 4 | if (4 !InitBoolUsed4 ) { |
914 | 8 | while (!InitBool->use_empty()) // Delete initializations |
915 | 4 | cast<StoreInst>(InitBool->user_back())->eraseFromParent(); |
916 | 4 | delete InitBool; |
917 | 4 | } else |
918 | 0 | GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool); |
919 | 4 | |
920 | 4 | // Now the GV is dead, nuke it and the malloc.. |
921 | 4 | GV->eraseFromParent(); |
922 | 4 | CI->eraseFromParent(); |
923 | 4 | |
924 | 4 | // To further other optimizations, loop over all users of NewGV and try to |
925 | 4 | // constant prop them. This will promote GEP instructions with constant |
926 | 4 | // indices into GEP constant-exprs, which will allow global-opt to hack on it. |
927 | 4 | ConstantPropUsersOf(NewGV, DL, TLI); |
928 | 4 | if (RepValue != NewGV) |
929 | 3 | ConstantPropUsersOf(RepValue, DL, TLI); |
930 | 4 | |
931 | 4 | return NewGV; |
932 | 4 | } |
933 | | |
934 | | /// Scan the use-list of V checking to make sure that there are no complex uses |
935 | | /// of V. We permit simple things like dereferencing the pointer, but not |
936 | | /// storing through the address, unless it is to the specified global. |
937 | | static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, |
938 | | const GlobalVariable *GV, |
939 | 304 | SmallPtrSetImpl<const PHINode*> &PHIs) { |
940 | 412 | for (const User *U : V->users()) { |
941 | 412 | const Instruction *Inst = cast<Instruction>(U); |
942 | 412 | |
943 | 412 | if (isa<LoadInst>(Inst) || 412 isa<CmpInst>(Inst)412 ) { |
944 | 0 | continue; // Fine, ignore. |
945 | 0 | } |
946 | 412 | |
947 | 412 | if (const StoreInst *412 SI412 = dyn_cast<StoreInst>(Inst)) { |
948 | 219 | if (SI->getOperand(0) == V && 219 SI->getOperand(1) != GV147 ) |
949 | 44 | return false; // Storing the pointer itself... bad. |
950 | 175 | continue; // Otherwise, storing through it, or storing into GV... fine. |
951 | 175 | } |
952 | 193 | |
953 | 193 | // Must index into the array and into the struct. |
954 | 193 | if (193 isa<GetElementPtrInst>(Inst) && 193 Inst->getNumOperands() >= 378 ) { |
955 | 78 | if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(Inst, GV, PHIs)) |
956 | 0 | return false; |
957 | 78 | continue; |
958 | 78 | } |
959 | 115 | |
960 | 115 | if (const PHINode *115 PN115 = dyn_cast<PHINode>(Inst)) { |
961 | 0 | // PHIs are ok if all uses are ok. Don't infinitely recurse through PHI |
962 | 0 | // cycles. |
963 | 0 | if (PHIs.insert(PN).second) |
964 | 0 | if (0 !ValueIsOnlyUsedLocallyOrStoredToOneGlobal(PN, GV, PHIs)0 ) |
965 | 0 | return false; |
966 | 0 | continue; |
967 | 0 | } |
968 | 115 | |
969 | 115 | if (const BitCastInst *115 BCI115 = dyn_cast<BitCastInst>(Inst)) { |
970 | 111 | if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) |
971 | 44 | return false; |
972 | 67 | continue; |
973 | 67 | } |
974 | 4 | |
975 | 4 | return false; |
976 | 4 | } |
977 | 212 | return true; |
978 | 212 | } |
979 | | |
980 | | /// The Alloc pointer is stored into GV somewhere. Transform all uses of the |
981 | | /// allocation into loads from the global and uses of the resultant pointer. |
982 | | /// Further, delete the store into GV. This assumes that these value pass the |
983 | | /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. |
984 | | static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, |
985 | 20 | GlobalVariable *GV) { |
986 | 40 | while (!Alloc->use_empty()40 ) { |
987 | 20 | Instruction *U = cast<Instruction>(*Alloc->user_begin()); |
988 | 20 | Instruction *InsertPt = U; |
989 | 20 | if (StoreInst *SI20 = dyn_cast<StoreInst>(U)) { |
990 | 7 | // If this is the store of the allocation into the global, remove it. |
991 | 7 | if (SI->getOperand(1) == GV7 ) { |
992 | 7 | SI->eraseFromParent(); |
993 | 7 | continue; |
994 | 7 | } |
995 | 13 | } else if (PHINode *13 PN13 = dyn_cast<PHINode>(U)) { |
996 | 0 | // Insert the load in the corresponding predecessor, not right before the |
997 | 0 | // PHI. |
998 | 0 | InsertPt = PN->getIncomingBlock(*Alloc->use_begin())->getTerminator(); |
999 | 13 | } else if (13 isa<BitCastInst>(U)13 ) { |
1000 | 11 | // Must be bitcast between the malloc and store to initialize the global. |
1001 | 11 | ReplaceUsesOfMallocWithGlobal(U, GV); |
1002 | 11 | U->eraseFromParent(); |
1003 | 11 | continue; |
1004 | 2 | } else if (GetElementPtrInst *2 GEPI2 = dyn_cast<GetElementPtrInst>(U)) { |
1005 | 2 | // If this is a "GEP bitcast" and the user is a store to the global, then |
1006 | 2 | // just process it as a bitcast. |
1007 | 2 | if (GEPI->hasAllZeroIndices() && 2 GEPI->hasOneUse()2 ) |
1008 | 2 | if (StoreInst *2 SI2 = dyn_cast<StoreInst>(GEPI->user_back())) |
1009 | 2 | if (2 SI->getOperand(1) == GV2 ) { |
1010 | 2 | // Must be bitcast GEP between the malloc and store to initialize |
1011 | 2 | // the global. |
1012 | 2 | ReplaceUsesOfMallocWithGlobal(GEPI, GV); |
1013 | 2 | GEPI->eraseFromParent(); |
1014 | 2 | continue; |
1015 | 2 | } |
1016 | 0 | } |
1017 | 0 |
|
1018 | 0 | // Insert a load from the global, and use it instead of the malloc. |
1019 | 0 | Value *NL = new LoadInst(GV, GV->getName()+".val", InsertPt); |
1020 | 0 | U->replaceUsesOfWith(Alloc, NL); |
1021 | 0 | } |
1022 | 20 | } |
1023 | | |
1024 | | /// Verify that all uses of V (a load, or a phi of a load) are simple enough to |
1025 | | /// perform heap SRA on. This permits GEP's that index through the array and |
1026 | | /// struct field, icmps of null, and PHIs. |
1027 | | static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, |
1028 | | SmallPtrSetImpl<const PHINode*> &LoadUsingPHIs, |
1029 | 9 | SmallPtrSetImpl<const PHINode*> &LoadUsingPHIsPerLoad) { |
1030 | 9 | // We permit two users of the load: setcc comparing against the null |
1031 | 9 | // pointer, and a getelementptr of a specific form. |
1032 | 8 | for (const User *U : V->users()) { |
1033 | 8 | const Instruction *UI = cast<Instruction>(U); |
1034 | 8 | |
1035 | 8 | // Comparison against null is ok. |
1036 | 8 | if (const ICmpInst *ICI8 = dyn_cast<ICmpInst>(UI)) { |
1037 | 0 | if (!isa<ConstantPointerNull>(ICI->getOperand(1))) |
1038 | 0 | return false; |
1039 | 0 | continue; |
1040 | 0 | } |
1041 | 8 | |
1042 | 8 | // getelementptr is also ok, but only a simple form. |
1043 | 8 | if (const GetElementPtrInst *8 GEPI8 = dyn_cast<GetElementPtrInst>(UI)) { |
1044 | 6 | // Must index into the array and into the struct. |
1045 | 6 | if (GEPI->getNumOperands() < 3) |
1046 | 0 | return false; |
1047 | 6 | |
1048 | 6 | // Otherwise the GEP is ok. |
1049 | 6 | continue; |
1050 | 6 | } |
1051 | 2 | |
1052 | 2 | if (const PHINode *2 PN2 = dyn_cast<PHINode>(UI)) { |
1053 | 2 | if (!LoadUsingPHIsPerLoad.insert(PN).second) |
1054 | 2 | // This means some phi nodes are dependent on each other. |
1055 | 2 | // Avoid infinite looping! |
1056 | 0 | return false; |
1057 | 2 | if (2 !LoadUsingPHIs.insert(PN).second2 ) |
1058 | 2 | // If we have already analyzed this PHI, then it is safe. |
1059 | 1 | continue; |
1060 | 1 | |
1061 | 1 | // Make sure all uses of the PHI are simple enough to transform. |
1062 | 1 | if (1 !LoadUsesSimpleEnoughForHeapSRA(PN, |
1063 | 1 | LoadUsingPHIs, LoadUsingPHIsPerLoad)) |
1064 | 0 | return false; |
1065 | 1 | |
1066 | 1 | continue; |
1067 | 1 | } |
1068 | 0 |
|
1069 | 0 | // Otherwise we don't know what this is, not ok. |
1070 | 0 | return false; |
1071 | 0 | } |
1072 | 9 | |
1073 | 9 | return true; |
1074 | 9 | } |
1075 | | |
1076 | | |
1077 | | /// If all users of values loaded from GV are simple enough to perform HeapSRA, |
1078 | | /// return true. |
1079 | | static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, |
1080 | 7 | Instruction *StoredVal) { |
1081 | 7 | SmallPtrSet<const PHINode*, 32> LoadUsingPHIs; |
1082 | 7 | SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad; |
1083 | 7 | for (const User *U : GV->users()) |
1084 | 15 | if (const LoadInst *15 LI15 = dyn_cast<LoadInst>(U)) { |
1085 | 8 | if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs, |
1086 | 8 | LoadUsingPHIsPerLoad)) |
1087 | 0 | return false; |
1088 | 8 | LoadUsingPHIsPerLoad.clear(); |
1089 | 8 | } |
1090 | 7 | |
1091 | 7 | // If we reach here, we know that all uses of the loads and transitive uses |
1092 | 7 | // (through PHI nodes) are simple enough to transform. However, we don't know |
1093 | 7 | // that all inputs the to the PHI nodes are in the same equivalence sets. |
1094 | 7 | // Check to verify that all operands of the PHIs are either PHIS that can be |
1095 | 7 | // transformed, loads from GV, or MI itself. |
1096 | 7 | for (const PHINode *PN : LoadUsingPHIs) 7 { |
1097 | 3 | for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e3 ; ++op2 ) { |
1098 | 2 | Value *InVal = PN->getIncomingValue(op); |
1099 | 2 | |
1100 | 2 | // PHI of the stored value itself is ok. |
1101 | 2 | if (InVal == StoredVal2 ) continue0 ; |
1102 | 2 | |
1103 | 2 | if (const PHINode *2 InPN2 = dyn_cast<PHINode>(InVal)) { |
1104 | 0 | // One of the PHIs in our set is (optimistically) ok. |
1105 | 0 | if (LoadUsingPHIs.count(InPN)) |
1106 | 0 | continue; |
1107 | 0 | return false; |
1108 | 0 | } |
1109 | 2 | |
1110 | 2 | // Load from GV is ok. |
1111 | 2 | if (const LoadInst *2 LI2 = dyn_cast<LoadInst>(InVal)) |
1112 | 2 | if (2 LI->getOperand(0) == GV2 ) |
1113 | 2 | continue; |
1114 | 0 |
|
1115 | 0 | // UNDEF? NULL? |
1116 | 0 |
|
1117 | 0 | // Anything else is rejected. |
1118 | 0 | return false; |
1119 | 0 | } |
1120 | 1 | } |
1121 | 7 | |
1122 | 7 | return true; |
1123 | 7 | } |
1124 | | |
1125 | | static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, |
1126 | | DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, |
1127 | 18 | std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { |
1128 | 18 | std::vector<Value*> &FieldVals = InsertedScalarizedValues[V]; |
1129 | 18 | |
1130 | 18 | if (FieldNo >= FieldVals.size()) |
1131 | 9 | FieldVals.resize(FieldNo+1); |
1132 | 18 | |
1133 | 18 | // If we already have this value, just reuse the previously scalarized |
1134 | 18 | // version. |
1135 | 18 | if (Value *FieldVal = FieldVals[FieldNo]) |
1136 | 8 | return FieldVal; |
1137 | 10 | |
1138 | 10 | // Depending on what instruction this is, we have several cases. |
1139 | 10 | Value *Result; |
1140 | 10 | if (LoadInst *LI10 = dyn_cast<LoadInst>(V)) { |
1141 | 8 | // This is a scalarized version of the load from the global. Just create |
1142 | 8 | // a new Load of the scalarized global. |
1143 | 8 | Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo, |
1144 | 8 | InsertedScalarizedValues, |
1145 | 8 | PHIsToRewrite), |
1146 | 8 | LI->getName()+".f"+Twine(FieldNo), LI); |
1147 | 10 | } else { |
1148 | 2 | PHINode *PN = cast<PHINode>(V); |
1149 | 2 | // PN's type is pointer to struct. Make a new PHI of pointer to struct |
1150 | 2 | // field. |
1151 | 2 | |
1152 | 2 | PointerType *PTy = cast<PointerType>(PN->getType()); |
1153 | 2 | StructType *ST = cast<StructType>(PTy->getElementType()); |
1154 | 2 | |
1155 | 2 | unsigned AS = PTy->getAddressSpace(); |
1156 | 2 | PHINode *NewPN = |
1157 | 2 | PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS), |
1158 | 2 | PN->getNumIncomingValues(), |
1159 | 2 | PN->getName()+".f"+Twine(FieldNo), PN); |
1160 | 2 | Result = NewPN; |
1161 | 2 | PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); |
1162 | 2 | } |
1163 | 18 | |
1164 | 18 | return FieldVals[FieldNo] = Result; |
1165 | 18 | } |
1166 | | |
1167 | | /// Given a load instruction and a value derived from the load, rewrite the |
1168 | | /// derived value to use the HeapSRoA'd load. |
1169 | | static void RewriteHeapSROALoadUser(Instruction *LoadUser, |
1170 | | DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, |
1171 | 8 | std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { |
1172 | 8 | // If this is a comparison against null, handle it. |
1173 | 8 | if (ICmpInst *SCI8 = dyn_cast<ICmpInst>(LoadUser)) { |
1174 | 0 | assert(isa<ConstantPointerNull>(SCI->getOperand(1))); |
1175 | 0 | // If we have a setcc of the loaded pointer, we can use a setcc of any |
1176 | 0 | // field. |
1177 | 0 | Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, |
1178 | 0 | InsertedScalarizedValues, PHIsToRewrite); |
1179 | 0 |
|
1180 | 0 | Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, |
1181 | 0 | Constant::getNullValue(NPtr->getType()), |
1182 | 0 | SCI->getName()); |
1183 | 0 | SCI->replaceAllUsesWith(New); |
1184 | 0 | SCI->eraseFromParent(); |
1185 | 0 | return; |
1186 | 0 | } |
1187 | 8 | |
1188 | 8 | // Handle 'getelementptr Ptr, Idx, i32 FieldNo ...' |
1189 | 8 | if (GetElementPtrInst *8 GEPI8 = dyn_cast<GetElementPtrInst>(LoadUser)) { |
1190 | 6 | assert(GEPI->getNumOperands() >= 3 && isa<ConstantInt>(GEPI->getOperand(2)) |
1191 | 6 | && "Unexpected GEPI!"); |
1192 | 6 | |
1193 | 6 | // Load the pointer for this field. |
1194 | 6 | unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue(); |
1195 | 6 | Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, |
1196 | 6 | InsertedScalarizedValues, PHIsToRewrite); |
1197 | 6 | |
1198 | 6 | // Create the new GEP idx vector. |
1199 | 6 | SmallVector<Value*, 8> GEPIdx; |
1200 | 6 | GEPIdx.push_back(GEPI->getOperand(1)); |
1201 | 6 | GEPIdx.append(GEPI->op_begin()+3, GEPI->op_end()); |
1202 | 6 | |
1203 | 6 | Value *NGEPI = GetElementPtrInst::Create(GEPI->getResultElementType(), NewPtr, GEPIdx, |
1204 | 6 | GEPI->getName(), GEPI); |
1205 | 6 | GEPI->replaceAllUsesWith(NGEPI); |
1206 | 6 | GEPI->eraseFromParent(); |
1207 | 6 | return; |
1208 | 6 | } |
1209 | 2 | |
1210 | 2 | // Recursively transform the users of PHI nodes. This will lazily create the |
1211 | 2 | // PHIs that are needed for individual elements. Keep track of what PHIs we |
1212 | 2 | // see in InsertedScalarizedValues so that we don't get infinite loops (very |
1213 | 2 | // antisocial). If the PHI is already in InsertedScalarizedValues, it has |
1214 | 2 | // already been seen first by another load, so its uses have already been |
1215 | 2 | // processed. |
1216 | 2 | PHINode *PN = cast<PHINode>(LoadUser); |
1217 | 2 | if (!InsertedScalarizedValues.insert(std::make_pair(PN, |
1218 | 2 | std::vector<Value*>())).second) |
1219 | 1 | return; |
1220 | 1 | |
1221 | 1 | // If this is the first time we've seen this PHI, recursively process all |
1222 | 1 | // users. |
1223 | 3 | for (auto UI = PN->user_begin(), E = PN->user_end(); 1 UI != E3 ;) { |
1224 | 2 | Instruction *User = cast<Instruction>(*UI++); |
1225 | 2 | RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); |
1226 | 2 | } |
1227 | 8 | } |
1228 | | |
1229 | | /// We are performing Heap SRoA on a global. Ptr is a value loaded from the |
1230 | | /// global. Eliminate all uses of Ptr, making them use FieldGlobals instead. |
1231 | | /// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA. |
1232 | | static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, |
1233 | | DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, |
1234 | 8 | std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { |
1235 | 14 | for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E14 ;) { |
1236 | 6 | Instruction *User = cast<Instruction>(*UI++); |
1237 | 6 | RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); |
1238 | 6 | } |
1239 | 8 | |
1240 | 8 | if (Load->use_empty()8 ) { |
1241 | 6 | Load->eraseFromParent(); |
1242 | 6 | InsertedScalarizedValues.erase(Load); |
1243 | 6 | } |
1244 | 8 | } |
1245 | | |
1246 | | /// CI is an allocation of an array of structures. Break it up into multiple |
1247 | | /// allocations of arrays of the fields. |
1248 | | static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, |
1249 | | Value *NElems, const DataLayout &DL, |
1250 | 7 | const TargetLibraryInfo *TLI) { |
1251 | 7 | DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n'); |
1252 | 7 | Type *MAT = getMallocAllocatedType(CI, TLI); |
1253 | 7 | StructType *STy = cast<StructType>(MAT); |
1254 | 7 | |
1255 | 7 | // There is guaranteed to be at least one use of the malloc (storing |
1256 | 7 | // it into GV). If there are other uses, change them to be uses of |
1257 | 7 | // the global to simplify later code. This also deletes the store |
1258 | 7 | // into GV. |
1259 | 7 | ReplaceUsesOfMallocWithGlobal(CI, GV); |
1260 | 7 | |
1261 | 7 | // Okay, at this point, there are no users of the malloc. Insert N |
1262 | 7 | // new mallocs at the same place as CI, and N globals. |
1263 | 7 | std::vector<Value*> FieldGlobals; |
1264 | 7 | std::vector<Value*> FieldMallocs; |
1265 | 7 | |
1266 | 7 | SmallVector<OperandBundleDef, 1> OpBundles; |
1267 | 7 | CI->getOperandBundlesAsDefs(OpBundles); |
1268 | 7 | |
1269 | 7 | unsigned AS = GV->getType()->getPointerAddressSpace(); |
1270 | 21 | for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e21 ;++FieldNo14 ){ |
1271 | 14 | Type *FieldTy = STy->getElementType(FieldNo); |
1272 | 14 | PointerType *PFieldTy = PointerType::get(FieldTy, AS); |
1273 | 14 | |
1274 | 14 | GlobalVariable *NGV = new GlobalVariable( |
1275 | 14 | *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, |
1276 | 14 | Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), |
1277 | 14 | nullptr, GV->getThreadLocalMode()); |
1278 | 14 | NGV->copyAttributesFrom(GV); |
1279 | 14 | FieldGlobals.push_back(NGV); |
1280 | 14 | |
1281 | 14 | unsigned TypeSize = DL.getTypeAllocSize(FieldTy); |
1282 | 14 | if (StructType *ST = dyn_cast<StructType>(FieldTy)) |
1283 | 0 | TypeSize = DL.getStructLayout(ST)->getSizeInBytes(); |
1284 | 14 | Type *IntPtrTy = DL.getIntPtrType(CI->getType()); |
1285 | 14 | Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, |
1286 | 14 | ConstantInt::get(IntPtrTy, TypeSize), |
1287 | 14 | NElems, OpBundles, nullptr, |
1288 | 14 | CI->getName() + ".f" + Twine(FieldNo)); |
1289 | 14 | FieldMallocs.push_back(NMI); |
1290 | 14 | new StoreInst(NMI, NGV, CI); |
1291 | 14 | } |
1292 | 7 | |
1293 | 7 | // The tricky aspect of this transformation is handling the case when malloc |
1294 | 7 | // fails. In the original code, malloc failing would set the result pointer |
1295 | 7 | // of malloc to null. In this case, some mallocs could succeed and others |
1296 | 7 | // could fail. As such, we emit code that looks like this: |
1297 | 7 | // F0 = malloc(field0) |
1298 | 7 | // F1 = malloc(field1) |
1299 | 7 | // F2 = malloc(field2) |
1300 | 7 | // if (F0 == 0 || F1 == 0 || F2 == 0) { |
1301 | 7 | // if (F0) { free(F0); F0 = 0; } |
1302 | 7 | // if (F1) { free(F1); F1 = 0; } |
1303 | 7 | // if (F2) { free(F2); F2 = 0; } |
1304 | 7 | // } |
1305 | 7 | // The malloc can also fail if its argument is too large. |
1306 | 7 | Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); |
1307 | 7 | Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), |
1308 | 7 | ConstantZero, "isneg"); |
1309 | 21 | for (unsigned i = 0, e = FieldMallocs.size(); i != e21 ; ++i14 ) { |
1310 | 14 | Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], |
1311 | 14 | Constant::getNullValue(FieldMallocs[i]->getType()), |
1312 | 14 | "isnull"); |
1313 | 14 | RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI); |
1314 | 14 | } |
1315 | 7 | |
1316 | 7 | // Split the basic block at the old malloc. |
1317 | 7 | BasicBlock *OrigBB = CI->getParent(); |
1318 | 7 | BasicBlock *ContBB = |
1319 | 7 | OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont"); |
1320 | 7 | |
1321 | 7 | // Create the block to check the first condition. Put all these blocks at the |
1322 | 7 | // end of the function as they are unlikely to be executed. |
1323 | 7 | BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(), |
1324 | 7 | "malloc_ret_null", |
1325 | 7 | OrigBB->getParent()); |
1326 | 7 | |
1327 | 7 | // Remove the uncond branch from OrigBB to ContBB, turning it into a cond |
1328 | 7 | // branch on RunningOr. |
1329 | 7 | OrigBB->getTerminator()->eraseFromParent(); |
1330 | 7 | BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); |
1331 | 7 | |
1332 | 7 | // Within the NullPtrBlock, we need to emit a comparison and branch for each |
1333 | 7 | // pointer, because some may be null while others are not. |
1334 | 21 | for (unsigned i = 0, e = FieldGlobals.size(); i != e21 ; ++i14 ) { |
1335 | 14 | Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); |
1336 | 14 | Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal, |
1337 | 14 | Constant::getNullValue(GVVal->getType())); |
1338 | 14 | BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it", |
1339 | 14 | OrigBB->getParent()); |
1340 | 14 | BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next", |
1341 | 14 | OrigBB->getParent()); |
1342 | 14 | Instruction *BI = BranchInst::Create(FreeBlock, NextBlock, |
1343 | 14 | Cmp, NullPtrBlock); |
1344 | 14 | |
1345 | 14 | // Fill in FreeBlock. |
1346 | 14 | CallInst::CreateFree(GVVal, OpBundles, BI); |
1347 | 14 | new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], |
1348 | 14 | FreeBlock); |
1349 | 14 | BranchInst::Create(NextBlock, FreeBlock); |
1350 | 14 | |
1351 | 14 | NullPtrBlock = NextBlock; |
1352 | 14 | } |
1353 | 7 | |
1354 | 7 | BranchInst::Create(ContBB, NullPtrBlock); |
1355 | 7 | |
1356 | 7 | // CI is no longer needed, remove it. |
1357 | 7 | CI->eraseFromParent(); |
1358 | 7 | |
1359 | 7 | /// As we process loads, if we can't immediately update all uses of the load, |
1360 | 7 | /// keep track of what scalarized loads are inserted for a given load. |
1361 | 7 | DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; |
1362 | 7 | InsertedScalarizedValues[GV] = FieldGlobals; |
1363 | 7 | |
1364 | 7 | std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; |
1365 | 7 | |
1366 | 7 | // Okay, the malloc site is completely handled. All of the uses of GV are now |
1367 | 7 | // loads, and all uses of those loads are simple. Rewrite them to use loads |
1368 | 7 | // of the per-field globals instead. |
1369 | 15 | for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E15 ;) { |
1370 | 8 | Instruction *User = cast<Instruction>(*UI++); |
1371 | 8 | |
1372 | 8 | if (LoadInst *LI8 = dyn_cast<LoadInst>(User)) { |
1373 | 8 | RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); |
1374 | 8 | continue; |
1375 | 8 | } |
1376 | 0 |
|
1377 | 0 | // Must be a store of null. |
1378 | 0 | StoreInst *SI = cast<StoreInst>(User); |
1379 | 0 | assert(isa<ConstantPointerNull>(SI->getOperand(0)) && |
1380 | 0 | "Unexpected heap-sra user!"); |
1381 | 0 |
|
1382 | 0 | // Insert a store of null into each global. |
1383 | 0 | for (unsigned i = 0, e = FieldGlobals.size(); i != e0 ; ++i0 ) { |
1384 | 0 | Type *ValTy = cast<GlobalValue>(FieldGlobals[i])->getValueType(); |
1385 | 0 | Constant *Null = Constant::getNullValue(ValTy); |
1386 | 0 | new StoreInst(Null, FieldGlobals[i], SI); |
1387 | 0 | } |
1388 | 8 | // Erase the original store. |
1389 | 8 | SI->eraseFromParent(); |
1390 | 8 | } |
1391 | 7 | |
1392 | 7 | // While we have PHIs that are interesting to rewrite, do it. |
1393 | 9 | while (!PHIsToRewrite.empty()9 ) { |
1394 | 2 | PHINode *PN = PHIsToRewrite.back().first; |
1395 | 2 | unsigned FieldNo = PHIsToRewrite.back().second; |
1396 | 2 | PHIsToRewrite.pop_back(); |
1397 | 2 | PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); |
1398 | 2 | assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); |
1399 | 2 | |
1400 | 2 | // Add all the incoming values. This can materialize more phis. |
1401 | 6 | for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e6 ; ++i4 ) { |
1402 | 4 | Value *InVal = PN->getIncomingValue(i); |
1403 | 4 | InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, |
1404 | 4 | PHIsToRewrite); |
1405 | 4 | FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); |
1406 | 4 | } |
1407 | 2 | } |
1408 | 7 | |
1409 | 7 | // Drop all inter-phi links and any loads that made it this far. |
1410 | 7 | for (DenseMap<Value*, std::vector<Value*> >::iterator |
1411 | 7 | I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); |
1412 | 17 | I != E17 ; ++I10 ) { |
1413 | 10 | if (PHINode *PN = dyn_cast<PHINode>(I->first)) |
1414 | 1 | PN->dropAllReferences(); |
1415 | 9 | else if (LoadInst *9 LI9 = dyn_cast<LoadInst>(I->first)) |
1416 | 2 | LI->dropAllReferences(); |
1417 | 10 | } |
1418 | 7 | |
1419 | 7 | // Delete all the phis and loads now that inter-references are dead. |
1420 | 7 | for (DenseMap<Value*, std::vector<Value*> >::iterator |
1421 | 7 | I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); |
1422 | 17 | I != E17 ; ++I10 ) { |
1423 | 10 | if (PHINode *PN = dyn_cast<PHINode>(I->first)) |
1424 | 1 | PN->eraseFromParent(); |
1425 | 9 | else if (LoadInst *9 LI9 = dyn_cast<LoadInst>(I->first)) |
1426 | 2 | LI->eraseFromParent(); |
1427 | 10 | } |
1428 | 7 | |
1429 | 7 | // The old global is now dead, remove it. |
1430 | 7 | GV->eraseFromParent(); |
1431 | 7 | |
1432 | 7 | ++NumHeapSRA; |
1433 | 7 | return cast<GlobalVariable>(FieldGlobals[0]); |
1434 | 7 | } |
1435 | | |
1436 | | /// This function is called when we see a pointer global variable with a single |
1437 | | /// value stored it that is a malloc or cast of malloc. |
1438 | | static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, |
1439 | | Type *AllocTy, |
1440 | | AtomicOrdering Ordering, |
1441 | | const DataLayout &DL, |
1442 | 279 | TargetLibraryInfo *TLI) { |
1443 | 279 | // If this is a malloc of an abstract type, don't touch it. |
1444 | 279 | if (!AllocTy->isSized()) |
1445 | 0 | return false; |
1446 | 279 | |
1447 | 279 | // We can't optimize this global unless all uses of it are *known* to be |
1448 | 279 | // of the malloc value, not of the null initializer value (consider a use |
1449 | 279 | // that compares the global's value against zero to see if the malloc has |
1450 | 279 | // been reached). To do this, we check to see if all uses of the global |
1451 | 279 | // would trap if the global were null: this proves that they must all |
1452 | 279 | // happen after the malloc. |
1453 | 279 | if (279 !AllUsesOfLoadedValueWillTrapIfNull(GV)279 ) |
1454 | 164 | return false; |
1455 | 115 | |
1456 | 115 | // We can't optimize this if the malloc itself is used in a complex way, |
1457 | 115 | // for example, being stored into multiple globals. This allows the |
1458 | 115 | // malloc to be stored into the specified global, loaded icmp'd, and |
1459 | 115 | // GEP'd. These are all things we could transform to using the global |
1460 | 115 | // for. |
1461 | 115 | SmallPtrSet<const PHINode*, 8> PHIs; |
1462 | 115 | if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs)) |
1463 | 48 | return false; |
1464 | 67 | |
1465 | 67 | // If we have a global that is only initialized with a fixed size malloc, |
1466 | 67 | // transform the program to use global memory instead of malloc'd memory. |
1467 | 67 | // This eliminates dynamic allocation, avoids an indirection accessing the |
1468 | 67 | // data, and exposes the resultant global to further GlobalOpt. |
1469 | 67 | // We cannot optimize the malloc if we cannot determine malloc array size. |
1470 | 67 | Value *NElems = getMallocArraySize(CI, DL, TLI, true); |
1471 | 67 | if (!NElems) |
1472 | 56 | return false; |
1473 | 11 | |
1474 | 11 | if (ConstantInt *11 NElements11 = dyn_cast<ConstantInt>(NElems)) |
1475 | 11 | // Restrict this transformation to only working on small allocations |
1476 | 11 | // (2048 bytes currently), as we don't want to introduce a 16M global or |
1477 | 11 | // something. |
1478 | 6 | if (6 NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 20486 ) { |
1479 | 4 | OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); |
1480 | 4 | return true; |
1481 | 4 | } |
1482 | 7 | |
1483 | 7 | // If the allocation is an array of structures, consider transforming this |
1484 | 7 | // into multiple malloc'd arrays, one for each field. This is basically |
1485 | 7 | // SRoA for malloc'd memory. |
1486 | 7 | |
1487 | 7 | if (7 Ordering != AtomicOrdering::NotAtomic7 ) |
1488 | 0 | return false; |
1489 | 7 | |
1490 | 7 | // If this is an allocation of a fixed size array of structs, analyze as a |
1491 | 7 | // variable size array. malloc [100 x struct],1 -> malloc struct, 100 |
1492 | 7 | if (7 NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)7 ) |
1493 | 2 | if (ArrayType *2 AT2 = dyn_cast<ArrayType>(AllocTy)) |
1494 | 2 | AllocTy = AT->getElementType(); |
1495 | 7 | |
1496 | 7 | StructType *AllocSTy = dyn_cast<StructType>(AllocTy); |
1497 | 7 | if (!AllocSTy) |
1498 | 0 | return false; |
1499 | 7 | |
1500 | 7 | // This the structure has an unreasonable number of fields, leave it |
1501 | 7 | // alone. |
1502 | 7 | if (7 AllocSTy->getNumElements() <= 16 && 7 AllocSTy->getNumElements() != 07 && |
1503 | 7 | AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)7 ) { |
1504 | 7 | |
1505 | 7 | // If this is a fixed size array, transform the Malloc to be an alloc of |
1506 | 7 | // structs. malloc [100 x struct],1 -> malloc struct, 100 |
1507 | 7 | if (ArrayType *AT7 = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) { |
1508 | 2 | Type *IntPtrTy = DL.getIntPtrType(CI->getType()); |
1509 | 2 | unsigned TypeSize = DL.getStructLayout(AllocSTy)->getSizeInBytes(); |
1510 | 2 | Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize); |
1511 | 2 | Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); |
1512 | 2 | SmallVector<OperandBundleDef, 1> OpBundles; |
1513 | 2 | CI->getOperandBundlesAsDefs(OpBundles); |
1514 | 2 | Instruction *Malloc = |
1515 | 2 | CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements, |
1516 | 2 | OpBundles, nullptr, CI->getName()); |
1517 | 2 | Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); |
1518 | 2 | CI->replaceAllUsesWith(Cast); |
1519 | 2 | CI->eraseFromParent(); |
1520 | 2 | if (BitCastInst *BCI = dyn_cast<BitCastInst>(Malloc)) |
1521 | 2 | CI = cast<CallInst>(BCI->getOperand(0)); |
1522 | 2 | else |
1523 | 0 | CI = cast<CallInst>(Malloc); |
1524 | 2 | } |
1525 | 7 | |
1526 | 7 | PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL, |
1527 | 7 | TLI); |
1528 | 7 | return true; |
1529 | 7 | } |
1530 | 0 |
|
1531 | 0 | return false; |
1532 | 0 | } |
1533 | | |
1534 | | // Try to optimize globals based on the knowledge that only one value (besides |
1535 | | // its initializer) is ever stored to the global. |
1536 | | static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, |
1537 | | AtomicOrdering Ordering, |
1538 | | const DataLayout &DL, |
1539 | 14.4k | TargetLibraryInfo *TLI) { |
1540 | 14.4k | // Ignore no-op GEPs and bitcasts. |
1541 | 14.4k | StoredOnceVal = StoredOnceVal->stripPointerCasts(); |
1542 | 14.4k | |
1543 | 14.4k | // If we are dealing with a pointer global that is initialized to null and |
1544 | 14.4k | // only has one (non-null) value stored into it, then we can optimize any |
1545 | 14.4k | // users of the loaded value (often calls and loads) that would trap if the |
1546 | 14.4k | // value was null. |
1547 | 14.4k | if (GV->getInitializer()->getType()->isPointerTy() && |
1548 | 14.4k | GV->getInitializer()->isNullValue()3.59k ) { |
1549 | 3.57k | if (Constant *SOVC3.57k = dyn_cast<Constant>(StoredOnceVal)) { |
1550 | 26 | if (GV->getInitializer()->getType() != SOVC->getType()) |
1551 | 3 | SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); |
1552 | 26 | |
1553 | 26 | // Optimize away any trapping uses of the loaded value. |
1554 | 26 | if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, TLI)) |
1555 | 7 | return true; |
1556 | 3.54k | } else if (CallInst *3.54k CI3.54k = extractMallocCall(StoredOnceVal, TLI)) { |
1557 | 279 | Type *MallocType = getMallocAllocatedType(CI, TLI); |
1558 | 279 | if (MallocType && 279 tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, |
1559 | 279 | Ordering, DL, TLI)) |
1560 | 11 | return true; |
1561 | 14.4k | } |
1562 | 3.57k | } |
1563 | 14.4k | |
1564 | 14.4k | return false; |
1565 | 14.4k | } |
1566 | | |
1567 | | /// At this point, we have learned that the only two values ever stored into GV |
1568 | | /// are its initializer and OtherVal. See if we can shrink the global into a |
1569 | | /// boolean and select between the two values whenever it is used. This exposes |
1570 | | /// the values to other scalar optimizations. |
1571 | 8.93k | static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { |
1572 | 8.93k | Type *GVElType = GV->getValueType(); |
1573 | 8.93k | |
1574 | 8.93k | // If GVElType is already i1, it is already shrunk. If the type of the GV is |
1575 | 8.93k | // an FP value, pointer or vector, don't do this optimization because a select |
1576 | 8.93k | // between them is very expensive and unlikely to lead to later |
1577 | 8.93k | // simplification. In these cases, we typically end up with "cond ? v1 : v2" |
1578 | 8.93k | // where v1 and v2 both require constant pool loads, a big loss. |
1579 | 8.93k | if (GVElType == Type::getInt1Ty(GV->getContext()) || |
1580 | 337 | GVElType->isFloatingPointTy() || |
1581 | 8.93k | GVElType->isPointerTy()295 || GVElType->isVectorTy()264 ) |
1582 | 8.67k | return false; |
1583 | 264 | |
1584 | 264 | // Walk the use list of the global seeing if all the uses are load or store. |
1585 | 264 | // If there is anything else, bail out. |
1586 | 264 | for (User *U : GV->users()) |
1587 | 22.0k | if (22.0k !isa<LoadInst>(U) && 22.0k !isa<StoreInst>(U)21.4k ) |
1588 | 6 | return false; |
1589 | 258 | |
1590 | 258 | DEBUG258 (dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); |
1591 | 258 | |
1592 | 258 | // Create the new global, initializing it to false. |
1593 | 258 | GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), |
1594 | 258 | false, |
1595 | 258 | GlobalValue::InternalLinkage, |
1596 | 258 | ConstantInt::getFalse(GV->getContext()), |
1597 | 258 | GV->getName()+".b", |
1598 | 258 | GV->getThreadLocalMode(), |
1599 | 258 | GV->getType()->getAddressSpace()); |
1600 | 258 | NewGV->copyAttributesFrom(GV); |
1601 | 258 | GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); |
1602 | 258 | |
1603 | 258 | Constant *InitVal = GV->getInitializer(); |
1604 | 258 | assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && |
1605 | 258 | "No reason to shrink to bool!"); |
1606 | 258 | |
1607 | 258 | SmallVector<DIGlobalVariableExpression *, 1> GVs; |
1608 | 258 | GV->getDebugInfo(GVs); |
1609 | 258 | |
1610 | 258 | // If initialized to zero and storing one into the global, we can use a cast |
1611 | 258 | // instead of a select to synthesize the desired value. |
1612 | 258 | bool IsOneZero = false; |
1613 | 258 | bool EmitOneOrZero = true; |
1614 | 258 | if (ConstantInt *CI258 = dyn_cast<ConstantInt>(OtherVal)){ |
1615 | 218 | IsOneZero = InitVal->isNullValue() && CI->isOne(); |
1616 | 258 | |
1617 | 258 | if (ConstantInt *CIInit258 = dyn_cast<ConstantInt>(GV->getInitializer())){ |
1618 | 258 | uint64_t ValInit = CIInit->getZExtValue(); |
1619 | 258 | uint64_t ValOther = CI->getZExtValue(); |
1620 | 258 | uint64_t ValMinus = ValOther - ValInit; |
1621 | 258 | |
1622 | 1 | for(auto *GVe : GVs){ |
1623 | 1 | DIGlobalVariable *DGV = GVe->getVariable(); |
1624 | 1 | DIExpression *E = GVe->getExpression(); |
1625 | 1 | |
1626 | 1 | // It is expected that the address of global optimized variable is on |
1627 | 1 | // top of the stack. After optimization, value of that variable will |
1628 | 1 | // be ether 0 for initial value or 1 for other value. The following |
1629 | 1 | // expression should return constant integer value depending on the |
1630 | 1 | // value at global object address: |
1631 | 1 | // val * (ValOther - ValInit) + ValInit: |
1632 | 1 | // DW_OP_deref DW_OP_constu <ValMinus> |
1633 | 1 | // DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value |
1634 | 1 | E = DIExpression::get(NewGV->getContext(), |
1635 | 1 | {dwarf::DW_OP_deref, |
1636 | 1 | dwarf::DW_OP_constu, |
1637 | 1 | ValMinus, |
1638 | 1 | dwarf::DW_OP_mul, |
1639 | 1 | dwarf::DW_OP_constu, |
1640 | 1 | ValInit, |
1641 | 1 | dwarf::DW_OP_plus, |
1642 | 1 | dwarf::DW_OP_stack_value}); |
1643 | 1 | DIGlobalVariableExpression *DGVE = |
1644 | 1 | DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E); |
1645 | 1 | NewGV->addDebugInfo(DGVE); |
1646 | 1 | } |
1647 | 258 | EmitOneOrZero = false; |
1648 | 258 | } |
1649 | 258 | } |
1650 | 258 | |
1651 | 258 | if (EmitOneOrZero258 ) { |
1652 | 0 | // FIXME: This will only emit address for debugger on which will |
1653 | 0 | // be written only 0 or 1. |
1654 | 0 | for(auto *GV : GVs) |
1655 | 0 | NewGV->addDebugInfo(GV); |
1656 | 0 | } |
1657 | 258 | |
1658 | 22.2k | while (!GV->use_empty()22.2k ) { |
1659 | 22.0k | Instruction *UI = cast<Instruction>(GV->user_back()); |
1660 | 22.0k | if (StoreInst *SI22.0k = dyn_cast<StoreInst>(UI)) { |
1661 | 21.4k | // Change the store into a boolean store. |
1662 | 21.4k | bool StoringOther = SI->getOperand(0) == OtherVal; |
1663 | 21.4k | // Only do this if we weren't storing a loaded value. |
1664 | 21.4k | Value *StoreVal; |
1665 | 21.4k | if (StoringOther || 21.4k SI->getOperand(0) == InitVal177 ) { |
1666 | 21.4k | StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()), |
1667 | 21.4k | StoringOther); |
1668 | 21.4k | } else { |
1669 | 2 | // Otherwise, we are storing a previously loaded copy. To do this, |
1670 | 2 | // change the copy from copying the original value to just copying the |
1671 | 2 | // bool. |
1672 | 2 | Instruction *StoredVal = cast<Instruction>(SI->getOperand(0)); |
1673 | 2 | |
1674 | 2 | // If we've already replaced the input, StoredVal will be a cast or |
1675 | 2 | // select instruction. If not, it will be a load of the original |
1676 | 2 | // global. |
1677 | 2 | if (LoadInst *LI2 = dyn_cast<LoadInst>(StoredVal)) { |
1678 | 2 | assert(LI->getOperand(0) == GV && "Not a copy!"); |
1679 | 2 | // Insert a new load, to preserve the saved value. |
1680 | 2 | StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0, |
1681 | 2 | LI->getOrdering(), LI->getSyncScopeID(), LI); |
1682 | 2 | } else { |
1683 | 0 | assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) && |
1684 | 0 | "This is not a form that we understand!"); |
1685 | 0 | StoreVal = StoredVal->getOperand(0); |
1686 | 0 | assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!"); |
1687 | 0 | } |
1688 | 2 | } |
1689 | 21.4k | new StoreInst(StoreVal, NewGV, false, 0, |
1690 | 21.4k | SI->getOrdering(), SI->getSyncScopeID(), SI); |
1691 | 22.0k | } else { |
1692 | 549 | // Change the load into a load of bool then a select. |
1693 | 549 | LoadInst *LI = cast<LoadInst>(UI); |
1694 | 549 | LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0, |
1695 | 549 | LI->getOrdering(), LI->getSyncScopeID(), LI); |
1696 | 549 | Value *NSI; |
1697 | 549 | if (IsOneZero) |
1698 | 408 | NSI = new ZExtInst(NLI, LI->getType(), "", LI); |
1699 | 549 | else |
1700 | 141 | NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI); |
1701 | 549 | NSI->takeName(LI); |
1702 | 549 | LI->replaceAllUsesWith(NSI); |
1703 | 549 | } |
1704 | 22.0k | UI->eraseFromParent(); |
1705 | 22.0k | } |
1706 | 8.93k | |
1707 | 8.93k | // Retain the name of the old global variable. People who are debugging their |
1708 | 8.93k | // programs may expect these variables to be named the same. |
1709 | 8.93k | NewGV->takeName(GV); |
1710 | 8.93k | GV->eraseFromParent(); |
1711 | 8.93k | return true; |
1712 | 8.93k | } |
1713 | | |
1714 | | static bool deleteIfDead(GlobalValue &GV, |
1715 | 5.63M | SmallSet<const Comdat *, 8> &NotDiscardableComdats) { |
1716 | 5.63M | GV.removeDeadConstantUsers(); |
1717 | 5.63M | |
1718 | 5.63M | if (!GV.isDiscardableIfUnused() && 5.63M !GV.isDeclaration()3.12M ) |
1719 | 1.11M | return false; |
1720 | 4.51M | |
1721 | 4.51M | if (const Comdat *4.51M C4.51M = GV.getComdat()) |
1722 | 3.66k | if (3.66k !GV.hasLocalLinkage() && 3.66k NotDiscardableComdats.count(C)3.62k ) |
1723 | 824 | return false; |
1724 | 4.51M | |
1725 | 4.51M | bool Dead; |
1726 | 4.51M | if (auto *F = dyn_cast<Function>(&GV)) |
1727 | 2.59M | Dead = (F->isDeclaration() && 2.59M F->use_empty()1.97M ) || F->isDefTriviallyDead()1.15M ; |
1728 | 4.51M | else |
1729 | 1.92M | Dead = GV.use_empty(); |
1730 | 4.51M | if (!Dead) |
1731 | 2.82M | return false; |
1732 | 1.68M | |
1733 | 1.68M | DEBUG1.68M (dbgs() << "GLOBAL DEAD: " << GV << "\n"); |
1734 | 1.68M | GV.eraseFromParent(); |
1735 | 1.68M | ++NumDeleted; |
1736 | 1.68M | return true; |
1737 | 1.68M | } |
1738 | | |
1739 | | static bool isPointerValueDeadOnEntryToFunction( |
1740 | | const Function *F, GlobalValue *GV, |
1741 | 13 | function_ref<DominatorTree &(Function &)> LookupDomTree) { |
1742 | 13 | // Find all uses of GV. We expect them all to be in F, and if we can't |
1743 | 13 | // identify any of the uses we bail out. |
1744 | 13 | // |
1745 | 13 | // On each of these uses, identify if the memory that GV points to is |
1746 | 13 | // used/required/live at the start of the function. If it is not, for example |
1747 | 13 | // if the first thing the function does is store to the GV, the GV can |
1748 | 13 | // possibly be demoted. |
1749 | 13 | // |
1750 | 13 | // We don't do an exhaustive search for memory operations - simply look |
1751 | 13 | // through bitcasts as they're quite common and benign. |
1752 | 13 | const DataLayout &DL = GV->getParent()->getDataLayout(); |
1753 | 13 | SmallVector<LoadInst *, 4> Loads; |
1754 | 13 | SmallVector<StoreInst *, 4> Stores; |
1755 | 23 | for (auto *U : GV->users()) { |
1756 | 23 | if (Operator::getOpcode(U) == Instruction::BitCast23 ) { |
1757 | 6 | for (auto *UU : U->users()) { |
1758 | 6 | if (auto *LI = dyn_cast<LoadInst>(UU)) |
1759 | 6 | Loads.push_back(LI); |
1760 | 0 | else if (auto *0 SI0 = dyn_cast<StoreInst>(UU)) |
1761 | 0 | Stores.push_back(SI); |
1762 | 0 | else |
1763 | 0 | return false; |
1764 | 6 | } |
1765 | 6 | continue; |
1766 | 6 | } |
1767 | 17 | |
1768 | 17 | Instruction *I = dyn_cast<Instruction>(U); |
1769 | 17 | if (!I) |
1770 | 0 | return false; |
1771 | 17 | assert(I->getParent()->getParent() == F); |
1772 | 17 | |
1773 | 17 | if (auto *LI = dyn_cast<LoadInst>(I)) |
1774 | 3 | Loads.push_back(LI); |
1775 | 14 | else if (auto *14 SI14 = dyn_cast<StoreInst>(I)) |
1776 | 12 | Stores.push_back(SI); |
1777 | 14 | else |
1778 | 2 | return false; |
1779 | 11 | } |
1780 | 11 | |
1781 | 11 | // We have identified all uses of GV into loads and stores. Now check if all |
1782 | 11 | // of them are known not to depend on the value of the global at the function |
1783 | 11 | // entry point. We do this by ensuring that every load is dominated by at |
1784 | 11 | // least one store. |
1785 | 11 | auto &DT = LookupDomTree(*const_cast<Function *>(F)); |
1786 | 11 | |
1787 | 11 | // The below check is quadratic. Check we're not going to do too many tests. |
1788 | 11 | // FIXME: Even though this will always have worst-case quadratic time, we |
1789 | 11 | // could put effort into minimizing the average time by putting stores that |
1790 | 11 | // have been shown to dominate at least one load at the beginning of the |
1791 | 11 | // Stores array, making subsequent dominance checks more likely to succeed |
1792 | 11 | // early. |
1793 | 11 | // |
1794 | 11 | // The threshold here is fairly large because global->local demotion is a |
1795 | 11 | // very powerful optimization should it fire. |
1796 | 11 | const unsigned Threshold = 100; |
1797 | 11 | if (Loads.size() * Stores.size() > Threshold) |
1798 | 0 | return false; |
1799 | 11 | |
1800 | 11 | for (auto *L : Loads) 11 { |
1801 | 9 | auto *LTy = L->getType(); |
1802 | 9 | if (none_of(Stores, [&](const StoreInst *S) 9 { |
1803 | 8 | auto *STy = S->getValueOperand()->getType(); |
1804 | 8 | // The load is only dominated by the store if DomTree says so |
1805 | 8 | // and the number of bits loaded in L is less than or equal to |
1806 | 8 | // the number of bits stored in S. |
1807 | 8 | return DT.dominates(S, L) && |
1808 | 6 | DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy); |
1809 | 8 | })) |
1810 | 5 | return false; |
1811 | 6 | } |
1812 | 6 | // All loads have known dependences inside F, so the global can be localized. |
1813 | 6 | return true; |
1814 | 6 | } |
1815 | | |
1816 | | /// C may have non-instruction users. Can all of those users be turned into |
1817 | | /// instructions? |
1818 | 12.9k | static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) { |
1819 | 12.9k | // We don't do this exhaustively. The most common pattern that we really need |
1820 | 12.9k | // to care about is a constant GEP or constant bitcast - so just looking |
1821 | 12.9k | // through one single ConstantExpr. |
1822 | 12.9k | // |
1823 | 12.9k | // The set of constants that this function returns true for must be able to be |
1824 | 12.9k | // handled by makeAllConstantUsesInstructions. |
1825 | 41.3k | for (auto *U : C->users()) { |
1826 | 41.3k | if (isa<Instruction>(U)) |
1827 | 41.3k | continue; |
1828 | 5 | if (5 !isa<ConstantExpr>(U)5 ) |
1829 | 5 | // Non instruction, non-constantexpr user; cannot convert this. |
1830 | 0 | return false; |
1831 | 5 | for (auto *UU : U->users()) |
1832 | 5 | if (5 !isa<Instruction>(UU)5 ) |
1833 | 5 | // A constantexpr used by another constant. We don't try and recurse any |
1834 | 5 | // further but just bail out at this point. |
1835 | 1 | return false; |
1836 | 12.9k | } |
1837 | 12.9k | |
1838 | 12.9k | return true; |
1839 | 12.9k | } |
1840 | | |
1841 | | /// C may have non-instruction users, and |
1842 | | /// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the |
1843 | | /// non-instruction users to instructions. |
1844 | 6 | static void makeAllConstantUsesInstructions(Constant *C) { |
1845 | 6 | SmallVector<ConstantExpr*,4> Users; |
1846 | 10 | for (auto *U : C->users()) { |
1847 | 10 | if (isa<ConstantExpr>(U)) |
1848 | 2 | Users.push_back(cast<ConstantExpr>(U)); |
1849 | 10 | else |
1850 | 10 | // We should never get here; allNonInstructionUsersCanBeMadeInstructions |
1851 | 10 | // should not have returned true for C. |
1852 | 10 | assert( |
1853 | 10 | isa<Instruction>(U) && |
1854 | 10 | "Can't transform non-constantexpr non-instruction to instruction!"); |
1855 | 10 | } |
1856 | 6 | |
1857 | 6 | SmallVector<Value*,4> UUsers; |
1858 | 2 | for (auto *U : Users) { |
1859 | 2 | UUsers.clear(); |
1860 | 2 | for (auto *UU : U->users()) |
1861 | 2 | UUsers.push_back(UU); |
1862 | 2 | for (auto *UU : UUsers) { |
1863 | 2 | Instruction *UI = cast<Instruction>(UU); |
1864 | 2 | Instruction *NewU = U->getAsInstruction(); |
1865 | 2 | NewU->insertBefore(UI); |
1866 | 2 | UI->replaceUsesOfWith(U, NewU); |
1867 | 2 | } |
1868 | 2 | // We've replaced all the uses, so destroy the constant. (destroyConstant |
1869 | 2 | // will update value handles and metadata.) |
1870 | 2 | U->destroyConstant(); |
1871 | 2 | } |
1872 | 6 | } |
1873 | | |
1874 | | /// Analyze the specified global variable and optimize |
1875 | | /// it if possible. If we make a change, return true. |
1876 | | static bool processInternalGlobal( |
1877 | | GlobalVariable *GV, const GlobalStatus &GS, TargetLibraryInfo *TLI, |
1878 | 26.1k | function_ref<DominatorTree &(Function &)> LookupDomTree) { |
1879 | 26.1k | auto &DL = GV->getParent()->getDataLayout(); |
1880 | 26.1k | // If this is a first class global and has only one accessing function and |
1881 | 26.1k | // this function is non-recursive, we replace the global with a local alloca |
1882 | 26.1k | // in this function. |
1883 | 26.1k | // |
1884 | 26.1k | // NOTE: It doesn't make sense to promote non-single-value types since we |
1885 | 26.1k | // are just replacing static memory to stack memory. |
1886 | 26.1k | // |
1887 | 26.1k | // If the global is in different address space, don't bring it to stack. |
1888 | 26.1k | if (!GS.HasMultipleAccessingFunctions && |
1889 | 22.1k | GS.AccessingFunction && |
1890 | 22.1k | GV->getValueType()->isSingleValueType() && |
1891 | 13.0k | GV->getType()->getAddressSpace() == 0 && |
1892 | 13.0k | !GV->isExternallyInitialized() && |
1893 | 12.9k | allNonInstructionUsersCanBeMadeInstructions(GV) && |
1894 | 12.9k | GS.AccessingFunction->doesNotRecurse() && |
1895 | 13 | isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV, |
1896 | 26.1k | LookupDomTree)) { |
1897 | 6 | const DataLayout &DL = GV->getParent()->getDataLayout(); |
1898 | 6 | |
1899 | 6 | DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); |
1900 | 6 | Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction |
1901 | 6 | ->getEntryBlock().begin()); |
1902 | 6 | Type *ElemTy = GV->getValueType(); |
1903 | 6 | // FIXME: Pass Global's alignment when globals have alignment |
1904 | 6 | AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr, |
1905 | 6 | GV->getName(), &FirstI); |
1906 | 6 | if (!isa<UndefValue>(GV->getInitializer())) |
1907 | 6 | new StoreInst(GV->getInitializer(), Alloca, &FirstI); |
1908 | 6 | |
1909 | 6 | makeAllConstantUsesInstructions(GV); |
1910 | 6 | |
1911 | 6 | GV->replaceAllUsesWith(Alloca); |
1912 | 6 | GV->eraseFromParent(); |
1913 | 6 | ++NumLocalized; |
1914 | 6 | return true; |
1915 | 6 | } |
1916 | 26.1k | |
1917 | 26.1k | // If the global is never loaded (but may be stored to), it is dead. |
1918 | 26.1k | // Delete it now. |
1919 | 26.1k | if (26.1k !GS.IsLoaded26.1k ) { |
1920 | 106 | DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); |
1921 | 106 | |
1922 | 106 | bool Changed; |
1923 | 106 | if (isLeakCheckerRoot(GV)106 ) { |
1924 | 34 | // Delete any constant stores to the global. |
1925 | 34 | Changed = CleanupPointerRootUsers(GV, TLI); |
1926 | 106 | } else { |
1927 | 72 | // Delete any stores we can find to the global. We may not be able to |
1928 | 72 | // make it completely dead though. |
1929 | 72 | Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); |
1930 | 72 | } |
1931 | 106 | |
1932 | 106 | // If the global is dead now, delete it. |
1933 | 106 | if (GV->use_empty()106 ) { |
1934 | 73 | GV->eraseFromParent(); |
1935 | 73 | ++NumDeleted; |
1936 | 73 | Changed = true; |
1937 | 73 | } |
1938 | 106 | return Changed; |
1939 | 106 | |
1940 | 106 | } |
1941 | 26.0k | if (26.0k GS.StoredType <= GlobalStatus::InitializerStored26.0k ) { |
1942 | 487 | DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n"); |
1943 | 487 | GV->setConstant(true); |
1944 | 487 | |
1945 | 487 | // Clean up any obviously simplifiable users now. |
1946 | 487 | CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); |
1947 | 487 | |
1948 | 487 | // If the global is dead now, just nuke it. |
1949 | 487 | if (GV->use_empty()487 ) { |
1950 | 35 | DEBUG(dbgs() << " *** Marking constant allowed us to simplify " |
1951 | 35 | << "all users and delete global!\n"); |
1952 | 35 | GV->eraseFromParent(); |
1953 | 35 | ++NumDeleted; |
1954 | 35 | return true; |
1955 | 35 | } |
1956 | 452 | |
1957 | 452 | // Fall through to the next check; see if we can optimize further. |
1958 | 452 | ++NumMarked; |
1959 | 452 | } |
1960 | 25.9k | if (25.9k !GV->getInitializer()->getType()->isSingleValueType()25.9k ) { |
1961 | 9.66k | const DataLayout &DL = GV->getParent()->getDataLayout(); |
1962 | 9.66k | if (SRAGlobal(GV, DL)) |
1963 | 64 | return true; |
1964 | 25.9k | } |
1965 | 25.9k | if (25.9k GS.StoredType == GlobalStatus::StoredOnce && 25.9k GS.StoredOnceValue14.4k ) { |
1966 | 14.4k | // If the initial value for the global was an undef value, and if only |
1967 | 14.4k | // one other value was stored into it, we can just change the |
1968 | 14.4k | // initializer to be the stored value, then delete all stores to the |
1969 | 14.4k | // global. This allows us to mark it constant. |
1970 | 14.4k | if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) |
1971 | 8.94k | if (8.94k isa<UndefValue>(GV->getInitializer())8.94k ) { |
1972 | 1 | // Change the initial value here. |
1973 | 1 | GV->setInitializer(SOVConstant); |
1974 | 1 | |
1975 | 1 | // Clean up any obviously simplifiable users now. |
1976 | 1 | CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, TLI); |
1977 | 1 | |
1978 | 1 | if (GV->use_empty()1 ) { |
1979 | 1 | DEBUG(dbgs() << " *** Substituting initializer allowed us to " |
1980 | 1 | << "simplify all users and delete global!\n"); |
1981 | 1 | GV->eraseFromParent(); |
1982 | 1 | ++NumDeleted; |
1983 | 1 | } |
1984 | 8.94k | ++NumSubstitute; |
1985 | 8.94k | return true; |
1986 | 8.94k | } |
1987 | 14.4k | |
1988 | 14.4k | // Try to optimize globals based on the knowledge that only one value |
1989 | 14.4k | // (besides its initializer) is ever stored to the global. |
1990 | 14.4k | if (14.4k optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI)14.4k ) |
1991 | 18 | return true; |
1992 | 14.4k | |
1993 | 14.4k | // Otherwise, if the global was not a boolean, we can shrink it to be a |
1994 | 14.4k | // boolean. |
1995 | 14.4k | if (Constant *14.4k SOVConstant14.4k = dyn_cast<Constant>(GS.StoredOnceValue)) { |
1996 | 8.93k | if (GS.Ordering == AtomicOrdering::NotAtomic8.93k ) { |
1997 | 8.93k | if (TryToShrinkGlobalToBoolean(GV, SOVConstant)8.93k ) { |
1998 | 258 | ++NumShrunkToBool; |
1999 | 258 | return true; |
2000 | 258 | } |
2001 | 25.6k | } |
2002 | 8.93k | } |
2003 | 14.4k | } |
2004 | 25.6k | |
2005 | 25.6k | return false; |
2006 | 25.6k | } |
2007 | | |
2008 | | /// Analyze the specified global variable and optimize it if possible. If we |
2009 | | /// make a change, return true. |
2010 | | static bool |
2011 | | processGlobal(GlobalValue &GV, TargetLibraryInfo *TLI, |
2012 | 3.94M | function_ref<DominatorTree &(Function &)> LookupDomTree) { |
2013 | 3.94M | if (GV.getName().startswith("llvm.")) |
2014 | 95.4k | return false; |
2015 | 3.85M | |
2016 | 3.85M | GlobalStatus GS; |
2017 | 3.85M | |
2018 | 3.85M | if (GlobalStatus::analyzeGlobal(&GV, GS)) |
2019 | 1.85M | return false; |
2020 | 1.99M | |
2021 | 1.99M | bool Changed = false; |
2022 | 1.99M | if (!GS.IsCompared && 1.99M !GV.hasGlobalUnnamedAddr()1.99M ) { |
2023 | 31.0k | auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global |
2024 | 1.86M | : GlobalValue::UnnamedAddr::Local; |
2025 | 1.89M | if (NewUnnamedAddr != GV.getUnnamedAddr()1.89M ) { |
2026 | 244k | GV.setUnnamedAddr(NewUnnamedAddr); |
2027 | 244k | NumUnnamed++; |
2028 | 244k | Changed = true; |
2029 | 244k | } |
2030 | 1.89M | } |
2031 | 1.99M | |
2032 | 1.99M | // Do more involved optimizations if the global is internal. |
2033 | 1.99M | if (!GV.hasLocalLinkage()) |
2034 | 1.90M | return Changed; |
2035 | 96.7k | |
2036 | 96.7k | auto *GVar = dyn_cast<GlobalVariable>(&GV); |
2037 | 96.7k | if (!GVar) |
2038 | 51.4k | return Changed; |
2039 | 45.3k | |
2040 | 45.3k | if (45.3k GVar->isConstant() || 45.3k !GVar->hasInitializer()26.1k ) |
2041 | 19.2k | return Changed; |
2042 | 26.1k | |
2043 | 26.1k | return processInternalGlobal(GVar, GS, TLI, LookupDomTree) || 26.1k Changed25.6k ; |
2044 | 3.94M | } |
2045 | | |
2046 | | /// Walk all of the direct calls of the specified function, changing them to |
2047 | | /// FastCC. |
2048 | 24.9k | static void ChangeCalleesToFastCall(Function *F) { |
2049 | 127k | for (User *U : F->users()) { |
2050 | 127k | if (isa<BlockAddress>(U)) |
2051 | 10 | continue; |
2052 | 127k | CallSite CS(cast<Instruction>(U)); |
2053 | 127k | CS.setCallingConv(CallingConv::Fast); |
2054 | 127k | } |
2055 | 24.9k | } |
2056 | | |
2057 | 0 | static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) { |
2058 | 0 | // There can be at most one attribute set with a nest attribute. |
2059 | 0 | unsigned NestIndex; |
2060 | 0 | if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex)) |
2061 | 0 | return Attrs.removeAttribute(C, NestIndex, Attribute::Nest); |
2062 | 0 | return Attrs; |
2063 | 0 | } |
2064 | | |
2065 | 0 | static void RemoveNestAttribute(Function *F) { |
2066 | 0 | F->setAttributes(StripNest(F->getContext(), F->getAttributes())); |
2067 | 0 | for (User *U : F->users()) { |
2068 | 0 | if (isa<BlockAddress>(U)) |
2069 | 0 | continue; |
2070 | 0 | CallSite CS(cast<Instruction>(U)); |
2071 | 0 | CS.setAttributes(StripNest(F->getContext(), CS.getAttributes())); |
2072 | 0 | } |
2073 | 0 | } |
2074 | | |
2075 | | /// Return true if this is a calling convention that we'd like to change. The |
2076 | | /// idea here is that we don't want to mess with the convention if the user |
2077 | | /// explicitly requested something with performance implications like coldcc, |
2078 | | /// GHC, or anyregcc. |
2079 | 57.5k | static bool isProfitableToMakeFastCC(Function *F) { |
2080 | 57.5k | CallingConv::ID CC = F->getCallingConv(); |
2081 | 57.5k | // FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc? |
2082 | 26.4k | return CC == CallingConv::C || CC == CallingConv::X86_ThisCall; |
2083 | 57.5k | } |
2084 | | |
2085 | | static bool |
2086 | | OptimizeFunctions(Module &M, TargetLibraryInfo *TLI, |
2087 | | function_ref<DominatorTree &(Function &)> LookupDomTree, |
2088 | 38.0k | SmallSet<const Comdat *, 8> &NotDiscardableComdats) { |
2089 | 38.0k | bool Changed = false; |
2090 | 38.0k | // Optimize functions. |
2091 | 3.67M | for (Module::iterator FI = M.begin(), E = M.end(); FI != E3.67M ; ) { |
2092 | 3.64M | Function *F = &*FI++; |
2093 | 3.64M | // Functions without names cannot be referenced outside this module. |
2094 | 3.64M | if (!F->hasName() && 3.64M !F->isDeclaration()2 && !F->hasLocalLinkage()2 ) |
2095 | 1 | F->setLinkage(GlobalValue::InternalLinkage); |
2096 | 3.64M | |
2097 | 3.64M | if (deleteIfDead(*F, NotDiscardableComdats)3.64M ) { |
2098 | 1.68M | Changed = true; |
2099 | 1.68M | continue; |
2100 | 1.68M | } |
2101 | 1.95M | |
2102 | 1.95M | // LLVM's definition of dominance allows instructions that are cyclic |
2103 | 1.95M | // in unreachable blocks, e.g.: |
2104 | 1.95M | // %pat = select i1 %condition, @global, i16* %pat |
2105 | 1.95M | // because any instruction dominates an instruction in a block that's |
2106 | 1.95M | // not reachable from entry. |
2107 | 1.95M | // So, remove unreachable blocks from the function, because a) there's |
2108 | 1.95M | // no point in analyzing them and b) GlobalOpt should otherwise grow |
2109 | 1.95M | // some more complicated logic to break these cycles. |
2110 | 1.95M | // Removing unreachable blocks might invalidate the dominator so we |
2111 | 1.95M | // recalculate it. |
2112 | 1.95M | if (1.95M !F->isDeclaration()1.95M ) { |
2113 | 1.41M | if (removeUnreachableBlocks(*F)1.41M ) { |
2114 | 7.07k | auto &DT = LookupDomTree(*F); |
2115 | 7.07k | DT.recalculate(*F); |
2116 | 7.07k | Changed = true; |
2117 | 7.07k | } |
2118 | 1.41M | } |
2119 | 1.95M | |
2120 | 1.95M | Changed |= processGlobal(*F, TLI, LookupDomTree); |
2121 | 1.95M | |
2122 | 1.95M | if (!F->hasLocalLinkage()) |
2123 | 1.89M | continue; |
2124 | 57.5k | if (57.5k isProfitableToMakeFastCC(F) && 57.5k !F->isVarArg()31.0k && |
2125 | 57.5k | !F->hasAddressTaken()31.0k ) { |
2126 | 24.9k | // If this function has a calling convention worth changing, is not a |
2127 | 24.9k | // varargs function, and is only called directly, promote it to use the |
2128 | 24.9k | // Fast calling convention. |
2129 | 24.9k | F->setCallingConv(CallingConv::Fast); |
2130 | 24.9k | ChangeCalleesToFastCall(F); |
2131 | 24.9k | ++NumFastCallFns; |
2132 | 24.9k | Changed = true; |
2133 | 24.9k | } |
2134 | 57.5k | |
2135 | 57.5k | if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && |
2136 | 57.5k | !F->hasAddressTaken()0 ) { |
2137 | 0 | // The function is not used by a trampoline intrinsic, so it is safe |
2138 | 0 | // to remove the 'nest' attribute. |
2139 | 0 | RemoveNestAttribute(F); |
2140 | 0 | ++NumNestRemoved; |
2141 | 0 | Changed = true; |
2142 | 0 | } |
2143 | 3.64M | } |
2144 | 38.0k | return Changed; |
2145 | 38.0k | } |
2146 | | |
2147 | | static bool |
2148 | | OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI, |
2149 | | function_ref<DominatorTree &(Function &)> LookupDomTree, |
2150 | 38.0k | SmallSet<const Comdat *, 8> &NotDiscardableComdats) { |
2151 | 38.0k | bool Changed = false; |
2152 | 38.0k | |
2153 | 38.0k | for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); |
2154 | 2.03M | GVI != E2.03M ; ) { |
2155 | 1.99M | GlobalVariable *GV = &*GVI++; |
2156 | 1.99M | // Global variables without names cannot be referenced outside this module. |
2157 | 1.99M | if (!GV->hasName() && 1.99M !GV->isDeclaration()11.1k && !GV->hasLocalLinkage()11.1k ) |
2158 | 1 | GV->setLinkage(GlobalValue::InternalLinkage); |
2159 | 1.99M | // Simplify the initializer. |
2160 | 1.99M | if (GV->hasInitializer()) |
2161 | 1.96M | if (auto *1.96M C1.96M = dyn_cast<Constant>(GV->getInitializer())) { |
2162 | 1.96M | auto &DL = M.getDataLayout(); |
2163 | 1.96M | Constant *New = ConstantFoldConstant(C, DL, TLI); |
2164 | 1.96M | if (New && 1.96M New != C2.94k ) |
2165 | 1.24k | GV->setInitializer(New); |
2166 | 1.96M | } |
2167 | 1.99M | |
2168 | 1.99M | if (deleteIfDead(*GV, NotDiscardableComdats)1.99M ) { |
2169 | 857 | Changed = true; |
2170 | 857 | continue; |
2171 | 857 | } |
2172 | 1.99M | |
2173 | 1.99M | Changed |= processGlobal(*GV, TLI, LookupDomTree); |
2174 | 1.99M | } |
2175 | 38.0k | return Changed; |
2176 | 38.0k | } |
2177 | | |
2178 | | /// Evaluate a piece of a constantexpr store into a global initializer. This |
2179 | | /// returns 'Init' modified to reflect 'Val' stored into it. At this point, the |
2180 | | /// GEP operands of Addr [0, OpNo) have been stepped into. |
2181 | | static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, |
2182 | 1.18k | ConstantExpr *Addr, unsigned OpNo) { |
2183 | 1.18k | // Base case of the recursion. |
2184 | 1.18k | if (OpNo == Addr->getNumOperands()1.18k ) { |
2185 | 406 | assert(Val->getType() == Init->getType() && "Type mismatch!"); |
2186 | 406 | return Val; |
2187 | 406 | } |
2188 | 777 | |
2189 | 777 | SmallVector<Constant*, 32> Elts; |
2190 | 777 | if (StructType *STy777 = dyn_cast<StructType>(Init->getType())) { |
2191 | 573 | // Break up the constant into its elements. |
2192 | 2.85k | for (unsigned i = 0, e = STy->getNumElements(); i != e2.85k ; ++i2.27k ) |
2193 | 2.27k | Elts.push_back(Init->getAggregateElement(i)); |
2194 | 573 | |
2195 | 573 | // Replace the element that we are supposed to. |
2196 | 573 | ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo)); |
2197 | 573 | unsigned Idx = CU->getZExtValue(); |
2198 | 573 | assert(Idx < STy->getNumElements() && "Struct index out of range!"); |
2199 | 573 | Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); |
2200 | 573 | |
2201 | 573 | // Return the modified struct. |
2202 | 573 | return ConstantStruct::get(STy, Elts); |
2203 | 573 | } |
2204 | 204 | |
2205 | 204 | ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo)); |
2206 | 204 | SequentialType *InitTy = cast<SequentialType>(Init->getType()); |
2207 | 204 | uint64_t NumElts = InitTy->getNumElements(); |
2208 | 204 | |
2209 | 204 | // Break up the array into elements. |
2210 | 2.00k | for (uint64_t i = 0, e = NumElts; i != e2.00k ; ++i1.80k ) |
2211 | 1.80k | Elts.push_back(Init->getAggregateElement(i)); |
2212 | 204 | |
2213 | 204 | assert(CI->getZExtValue() < NumElts); |
2214 | 204 | Elts[CI->getZExtValue()] = |
2215 | 204 | EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); |
2216 | 204 | |
2217 | 204 | if (Init->getType()->isArrayTy()) |
2218 | 203 | return ConstantArray::get(cast<ArrayType>(InitTy), Elts); |
2219 | 1 | return ConstantVector::get(Elts); |
2220 | 1 | } |
2221 | | |
2222 | | /// We have decided that Addr (which satisfies the predicate |
2223 | | /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. |
2224 | 451 | static void CommitValueTo(Constant *Val, Constant *Addr) { |
2225 | 451 | if (GlobalVariable *GV451 = dyn_cast<GlobalVariable>(Addr)) { |
2226 | 45 | assert(GV->hasInitializer()); |
2227 | 45 | GV->setInitializer(Val); |
2228 | 45 | return; |
2229 | 45 | } |
2230 | 406 | |
2231 | 406 | ConstantExpr *CE = cast<ConstantExpr>(Addr); |
2232 | 406 | GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); |
2233 | 406 | GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2)); |
2234 | 406 | } |
2235 | | |
2236 | | /// Evaluate static constructors in the function, if we can. Return true if we |
2237 | | /// can, false otherwise. |
2238 | | static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, |
2239 | 307 | TargetLibraryInfo *TLI) { |
2240 | 307 | // Call the function. |
2241 | 307 | Evaluator Eval(DL, TLI); |
2242 | 307 | Constant *RetValDummy; |
2243 | 307 | bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy, |
2244 | 307 | SmallVector<Constant*, 0>()); |
2245 | 307 | |
2246 | 307 | if (EvalSuccess307 ) { |
2247 | 50 | ++NumCtorsEvaluated; |
2248 | 50 | |
2249 | 50 | // We succeeded at evaluation: commit the result. |
2250 | 50 | DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" |
2251 | 50 | << F->getName() << "' to " << Eval.getMutatedMemory().size() |
2252 | 50 | << " stores.\n"); |
2253 | 50 | for (const auto &I : Eval.getMutatedMemory()) |
2254 | 451 | CommitValueTo(I.second, I.first); |
2255 | 50 | for (GlobalVariable *GV : Eval.getInvariants()) |
2256 | 5 | GV->setConstant(true); |
2257 | 50 | } |
2258 | 307 | |
2259 | 307 | return EvalSuccess; |
2260 | 307 | } |
2261 | | |
2262 | 16.0k | static int compareNames(Constant *const *A, Constant *const *B) { |
2263 | 16.0k | Value *AStripped = (*A)->stripPointerCastsNoFollowAliases(); |
2264 | 16.0k | Value *BStripped = (*B)->stripPointerCastsNoFollowAliases(); |
2265 | 16.0k | return AStripped->getName().compare(BStripped->getName()); |
2266 | 16.0k | } |
2267 | | |
2268 | | static void setUsedInitializer(GlobalVariable &V, |
2269 | 769 | const SmallPtrSet<GlobalValue *, 8> &Init) { |
2270 | 769 | if (Init.empty()769 ) { |
2271 | 1 | V.eraseFromParent(); |
2272 | 1 | return; |
2273 | 1 | } |
2274 | 768 | |
2275 | 768 | // Type of pointer to the array of pointers. |
2276 | 768 | PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0); |
2277 | 768 | |
2278 | 768 | SmallVector<llvm::Constant *, 8> UsedArray; |
2279 | 3.83k | for (GlobalValue *GV : Init) { |
2280 | 3.83k | Constant *Cast |
2281 | 3.83k | = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy); |
2282 | 3.83k | UsedArray.push_back(Cast); |
2283 | 3.83k | } |
2284 | 769 | // Sort to get deterministic order. |
2285 | 769 | array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames); |
2286 | 769 | ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size()); |
2287 | 769 | |
2288 | 769 | Module *M = V.getParent(); |
2289 | 769 | V.removeFromParent(); |
2290 | 769 | GlobalVariable *NV = |
2291 | 769 | new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage, |
2292 | 769 | llvm::ConstantArray::get(ATy, UsedArray), ""); |
2293 | 769 | NV->takeName(&V); |
2294 | 769 | NV->setSection("llvm.metadata"); |
2295 | 769 | delete &V; |
2296 | 769 | } |
2297 | | |
2298 | | namespace { |
2299 | | /// An easy to access representation of llvm.used and llvm.compiler.used. |
2300 | | class LLVMUsed { |
2301 | | SmallPtrSet<GlobalValue *, 8> Used; |
2302 | | SmallPtrSet<GlobalValue *, 8> CompilerUsed; |
2303 | | GlobalVariable *UsedV; |
2304 | | GlobalVariable *CompilerUsedV; |
2305 | | |
2306 | | public: |
2307 | 38.0k | LLVMUsed(Module &M) { |
2308 | 38.0k | UsedV = collectUsedGlobalVariables(M, Used, false); |
2309 | 38.0k | CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true); |
2310 | 38.0k | } |
2311 | | typedef SmallPtrSet<GlobalValue *, 8>::iterator iterator; |
2312 | | typedef iterator_range<iterator> used_iterator_range; |
2313 | 38.0k | iterator usedBegin() { return Used.begin(); } |
2314 | 38.0k | iterator usedEnd() { return Used.end(); } |
2315 | 38.0k | used_iterator_range used() { |
2316 | 38.0k | return used_iterator_range(usedBegin(), usedEnd()); |
2317 | 38.0k | } |
2318 | 0 | iterator compilerUsedBegin() { return CompilerUsed.begin(); } |
2319 | 0 | iterator compilerUsedEnd() { return CompilerUsed.end(); } |
2320 | 0 | used_iterator_range compilerUsed() { |
2321 | 0 | return used_iterator_range(compilerUsedBegin(), compilerUsedEnd()); |
2322 | 0 | } |
2323 | 925 | bool usedCount(GlobalValue *GV) const { return Used.count(GV); } |
2324 | 47 | bool compilerUsedCount(GlobalValue *GV) const { |
2325 | 47 | return CompilerUsed.count(GV); |
2326 | 47 | } |
2327 | 294 | bool usedErase(GlobalValue *GV) { return Used.erase(GV); } |
2328 | 1.21k | bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); } |
2329 | 289 | bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; } |
2330 | 2 | bool compilerUsedInsert(GlobalValue *GV) { |
2331 | 2 | return CompilerUsed.insert(GV).second; |
2332 | 2 | } |
2333 | | |
2334 | 38.0k | void syncVariablesAndSets() { |
2335 | 38.0k | if (UsedV) |
2336 | 616 | setUsedInitializer(*UsedV, Used); |
2337 | 38.0k | if (CompilerUsedV) |
2338 | 153 | setUsedInitializer(*CompilerUsedV, CompilerUsed); |
2339 | 38.0k | } |
2340 | | }; |
2341 | | } |
2342 | | |
2343 | 355 | static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) { |
2344 | 355 | if (GA.use_empty()) // No use at all. |
2345 | 46 | return false; |
2346 | 309 | |
2347 | 355 | assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) && |
2348 | 309 | "We should have removed the duplicated " |
2349 | 309 | "element from llvm.compiler.used"); |
2350 | 309 | if (!GA.hasOneUse()) |
2351 | 309 | // Strictly more than one use. So at least one is not in llvm.used and |
2352 | 309 | // llvm.compiler.used. |
2353 | 5 | return true; |
2354 | 304 | |
2355 | 304 | // Exactly one use. Check if it is in llvm.used or llvm.compiler.used. |
2356 | 304 | return !U.usedCount(&GA) && 304 !U.compilerUsedCount(&GA)12 ; |
2357 | 355 | } |
2358 | | |
2359 | | static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V, |
2360 | 313 | const LLVMUsed &U) { |
2361 | 313 | unsigned N = 2; |
2362 | 313 | assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) && |
2363 | 313 | "We should have removed the duplicated " |
2364 | 313 | "element from llvm.compiler.used"); |
2365 | 313 | if (U.usedCount(&V) || 313 U.compilerUsedCount(&V)25 ) |
2366 | 290 | ++N; |
2367 | 313 | return V.hasNUsesOrMore(N); |
2368 | 313 | } |
2369 | | |
2370 | 367 | static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) { |
2371 | 367 | if (!GA.hasLocalLinkage()) |
2372 | 59 | return true; |
2373 | 308 | |
2374 | 308 | return U.usedCount(&GA) || 308 U.compilerUsedCount(&GA)10 ; |
2375 | 367 | } |
2376 | | |
2377 | | static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U, |
2378 | 355 | bool &RenameTarget) { |
2379 | 355 | RenameTarget = false; |
2380 | 355 | bool Ret = false; |
2381 | 355 | if (hasUseOtherThanLLVMUsed(GA, U)) |
2382 | 13 | Ret = true; |
2383 | 355 | |
2384 | 355 | // If the alias is externally visible, we may still be able to simplify it. |
2385 | 355 | if (!mayHaveOtherReferences(GA, U)) |
2386 | 3 | return Ret; |
2387 | 352 | |
2388 | 352 | // If the aliasee has internal linkage, give it the name and linkage |
2389 | 352 | // of the alias, and delete the alias. This turns: |
2390 | 352 | // define internal ... @f(...) |
2391 | 352 | // @a = alias ... @f |
2392 | 352 | // into: |
2393 | 352 | // define ... @a(...) |
2394 | 352 | Constant *Aliasee = GA.getAliasee(); |
2395 | 352 | GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); |
2396 | 352 | if (!Target->hasLocalLinkage()) |
2397 | 39 | return Ret; |
2398 | 313 | |
2399 | 313 | // Do not perform the transform if multiple aliases potentially target the |
2400 | 313 | // aliasee. This check also ensures that it is safe to replace the section |
2401 | 313 | // and other attributes of the aliasee with those of the alias. |
2402 | 313 | if (313 hasMoreThanOneUseOtherThanLLVMUsed(*Target, U)313 ) |
2403 | 19 | return Ret; |
2404 | 294 | |
2405 | 294 | RenameTarget = true; |
2406 | 294 | return true; |
2407 | 294 | } |
2408 | | |
2409 | | static bool |
2410 | | OptimizeGlobalAliases(Module &M, |
2411 | 38.0k | SmallSet<const Comdat *, 8> &NotDiscardableComdats) { |
2412 | 38.0k | bool Changed = false; |
2413 | 38.0k | LLVMUsed Used(M); |
2414 | 38.0k | |
2415 | 38.0k | for (GlobalValue *GV : Used.used()) |
2416 | 924 | Used.compilerUsedErase(GV); |
2417 | 38.0k | |
2418 | 38.0k | for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); |
2419 | 38.4k | I != E38.4k ;) { |
2420 | 400 | GlobalAlias *J = &*I++; |
2421 | 400 | |
2422 | 400 | // Aliases without names cannot be referenced outside this module. |
2423 | 400 | if (!J->hasName() && 400 !J->isDeclaration()0 && !J->hasLocalLinkage()0 ) |
2424 | 0 | J->setLinkage(GlobalValue::InternalLinkage); |
2425 | 400 | |
2426 | 400 | if (deleteIfDead(*J, NotDiscardableComdats)400 ) { |
2427 | 5 | Changed = true; |
2428 | 5 | continue; |
2429 | 5 | } |
2430 | 395 | |
2431 | 395 | // If the aliasee may change at link time, nothing can be done - bail out. |
2432 | 395 | if (395 J->isInterposable()395 ) |
2433 | 6 | continue; |
2434 | 389 | |
2435 | 389 | Constant *Aliasee = J->getAliasee(); |
2436 | 389 | GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts()); |
2437 | 389 | // We can't trivially replace the alias with the aliasee if the aliasee is |
2438 | 389 | // non-trivial in some way. |
2439 | 389 | // TODO: Try to handle non-zero GEPs of local aliasees. |
2440 | 389 | if (!Target) |
2441 | 34 | continue; |
2442 | 355 | Target->removeDeadConstantUsers(); |
2443 | 355 | |
2444 | 355 | // Make all users of the alias use the aliasee instead. |
2445 | 355 | bool RenameTarget; |
2446 | 355 | if (!hasUsesToReplace(*J, Used, RenameTarget)) |
2447 | 49 | continue; |
2448 | 306 | |
2449 | 306 | J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType())); |
2450 | 306 | ++NumAliasesResolved; |
2451 | 306 | Changed = true; |
2452 | 306 | |
2453 | 306 | if (RenameTarget306 ) { |
2454 | 294 | // Give the aliasee the name, linkage and other attributes of the alias. |
2455 | 294 | Target->takeName(&*J); |
2456 | 294 | Target->setLinkage(J->getLinkage()); |
2457 | 294 | Target->setVisibility(J->getVisibility()); |
2458 | 294 | Target->setDLLStorageClass(J->getDLLStorageClass()); |
2459 | 294 | |
2460 | 294 | if (Used.usedErase(&*J)) |
2461 | 289 | Used.usedInsert(Target); |
2462 | 294 | |
2463 | 294 | if (Used.compilerUsedErase(&*J)) |
2464 | 2 | Used.compilerUsedInsert(Target); |
2465 | 306 | } else if (12 mayHaveOtherReferences(*J, Used)12 ) |
2466 | 9 | continue; |
2467 | 297 | |
2468 | 297 | // Delete the alias. |
2469 | 297 | M.getAliasList().erase(J); |
2470 | 297 | ++NumAliasesRemoved; |
2471 | 297 | Changed = true; |
2472 | 297 | } |
2473 | 38.0k | |
2474 | 38.0k | Used.syncVariablesAndSets(); |
2475 | 38.0k | |
2476 | 38.0k | return Changed; |
2477 | 38.0k | } |
2478 | | |
2479 | 38.0k | static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { |
2480 | 38.0k | LibFunc F = LibFunc_cxa_atexit; |
2481 | 38.0k | if (!TLI->has(F)) |
2482 | 5.63k | return nullptr; |
2483 | 32.4k | |
2484 | 32.4k | Function *Fn = M.getFunction(TLI->getName(F)); |
2485 | 32.4k | if (!Fn) |
2486 | 32.3k | return nullptr; |
2487 | 65 | |
2488 | 65 | // Make sure that the function has the correct prototype. |
2489 | 65 | if (65 !TLI->getLibFunc(*Fn, F) || 65 F != LibFunc_cxa_atexit65 ) |
2490 | 0 | return nullptr; |
2491 | 65 | |
2492 | 65 | return Fn; |
2493 | 65 | } |
2494 | | |
2495 | | /// Returns whether the given function is an empty C++ destructor and can |
2496 | | /// therefore be eliminated. |
2497 | | /// Note that we assume that other optimization passes have already simplified |
2498 | | /// the code so we only look for a function with a single basic block, where |
2499 | | /// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and |
2500 | | /// other side-effect free instructions. |
2501 | | static bool cxxDtorIsEmpty(const Function &Fn, |
2502 | 1.64k | SmallPtrSet<const Function *, 8> &CalledFunctions) { |
2503 | 1.64k | // FIXME: We could eliminate C++ destructors if they're readonly/readnone and |
2504 | 1.64k | // nounwind, but that doesn't seem worth doing. |
2505 | 1.64k | if (Fn.isDeclaration()) |
2506 | 24 | return false; |
2507 | 1.61k | |
2508 | 1.61k | if (1.61k ++Fn.begin() != Fn.end()1.61k ) |
2509 | 420 | return false; |
2510 | 1.19k | |
2511 | 1.19k | const BasicBlock &EntryBlock = Fn.getEntryBlock(); |
2512 | 1.19k | for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end(); |
2513 | 3.04k | I != E3.04k ; ++I1.84k ) { |
2514 | 3.04k | if (const CallInst *CI3.04k = dyn_cast<CallInst>(I)) { |
2515 | 1.18k | // Ignore debug intrinsics. |
2516 | 1.18k | if (isa<DbgInfoIntrinsic>(CI)) |
2517 | 0 | continue; |
2518 | 1.18k | |
2519 | 1.18k | const Function *CalledFn = CI->getCalledFunction(); |
2520 | 1.18k | |
2521 | 1.18k | if (!CalledFn) |
2522 | 0 | return false; |
2523 | 1.18k | |
2524 | 1.18k | SmallPtrSet<const Function *, 8> NewCalledFunctions(CalledFunctions); |
2525 | 1.18k | |
2526 | 1.18k | // Don't treat recursive functions as empty. |
2527 | 1.18k | if (!NewCalledFunctions.insert(CalledFn).second) |
2528 | 0 | return false; |
2529 | 1.18k | |
2530 | 1.18k | if (1.18k !cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)1.18k ) |
2531 | 1.17k | return false; |
2532 | 1.86k | } else if (1.86k isa<ReturnInst>(*I)1.86k ) |
2533 | 15 | return true; // We're done. |
2534 | 1.84k | else if (1.84k I->mayHaveSideEffects()1.84k ) |
2535 | 10 | return false; // Destructor with side effects, bail. |
2536 | 3.04k | } |
2537 | 1.19k | |
2538 | 0 | return false; |
2539 | 1.64k | } |
2540 | | |
2541 | 65 | static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { |
2542 | 65 | /// Itanium C++ ABI p3.3.5: |
2543 | 65 | /// |
2544 | 65 | /// After constructing a global (or local static) object, that will require |
2545 | 65 | /// destruction on exit, a termination function is registered as follows: |
2546 | 65 | /// |
2547 | 65 | /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); |
2548 | 65 | /// |
2549 | 65 | /// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the |
2550 | 65 | /// call f(p) when DSO d is unloaded, before all such termination calls |
2551 | 65 | /// registered before this one. It returns zero if registration is |
2552 | 65 | /// successful, nonzero on failure. |
2553 | 65 | |
2554 | 65 | // This pass will look for calls to __cxa_atexit where the function is trivial |
2555 | 65 | // and remove them. |
2556 | 65 | bool Changed = false; |
2557 | 65 | |
2558 | 65 | for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end(); |
2559 | 525 | I != E525 ;) { |
2560 | 460 | // We're only interested in calls. Theoretically, we could handle invoke |
2561 | 460 | // instructions as well, but neither llvm-gcc nor clang generate invokes |
2562 | 460 | // to __cxa_atexit. |
2563 | 460 | CallInst *CI = dyn_cast<CallInst>(*I++); |
2564 | 460 | if (!CI) |
2565 | 0 | continue; |
2566 | 460 | |
2567 | 460 | Function *DtorFn = |
2568 | 460 | dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts()); |
2569 | 460 | if (!DtorFn) |
2570 | 0 | continue; |
2571 | 460 | |
2572 | 460 | SmallPtrSet<const Function *, 8> CalledFunctions; |
2573 | 460 | if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions)) |
2574 | 454 | continue; |
2575 | 6 | |
2576 | 6 | // Just remove the call. |
2577 | 6 | CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); |
2578 | 6 | CI->eraseFromParent(); |
2579 | 6 | |
2580 | 6 | ++NumCXXDtorsRemoved; |
2581 | 6 | |
2582 | 6 | Changed |= true; |
2583 | 6 | } |
2584 | 65 | |
2585 | 65 | return Changed; |
2586 | 65 | } |
2587 | | |
2588 | | static bool optimizeGlobalsInModule( |
2589 | | Module &M, const DataLayout &DL, TargetLibraryInfo *TLI, |
2590 | 18.0k | function_ref<DominatorTree &(Function &)> LookupDomTree) { |
2591 | 18.0k | SmallSet<const Comdat *, 8> NotDiscardableComdats; |
2592 | 18.0k | bool Changed = false; |
2593 | 18.0k | bool LocalChange = true; |
2594 | 56.1k | while (LocalChange56.1k ) { |
2595 | 38.0k | LocalChange = false; |
2596 | 38.0k | |
2597 | 38.0k | NotDiscardableComdats.clear(); |
2598 | 38.0k | for (const GlobalVariable &GV : M.globals()) |
2599 | 1.99M | if (const Comdat *1.99M C1.99M = GV.getComdat()) |
2600 | 297 | if (297 !GV.isDiscardableIfUnused() || 297 !GV.use_empty()283 ) |
2601 | 295 | NotDiscardableComdats.insert(C); |
2602 | 38.0k | for (Function &F : M) |
2603 | 3.64M | if (const Comdat *3.64M C3.64M = F.getComdat()) |
2604 | 3.46k | if (3.46k !F.isDefTriviallyDead()3.46k ) |
2605 | 663 | NotDiscardableComdats.insert(C); |
2606 | 38.0k | for (GlobalAlias &GA : M.aliases()) |
2607 | 400 | if (const Comdat *400 C400 = GA.getComdat()) |
2608 | 61 | if (61 !GA.isDiscardableIfUnused() || 61 !GA.use_empty()1 ) |
2609 | 60 | NotDiscardableComdats.insert(C); |
2610 | 38.0k | |
2611 | 38.0k | // Delete functions that are trivially dead, ccc -> fastcc |
2612 | 38.0k | LocalChange |= |
2613 | 38.0k | OptimizeFunctions(M, TLI, LookupDomTree, NotDiscardableComdats); |
2614 | 38.0k | |
2615 | 38.0k | // Optimize global_ctors list. |
2616 | 307 | LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { |
2617 | 307 | return EvaluateStaticConstructor(F, DL, TLI); |
2618 | 307 | }); |
2619 | 38.0k | |
2620 | 38.0k | // Optimize non-address-taken globals. |
2621 | 38.0k | LocalChange |= OptimizeGlobalVars(M, TLI, LookupDomTree, |
2622 | 38.0k | NotDiscardableComdats); |
2623 | 38.0k | |
2624 | 38.0k | // Resolve aliases, when possible. |
2625 | 38.0k | LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats); |
2626 | 38.0k | |
2627 | 38.0k | // Try to remove trivial global destructors if they are not removed |
2628 | 38.0k | // already. |
2629 | 38.0k | Function *CXAAtExitFn = FindCXAAtExit(M, TLI); |
2630 | 38.0k | if (CXAAtExitFn) |
2631 | 65 | LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); |
2632 | 38.0k | |
2633 | 38.0k | Changed |= LocalChange; |
2634 | 38.0k | } |
2635 | 18.0k | |
2636 | 18.0k | // TODO: Move all global ctors functions to the end of the module for code |
2637 | 18.0k | // layout. |
2638 | 18.0k | |
2639 | 18.0k | return Changed; |
2640 | 18.0k | } |
2641 | | |
2642 | 121 | PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { |
2643 | 121 | auto &DL = M.getDataLayout(); |
2644 | 121 | auto &TLI = AM.getResult<TargetLibraryAnalysis>(M); |
2645 | 121 | auto &FAM = |
2646 | 121 | AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); |
2647 | 0 | auto LookupDomTree = [&FAM](Function &F) -> DominatorTree &{ |
2648 | 0 | return FAM.getResult<DominatorTreeAnalysis>(F); |
2649 | 0 | }; |
2650 | 121 | if (!optimizeGlobalsInModule(M, DL, &TLI, LookupDomTree)) |
2651 | 83 | return PreservedAnalyses::all(); |
2652 | 38 | return PreservedAnalyses::none(); |
2653 | 38 | } |
2654 | | |
2655 | | namespace { |
2656 | | struct GlobalOptLegacyPass : public ModulePass { |
2657 | | static char ID; // Pass identification, replacement for typeid |
2658 | 17.9k | GlobalOptLegacyPass() : ModulePass(ID) { |
2659 | 17.9k | initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry()); |
2660 | 17.9k | } |
2661 | | |
2662 | 17.9k | bool runOnModule(Module &M) override { |
2663 | 17.9k | if (skipModule(M)) |
2664 | 2 | return false; |
2665 | 17.9k | |
2666 | 17.9k | auto &DL = M.getDataLayout(); |
2667 | 17.9k | auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); |
2668 | 7.08k | auto LookupDomTree = [this](Function &F) -> DominatorTree & { |
2669 | 7.08k | return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); |
2670 | 7.08k | }; |
2671 | 17.9k | return optimizeGlobalsInModule(M, DL, TLI, LookupDomTree); |
2672 | 17.9k | } |
2673 | | |
2674 | 17.9k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
2675 | 17.9k | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
2676 | 17.9k | AU.addRequired<DominatorTreeWrapperPass>(); |
2677 | 17.9k | } |
2678 | | }; |
2679 | | } |
2680 | | |
2681 | | char GlobalOptLegacyPass::ID = 0; |
2682 | 25.1k | INITIALIZE_PASS_BEGIN25.1k (GlobalOptLegacyPass, "globalopt",
|
2683 | 25.1k | "Global Variable Optimizer", false, false) |
2684 | 25.1k | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
2685 | 25.1k | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) |
2686 | 25.1k | INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt", |
2687 | | "Global Variable Optimizer", false, false) |
2688 | | |
2689 | 17.8k | ModulePass *llvm::createGlobalOptimizerPass() { |
2690 | 17.8k | return new GlobalOptLegacyPass(); |
2691 | 17.8k | } |