/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass promotes "by reference" arguments to be "by value" arguments. In |
11 | | // practice, this means looking for internal functions that have pointer |
12 | | // arguments. If it can prove, through the use of alias analysis, that an |
13 | | // argument is *only* loaded, then it can pass the value into the function |
14 | | // instead of the address of the value. This can cause recursive simplification |
15 | | // of code and lead to the elimination of allocas (especially in C++ template |
16 | | // code like the STL). |
17 | | // |
18 | | // This pass also handles aggregate arguments that are passed into a function, |
19 | | // scalarizing them if the elements of the aggregate are only loaded. Note that |
20 | | // by default it refuses to scalarize aggregates which would require passing in |
21 | | // more than three operands to the function, because passing thousands of |
22 | | // operands for a large array or structure is unprofitable! This limit can be |
23 | | // configured or disabled, however. |
24 | | // |
25 | | // Note that this transformation could also be done for arguments that are only |
26 | | // stored to (returning the value instead), but does not currently. This case |
27 | | // would be best handled when and if LLVM begins supporting multiple return |
28 | | // values from functions. |
29 | | // |
30 | | //===----------------------------------------------------------------------===// |
31 | | |
32 | | #include "llvm/Transforms/IPO/ArgumentPromotion.h" |
33 | | #include "llvm/ADT/DepthFirstIterator.h" |
34 | | #include "llvm/ADT/Optional.h" |
35 | | #include "llvm/ADT/Statistic.h" |
36 | | #include "llvm/ADT/StringExtras.h" |
37 | | #include "llvm/Analysis/AliasAnalysis.h" |
38 | | #include "llvm/Analysis/AssumptionCache.h" |
39 | | #include "llvm/Analysis/BasicAliasAnalysis.h" |
40 | | #include "llvm/Analysis/CallGraph.h" |
41 | | #include "llvm/Analysis/CallGraphSCCPass.h" |
42 | | #include "llvm/Analysis/LazyCallGraph.h" |
43 | | #include "llvm/Analysis/Loads.h" |
44 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
45 | | #include "llvm/IR/CFG.h" |
46 | | #include "llvm/IR/CallSite.h" |
47 | | #include "llvm/IR/Constants.h" |
48 | | #include "llvm/IR/DataLayout.h" |
49 | | #include "llvm/IR/DebugInfo.h" |
50 | | #include "llvm/IR/DerivedTypes.h" |
51 | | #include "llvm/IR/Instructions.h" |
52 | | #include "llvm/IR/LLVMContext.h" |
53 | | #include "llvm/IR/Module.h" |
54 | | #include "llvm/Support/Debug.h" |
55 | | #include "llvm/Support/raw_ostream.h" |
56 | | #include "llvm/Transforms/IPO.h" |
57 | | #include <set> |
58 | | using namespace llvm; |
59 | | |
60 | | #define DEBUG_TYPE "argpromotion" |
61 | | |
62 | | STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted"); |
63 | | STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); |
64 | | STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted"); |
65 | | STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated"); |
66 | | |
67 | | /// A vector used to hold the indices of a single GEP instruction |
68 | | typedef std::vector<uint64_t> IndicesVector; |
69 | | |
70 | | /// DoPromotion - This method actually performs the promotion of the specified |
71 | | /// arguments, and returns the new function. At this point, we know that it's |
72 | | /// safe to do so. |
73 | | static Function * |
74 | | doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, |
75 | | SmallPtrSetImpl<Argument *> &ByValArgsToTransform, |
76 | | Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> |
77 | 2.72k | ReplaceCallSite) { |
78 | 2.72k | |
79 | 2.72k | // Start by computing a new prototype for the function, which is the same as |
80 | 2.72k | // the old function, but has modified arguments. |
81 | 2.72k | FunctionType *FTy = F->getFunctionType(); |
82 | 2.72k | std::vector<Type *> Params; |
83 | 2.72k | |
84 | 2.72k | typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable; |
85 | 2.72k | |
86 | 2.72k | // ScalarizedElements - If we are promoting a pointer that has elements |
87 | 2.72k | // accessed out of it, keep track of which elements are accessed so that we |
88 | 2.72k | // can add one argument for each. |
89 | 2.72k | // |
90 | 2.72k | // Arguments that are directly loaded will have a zero element value here, to |
91 | 2.72k | // handle cases where there are both a direct load and GEP accesses. |
92 | 2.72k | // |
93 | 2.72k | std::map<Argument *, ScalarizeTable> ScalarizedElements; |
94 | 2.72k | |
95 | 2.72k | // OriginalLoads - Keep track of a representative load instruction from the |
96 | 2.72k | // original function so that we can tell the alias analysis implementation |
97 | 2.72k | // what the new GEP/Load instructions we are inserting look like. |
98 | 2.72k | // We need to keep the original loads for each argument and the elements |
99 | 2.72k | // of the argument that are accessed. |
100 | 2.72k | std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads; |
101 | 2.72k | |
102 | 2.72k | // Attribute - Keep track of the parameter attributes for the arguments |
103 | 2.72k | // that we are *not* promoting. For the ones that we do promote, the parameter |
104 | 2.72k | // attributes are lost |
105 | 2.72k | SmallVector<AttributeSet, 8> ArgAttrVec; |
106 | 2.72k | AttributeList PAL = F->getAttributes(); |
107 | 2.72k | |
108 | 2.72k | // First, determine the new argument list |
109 | 2.72k | unsigned ArgNo = 0; |
110 | 6.27k | for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; |
111 | 3.54k | ++I, ++ArgNo3.54k ) { |
112 | 3.54k | if (ByValArgsToTransform.count(&*I)3.54k ) { |
113 | 33 | // Simple byval argument? Just add all the struct element types. |
114 | 33 | Type *AgTy = cast<PointerType>(I->getType())->getElementType(); |
115 | 33 | StructType *STy = cast<StructType>(AgTy); |
116 | 33 | Params.insert(Params.end(), STy->element_begin(), STy->element_end()); |
117 | 33 | ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), |
118 | 33 | AttributeSet()); |
119 | 33 | ++NumByValArgsPromoted; |
120 | 3.54k | } else if (3.51k !ArgsToPromote.count(&*I)3.51k ) { |
121 | 698 | // Unchanged argument |
122 | 698 | Params.push_back(I->getType()); |
123 | 698 | ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo)); |
124 | 3.51k | } else if (2.81k I->use_empty()2.81k ) { |
125 | 29 | // Dead argument (which are always marked as promotable) |
126 | 29 | ++NumArgumentsDead; |
127 | 29 | |
128 | 29 | // There may be remaining metadata uses of the argument for things like |
129 | 29 | // llvm.dbg.value. Replace them with undef. |
130 | 29 | I->replaceAllUsesWith(UndefValue::get(I->getType())); |
131 | 2.81k | } else { |
132 | 2.78k | // Okay, this is being promoted. This means that the only uses are loads |
133 | 2.78k | // or GEPs which are only used by loads |
134 | 2.78k | |
135 | 2.78k | // In this table, we will track which indices are loaded from the argument |
136 | 2.78k | // (where direct loads are tracked as no indices). |
137 | 2.78k | ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; |
138 | 3.19k | for (User *U : I->users()) { |
139 | 3.19k | Instruction *UI = cast<Instruction>(U); |
140 | 3.19k | Type *SrcTy; |
141 | 3.19k | if (LoadInst *L = dyn_cast<LoadInst>(UI)) |
142 | 144 | SrcTy = L->getType(); |
143 | 3.19k | else |
144 | 3.04k | SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); |
145 | 3.19k | IndicesVector Indices; |
146 | 3.19k | Indices.reserve(UI->getNumOperands() - 1); |
147 | 3.19k | // Since loads will only have a single operand, and GEPs only a single |
148 | 3.19k | // non-index operand, this will record direct loads without any indices, |
149 | 3.19k | // and gep+loads with the GEP indices. |
150 | 3.19k | for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); |
151 | 9.31k | II != IE9.31k ; ++II6.12k ) |
152 | 6.12k | Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); |
153 | 3.19k | // GEPs with a single 0 index can be merged with direct loads |
154 | 3.19k | if (Indices.size() == 1 && 3.19k Indices.front() == 09 ) |
155 | 0 | Indices.clear(); |
156 | 3.19k | ArgIndices.insert(std::make_pair(SrcTy, Indices)); |
157 | 3.19k | LoadInst *OrigLoad; |
158 | 3.19k | if (LoadInst *L = dyn_cast<LoadInst>(UI)) |
159 | 144 | OrigLoad = L; |
160 | 3.19k | else |
161 | 3.19k | // Take any load, we will use it only to update Alias Analysis |
162 | 3.04k | OrigLoad = cast<LoadInst>(UI->user_back()); |
163 | 3.19k | OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; |
164 | 3.19k | } |
165 | 2.78k | |
166 | 2.78k | // Add a parameter to the function for each element passed in. |
167 | 3.12k | for (const auto &ArgIndex : ArgIndices) { |
168 | 3.12k | // not allowed to dereference ->begin() if size() is 0 |
169 | 3.12k | Params.push_back(GetElementPtrInst::getIndexedType( |
170 | 3.12k | cast<PointerType>(I->getType()->getScalarType())->getElementType(), |
171 | 3.12k | ArgIndex.second)); |
172 | 3.12k | ArgAttrVec.push_back(AttributeSet()); |
173 | 3.12k | assert(Params.back()); |
174 | 3.12k | } |
175 | 2.78k | |
176 | 2.78k | if (ArgIndices.size() == 1 && 2.78k ArgIndices.begin()->second.empty()2.52k ) |
177 | 144 | ++NumArgumentsPromoted; |
178 | 2.78k | else |
179 | 2.64k | ++NumAggregatesPromoted; |
180 | 3.51k | } |
181 | 3.54k | } |
182 | 2.72k | |
183 | 2.72k | Type *RetTy = FTy->getReturnType(); |
184 | 2.72k | |
185 | 2.72k | // Construct the new function type using the new arguments. |
186 | 2.72k | FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); |
187 | 2.72k | |
188 | 2.72k | // Create the new function body and insert it into the module. |
189 | 2.72k | Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); |
190 | 2.72k | NF->copyAttributesFrom(F); |
191 | 2.72k | |
192 | 2.72k | // Patch the pointer to LLVM function in debug info descriptor. |
193 | 2.72k | NF->setSubprogram(F->getSubprogram()); |
194 | 2.72k | F->setSubprogram(nullptr); |
195 | 2.72k | |
196 | 2.72k | DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" |
197 | 2.72k | << "From: " << *F); |
198 | 2.72k | |
199 | 2.72k | // Recompute the parameter attributes list based on the new arguments for |
200 | 2.72k | // the function. |
201 | 2.72k | NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(), |
202 | 2.72k | PAL.getRetAttributes(), ArgAttrVec)); |
203 | 2.72k | ArgAttrVec.clear(); |
204 | 2.72k | |
205 | 2.72k | F->getParent()->getFunctionList().insert(F->getIterator(), NF); |
206 | 2.72k | NF->takeName(F); |
207 | 2.72k | |
208 | 2.72k | // Loop over all of the callers of the function, transforming the call sites |
209 | 2.72k | // to pass in the loaded pointers. |
210 | 2.72k | // |
211 | 2.72k | SmallVector<Value *, 16> Args; |
212 | 23.6k | while (!F->use_empty()23.6k ) { |
213 | 20.9k | CallSite CS(F->user_back()); |
214 | 20.9k | assert(CS.getCalledFunction() == F); |
215 | 20.9k | Instruction *Call = CS.getInstruction(); |
216 | 20.9k | const AttributeList &CallPAL = CS.getAttributes(); |
217 | 20.9k | |
218 | 20.9k | // Loop over the operands, inserting GEP and loads in the caller as |
219 | 20.9k | // appropriate. |
220 | 20.9k | CallSite::arg_iterator AI = CS.arg_begin(); |
221 | 20.9k | ArgNo = 0; |
222 | 45.1k | for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; |
223 | 24.2k | ++I, ++AI, ++ArgNo) |
224 | 24.2k | if (24.2k !ArgsToPromote.count(&*I) && 24.2k !ByValArgsToTransform.count(&*I)3.12k ) { |
225 | 3.08k | Args.push_back(*AI); // Unmodified argument |
226 | 3.08k | ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); |
227 | 24.2k | } else if (21.1k ByValArgsToTransform.count(&*I)21.1k ) { |
228 | 33 | // Emit a GEP and load for each element of the struct. |
229 | 33 | Type *AgTy = cast<PointerType>(I->getType())->getElementType(); |
230 | 33 | StructType *STy = cast<StructType>(AgTy); |
231 | 33 | Value *Idxs[2] = { |
232 | 33 | ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; |
233 | 114 | for (unsigned i = 0, e = STy->getNumElements(); i != e114 ; ++i81 ) { |
234 | 81 | Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); |
235 | 81 | Value *Idx = GetElementPtrInst::Create( |
236 | 81 | STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); |
237 | 81 | // TODO: Tell AA about the new values? |
238 | 81 | Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call)); |
239 | 81 | ArgAttrVec.push_back(AttributeSet()); |
240 | 81 | } |
241 | 21.1k | } else if (21.1k !I->use_empty()21.1k ) { |
242 | 21.0k | // Non-dead argument: insert GEPs and loads as appropriate. |
243 | 21.0k | ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; |
244 | 21.0k | // Store the Value* version of the indices in here, but declare it now |
245 | 21.0k | // for reuse. |
246 | 21.0k | std::vector<Value *> Ops; |
247 | 22.8k | for (const auto &ArgIndex : ArgIndices) { |
248 | 22.8k | Value *V = *AI; |
249 | 22.8k | LoadInst *OrigLoad = |
250 | 22.8k | OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; |
251 | 22.8k | if (!ArgIndex.second.empty()22.8k ) { |
252 | 22.6k | Ops.reserve(ArgIndex.second.size()); |
253 | 22.6k | Type *ElTy = V->getType(); |
254 | 45.4k | for (auto II : ArgIndex.second) { |
255 | 45.4k | // Use i32 to index structs, and i64 for others (pointers/arrays). |
256 | 45.4k | // This satisfies GEP constraints. |
257 | 45.4k | Type *IdxTy = |
258 | 22.7k | (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) |
259 | 22.6k | : Type::getInt64Ty(F->getContext())); |
260 | 45.4k | Ops.push_back(ConstantInt::get(IdxTy, II)); |
261 | 45.4k | // Keep track of the type we're currently indexing. |
262 | 45.4k | if (auto *ElPTy = dyn_cast<PointerType>(ElTy)) |
263 | 22.6k | ElTy = ElPTy->getElementType(); |
264 | 45.4k | else |
265 | 22.7k | ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II); |
266 | 45.4k | } |
267 | 22.6k | // And create a GEP to extract those indices. |
268 | 22.6k | V = GetElementPtrInst::Create(ArgIndex.first, V, Ops, |
269 | 22.6k | V->getName() + ".idx", Call); |
270 | 22.6k | Ops.clear(); |
271 | 22.6k | } |
272 | 22.8k | // Since we're replacing a load make sure we take the alignment |
273 | 22.8k | // of the previous load. |
274 | 22.8k | LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call); |
275 | 22.8k | newLoad->setAlignment(OrigLoad->getAlignment()); |
276 | 22.8k | // Transfer the AA info too. |
277 | 22.8k | AAMDNodes AAInfo; |
278 | 22.8k | OrigLoad->getAAMetadata(AAInfo); |
279 | 22.8k | newLoad->setAAMetadata(AAInfo); |
280 | 22.8k | |
281 | 22.8k | Args.push_back(newLoad); |
282 | 22.8k | ArgAttrVec.push_back(AttributeSet()); |
283 | 22.8k | } |
284 | 24.2k | } |
285 | 20.9k | |
286 | 20.9k | // Push any varargs arguments on the list. |
287 | 20.9k | for (; AI != CS.arg_end()20.9k ; ++AI, ++ArgNo0 ) { |
288 | 0 | Args.push_back(*AI); |
289 | 0 | ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); |
290 | 0 | } |
291 | 20.9k | |
292 | 20.9k | SmallVector<OperandBundleDef, 1> OpBundles; |
293 | 20.9k | CS.getOperandBundlesAsDefs(OpBundles); |
294 | 20.9k | |
295 | 20.9k | CallSite NewCS; |
296 | 20.9k | if (InvokeInst *II20.9k = dyn_cast<InvokeInst>(Call)) { |
297 | 14 | NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), |
298 | 14 | Args, OpBundles, "", Call); |
299 | 20.9k | } else { |
300 | 20.9k | auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call); |
301 | 20.9k | NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind()); |
302 | 20.9k | NewCS = NewCall; |
303 | 20.9k | } |
304 | 20.9k | NewCS.setCallingConv(CS.getCallingConv()); |
305 | 20.9k | NewCS.setAttributes( |
306 | 20.9k | AttributeList::get(F->getContext(), CallPAL.getFnAttributes(), |
307 | 20.9k | CallPAL.getRetAttributes(), ArgAttrVec)); |
308 | 20.9k | NewCS->setDebugLoc(Call->getDebugLoc()); |
309 | 20.9k | uint64_t W; |
310 | 20.9k | if (Call->extractProfTotalWeight(W)) |
311 | 1 | NewCS->setProfWeight(W); |
312 | 20.9k | Args.clear(); |
313 | 20.9k | ArgAttrVec.clear(); |
314 | 20.9k | |
315 | 20.9k | // Update the callgraph to know that the callsite has been transformed. |
316 | 20.9k | if (ReplaceCallSite) |
317 | 20.9k | (*ReplaceCallSite)(CS, NewCS); |
318 | 20.9k | |
319 | 20.9k | if (!Call->use_empty()20.9k ) { |
320 | 20.4k | Call->replaceAllUsesWith(NewCS.getInstruction()); |
321 | 20.4k | NewCS->takeName(Call); |
322 | 20.4k | } |
323 | 20.9k | |
324 | 20.9k | // Finally, remove the old call from the program, reducing the use-count of |
325 | 20.9k | // F. |
326 | 20.9k | Call->eraseFromParent(); |
327 | 20.9k | } |
328 | 2.72k | |
329 | 2.72k | const DataLayout &DL = F->getParent()->getDataLayout(); |
330 | 2.72k | |
331 | 2.72k | // Since we have now created the new function, splice the body of the old |
332 | 2.72k | // function right into the new function, leaving the old rotting hulk of the |
333 | 2.72k | // function empty. |
334 | 2.72k | NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); |
335 | 2.72k | |
336 | 2.72k | // Loop over the argument list, transferring uses of the old arguments over to |
337 | 2.72k | // the new arguments, also transferring over the names as well. |
338 | 2.72k | // |
339 | 2.72k | for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), |
340 | 2.72k | I2 = NF->arg_begin(); |
341 | 6.27k | I != E6.27k ; ++I3.54k ) { |
342 | 3.54k | if (!ArgsToPromote.count(&*I) && 3.54k !ByValArgsToTransform.count(&*I)731 ) { |
343 | 698 | // If this is an unmodified argument, move the name and users over to the |
344 | 698 | // new version. |
345 | 698 | I->replaceAllUsesWith(&*I2); |
346 | 698 | I2->takeName(&*I); |
347 | 698 | ++I2; |
348 | 698 | continue; |
349 | 698 | } |
350 | 2.84k | |
351 | 2.84k | if (2.84k ByValArgsToTransform.count(&*I)2.84k ) { |
352 | 33 | // In the callee, we create an alloca, and store each of the new incoming |
353 | 33 | // arguments into the alloca. |
354 | 33 | Instruction *InsertPt = &NF->begin()->front(); |
355 | 33 | |
356 | 33 | // Just add all the struct element types. |
357 | 33 | Type *AgTy = cast<PointerType>(I->getType())->getElementType(); |
358 | 33 | Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, |
359 | 33 | I->getParamAlignment(), "", InsertPt); |
360 | 33 | StructType *STy = cast<StructType>(AgTy); |
361 | 33 | Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), |
362 | 33 | nullptr}; |
363 | 33 | |
364 | 114 | for (unsigned i = 0, e = STy->getNumElements(); i != e114 ; ++i81 ) { |
365 | 81 | Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); |
366 | 81 | Value *Idx = GetElementPtrInst::Create( |
367 | 81 | AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), |
368 | 81 | InsertPt); |
369 | 81 | I2->setName(I->getName() + "." + Twine(i)); |
370 | 81 | new StoreInst(&*I2++, Idx, InsertPt); |
371 | 81 | } |
372 | 33 | |
373 | 33 | // Anything that used the arg should now use the alloca. |
374 | 33 | I->replaceAllUsesWith(TheAlloca); |
375 | 33 | TheAlloca->takeName(&*I); |
376 | 33 | |
377 | 33 | // If the alloca is used in a call, we must clear the tail flag since |
378 | 33 | // the callee now uses an alloca from the caller. |
379 | 130 | for (User *U : TheAlloca->users()) { |
380 | 130 | CallInst *Call = dyn_cast<CallInst>(U); |
381 | 130 | if (!Call) |
382 | 128 | continue; |
383 | 2 | Call->setTailCall(false); |
384 | 2 | } |
385 | 33 | continue; |
386 | 33 | } |
387 | 2.81k | |
388 | 2.81k | if (2.81k I->use_empty()2.81k ) |
389 | 29 | continue; |
390 | 2.78k | |
391 | 2.78k | // Otherwise, if we promoted this argument, then all users are load |
392 | 2.78k | // instructions (or GEPs with only load users), and all loads should be |
393 | 2.78k | // using the new argument that we added. |
394 | 2.78k | ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; |
395 | 2.78k | |
396 | 5.97k | while (!I->use_empty()5.97k ) { |
397 | 3.19k | if (LoadInst *LI3.19k = dyn_cast<LoadInst>(I->user_back())) { |
398 | 144 | assert(ArgIndices.begin()->second.empty() && |
399 | 144 | "Load element should sort to front!"); |
400 | 144 | I2->setName(I->getName() + ".val"); |
401 | 144 | LI->replaceAllUsesWith(&*I2); |
402 | 144 | LI->eraseFromParent(); |
403 | 144 | DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() |
404 | 144 | << "' in function '" << F->getName() << "'\n"); |
405 | 3.19k | } else { |
406 | 3.04k | GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back()); |
407 | 3.04k | IndicesVector Operands; |
408 | 3.04k | Operands.reserve(GEP->getNumIndices()); |
409 | 3.04k | for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); |
410 | 9.17k | II != IE9.17k ; ++II6.12k ) |
411 | 6.12k | Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); |
412 | 3.04k | |
413 | 3.04k | // GEPs with a single 0 index can be merged with direct loads |
414 | 3.04k | if (Operands.size() == 1 && 3.04k Operands.front() == 09 ) |
415 | 0 | Operands.clear(); |
416 | 3.04k | |
417 | 3.04k | Function::arg_iterator TheArg = I2; |
418 | 3.04k | for (ScalarizeTable::iterator It = ArgIndices.begin(); |
419 | 3.47k | It->second != Operands3.47k ; ++It, ++TheArg429 ) { |
420 | 429 | assert(It != ArgIndices.end() && "GEP not handled??"); |
421 | 429 | } |
422 | 3.04k | |
423 | 3.04k | std::string NewName = I->getName(); |
424 | 9.17k | for (unsigned i = 0, e = Operands.size(); i != e9.17k ; ++i6.12k ) { |
425 | 6.12k | NewName += "." + utostr(Operands[i]); |
426 | 6.12k | } |
427 | 3.04k | NewName += ".val"; |
428 | 3.04k | TheArg->setName(NewName); |
429 | 3.04k | |
430 | 3.04k | DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() |
431 | 3.04k | << "' of function '" << NF->getName() << "'\n"); |
432 | 3.04k | |
433 | 3.04k | // All of the uses must be load instructions. Replace them all with |
434 | 3.04k | // the argument specified by ArgNo. |
435 | 6.19k | while (!GEP->use_empty()6.19k ) { |
436 | 3.14k | LoadInst *L = cast<LoadInst>(GEP->user_back()); |
437 | 3.14k | L->replaceAllUsesWith(&*TheArg); |
438 | 3.14k | L->eraseFromParent(); |
439 | 3.14k | } |
440 | 3.04k | GEP->eraseFromParent(); |
441 | 3.04k | } |
442 | 3.19k | } |
443 | 3.54k | |
444 | 3.54k | // Increment I2 past all of the arguments added for this promoted pointer. |
445 | 3.54k | std::advance(I2, ArgIndices.size()); |
446 | 3.54k | } |
447 | 2.72k | |
448 | 2.72k | return NF; |
449 | 2.72k | } |
450 | | |
451 | | /// AllCallersPassInValidPointerForArgument - Return true if we can prove that |
452 | | /// all callees pass in a valid pointer for the specified function argument. |
453 | 24.7k | static bool allCallersPassInValidPointerForArgument(Argument *Arg) { |
454 | 24.7k | Function *Callee = Arg->getParent(); |
455 | 24.7k | const DataLayout &DL = Callee->getParent()->getDataLayout(); |
456 | 24.7k | |
457 | 24.7k | unsigned ArgNo = Arg->getArgNo(); |
458 | 24.7k | |
459 | 24.7k | // Look at all call sites of the function. At this point we know we only have |
460 | 24.7k | // direct callees. |
461 | 31.2k | for (User *U : Callee->users()) { |
462 | 31.2k | CallSite CS(U); |
463 | 31.2k | assert(CS && "Should only have direct calls!"); |
464 | 31.2k | |
465 | 31.2k | if (!isDereferenceablePointer(CS.getArgument(ArgNo), DL)) |
466 | 21.1k | return false; |
467 | 3.62k | } |
468 | 3.62k | return true; |
469 | 3.62k | } |
470 | | |
471 | | /// Returns true if Prefix is a prefix of longer. That means, Longer has a size |
472 | | /// that is greater than or equal to the size of prefix, and each of the |
473 | | /// elements in Prefix is the same as the corresponding elements in Longer. |
474 | | /// |
475 | | /// This means it also returns true when Prefix and Longer are equal! |
476 | 14.2k | static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) { |
477 | 14.2k | if (Prefix.size() > Longer.size()) |
478 | 62 | return false; |
479 | 14.2k | return std::equal(Prefix.begin(), Prefix.end(), Longer.begin()); |
480 | 14.2k | } |
481 | | |
482 | | /// Checks if Indices, or a prefix of Indices, is in Set. |
483 | | static bool prefixIn(const IndicesVector &Indices, |
484 | 8.76k | std::set<IndicesVector> &Set) { |
485 | 8.76k | std::set<IndicesVector>::iterator Low; |
486 | 8.76k | Low = Set.upper_bound(Indices); |
487 | 8.76k | if (Low != Set.begin()) |
488 | 7.08k | Low--; |
489 | 8.76k | // Low is now the last element smaller than or equal to Indices. This means |
490 | 8.76k | // it points to a prefix of Indices (possibly Indices itself), if such |
491 | 8.76k | // prefix exists. |
492 | 8.76k | // |
493 | 8.76k | // This load is safe if any prefix of its operands is safe to load. |
494 | 7.51k | return Low != Set.end() && isPrefix(*Low, Indices); |
495 | 8.76k | } |
496 | | |
497 | | /// Mark the given indices (ToMark) as safe in the given set of indices |
498 | | /// (Safe). Marking safe usually means adding ToMark to Safe. However, if there |
499 | | /// is already a prefix of Indices in Safe, Indices are implicitely marked safe |
500 | | /// already. Furthermore, any indices that Indices is itself a prefix of, are |
501 | | /// removed from Safe (since they are implicitely safe because of Indices now). |
502 | | static void markIndicesSafe(const IndicesVector &ToMark, |
503 | 11.7k | std::set<IndicesVector> &Safe) { |
504 | 11.7k | std::set<IndicesVector>::iterator Low; |
505 | 11.7k | Low = Safe.upper_bound(ToMark); |
506 | 11.7k | // Guard against the case where Safe is empty |
507 | 11.7k | if (Low != Safe.begin()) |
508 | 4.94k | Low--; |
509 | 11.7k | // Low is now the last element smaller than or equal to Indices. This |
510 | 11.7k | // means it points to a prefix of Indices (possibly Indices itself), if |
511 | 11.7k | // such prefix exists. |
512 | 11.7k | if (Low != Safe.end()11.7k ) { |
513 | 5.50k | if (isPrefix(*Low, ToMark)) |
514 | 5.50k | // If there is already a prefix of these indices (or exactly these |
515 | 5.50k | // indices) marked a safe, don't bother adding these indices |
516 | 2.85k | return; |
517 | 2.65k | |
518 | 2.65k | // Increment Low, so we can use it as a "insert before" hint |
519 | 2.65k | ++Low; |
520 | 2.65k | } |
521 | 11.7k | // Insert |
522 | 8.89k | Low = Safe.insert(Low, ToMark); |
523 | 8.89k | ++Low; |
524 | 8.89k | // If there we're a prefix of longer index list(s), remove those |
525 | 8.89k | std::set<IndicesVector>::iterator End = Safe.end(); |
526 | 8.89k | while (Low != End && 8.89k isPrefix(ToMark, *Low)1.25k ) { |
527 | 0 | std::set<IndicesVector>::iterator Remove = Low; |
528 | 0 | ++Low; |
529 | 0 | Safe.erase(Remove); |
530 | 0 | } |
531 | 11.7k | } |
532 | | |
533 | | /// isSafeToPromoteArgument - As you might guess from the name of this method, |
534 | | /// it checks to see if it is both safe and useful to promote the argument. |
535 | | /// This method limits promotion of aggregates to only promote up to three |
536 | | /// elements of the aggregate in order to avoid exploding the number of |
537 | | /// arguments passed in. |
538 | | static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca, |
539 | 24.8k | AAResults &AAR, unsigned MaxElements) { |
540 | 24.8k | typedef std::set<IndicesVector> GEPIndicesSet; |
541 | 24.8k | |
542 | 24.8k | // Quick exit for unused arguments |
543 | 24.8k | if (Arg->use_empty()) |
544 | 29 | return true; |
545 | 24.8k | |
546 | 24.8k | // We can only promote this argument if all of the uses are loads, or are GEP |
547 | 24.8k | // instructions (with constant indices) that are subsequently loaded. |
548 | 24.8k | // |
549 | 24.8k | // Promoting the argument causes it to be loaded in the caller |
550 | 24.8k | // unconditionally. This is only safe if we can prove that either the load |
551 | 24.8k | // would have happened in the callee anyway (ie, there is a load in the entry |
552 | 24.8k | // block) or the pointer passed in at every call site is guaranteed to be |
553 | 24.8k | // valid. |
554 | 24.8k | // In the former case, invalid loads can happen, but would have happened |
555 | 24.8k | // anyway, in the latter case, invalid loads won't happen. This prevents us |
556 | 24.8k | // from introducing an invalid load that wouldn't have happened in the |
557 | 24.8k | // original code. |
558 | 24.8k | // |
559 | 24.8k | // This set will contain all sets of indices that are loaded in the entry |
560 | 24.8k | // block, and thus are safe to unconditionally load in the caller. |
561 | 24.8k | // |
562 | 24.8k | // This optimization is also safe for InAlloca parameters, because it verifies |
563 | 24.8k | // that the address isn't captured. |
564 | 24.8k | GEPIndicesSet SafeToUnconditionallyLoad; |
565 | 24.8k | |
566 | 24.8k | // This set contains all the sets of indices that we are planning to promote. |
567 | 24.8k | // This makes it possible to limit the number of arguments added. |
568 | 24.8k | GEPIndicesSet ToPromote; |
569 | 24.8k | |
570 | 24.8k | // If the pointer is always valid, any load with first index 0 is valid. |
571 | 24.8k | if (isByValOrInAlloca || 24.8k allCallersPassInValidPointerForArgument(Arg)24.7k ) |
572 | 3.64k | SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); |
573 | 24.8k | |
574 | 24.8k | // First, iterate the entry block and mark loads of (geps of) arguments as |
575 | 24.8k | // safe. |
576 | 24.8k | BasicBlock &EntryBlock = Arg->getParent()->front(); |
577 | 24.8k | // Declare this here so we can reuse it |
578 | 24.8k | IndicesVector Indices; |
579 | 24.8k | for (Instruction &I : EntryBlock) |
580 | 212k | if (LoadInst *212k LI212k = dyn_cast<LoadInst>(&I)) { |
581 | 39.9k | Value *V = LI->getPointerOperand(); |
582 | 39.9k | if (GetElementPtrInst *GEP39.9k = dyn_cast<GetElementPtrInst>(V)) { |
583 | 30.0k | V = GEP->getPointerOperand(); |
584 | 30.0k | if (V == Arg30.0k ) { |
585 | 11.1k | // This load actually loads (part of) Arg? Check the indices then. |
586 | 11.1k | Indices.reserve(GEP->getNumIndices()); |
587 | 11.1k | for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); |
588 | 31.6k | II != IE31.6k ; ++II20.5k ) |
589 | 20.9k | if (ConstantInt *20.9k CI20.9k = dyn_cast<ConstantInt>(*II)) |
590 | 20.5k | Indices.push_back(CI->getSExtValue()); |
591 | 20.9k | else |
592 | 20.9k | // We found a non-constant GEP index for this argument? Bail out |
593 | 20.9k | // right away, can't promote this argument at all. |
594 | 386 | return false; |
595 | 11.1k | |
596 | 11.1k | // Indices checked out, mark them as safe |
597 | 10.7k | markIndicesSafe(Indices, SafeToUnconditionallyLoad); |
598 | 10.7k | Indices.clear(); |
599 | 10.7k | } |
600 | 39.9k | } else if (9.88k V == Arg9.88k ) { |
601 | 966 | // Direct loads are equivalent to a GEP with a single 0 index. |
602 | 966 | markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad); |
603 | 966 | } |
604 | 212k | } |
605 | 24.8k | |
606 | 24.8k | // Now, iterate all uses of the argument to see if there are any uses that are |
607 | 24.8k | // not (GEP+)loads, or any (GEP+)loads that are not safe to promote. |
608 | 24.4k | SmallVector<LoadInst *, 16> Loads; |
609 | 24.4k | IndicesVector Operands; |
610 | 27.7k | for (Use &U : Arg->uses()) { |
611 | 27.7k | User *UR = U.getUser(); |
612 | 27.7k | Operands.clear(); |
613 | 27.7k | if (LoadInst *LI27.7k = dyn_cast<LoadInst>(UR)) { |
614 | 889 | // Don't hack volatile/atomic loads |
615 | 889 | if (!LI->isSimple()) |
616 | 0 | return false; |
617 | 889 | Loads.push_back(LI); |
618 | 889 | // Direct loads are equivalent to a GEP with a zero index and then a load. |
619 | 889 | Operands.push_back(0); |
620 | 27.7k | } else if (GetElementPtrInst *26.8k GEP26.8k = dyn_cast<GetElementPtrInst>(UR)) { |
621 | 14.2k | if (GEP->use_empty()14.2k ) { |
622 | 10 | // Dead GEP's cause trouble later. Just remove them if we run into |
623 | 10 | // them. |
624 | 10 | GEP->eraseFromParent(); |
625 | 10 | // TODO: This runs the above loop over and over again for dead GEPs |
626 | 10 | // Couldn't we just do increment the UI iterator earlier and erase the |
627 | 10 | // use? |
628 | 10 | return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR, |
629 | 10 | MaxElements); |
630 | 10 | } |
631 | 14.1k | |
632 | 14.1k | // Ensure that all of the indices are constants. |
633 | 38.8k | for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); 14.1k i != e38.8k ; |
634 | 24.6k | ++i) |
635 | 27.4k | if (ConstantInt *27.4k C27.4k = dyn_cast<ConstantInt>(*i)) |
636 | 24.6k | Operands.push_back(C->getSExtValue()); |
637 | 27.4k | else |
638 | 2.85k | return false; // Not a constant operand GEP! |
639 | 14.1k | |
640 | 14.1k | // Ensure that the only users of the GEP are load instructions. |
641 | 11.3k | for (User *GEPU : GEP->users()) |
642 | 12.7k | if (LoadInst *12.7k LI12.7k = dyn_cast<LoadInst>(GEPU)) { |
643 | 9.26k | // Don't hack volatile/atomic loads |
644 | 9.26k | if (!LI->isSimple()) |
645 | 0 | return false; |
646 | 9.26k | Loads.push_back(LI); |
647 | 12.7k | } else { |
648 | 3.46k | // Other uses than load? |
649 | 3.46k | return false; |
650 | 3.46k | } |
651 | 12.6k | } else { |
652 | 12.6k | return false; // Not a load or a GEP. |
653 | 12.6k | } |
654 | 8.76k | |
655 | 8.76k | // Now, see if it is safe to promote this load / loads of this GEP. Loading |
656 | 8.76k | // is safe if Operands, or a prefix of Operands, is marked as safe. |
657 | 8.76k | if (8.76k !prefixIn(Operands, SafeToUnconditionallyLoad)8.76k ) |
658 | 2.29k | return false; |
659 | 6.47k | |
660 | 6.47k | // See if we are already promoting a load with these indices. If not, check |
661 | 6.47k | // to make sure that we aren't promoting too many elements. If so, nothing |
662 | 6.47k | // to do. |
663 | 6.47k | if (6.47k ToPromote.find(Operands) == ToPromote.end()6.47k ) { |
664 | 5.86k | if (MaxElements > 0 && 5.86k ToPromote.size() == MaxElements5.86k ) { |
665 | 146 | DEBUG(dbgs() << "argpromotion not promoting argument '" |
666 | 146 | << Arg->getName() |
667 | 146 | << "' because it would require adding more " |
668 | 146 | << "than " << MaxElements |
669 | 146 | << " arguments to the function.\n"); |
670 | 146 | // We limit aggregate promotion to only promoting up to a fixed number |
671 | 146 | // of elements of the aggregate. |
672 | 146 | return false; |
673 | 146 | } |
674 | 5.71k | ToPromote.insert(std::move(Operands)); |
675 | 5.71k | } |
676 | 27.7k | } |
677 | 24.4k | |
678 | 2.98k | if (2.98k Loads.empty()2.98k ) |
679 | 0 | return true; // No users, this is a dead argument. |
680 | 2.98k | |
681 | 2.98k | // Okay, now we know that the argument is only used by load instructions and |
682 | 2.98k | // it is safe to unconditionally perform all of them. Use alias analysis to |
683 | 2.98k | // check to see if the pointer is guaranteed to not be modified from entry of |
684 | 2.98k | // the function to each of the load instructions. |
685 | 2.98k | |
686 | 2.98k | // Because there could be several/many load instructions, remember which |
687 | 2.98k | // blocks we know to be transparent to the load. |
688 | 2.98k | df_iterator_default_set<BasicBlock *, 16> TranspBlocks; |
689 | 2.98k | |
690 | 3.54k | for (LoadInst *Load : Loads) { |
691 | 3.54k | // Check to see if the load is invalidated from the start of the block to |
692 | 3.54k | // the load itself. |
693 | 3.54k | BasicBlock *BB = Load->getParent(); |
694 | 3.54k | |
695 | 3.54k | MemoryLocation Loc = MemoryLocation::get(Load); |
696 | 3.54k | if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod)) |
697 | 91 | return false; // Pointer is invalidated! |
698 | 3.45k | |
699 | 3.45k | // Now check every path from the entry block to the load for transparency. |
700 | 3.45k | // To do this, we perform a depth first search on the inverse CFG from the |
701 | 3.45k | // loading block. |
702 | 3.45k | for (BasicBlock *P : predecessors(BB)) 3.45k { |
703 | 385 | for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks)) |
704 | 949 | if (949 AAR.canBasicBlockModify(*TranspBB, Loc)949 ) |
705 | 105 | return false; |
706 | 2.78k | } |
707 | 3.54k | } |
708 | 2.78k | |
709 | 2.78k | // If the path from the entry of the function to each load is free of |
710 | 2.78k | // instructions that potentially invalidate the load, we can make the |
711 | 2.78k | // transformation! |
712 | 2.78k | return true; |
713 | 2.78k | } |
714 | | |
715 | | /// \brief Checks if a type could have padding bytes. |
716 | 141 | static bool isDenselyPacked(Type *type, const DataLayout &DL) { |
717 | 141 | |
718 | 141 | // There is no size information, so be conservative. |
719 | 141 | if (!type->isSized()) |
720 | 0 | return false; |
721 | 141 | |
722 | 141 | // If the alloc size is not equal to the storage size, then there are padding |
723 | 141 | // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128. |
724 | 141 | if (141 DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type)141 ) |
725 | 4 | return false; |
726 | 137 | |
727 | 137 | if (137 !isa<CompositeType>(type)137 ) |
728 | 96 | return true; |
729 | 41 | |
730 | 41 | // For homogenous sequential types, check for padding within members. |
731 | 41 | if (SequentialType *41 seqTy41 = dyn_cast<SequentialType>(type)) |
732 | 1 | return isDenselyPacked(seqTy->getElementType(), DL); |
733 | 40 | |
734 | 40 | // Check for padding within and between elements of a struct. |
735 | 40 | StructType *StructTy = cast<StructType>(type); |
736 | 40 | const StructLayout *Layout = DL.getStructLayout(StructTy); |
737 | 40 | uint64_t StartPos = 0; |
738 | 124 | for (unsigned i = 0, E = StructTy->getNumElements(); i < E124 ; ++i84 ) { |
739 | 92 | Type *ElTy = StructTy->getElementType(i); |
740 | 92 | if (!isDenselyPacked(ElTy, DL)) |
741 | 4 | return false; |
742 | 88 | if (88 StartPos != Layout->getElementOffsetInBits(i)88 ) |
743 | 4 | return false; |
744 | 84 | StartPos += DL.getTypeAllocSizeInBits(ElTy); |
745 | 84 | } |
746 | 40 | |
747 | 32 | return true; |
748 | 141 | } |
749 | | |
750 | | /// \brief Checks if the padding bytes of an argument could be accessed. |
751 | 8 | static bool canPaddingBeAccessed(Argument *arg) { |
752 | 8 | |
753 | 8 | assert(arg->hasByValAttr()); |
754 | 8 | |
755 | 8 | // Track all the pointers to the argument to make sure they are not captured. |
756 | 8 | SmallPtrSet<Value *, 16> PtrValues; |
757 | 8 | PtrValues.insert(arg); |
758 | 8 | |
759 | 8 | // Track all of the stores. |
760 | 8 | SmallVector<StoreInst *, 16> Stores; |
761 | 8 | |
762 | 8 | // Scan through the uses recursively to make sure the pointer is always used |
763 | 8 | // sanely. |
764 | 8 | SmallVector<Value *, 16> WorkList; |
765 | 8 | WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end()); |
766 | 22 | while (!WorkList.empty()22 ) { |
767 | 18 | Value *V = WorkList.back(); |
768 | 18 | WorkList.pop_back(); |
769 | 18 | if (isa<GetElementPtrInst>(V) || 18 isa<PHINode>(V)14 ) { |
770 | 10 | if (PtrValues.insert(V).second) |
771 | 8 | WorkList.insert(WorkList.end(), V->user_begin(), V->user_end()); |
772 | 18 | } else if (StoreInst *8 Store8 = dyn_cast<StoreInst>(V)) { |
773 | 2 | Stores.push_back(Store); |
774 | 8 | } else if (6 !isa<LoadInst>(V)6 ) { |
775 | 4 | return true; |
776 | 4 | } |
777 | 18 | } |
778 | 8 | |
779 | 8 | // Check to make sure the pointers aren't captured |
780 | 4 | for (StoreInst *Store : Stores) |
781 | 2 | if (2 PtrValues.count(Store->getValueOperand())2 ) |
782 | 2 | return true; |
783 | 2 | |
784 | 2 | return false; |
785 | 2 | } |
786 | | |
787 | | /// PromoteArguments - This method checks the specified function to see if there |
788 | | /// are any promotable arguments and if it is safe to promote the function (for |
789 | | /// example, all callers are direct). If safe to promote some arguments, it |
790 | | /// calls the DoPromotion method. |
791 | | /// |
792 | | static Function * |
793 | | promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, |
794 | | unsigned MaxElements, |
795 | | Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> |
796 | 719k | ReplaceCallSite) { |
797 | 719k | // Make sure that it is local to this module. |
798 | 719k | if (!F->hasLocalLinkage()) |
799 | 690k | return nullptr; |
800 | 28.3k | |
801 | 28.3k | // Don't promote arguments for variadic functions. Adding, removing, or |
802 | 28.3k | // changing non-pack parameters can change the classification of pack |
803 | 28.3k | // parameters. Frontends encode that classification at the call site in the |
804 | 28.3k | // IR, while in the callee the classification is determined dynamically based |
805 | 28.3k | // on the number of registers consumed so far. |
806 | 28.3k | if (28.3k F->isVarArg()28.3k ) |
807 | 23 | return nullptr; |
808 | 28.3k | |
809 | 28.3k | // First check: see if there are any pointer arguments! If not, quick exit. |
810 | 28.3k | SmallVector<Argument *, 16> PointerArgs; |
811 | 28.3k | for (Argument &I : F->args()) |
812 | 50.1k | if (50.1k I.getType()->isPointerTy()50.1k ) |
813 | 29.7k | PointerArgs.push_back(&I); |
814 | 28.3k | if (PointerArgs.empty()) |
815 | 10.7k | return nullptr; |
816 | 17.5k | |
817 | 17.5k | // Second check: make sure that all callers are direct callers. We can't |
818 | 17.5k | // transform functions that have indirect callers. Also see if the function |
819 | 17.5k | // is self-recursive. |
820 | 17.5k | bool isSelfRecursive = false; |
821 | 76.4k | for (Use &U : F->uses()) { |
822 | 76.4k | CallSite CS(U.getUser()); |
823 | 76.4k | // Must be a direct call. |
824 | 76.4k | if (CS.getInstruction() == nullptr || 76.4k !CS.isCallee(&U)74.3k ) |
825 | 2.45k | return nullptr; |
826 | 73.9k | |
827 | 73.9k | if (73.9k CS.getInstruction()->getParent()->getParent() == F73.9k ) |
828 | 983 | isSelfRecursive = true; |
829 | 76.4k | } |
830 | 17.5k | |
831 | 15.0k | const DataLayout &DL = F->getParent()->getDataLayout(); |
832 | 15.0k | |
833 | 15.0k | AAResults &AAR = AARGetter(*F); |
834 | 15.0k | |
835 | 15.0k | // Check to see which arguments are promotable. If an argument is promotable, |
836 | 15.0k | // add it to ArgsToPromote. |
837 | 15.0k | SmallPtrSet<Argument *, 8> ArgsToPromote; |
838 | 15.0k | SmallPtrSet<Argument *, 8> ByValArgsToTransform; |
839 | 25.3k | for (Argument *PtrArg : PointerArgs) { |
840 | 25.3k | Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); |
841 | 25.3k | |
842 | 25.3k | // Replace sret attribute with noalias. This reduces register pressure by |
843 | 25.3k | // avoiding a register copy. |
844 | 25.3k | if (PtrArg->hasStructRetAttr()25.3k ) { |
845 | 60 | unsigned ArgNo = PtrArg->getArgNo(); |
846 | 60 | F->removeParamAttr(ArgNo, Attribute::StructRet); |
847 | 60 | F->addParamAttr(ArgNo, Attribute::NoAlias); |
848 | 112 | for (Use &U : F->uses()) { |
849 | 112 | CallSite CS(U.getUser()); |
850 | 112 | CS.removeParamAttr(ArgNo, Attribute::StructRet); |
851 | 112 | CS.addParamAttr(ArgNo, Attribute::NoAlias); |
852 | 112 | } |
853 | 60 | } |
854 | 25.3k | |
855 | 25.3k | // If this is a byval argument, and if the aggregate type is small, just |
856 | 25.3k | // pass the elements, which is always safe, if the passed value is densely |
857 | 25.3k | // packed or if we can prove the padding bytes are never accessed. This does |
858 | 25.3k | // not apply to inalloca. |
859 | 25.3k | bool isSafeToPromote = |
860 | 25.3k | PtrArg->hasByValAttr() && |
861 | 48 | (isDenselyPacked(AgTy, DL) || 48 !canPaddingBeAccessed(PtrArg)8 ); |
862 | 25.3k | if (isSafeToPromote25.3k ) { |
863 | 42 | if (StructType *STy42 = dyn_cast<StructType>(AgTy)) { |
864 | 34 | if (MaxElements > 0 && 34 STy->getNumElements() > MaxElements34 ) { |
865 | 0 | DEBUG(dbgs() << "argpromotion disable promoting argument '" |
866 | 0 | << PtrArg->getName() |
867 | 0 | << "' because it would require adding more" |
868 | 0 | << " than " << MaxElements |
869 | 0 | << " arguments to the function.\n"); |
870 | 0 | continue; |
871 | 0 | } |
872 | 34 | |
873 | 34 | // If all the elements are single-value types, we can promote it. |
874 | 34 | bool AllSimple = true; |
875 | 82 | for (const auto *EltTy : STy->elements()) { |
876 | 82 | if (!EltTy->isSingleValueType()82 ) { |
877 | 1 | AllSimple = false; |
878 | 1 | break; |
879 | 1 | } |
880 | 34 | } |
881 | 34 | |
882 | 34 | // Safe to transform, don't even bother trying to "promote" it. |
883 | 34 | // Passing the elements as a scalar will allow sroa to hack on |
884 | 34 | // the new alloca we introduce. |
885 | 34 | if (AllSimple34 ) { |
886 | 33 | ByValArgsToTransform.insert(PtrArg); |
887 | 33 | continue; |
888 | 33 | } |
889 | 25.2k | } |
890 | 42 | } |
891 | 25.2k | |
892 | 25.2k | // If the argument is a recursive type and we're in a recursive |
893 | 25.2k | // function, we could end up infinitely peeling the function argument. |
894 | 25.2k | if (25.2k isSelfRecursive25.2k ) { |
895 | 1.15k | if (StructType *STy1.15k = dyn_cast<StructType>(AgTy)) { |
896 | 941 | bool RecursiveType = false; |
897 | 8.18k | for (const auto *EltTy : STy->elements()) { |
898 | 8.18k | if (EltTy == PtrArg->getType()8.18k ) { |
899 | 441 | RecursiveType = true; |
900 | 441 | break; |
901 | 441 | } |
902 | 941 | } |
903 | 941 | if (RecursiveType) |
904 | 441 | continue; |
905 | 24.8k | } |
906 | 1.15k | } |
907 | 24.8k | |
908 | 24.8k | // Otherwise, see if we can promote the pointer to its value. |
909 | 24.8k | if (24.8k isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR, |
910 | 24.8k | MaxElements)) |
911 | 2.81k | ArgsToPromote.insert(PtrArg); |
912 | 25.3k | } |
913 | 15.0k | |
914 | 15.0k | // No promotable pointer arguments. |
915 | 15.0k | if (ArgsToPromote.empty() && 15.0k ByValArgsToTransform.empty()12.3k ) |
916 | 12.3k | return nullptr; |
917 | 2.72k | |
918 | 2.72k | return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite); |
919 | 2.72k | } |
920 | | |
921 | | PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, |
922 | | CGSCCAnalysisManager &AM, |
923 | | LazyCallGraph &CG, |
924 | 66 | CGSCCUpdateResult &UR) { |
925 | 66 | bool Changed = false, LocalChange; |
926 | 66 | |
927 | 66 | // Iterate until we stop promoting from this SCC. |
928 | 85 | do { |
929 | 85 | LocalChange = false; |
930 | 85 | |
931 | 85 | for (LazyCallGraph::Node &N : C) { |
932 | 85 | Function &OldF = N.getFunction(); |
933 | 85 | |
934 | 85 | FunctionAnalysisManager &FAM = |
935 | 85 | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); |
936 | 85 | // FIXME: This lambda must only be used with this function. We should |
937 | 85 | // skip the lambda and just get the AA results directly. |
938 | 28 | auto AARGetter = [&](Function &F) -> AAResults & { |
939 | 28 | assert(&F == &OldF && "Called with an unexpected function!"); |
940 | 28 | return FAM.getResult<AAManager>(F); |
941 | 28 | }; |
942 | 85 | |
943 | 85 | Function *NewF = promoteArguments(&OldF, AARGetter, 3u, None); |
944 | 85 | if (!NewF) |
945 | 66 | continue; |
946 | 19 | LocalChange = true; |
947 | 19 | |
948 | 19 | // Directly substitute the functions in the call graph. Note that this |
949 | 19 | // requires the old function to be completely dead and completely |
950 | 19 | // replaced by the new function. It does no call graph updates, it merely |
951 | 19 | // swaps out the particular function mapped to a particular node in the |
952 | 19 | // graph. |
953 | 19 | C.getOuterRefSCC().replaceNodeFunction(N, *NewF); |
954 | 19 | OldF.eraseFromParent(); |
955 | 19 | } |
956 | 85 | |
957 | 85 | Changed |= LocalChange; |
958 | 85 | } while (LocalChange); |
959 | 66 | |
960 | 66 | if (!Changed) |
961 | 49 | return PreservedAnalyses::all(); |
962 | 17 | |
963 | 17 | return PreservedAnalyses::none(); |
964 | 17 | } |
965 | | |
966 | | namespace { |
967 | | /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. |
968 | | /// |
969 | | struct ArgPromotion : public CallGraphSCCPass { |
970 | 15.6k | void getAnalysisUsage(AnalysisUsage &AU) const override { |
971 | 15.6k | AU.addRequired<AssumptionCacheTracker>(); |
972 | 15.6k | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
973 | 15.6k | getAAResultsAnalysisUsage(AU); |
974 | 15.6k | CallGraphSCCPass::getAnalysisUsage(AU); |
975 | 15.6k | } |
976 | | |
977 | | bool runOnSCC(CallGraphSCC &SCC) override; |
978 | | static char ID; // Pass identification, replacement for typeid |
979 | | explicit ArgPromotion(unsigned MaxElements = 3) |
980 | 15.6k | : CallGraphSCCPass(ID), MaxElements(MaxElements) { |
981 | 15.6k | initializeArgPromotionPass(*PassRegistry::getPassRegistry()); |
982 | 15.6k | } |
983 | | |
984 | | private: |
985 | | using llvm::Pass::doInitialization; |
986 | | bool doInitialization(CallGraph &CG) override; |
987 | | /// The maximum number of elements to expand, or 0 for unlimited. |
988 | | unsigned MaxElements; |
989 | | }; |
990 | | } |
991 | | |
992 | | char ArgPromotion::ID = 0; |
993 | 23.5k | INITIALIZE_PASS_BEGIN23.5k (ArgPromotion, "argpromotion",
|
994 | 23.5k | "Promote 'by reference' arguments to scalars", false, |
995 | 23.5k | false) |
996 | 23.5k | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) |
997 | 23.5k | INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) |
998 | 23.5k | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) |
999 | 23.5k | INITIALIZE_PASS_END(ArgPromotion, "argpromotion", |
1000 | | "Promote 'by reference' arguments to scalars", false, false) |
1001 | | |
1002 | 15.6k | Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) { |
1003 | 15.6k | return new ArgPromotion(MaxElements); |
1004 | 15.6k | } |
1005 | | |
1006 | 744k | bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { |
1007 | 744k | if (skipSCC(SCC)) |
1008 | 16 | return false; |
1009 | 744k | |
1010 | 744k | // Get the callgraph information that we need to update to reflect our |
1011 | 744k | // changes. |
1012 | 744k | CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); |
1013 | 744k | |
1014 | 744k | LegacyAARGetter AARGetter(*this); |
1015 | 744k | |
1016 | 744k | bool Changed = false, LocalChange; |
1017 | 744k | |
1018 | 744k | // Iterate until we stop promoting from this SCC. |
1019 | 747k | do { |
1020 | 747k | LocalChange = false; |
1021 | 747k | // Attempt to promote arguments from all functions in this SCC. |
1022 | 747k | for (CallGraphNode *OldNode : SCC) { |
1023 | 747k | Function *OldF = OldNode->getFunction(); |
1024 | 747k | if (!OldF) |
1025 | 28.9k | continue; |
1026 | 719k | |
1027 | 719k | auto ReplaceCallSite = [&](CallSite OldCS, CallSite NewCS) 719k { |
1028 | 20.9k | Function *Caller = OldCS.getInstruction()->getParent()->getParent(); |
1029 | 20.9k | CallGraphNode *NewCalleeNode = |
1030 | 20.9k | CG.getOrInsertFunction(NewCS.getCalledFunction()); |
1031 | 20.9k | CallGraphNode *CallerNode = CG[Caller]; |
1032 | 20.9k | CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode); |
1033 | 20.9k | }; |
1034 | 719k | |
1035 | 719k | if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements, |
1036 | 2.70k | {ReplaceCallSite})) { |
1037 | 2.70k | LocalChange = true; |
1038 | 2.70k | |
1039 | 2.70k | // Update the call graph for the newly promoted function. |
1040 | 2.70k | CallGraphNode *NewNode = CG.getOrInsertFunction(NewF); |
1041 | 2.70k | NewNode->stealCalledFunctionsFrom(OldNode); |
1042 | 2.70k | if (OldNode->getNumReferences() == 0) |
1043 | 2.63k | delete CG.removeFunctionFromModule(OldNode); |
1044 | 2.70k | else |
1045 | 77 | OldF->setLinkage(Function::ExternalLinkage); |
1046 | 2.70k | |
1047 | 2.70k | // And updat ethe SCC we're iterating as well. |
1048 | 2.70k | SCC.ReplaceNode(OldNode, NewNode); |
1049 | 2.70k | } |
1050 | 747k | } |
1051 | 747k | // Remember that we changed something. |
1052 | 747k | Changed |= LocalChange; |
1053 | 747k | } while (LocalChange); |
1054 | 744k | |
1055 | 744k | return Changed; |
1056 | 744k | } |
1057 | | |
1058 | 15.6k | bool ArgPromotion::doInitialization(CallGraph &CG) { |
1059 | 15.6k | return CallGraphSCCPass::doInitialization(CG); |
1060 | 15.6k | } |