/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/IPO/MergeFunctions.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- MergeFunctions.cpp - Merge identical functions ---------------------===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | // |
10 | | // This pass looks for equivalent functions that are mergable and folds them. |
11 | | // |
12 | | // Order relation is defined on set of functions. It was made through |
13 | | // special function comparison procedure that returns |
14 | | // 0 when functions are equal, |
15 | | // -1 when Left function is less than right function, and |
16 | | // 1 for opposite case. We need total-ordering, so we need to maintain |
17 | | // four properties on the functions set: |
18 | | // a <= a (reflexivity) |
19 | | // if a <= b and b <= a then a = b (antisymmetry) |
20 | | // if a <= b and b <= c then a <= c (transitivity). |
21 | | // for all a and b: a <= b or b <= a (totality). |
22 | | // |
23 | | // Comparison iterates through each instruction in each basic block. |
24 | | // Functions are kept on binary tree. For each new function F we perform |
25 | | // lookup in binary tree. |
26 | | // In practice it works the following way: |
27 | | // -- We define Function* container class with custom "operator<" (FunctionPtr). |
28 | | // -- "FunctionPtr" instances are stored in std::set collection, so every |
29 | | // std::set::insert operation will give you result in log(N) time. |
30 | | // |
31 | | // As an optimization, a hash of the function structure is calculated first, and |
32 | | // two functions are only compared if they have the same hash. This hash is |
33 | | // cheap to compute, and has the property that if function F == G according to |
34 | | // the comparison function, then hash(F) == hash(G). This consistency property |
35 | | // is critical to ensuring all possible merging opportunities are exploited. |
36 | | // Collisions in the hash affect the speed of the pass but not the correctness |
37 | | // or determinism of the resulting transformation. |
38 | | // |
39 | | // When a match is found the functions are folded. If both functions are |
40 | | // overridable, we move the functionality into a new internal function and |
41 | | // leave two overridable thunks to it. |
42 | | // |
43 | | //===----------------------------------------------------------------------===// |
44 | | // |
45 | | // Future work: |
46 | | // |
47 | | // * virtual functions. |
48 | | // |
49 | | // Many functions have their address taken by the virtual function table for |
50 | | // the object they belong to. However, as long as it's only used for a lookup |
51 | | // and call, this is irrelevant, and we'd like to fold such functions. |
52 | | // |
53 | | // * be smarter about bitcasts. |
54 | | // |
55 | | // In order to fold functions, we will sometimes add either bitcast instructions |
56 | | // or bitcast constant expressions. Unfortunately, this can confound further |
57 | | // analysis since the two functions differ where one has a bitcast and the |
58 | | // other doesn't. We should learn to look through bitcasts. |
59 | | // |
60 | | // * Compare complex types with pointer types inside. |
61 | | // * Compare cross-reference cases. |
62 | | // * Compare complex expressions. |
63 | | // |
64 | | // All the three issues above could be described as ability to prove that |
65 | | // fA == fB == fC == fE == fF == fG in example below: |
66 | | // |
67 | | // void fA() { |
68 | | // fB(); |
69 | | // } |
70 | | // void fB() { |
71 | | // fA(); |
72 | | // } |
73 | | // |
74 | | // void fE() { |
75 | | // fF(); |
76 | | // } |
77 | | // void fF() { |
78 | | // fG(); |
79 | | // } |
80 | | // void fG() { |
81 | | // fE(); |
82 | | // } |
83 | | // |
84 | | // Simplest cross-reference case (fA <--> fB) was implemented in previous |
85 | | // versions of MergeFunctions, though it presented only in two function pairs |
86 | | // in test-suite (that counts >50k functions) |
87 | | // Though possibility to detect complex cross-referencing (e.g.: A->B->C->D->A) |
88 | | // could cover much more cases. |
89 | | // |
90 | | //===----------------------------------------------------------------------===// |
91 | | |
92 | | #include "llvm/ADT/Hashing.h" |
93 | | #include "llvm/ADT/STLExtras.h" |
94 | | #include "llvm/ADT/SmallSet.h" |
95 | | #include "llvm/ADT/Statistic.h" |
96 | | #include "llvm/IR/CallSite.h" |
97 | | #include "llvm/IR/Constants.h" |
98 | | #include "llvm/IR/DataLayout.h" |
99 | | #include "llvm/IR/DebugInfo.h" |
100 | | #include "llvm/IR/IRBuilder.h" |
101 | | #include "llvm/IR/Instructions.h" |
102 | | #include "llvm/IR/IntrinsicInst.h" |
103 | | #include "llvm/IR/LLVMContext.h" |
104 | | #include "llvm/IR/Module.h" |
105 | | #include "llvm/IR/ValueHandle.h" |
106 | | #include "llvm/IR/ValueMap.h" |
107 | | #include "llvm/Pass.h" |
108 | | #include "llvm/Support/CommandLine.h" |
109 | | #include "llvm/Support/Debug.h" |
110 | | #include "llvm/Support/ErrorHandling.h" |
111 | | #include "llvm/Support/raw_ostream.h" |
112 | | #include "llvm/Transforms/IPO.h" |
113 | | #include "llvm/Transforms/Utils/FunctionComparator.h" |
114 | | #include <vector> |
115 | | |
116 | | using namespace llvm; |
117 | | |
118 | | #define DEBUG_TYPE "mergefunc" |
119 | | |
120 | | STATISTIC(NumFunctionsMerged, "Number of functions merged"); |
121 | | STATISTIC(NumThunksWritten, "Number of thunks generated"); |
122 | | STATISTIC(NumDoubleWeak, "Number of new functions created"); |
123 | | |
124 | | static cl::opt<unsigned> NumFunctionsForSanityCheck( |
125 | | "mergefunc-sanity", |
126 | | cl::desc("How many functions in module could be used for " |
127 | | "MergeFunctions pass sanity check. " |
128 | | "'0' disables this check. Works only with '-debug' key."), |
129 | | cl::init(0), cl::Hidden); |
130 | | |
131 | | // Under option -mergefunc-preserve-debug-info we: |
132 | | // - Do not create a new function for a thunk. |
133 | | // - Retain the debug info for a thunk's parameters (and associated |
134 | | // instructions for the debug info) from the entry block. |
135 | | // Note: -debug will display the algorithm at work. |
136 | | // - Create debug-info for the call (to the shared implementation) made by |
137 | | // a thunk and its return value. |
138 | | // - Erase the rest of the function, retaining the (minimally sized) entry |
139 | | // block to create a thunk. |
140 | | // - Preserve a thunk's call site to point to the thunk even when both occur |
141 | | // within the same translation unit, to aid debugability. Note that this |
142 | | // behaviour differs from the underlying -mergefunc implementation which |
143 | | // modifies the thunk's call site to point to the shared implementation |
144 | | // when both occur within the same translation unit. |
145 | | static cl::opt<bool> |
146 | | MergeFunctionsPDI("mergefunc-preserve-debug-info", cl::Hidden, |
147 | | cl::init(false), |
148 | | cl::desc("Preserve debug info in thunk when mergefunc " |
149 | | "transformations are made.")); |
150 | | |
151 | | namespace { |
152 | | |
153 | | class FunctionNode { |
154 | | mutable AssertingVH<Function> F; |
155 | | FunctionComparator::FunctionHash Hash; |
156 | | public: |
157 | | // Note the hash is recalculated potentially multiple times, but it is cheap. |
158 | | FunctionNode(Function *F) |
159 | 127 | : F(F), Hash(FunctionComparator::functionHash(*F)) {} |
160 | 446 | Function *getFunc() const { return F; } |
161 | 552 | FunctionComparator::FunctionHash getHash() const { return Hash; } |
162 | | |
163 | | /// Replace the reference to the function F by the function G, assuming their |
164 | | /// implementations are equal. |
165 | 8 | void replaceBy(Function *G) const { |
166 | 8 | F = G; |
167 | 8 | } |
168 | | |
169 | 0 | void release() { F = nullptr; } |
170 | | }; |
171 | | |
172 | | /// MergeFunctions finds functions which will generate identical machine code, |
173 | | /// by considering all pointer types to be equivalent. Once identified, |
174 | | /// MergeFunctions will fold them by replacing a call to one to a call to a |
175 | | /// bitcast of the other. |
176 | | /// |
177 | | class MergeFunctions : public ModulePass { |
178 | | public: |
179 | | static char ID; |
180 | | MergeFunctions() |
181 | 45 | : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree() { |
182 | 45 | initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); |
183 | 45 | } |
184 | | |
185 | | bool runOnModule(Module &M) override; |
186 | | |
187 | | private: |
188 | | // The function comparison operator is provided here so that FunctionNodes do |
189 | | // not need to become larger with another pointer. |
190 | | class FunctionNodeCmp { |
191 | | GlobalNumberState* GlobalNumbers; |
192 | | public: |
193 | 45 | FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {} |
194 | 214 | bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const { |
195 | 214 | // Order first by hashes, then full function comparison. |
196 | 214 | if (LHS.getHash() != RHS.getHash()) |
197 | 62 | return LHS.getHash() < RHS.getHash(); |
198 | 152 | FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers); |
199 | 152 | return FCmp.compare() == -1; |
200 | 152 | } |
201 | | }; |
202 | | typedef std::set<FunctionNode, FunctionNodeCmp> FnTreeType; |
203 | | |
204 | | GlobalNumberState GlobalNumbers; |
205 | | |
206 | | /// A work queue of functions that may have been modified and should be |
207 | | /// analyzed again. |
208 | | std::vector<WeakTrackingVH> Deferred; |
209 | | |
210 | | /// Checks the rules of order relation introduced among functions set. |
211 | | /// Returns true, if sanity check has been passed, and false if failed. |
212 | | #ifndef NDEBUG |
213 | | bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist); |
214 | | #endif |
215 | | |
216 | | /// Insert a ComparableFunction into the FnTree, or merge it away if it's |
217 | | /// equal to one that's already present. |
218 | | bool insert(Function *NewFunction); |
219 | | |
220 | | /// Remove a Function from the FnTree and queue it up for a second sweep of |
221 | | /// analysis. |
222 | | void remove(Function *F); |
223 | | |
224 | | /// Find the functions that use this Value and remove them from FnTree and |
225 | | /// queue the functions. |
226 | | void removeUsers(Value *V); |
227 | | |
228 | | /// Replace all direct calls of Old with calls of New. Will bitcast New if |
229 | | /// necessary to make types match. |
230 | | void replaceDirectCallers(Function *Old, Function *New); |
231 | | |
232 | | /// Merge two equivalent functions. Upon completion, G may be deleted, or may |
233 | | /// be converted into a thunk. In either case, it should never be visited |
234 | | /// again. |
235 | | void mergeTwoFunctions(Function *F, Function *G); |
236 | | |
237 | | /// Fill PDIUnrelatedWL with instructions from the entry block that are |
238 | | /// unrelated to parameter related debug info. |
239 | | void filterInstsUnrelatedToPDI(BasicBlock *GEntryBlock, |
240 | | std::vector<Instruction *> &PDIUnrelatedWL); |
241 | | |
242 | | /// Erase the rest of the CFG (i.e. barring the entry block). |
243 | | void eraseTail(Function *G); |
244 | | |
245 | | /// Erase the instructions in PDIUnrelatedWL as they are unrelated to the |
246 | | /// parameter debug info, from the entry block. |
247 | | void eraseInstsUnrelatedToPDI(std::vector<Instruction *> &PDIUnrelatedWL); |
248 | | |
249 | | /// Replace G with a simple tail call to bitcast(F). Also (unless |
250 | | /// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F), |
251 | | /// delete G. |
252 | | void writeThunk(Function *F, Function *G); |
253 | | |
254 | | /// Replace function F with function G in the function tree. |
255 | | void replaceFunctionInTree(const FunctionNode &FN, Function *G); |
256 | | |
257 | | /// The set of all distinct functions. Use the insert() and remove() methods |
258 | | /// to modify it. The map allows efficient lookup and deferring of Functions. |
259 | | FnTreeType FnTree; |
260 | | // Map functions to the iterators of the FunctionNode which contains them |
261 | | // in the FnTree. This must be updated carefully whenever the FnTree is |
262 | | // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid |
263 | | // dangling iterators into FnTree. The invariant that preserves this is that |
264 | | // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree. |
265 | | ValueMap<Function*, FnTreeType::iterator> FNodesInTree; |
266 | | }; |
267 | | |
268 | | } // end anonymous namespace |
269 | | |
270 | | char MergeFunctions::ID = 0; |
271 | | INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) |
272 | | |
273 | 1 | ModulePass *llvm::createMergeFunctionsPass() { |
274 | 1 | return new MergeFunctions(); |
275 | 1 | } |
276 | | |
277 | | #ifndef NDEBUG |
278 | | bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) { |
279 | | if (const unsigned Max = NumFunctionsForSanityCheck) { |
280 | | unsigned TripleNumber = 0; |
281 | | bool Valid = true; |
282 | | |
283 | | dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; |
284 | | |
285 | | unsigned i = 0; |
286 | | for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(), |
287 | | E = Worklist.end(); |
288 | | I != E && i < Max; ++I, ++i) { |
289 | | unsigned j = i; |
290 | | for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max; |
291 | | ++J, ++j) { |
292 | | Function *F1 = cast<Function>(*I); |
293 | | Function *F2 = cast<Function>(*J); |
294 | | int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); |
295 | | int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare(); |
296 | | |
297 | | // If F1 <= F2, then F2 >= F1, otherwise report failure. |
298 | | if (Res1 != -Res2) { |
299 | | dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber |
300 | | << "\n"; |
301 | | dbgs() << *F1 << '\n' << *F2 << '\n'; |
302 | | Valid = false; |
303 | | } |
304 | | |
305 | | if (Res1 == 0) |
306 | | continue; |
307 | | |
308 | | unsigned k = j; |
309 | | for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max; |
310 | | ++k, ++K, ++TripleNumber) { |
311 | | if (K == J) |
312 | | continue; |
313 | | |
314 | | Function *F3 = cast<Function>(*K); |
315 | | int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare(); |
316 | | int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare(); |
317 | | |
318 | | bool Transitive = true; |
319 | | |
320 | | if (Res1 != 0 && Res1 == Res4) { |
321 | | // F1 > F2, F2 > F3 => F1 > F3 |
322 | | Transitive = Res3 == Res1; |
323 | | } else if (Res3 != 0 && Res3 == -Res4) { |
324 | | // F1 > F3, F3 > F2 => F1 > F2 |
325 | | Transitive = Res3 == Res1; |
326 | | } else if (Res4 != 0 && -Res3 == Res4) { |
327 | | // F2 > F3, F3 > F1 => F2 > F1 |
328 | | Transitive = Res4 == -Res1; |
329 | | } |
330 | | |
331 | | if (!Transitive) { |
332 | | dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: " |
333 | | << TripleNumber << "\n"; |
334 | | dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", " |
335 | | << Res4 << "\n"; |
336 | | dbgs() << *F1 << '\n' << *F2 << '\n' << *F3 << '\n'; |
337 | | Valid = false; |
338 | | } |
339 | | } |
340 | | } |
341 | | } |
342 | | |
343 | | dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n"; |
344 | | return Valid; |
345 | | } |
346 | | return true; |
347 | | } |
348 | | #endif |
349 | | |
350 | 45 | bool MergeFunctions::runOnModule(Module &M) { |
351 | 45 | if (skipModule(M)) |
352 | 0 | return false; |
353 | 45 | |
354 | 45 | bool Changed = false; |
355 | 45 | |
356 | 45 | // All functions in the module, ordered by hash. Functions with a unique |
357 | 45 | // hash value are easily eliminated. |
358 | 45 | std::vector<std::pair<FunctionComparator::FunctionHash, Function *>> |
359 | 45 | HashedFuncs; |
360 | 157 | for (Function &Func : M) { |
361 | 157 | if (!Func.isDeclaration() && 157 !Func.hasAvailableExternallyLinkage()140 ) { |
362 | 140 | HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func}); |
363 | 140 | } |
364 | 157 | } |
365 | 45 | |
366 | 45 | std::stable_sort( |
367 | 45 | HashedFuncs.begin(), HashedFuncs.end(), |
368 | 45 | [](const std::pair<FunctionComparator::FunctionHash, Function *> &a, |
369 | 153 | const std::pair<FunctionComparator::FunctionHash, Function *> &b) { |
370 | 153 | return a.first < b.first; |
371 | 153 | }); |
372 | 45 | |
373 | 45 | auto S = HashedFuncs.begin(); |
374 | 185 | for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE185 ; ++I140 ) { |
375 | 140 | // If the hash value matches the previous value or the next one, we must |
376 | 140 | // consider merging it. Otherwise it is dropped and never considered again. |
377 | 140 | if ((I != S && 140 std::prev(I)->first == I->first95 ) || |
378 | 140 | (std::next(I) != IE && 67 std::next(I)->first == I->first61 ) ) { |
379 | 125 | Deferred.push_back(WeakTrackingVH(I->second)); |
380 | 125 | } |
381 | 140 | } |
382 | 45 | |
383 | 47 | do { |
384 | 47 | std::vector<WeakTrackingVH> Worklist; |
385 | 47 | Deferred.swap(Worklist); |
386 | 47 | |
387 | 47 | DEBUG(doSanityCheck(Worklist)); |
388 | 47 | |
389 | 47 | DEBUG(dbgs() << "size of module: " << M.size() << '\n'); |
390 | 47 | DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); |
391 | 47 | |
392 | 47 | // Insert functions and merge them. |
393 | 127 | for (WeakTrackingVH &I : Worklist) { |
394 | 127 | if (!I) |
395 | 0 | continue; |
396 | 127 | Function *F = cast<Function>(I); |
397 | 127 | if (!F->isDeclaration() && 127 !F->hasAvailableExternallyLinkage()127 ) { |
398 | 127 | Changed |= insert(F); |
399 | 127 | } |
400 | 127 | } |
401 | 47 | DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n'); |
402 | 47 | } while (!Deferred.empty()); |
403 | 45 | |
404 | 45 | FnTree.clear(); |
405 | 45 | GlobalNumbers.clear(); |
406 | 45 | |
407 | 45 | return Changed; |
408 | 45 | } |
409 | | |
410 | | // Replace direct callers of Old with New. |
411 | 28 | void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { |
412 | 28 | Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType()); |
413 | 38 | for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE38 ;) { |
414 | 10 | Use *U = &*UI; |
415 | 10 | ++UI; |
416 | 10 | CallSite CS(U->getUser()); |
417 | 10 | if (CS && 10 CS.isCallee(U)3 ) { |
418 | 3 | // Transfer the called function's attributes to the call site. Due to the |
419 | 3 | // bitcast we will 'lose' ABI changing attributes because the 'called |
420 | 3 | // function' is no longer a Function* but the bitcast. Code that looks up |
421 | 3 | // the attributes from the called function will fail. |
422 | 3 | |
423 | 3 | // FIXME: This is not actually true, at least not anymore. The callsite |
424 | 3 | // will always have the same ABI affecting attributes as the callee, |
425 | 3 | // because otherwise the original input has UB. Note that Old and New |
426 | 3 | // always have matching ABI, so no attributes need to be changed. |
427 | 3 | // Transferring other attributes may help other optimizations, but that |
428 | 3 | // should be done uniformly and not in this ad-hoc way. |
429 | 3 | auto &Context = New->getContext(); |
430 | 3 | auto NewPAL = New->getAttributes(); |
431 | 3 | SmallVector<AttributeSet, 4> NewArgAttrs; |
432 | 10 | for (unsigned argIdx = 0; argIdx < CS.arg_size()10 ; argIdx++7 ) |
433 | 7 | NewArgAttrs.push_back(NewPAL.getParamAttributes(argIdx)); |
434 | 3 | // Don't transfer attributes from the function to the callee. Function |
435 | 3 | // attributes typically aren't relevant to the calling convention or ABI. |
436 | 3 | CS.setAttributes(AttributeList::get(Context, /*FnAttrs=*/AttributeSet(), |
437 | 3 | NewPAL.getRetAttributes(), |
438 | 3 | NewArgAttrs)); |
439 | 3 | |
440 | 3 | remove(CS.getInstruction()->getParent()->getParent()); |
441 | 3 | U->set(BitcastNew); |
442 | 3 | } |
443 | 10 | } |
444 | 28 | } |
445 | | |
446 | | // Helper for writeThunk, |
447 | | // Selects proper bitcast operation, |
448 | | // but a bit simpler then CastInst::getCastOpcode. |
449 | 60 | static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { |
450 | 60 | Type *SrcTy = V->getType(); |
451 | 60 | if (SrcTy->isStructTy()60 ) { |
452 | 1 | assert(DestTy->isStructTy()); |
453 | 1 | assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements()); |
454 | 1 | Value *Result = UndefValue::get(DestTy); |
455 | 3 | for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E3 ; ++I2 ) { |
456 | 2 | Value *Element = createCast( |
457 | 2 | Builder, Builder.CreateExtractValue(V, makeArrayRef(I)), |
458 | 2 | DestTy->getStructElementType(I)); |
459 | 2 | |
460 | 2 | Result = |
461 | 2 | Builder.CreateInsertValue(Result, Element, makeArrayRef(I)); |
462 | 2 | } |
463 | 1 | return Result; |
464 | 1 | } |
465 | 60 | assert(!DestTy->isStructTy()); |
466 | 59 | if (SrcTy->isIntegerTy() && 59 DestTy->isPointerTy()34 ) |
467 | 3 | return Builder.CreateIntToPtr(V, DestTy); |
468 | 56 | else if (56 SrcTy->isPointerTy() && 56 DestTy->isIntegerTy()25 ) |
469 | 2 | return Builder.CreatePtrToInt(V, DestTy); |
470 | 56 | else |
471 | 54 | return Builder.CreateBitCast(V, DestTy); |
472 | 0 | } |
473 | | |
474 | | // Erase the instructions in PDIUnrelatedWL as they are unrelated to the |
475 | | // parameter debug info, from the entry block. |
476 | | void MergeFunctions::eraseInstsUnrelatedToPDI( |
477 | 2 | std::vector<Instruction *> &PDIUnrelatedWL) { |
478 | 2 | |
479 | 2 | DEBUG(dbgs() << " Erasing instructions (in reverse order of appearance in " |
480 | 2 | "entry block) unrelated to parameter debug info from entry " |
481 | 2 | "block: {\n"); |
482 | 14 | while (!PDIUnrelatedWL.empty()14 ) { |
483 | 12 | Instruction *I = PDIUnrelatedWL.back(); |
484 | 12 | DEBUG(dbgs() << " Deleting Instruction: "); |
485 | 12 | DEBUG(I->print(dbgs())); |
486 | 12 | DEBUG(dbgs() << "\n"); |
487 | 12 | I->eraseFromParent(); |
488 | 12 | PDIUnrelatedWL.pop_back(); |
489 | 12 | } |
490 | 2 | DEBUG(dbgs() << " } // Done erasing instructions unrelated to parameter " |
491 | 2 | "debug info from entry block. \n"); |
492 | 2 | } |
493 | | |
494 | | // Reduce G to its entry block. |
495 | 2 | void MergeFunctions::eraseTail(Function *G) { |
496 | 2 | |
497 | 2 | std::vector<BasicBlock *> WorklistBB; |
498 | 2 | for (Function::iterator BBI = std::next(G->begin()), BBE = G->end(); |
499 | 5 | BBI != BBE5 ; ++BBI3 ) { |
500 | 3 | BBI->dropAllReferences(); |
501 | 3 | WorklistBB.push_back(&*BBI); |
502 | 3 | } |
503 | 5 | while (!WorklistBB.empty()5 ) { |
504 | 3 | BasicBlock *BB = WorklistBB.back(); |
505 | 3 | BB->eraseFromParent(); |
506 | 3 | WorklistBB.pop_back(); |
507 | 3 | } |
508 | 2 | } |
509 | | |
510 | | // We are interested in the following instructions from the entry block as being |
511 | | // related to parameter debug info: |
512 | | // - @llvm.dbg.declare |
513 | | // - stores from the incoming parameters to locations on the stack-frame |
514 | | // - allocas that create these locations on the stack-frame |
515 | | // - @llvm.dbg.value |
516 | | // - the entry block's terminator |
517 | | // The rest are unrelated to debug info for the parameters; fill up |
518 | | // PDIUnrelatedWL with such instructions. |
519 | | void MergeFunctions::filterInstsUnrelatedToPDI( |
520 | 2 | BasicBlock *GEntryBlock, std::vector<Instruction *> &PDIUnrelatedWL) { |
521 | 2 | |
522 | 2 | std::set<Instruction *> PDIRelated; |
523 | 2 | for (BasicBlock::iterator BI = GEntryBlock->begin(), BIE = GEntryBlock->end(); |
524 | 24 | BI != BIE24 ; ++BI22 ) { |
525 | 22 | if (auto *DVI22 = dyn_cast<DbgValueInst>(&*BI)) { |
526 | 3 | DEBUG(dbgs() << " Deciding: "); |
527 | 3 | DEBUG(BI->print(dbgs())); |
528 | 3 | DEBUG(dbgs() << "\n"); |
529 | 3 | DILocalVariable *DILocVar = DVI->getVariable(); |
530 | 3 | if (DILocVar->isParameter()3 ) { |
531 | 2 | DEBUG(dbgs() << " Include (parameter): "); |
532 | 2 | DEBUG(BI->print(dbgs())); |
533 | 2 | DEBUG(dbgs() << "\n"); |
534 | 2 | PDIRelated.insert(&*BI); |
535 | 3 | } else { |
536 | 1 | DEBUG(dbgs() << " Delete (!parameter): "); |
537 | 1 | DEBUG(BI->print(dbgs())); |
538 | 1 | DEBUG(dbgs() << "\n"); |
539 | 1 | } |
540 | 22 | } else if (auto *19 DDI19 = dyn_cast<DbgDeclareInst>(&*BI)) { |
541 | 5 | DEBUG(dbgs() << " Deciding: "); |
542 | 5 | DEBUG(BI->print(dbgs())); |
543 | 5 | DEBUG(dbgs() << "\n"); |
544 | 5 | DILocalVariable *DILocVar = DDI->getVariable(); |
545 | 5 | if (DILocVar->isParameter()5 ) { |
546 | 2 | DEBUG(dbgs() << " Parameter: "); |
547 | 2 | DEBUG(DILocVar->print(dbgs())); |
548 | 2 | AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress()); |
549 | 2 | if (AI2 ) { |
550 | 2 | DEBUG(dbgs() << " Processing alloca users: "); |
551 | 2 | DEBUG(dbgs() << "\n"); |
552 | 6 | for (User *U : AI->users()) { |
553 | 6 | if (StoreInst *SI6 = dyn_cast<StoreInst>(U)) { |
554 | 2 | if (Value *Arg2 = SI->getValueOperand()) { |
555 | 2 | if (dyn_cast<Argument>(Arg)2 ) { |
556 | 2 | DEBUG(dbgs() << " Include: "); |
557 | 2 | DEBUG(AI->print(dbgs())); |
558 | 2 | DEBUG(dbgs() << "\n"); |
559 | 2 | PDIRelated.insert(AI); |
560 | 2 | DEBUG(dbgs() << " Include (parameter): "); |
561 | 2 | DEBUG(SI->print(dbgs())); |
562 | 2 | DEBUG(dbgs() << "\n"); |
563 | 2 | PDIRelated.insert(SI); |
564 | 2 | DEBUG(dbgs() << " Include: "); |
565 | 2 | DEBUG(BI->print(dbgs())); |
566 | 2 | DEBUG(dbgs() << "\n"); |
567 | 2 | PDIRelated.insert(&*BI); |
568 | 0 | } else { |
569 | 0 | DEBUG(dbgs() << " Delete (!parameter): "); |
570 | 0 | DEBUG(SI->print(dbgs())); |
571 | 0 | DEBUG(dbgs() << "\n"); |
572 | 0 | } |
573 | 2 | } |
574 | 6 | } else { |
575 | 4 | DEBUG(dbgs() << " Defer: "); |
576 | 4 | DEBUG(U->print(dbgs())); |
577 | 4 | DEBUG(dbgs() << "\n"); |
578 | 4 | } |
579 | 6 | } |
580 | 0 | } else { |
581 | 0 | DEBUG(dbgs() << " Delete (alloca NULL): "); |
582 | 0 | DEBUG(BI->print(dbgs())); |
583 | 0 | DEBUG(dbgs() << "\n"); |
584 | 0 | } |
585 | 5 | } else { |
586 | 3 | DEBUG(dbgs() << " Delete (!parameter): "); |
587 | 3 | DEBUG(BI->print(dbgs())); |
588 | 3 | DEBUG(dbgs() << "\n"); |
589 | 3 | } |
590 | 19 | } else if (14 dyn_cast<TerminatorInst>(BI) == GEntryBlock->getTerminator()14 ) { |
591 | 2 | DEBUG(dbgs() << " Will Include Terminator: "); |
592 | 2 | DEBUG(BI->print(dbgs())); |
593 | 2 | DEBUG(dbgs() << "\n"); |
594 | 2 | PDIRelated.insert(&*BI); |
595 | 14 | } else { |
596 | 12 | DEBUG(dbgs() << " Defer: "); |
597 | 12 | DEBUG(BI->print(dbgs())); |
598 | 12 | DEBUG(dbgs() << "\n"); |
599 | 19 | } |
600 | 22 | } |
601 | 2 | DEBUG(dbgs() |
602 | 2 | << " Report parameter debug info related/related instructions: {\n"); |
603 | 2 | for (BasicBlock::iterator BI = GEntryBlock->begin(), BE = GEntryBlock->end(); |
604 | 24 | BI != BE24 ; ++BI22 ) { |
605 | 22 | |
606 | 22 | Instruction *I = &*BI; |
607 | 22 | if (PDIRelated.find(I) == PDIRelated.end()22 ) { |
608 | 12 | DEBUG(dbgs() << " !PDIRelated: "); |
609 | 12 | DEBUG(I->print(dbgs())); |
610 | 12 | DEBUG(dbgs() << "\n"); |
611 | 12 | PDIUnrelatedWL.push_back(I); |
612 | 22 | } else { |
613 | 10 | DEBUG(dbgs() << " PDIRelated: "); |
614 | 10 | DEBUG(I->print(dbgs())); |
615 | 10 | DEBUG(dbgs() << "\n"); |
616 | 10 | } |
617 | 22 | } |
618 | 2 | DEBUG(dbgs() << " }\n"); |
619 | 2 | } |
620 | | |
621 | | // Replace G with a simple tail call to bitcast(F). Also (unless |
622 | | // MergeFunctionsPDI holds) replace direct uses of G with bitcast(F), |
623 | | // delete G. Under MergeFunctionsPDI, we use G itself for creating |
624 | | // the thunk as we preserve the debug info (and associated instructions) |
625 | | // from G's entry block pertaining to G's incoming arguments which are |
626 | | // passed on as corresponding arguments in the call that G makes to F. |
627 | | // For better debugability, under MergeFunctionsPDI, we do not modify G's |
628 | | // call sites to point to F even when within the same translation unit. |
629 | 33 | void MergeFunctions::writeThunk(Function *F, Function *G) { |
630 | 33 | if (!G->isInterposable() && 33 !MergeFunctionsPDI30 ) { |
631 | 28 | // Redirect direct callers of G to F. (See note on MergeFunctionsPDI |
632 | 28 | // above). |
633 | 28 | replaceDirectCallers(G, F); |
634 | 28 | } |
635 | 33 | |
636 | 33 | // If G was internal then we may have replaced all uses of G with F. If so, |
637 | 33 | // stop here and delete G. There's no need for a thunk. (See note on |
638 | 33 | // MergeFunctionsPDI above). |
639 | 33 | if (G->hasLocalLinkage() && 33 G->use_empty()13 && !MergeFunctionsPDI9 ) { |
640 | 9 | G->eraseFromParent(); |
641 | 9 | return; |
642 | 9 | } |
643 | 24 | |
644 | 24 | BasicBlock *GEntryBlock = nullptr; |
645 | 24 | std::vector<Instruction *> PDIUnrelatedWL; |
646 | 24 | BasicBlock *BB = nullptr; |
647 | 24 | Function *NewG = nullptr; |
648 | 24 | if (MergeFunctionsPDI24 ) { |
649 | 2 | DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) Do not create a new " |
650 | 2 | "function as thunk; retain original: " |
651 | 2 | << G->getName() << "()\n"); |
652 | 2 | GEntryBlock = &G->getEntryBlock(); |
653 | 2 | DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) filter parameter related " |
654 | 2 | "debug info for " |
655 | 2 | << G->getName() << "() {\n"); |
656 | 2 | filterInstsUnrelatedToPDI(GEntryBlock, PDIUnrelatedWL); |
657 | 2 | GEntryBlock->getTerminator()->eraseFromParent(); |
658 | 2 | BB = GEntryBlock; |
659 | 24 | } else { |
660 | 22 | NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", |
661 | 22 | G->getParent()); |
662 | 22 | BB = BasicBlock::Create(F->getContext(), "", NewG); |
663 | 22 | } |
664 | 24 | |
665 | 24 | IRBuilder<> Builder(BB); |
666 | 24 | Function *H = MergeFunctionsPDI ? G2 : NewG22 ; |
667 | 24 | SmallVector<Value *, 16> Args; |
668 | 24 | unsigned i = 0; |
669 | 24 | FunctionType *FFTy = F->getFunctionType(); |
670 | 41 | for (Argument & AI : H->args()) { |
671 | 41 | Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i))); |
672 | 41 | ++i; |
673 | 41 | } |
674 | 24 | |
675 | 24 | CallInst *CI = Builder.CreateCall(F, Args); |
676 | 24 | ReturnInst *RI = nullptr; |
677 | 24 | CI->setTailCall(); |
678 | 24 | CI->setCallingConv(F->getCallingConv()); |
679 | 24 | CI->setAttributes(F->getAttributes()); |
680 | 24 | if (H->getReturnType()->isVoidTy()24 ) { |
681 | 7 | RI = Builder.CreateRetVoid(); |
682 | 24 | } else { |
683 | 17 | RI = Builder.CreateRet(createCast(Builder, CI, H->getReturnType())); |
684 | 17 | } |
685 | 24 | |
686 | 24 | if (MergeFunctionsPDI24 ) { |
687 | 2 | DISubprogram *DIS = G->getSubprogram(); |
688 | 2 | if (DIS2 ) { |
689 | 2 | DebugLoc CIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS); |
690 | 2 | DebugLoc RIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS); |
691 | 2 | CI->setDebugLoc(CIDbgLoc); |
692 | 2 | RI->setDebugLoc(RIDbgLoc); |
693 | 2 | } else { |
694 | 0 | DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) No DISubprogram for " |
695 | 0 | << G->getName() << "()\n"); |
696 | 0 | } |
697 | 2 | eraseTail(G); |
698 | 2 | eraseInstsUnrelatedToPDI(PDIUnrelatedWL); |
699 | 2 | DEBUG(dbgs() << "} // End of parameter related debug info filtering for: " |
700 | 2 | << G->getName() << "()\n"); |
701 | 24 | } else { |
702 | 22 | NewG->copyAttributesFrom(G); |
703 | 22 | NewG->takeName(G); |
704 | 22 | removeUsers(G); |
705 | 22 | G->replaceAllUsesWith(NewG); |
706 | 22 | G->eraseFromParent(); |
707 | 22 | } |
708 | 24 | |
709 | 24 | DEBUG(dbgs() << "writeThunk: " << H->getName() << '\n'); |
710 | 33 | ++NumThunksWritten; |
711 | 33 | } |
712 | | |
713 | | // Merge two equivalent functions. Upon completion, Function G is deleted. |
714 | 32 | void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { |
715 | 32 | if (F->isInterposable()32 ) { |
716 | 1 | assert(G->isInterposable()); |
717 | 1 | |
718 | 1 | // Make them both thunks to the same internal function. |
719 | 1 | Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", |
720 | 1 | F->getParent()); |
721 | 1 | H->copyAttributesFrom(F); |
722 | 1 | H->takeName(F); |
723 | 1 | removeUsers(F); |
724 | 1 | F->replaceAllUsesWith(H); |
725 | 1 | |
726 | 1 | unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); |
727 | 1 | |
728 | 1 | writeThunk(F, G); |
729 | 1 | writeThunk(F, H); |
730 | 1 | |
731 | 1 | F->setAlignment(MaxAlignment); |
732 | 1 | F->setLinkage(GlobalValue::PrivateLinkage); |
733 | 1 | ++NumDoubleWeak; |
734 | 32 | } else { |
735 | 31 | writeThunk(F, G); |
736 | 31 | } |
737 | 32 | |
738 | 32 | ++NumFunctionsMerged; |
739 | 32 | } |
740 | | |
741 | | /// Replace function F by function G. |
742 | | void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN, |
743 | 8 | Function *G) { |
744 | 8 | Function *F = FN.getFunc(); |
745 | 8 | assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 && |
746 | 8 | "The two functions must be equal"); |
747 | 8 | |
748 | 8 | auto I = FNodesInTree.find(F); |
749 | 8 | assert(I != FNodesInTree.end() && "F should be in FNodesInTree"); |
750 | 8 | assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G"); |
751 | 8 | |
752 | 8 | FnTreeType::iterator IterToFNInFnTree = I->second; |
753 | 8 | assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree."); |
754 | 8 | // Remove F -> FN and insert G -> FN |
755 | 8 | FNodesInTree.erase(I); |
756 | 8 | FNodesInTree.insert({G, IterToFNInFnTree}); |
757 | 8 | // Replace F with G in FN, which is stored inside the FnTree. |
758 | 8 | FN.replaceBy(G); |
759 | 8 | } |
760 | | |
761 | | // Insert a ComparableFunction into the FnTree, or merge it away if equal to one |
762 | | // that was already inserted. |
763 | 127 | bool MergeFunctions::insert(Function *NewFunction) { |
764 | 127 | std::pair<FnTreeType::iterator, bool> Result = |
765 | 127 | FnTree.insert(FunctionNode(NewFunction)); |
766 | 127 | |
767 | 127 | if (Result.second127 ) { |
768 | 90 | assert(FNodesInTree.count(NewFunction) == 0); |
769 | 90 | FNodesInTree.insert({NewFunction, Result.first}); |
770 | 90 | DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); |
771 | 90 | return false; |
772 | 90 | } |
773 | 37 | |
774 | 37 | const FunctionNode &OldF = *Result.first; |
775 | 37 | |
776 | 37 | // Don't merge tiny functions, since it can just end up making the function |
777 | 37 | // larger. |
778 | 37 | // FIXME: Should still merge them if they are unnamed_addr and produce an |
779 | 37 | // alias. |
780 | 37 | if (NewFunction->size() == 137 ) { |
781 | 31 | if (NewFunction->front().size() <= 231 ) { |
782 | 5 | DEBUG(dbgs() << NewFunction->getName() |
783 | 5 | << " is to small to bother merging\n"); |
784 | 5 | return false; |
785 | 5 | } |
786 | 32 | } |
787 | 32 | |
788 | 32 | // Impose a total order (by name) on the replacement of functions. This is |
789 | 32 | // important when operating on more than one module independently to prevent |
790 | 32 | // cycles of thunks calling each other when the modules are linked together. |
791 | 32 | // |
792 | 32 | // First of all, we process strong functions before weak functions. |
793 | 32 | if (32 (OldF.getFunc()->isInterposable() && 32 !NewFunction->isInterposable()2 ) || |
794 | 31 | (OldF.getFunc()->isInterposable() == NewFunction->isInterposable() && |
795 | 32 | OldF.getFunc()->getName() > NewFunction->getName()31 )) { |
796 | 8 | // Swap the two functions. |
797 | 8 | Function *F = OldF.getFunc(); |
798 | 8 | replaceFunctionInTree(*Result.first, NewFunction); |
799 | 8 | NewFunction = F; |
800 | 8 | assert(OldF.getFunc() != F && "Must have swapped the functions."); |
801 | 8 | } |
802 | 32 | |
803 | 32 | DEBUG(dbgs() << " " << OldF.getFunc()->getName() |
804 | 127 | << " == " << NewFunction->getName() << '\n'); |
805 | 127 | |
806 | 127 | Function *DeleteF = NewFunction; |
807 | 127 | mergeTwoFunctions(OldF.getFunc(), DeleteF); |
808 | 127 | return true; |
809 | 127 | } |
810 | | |
811 | | // Remove a function from FnTree. If it was already in FnTree, add |
812 | | // it to Deferred so that we'll look at it in the next round. |
813 | 6 | void MergeFunctions::remove(Function *F) { |
814 | 6 | auto I = FNodesInTree.find(F); |
815 | 6 | if (I != FNodesInTree.end()6 ) { |
816 | 2 | DEBUG(dbgs() << "Deferred " << F->getName()<< ".\n"); |
817 | 2 | FnTree.erase(I->second); |
818 | 2 | // I->second has been invalidated, remove it from the FNodesInTree map to |
819 | 2 | // preserve the invariant. |
820 | 2 | FNodesInTree.erase(I); |
821 | 2 | Deferred.emplace_back(F); |
822 | 2 | } |
823 | 6 | } |
824 | | |
825 | | // For each instruction used by the value, remove() the function that contains |
826 | | // the instruction. This should happen right before a call to RAUW. |
827 | 23 | void MergeFunctions::removeUsers(Value *V) { |
828 | 23 | std::vector<Value *> Worklist; |
829 | 23 | Worklist.push_back(V); |
830 | 23 | SmallSet<Value*, 8> Visited; |
831 | 23 | Visited.insert(V); |
832 | 46 | while (!Worklist.empty()46 ) { |
833 | 23 | Value *V = Worklist.back(); |
834 | 23 | Worklist.pop_back(); |
835 | 23 | |
836 | 10 | for (User *U : V->users()) { |
837 | 10 | if (Instruction *I10 = dyn_cast<Instruction>(U)) { |
838 | 3 | remove(I->getParent()->getParent()); |
839 | 10 | } else if (7 isa<GlobalValue>(U)7 ) { |
840 | 0 | // do nothing |
841 | 7 | } else if (Constant *7 C7 = dyn_cast<Constant>(U)) { |
842 | 7 | for (User *UU : C->users()) { |
843 | 7 | if (!Visited.insert(UU).second) |
844 | 0 | Worklist.push_back(UU); |
845 | 7 | } |
846 | 7 | } |
847 | 10 | } |
848 | 23 | } |
849 | 23 | } |