/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/IPO/Inliner.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- Inliner.cpp - Code common to all inliners --------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements the mechanics required to implement inlining without |
10 | | // missing any calls and updating the call graph. The decisions of which calls |
11 | | // are profitable to inline are implemented elsewhere. |
12 | | // |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #include "llvm/Transforms/IPO/Inliner.h" |
16 | | #include "llvm/ADT/DenseMap.h" |
17 | | #include "llvm/ADT/None.h" |
18 | | #include "llvm/ADT/Optional.h" |
19 | | #include "llvm/ADT/STLExtras.h" |
20 | | #include "llvm/ADT/SetVector.h" |
21 | | #include "llvm/ADT/SmallPtrSet.h" |
22 | | #include "llvm/ADT/SmallVector.h" |
23 | | #include "llvm/ADT/Statistic.h" |
24 | | #include "llvm/ADT/StringRef.h" |
25 | | #include "llvm/Analysis/AliasAnalysis.h" |
26 | | #include "llvm/Analysis/AssumptionCache.h" |
27 | | #include "llvm/Analysis/BasicAliasAnalysis.h" |
28 | | #include "llvm/Analysis/BlockFrequencyInfo.h" |
29 | | #include "llvm/Analysis/CGSCCPassManager.h" |
30 | | #include "llvm/Analysis/CallGraph.h" |
31 | | #include "llvm/Analysis/InlineCost.h" |
32 | | #include "llvm/Analysis/LazyCallGraph.h" |
33 | | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
34 | | #include "llvm/Analysis/ProfileSummaryInfo.h" |
35 | | #include "llvm/Analysis/TargetLibraryInfo.h" |
36 | | #include "llvm/Analysis/TargetTransformInfo.h" |
37 | | #include "llvm/Transforms/Utils/Local.h" |
38 | | #include "llvm/IR/Attributes.h" |
39 | | #include "llvm/IR/BasicBlock.h" |
40 | | #include "llvm/IR/CallSite.h" |
41 | | #include "llvm/IR/DataLayout.h" |
42 | | #include "llvm/IR/DebugLoc.h" |
43 | | #include "llvm/IR/DerivedTypes.h" |
44 | | #include "llvm/IR/DiagnosticInfo.h" |
45 | | #include "llvm/IR/Function.h" |
46 | | #include "llvm/IR/InstIterator.h" |
47 | | #include "llvm/IR/Instruction.h" |
48 | | #include "llvm/IR/Instructions.h" |
49 | | #include "llvm/IR/IntrinsicInst.h" |
50 | | #include "llvm/IR/Metadata.h" |
51 | | #include "llvm/IR/Module.h" |
52 | | #include "llvm/IR/PassManager.h" |
53 | | #include "llvm/IR/User.h" |
54 | | #include "llvm/IR/Value.h" |
55 | | #include "llvm/Pass.h" |
56 | | #include "llvm/Support/Casting.h" |
57 | | #include "llvm/Support/CommandLine.h" |
58 | | #include "llvm/Support/Debug.h" |
59 | | #include "llvm/Support/raw_ostream.h" |
60 | | #include "llvm/Transforms/Utils/Cloning.h" |
61 | | #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" |
62 | | #include "llvm/Transforms/Utils/ModuleUtils.h" |
63 | | #include <algorithm> |
64 | | #include <cassert> |
65 | | #include <functional> |
66 | | #include <sstream> |
67 | | #include <tuple> |
68 | | #include <utility> |
69 | | #include <vector> |
70 | | |
71 | | using namespace llvm; |
72 | | |
73 | 1.15k | #define DEBUG_TYPE "inline" |
74 | | |
75 | | STATISTIC(NumInlined, "Number of functions inlined"); |
76 | | STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); |
77 | | STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); |
78 | | STATISTIC(NumMergedAllocas, "Number of allocas merged together"); |
79 | | |
80 | | // This weirdly named statistic tracks the number of times that, when attempting |
81 | | // to inline a function A into B, we analyze the callers of B in order to see |
82 | | // if those would be more profitable and blocked inline steps. |
83 | | STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed"); |
84 | | |
85 | | /// Flag to disable manual alloca merging. |
86 | | /// |
87 | | /// Merging of allocas was originally done as a stack-size saving technique |
88 | | /// prior to LLVM's code generator having support for stack coloring based on |
89 | | /// lifetime markers. It is now in the process of being removed. To experiment |
90 | | /// with disabling it and relying fully on lifetime marker based stack |
91 | | /// coloring, you can pass this flag to LLVM. |
92 | | static cl::opt<bool> |
93 | | DisableInlinedAllocaMerging("disable-inlined-alloca-merging", |
94 | | cl::init(false), cl::Hidden); |
95 | | |
96 | | namespace { |
97 | | |
98 | | enum class InlinerFunctionImportStatsOpts { |
99 | | No = 0, |
100 | | Basic = 1, |
101 | | Verbose = 2, |
102 | | }; |
103 | | |
104 | | } // end anonymous namespace |
105 | | |
106 | | static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats( |
107 | | "inliner-function-import-stats", |
108 | | cl::init(InlinerFunctionImportStatsOpts::No), |
109 | | cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", |
110 | | "basic statistics"), |
111 | | clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", |
112 | | "printing of statistics for each inlined function")), |
113 | | cl::Hidden, cl::desc("Enable inliner stats for imported functions")); |
114 | | |
115 | | /// Flag to add inline messages as callsite attributes 'inline-remark'. |
116 | | static cl::opt<bool> |
117 | | InlineRemarkAttribute("inline-remark-attribute", cl::init(false), |
118 | | cl::Hidden, |
119 | | cl::desc("Enable adding inline-remark attribute to" |
120 | | " callsites processed by inliner but decided" |
121 | | " to be not inlined")); |
122 | | |
123 | 13.6k | LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} |
124 | | |
125 | | LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) |
126 | 11.6k | : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} |
127 | | |
128 | | /// For this class, we declare that we require and preserve the call graph. |
129 | | /// If the derived class implements this method, it should |
130 | | /// always explicitly call the implementation here. |
131 | 25.1k | void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const { |
132 | 25.1k | AU.addRequired<AssumptionCacheTracker>(); |
133 | 25.1k | AU.addRequired<ProfileSummaryInfoWrapperPass>(); |
134 | 25.1k | AU.addRequired<TargetLibraryInfoWrapperPass>(); |
135 | 25.1k | getAAResultsAnalysisUsage(AU); |
136 | 25.1k | CallGraphSCCPass::getAnalysisUsage(AU); |
137 | 25.1k | } |
138 | | |
139 | | using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>; |
140 | | |
141 | | /// Look at all of the allocas that we inlined through this call site. If we |
142 | | /// have already inlined other allocas through other calls into this function, |
143 | | /// then we know that they have disjoint lifetimes and that we can merge them. |
144 | | /// |
145 | | /// There are many heuristics possible for merging these allocas, and the |
146 | | /// different options have different tradeoffs. One thing that we *really* |
147 | | /// don't want to hurt is SRoA: once inlining happens, often allocas are no |
148 | | /// longer address taken and so they can be promoted. |
149 | | /// |
150 | | /// Our "solution" for that is to only merge allocas whose outermost type is an |
151 | | /// array type. These are usually not promoted because someone is using a |
152 | | /// variable index into them. These are also often the most important ones to |
153 | | /// merge. |
154 | | /// |
155 | | /// A better solution would be to have real memory lifetime markers in the IR |
156 | | /// and not have the inliner do any merging of allocas at all. This would |
157 | | /// allow the backend to do proper stack slot coloring of all allocas that |
158 | | /// *actually make it to the backend*, which is really what we want. |
159 | | /// |
160 | | /// Because we don't have this information, we do this simple and useful hack. |
161 | | static void mergeInlinedArrayAllocas( |
162 | | Function *Caller, InlineFunctionInfo &IFI, |
163 | 540k | InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) { |
164 | 540k | SmallPtrSet<AllocaInst *, 16> UsedAllocas; |
165 | 540k | |
166 | 540k | // When processing our SCC, check to see if CS was inlined from some other |
167 | 540k | // call site. For example, if we're processing "A" in this code: |
168 | 540k | // A() { B() } |
169 | 540k | // B() { x = alloca ... C() } |
170 | 540k | // C() { y = alloca ... } |
171 | 540k | // Assume that C was not inlined into B initially, and so we're processing A |
172 | 540k | // and decide to inline B into A. Doing this makes an alloca available for |
173 | 540k | // reuse and makes a callsite (C) available for inlining. When we process |
174 | 540k | // the C call site we don't want to do any alloca merging between X and Y |
175 | 540k | // because their scopes are not disjoint. We could make this smarter by |
176 | 540k | // keeping track of the inline history for each alloca in the |
177 | 540k | // InlinedArrayAllocas but this isn't likely to be a significant win. |
178 | 540k | if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. |
179 | 7.36k | return; |
180 | 532k | |
181 | 532k | // Loop over all the allocas we have so far and see if they can be merged with |
182 | 532k | // a previously inlined alloca. If not, remember that we had it. |
183 | 634k | for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); 532k AllocaNo != e; |
184 | 532k | ++AllocaNo102k ) { |
185 | 102k | AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; |
186 | 102k | |
187 | 102k | // Don't bother trying to merge array allocations (they will usually be |
188 | 102k | // canonicalized to be an allocation *of* an array), or allocations whose |
189 | 102k | // type is not itself an array (because we're afraid of pessimizing SRoA). |
190 | 102k | ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); |
191 | 102k | if (!ATy || AI->isArrayAllocation()1.69k ) |
192 | 100k | continue; |
193 | 1.69k | |
194 | 1.69k | // Get the list of all available allocas for this array type. |
195 | 1.69k | std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy]; |
196 | 1.69k | |
197 | 1.69k | // Loop over the allocas in AllocasForType to see if we can reuse one. Note |
198 | 1.69k | // that we have to be careful not to reuse the same "available" alloca for |
199 | 1.69k | // multiple different allocas that we just inlined, we use the 'UsedAllocas' |
200 | 1.69k | // set to keep track of which "available" allocas are being used by this |
201 | 1.69k | // function. Also, AllocasForType can be empty of course! |
202 | 1.69k | bool MergedAwayAlloca = false; |
203 | 1.69k | for (AllocaInst *AvailableAlloca : AllocasForType) { |
204 | 1.67k | unsigned Align1 = AI->getAlignment(), |
205 | 1.67k | Align2 = AvailableAlloca->getAlignment(); |
206 | 1.67k | |
207 | 1.67k | // The available alloca has to be in the right function, not in some other |
208 | 1.67k | // function in this SCC. |
209 | 1.67k | if (AvailableAlloca->getParent() != AI->getParent()) |
210 | 4 | continue; |
211 | 1.67k | |
212 | 1.67k | // If the inlined function already uses this alloca then we can't reuse |
213 | 1.67k | // it. |
214 | 1.67k | if (!UsedAllocas.insert(AvailableAlloca).second) |
215 | 902 | continue; |
216 | 771 | |
217 | 771 | // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare |
218 | 771 | // success! |
219 | 771 | LLVM_DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI |
220 | 771 | << "\n\t\tINTO: " << *AvailableAlloca << '\n'); |
221 | 771 | |
222 | 771 | // Move affected dbg.declare calls immediately after the new alloca to |
223 | 771 | // avoid the situation when a dbg.declare precedes its alloca. |
224 | 771 | if (auto *L = LocalAsMetadata::getIfExists(AI)) |
225 | 1 | if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) |
226 | 1 | for (User *U : MDV->users()) |
227 | 1 | if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U)) |
228 | 1 | DDI->moveBefore(AvailableAlloca->getNextNode()); |
229 | 771 | |
230 | 771 | AI->replaceAllUsesWith(AvailableAlloca); |
231 | 771 | |
232 | 771 | if (Align1 != Align2) { |
233 | 2 | if (!Align1 || !Align2) { |
234 | 1 | const DataLayout &DL = Caller->getParent()->getDataLayout(); |
235 | 1 | unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType()); |
236 | 1 | |
237 | 1 | Align1 = Align1 ? Align1 : TypeAlign0 ; |
238 | 1 | Align2 = Align2 ? Align20 : TypeAlign; |
239 | 1 | } |
240 | 2 | |
241 | 2 | if (Align1 > Align2) |
242 | 2 | AvailableAlloca->setAlignment(AI->getAlignment()); |
243 | 2 | } |
244 | 771 | |
245 | 771 | AI->eraseFromParent(); |
246 | 771 | MergedAwayAlloca = true; |
247 | 771 | ++NumMergedAllocas; |
248 | 771 | IFI.StaticAllocas[AllocaNo] = nullptr; |
249 | 771 | break; |
250 | 771 | } |
251 | 1.69k | |
252 | 1.69k | // If we already nuked the alloca, we're done with it. |
253 | 1.69k | if (MergedAwayAlloca) |
254 | 771 | continue; |
255 | 924 | |
256 | 924 | // If we were unable to merge away the alloca either because there are no |
257 | 924 | // allocas of the right type available or because we reused them all |
258 | 924 | // already, remember that this alloca came from an inlined function and mark |
259 | 924 | // it used so we don't reuse it for other allocas from this inline |
260 | 924 | // operation. |
261 | 924 | AllocasForType.push_back(AI); |
262 | 924 | UsedAllocas.insert(AI); |
263 | 924 | } |
264 | 532k | } |
265 | | |
266 | | /// If it is possible to inline the specified call site, |
267 | | /// do so and update the CallGraph for this operation. |
268 | | /// |
269 | | /// This function also does some basic book-keeping to update the IR. The |
270 | | /// InlinedArrayAllocas map keeps track of any allocas that are already |
271 | | /// available from other functions inlined into the caller. If we are able to |
272 | | /// inline this call site we attempt to reuse already available allocas or add |
273 | | /// any new allocas to the set if not possible. |
274 | | static InlineResult InlineCallIfPossible( |
275 | | CallSite CS, InlineFunctionInfo &IFI, |
276 | | InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, |
277 | | bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter, |
278 | 540k | ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { |
279 | 540k | Function *Callee = CS.getCalledFunction(); |
280 | 540k | Function *Caller = CS.getCaller(); |
281 | 540k | |
282 | 540k | AAResults &AAR = AARGetter(*Callee); |
283 | 540k | |
284 | 540k | // Try to inline the function. Get the list of static allocas that were |
285 | 540k | // inlined. |
286 | 540k | InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime); |
287 | 540k | if (!IR) |
288 | 5 | return IR; |
289 | 540k | |
290 | 540k | if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) |
291 | 30 | ImportedFunctionsStats.recordInline(*Caller, *Callee); |
292 | 540k | |
293 | 540k | AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); |
294 | 540k | |
295 | 540k | if (!DisableInlinedAllocaMerging) |
296 | 540k | mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory); |
297 | 540k | |
298 | 540k | return IR; // success |
299 | 540k | } |
300 | | |
301 | | /// Return true if inlining of CS can block the caller from being |
302 | | /// inlined which is proved to be more beneficial. \p IC is the |
303 | | /// estimated inline cost associated with callsite \p CS. |
304 | | /// \p TotalSecondaryCost will be set to the estimated cost of inlining the |
305 | | /// caller if \p CS is suppressed for inlining. |
306 | | static bool |
307 | | shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, |
308 | | int &TotalSecondaryCost, |
309 | 489k | function_ref<InlineCost(CallSite CS)> GetInlineCost) { |
310 | 489k | // For now we only handle local or inline functions. |
311 | 489k | if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()434k ) |
312 | 171k | return false; |
313 | 318k | // If the cost of inlining CS is non-positive, it is not going to prevent the |
314 | 318k | // caller from being inlined into its callers and hence we don't need to |
315 | 318k | // defer. |
316 | 318k | if (IC.getCost() <= 0) |
317 | 236k | return false; |
318 | 82.0k | // Try to detect the case where the current inlining candidate caller (call |
319 | 82.0k | // it B) is a static or linkonce-ODR function and is an inlining candidate |
320 | 82.0k | // elsewhere, and the current candidate callee (call it C) is large enough |
321 | 82.0k | // that inlining it into B would make B too big to inline later. In these |
322 | 82.0k | // circumstances it may be best not to inline C into B, but to inline B into |
323 | 82.0k | // its callers. |
324 | 82.0k | // |
325 | 82.0k | // This only applies to static and linkonce-ODR functions because those are |
326 | 82.0k | // expected to be available for inlining in the translation units where they |
327 | 82.0k | // are used. Thus we will always have the opportunity to make local inlining |
328 | 82.0k | // decisions. Importantly the linkonce-ODR linkage covers inline functions |
329 | 82.0k | // and templates in C++. |
330 | 82.0k | // |
331 | 82.0k | // FIXME: All of this logic should be sunk into getInlineCost. It relies on |
332 | 82.0k | // the internal implementation of the inline cost metrics rather than |
333 | 82.0k | // treating them as truly abstract units etc. |
334 | 82.0k | TotalSecondaryCost = 0; |
335 | 82.0k | // The candidate cost to be imposed upon the current function. |
336 | 82.0k | int CandidateCost = IC.getCost() - 1; |
337 | 82.0k | // If the caller has local linkage and can be inlined to all its callers, we |
338 | 82.0k | // can apply a huge negative bonus to TotalSecondaryCost. |
339 | 82.0k | bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse()19.1k ; |
340 | 82.0k | // This bool tracks what happens if we DO inline C into B. |
341 | 82.0k | bool inliningPreventsSomeOuterInline = false; |
342 | 165k | for (User *U : Caller->users()) { |
343 | 165k | // If the caller will not be removed (either because it does not have a |
344 | 165k | // local linkage or because the LastCallToStaticBonus has been already |
345 | 165k | // applied), then we can exit the loop early. |
346 | 165k | if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost()149k ) |
347 | 543 | return false; |
348 | 164k | CallSite CS2(U); |
349 | 164k | |
350 | 164k | // If this isn't a call to Caller (it could be some other sort |
351 | 164k | // of reference) skip it. Such references will prevent the caller |
352 | 164k | // from being removed. |
353 | 164k | if (!CS2 || CS2.getCalledFunction() != Caller160k ) { |
354 | 4.79k | ApplyLastCallBonus = false; |
355 | 4.79k | continue; |
356 | 4.79k | } |
357 | 159k | |
358 | 159k | InlineCost IC2 = GetInlineCost(CS2); |
359 | 159k | ++NumCallerCallersAnalyzed; |
360 | 159k | if (!IC2) { |
361 | 70.6k | ApplyLastCallBonus = false; |
362 | 70.6k | continue; |
363 | 70.6k | } |
364 | 89.0k | if (IC2.isAlways()) |
365 | 354 | continue; |
366 | 88.7k | |
367 | 88.7k | // See if inlining of the original callsite would erase the cost delta of |
368 | 88.7k | // this callsite. We subtract off the penalty for the call instruction, |
369 | 88.7k | // which we would be deleting. |
370 | 88.7k | if (IC2.getCostDelta() <= CandidateCost) { |
371 | 7.12k | inliningPreventsSomeOuterInline = true; |
372 | 7.12k | TotalSecondaryCost += IC2.getCost(); |
373 | 7.12k | } |
374 | 88.7k | } |
375 | 82.0k | // If all outer calls to Caller would get inlined, the cost for the last |
376 | 82.0k | // one is set very low by getInlineCost, in anticipation that Caller will |
377 | 82.0k | // be removed entirely. We did not account for this above unless there |
378 | 82.0k | // is only one caller of Caller. |
379 | 82.0k | if (81.5k ApplyLastCallBonus81.5k ) |
380 | 1.72k | TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus; |
381 | 81.5k | |
382 | 81.5k | if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()4.30k ) |
383 | 2.08k | return true; |
384 | 79.4k | |
385 | 79.4k | return false; |
386 | 79.4k | } |
387 | | |
388 | | static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R, |
389 | 1.11M | const ore::NV &Arg) { |
390 | 1.11M | return R << Arg.Val; |
391 | 1.11M | } |
392 | | |
393 | | template <class RemarkT> |
394 | 970k | RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) { |
395 | 970k | using namespace ore; |
396 | 970k | if (IC.isAlways()) { |
397 | 37 | R << "(cost=always)"; |
398 | 970k | } else if (IC.isNever()) { |
399 | 827k | R << "(cost=never)"; |
400 | 827k | } else { |
401 | 142k | R << "(cost=" << ore::NV("Cost", IC.getCost()) |
402 | 142k | << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")"; |
403 | 142k | } |
404 | 970k | if (const char *Reason = IC.getReason()) |
405 | 827k | R << ": " << ore::NV("Reason", Reason); |
406 | 970k | return R; |
407 | 970k | } llvm::OptimizationRemarkMissed&& operator<<<llvm::OptimizationRemarkMissed&>(llvm::OptimizationRemarkMissed&&&, llvm::InlineCost const&) Line | Count | Source | 394 | 58 | RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) { | 395 | 58 | using namespace ore; | 396 | 58 | if (IC.isAlways()) { | 397 | 0 | R << "(cost=always)"; | 398 | 58 | } else if (IC.isNever()) { | 399 | 40 | R << "(cost=never)"; | 400 | 40 | } else { | 401 | 18 | R << "(cost=" << ore::NV("Cost", IC.getCost()) | 402 | 18 | << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")"; | 403 | 18 | } | 404 | 58 | if (const char *Reason = IC.getReason()) | 405 | 40 | R << ": " << ore::NV("Reason", Reason); | 406 | 58 | return R; | 407 | 58 | } |
std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&& operator<<<std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&&&, llvm::InlineCost const&) Line | Count | Source | 394 | 970k | RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) { | 395 | 970k | using namespace ore; | 396 | 970k | if (IC.isAlways()) { | 397 | 0 | R << "(cost=always)"; | 398 | 970k | } else if (IC.isNever()) { | 399 | 827k | R << "(cost=never)"; | 400 | 827k | } else { | 401 | 142k | R << "(cost=" << ore::NV("Cost", IC.getCost()) | 402 | 142k | << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")"; | 403 | 142k | } | 404 | 970k | if (const char *Reason = IC.getReason()) | 405 | 827k | R << ": " << ore::NV("Reason", Reason); | 406 | 970k | return R; | 407 | 970k | } |
llvm::OptimizationRemark&& operator<<<llvm::OptimizationRemark&>(llvm::OptimizationRemark&&&, llvm::InlineCost const&) Line | Count | Source | 394 | 83 | RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) { | 395 | 83 | using namespace ore; | 396 | 83 | if (IC.isAlways()) { | 397 | 37 | R << "(cost=always)"; | 398 | 46 | } else if (IC.isNever()) { | 399 | 0 | R << "(cost=never)"; | 400 | 46 | } else { | 401 | 46 | R << "(cost=" << ore::NV("Cost", IC.getCost()) | 402 | 46 | << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")"; | 403 | 46 | } | 404 | 83 | if (const char *Reason = IC.getReason()) | 405 | 37 | R << ": " << ore::NV("Reason", Reason); | 406 | 83 | return R; | 407 | 83 | } |
|
408 | | |
409 | 970k | static std::string inlineCostStr(const InlineCost &IC) { |
410 | 970k | std::stringstream Remark; |
411 | 970k | Remark << IC; |
412 | 970k | return Remark.str(); |
413 | 970k | } |
414 | | |
415 | | /// Return the cost only if the inliner should attempt to inline at the given |
416 | | /// CallSite. If we return the cost, we will emit an optimisation remark later |
417 | | /// using that cost, so we won't do so from this function. |
418 | | static Optional<InlineCost> |
419 | | shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost, |
420 | 1.51M | OptimizationRemarkEmitter &ORE) { |
421 | 1.51M | using namespace ore; |
422 | 1.51M | |
423 | 1.51M | InlineCost IC = GetInlineCost(CS); |
424 | 1.51M | Instruction *Call = CS.getInstruction(); |
425 | 1.51M | Function *Callee = CS.getCalledFunction(); |
426 | 1.51M | Function *Caller = CS.getCaller(); |
427 | 1.51M | |
428 | 1.51M | if (IC.isAlways()) { |
429 | 57.6k | LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) |
430 | 57.6k | << ", Call: " << *CS.getInstruction() << "\n"); |
431 | 57.6k | return IC; |
432 | 57.6k | } |
433 | 1.46M | |
434 | 1.46M | if (IC.isNever()) { |
435 | 827k | LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) |
436 | 827k | << ", Call: " << *CS.getInstruction() << "\n"); |
437 | 827k | ORE.emit([&]() { |
438 | 40 | return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) |
439 | 40 | << NV("Callee", Callee) << " not inlined into " |
440 | 40 | << NV("Caller", Caller) << " because it should never be inlined " |
441 | 40 | << IC; |
442 | 40 | }); |
443 | 827k | return IC; |
444 | 827k | } |
445 | 632k | |
446 | 632k | if (!IC) { |
447 | 142k | LLVM_DEBUG(dbgs() << " NOT Inlining " << inlineCostStr(IC) |
448 | 142k | << ", Call: " << *CS.getInstruction() << "\n"); |
449 | 142k | ORE.emit([&]() { |
450 | 18 | return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) |
451 | 18 | << NV("Callee", Callee) << " not inlined into " |
452 | 18 | << NV("Caller", Caller) << " because too costly to inline " << IC; |
453 | 18 | }); |
454 | 142k | return IC; |
455 | 142k | } |
456 | 489k | |
457 | 489k | int TotalSecondaryCost = 0; |
458 | 489k | if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) { |
459 | 2.08k | LLVM_DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() |
460 | 2.08k | << " Cost = " << IC.getCost() |
461 | 2.08k | << ", outer Cost = " << TotalSecondaryCost << '\n'); |
462 | 2.08k | ORE.emit([&]() { |
463 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", |
464 | 0 | Call) |
465 | 0 | << "Not inlining. Cost of inlining " << NV("Callee", Callee) |
466 | 0 | << " increases the cost of inlining " << NV("Caller", Caller) |
467 | 0 | << " in other contexts"; |
468 | 0 | }); |
469 | 2.08k | |
470 | 2.08k | // IC does not bool() to false, so get an InlineCost that will. |
471 | 2.08k | // This will not be inspected to make an error message. |
472 | 2.08k | return None; |
473 | 2.08k | } |
474 | 487k | |
475 | 487k | LLVM_DEBUG(dbgs() << " Inlining " << inlineCostStr(IC) |
476 | 487k | << ", Call: " << *CS.getInstruction() << '\n'); |
477 | 487k | return IC; |
478 | 487k | } |
479 | | |
480 | | /// Return true if the specified inline history ID |
481 | | /// indicates an inline history that includes the specified function. |
482 | | static bool InlineHistoryIncludes( |
483 | | Function *F, int InlineHistoryID, |
484 | 126k | const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) { |
485 | 264k | while (InlineHistoryID != -1) { |
486 | 137k | assert(unsigned(InlineHistoryID) < InlineHistory.size() && |
487 | 137k | "Invalid inline history ID"); |
488 | 137k | if (InlineHistory[InlineHistoryID].first == F) |
489 | 127 | return true; |
490 | 137k | InlineHistoryID = InlineHistory[InlineHistoryID].second; |
491 | 137k | } |
492 | 126k | return false126k ; |
493 | 126k | } |
494 | | |
495 | 25.1k | bool LegacyInlinerBase::doInitialization(CallGraph &CG) { |
496 | 25.1k | if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) |
497 | 2 | ImportedFunctionsStats.setModuleInfo(CG.getModule()); |
498 | 25.1k | return false; // No changes to CallGraph. |
499 | 25.1k | } |
500 | | |
501 | 643k | bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) { |
502 | 643k | if (skipSCC(SCC)) |
503 | 24 | return false; |
504 | 643k | return inlineCalls(SCC); |
505 | 643k | } |
506 | | |
507 | | static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc, |
508 | | const BasicBlock *Block, const Function &Callee, |
509 | 540k | const Function &Caller, const InlineCost &IC) { |
510 | 540k | ORE.emit([&]() { |
511 | 83 | bool AlwaysInline = IC.isAlways(); |
512 | 83 | StringRef RemarkName = AlwaysInline ? "AlwaysInline"37 : "Inlined"46 ; |
513 | 83 | return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block) |
514 | 83 | << ore::NV("Callee", &Callee) << " inlined into " |
515 | 83 | << ore::NV("Caller", &Caller) << " with " << IC; |
516 | 83 | }); |
517 | 540k | } |
518 | | |
519 | 1.50M | static void setInlineRemark(CallSite &CS, StringRef message) { |
520 | 1.50M | if (!InlineRemarkAttribute) |
521 | 1.50M | return; |
522 | 9 | |
523 | 9 | Attribute attr = Attribute::get(CS->getContext(), "inline-remark", message); |
524 | 9 | CS.addAttribute(AttributeList::FunctionIndex, attr); |
525 | 9 | } |
526 | | |
527 | | static bool |
528 | | inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, |
529 | | std::function<AssumptionCache &(Function &)> GetAssumptionCache, |
530 | | ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI, |
531 | | bool InsertLifetime, |
532 | | function_ref<InlineCost(CallSite CS)> GetInlineCost, |
533 | | function_ref<AAResults &(Function &)> AARGetter, |
534 | 853k | ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { |
535 | 853k | SmallPtrSet<Function *, 8> SCCFunctions; |
536 | 853k | LLVM_DEBUG(dbgs() << "Inliner visiting SCC:"); |
537 | 854k | for (CallGraphNode *Node : SCC) { |
538 | 854k | Function *F = Node->getFunction(); |
539 | 854k | if (F) |
540 | 814k | SCCFunctions.insert(F); |
541 | 854k | LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); |
542 | 854k | } |
543 | 853k | |
544 | 853k | // Scan through and identify all call sites ahead of time so that we only |
545 | 853k | // inline call sites in the original functions, not call sites that result |
546 | 853k | // from inlining other functions. |
547 | 853k | SmallVector<std::pair<CallSite, int>, 16> CallSites; |
548 | 853k | |
549 | 853k | // When inlining a callee produces new call sites, we want to keep track of |
550 | 853k | // the fact that they were inlined from the callee. This allows us to avoid |
551 | 853k | // infinite inlining in some obscure cases. To represent this, we use an |
552 | 853k | // index into the InlineHistory vector. |
553 | 853k | SmallVector<std::pair<Function *, int>, 8> InlineHistory; |
554 | 853k | |
555 | 854k | for (CallGraphNode *Node : SCC) { |
556 | 854k | Function *F = Node->getFunction(); |
557 | 854k | if (!F || F->isDeclaration()814k ) |
558 | 242k | continue; |
559 | 611k | |
560 | 611k | OptimizationRemarkEmitter ORE(F); |
561 | 611k | for (BasicBlock &BB : *F) |
562 | 13.2M | for (Instruction &I : BB)2.39M { |
563 | 13.2M | CallSite CS(cast<Value>(&I)); |
564 | 13.2M | // If this isn't a call, or it is a call to an intrinsic, it can |
565 | 13.2M | // never be inlined. |
566 | 13.2M | if (!CS || isa<IntrinsicInst>(I)2.27M ) |
567 | 11.2M | continue; |
568 | 1.94M | |
569 | 1.94M | // If this is a direct call to an external function, we can never inline |
570 | 1.94M | // it. If it is an indirect call, inlining may resolve it to be a |
571 | 1.94M | // direct call, so we keep it. |
572 | 1.94M | if (Function *Callee = CS.getCalledFunction()) |
573 | 1.87M | if (Callee->isDeclaration()) { |
574 | 524k | using namespace ore; |
575 | 524k | |
576 | 524k | setInlineRemark(CS, "unavailable definition"); |
577 | 524k | ORE.emit([&]() { |
578 | 37 | return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) |
579 | 37 | << NV("Callee", Callee) << " will not be inlined into " |
580 | 37 | << NV("Caller", CS.getCaller()) |
581 | 37 | << " because its definition is unavailable" |
582 | 37 | << setIsVerbose(); |
583 | 37 | }); |
584 | 524k | continue; |
585 | 524k | } |
586 | 1.42M | |
587 | 1.42M | CallSites.push_back(std::make_pair(CS, -1)); |
588 | 1.42M | } |
589 | 611k | } |
590 | 853k | |
591 | 853k | LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); |
592 | 853k | |
593 | 853k | // If there are no calls in this function, exit early. |
594 | 853k | if (CallSites.empty()) |
595 | 508k | return false; |
596 | 344k | |
597 | 344k | // Now that we have all of the call sites, move the ones to functions in the |
598 | 344k | // current SCC to the end of the list. |
599 | 344k | unsigned FirstCallInSCC = CallSites.size(); |
600 | 1.76M | for (unsigned i = 0; i < FirstCallInSCC; ++i1.42M ) |
601 | 1.42M | if (Function *F = CallSites[i].first.getCalledFunction()) |
602 | 1.35M | if (SCCFunctions.count(F)) |
603 | 4.73k | std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); |
604 | 344k | |
605 | 344k | InlinedArrayAllocasTy InlinedArrayAllocas; |
606 | 344k | InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI); |
607 | 344k | |
608 | 344k | // Now that we have all of the call sites, loop over them and inline them if |
609 | 344k | // it looks profitable to do so. |
610 | 344k | bool Changed = false; |
611 | 344k | bool LocalChange; |
612 | 520k | do { |
613 | 520k | LocalChange = false; |
614 | 520k | // Iterate over the outer loop because inlining functions can cause indirect |
615 | 520k | // calls to become direct calls. |
616 | 520k | // CallSites may be modified inside so ranged for loop can not be used. |
617 | 2.64M | for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi2.12M ) { |
618 | 2.12M | CallSite CS = CallSites[CSi].first; |
619 | 2.12M | |
620 | 2.12M | Function *Caller = CS.getCaller(); |
621 | 2.12M | Function *Callee = CS.getCalledFunction(); |
622 | 2.12M | |
623 | 2.12M | // We can only inline direct calls to non-declarations. |
624 | 2.12M | if (!Callee || Callee->isDeclaration()1.98M ) |
625 | 606k | continue; |
626 | 1.51M | |
627 | 1.51M | Instruction *Instr = CS.getInstruction(); |
628 | 1.51M | |
629 | 1.51M | bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI); |
630 | 1.51M | |
631 | 1.51M | int InlineHistoryID; |
632 | 1.51M | if (!IsTriviallyDead) { |
633 | 1.51M | // If this call site was obtained by inlining another function, verify |
634 | 1.51M | // that the include path for the function did not include the callee |
635 | 1.51M | // itself. If so, we'd be recursively inlining the same function, |
636 | 1.51M | // which would provide the same callsites, which would cause us to |
637 | 1.51M | // infinitely inline. |
638 | 1.51M | InlineHistoryID = CallSites[CSi].second; |
639 | 1.51M | if (InlineHistoryID != -1 && |
640 | 1.51M | InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)126k ) { |
641 | 126 | setInlineRemark(CS, "recursive"); |
642 | 126 | continue; |
643 | 126 | } |
644 | 1.51M | } |
645 | 1.51M | |
646 | 1.51M | // FIXME for new PM: because of the old PM we currently generate ORE and |
647 | 1.51M | // in turn BFI on demand. With the new PM, the ORE dependency should |
648 | 1.51M | // just become a regular analysis dependency. |
649 | 1.51M | OptimizationRemarkEmitter ORE(Caller); |
650 | 1.51M | |
651 | 1.51M | Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE); |
652 | 1.51M | // If the policy determines that we should inline this function, |
653 | 1.51M | // delete the call instead. |
654 | 1.51M | if (!OIC.hasValue()) { |
655 | 2.08k | setInlineRemark(CS, "deferred"); |
656 | 2.08k | continue; |
657 | 2.08k | } |
658 | 1.51M | |
659 | 1.51M | if (!OIC.getValue()) { |
660 | 970k | // shouldInline() call returned a negative inline cost that explains |
661 | 970k | // why this callsite should not be inlined. |
662 | 970k | setInlineRemark(CS, inlineCostStr(*OIC)); |
663 | 970k | continue; |
664 | 970k | } |
665 | 544k | |
666 | 544k | // If this call site is dead and it is to a readonly function, we should |
667 | 544k | // just delete the call instead of trying to inline it, regardless of |
668 | 544k | // size. This happens because IPSCCP propagates the result out of the |
669 | 544k | // call and then we're left with the dead call. |
670 | 544k | if (IsTriviallyDead) { |
671 | 4.48k | LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << *Instr << "\n"); |
672 | 4.48k | // Update the call graph by deleting the edge from Callee to Caller. |
673 | 4.48k | setInlineRemark(CS, "trivially dead"); |
674 | 4.48k | CG[Caller]->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction())); |
675 | 4.48k | Instr->eraseFromParent(); |
676 | 4.48k | ++NumCallsDeleted; |
677 | 540k | } else { |
678 | 540k | // Get DebugLoc to report. CS will be invalid after Inliner. |
679 | 540k | DebugLoc DLoc = CS->getDebugLoc(); |
680 | 540k | BasicBlock *Block = CS.getParent(); |
681 | 540k | |
682 | 540k | // Attempt to inline the function. |
683 | 540k | using namespace ore; |
684 | 540k | |
685 | 540k | InlineResult IR = InlineCallIfPossible( |
686 | 540k | CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, |
687 | 540k | InsertLifetime, AARGetter, ImportedFunctionsStats); |
688 | 540k | if (!IR) { |
689 | 5 | setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC)); |
690 | 5 | ORE.emit([&]() { |
691 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, |
692 | 0 | Block) |
693 | 0 | << NV("Callee", Callee) << " will not be inlined into " |
694 | 0 | << NV("Caller", Caller) << ": " << NV("Reason", IR.message); |
695 | 0 | }); |
696 | 5 | continue; |
697 | 5 | } |
698 | 540k | ++NumInlined; |
699 | 540k | |
700 | 540k | emit_inlined_into(ORE, DLoc, Block, *Callee, *Caller, *OIC); |
701 | 540k | |
702 | 540k | // If inlining this function gave us any new call sites, throw them |
703 | 540k | // onto our worklist to process. They are useful inline candidates. |
704 | 540k | if (!InlineInfo.InlinedCalls.empty()) { |
705 | 159k | // Create a new inline history entry for this, so that we remember |
706 | 159k | // that these new callsites came about due to inlining Callee. |
707 | 159k | int NewHistoryID = InlineHistory.size(); |
708 | 159k | InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); |
709 | 159k | |
710 | 159k | for (Value *Ptr : InlineInfo.InlinedCalls) |
711 | 325k | CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID)); |
712 | 159k | } |
713 | 540k | } |
714 | 544k | |
715 | 544k | // If we inlined or deleted the last possible call site to the function, |
716 | 544k | // delete the function body now. |
717 | 544k | if (544k Callee544k && Callee->use_empty()544k && Callee->hasLocalLinkage()214k && |
718 | 544k | // TODO: Can remove if in SCC now. |
719 | 544k | !SCCFunctions.count(Callee)41.7k && |
720 | 544k | // The function may be apparently dead, but if there are indirect |
721 | 544k | // callgraph references to the node, we cannot delete it yet, this |
722 | 544k | // could invalidate the CGSCC iterator. |
723 | 544k | CG[Callee]->getNumReferences() == 041.6k ) { |
724 | 41.6k | LLVM_DEBUG(dbgs() << " -> Deleting dead function: " |
725 | 41.6k | << Callee->getName() << "\n"); |
726 | 41.6k | CallGraphNode *CalleeNode = CG[Callee]; |
727 | 41.6k | |
728 | 41.6k | // Remove any call graph edges from the callee to its callees. |
729 | 41.6k | CalleeNode->removeAllCalledFunctions(); |
730 | 41.6k | |
731 | 41.6k | // Removing the node for callee from the call graph and delete it. |
732 | 41.6k | delete CG.removeFunctionFromModule(CalleeNode); |
733 | 41.6k | ++NumDeleted; |
734 | 41.6k | } |
735 | 544k | |
736 | 544k | // Remove this call site from the list. If possible, use |
737 | 544k | // swap/pop_back for efficiency, but do not use it if doing so would |
738 | 544k | // move a call site to a function in this SCC before the |
739 | 544k | // 'FirstCallInSCC' barrier. |
740 | 544k | if (SCC.isSingular()) { |
741 | 537k | CallSites[CSi] = CallSites.back(); |
742 | 537k | CallSites.pop_back(); |
743 | 537k | } else { |
744 | 6.89k | CallSites.erase(CallSites.begin() + CSi); |
745 | 6.89k | } |
746 | 544k | --CSi; |
747 | 544k | |
748 | 544k | Changed = true; |
749 | 544k | LocalChange = true; |
750 | 544k | } |
751 | 520k | } while (LocalChange); |
752 | 344k | |
753 | 344k | return Changed; |
754 | 344k | } |
755 | | |
756 | 853k | bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { |
757 | 853k | CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); |
758 | 853k | ACT = &getAnalysis<AssumptionCacheTracker>(); |
759 | 853k | PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); |
760 | 853k | auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); |
761 | 853k | auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { |
762 | 548k | return ACT->getAssumptionCache(F); |
763 | 548k | }; |
764 | 853k | return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime, |
765 | 1.67M | [this](CallSite CS) { return getInlineCost(CS); }, |
766 | 853k | LegacyAARGetter(*this), ImportedFunctionsStats); |
767 | 853k | } |
768 | | |
769 | | /// Remove now-dead linkonce functions at the end of |
770 | | /// processing to avoid breaking the SCC traversal. |
771 | 13.5k | bool LegacyInlinerBase::doFinalization(CallGraph &CG) { |
772 | 13.5k | if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) |
773 | 2 | ImportedFunctionsStats.dump(InlinerFunctionImportStats == |
774 | 2 | InlinerFunctionImportStatsOpts::Verbose); |
775 | 13.5k | return removeDeadFunctions(CG); |
776 | 13.5k | } |
777 | | |
778 | | /// Remove dead functions that are not included in DNR (Do Not Remove) list. |
779 | | bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG, |
780 | 25.1k | bool AlwaysInlineOnly) { |
781 | 25.1k | SmallVector<CallGraphNode *, 16> FunctionsToRemove; |
782 | 25.1k | SmallVector<Function *, 16> DeadFunctionsInComdats; |
783 | 25.1k | |
784 | 161k | auto RemoveCGN = [&](CallGraphNode *CGN) { |
785 | 161k | // Remove any call graph edges from the function to its callees. |
786 | 161k | CGN->removeAllCalledFunctions(); |
787 | 161k | |
788 | 161k | // Remove any edges from the external node to the function's call graph |
789 | 161k | // node. These edges might have been made irrelegant due to |
790 | 161k | // optimization of the program. |
791 | 161k | CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); |
792 | 161k | |
793 | 161k | // Removing the node for callee from the call graph and delete it. |
794 | 161k | FunctionsToRemove.push_back(CGN); |
795 | 161k | }; |
796 | 25.1k | |
797 | 25.1k | // Scan for all of the functions, looking for ones that should now be removed |
798 | 25.1k | // from the program. Insert the dead ones in the FunctionsToRemove set. |
799 | 798k | for (const auto &I : CG) { |
800 | 798k | CallGraphNode *CGN = I.second.get(); |
801 | 798k | Function *F = CGN->getFunction(); |
802 | 798k | if (!F || F->isDeclaration()773k ) |
803 | 229k | continue; |
804 | 569k | |
805 | 569k | // Handle the case when this function is called and we only want to care |
806 | 569k | // about always-inline functions. This is a bit of a hack to share code |
807 | 569k | // between here and the InlineAlways pass. |
808 | 569k | if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline)127k ) |
809 | 127k | continue; |
810 | 441k | |
811 | 441k | // If the only remaining users of the function are dead constants, remove |
812 | 441k | // them. |
813 | 441k | F->removeDeadConstantUsers(); |
814 | 441k | |
815 | 441k | if (!F->isDefTriviallyDead()) |
816 | 280k | continue; |
817 | 161k | |
818 | 161k | // It is unsafe to drop a function with discardable linkage from a COMDAT |
819 | 161k | // without also dropping the other members of the COMDAT. |
820 | 161k | // The inliner doesn't visit non-function entities which are in COMDAT |
821 | 161k | // groups so it is unsafe to do so *unless* the linkage is local. |
822 | 161k | if (!F->hasLocalLinkage()) { |
823 | 161k | if (F->hasComdat()) { |
824 | 59 | DeadFunctionsInComdats.push_back(F); |
825 | 59 | continue; |
826 | 59 | } |
827 | 161k | } |
828 | 161k | |
829 | 161k | RemoveCGN(CGN); |
830 | 161k | } |
831 | 25.1k | if (!DeadFunctionsInComdats.empty()) { |
832 | 21 | // Filter out the functions whose comdats remain alive. |
833 | 21 | filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats); |
834 | 21 | // Remove the rest. |
835 | 21 | for (Function *F : DeadFunctionsInComdats) |
836 | 55 | RemoveCGN(CG[F]); |
837 | 21 | } |
838 | 25.1k | |
839 | 25.1k | if (FunctionsToRemove.empty()) |
840 | 23.0k | return false; |
841 | 2.07k | |
842 | 2.07k | // Now that we know which functions to delete, do so. We didn't want to do |
843 | 2.07k | // this inline, because that would invalidate our CallGraph::iterator |
844 | 2.07k | // objects. :( |
845 | 2.07k | // |
846 | 2.07k | // Note that it doesn't matter that we are iterating over a non-stable order |
847 | 2.07k | // here to do this, it doesn't matter which order the functions are deleted |
848 | 2.07k | // in. |
849 | 2.07k | array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); |
850 | 2.07k | FunctionsToRemove.erase( |
851 | 2.07k | std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), |
852 | 2.07k | FunctionsToRemove.end()); |
853 | 161k | for (CallGraphNode *CGN : FunctionsToRemove) { |
854 | 161k | delete CG.removeFunctionFromModule(CGN); |
855 | 161k | ++NumDeleted; |
856 | 161k | } |
857 | 2.07k | return true; |
858 | 2.07k | } |
859 | | |
860 | 990 | InlinerPass::~InlinerPass() { |
861 | 990 | if (ImportedFunctionsStats) { |
862 | 2 | assert(InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No); |
863 | 2 | ImportedFunctionsStats->dump(InlinerFunctionImportStats == |
864 | 2 | InlinerFunctionImportStatsOpts::Verbose); |
865 | 2 | } |
866 | 990 | } |
867 | | |
868 | | PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, |
869 | | CGSCCAnalysisManager &AM, LazyCallGraph &CG, |
870 | 1.81k | CGSCCUpdateResult &UR) { |
871 | 1.81k | const ModuleAnalysisManager &MAM = |
872 | 1.81k | AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager(); |
873 | 1.81k | bool Changed = false; |
874 | 1.81k | |
875 | 1.81k | assert(InitialC.size() > 0 && "Cannot handle an empty SCC!"); |
876 | 1.81k | Module &M = *InitialC.begin()->getFunction().getParent(); |
877 | 1.81k | ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M); |
878 | 1.81k | |
879 | 1.81k | if (!ImportedFunctionsStats && |
880 | 1.81k | InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No1.79k ) { |
881 | 2 | ImportedFunctionsStats = |
882 | 2 | llvm::make_unique<ImportedFunctionsInliningStatistics>(); |
883 | 2 | ImportedFunctionsStats->setModuleInfo(M); |
884 | 2 | } |
885 | 1.81k | |
886 | 1.81k | // We use a single common worklist for calls across the entire SCC. We |
887 | 1.81k | // process these in-order and append new calls introduced during inlining to |
888 | 1.81k | // the end. |
889 | 1.81k | // |
890 | 1.81k | // Note that this particular order of processing is actually critical to |
891 | 1.81k | // avoid very bad behaviors. Consider *highly connected* call graphs where |
892 | 1.81k | // each function contains a small amount of code and a couple of calls to
893 | 1.81k | // other functions. Because the LLVM inliner is fundamentally a bottom-up |
894 | 1.81k | // inliner, it can handle gracefully the fact that these all appear to be |
895 | 1.81k | // reasonable inlining candidates as it will flatten things until they become |
896 | 1.81k | // too big to inline, and then move on and flatten another batch. |
897 | 1.81k | // |
898 | 1.81k | // However, when processing call edges *within* an SCC we cannot rely on this |
899 | 1.81k | // bottom-up behavior. As a consequence, with heavily connected *SCCs* of |
900 | 1.81k | // functions we can end up incrementally inlining N calls into each of |
901 | 1.81k | // N functions because each incremental inlining decision looks good and we |
902 | 1.81k | // don't have a topological ordering to prevent explosions. |
903 | 1.81k | // |
904 | 1.81k | // To compensate for this, we don't process transitive edges made immediate |
905 | 1.81k | // by inlining until we've done one pass of inlining across the entire SCC. |
906 | 1.81k | // Large, highly connected SCCs still lead to some amount of code bloat in |
907 | 1.81k | // this model, but it is uniformly spread across all the functions in the SCC |
908 | 1.81k | // and eventually they all become too large to inline, rather than |
909 | 1.81k | // incrementally making a single function grow in a super linear fashion.
910 | 1.81k | SmallVector<std::pair<CallSite, int>, 16> Calls; |
911 | 1.81k | |
912 | 1.81k | FunctionAnalysisManager &FAM = |
913 | 1.81k | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG) |
914 | 1.81k | .getManager(); |
915 | 1.81k | |
916 | 1.81k | // Populate the initial list of calls in this SCC. |
917 | 1.86k | for (auto &N : InitialC) { |
918 | 1.86k | auto &ORE = |
919 | 1.86k | FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction()); |
920 | 1.86k | // We want to generally process call sites top-down in order for |
921 | 1.86k | // simplifications stemming from replacing the call with the returned value |
922 | 1.86k | // after inlining to be visible to subsequent inlining decisions. |
923 | 1.86k | // FIXME: Using instructions sequence is a really bad way to do this. |
924 | 1.86k | // Instead we should do an actual RPO walk of the function body. |
925 | 1.86k | for (Instruction &I : instructions(N.getFunction())) |
926 | 11.1k | if (auto CS = CallSite(&I)) |
927 | 5.01k | if (Function *Callee = CS.getCalledFunction()) { |
928 | 4.94k | if (!Callee->isDeclaration()) |
929 | 974 | Calls.push_back({CS, -1}); |
930 | 3.97k | else if (!isa<IntrinsicInst>(I)) { |
931 | 2.80k | using namespace ore; |
932 | 2.80k | setInlineRemark(CS, "unavailable definition"); |
933 | 2.80k | ORE.emit([&]() { |
934 | 14 | return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) |
935 | 14 | << NV("Callee", Callee) << " will not be inlined into " |
936 | 14 | << NV("Caller", CS.getCaller()) |
937 | 14 | << " because its definition is unavailable" |
938 | 14 | << setIsVerbose(); |
939 | 14 | }); |
940 | 2.80k | } |
941 | 4.94k | } |
942 | 1.86k | } |
943 | 1.81k | if (Calls.empty()) |
944 | 1.10k | return PreservedAnalyses::all(); |
945 | 710 | |
946 | 710 | // Capture updatable variables for the current SCC and RefSCC. |
947 | 710 | auto *C = &InitialC; |
948 | 710 | auto *RC = &C->getOuterRefSCC(); |
949 | 710 | |
950 | 710 | // When inlining a callee produces new call sites, we want to keep track of |
951 | 710 | // the fact that they were inlined from the callee. This allows us to avoid |
952 | 710 | // infinite inlining in some obscure cases. To represent this, we use an |
953 | 710 | // index into the InlineHistory vector. |
954 | 710 | SmallVector<std::pair<Function *, int>, 16> InlineHistory; |
955 | 710 | |
956 | 710 | // Track a set vector of inlined callees so that we can augment the caller |
957 | 710 | // with all of their edges in the call graph before pruning out the ones that |
958 | 710 | // got simplified away. |
959 | 710 | SmallSetVector<Function *, 4> InlinedCallees; |
960 | 710 | |
961 | 710 | // Track the dead functions to delete once finished with inlining calls. We |
962 | 710 | // defer deleting these to make it easier to handle the call graph updates. |
963 | 710 | SmallVector<Function *, 4> DeadFunctions; |
964 | 710 | |
965 | 710 | // Loop forward over all of the calls. Note that we cannot cache the size as |
966 | 710 | // inlining can introduce new calls that need to be processed. |
967 | 1.48k | for (int i = 0; i < (int)Calls.size(); ++i774 ) { |
968 | 774 | // We expect the calls to typically be batched with sequences of calls that |
969 | 774 | // have the same caller, so we first set up some shared infrastructure for |
970 | 774 | // this caller. We also do any pruning we can at this layer on the caller |
971 | 774 | // alone. |
972 | 774 | Function &F = *Calls[i].first.getCaller(); |
973 | 774 | LazyCallGraph::Node &N = *CG.lookup(F); |
974 | 774 | if (CG.lookupSCC(N) != C) |
975 | 8 | continue; |
976 | 766 | if (F.hasOptNone()) { |
977 | 0 | setInlineRemark(Calls[i].first, "optnone attribute"); |
978 | 0 | continue; |
979 | 0 | } |
980 | 766 | |
981 | 766 | LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"); |
982 | 766 | |
983 | 766 | // Get a FunctionAnalysisManager via a proxy for this particular node. We |
984 | 766 | // do this each time we visit a node as the SCC may have changed and as |
985 | 766 | // we're going to mutate this particular function we want to make sure the |
986 | 766 | // proxy is in place to forward any invalidation events. We can use the |
987 | 766 | // manager we get here for looking up results for functions other than this |
988 | 766 | // node however because those functions aren't going to be mutated by this |
989 | 766 | // pass. |
990 | 766 | FunctionAnalysisManager &FAM = |
991 | 766 | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG) |
992 | 766 | .getManager(); |
993 | 766 | |
994 | 766 | // Get the remarks emission analysis for the caller. |
995 | 766 | auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); |
996 | 766 | |
997 | 766 | std::function<AssumptionCache &(Function &)> GetAssumptionCache = |
998 | 1.28k | [&](Function &F) -> AssumptionCache & { |
999 | 1.28k | return FAM.getResult<AssumptionAnalysis>(F); |
1000 | 1.28k | }; |
1001 | 766 | auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { |
1002 | 509 | return FAM.getResult<BlockFrequencyAnalysis>(F); |
1003 | 509 | }; |
1004 | 766 | |
1005 | 961 | auto GetInlineCost = [&](CallSite CS) { |
1006 | 961 | Function &Callee = *CS.getCalledFunction(); |
1007 | 961 | auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee); |
1008 | 961 | bool RemarksEnabled = |
1009 | 961 | Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled( |
1010 | 961 | DEBUG_TYPE); |
1011 | 961 | return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params, |
1012 | 961 | CalleeTTI, GetAssumptionCache, {GetBFI}, PSI, |
1013 | 961 | RemarksEnabled ? &ORE19 : nullptr942 ); |
1014 | 961 | }; |
1015 | 766 | |
1016 | 766 | // Now process as many calls as we have within this caller in the sequence.
1017 | 766 | // We bail out as soon as the caller has to change so we can update the |
1018 | 766 | // call graph and prepare the context of that new caller. |
1019 | 766 | bool DidInline = false; |
1020 | 1.77k | for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F1.07k ; ++i1.01k ) { |
1021 | 1.01k | int InlineHistoryID; |
1022 | 1.01k | CallSite CS; |
1023 | 1.01k | std::tie(CS, InlineHistoryID) = Calls[i]; |
1024 | 1.01k | Function &Callee = *CS.getCalledFunction(); |
1025 | 1.01k | |
1026 | 1.01k | if (InlineHistoryID != -1 && |
1027 | 1.01k | InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)53 ) { |
1028 | 1 | setInlineRemark(CS, "recursive"); |
1029 | 1 | continue; |
1030 | 1 | } |
1031 | 1.01k | |
1032 | 1.01k | // Check if this inlining may repeat breaking an SCC apart that has |
1033 | 1.01k | // already been split once before. In that case, inlining here may |
1034 | 1.01k | // trigger infinite inlining, much like is prevented within the inliner |
1035 | 1.01k | // itself by the InlineHistory above, but spread across CGSCC iterations |
1036 | 1.01k | // and thus hidden from the full inline history. |
1037 | 1.01k | if (CG.lookupSCC(*CG.lookup(Callee)) == C && |
1038 | 1.01k | UR.InlinedInternalEdges.count({&N, C})169 ) { |
1039 | 63 | LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node " |
1040 | 63 | "previously split out of this SCC by inlining: " |
1041 | 63 | << F.getName() << " -> " << Callee.getName() << "\n"); |
1042 | 63 | setInlineRemark(CS, "recursive SCC split"); |
1043 | 63 | continue; |
1044 | 63 | } |
1045 | 949 | |
1046 | 949 | Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE); |
1047 | 949 | // Check whether we want to inline this callsite. |
1048 | 949 | if (!OIC.hasValue()) { |
1049 | 2 | setInlineRemark(CS, "deferred"); |
1050 | 2 | continue; |
1051 | 2 | } |
1052 | 947 | |
1053 | 947 | if (!OIC.getValue()) { |
1054 | 192 | // shouldInline() call returned a negative inline cost that explains |
1055 | 192 | // why this callsite should not be inlined. |
1056 | 192 | setInlineRemark(CS, inlineCostStr(*OIC)); |
1057 | 192 | continue; |
1058 | 192 | } |
1059 | 755 | |
1060 | 755 | // Setup the data structure used to plumb customization into the |
1061 | 755 | // `InlineFunction` routine. |
1062 | 755 | InlineFunctionInfo IFI( |
1063 | 755 | /*cg=*/nullptr, &GetAssumptionCache, PSI, |
1064 | 755 | &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())), |
1065 | 755 | &FAM.getResult<BlockFrequencyAnalysis>(Callee)); |
1066 | 755 | |
1067 | 755 | // Get DebugLoc to report. CS will be invalid after Inliner. |
1068 | 755 | DebugLoc DLoc = CS->getDebugLoc(); |
1069 | 755 | BasicBlock *Block = CS.getParent(); |
1070 | 755 | |
1071 | 755 | using namespace ore; |
1072 | 755 | |
1073 | 755 | InlineResult IR = InlineFunction(CS, IFI); |
1074 | 755 | if (!IR) { |
1075 | 0 | setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC)); |
1076 | 0 | ORE.emit([&]() { |
1077 | 0 | return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) |
1078 | 0 | << NV("Callee", &Callee) << " will not be inlined into " |
1079 | 0 | << NV("Caller", &F) << ": " << NV("Reason", IR.message); |
1080 | 0 | }); |
1081 | 0 | continue; |
1082 | 0 | } |
1083 | 755 | DidInline = true; |
1084 | 755 | InlinedCallees.insert(&Callee); |
1085 | 755 | |
1086 | 755 | ++NumInlined; |
1087 | 755 | |
1088 | 755 | emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC); |
1089 | 755 | |
1090 | 755 | // Add any new callsites to defined functions to the worklist. |
1091 | 755 | if (!IFI.InlinedCallSites.empty()) { |
1092 | 437 | int NewHistoryID = InlineHistory.size(); |
1093 | 437 | InlineHistory.push_back({&Callee, InlineHistoryID}); |
1094 | 437 | for (CallSite &CS : reverse(IFI.InlinedCallSites)) |
1095 | 1.28k | if (Function *NewCallee = CS.getCalledFunction()) |
1096 | 1.27k | if (!NewCallee->isDeclaration()) |
1097 | 53 | Calls.push_back({CS, NewHistoryID}); |
1098 | 437 | } |
1099 | 755 | |
1100 | 755 | if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) |
1101 | 30 | ImportedFunctionsStats->recordInline(F, Callee); |
1102 | 755 | |
1103 | 755 | // Merge the attributes based on the inlining. |
1104 | 755 | AttributeFuncs::mergeAttributesForInlining(F, Callee); |
1105 | 755 | |
1106 | 755 | // For local functions, check whether this makes the callee trivially |
1107 | 755 | // dead. In that case, we can drop the body of the function eagerly |
1108 | 755 | // which may reduce the number of callers of other functions to one, |
1109 | 755 | // changing inline cost thresholds. |
1110 | 755 | if (Callee.hasLocalLinkage()) { |
1111 | 402 | // To check this we also need to nuke any dead constant uses (perhaps |
1112 | 402 | // made dead by this operation on other functions). |
1113 | 402 | Callee.removeDeadConstantUsers(); |
1114 | 402 | if (Callee.use_empty() && !CG.isLibFunction(Callee)306 ) { |
1115 | 305 | Calls.erase( |
1116 | 305 | std::remove_if(Calls.begin() + i + 1, Calls.end(), |
1117 | 305 | [&Callee](const std::pair<CallSite, int> &Call) { |
1118 | 46 | return Call.first.getCaller() == &Callee; |
1119 | 46 | }), |
1120 | 305 | Calls.end()); |
1121 | 305 | // Clear the body and queue the function itself for deletion when we |
1122 | 305 | // finish inlining and call graph updates. |
1123 | 305 | // Note that after this point, it is an error to do anything other |
1124 | 305 | // than use the callee's address or delete it. |
1125 | 305 | Callee.dropAllReferences(); |
1126 | 305 | assert(find(DeadFunctions, &Callee) == DeadFunctions.end() && |
1127 | 305 | "Cannot put cause a function to become dead twice!"); |
1128 | 305 | DeadFunctions.push_back(&Callee); |
1129 | 305 | } |
1130 | 402 | } |
1131 | 755 | } |
1132 | 766 | |
1133 | 766 | // Back the call index up by one to put us in a good position to go around |
1134 | 766 | // the outer loop. |
1135 | 766 | --i; |
1136 | 766 | |
1137 | 766 | if (!DidInline) |
1138 | 142 | continue; |
1139 | 624 | Changed = true; |
1140 | 624 | |
1141 | 624 | // Add all the inlined callees' edges as ref edges to the caller. These are |
1142 | 624 | // by definition trivial edges as we always have *some* transitive ref edge |
1143 | 624 | // chain. While in some cases these edges are direct calls inside the |
1144 | 624 | // callee, they have to be modeled in the inliner as reference edges as |
1145 | 624 | // there may be a reference edge anywhere along the chain from the current |
1146 | 624 | // caller to the callee that causes the whole thing to appear like |
1147 | 624 | // a (transitive) reference edge that will require promotion to a call edge |
1148 | 624 | // below. |
1149 | 705 | for (Function *InlinedCallee : InlinedCallees) { |
1150 | 705 | LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee); |
1151 | 705 | for (LazyCallGraph::Edge &E : *CalleeN) |
1152 | 104 | RC->insertTrivialRefEdge(N, E.getNode()); |
1153 | 705 | } |
1154 | 624 | |
1155 | 624 | // At this point, since we have made changes we have at least removed |
1156 | 624 | // a call instruction. However, in the process we do some incremental |
1157 | 624 | // simplification of the surrounding code. This simplification can |
1158 | 624 | // essentially do all of the same things as a function pass and we can |
1159 | 624 | // re-use the exact same logic for updating the call graph to reflect the |
1160 | 624 | // change. |
1161 | 624 | LazyCallGraph::SCC *OldC = C; |
1162 | 624 | C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR); |
1163 | 624 | LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n"); |
1164 | 624 | RC = &C->getOuterRefSCC(); |
1165 | 624 | |
1166 | 624 | // If this causes an SCC to split apart into multiple smaller SCCs, there |
1167 | 624 | // is a subtle risk we need to prepare for. Other transformations may |
1168 | 624 | // expose an "infinite inlining" opportunity later, and because of the SCC |
1169 | 624 | // mutation, we will revisit this function and potentially re-inline. If we |
1170 | 624 | // do, and that re-inlining also has the potentially to mutate the SCC |
1171 | 624 | // structure, the infinite inlining problem can manifest through infinite |
1172 | 624 | // SCC splits and merges. To avoid this, we capture the originating caller |
1173 | 624 | // node and the SCC containing the call edge. This is a slight over |
1174 | 624 | // approximation of the possible inlining decisions that must be avoided, |
1175 | 624 | // but is relatively efficient to store. We use C != OldC to know when |
1176 | 624 | // a new SCC is generated and the original SCC may be generated via merge |
1177 | 624 | // in later iterations. |
1178 | 624 | // |
1179 | 624 | // It is also possible that even if no new SCC is generated |
1180 | 624 | // (i.e., C == OldC), the original SCC could be split and then merged |
1181 | 624 | // into the same one as itself. and the original SCC will be added into |
1182 | 624 | // UR.CWorklist again, we want to catch such cases too. |
1183 | 624 | // |
1184 | 624 | // FIXME: This seems like a very heavyweight way of retaining the inline |
1185 | 624 | // history, we should look for a more efficient way of tracking it. |
1186 | 624 | if ((C != OldC || UR.CWorklist.count(OldC)608 ) && |
1187 | 624 | llvm::any_of(InlinedCallees, [&](Function *Callee) 26 { |
1188 | 27 | return CG.lookupSCC(*CG.lookup(*Callee)) == OldC; |
1189 | 27 | })) { |
1190 | 22 | LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, " |
1191 | 22 | "retaining this to avoid infinite inlining.\n"); |
1192 | 22 | UR.InlinedInternalEdges.insert({&N, OldC}); |
1193 | 22 | } |
1194 | 624 | InlinedCallees.clear(); |
1195 | 624 | } |
1196 | 710 | |
1197 | 710 | // Now that we've finished inlining all of the calls across this SCC, delete |
1198 | 710 | // all of the trivially dead functions, updating the call graph and the CGSCC |
1199 | 710 | // pass manager in the process. |
1200 | 710 | // |
1201 | 710 | // Note that this walks a pointer set which has non-deterministic order but |
1202 | 710 | // that is OK as all we do is delete things and add pointers to unordered |
1203 | 710 | // sets. |
1204 | 710 | for (Function *DeadF : DeadFunctions) { |
1205 | 305 | // Get the necessary information out of the call graph and nuke the |
1206 | 305 | // function there. Also, clear out any cached analyses. |
1207 | 305 | auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); |
1208 | 305 | FunctionAnalysisManager &FAM = |
1209 | 305 | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG) |
1210 | 305 | .getManager(); |
1211 | 305 | FAM.clear(*DeadF, DeadF->getName()); |
1212 | 305 | AM.clear(DeadC, DeadC.getName()); |
1213 | 305 | auto &DeadRC = DeadC.getOuterRefSCC(); |
1214 | 305 | CG.removeDeadFunction(*DeadF); |
1215 | 305 | |
1216 | 305 | // Mark the relevant parts of the call graph as invalid so we don't visit |
1217 | 305 | // them. |
1218 | 305 | UR.InvalidatedSCCs.insert(&DeadC); |
1219 | 305 | UR.InvalidatedRefSCCs.insert(&DeadRC); |
1220 | 305 | |
1221 | 305 | // And delete the actual function from the module. |
1222 | 305 | M.getFunctionList().erase(DeadF); |
1223 | 305 | ++NumDeleted; |
1224 | 305 | } |
1225 | 710 | |
1226 | 710 | if (!Changed) |
1227 | 98 | return PreservedAnalyses::all(); |
1228 | 612 | |
1229 | 612 | // Even if we change the IR, we update the core CGSCC data structures and so |
1230 | 612 | // can preserve the proxy to the function analysis manager. |
1231 | 612 | PreservedAnalyses PA; |
1232 | 612 | PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); |
1233 | 612 | return PA; |
1234 | 612 | } |