Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/IPO/Inliner.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- Inliner.cpp - Code common to all inliners --------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the mechanics required to implement inlining without
10
// missing any calls and updating the call graph.  The decisions of which calls
11
// are profitable to inline are implemented elsewhere.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "llvm/Transforms/IPO/Inliner.h"
16
#include "llvm/ADT/DenseMap.h"
17
#include "llvm/ADT/None.h"
18
#include "llvm/ADT/Optional.h"
19
#include "llvm/ADT/STLExtras.h"
20
#include "llvm/ADT/SetVector.h"
21
#include "llvm/ADT/SmallPtrSet.h"
22
#include "llvm/ADT/SmallVector.h"
23
#include "llvm/ADT/Statistic.h"
24
#include "llvm/ADT/StringRef.h"
25
#include "llvm/Analysis/AliasAnalysis.h"
26
#include "llvm/Analysis/AssumptionCache.h"
27
#include "llvm/Analysis/BasicAliasAnalysis.h"
28
#include "llvm/Analysis/BlockFrequencyInfo.h"
29
#include "llvm/Analysis/CGSCCPassManager.h"
30
#include "llvm/Analysis/CallGraph.h"
31
#include "llvm/Analysis/InlineCost.h"
32
#include "llvm/Analysis/LazyCallGraph.h"
33
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
34
#include "llvm/Analysis/ProfileSummaryInfo.h"
35
#include "llvm/Analysis/TargetLibraryInfo.h"
36
#include "llvm/Analysis/TargetTransformInfo.h"
37
#include "llvm/Transforms/Utils/Local.h"
38
#include "llvm/IR/Attributes.h"
39
#include "llvm/IR/BasicBlock.h"
40
#include "llvm/IR/CallSite.h"
41
#include "llvm/IR/DataLayout.h"
42
#include "llvm/IR/DebugLoc.h"
43
#include "llvm/IR/DerivedTypes.h"
44
#include "llvm/IR/DiagnosticInfo.h"
45
#include "llvm/IR/Function.h"
46
#include "llvm/IR/InstIterator.h"
47
#include "llvm/IR/Instruction.h"
48
#include "llvm/IR/Instructions.h"
49
#include "llvm/IR/IntrinsicInst.h"
50
#include "llvm/IR/Metadata.h"
51
#include "llvm/IR/Module.h"
52
#include "llvm/IR/PassManager.h"
53
#include "llvm/IR/User.h"
54
#include "llvm/IR/Value.h"
55
#include "llvm/Pass.h"
56
#include "llvm/Support/Casting.h"
57
#include "llvm/Support/CommandLine.h"
58
#include "llvm/Support/Debug.h"
59
#include "llvm/Support/raw_ostream.h"
60
#include "llvm/Transforms/Utils/Cloning.h"
61
#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
62
#include "llvm/Transforms/Utils/ModuleUtils.h"
63
#include <algorithm>
64
#include <cassert>
65
#include <functional>
66
#include <sstream>
67
#include <tuple>
68
#include <utility>
69
#include <vector>
70
71
using namespace llvm;

// Note: DEBUG_TYPE must be defined before the STATISTIC macros below, which
// key the statistics on this debug name.
#define DEBUG_TYPE "inline"

STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");

// This weirdly named statistic tracks the number of times that, when attempting
// to inline a function A into B, we analyze the callers of B in order to see
// if those would be more profitable and blocked inline steps.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");

/// Flag to disable manual alloca merging.
///
/// Merging of allocas was originally done as a stack-size saving technique
/// prior to LLVM's code generator having support for stack coloring based on
/// lifetime markers. It is now in the process of being removed. To experiment
/// with disabling it and relying fully on lifetime marker based stack
/// coloring, you can pass this flag to LLVM.
static cl::opt<bool>
    DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
                                cl::init(false), cl::Hidden);

namespace {

// Granularity of the -inliner-function-import-stats reporting; see the
// cl::opt below.
enum class InlinerFunctionImportStatsOpts {
  No = 0,
  Basic = 1,
  Verbose = 2,
};

} // end anonymous namespace

static cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
    "inliner-function-import-stats",
    cl::init(InlinerFunctionImportStatsOpts::No),
    cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
                          "basic statistics"),
               clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
                          "printing of statistics for each inlined function")),
    cl::Hidden, cl::desc("Enable inliner stats for imported functions"));

/// Flag to add inline messages as callsite attributes 'inline-remark'.
static cl::opt<bool>
    InlineRemarkAttribute("inline-remark-attribute", cl::init(false),
                          cl::Hidden,
                          cl::desc("Enable adding inline-remark attribute to"
                                   " callsites processed by inliner but decided"
                                   " to be not inlined"));
122
123
13.6k
// Construct the legacy-PM inliner base; registers with the CallGraphSCCPass
// machinery under the derived pass's ID.
LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}

// As above, but additionally sets the InsertLifetime flag that is later
// forwarded to InlineFunction (see InlineCallIfPossible).
LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
    : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
127
128
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
  // Analyses consumed while computing inline costs and performing inlining.
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  // Pull in whatever AA pipeline the legacy pass manager is configured with.
  getAAResultsAnalysisUsage(AU);
  // The base class declares the call-graph requirement/preservation itself.
  CallGraphSCCPass::getAnalysisUsage(AU);
}
138
139
// Map from an array type to the inlined allocas of that type that are
// available for reuse in the current caller (see mergeInlinedArrayAllocas).
using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>;
140
141
/// Look at all of the allocas that we inlined through this call site.  If we
/// have already inlined other allocas through other calls into this function,
/// then we know that they have disjoint lifetimes and that we can merge them.
///
/// There are many heuristics possible for merging these allocas, and the
/// different options have different tradeoffs.  One thing that we *really*
/// don't want to hurt is SRoA: once inlining happens, often allocas are no
/// longer address taken and so they can be promoted.
///
/// Our "solution" for that is to only merge allocas whose outermost type is an
/// array type.  These are usually not promoted because someone is using a
/// variable index into them.  These are also often the most important ones to
/// merge.
///
/// A better solution would be to have real memory lifetime markers in the IR
/// and not have the inliner do any merging of allocas at all.  This would
/// allow the backend to do proper stack slot coloring of all allocas that
/// *actually make it to the backend*, which is really what we want.
///
/// Because we don't have this information, we do this simple and useful hack.
///
/// \p IFI.StaticAllocas lists the static allocas produced by the inline that
/// just completed; entries merged away are nulled out in place.
/// \p InlineHistory is -1 for a top-level call site of the SCC; any other
/// value means the call site itself resulted from inlining, and merging is
/// skipped (see the comment below).
static void mergeInlinedArrayAllocas(
    Function *Caller, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
  // Allocas already claimed as a merge target during *this* inline step.
  SmallPtrSet<AllocaInst *, 16> UsedAllocas;

  // When processing our SCC, check to see if CS was inlined from some other
  // call site.  For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing A
  // and decide to inline B into A.  Doing this makes an alloca available for
  // reuse and makes a callsite (C) available for inlining.  When we process
  // the C call site we don't want to do any alloca merging between X and Y
  // because their scopes are not disjoint.  We could make this smarter by
  // keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
    return;

  // Loop over all the allocas we have so far and see if they can be merged with
  // a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e;
       ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (!ATy || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note
    // that we have to be careful not to reuse the same "available" alloca for
    // multiple different allocas that we just inlined, we use the 'UsedAllocas'
    // set to keep track of which "available" allocas are being used by this
    // function.  Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (AllocaInst *AvailableAlloca : AllocasForType) {
      unsigned Align1 = AI->getAlignment(),
               Align2 = AvailableAlloca->getAlignment();

      // The available alloca has to be in the right function, not in some other
      // function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca).second)
        continue;

      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
      // success!
      LLVM_DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI
                        << "\n\t\tINTO: " << *AvailableAlloca << '\n');

      // Move affected dbg.declare calls immediately after the new alloca to
      // avoid the situation when a dbg.declare precedes its alloca.
      if (auto *L = LocalAsMetadata::getIfExists(AI))
        if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
          for (User *U : MDV->users())
            if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
              DDI->moveBefore(AvailableAlloca->getNextNode());

      AI->replaceAllUsesWith(AvailableAlloca);

      // Keep the stricter of the two alignments.  A zero alignment means the
      // ABI default, so materialize it from the data layout before comparing.
      if (Align1 != Align2) {
        if (!Align1 || !Align2) {
          const DataLayout &DL = Caller->getParent()->getDataLayout();
          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());

          Align1 = Align1 ? Align1 : TypeAlign;
          Align2 = Align2 ? Align2 : TypeAlign;
        }

        if (Align1 > Align2)
          AvailableAlloca->setAlignment(AI->getAlignment());
      }

      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = nullptr;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and mark
    // it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
}
265
266
/// If it is possible to inline the specified call site,
/// do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR.  The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller.  If we are able to
/// inline this call site we attempt to reuse already available allocas or add
/// any new allocas to the set if not possible.
///
/// Returns the InlineResult from InlineFunction; on failure the IR is left
/// untouched and no book-keeping is performed.
static InlineResult InlineCallIfPossible(
    CallSite CS, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
    bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
    ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Alias analysis results for the *callee*, handed to InlineFunction.
  AAResults &AAR = AARGetter(*Callee);

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  InlineResult IR = InlineFunction(CS, IFI, &AAR, InsertLifetime);
  if (!IR)
    return IR;

  // Record the inline for the -inliner-function-import-stats report.
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.recordInline(*Caller, *Callee);

  // Merge function attributes from the callee into the caller after a
  // successful inline.
  AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);

  if (!DisableInlinedAllocaMerging)
    mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);

  return IR; // success
}
300
301
/// Return true if inlining of CS can block the caller from being
/// inlined which is proved to be more beneficial. \p IC is the
/// estimated inline cost associated with callsite \p CS.
/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
/// caller if \p CS is suppressed for inlining.
static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
                 int &TotalSecondaryCost,
                 function_ref<InlineCost(CallSite CS)> GetInlineCost) {
  // For now we only handle local or inline functions.
  if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
    return false;
  // If the cost of inlining CS is non-positive, it is not going to prevent the
  // caller from being inlined into its callers and hence we don't need to
  // defer.
  if (IC.getCost() <= 0)
    return false;
  // Try to detect the case where the current inlining candidate caller (call
  // it B) is a static or linkonce-ODR function and is an inlining candidate
  // elsewhere, and the current candidate callee (call it C) is large enough
  // that inlining it into B would make B too big to inline later. In these
  // circumstances it may be best not to inline C into B, but to inline B into
  // its callers.
  //
  // This only applies to static and linkonce-ODR functions because those are
  // expected to be available for inlining in the translation units where they
  // are used. Thus we will always have the opportunity to make local inlining
  // decisions. Importantly the linkonce-ODR linkage covers inline functions
  // and templates in C++.
  //
  // FIXME: All of this logic should be sunk into getInlineCost. It relies on
  // the internal implementation of the inline cost metrics rather than
  // treating them as truly abstract units etc.
  TotalSecondaryCost = 0;
  // The candidate cost to be imposed upon the current function.
  int CandidateCost = IC.getCost() - 1;
  // If the caller has local linkage and can be inlined to all its callers, we
  // can apply a huge negative bonus to TotalSecondaryCost.
  bool ApplyLastCallBonus = Caller->hasLocalLinkage() && !Caller->hasOneUse();
  // This bool tracks what happens if we DO inline C into B.
  bool inliningPreventsSomeOuterInline = false;
  // Walk every use of the caller; call-site uses contribute to the secondary
  // cost, any other kind of use pins the caller in place (no last-call bonus).
  for (User *U : Caller->users()) {
    // If the caller will not be removed (either because it does not have a
    // local linkage or because the LastCallToStaticBonus has been already
    // applied), then we can exit the loop early.
    if (!ApplyLastCallBonus && TotalSecondaryCost >= IC.getCost())
      return false;
    CallSite CS2(U);

    // If this isn't a call to Caller (it could be some other sort
    // of reference) skip it.  Such references will prevent the caller
    // from being removed.
    if (!CS2 || CS2.getCalledFunction() != Caller) {
      ApplyLastCallBonus = false;
      continue;
    }

    InlineCost IC2 = GetInlineCost(CS2);
    ++NumCallerCallersAnalyzed;
    // Caller is not inlinable at this outer call site, so it will stick
    // around; the last-call bonus no longer applies.
    if (!IC2) {
      ApplyLastCallBonus = false;
      continue;
    }
    if (IC2.isAlways())
      continue;

    // See if inlining of the original callsite would erase the cost delta of
    // this callsite. We subtract off the penalty for the call instruction,
    // which we would be deleting.
    if (IC2.getCostDelta() <= CandidateCost) {
      inliningPreventsSomeOuterInline = true;
      TotalSecondaryCost += IC2.getCost();
    }
  }
  // If all outer calls to Caller would get inlined, the cost for the last
  // one is set very low by getInlineCost, in anticipation that Caller will
  // be removed entirely.  We did not account for this above unless there
  // is only one caller of Caller.
  if (ApplyLastCallBonus)
    TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;

  if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
    return true;

  return false;
}
387
388
static std::basic_ostream<char> &operator<<(std::basic_ostream<char> &R,
389
1.11M
                                            const ore::NV &Arg) {
390
1.11M
  return R << Arg.Val;
391
1.11M
}
392
393
/// Stream an InlineCost in the canonical remark form:
/// "(cost=always)", "(cost=never)", or "(cost=<C>, threshold=<T>)",
/// followed by ": <reason>" when a reason string is attached.
/// RemarkT may be an optimization remark or any std:: output stream (see the
/// ore::NV stream adapter above).
template <class RemarkT>
RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
  using namespace ore;
  if (IC.isAlways()) {
    R << "(cost=always)";
  } else if (IC.isNever()) {
    R << "(cost=never)";
  } else {
    R << "(cost=" << ore::NV("Cost", IC.getCost())
      << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
  }
  if (const char *Reason = IC.getReason())
    R << ": " << ore::NV("Reason", Reason);
  return R;
}
llvm::OptimizationRemarkMissed&& operator<<<llvm::OptimizationRemarkMissed&>(llvm::OptimizationRemarkMissed&&&, llvm::InlineCost const&)
Line
Count
Source
394
58
RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
395
58
  using namespace ore;
396
58
  if (IC.isAlways()) {
397
0
    R << "(cost=always)";
398
58
  } else if (IC.isNever()) {
399
40
    R << "(cost=never)";
400
40
  } else {
401
18
    R << "(cost=" << ore::NV("Cost", IC.getCost())
402
18
      << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
403
18
  }
404
58
  if (const char *Reason = IC.getReason())
405
40
    R << ": " << ore::NV("Reason", Reason);
406
58
  return R;
407
58
}
std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&& operator<<<std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&>(std::__1::basic_stringstream<char, std::__1::char_traits<char>, std::__1::allocator<char> >&&&, llvm::InlineCost const&)
Line
Count
Source
394
970k
RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
395
970k
  using namespace ore;
396
970k
  if (IC.isAlways()) {
397
0
    R << "(cost=always)";
398
970k
  } else if (IC.isNever()) {
399
827k
    R << "(cost=never)";
400
827k
  } else {
401
142k
    R << "(cost=" << ore::NV("Cost", IC.getCost())
402
142k
      << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
403
142k
  }
404
970k
  if (const char *Reason = IC.getReason())
405
827k
    R << ": " << ore::NV("Reason", Reason);
406
970k
  return R;
407
970k
}
llvm::OptimizationRemark&& operator<<<llvm::OptimizationRemark&>(llvm::OptimizationRemark&&&, llvm::InlineCost const&)
Line
Count
Source
394
83
RemarkT &operator<<(RemarkT &&R, const InlineCost &IC) {
395
83
  using namespace ore;
396
83
  if (IC.isAlways()) {
397
37
    R << "(cost=always)";
398
46
  } else if (IC.isNever()) {
399
0
    R << "(cost=never)";
400
46
  } else {
401
46
    R << "(cost=" << ore::NV("Cost", IC.getCost())
402
46
      << ", threshold=" << ore::NV("Threshold", IC.getThreshold()) << ")";
403
46
  }
404
83
  if (const char *Reason = IC.getReason())
405
37
    R << ": " << ore::NV("Reason", Reason);
406
83
  return R;
407
83
}
408
409
970k
/// Render \p IC as a human-readable string via the stream operator above
/// (used for LLVM_DEBUG output in shouldInline).
static std::string inlineCostStr(const InlineCost &IC) {
  std::ostringstream OS;
  OS << IC;
  return OS.str();
}
414
415
/// Return the cost only if the inliner should attempt to inline at the given
/// CallSite. If we return the cost, we will emit an optimisation remark later
/// using that cost, so we won't do so from this function.
///
/// Returns None when inlining is deferred (see shouldBeDeferred); otherwise
/// returns the InlineCost, whose bool conversion tells the caller whether to
/// proceed.
static Optional<InlineCost>
shouldInline(CallSite CS, function_ref<InlineCost(CallSite CS)> GetInlineCost,
             OptimizationRemarkEmitter &ORE) {
  using namespace ore;

  InlineCost IC = GetInlineCost(CS);
  Instruction *Call = CS.getInstruction();
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // "Always" inline: hand the cost straight back; the success remark is
  // emitted by the caller after the inline actually happens.
  if (IC.isAlways()) {
    LLVM_DEBUG(dbgs() << "    Inlining " << inlineCostStr(IC)
                      << ", Call: " << *CS.getInstruction() << "\n");
    return IC;
  }

  // "Never" inline: emit a missed-optimization remark here.
  if (IC.isNever()) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining " << inlineCostStr(IC)
                      << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
             << NV("Callee", Callee) << " not inlined into "
             << NV("Caller", Caller) << " because it should never be inlined "
             << IC;
    });
    return IC;
  }

  // Cost exceeded the threshold: IC converts to false.
  if (!IC) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining " << inlineCostStr(IC)
                      << ", Call: " << *CS.getInstruction() << "\n");
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
             << NV("Callee", Callee) << " not inlined into "
             << NV("Caller", Caller) << " because too costly to inline " << IC;
    });
    return IC;
  }

  int TotalSecondaryCost = 0;
  if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
    LLVM_DEBUG(dbgs() << "    NOT Inlining: " << *CS.getInstruction()
                      << " Cost = " << IC.getCost()
                      << ", outer Cost = " << TotalSecondaryCost << '\n');
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
                                      Call)
             << "Not inlining. Cost of inlining " << NV("Callee", Callee)
             << " increases the cost of inlining " << NV("Caller", Caller)
             << " in other contexts";
    });

    // IC does not bool() to false, so get an InlineCost that will.
    // This will not be inspected to make an error message.
    return None;
  }

  LLVM_DEBUG(dbgs() << "    Inlining " << inlineCostStr(IC)
                    << ", Call: " << *CS.getInstruction() << '\n');
  return IC;
}
479
480
/// Return true if the specified inline history ID
481
/// indicates an inline history that includes the specified function.
482
static bool InlineHistoryIncludes(
483
    Function *F, int InlineHistoryID,
484
126k
    const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
485
264k
  while (InlineHistoryID != -1) {
486
137k
    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
487
137k
           "Invalid inline history ID");
488
137k
    if (InlineHistory[InlineHistoryID].first == F)
489
127
      return true;
490
137k
    InlineHistoryID = InlineHistory[InlineHistoryID].second;
491
137k
  }
492
126k
  
return false126k
;
493
126k
}
494
495
25.1k
bool LegacyInlinerBase::doInitialization(CallGraph &CG) {
  // Seed the import-statistics collector with module-level information when
  // -inliner-function-import-stats is enabled.
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
    ImportedFunctionsStats.setModuleInfo(CG.getModule());
  return false; // No changes to CallGraph.
}
500
501
643k
/// Legacy pass entry point: inline calls within the SCC unless the SCC
/// should be skipped.
bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
  return skipSCC(SCC) ? false : inlineCalls(SCC);
}
506
507
/// Emit an optimization remark recording that \p Callee was inlined into
/// \p Caller with cost \p IC, distinguishing always-inline decisions
/// ("AlwaysInline") from cost-based ones ("Inlined").
static void emit_inlined_into(OptimizationRemarkEmitter &ORE, DebugLoc &DLoc,
                              const BasicBlock *Block, const Function &Callee,
                              const Function &Caller, const InlineCost &IC) {
  // The lambda defers the remark construction; ORE decides whether to
  // invoke it.
  ORE.emit([&]() {
    bool AlwaysInline = IC.isAlways();
    StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
    return OptimizationRemark(DEBUG_TYPE, RemarkName, DLoc, Block)
           << ore::NV("Callee", &Callee) << " inlined into "
           << ore::NV("Caller", &Caller) << " with " << IC;
  });
}
518
519
1.50M
/// When -inline-remark-attribute is enabled, record \p message on the call
/// site as an "inline-remark" function attribute; otherwise do nothing.
static void setInlineRemark(CallSite &CS, StringRef message) {
  if (InlineRemarkAttribute) {
    Attribute attr = Attribute::get(CS->getContext(), "inline-remark", message);
    CS.addAttribute(AttributeList::FunctionIndex, attr);
  }
}
526
527
/// Shared implementation behind LegacyInlinerBase::inlineCalls: collect the
/// call sites in \p SCC, repeatedly inline the ones the cost policy approves,
/// and delete callees that become trivially dead.  Iterates until no further
/// local change is made.  Returns true if any call was inlined or deleted.
static bool
inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
                std::function<AssumptionCache &(Function &)> GetAssumptionCache,
                ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI,
                bool InsertLifetime,
                function_ref<InlineCost(CallSite CS)> GetInlineCost,
                function_ref<AAResults &(Function &)> AARGetter,
                ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
  // Functions belonging to this SCC; used to recognize intra-SCC calls below.
  SmallPtrSet<Function *, 8> SCCFunctions;
  LLVM_DEBUG(dbgs() << "Inliner visiting SCC:");
  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (F)
      SCCFunctions.insert(F);
    LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
  }

  // Scan through and identify all call sites ahead of time so that we only
  // inline call sites in the original functions, not call sites that result
  // from inlining other functions.
  SmallVector<std::pair<CallSite, int>, 16> CallSites;

  // When inlining a callee produces new call sites, we want to keep track of
  // the fact that they were inlined from the callee.  This allows us to avoid
  // infinite inlining in some obscure cases.  To represent this, we use an
  // index into the InlineHistory vector.
  SmallVector<std::pair<Function *, int>, 8> InlineHistory;

  for (CallGraphNode *Node : SCC) {
    Function *F = Node->getFunction();
    if (!F || F->isDeclaration())
      continue;

    OptimizationRemarkEmitter ORE(F);
    for (BasicBlock &BB : *F)
      for (Instruction &I : BB) {
        CallSite CS(cast<Value>(&I));
        // If this isn't a call, or it is a call to an intrinsic, it can
        // never be inlined.
        if (!CS || isa<IntrinsicInst>(I))
          continue;

        // If this is a direct call to an external function, we can never inline
        // it.  If it is an indirect call, inlining may resolve it to be a
        // direct call, so we keep it.
        if (Function *Callee = CS.getCalledFunction())
          if (Callee->isDeclaration()) {
            using namespace ore;

            setInlineRemark(CS, "unavailable definition");
            ORE.emit([&]() {
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
                     << NV("Callee", Callee) << " will not be inlined into "
                     << NV("Caller", CS.getCaller())
                     << " because its definition is unavailable"
                     << setIsVerbose();
            });
            continue;
          }

        // -1 marks an original call site with no inline history.
        CallSites.push_back(std::make_pair(CS, -1));
      }
  }

  LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");

  // If there are no calls in this function, exit early.
  if (CallSites.empty())
    return false;

  // Now that we have all of the call sites, move the ones to functions in the
  // current SCC to the end of the list.  Note the `i--`: after a swap, index i
  // holds an unexamined element and must be revisited on the next iteration.
  unsigned FirstCallInSCC = CallSites.size();
  for (unsigned i = 0; i < FirstCallInSCC; ++i)
    if (Function *F = CallSites[i].first.getCalledFunction())
      if (SCCFunctions.count(F))
        std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);

  InlinedArrayAllocasTy InlinedArrayAllocas;
  InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache, PSI);

  // Now that we have all of the call sites, loop over them and inline them if
  // it looks profitable to do so.
  bool Changed = false;
  bool LocalChange;
  do {
    LocalChange = false;
    // Iterate over the outer loop because inlining functions can cause indirect
    // calls to become direct calls.
    // CallSites may be modified inside so ranged for loop can not be used.
    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
      CallSite CS = CallSites[CSi].first;

      Function *Caller = CS.getCaller();
      Function *Callee = CS.getCalledFunction();

      // We can only inline direct calls to non-declarations.
      if (!Callee || Callee->isDeclaration())
        continue;

      Instruction *Instr = CS.getInstruction();

      bool IsTriviallyDead = isInstructionTriviallyDead(Instr, &TLI);

      // Only initialized (and only read) on the !IsTriviallyDead path; the
      // trivially-dead path deletes the call without consulting history.
      int InlineHistoryID;
      if (!IsTriviallyDead) {
        // If this call site was obtained by inlining another function, verify
        // that the include path for the function did not include the callee
        // itself.  If so, we'd be recursively inlining the same function,
        // which would provide the same callsites, which would cause us to
        // infinitely inline.
        InlineHistoryID = CallSites[CSi].second;
        if (InlineHistoryID != -1 &&
            InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
          setInlineRemark(CS, "recursive");
          continue;
        }
      }

      // FIXME for new PM: because of the old PM we currently generate ORE and
      // in turn BFI on demand.  With the new PM, the ORE dependency should
      // just become a regular analysis dependency.
      OptimizationRemarkEmitter ORE(Caller);

      // None means the decision was deferred (see the "deferred" remark
      // below); a present-but-negative cost means "do not inline".
      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
      if (!OIC.hasValue()) {
        setInlineRemark(CS, "deferred");
        continue;
      }

      if (!OIC.getValue()) {
        // shouldInline() call returned a negative inline cost that explains
        // why this callsite should not be inlined.
        setInlineRemark(CS, inlineCostStr(*OIC));
        continue;
      }

      // If this call site is dead and it is to a readonly function, we should
      // just delete the call instead of trying to inline it, regardless of
      // size.  This happens because IPSCCP propagates the result out of the
      // call and then we're left with the dead call.
      if (IsTriviallyDead) {
        LLVM_DEBUG(dbgs() << "    -> Deleting dead call: " << *Instr << "\n");
        // Update the call graph by deleting the edge from Callee to Caller.
        setInlineRemark(CS, "trivially dead");
        CG[Caller]->removeCallEdgeFor(*cast<CallBase>(CS.getInstruction()));
        Instr->eraseFromParent();
        ++NumCallsDeleted;
      } else {
        // Get DebugLoc to report. CS will be invalid after Inliner.
        DebugLoc DLoc = CS->getDebugLoc();
        BasicBlock *Block = CS.getParent();

        // Attempt to inline the function.
        using namespace ore;

        InlineResult IR = InlineCallIfPossible(
            CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
            InsertLifetime, AARGetter, ImportedFunctionsStats);
        if (!IR) {
          setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
          ORE.emit([&]() {
            return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
                                            Block)
                   << NV("Callee", Callee) << " will not be inlined into "
                   << NV("Caller", Caller) << ": " << NV("Reason", IR.message);
          });
          continue;
        }
        ++NumInlined;

        emit_inlined_into(ORE, DLoc, Block, *Callee, *Caller, *OIC);

        // If inlining this function gave us any new call sites, throw them
        // onto our worklist to process.  They are useful inline candidates.
        if (!InlineInfo.InlinedCalls.empty()) {
          // Create a new inline history entry for this, so that we remember
          // that these new callsites came about due to inlining Callee.
          int NewHistoryID = InlineHistory.size();
          InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));

          for (Value *Ptr : InlineInfo.InlinedCalls)
            CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
        }
      }

      // If we inlined or deleted the last possible call site to the function,
      // delete the function body now.
      if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
          // TODO: Can remove if in SCC now.
          !SCCFunctions.count(Callee) &&
          // The function may be apparently dead, but if there are indirect
          // callgraph references to the node, we cannot delete it yet, this
          // could invalidate the CGSCC iterator.
          CG[Callee]->getNumReferences() == 0) {
        LLVM_DEBUG(dbgs() << "    -> Deleting dead function: "
                          << Callee->getName() << "\n");
        CallGraphNode *CalleeNode = CG[Callee];

        // Remove any call graph edges from the callee to its callees.
        CalleeNode->removeAllCalledFunctions();

        // Removing the node for callee from the call graph and delete it.
        delete CG.removeFunctionFromModule(CalleeNode);
        ++NumDeleted;
      }

      // Remove this call site from the list.  If possible, use
      // swap/pop_back for efficiency, but do not use it if doing so would
      // move a call site to a function in this SCC before the
      // 'FirstCallInSCC' barrier.
      if (SCC.isSingular()) {
        CallSites[CSi] = CallSites.back();
        CallSites.pop_back();
      } else {
        CallSites.erase(CallSites.begin() + CSi);
      }
      // Re-examine index CSi on the next iteration; a new element now
      // occupies it after the removal above.
      --CSi;

      Changed = true;
      LocalChange = true;
    }
  } while (LocalChange);

  return Changed;
}
755
756
853k
bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
  // Gather the analyses this legacy pass depends on and forward everything to
  // the shared implementation.
  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  ACT = &getAnalysis<AssumptionCacheTracker>();
  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  auto GetAssumptionCache = [&](Function &Fn) -> AssumptionCache & {
    return ACT->getAssumptionCache(Fn);
  };
  auto GetCost = [this](CallSite CS) { return getInlineCost(CS); };
  return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
                         GetCost, LegacyAARGetter(*this),
                         ImportedFunctionsStats);
}
768
769
/// Remove now-dead linkonce functions at the end of
770
/// processing to avoid breaking the SCC traversal.
771
13.5k
bool LegacyInlinerBase::doFinalization(CallGraph &CG) {
772
13.5k
  if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
773
2
    ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
774
2
                                InlinerFunctionImportStatsOpts::Verbose);
775
13.5k
  return removeDeadFunctions(CG);
776
13.5k
}
777
778
/// Remove dead functions that are not included in DNR (Do Not Remove) list.
///
/// When \p AlwaysInlineOnly is set, only functions carrying the always-inline
/// attribute are considered.  Candidate call-graph nodes are collected first
/// and deleted in a second phase so the CallGraph iteration stays valid.
/// Returns true if any function was removed from the module.
bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
                                            bool AlwaysInlineOnly) {
  SmallVector<CallGraphNode *, 16> FunctionsToRemove;
  SmallVector<Function *, 16> DeadFunctionsInComdats;

  // Detach a node from the call graph and queue it for deletion; actual
  // removal is deferred to the loop at the bottom of this function.
  auto RemoveCGN = [&](CallGraphNode *CGN) {
    // Remove any call graph edges from the function to its callees.
    CGN->removeAllCalledFunctions();

    // Remove any edges from the external node to the function's call graph
    // node.  These edges might have been made irrelevant due to
    // optimization of the program.
    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);

    // Removing the node for callee from the call graph and delete it.
    FunctionsToRemove.push_back(CGN);
  };

  // Scan for all of the functions, looking for ones that should now be removed
  // from the program.  Insert the dead ones in the FunctionsToRemove set.
  for (const auto &I : CG) {
    CallGraphNode *CGN = I.second.get();
    Function *F = CGN->getFunction();
    if (!F || F->isDeclaration())
      continue;

    // Handle the case when this function is called and we only want to care
    // about always-inline functions. This is a bit of a hack to share code
    // between here and the InlineAlways pass.
    if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
      continue;

    // If the only remaining users of the function are dead constants, remove
    // them.
    F->removeDeadConstantUsers();

    if (!F->isDefTriviallyDead())
      continue;

    // It is unsafe to drop a function with discardable linkage from a COMDAT
    // without also dropping the other members of the COMDAT.
    // The inliner doesn't visit non-function entities which are in COMDAT
    // groups so it is unsafe to do so *unless* the linkage is local.
    if (!F->hasLocalLinkage()) {
      if (F->hasComdat()) {
        // Defer: COMDAT members are only removable as a group (see below).
        DeadFunctionsInComdats.push_back(F);
        continue;
      }
    }

    RemoveCGN(CGN);
  }
  if (!DeadFunctionsInComdats.empty()) {
    // Filter out the functions whose comdats remain alive.
    filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats);
    // Remove the rest.
    for (Function *F : DeadFunctionsInComdats)
      RemoveCGN(CG[F]);
  }

  if (FunctionsToRemove.empty())
    return false;

  // Now that we know which functions to delete, do so.  We didn't want to do
  // this inline, because that would invalidate our CallGraph::iterator
  // objects. :(
  //
  // Note that it doesn't matter that we are iterating over a non-stable order
  // here to do this, it doesn't matter which order the functions are deleted
  // in.
  array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
  FunctionsToRemove.erase(
      std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
      FunctionsToRemove.end());
  for (CallGraphNode *CGN : FunctionsToRemove) {
    delete CG.removeFunctionFromModule(CGN);
    ++NumDeleted;
  }
  return true;
}
859
860
990
InlinerPass::~InlinerPass() {
  // The stats object is allocated lazily in run() only when collection is
  // enabled, so dump it on teardown if it exists.
  if (!ImportedFunctionsStats)
    return;
  assert(InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No);
  bool Verbose =
      InlinerFunctionImportStats == InlinerFunctionImportStatsOpts::Verbose;
  ImportedFunctionsStats->dump(Verbose);
}
867
868
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
869
                                   CGSCCAnalysisManager &AM, LazyCallGraph &CG,
870
1.81k
                                   CGSCCUpdateResult &UR) {
871
1.81k
  const ModuleAnalysisManager &MAM =
872
1.81k
      AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
873
1.81k
  bool Changed = false;
874
1.81k
875
1.81k
  assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
876
1.81k
  Module &M = *InitialC.begin()->getFunction().getParent();
877
1.81k
  ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
878
1.81k
879
1.81k
  if (!ImportedFunctionsStats &&
880
1.81k
      
InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No1.79k
) {
881
2
    ImportedFunctionsStats =
882
2
        llvm::make_unique<ImportedFunctionsInliningStatistics>();
883
2
    ImportedFunctionsStats->setModuleInfo(M);
884
2
  }
885
1.81k
886
1.81k
  // We use a single common worklist for calls across the entire SCC. We
887
1.81k
  // process these in-order and append new calls introduced during inlining to
888
1.81k
  // the end.
889
1.81k
  //
890
1.81k
  // Note that this particular order of processing is actually critical to
891
1.81k
  // avoid very bad behaviors. Consider *highly connected* call graphs where
892
1.81k
  // each function contains a small amonut of code and a couple of calls to
893
1.81k
  // other functions. Because the LLVM inliner is fundamentally a bottom-up
894
1.81k
  // inliner, it can handle gracefully the fact that these all appear to be
895
1.81k
  // reasonable inlining candidates as it will flatten things until they become
896
1.81k
  // too big to inline, and then move on and flatten another batch.
897
1.81k
  //
898
1.81k
  // However, when processing call edges *within* an SCC we cannot rely on this
899
1.81k
  // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
900
1.81k
  // functions we can end up incrementally inlining N calls into each of
901
1.81k
  // N functions because each incremental inlining decision looks good and we
902
1.81k
  // don't have a topological ordering to prevent explosions.
903
1.81k
  //
904
1.81k
  // To compensate for this, we don't process transitive edges made immediate
905
1.81k
  // by inlining until we've done one pass of inlining across the entire SCC.
906
1.81k
  // Large, highly connected SCCs still lead to some amount of code bloat in
907
1.81k
  // this model, but it is uniformly spread across all the functions in the SCC
908
1.81k
  // and eventually they all become too large to inline, rather than
909
1.81k
  // incrementally maknig a single function grow in a super linear fashion.
910
1.81k
  SmallVector<std::pair<CallSite, int>, 16> Calls;
911
1.81k
912
1.81k
  FunctionAnalysisManager &FAM =
913
1.81k
      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
914
1.81k
          .getManager();
915
1.81k
916
1.81k
  // Populate the initial list of calls in this SCC.
917
1.86k
  for (auto &N : InitialC) {
918
1.86k
    auto &ORE =
919
1.86k
        FAM.getResult<OptimizationRemarkEmitterAnalysis>(N.getFunction());
920
1.86k
    // We want to generally process call sites top-down in order for
921
1.86k
    // simplifications stemming from replacing the call with the returned value
922
1.86k
    // after inlining to be visible to subsequent inlining decisions.
923
1.86k
    // FIXME: Using instructions sequence is a really bad way to do this.
924
1.86k
    // Instead we should do an actual RPO walk of the function body.
925
1.86k
    for (Instruction &I : instructions(N.getFunction()))
926
11.1k
      if (auto CS = CallSite(&I))
927
5.01k
        if (Function *Callee = CS.getCalledFunction()) {
928
4.94k
          if (!Callee->isDeclaration())
929
974
            Calls.push_back({CS, -1});
930
3.97k
          else if (!isa<IntrinsicInst>(I)) {
931
2.80k
            using namespace ore;
932
2.80k
            setInlineRemark(CS, "unavailable definition");
933
2.80k
            ORE.emit([&]() {
934
14
              return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
935
14
                     << NV("Callee", Callee) << " will not be inlined into "
936
14
                     << NV("Caller", CS.getCaller())
937
14
                     << " because its definition is unavailable"
938
14
                     << setIsVerbose();
939
14
            });
940
2.80k
          }
941
4.94k
        }
942
1.86k
  }
943
1.81k
  if (Calls.empty())
944
1.10k
    return PreservedAnalyses::all();
945
710
946
710
  // Capture updatable variables for the current SCC and RefSCC.
947
710
  auto *C = &InitialC;
948
710
  auto *RC = &C->getOuterRefSCC();
949
710
950
710
  // When inlining a callee produces new call sites, we want to keep track of
951
710
  // the fact that they were inlined from the callee.  This allows us to avoid
952
710
  // infinite inlining in some obscure cases.  To represent this, we use an
953
710
  // index into the InlineHistory vector.
954
710
  SmallVector<std::pair<Function *, int>, 16> InlineHistory;
955
710
956
710
  // Track a set vector of inlined callees so that we can augment the caller
957
710
  // with all of their edges in the call graph before pruning out the ones that
958
710
  // got simplified away.
959
710
  SmallSetVector<Function *, 4> InlinedCallees;
960
710
961
710
  // Track the dead functions to delete once finished with inlining calls. We
962
710
  // defer deleting these to make it easier to handle the call graph updates.
963
710
  SmallVector<Function *, 4> DeadFunctions;
964
710
965
710
  // Loop forward over all of the calls. Note that we cannot cache the size as
966
710
  // inlining can introduce new calls that need to be processed.
967
1.48k
  for (int i = 0; i < (int)Calls.size(); 
++i774
) {
968
774
    // We expect the calls to typically be batched with sequences of calls that
969
774
    // have the same caller, so we first set up some shared infrastructure for
970
774
    // this caller. We also do any pruning we can at this layer on the caller
971
774
    // alone.
972
774
    Function &F = *Calls[i].first.getCaller();
973
774
    LazyCallGraph::Node &N = *CG.lookup(F);
974
774
    if (CG.lookupSCC(N) != C)
975
8
      continue;
976
766
    if (F.hasOptNone()) {
977
0
      setInlineRemark(Calls[i].first, "optnone attribute");
978
0
      continue;
979
0
    }
980
766
981
766
    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
982
766
983
766
    // Get a FunctionAnalysisManager via a proxy for this particular node. We
984
766
    // do this each time we visit a node as the SCC may have changed and as
985
766
    // we're going to mutate this particular function we want to make sure the
986
766
    // proxy is in place to forward any invalidation events. We can use the
987
766
    // manager we get here for looking up results for functions other than this
988
766
    // node however because those functions aren't going to be mutated by this
989
766
    // pass.
990
766
    FunctionAnalysisManager &FAM =
991
766
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
992
766
            .getManager();
993
766
994
766
    // Get the remarks emission analysis for the caller.
995
766
    auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
996
766
997
766
    std::function<AssumptionCache &(Function &)> GetAssumptionCache =
998
1.28k
        [&](Function &F) -> AssumptionCache & {
999
1.28k
      return FAM.getResult<AssumptionAnalysis>(F);
1000
1.28k
    };
1001
766
    auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
1002
509
      return FAM.getResult<BlockFrequencyAnalysis>(F);
1003
509
    };
1004
766
1005
961
    auto GetInlineCost = [&](CallSite CS) {
1006
961
      Function &Callee = *CS.getCalledFunction();
1007
961
      auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
1008
961
      bool RemarksEnabled =
1009
961
          Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
1010
961
              DEBUG_TYPE);
1011
961
      return getInlineCost(cast<CallBase>(*CS.getInstruction()), Params,
1012
961
                           CalleeTTI, GetAssumptionCache, {GetBFI}, PSI,
1013
961
                           RemarksEnabled ? 
&ORE19
:
nullptr942
);
1014
961
    };
1015
766
1016
766
    // Now process as many calls as we have within this caller in the sequnece.
1017
766
    // We bail out as soon as the caller has to change so we can update the
1018
766
    // call graph and prepare the context of that new caller.
1019
766
    bool DidInline = false;
1020
1.77k
    for (; i < (int)Calls.size() && 
Calls[i].first.getCaller() == &F1.07k
;
++i1.01k
) {
1021
1.01k
      int InlineHistoryID;
1022
1.01k
      CallSite CS;
1023
1.01k
      std::tie(CS, InlineHistoryID) = Calls[i];
1024
1.01k
      Function &Callee = *CS.getCalledFunction();
1025
1.01k
1026
1.01k
      if (InlineHistoryID != -1 &&
1027
1.01k
          
InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)53
) {
1028
1
        setInlineRemark(CS, "recursive");
1029
1
        continue;
1030
1
      }
1031
1.01k
1032
1.01k
      // Check if this inlining may repeat breaking an SCC apart that has
1033
1.01k
      // already been split once before. In that case, inlining here may
1034
1.01k
      // trigger infinite inlining, much like is prevented within the inliner
1035
1.01k
      // itself by the InlineHistory above, but spread across CGSCC iterations
1036
1.01k
      // and thus hidden from the full inline history.
1037
1.01k
      if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
1038
1.01k
          
UR.InlinedInternalEdges.count({&N, C})169
) {
1039
63
        LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
1040
63
                             "previously split out of this SCC by inlining: "
1041
63
                          << F.getName() << " -> " << Callee.getName() << "\n");
1042
63
        setInlineRemark(CS, "recursive SCC split");
1043
63
        continue;
1044
63
      }
1045
949
1046
949
      Optional<InlineCost> OIC = shouldInline(CS, GetInlineCost, ORE);
1047
949
      // Check whether we want to inline this callsite.
1048
949
      if (!OIC.hasValue()) {
1049
2
        setInlineRemark(CS, "deferred");
1050
2
        continue;
1051
2
      }
1052
947
1053
947
      if (!OIC.getValue()) {
1054
192
        // shouldInline() call returned a negative inline cost that explains
1055
192
        // why this callsite should not be inlined.
1056
192
        setInlineRemark(CS, inlineCostStr(*OIC));
1057
192
        continue;
1058
192
      }
1059
755
1060
755
      // Setup the data structure used to plumb customization into the
1061
755
      // `InlineFunction` routine.
1062
755
      InlineFunctionInfo IFI(
1063
755
          /*cg=*/nullptr, &GetAssumptionCache, PSI,
1064
755
          &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
1065
755
          &FAM.getResult<BlockFrequencyAnalysis>(Callee));
1066
755
1067
755
      // Get DebugLoc to report. CS will be invalid after Inliner.
1068
755
      DebugLoc DLoc = CS->getDebugLoc();
1069
755
      BasicBlock *Block = CS.getParent();
1070
755
1071
755
      using namespace ore;
1072
755
1073
755
      InlineResult IR = InlineFunction(CS, IFI);
1074
755
      if (!IR) {
1075
0
        setInlineRemark(CS, std::string(IR) + "; " + inlineCostStr(*OIC));
1076
0
        ORE.emit([&]() {
1077
0
          return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
1078
0
                 << NV("Callee", &Callee) << " will not be inlined into "
1079
0
                 << NV("Caller", &F) << ": " << NV("Reason", IR.message);
1080
0
        });
1081
0
        continue;
1082
0
      }
1083
755
      DidInline = true;
1084
755
      InlinedCallees.insert(&Callee);
1085
755
1086
755
      ++NumInlined;
1087
755
1088
755
      emit_inlined_into(ORE, DLoc, Block, Callee, F, *OIC);
1089
755
1090
755
      // Add any new callsites to defined functions to the worklist.
1091
755
      if (!IFI.InlinedCallSites.empty()) {
1092
437
        int NewHistoryID = InlineHistory.size();
1093
437
        InlineHistory.push_back({&Callee, InlineHistoryID});
1094
437
        for (CallSite &CS : reverse(IFI.InlinedCallSites))
1095
1.28k
          if (Function *NewCallee = CS.getCalledFunction())
1096
1.27k
            if (!NewCallee->isDeclaration())
1097
53
              Calls.push_back({CS, NewHistoryID});
1098
437
      }
1099
755
1100
755
      if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
1101
30
        ImportedFunctionsStats->recordInline(F, Callee);
1102
755
1103
755
      // Merge the attributes based on the inlining.
1104
755
      AttributeFuncs::mergeAttributesForInlining(F, Callee);
1105
755
1106
755
      // For local functions, check whether this makes the callee trivially
1107
755
      // dead. In that case, we can drop the body of the function eagerly
1108
755
      // which may reduce the number of callers of other functions to one,
1109
755
      // changing inline cost thresholds.
1110
755
      if (Callee.hasLocalLinkage()) {
1111
402
        // To check this we also need to nuke any dead constant uses (perhaps
1112
402
        // made dead by this operation on other functions).
1113
402
        Callee.removeDeadConstantUsers();
1114
402
        if (Callee.use_empty() && 
!CG.isLibFunction(Callee)306
) {
1115
305
          Calls.erase(
1116
305
              std::remove_if(Calls.begin() + i + 1, Calls.end(),
1117
305
                             [&Callee](const std::pair<CallSite, int> &Call) {
1118
46
                               return Call.first.getCaller() == &Callee;
1119
46
                             }),
1120
305
              Calls.end());
1121
305
          // Clear the body and queue the function itself for deletion when we
1122
305
          // finish inlining and call graph updates.
1123
305
          // Note that after this point, it is an error to do anything other
1124
305
          // than use the callee's address or delete it.
1125
305
          Callee.dropAllReferences();
1126
305
          assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
1127
305
                 "Cannot put cause a function to become dead twice!");
1128
305
          DeadFunctions.push_back(&Callee);
1129
305
        }
1130
402
      }
1131
755
    }
1132
766
1133
766
    // Back the call index up by one to put us in a good position to go around
1134
766
    // the outer loop.
1135
766
    --i;
1136
766
1137
766
    if (!DidInline)
1138
142
      continue;
1139
624
    Changed = true;
1140
624
1141
624
    // Add all the inlined callees' edges as ref edges to the caller. These are
1142
624
    // by definition trivial edges as we always have *some* transitive ref edge
1143
624
    // chain. While in some cases these edges are direct calls inside the
1144
624
    // callee, they have to be modeled in the inliner as reference edges as
1145
624
    // there may be a reference edge anywhere along the chain from the current
1146
624
    // caller to the callee that causes the whole thing to appear like
1147
624
    // a (transitive) reference edge that will require promotion to a call edge
1148
624
    // below.
1149
705
    for (Function *InlinedCallee : InlinedCallees) {
1150
705
      LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
1151
705
      for (LazyCallGraph::Edge &E : *CalleeN)
1152
104
        RC->insertTrivialRefEdge(N, E.getNode());
1153
705
    }
1154
624
1155
624
    // At this point, since we have made changes we have at least removed
1156
624
    // a call instruction. However, in the process we do some incremental
1157
624
    // simplification of the surrounding code. This simplification can
1158
624
    // essentially do all of the same things as a function pass and we can
1159
624
    // re-use the exact same logic for updating the call graph to reflect the
1160
624
    // change.
1161
624
    LazyCallGraph::SCC *OldC = C;
1162
624
    C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
1163
624
    LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
1164
624
    RC = &C->getOuterRefSCC();
1165
624
1166
624
    // If this causes an SCC to split apart into multiple smaller SCCs, there
1167
624
    // is a subtle risk we need to prepare for. Other transformations may
1168
624
    // expose an "infinite inlining" opportunity later, and because of the SCC
1169
624
    // mutation, we will revisit this function and potentially re-inline. If we
1170
624
    // do, and that re-inlining also has the potential to mutate the SCC
1171
624
    // structure, the infinite inlining problem can manifest through infinite
1172
624
    // SCC splits and merges. To avoid this, we capture the originating caller
1173
624
    // node and the SCC containing the call edge. This is a slight over
1174
624
    // approximation of the possible inlining decisions that must be avoided,
1175
624
    // but is relatively efficient to store. We use C != OldC to know when
1176
624
    // a new SCC is generated and the original SCC may be generated via merge
1177
624
    // in later iterations.
1178
624
    //
1179
624
    // It is also possible that even if no new SCC is generated
1180
624
    // (i.e., C == OldC), the original SCC could be split and then merged
1181
624
    // into the same one as itself, and the original SCC will be added into
1182
624
    // UR.CWorklist again, we want to catch such cases too.
1183
624
    //
1184
624
    // FIXME: This seems like a very heavyweight way of retaining the inline
1185
624
    // history, we should look for a more efficient way of tracking it.
1186
624
    if ((C != OldC || 
UR.CWorklist.count(OldC)608
) &&
1187
624
        
llvm::any_of(InlinedCallees, [&](Function *Callee) 26
{
1188
27
          return CG.lookupSCC(*CG.lookup(*Callee)) == OldC;
1189
27
        })) {
1190
22
      LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, "
1191
22
                           "retaining this to avoid infinite inlining.\n");
1192
22
      UR.InlinedInternalEdges.insert({&N, OldC});
1193
22
    }
1194
624
    InlinedCallees.clear();
1195
624
  }
1196
710
1197
710
  // Now that we've finished inlining all of the calls across this SCC, delete
1198
710
  // all of the trivially dead functions, updating the call graph and the CGSCC
1199
710
  // pass manager in the process.
1200
710
  //
1201
710
  // Note that this walks a pointer set which has non-deterministic order but
1202
710
  // that is OK as all we do is delete things and add pointers to unordered
1203
710
  // sets.
1204
710
  for (Function *DeadF : DeadFunctions) {
1205
305
    // Get the necessary information out of the call graph and nuke the
1206
305
    // function there. Also, clear out any cached analyses.
1207
305
    auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
1208
305
    FunctionAnalysisManager &FAM =
1209
305
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
1210
305
            .getManager();
1211
305
    FAM.clear(*DeadF, DeadF->getName());
1212
305
    AM.clear(DeadC, DeadC.getName());
1213
305
    auto &DeadRC = DeadC.getOuterRefSCC();
1214
305
    CG.removeDeadFunction(*DeadF);
1215
305
1216
305
    // Mark the relevant parts of the call graph as invalid so we don't visit
1217
305
    // them.
1218
305
    UR.InvalidatedSCCs.insert(&DeadC);
1219
305
    UR.InvalidatedRefSCCs.insert(&DeadRC);
1220
305
1221
305
    // And delete the actual function from the module.
1222
305
    M.getFunctionList().erase(DeadF);
1223
305
    ++NumDeleted;
1224
305
  }
1225
710
1226
710
  if (!Changed)
1227
98
    return PreservedAnalyses::all();
1228
612
1229
612
  // Even if we change the IR, we update the core CGSCC data structures and so
1230
612
  // can preserve the proxy to the function analysis manager.
1231
612
  PreservedAnalyses PA;
1232
612
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
1233
612
  return PA;
1234
612
}