Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Line
Count
Source (jump to first uncovered line)
1
//===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===//
2
//
3
//                      The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// This file implements PGO instrumentation using a minimum spanning tree based
11
// on the following paper:
12
//   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
13
//   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
14
//   Issue 3, pp 313-322
15
// The idea of the algorithm is based on the fact that for each node (except for
16
// the entry and exit), the sum of incoming edge counts equals the sum of
17
// outgoing edge counts. The count of a spanning-tree edge can be derived from
18
// those edges not on the spanning tree. Knuth proves this method instruments
19
// the minimum number of edges.
20
//
21
// The minimal spanning tree here is actually a maximum weight tree -- on-tree
22
// edges have higher frequencies (more likely to execute). The idea is to
23
// instrument those less frequently executed edges to reduce the runtime
24
// overhead of instrumented binaries.
25
//
26
// This file contains two passes:
27
// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
28
// count profile, and generates the instrumentation for indirect call
29
// profiling.
30
// (2) Pass PGOInstrumentationUse which reads the edge count profile and
31
// annotates the branch weights. It also reads the indirect call value
32
// profiling records and annotates the indirect call instructions.
33
//
34
// To get precise counter information, these two passes need to be invoked at
35
// the same compilation point (so they see the same IR). For pass
36
// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
37
// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
38
// the profile is opened in module level and passed to each PGOUseFunc instance.
39
// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
40
// in class FuncPGOInstrumentation.
41
//
42
// Class PGOEdge represents a CFG edge and some auxiliary information. Class
43
// BBInfo contains auxiliary information for each BB. These two classes are used
44
// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
45
// from PGOEdge and BBInfo, respectively. They contain extra data structures
46
// used in populating profile counters.
47
// The MST implementation is in Class CFGMST (CFGMST.h).
48
//
49
//===----------------------------------------------------------------------===//
50
51
#include "llvm/Transforms/PGOInstrumentation.h"
52
#include "CFGMST.h"
53
#include "llvm/ADT/STLExtras.h"
54
#include "llvm/ADT/SmallVector.h"
55
#include "llvm/ADT/Statistic.h"
56
#include "llvm/ADT/Triple.h"
57
#include "llvm/Analysis/BlockFrequencyInfo.h"
58
#include "llvm/Analysis/BranchProbabilityInfo.h"
59
#include "llvm/Analysis/CFG.h"
60
#include "llvm/Analysis/IndirectCallSiteVisitor.h"
61
#include "llvm/Analysis/LoopInfo.h"
62
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
63
#include "llvm/IR/CallSite.h"
64
#include "llvm/IR/DiagnosticInfo.h"
65
#include "llvm/IR/Dominators.h"
66
#include "llvm/IR/GlobalValue.h"
67
#include "llvm/IR/IRBuilder.h"
68
#include "llvm/IR/InstIterator.h"
69
#include "llvm/IR/Instructions.h"
70
#include "llvm/IR/IntrinsicInst.h"
71
#include "llvm/IR/MDBuilder.h"
72
#include "llvm/IR/Module.h"
73
#include "llvm/Pass.h"
74
#include "llvm/ProfileData/InstrProfReader.h"
75
#include "llvm/ProfileData/ProfileCommon.h"
76
#include "llvm/Support/BranchProbability.h"
77
#include "llvm/Support/DOTGraphTraits.h"
78
#include "llvm/Support/Debug.h"
79
#include "llvm/Support/GraphWriter.h"
80
#include "llvm/Support/JamCRC.h"
81
#include "llvm/Transforms/Instrumentation.h"
82
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
83
#include <algorithm>
84
#include <string>
85
#include <unordered_map>
86
#include <utility>
87
#include <vector>
88
89
using namespace llvm;
90
91
2
#define DEBUG_TYPE "pgo-instrumentation"
92
93
STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
94
STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
95
STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
96
STATISTIC(NumOfPGOEdge, "Number of edges.");
97
STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
98
STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
99
STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
100
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
101
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
102
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
103
104
// Command line option to specify the file to read profile from. This is
105
// mainly used for testing.
106
static cl::opt<std::string>
107
    PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
108
                       cl::value_desc("filename"),
109
                       cl::desc("Specify the path of profile data file. This is"
110
                                "mainly for test purpose."));
111
112
// Command line option to disable value profiling. The default is false:
113
// i.e. value profiling is enabled by default. This is for debug purpose.
114
static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
115
                                           cl::Hidden,
116
                                           cl::desc("Disable Value Profiling"));
117
118
// Command line option to set the maximum number of VP annotations to write to
119
// the metadata for a single indirect call callsite.
120
static cl::opt<unsigned> MaxNumAnnotations(
121
    "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
122
    cl::desc("Max number of annotations for a single indirect "
123
             "call callsite"));
124
125
// Command line option to set the maximum number of value annotations
126
// to write to the metadata for a single memop intrinsic.
127
static cl::opt<unsigned> MaxNumMemOPAnnotations(
128
    "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
129
    cl::desc("Max number of preicise value annotations for a single memop"
130
             "intrinsic"));
131
132
// Command line option to control appending FunctionHash to the name of a COMDAT
133
// function. This is to avoid the hash mismatch caused by the preinliner.
134
static cl::opt<bool> DoComdatRenaming(
135
    "do-comdat-renaming", cl::init(false), cl::Hidden,
136
    cl::desc("Append function hash to the name of COMDAT function to avoid "
137
             "function hash mismatch due to the preinliner"));
138
139
// Command line option to enable/disable the warning about missing profile
140
// information.
141
static cl::opt<bool>
142
    PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
143
                   cl::desc("Use this option to turn on/off "
144
                            "warnings about missing profile data for "
145
                            "functions."));
146
147
// Command line option to enable/disable the warning about a hash mismatch in
148
// the profile data.
149
static cl::opt<bool>
150
    NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
151
                      cl::desc("Use this option to turn off/on "
152
                               "warnings about profile cfg mismatch."));
153
154
// Command line option to enable/disable the warning about a hash mismatch in
155
// the profile data for Comdat functions, which often turns out to be false
156
// positive due to the pre-instrumentation inline.
157
static cl::opt<bool>
158
    NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
159
                            cl::Hidden,
160
                            cl::desc("The option is used to turn on/off "
161
                                     "warnings about hash mismatch for comdat "
162
                                     "functions."));
163
164
// Command line option to enable/disable select instruction instrumentation.
165
static cl::opt<bool>
166
    PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
167
                   cl::desc("Use this option to turn on/off SELECT "
168
                            "instruction instrumentation. "));
169
170
// Command line option to turn on CFG dot or text dump of raw profile counts
171
static cl::opt<PGOViewCountsType> PGOViewRawCounts(
172
    "pgo-view-raw-counts", cl::Hidden,
173
    cl::desc("A boolean option to show CFG dag or text "
174
             "with raw profile counts from "
175
             "profile data. See also option "
176
             "-pgo-view-counts. To limit graph "
177
             "display to only one function, use "
178
             "filtering option -view-bfi-func-name."),
179
    cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
180
               clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
181
               clEnumValN(PGOVCT_Text, "text", "show in text.")));
182
183
// Command line option to enable/disable memop intrinsic call.size profiling.
184
static cl::opt<bool>
185
    PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
186
                  cl::desc("Use this option to turn on/off "
187
                           "memory intrinsic size profiling."));
188
189
// Emit branch probability as optimization remarks.
190
static cl::opt<bool>
191
    EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
192
                          cl::desc("When this option is on, the annotated "
193
                                   "branch probability will be emitted as "
194
                                   " optimization remarks: -Rpass-analysis="
195
                                   "pgo-instr-use"));
196
197
// Command line option to turn on CFG dot dump after profile annotation.
198
// Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts
199
extern cl::opt<PGOViewCountsType> PGOViewCounts;
200
201
// Command line option to specify the name of the function for CFG dump
202
// Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
203
extern cl::opt<std::string> ViewBlockFreqFuncName;
204
205
namespace {
206
207
// Return a string describing the branch condition that can be
208
// used in static branch probability heuristics:
209
2
std::string getBranchCondString(Instruction *TI) {
210
2
  BranchInst *BI = dyn_cast<BranchInst>(TI);
211
2
  if (
!BI || 2
!BI->isConditional()2
)
212
0
    return std::string();
213
2
214
2
  Value *Cond = BI->getCondition();
215
2
  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
216
2
  if (!CI)
217
0
    return std::string();
218
2
219
2
  std::string result;
220
2
  raw_string_ostream OS(result);
221
2
  OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
222
2
  CI->getOperand(0)->getType()->print(OS, true);
223
2
224
2
  Value *RHS = CI->getOperand(1);
225
2
  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
226
2
  if (
CV2
) {
227
2
    if (CV->isZero())
228
2
      OS << "_Zero";
229
0
    else 
if (0
CV->isOne()0
)
230
0
      OS << "_One";
231
0
    else 
if (0
CV->isMinusOne()0
)
232
0
      OS << "_MinusOne";
233
0
    else
234
0
      OS << "_Const";
235
2
  }
236
2
  OS.flush();
237
2
  return result;
238
2
}
239
240
/// The select instruction visitor plays three roles specified
241
/// by the mode. In \c VM_counting mode, it simply counts the number of
242
/// select instructions. In \c VM_instrument mode, it inserts code to count
243
/// the number of times the TrueValue of the select is taken. In \c VM_annotate mode,
244
/// it reads the profile data and annotates the select instruction with metadata.
245
enum VisitMode { VM_counting, VM_instrument, VM_annotate };
246
class PGOUseFunc;
247
248
/// Instruction Visitor class to visit select instructions.
249
struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
250
  Function &F;
251
  unsigned NSIs = 0;             // Number of select instructions instrumented.
252
  VisitMode Mode = VM_counting;  // Visiting mode.
253
  unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
254
  unsigned TotalNumCtrs = 0;     // Total number of counters
255
  GlobalVariable *FuncNameVar = nullptr;
256
  uint64_t FuncHash = 0;
257
  PGOUseFunc *UseFunc = nullptr;
258
259
147
  SelectInstVisitor(Function &Func) : F(Func) {}
260
261
147
  void countSelects(Function &Func) {
262
147
    NSIs = 0;
263
147
    Mode = VM_counting;
264
147
    visit(Func);
265
147
  }
266
  // Visit the IR stream and instrument all select instructions. \p
267
  // Ind is a pointer to the counter index variable; \p TotalNC
268
  // is the total number of counters; \p FNV is the pointer to the
269
  // PGO function name var; \p FHash is the function hash.
270
  void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
271
90
                         GlobalVariable *FNV, uint64_t FHash) {
272
90
    Mode = VM_instrument;
273
90
    CurCtrIdx = Ind;
274
90
    TotalNumCtrs = TotalNC;
275
90
    FuncHash = FHash;
276
90
    FuncNameVar = FNV;
277
90
    visit(Func);
278
90
  }
279
280
  // Visit the IR stream and annotate all select instructions.
281
49
  void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
282
49
    Mode = VM_annotate;
283
49
    UseFunc = UF;
284
49
    CurCtrIdx = Ind;
285
49
    visit(Func);
286
49
  }
287
288
  void instrumentOneSelectInst(SelectInst &SI);
289
  void annotateOneSelectInst(SelectInst &SI);
290
  // Visit \p SI instruction and perform tasks according to visit mode.
291
  void visitSelectInst(SelectInst &SI);
292
  // Return the number of select instructions. This needs to be called after
293
  // countSelects().
294
384
  unsigned getNumOfSelectInsts() const { return NSIs; }
295
};
296
297
/// Instruction Visitor class to visit memory intrinsic calls.
298
struct MemIntrinsicVisitor : public InstVisitor<MemIntrinsicVisitor> {
299
  Function &F;
300
  unsigned NMemIs = 0;          // Number of memIntrinsics instrumented.
301
  VisitMode Mode = VM_counting; // Visiting mode.
302
  unsigned CurCtrId = 0;        // Current counter index.
303
  unsigned TotalNumCtrs = 0;    // Total number of counters
304
  GlobalVariable *FuncNameVar = nullptr;
305
  uint64_t FuncHash = 0;
306
  PGOUseFunc *UseFunc = nullptr;
307
  std::vector<Instruction *> Candidates;
308
309
147
  MemIntrinsicVisitor(Function &Func) : F(Func) {}
310
311
147
  void countMemIntrinsics(Function &Func) {
312
147
    NMemIs = 0;
313
147
    Mode = VM_counting;
314
147
    visit(Func);
315
147
  }
316
317
  void instrumentMemIntrinsics(Function &Func, unsigned TotalNC,
318
90
                               GlobalVariable *FNV, uint64_t FHash) {
319
90
    Mode = VM_instrument;
320
90
    TotalNumCtrs = TotalNC;
321
90
    FuncHash = FHash;
322
90
    FuncNameVar = FNV;
323
90
    visit(Func);
324
90
  }
325
326
147
  std::vector<Instruction *> findMemIntrinsics(Function &Func) {
327
147
    Candidates.clear();
328
147
    Mode = VM_annotate;
329
147
    visit(Func);
330
147
    return Candidates;
331
147
  }
332
333
  // Visit the IR stream and annotate all mem intrinsic call instructions.
334
  void instrumentOneMemIntrinsic(MemIntrinsic &MI);
335
  // Visit \p MI instruction and perform tasks according to visit mode.
336
  void visitMemIntrinsic(MemIntrinsic &SI);
337
147
  unsigned getNumOfMemIntrinsics() const { return NMemIs; }
338
};
339
340
class PGOInstrumentationGenLegacyPass : public ModulePass {
341
public:
342
  static char ID;
343
344
31
  PGOInstrumentationGenLegacyPass() : ModulePass(ID) {
345
31
    initializePGOInstrumentationGenLegacyPassPass(
346
31
        *PassRegistry::getPassRegistry());
347
31
  }
348
349
1
  StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
350
351
private:
352
  bool runOnModule(Module &M) override;
353
354
31
  void getAnalysisUsage(AnalysisUsage &AU) const override {
355
31
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
356
31
  }
357
};
358
359
class PGOInstrumentationUseLegacyPass : public ModulePass {
360
public:
361
  static char ID;
362
363
  // Provide the profile filename as the parameter.
364
  PGOInstrumentationUseLegacyPass(std::string Filename = "")
365
24
      : ModulePass(ID), ProfileFileName(std::move(Filename)) {
366
24
    if (!PGOTestProfileFile.empty())
367
23
      ProfileFileName = PGOTestProfileFile;
368
24
    initializePGOInstrumentationUseLegacyPassPass(
369
24
        *PassRegistry::getPassRegistry());
370
24
  }
371
372
1
  StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
373
374
private:
375
  std::string ProfileFileName;
376
377
  bool runOnModule(Module &M) override;
378
24
  void getAnalysisUsage(AnalysisUsage &AU) const override {
379
24
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
380
24
  }
381
};
382
383
} // end anonymous namespace
384
385
char PGOInstrumentationGenLegacyPass::ID = 0;
386
7.92k
INITIALIZE_PASS_BEGIN7.92k
(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
387
7.92k
                      "PGO instrumentation.", false, false)
388
7.92k
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
389
7.92k
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
390
7.92k
INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
391
                    "PGO instrumentation.", false, false)
392
393
5
ModulePass *llvm::createPGOInstrumentationGenLegacyPass() {
394
5
  return new PGOInstrumentationGenLegacyPass();
395
5
}
396
397
char PGOInstrumentationUseLegacyPass::ID = 0;
398
7.91k
INITIALIZE_PASS_BEGIN7.91k
(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
399
7.91k
                      "Read PGO instrumentation profile.", false, false)
400
7.91k
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
401
7.91k
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
402
7.91k
INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
403
                    "Read PGO instrumentation profile.", false, false)
404
405
1
ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) {
406
1
  return new PGOInstrumentationUseLegacyPass(Filename.str());
407
1
}
408
409
namespace {
410
/// \brief An MST based instrumentation for PGO
411
///
412
/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
413
/// in the function level.
414
struct PGOEdge {
415
  // This class implements the CFG edges. Note the CFG can be a multi-graph.
416
  // So there might be multiple edges with same SrcBB and DestBB.
417
  const BasicBlock *SrcBB;
418
  const BasicBlock *DestBB;
419
  uint64_t Weight;
420
  bool InMST;
421
  bool Removed;
422
  bool IsCritical;
423
  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
424
      : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false),
425
827
        IsCritical(false) {}
426
  // Return the information string of an edge.
427
0
  const std::string infoString() const {
428
0
    return (Twine(Removed ? 
"-"0
:
" "0
) + (InMST ?
" "0
:
"*"0
) +
429
0
            (IsCritical ? 
"c"0
:
" "0
) + " W=" + Twine(Weight)).str();
430
0
  }
431
};
432
433
// This class stores the auxiliary information for each BB.
434
struct BBInfo {
435
  BBInfo *Group;
436
  uint32_t Index;
437
  uint32_t Rank;
438
439
660
  BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {}
440
441
  // Return the information string of this object.
442
0
  const std::string infoString() const {
443
0
    return (Twine("Index=") + Twine(Index)).str();
444
0
  }
445
};
446
447
// Shared implementation for PGOInstrumentationGen and PGOInstrumentationUse.
448
template <class Edge, class BBInfo> class FuncPGOInstrumentation {
449
private:
450
  Function &F;
451
  void computeCFGHash();
452
  void renameComdatFunction();
453
  // A map that stores the Comdat group in function F.
454
  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
455
456
public:
457
  std::vector<std::vector<Instruction *>> ValueSites;
458
  SelectInstVisitor SIVisitor;
459
  MemIntrinsicVisitor MIVisitor;
460
  std::string FuncName;
461
  GlobalVariable *FuncNameVar;
462
  // CFG hash value for this function.
463
  uint64_t FunctionHash;
464
465
  // The Minimum Spanning Tree of function CFG.
466
  CFGMST<Edge, BBInfo> MST;
467
468
  // Given an edge, find the BB that will be instrumented.
469
  // Return nullptr if there is no BB to be instrumented.
470
  BasicBlock *getInstrBB(Edge *E);
471
472
  // Return the auxiliary BB information.
473
1.37k
  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
474
475
  // Return the auxiliary BB information if available.
476
1.23k
  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::findBBInfo(llvm::BasicBlock const*) const
Line
Count
Source
476
305
  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::findBBInfo(llvm::BasicBlock const*) const
Line
Count
Source
476
933
  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
477
478
  // Dump edges and BB information.
479
0
  void dumpInfo(std::string Str = "") const {
480
0
    MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
481
0
                              Twine(FunctionHash) + "\t" + Str);
482
0
  }
483
484
  FuncPGOInstrumentation(
485
      Function &Func,
486
      std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
487
      bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
488
      BlockFrequencyInfo *BFI = nullptr)
489
      : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1),
490
147
        SIVisitor(Func), MIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) {
491
147
492
147
    // This should be done before CFG hash computation.
493
147
    SIVisitor.countSelects(Func);
494
147
    MIVisitor.countMemIntrinsics(Func);
495
147
    NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
496
147
    NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
497
147
    ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func);
498
147
    ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
499
147
500
147
    FuncName = getPGOFuncName(F);
501
147
    computeCFGHash();
502
147
    if (ComdatMembers.size())
503
24
      renameComdatFunction();
504
147
    DEBUG(dumpInfo("after CFGMST"));
505
147
506
147
    NumOfPGOBB += MST.BBInfos.size();
507
819
    for (auto &E : MST.AllEdges) {
508
819
      if (E->Removed)
509
0
        continue;
510
819
      NumOfPGOEdge++;
511
819
      if (!E->InMST)
512
310
        NumOfPGOInstrument++;
513
819
    }
514
147
515
147
    if (CreateGlobalVar)
516
90
      FuncNameVar = createPGOFuncNameVar(F, FuncName);
517
147
  }
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::FuncPGOInstrumentation(llvm::Function&, std::__1::unordered_multimap<llvm::Comdat*, llvm::GlobalValue*, std::__1::hash<llvm::Comdat*>, std::__1::equal_to<llvm::Comdat*>, std::__1::allocator<std::__1::pair<llvm::Comdat* const, llvm::GlobalValue*> > >&, bool, llvm::BranchProbabilityInfo*, llvm::BlockFrequencyInfo*)
Line
Count
Source
490
57
        SIVisitor(Func), MIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) {
491
57
492
57
    // This should be done before CFG hash computation.
493
57
    SIVisitor.countSelects(Func);
494
57
    MIVisitor.countMemIntrinsics(Func);
495
57
    NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
496
57
    NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
497
57
    ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func);
498
57
    ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
499
57
500
57
    FuncName = getPGOFuncName(F);
501
57
    computeCFGHash();
502
57
    if (ComdatMembers.size())
503
0
      renameComdatFunction();
504
57
    DEBUG(dumpInfo("after CFGMST"));
505
57
506
57
    NumOfPGOBB += MST.BBInfos.size();
507
327
    for (auto &E : MST.AllEdges) {
508
327
      if (E->Removed)
509
0
        continue;
510
327
      NumOfPGOEdge++;
511
327
      if (!E->InMST)
512
120
        NumOfPGOInstrument++;
513
327
    }
514
57
515
57
    if (CreateGlobalVar)
516
0
      FuncNameVar = createPGOFuncNameVar(F, FuncName);
517
57
  }
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::FuncPGOInstrumentation(llvm::Function&, std::__1::unordered_multimap<llvm::Comdat*, llvm::GlobalValue*, std::__1::hash<llvm::Comdat*>, std::__1::equal_to<llvm::Comdat*>, std::__1::allocator<std::__1::pair<llvm::Comdat* const, llvm::GlobalValue*> > >&, bool, llvm::BranchProbabilityInfo*, llvm::BlockFrequencyInfo*)
Line
Count
Source
490
90
        SIVisitor(Func), MIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) {
491
90
492
90
    // This should be done before CFG hash computation.
493
90
    SIVisitor.countSelects(Func);
494
90
    MIVisitor.countMemIntrinsics(Func);
495
90
    NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
496
90
    NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
497
90
    ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func);
498
90
    ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
499
90
500
90
    FuncName = getPGOFuncName(F);
501
90
    computeCFGHash();
502
90
    if (ComdatMembers.size())
503
24
      renameComdatFunction();
504
90
    DEBUG(dumpInfo("after CFGMST"));
505
90
506
90
    NumOfPGOBB += MST.BBInfos.size();
507
492
    for (auto &E : MST.AllEdges) {
508
492
      if (E->Removed)
509
0
        continue;
510
492
      NumOfPGOEdge++;
511
492
      if (!E->InMST)
512
190
        NumOfPGOInstrument++;
513
492
    }
514
90
515
90
    if (CreateGlobalVar)
516
90
      FuncNameVar = createPGOFuncNameVar(F, FuncName);
517
90
  }
518
519
  // Return the number of profile counters needed for the function.
520
90
  unsigned getNumCounters() {
521
90
    unsigned NumCounters = 0;
522
492
    for (auto &E : this->MST.AllEdges) {
523
492
      if (
!E->InMST && 492
!E->Removed190
)
524
190
        NumCounters++;
525
492
    }
526
90
    return NumCounters + SIVisitor.getNumOfSelectInsts();
527
90
  }
528
};
529
530
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
531
// value of each BB in the CFG. The higher 32 bits record the number of edges.
532
template <class Edge, class BBInfo>
533
147
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
534
147
  std::vector<char> Indexes;
535
147
  JamCRC JC;
536
511
  for (auto &BB : F) {
537
511
    const TerminatorInst *TI = BB.getTerminator();
538
1.02k
    for (unsigned I = 0, E = TI->getNumSuccessors(); 
I != E1.02k
;
++I514
) {
539
514
      BasicBlock *Succ = TI->getSuccessor(I);
540
514
      auto BI = findBBInfo(Succ);
541
514
      if (BI == nullptr)
542
0
        continue;
543
514
      uint32_t Index = BI->Index;
544
2.57k
      for (int J = 0; 
J < 42.57k
;
J++2.05k
)
545
2.05k
        Indexes.push_back((char)(Index >> (J * 8)));
546
514
    }
547
511
  }
548
147
  JC.update(Indexes);
549
147
  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
550
147
                 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
551
147
                 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
552
147
  DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
553
147
               << " CRC = " << JC.getCRC()
554
147
               << ", Selects = " << SIVisitor.getNumOfSelectInsts()
555
147
               << ", Edges = " << MST.AllEdges.size()
556
147
               << ", ICSites = " << ValueSites[IPVK_IndirectCallTarget].size()
557
147
               << ", Hash = " << FunctionHash << "\n";);
558
147
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::computeCFGHash()
Line
Count
Source
533
90
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
534
90
  std::vector<char> Indexes;
535
90
  JamCRC JC;
536
302
  for (auto &BB : F) {
537
302
    const TerminatorInst *TI = BB.getTerminator();
538
607
    for (unsigned I = 0, E = TI->getNumSuccessors(); 
I != E607
;
++I305
) {
539
305
      BasicBlock *Succ = TI->getSuccessor(I);
540
305
      auto BI = findBBInfo(Succ);
541
305
      if (BI == nullptr)
542
0
        continue;
543
305
      uint32_t Index = BI->Index;
544
1.52k
      for (int J = 0; 
J < 41.52k
;
J++1.22k
)
545
1.22k
        Indexes.push_back((char)(Index >> (J * 8)));
546
305
    }
547
302
  }
548
90
  JC.update(Indexes);
549
90
  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
550
90
                 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
551
90
                 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
552
90
  DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
553
90
               << " CRC = " << JC.getCRC()
554
90
               << ", Selects = " << SIVisitor.getNumOfSelectInsts()
555
90
               << ", Edges = " << MST.AllEdges.size()
556
90
               << ", ICSites = " << ValueSites[IPVK_IndirectCallTarget].size()
557
90
               << ", Hash = " << FunctionHash << "\n";);
558
90
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::computeCFGHash()
Line
Count
Source
533
57
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
534
57
  std::vector<char> Indexes;
535
57
  JamCRC JC;
536
209
  for (auto &BB : F) {
537
209
    const TerminatorInst *TI = BB.getTerminator();
538
418
    for (unsigned I = 0, E = TI->getNumSuccessors(); 
I != E418
;
++I209
) {
539
209
      BasicBlock *Succ = TI->getSuccessor(I);
540
209
      auto BI = findBBInfo(Succ);
541
209
      if (BI == nullptr)
542
0
        continue;
543
209
      uint32_t Index = BI->Index;
544
1.04k
      for (int J = 0; 
J < 41.04k
;
J++836
)
545
836
        Indexes.push_back((char)(Index >> (J * 8)));
546
209
    }
547
209
  }
548
57
  JC.update(Indexes);
549
57
  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
550
57
                 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
551
57
                 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
552
57
  DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
553
57
               << " CRC = " << JC.getCRC()
554
57
               << ", Selects = " << SIVisitor.getNumOfSelectInsts()
555
57
               << ", Edges = " << MST.AllEdges.size()
556
57
               << ", ICSites = " << ValueSites[IPVK_IndirectCallTarget].size()
557
57
               << ", Hash = " << FunctionHash << "\n";);
558
57
}
559
560
// Check if we can safely rename this Comdat function.
561
static bool canRenameComdat(
562
    Function &F,
563
24
    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
564
24
  if (
!DoComdatRenaming || 24
!canRenameComdatFunc(F, true)24
)
565
6
    return false;
566
18
567
18
  // FIXME: Current only handle those Comdat groups that only containing one
568
18
  // function and function aliases.
569
18
  // (1) For a Comdat group containing multiple functions, we need to have a
570
18
  // unique postfix based on the hashes for each function. There is a
571
18
  // non-trivial code refactoring to do this efficiently.
572
18
  // (2) Variables can not be renamed, so we can not rename Comdat function in a
573
18
  // group including global vars.
574
18
  Comdat *C = F.getComdat();
575
24
  for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
576
24
    if (dyn_cast<GlobalAlias>(CM.second))
577
0
      continue;
578
24
    Function *FM = dyn_cast<Function>(CM.second);
579
24
    if (FM != &F)
580
12
      return false;
581
6
  }
582
6
  return true;
583
6
}
584
585
// Append the CFGHash to the Comdat function name.
586
template <class Edge, class BBInfo>
587
24
void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
588
24
  if (!canRenameComdat(F, ComdatMembers))
589
18
    return;
590
6
  std::string OrigName = F.getName().str();
591
6
  std::string NewFuncName =
592
6
      Twine(F.getName() + "." + Twine(FunctionHash)).str();
593
6
  F.setName(Twine(NewFuncName));
594
6
  GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
595
6
  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
596
6
  Comdat *NewComdat;
597
6
  Module *M = F.getParent();
598
6
  // For AvailableExternallyLinkage functions, change the linkage to
599
6
  // LinkOnceODR and put them into comdat. This is because after renaming, there
600
6
  // is no backup external copy available for the function.
601
6
  if (
!F.hasComdat()6
) {
602
2
    assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
603
2
    NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
604
2
    F.setLinkage(GlobalValue::LinkOnceODRLinkage);
605
2
    F.setComdat(NewComdat);
606
2
    return;
607
2
  }
608
4
609
4
  // This function belongs to a single function Comdat group.
610
4
  Comdat *OrigComdat = F.getComdat();
611
4
  std::string NewComdatName =
612
4
      Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
613
4
  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
614
4
  NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
615
4
616
4
  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
617
4
    if (GlobalAlias *
GA4
= dyn_cast<GlobalAlias>(CM.second)) {
618
0
      // For aliases, change the name directly.
619
0
      assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
620
0
      std::string OrigGAName = GA->getName().str();
621
0
      GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
622
0
      GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA);
623
0
      continue;
624
0
    }
625
4
    // Must be a function.
626
4
    Function *CF = dyn_cast<Function>(CM.second);
627
4
    assert(CF);
628
4
    CF->setComdat(NewComdat);
629
4
  }
630
24
}
Unexecuted instantiation: PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::renameComdatFunction()
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::renameComdatFunction()
Line
Count
Source
587
24
void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
588
24
  if (!canRenameComdat(F, ComdatMembers))
589
18
    return;
590
6
  std::string OrigName = F.getName().str();
591
6
  std::string NewFuncName =
592
6
      Twine(F.getName() + "." + Twine(FunctionHash)).str();
593
6
  F.setName(Twine(NewFuncName));
594
6
  GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
595
6
  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
596
6
  Comdat *NewComdat;
597
6
  Module *M = F.getParent();
598
6
  // For AvailableExternallyLinkage functions, change the linkage to
599
6
  // LinkOnceODR and put them into comdat. This is because after renaming, there
600
6
  // is no backup external copy available for the function.
601
6
  if (
!F.hasComdat()6
) {
602
2
    assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
603
2
    NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
604
2
    F.setLinkage(GlobalValue::LinkOnceODRLinkage);
605
2
    F.setComdat(NewComdat);
606
2
    return;
607
2
  }
608
4
609
4
  // This function belongs to a single function Comdat group.
610
4
  Comdat *OrigComdat = F.getComdat();
611
4
  std::string NewComdatName =
612
4
      Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
613
4
  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
614
4
  NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
615
4
616
4
  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
617
4
    if (GlobalAlias *
GA4
= dyn_cast<GlobalAlias>(CM.second)) {
618
0
      // For aliases, change the name directly.
619
0
      assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
620
0
      std::string OrigGAName = GA->getName().str();
621
0
      GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
622
0
      GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA);
623
0
      continue;
624
0
    }
625
4
    // Must be a function.
626
4
    Function *CF = dyn_cast<Function>(CM.second);
627
4
    assert(CF);
628
4
    CF->setComdat(NewComdat);
629
4
  }
630
24
}
631
632
// Given a CFG E to be instrumented, find which BB to place the instrumented
633
// code. The function will split the critical edge if necessary.
634
template <class Edge, class BBInfo>
635
803
BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
636
803
  if (
E->InMST || 803
E->Removed302
)
637
501
    return nullptr;
638
302
639
302
  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
640
302
  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
641
302
  // For a fake edge, instrument the real BB.
642
302
  if (SrcBB == nullptr)
643
0
    return DestBB;
644
302
  
if (302
DestBB == nullptr302
)
645
118
    return SrcBB;
646
184
647
184
  // Instrument the SrcBB if it has a single successor,
648
184
  // otherwise, the DestBB if this is not a critical edge.
649
184
  TerminatorInst *TI = SrcBB->getTerminator();
650
184
  if (TI->getNumSuccessors() <= 1)
651
117
    return SrcBB;
652
67
  
if (67
!E->IsCritical67
)
653
45
    return DestBB;
654
22
655
22
  // For a critical edge, we have to split. Instrument the newly
656
22
  // created BB.
657
22
  NumOfPGOSplit++;
658
22
  DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> "
659
803
               << getBBInfo(DestBB).Index << "\n");
660
803
  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
661
803
  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
662
803
  assert(InstrBB && "Critical edge is not split");
663
803
664
803
  E->Removed = true;
665
803
  return InstrBB;
666
803
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::getInstrBB((anonymous namespace)::PGOUseEdge*)
Line
Count
Source
635
311
BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
636
311
  if (
E->InMST || 311
E->Removed112
)
637
199
    return nullptr;
638
112
639
112
  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
640
112
  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
641
112
  // For a fake edge, instrument the real BB.
642
112
  if (SrcBB == nullptr)
643
0
    return DestBB;
644
112
  
if (112
DestBB == nullptr112
)
645
41
    return SrcBB;
646
71
647
71
  // Instrument the SrcBB if it has a single successor,
648
71
  // otherwise, the DestBB if this is not a critical edge.
649
71
  TerminatorInst *TI = SrcBB->getTerminator();
650
71
  if (TI->getNumSuccessors() <= 1)
651
48
    return SrcBB;
652
23
  
if (23
!E->IsCritical23
)
653
19
    return DestBB;
654
4
655
4
  // For a critical edge, we have to split. Instrument the newly
656
4
  // created BB.
657
4
  NumOfPGOSplit++;
658
4
  DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> "
659
311
               << getBBInfo(DestBB).Index << "\n");
660
311
  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
661
311
  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
662
311
  assert(InstrBB && "Critical edge is not split");
663
311
664
311
  E->Removed = true;
665
311
  return InstrBB;
666
311
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::getInstrBB((anonymous namespace)::PGOEdge*)
Line
Count
Source
635
492
BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
636
492
  if (
E->InMST || 492
E->Removed190
)
637
302
    return nullptr;
638
190
639
190
  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
640
190
  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
641
190
  // For a fake edge, instrument the real BB.
642
190
  if (SrcBB == nullptr)
643
0
    return DestBB;
644
190
  
if (190
DestBB == nullptr190
)
645
77
    return SrcBB;
646
113
647
113
  // Instrument the SrcBB if it has a single successor,
648
113
  // otherwise, the DestBB if this is not a critical edge.
649
113
  TerminatorInst *TI = SrcBB->getTerminator();
650
113
  if (TI->getNumSuccessors() <= 1)
651
69
    return SrcBB;
652
44
  
if (44
!E->IsCritical44
)
653
26
    return DestBB;
654
18
655
18
  // For a critical edge, we have to split. Instrument the newly
656
18
  // created BB.
657
18
  NumOfPGOSplit++;
658
18
  DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> "
659
492
               << getBBInfo(DestBB).Index << "\n");
660
492
  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
661
492
  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
662
492
  assert(InstrBB && "Critical edge is not split");
663
492
664
492
  E->Removed = true;
665
492
  return InstrBB;
666
492
}
667
668
// Visit all edge and instrument the edges not in MST, and do value profiling.
669
// Critical edges will be split.
670
static void instrumentOneFunc(
671
    Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
672
90
    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
673
90
  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
674
90
                                                   BFI);
675
90
  unsigned NumCounters = FuncInfo.getNumCounters();
676
90
677
90
  uint32_t I = 0;
678
90
  Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
679
492
  for (auto &E : FuncInfo.MST.AllEdges) {
680
492
    BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
681
492
    if (!InstrBB)
682
302
      continue;
683
190
684
190
    IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
685
190
    assert(Builder.GetInsertPoint() != InstrBB->end() &&
686
190
           "Cannot get the Instrumentation point");
687
190
    Builder.CreateCall(
688
190
        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
689
190
        {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
690
190
         Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
691
190
         Builder.getInt32(I++)});
692
190
  }
693
90
694
90
  // Now instrument select instructions:
695
90
  FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
696
90
                                       FuncInfo.FunctionHash);
697
90
  assert(I == NumCounters);
698
90
699
90
  if (DisableValueProfiling)
700
0
    return;
701
90
702
90
  unsigned NumIndirectCallSites = 0;
703
6
  for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) {
704
6
    CallSite CS(I);
705
6
    Value *Callee = CS.getCalledValue();
706
6
    DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = "
707
6
                 << NumIndirectCallSites << "\n");
708
6
    IRBuilder<> Builder(I);
709
6
    assert(Builder.GetInsertPoint() != I->getParent()->end() &&
710
6
           "Cannot get the Instrumentation point");
711
6
    Builder.CreateCall(
712
6
        Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
713
6
        {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
714
6
         Builder.getInt64(FuncInfo.FunctionHash),
715
6
         Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()),
716
6
         Builder.getInt32(IPVK_IndirectCallTarget),
717
6
         Builder.getInt32(NumIndirectCallSites++)});
718
6
  }
719
90
  NumOfPGOICall += NumIndirectCallSites;
720
90
721
90
  // Now instrument memop intrinsic calls.
722
90
  FuncInfo.MIVisitor.instrumentMemIntrinsics(
723
90
      F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash);
724
90
}
725
726
// This class represents a CFG edge in profile use compilation.
727
struct PGOUseEdge : public PGOEdge {
728
  bool CountValid;
729
  uint64_t CountValue;
730
  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
731
335
      : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {}
732
733
  // Set edge count value
734
315
  void setEdgeCount(uint64_t Value) {
735
315
    CountValue = Value;
736
315
    CountValid = true;
737
315
  }
738
739
  // Return the information string for this object.
740
0
  const std::string infoString() const {
741
0
    if (!CountValid)
742
0
      return PGOEdge::infoString();
743
0
    return (Twine(PGOEdge::infoString()) + "  Count=" + Twine(CountValue))
744
0
        .str();
745
0
  }
746
};
747
748
typedef SmallVector<PGOUseEdge *, 2> DirectEdges;
749
750
// This class stores the auxiliary information for each BB.
751
struct UseBBInfo : public BBInfo {
752
  uint64_t CountValue;
753
  bool CountValid;
754
  int32_t UnknownCountInEdge;
755
  int32_t UnknownCountOutEdge;
756
  DirectEdges InEdges;
757
  DirectEdges OutEdges;
758
  UseBBInfo(unsigned IX)
759
      : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0),
760
268
        UnknownCountOutEdge(0) {}
761
  UseBBInfo(unsigned IX, uint64_t C)
762
      : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0),
763
0
        UnknownCountOutEdge(0) {}
764
765
  // Set the profile count value for this BB.
766
112
  void setBBInfoCount(uint64_t Value) {
767
112
    CountValue = Value;
768
112
    CountValid = true;
769
112
  }
770
771
  // Return the information string of this object.
772
0
  const std::string infoString() const {
773
0
    if (!CountValid)
774
0
      return BBInfo::infoString();
775
0
    return (Twine(BBInfo::infoString()) + "  Count=" + Twine(CountValue)).str();
776
0
  }
777
};
778
779
// Sum up the count values for all the edges.
780
290
static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
781
290
  uint64_t Total = 0;
782
412
  for (auto &E : Edges) {
783
412
    if (E->Removed)
784
0
      continue;
785
412
    Total += E->CountValue;
786
412
  }
787
290
  return Total;
788
290
}
789
790
class PGOUseFunc {
791
public:
792
  PGOUseFunc(Function &Func, Module *Modu,
793
             std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
794
             BranchProbabilityInfo *BPI = nullptr,
795
             BlockFrequencyInfo *BFI = nullptr)
796
      : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI),
797
57
        CountPosition(0), ProfileCountSize(0), FreqAttr(FFA_Normal) {}
798
799
  // Read counts for the instrumented BB from profile.
800
  bool readCounters(IndexedInstrProfReader *PGOReader);
801
802
  // Populate the counts for all BBs.
803
  void populateCounters();
804
805
  // Set the branch weights based on the count values.
806
  void setBranchWeights();
807
808
  // Annotate the value profile call sites all all value kind.
809
  void annotateValueSites();
810
811
  // Annotate the value profile call sites for one value kind.
812
  void annotateValueSites(uint32_t Kind);
813
814
  // The hotness of the function from the profile count.
815
  enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
816
817
  // Return the function hotness from the profile.
818
49
  FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
819
820
  // Return the function hash.
821
0
  uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
822
  // Return the profile record for this function;
823
8
  InstrProfRecord &getProfileRecord() { return ProfileRecord; }
824
825
  // Return the auxiliary BB information.
826
1.37k
  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
827
1.37k
    return FuncInfo.getBBInfo(BB);
828
1.37k
  }
829
830
  // Return the auxiliary BB information if available.
831
724
  UseBBInfo *findBBInfo(const BasicBlock *BB) const {
832
724
    return FuncInfo.findBBInfo(BB);
833
724
  }
834
835
0
  Function &getFunc() const { return F; }
836
837
0
  void dumpInfo(std::string Str = "") const {
838
0
    FuncInfo.dumpInfo(Str);
839
0
  }
840
841
private:
842
  Function &F;
843
  Module *M;
844
  // This member stores the shared information with class PGOGenFunc.
845
  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
846
847
  // The maximum count value in the profile. This is only used in PGO use
848
  // compilation.
849
  uint64_t ProgramMaxCount;
850
851
  // Position of counter that remains to be read.
852
  uint32_t CountPosition;
853
854
  // Total size of the profile count for this function.
855
  uint32_t ProfileCountSize;
856
857
  // ProfileRecord for this function.
858
  InstrProfRecord ProfileRecord;
859
860
  // Function hotness info derived from profile.
861
  FuncFreqAttr FreqAttr;
862
863
  // Find the Instrumented BB and set the value.
864
  void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
865
866
  // Set the edge counter value for the unknown edge -- there should be only
867
  // one unknown edge.
868
  void setEdgeCount(DirectEdges &Edges, uint64_t Value);
869
870
  // Return FuncName string;
871
0
  const std::string getFuncName() const { return FuncInfo.FuncName; }
872
873
  // Set the hot/cold inline hints based on the count values.
874
  // FIXME: This function should be removed once the functionality in
875
  // the inliner is implemented.
876
49
  void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
877
49
    if (ProgramMaxCount == 0)
878
4
      return;
879
45
    // Threshold of the hot functions.
880
45
    const BranchProbability HotFunctionThreshold(1, 100);
881
45
    // Threshold of the cold functions.
882
45
    const BranchProbability ColdFunctionThreshold(2, 10000);
883
45
    if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount))
884
39
      FreqAttr = FFA_Hot;
885
6
    else 
if (6
MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount)6
)
886
0
      FreqAttr = FFA_Cold;
887
49
  }
888
};
889
890
// Visit all the edges and assign the count value for the instrumented
891
// edges and the BB.
892
void PGOUseFunc::setInstrumentedCounts(
893
49
    const std::vector<uint64_t> &CountFromProfile) {
894
49
895
49
  assert(FuncInfo.getNumCounters() == CountFromProfile.size());
896
49
  // Use a worklist as we will update the vector during the iteration.
897
49
  std::vector<PGOUseEdge *> WorkList;
898
49
  for (auto &E : FuncInfo.MST.AllEdges)
899
311
    WorkList.push_back(E.get());
900
49
901
49
  uint32_t I = 0;
902
311
  for (auto &E : WorkList) {
903
311
    BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
904
311
    if (!InstrBB)
905
199
      continue;
906
112
    uint64_t CountValue = CountFromProfile[I++];
907
112
    if (
!E->Removed112
) {
908
108
      getBBInfo(InstrBB).setBBInfoCount(CountValue);
909
108
      E->setEdgeCount(CountValue);
910
108
      continue;
911
108
    }
912
4
913
4
    // Need to add two new edges.
914
4
    BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
915
4
    BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
916
4
    // Add new edge of SrcBB->InstrBB.
917
4
    PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
918
4
    NewEdge.setEdgeCount(CountValue);
919
4
    // Add new edge of InstrBB->DestBB.
920
4
    PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
921
4
    NewEdge1.setEdgeCount(CountValue);
922
4
    NewEdge1.InMST = true;
923
4
    getBBInfo(InstrBB).setBBInfoCount(CountValue);
924
4
  }
925
49
  ProfileCountSize = CountFromProfile.size();
926
49
  CountPosition = I;
927
49
}
928
929
// Set the count value for the unknown edge. There should be one and only one
930
// unknown edge in Edges vector.
931
199
void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
932
215
  for (auto &E : Edges) {
933
215
    if (E->CountValid)
934
16
      continue;
935
199
    E->setEdgeCount(Value);
936
199
937
199
    getBBInfo(E->SrcBB).UnknownCountOutEdge--;
938
199
    getBBInfo(E->DestBB).UnknownCountInEdge--;
939
199
    return;
940
199
  }
941
0
  
llvm_unreachable0
("Cannot find the unknown count edge");
942
0
}
943
944
// Read the profile from ProfileFileName and assign the value to the
945
// instrumented BB and the edges. This function also updates ProgramMaxCount.
946
// Return true if the profile are successfully read, and false on errors.
947
57
bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
948
57
  auto &Ctx = M->getContext();
949
57
  Expected<InstrProfRecord> Result =
950
57
      PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
951
57
  if (Error 
E57
= Result.takeError()) {
952
8
    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
953
8
      auto Err = IPE.get();
954
8
      bool SkipWarning = false;
955
8
      if (
Err == instrprof_error::unknown_function8
) {
956
6
        NumOfPGOMissing++;
957
6
        SkipWarning = !PGOWarnMissing;
958
8
      } else 
if (2
Err == instrprof_error::hash_mismatch ||
959
2
                 
Err == instrprof_error::malformed0
) {
960
2
        NumOfPGOMismatch++;
961
2
        SkipWarning =
962
2
            NoPGOWarnMismatch ||
963
2
            (NoPGOWarnMismatchComdat &&
964
2
             (F.hasComdat() ||
965
2
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
966
2
      }
967
8
968
8
      if (SkipWarning)
969
4
        return;
970
4
971
4
      std::string Msg = IPE.message() + std::string(" ") + F.getName().str();
972
4
      Ctx.diagnose(
973
4
          DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
974
4
    });
975
8
    return false;
976
8
  }
977
49
  ProfileRecord = std::move(Result.get());
978
49
  std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
979
49
980
49
  NumOfPGOFunc++;
981
49
  DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
982
49
  uint64_t ValueSum = 0;
983
169
  for (unsigned I = 0, S = CountFromProfile.size(); 
I < S169
;
I++120
) {
984
120
    DEBUG(dbgs() << "  " << I << ": " << CountFromProfile[I] << "\n");
985
120
    ValueSum += CountFromProfile[I];
986
120
  }
987
49
988
49
  DEBUG(dbgs() << "SUM =  " << ValueSum << "\n");
989
57
990
57
  getBBInfo(nullptr).UnknownCountOutEdge = 2;
991
57
  getBBInfo(nullptr).UnknownCountInEdge = 2;
992
57
993
57
  setInstrumentedCounts(CountFromProfile);
994
57
  ProgramMaxCount = PGOReader->getMaximumFunctionCount();
995
57
  return true;
996
57
}
997
998
// Populate the counters from instrumented BBs to all BBs.
999
// In the end of this operation, all BBs should have a valid count value.
1000
49
void PGOUseFunc::populateCounters() {
1001
49
  // First set up Count variable for all BBs.
1002
319
  for (auto &E : FuncInfo.MST.AllEdges) {
1003
319
    if (E->Removed)
1004
4
      continue;
1005
315
1006
315
    const BasicBlock *SrcBB = E->SrcBB;
1007
315
    const BasicBlock *DestBB = E->DestBB;
1008
315
    UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1009
315
    UseBBInfo &DestInfo = getBBInfo(DestBB);
1010
315
    SrcInfo.OutEdges.push_back(E.get());
1011
315
    DestInfo.InEdges.push_back(E.get());
1012
315
    SrcInfo.UnknownCountOutEdge++;
1013
315
    DestInfo.UnknownCountInEdge++;
1014
315
1015
315
    if (!E->CountValid)
1016
199
      continue;
1017
116
    DestInfo.UnknownCountInEdge--;
1018
116
    SrcInfo.UnknownCountOutEdge--;
1019
116
  }
1020
49
1021
49
  bool Changes = true;
1022
49
  unsigned NumPasses = 0;
1023
164
  while (
Changes164
) {
1024
115
    NumPasses++;
1025
115
    Changes = false;
1026
115
1027
115
    // For efficient traversal, it's better to start from the end as most
1028
115
    // of the instrumented edges are at the end.
1029
511
    for (auto &BB : reverse(F)) {
1030
511
      UseBBInfo *Count = findBBInfo(&BB);
1031
511
      if (Count == nullptr)
1032
4
        continue;
1033
507
      
if (507
!Count->CountValid507
) {
1034
114
        if (
Count->UnknownCountOutEdge == 0114
) {
1035
79
          Count->CountValue = sumEdgeCount(Count->OutEdges);
1036
79
          Count->CountValid = true;
1037
79
          Changes = true;
1038
114
        } else 
if (35
Count->UnknownCountInEdge == 035
) {
1039
12
          Count->CountValue = sumEdgeCount(Count->InEdges);
1040
12
          Count->CountValid = true;
1041
12
          Changes = true;
1042
12
        }
1043
114
      }
1044
507
      if (
Count->CountValid507
) {
1045
484
        if (
Count->UnknownCountOutEdge == 1484
) {
1046
31
          uint64_t Total = 0;
1047
31
          uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1048
31
          // If the one of the successor block can early terminate (no-return),
1049
31
          // we can end up with situation where out edge sum count is larger as
1050
31
          // the source BB's count is collected by a post-dominated block.
1051
31
          if (Count->CountValue > OutSum)
1052
29
            Total = Count->CountValue - OutSum;
1053
31
          setEdgeCount(Count->OutEdges, Total);
1054
31
          Changes = true;
1055
31
        }
1056
484
        if (
Count->UnknownCountInEdge == 1484
) {
1057
168
          uint64_t Total = 0;
1058
168
          uint64_t InSum = sumEdgeCount(Count->InEdges);
1059
168
          if (Count->CountValue > InSum)
1060
160
            Total = Count->CountValue - InSum;
1061
168
          setEdgeCount(Count->InEdges, Total);
1062
168
          Changes = true;
1063
168
        }
1064
484
      }
1065
511
    }
1066
115
  }
1067
49
1068
49
  DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1069
#ifndef NDEBUG
1070
  // Assert every BB has a valid counter.
1071
  for (auto &BB : F) {
1072
    auto BI = findBBInfo(&BB);
1073
    if (BI == nullptr)
1074
      continue;
1075
    assert(BI->CountValid && "BB count is not valid");
1076
  }
1077
#endif
1078
  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1079
49
  F.setEntryCount(FuncEntryCount);
1080
49
  uint64_t FuncMaxCount = FuncEntryCount;
1081
205
  for (auto &BB : F) {
1082
205
    auto BI = findBBInfo(&BB);
1083
205
    if (BI == nullptr)
1084
2
      continue;
1085
203
    FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1086
203
  }
1087
49
  markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1088
49
1089
49
  // Now annotate select instructions
1090
49
  FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
1091
49
  assert(CountPosition == ProfileCountSize);
1092
49
1093
49
  DEBUG(FuncInfo.dumpInfo("after reading profile."));
1094
49
}
1095
1096
// Assign the scaled count values to the BB with multiple out edges.
1097
49
void PGOUseFunc::setBranchWeights() {
1098
49
  // Generate MD_prof metadata for every branch instruction.
1099
49
  DEBUG(dbgs() << "\nSetting branch weights.\n");
1100
205
  for (auto &BB : F) {
1101
205
    TerminatorInst *TI = BB.getTerminator();
1102
205
    if (TI->getNumSuccessors() < 2)
1103
157
      continue;
1104
48
    
if (48
!(isa<BranchInst>(TI) || 48
isa<SwitchInst>(TI)9
||
1105
5
          isa<IndirectBrInst>(TI)))
1106
2
      continue;
1107
46
    
if (46
getBBInfo(&BB).CountValue == 046
)
1108
0
      continue;
1109
46
1110
46
    // We have a non-zero Branch BB.
1111
46
    const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1112
46
    unsigned Size = BBCountInfo.OutEdges.size();
1113
46
    SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1114
46
    uint64_t MaxCount = 0;
1115
153
    for (unsigned s = 0; 
s < Size153
;
s++107
) {
1116
107
      const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1117
107
      const BasicBlock *SrcBB = E->SrcBB;
1118
107
      const BasicBlock *DestBB = E->DestBB;
1119
107
      if (DestBB == nullptr)
1120
0
        continue;
1121
107
      unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1122
107
      uint64_t EdgeCount = E->CountValue;
1123
107
      if (EdgeCount > MaxCount)
1124
60
        MaxCount = EdgeCount;
1125
107
      EdgeCounts[SuccNum] = EdgeCount;
1126
107
    }
1127
205
    setProfMetadata(M, TI, EdgeCounts, MaxCount);
1128
205
  }
1129
49
}
1130
1131
2
void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1132
2
  Module *M = F.getParent();
1133
2
  IRBuilder<> Builder(&SI);
1134
2
  Type *Int64Ty = Builder.getInt64Ty();
1135
2
  Type *I8PtrTy = Builder.getInt8PtrTy();
1136
2
  auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1137
2
  Builder.CreateCall(
1138
2
      Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1139
2
      {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1140
2
       Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1141
2
       Builder.getInt32(*CurCtrIdx), Step});
1142
2
  ++(*CurCtrIdx);
1143
2
}
1144
1145
8
void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1146
8
  std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1147
8
  assert(*CurCtrIdx < CountFromProfile.size() &&
1148
8
         "Out of bound access of counters");
1149
8
  uint64_t SCounts[2];
1150
8
  SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1151
8
  ++(*CurCtrIdx);
1152
8
  uint64_t TotalCount = 0;
1153
8
  auto BI = UseFunc->findBBInfo(SI.getParent());
1154
8
  if (BI != nullptr)
1155
8
    TotalCount = BI->CountValue;
1156
8
  // False Count
1157
8
  SCounts[1] = (TotalCount > SCounts[0] ? 
TotalCount - SCounts[0]6
:
02
);
1158
8
  uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1159
8
  if (MaxCount)
1160
6
    setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1161
8
}
1162
1163
24
void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1164
24
  if (!PGOInstrSelect)
1165
4
    return;
1166
20
  // FIXME: do not handle this yet.
1167
20
  
if (20
SI.getCondition()->getType()->isVectorTy()20
)
1168
0
    return;
1169
20
1170
20
  switch (Mode) {
1171
10
  case VM_counting:
1172
10
    NSIs++;
1173
10
    return;
1174
2
  case VM_instrument:
1175
2
    instrumentOneSelectInst(SI);
1176
2
    return;
1177
8
  case VM_annotate:
1178
8
    annotateOneSelectInst(SI);
1179
8
    return;
1180
0
  }
1181
0
1182
0
  
llvm_unreachable0
("Unknown visiting mode");
1183
0
}
1184
1185
3
void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {
1186
3
  Module *M = F.getParent();
1187
3
  IRBuilder<> Builder(&MI);
1188
3
  Type *Int64Ty = Builder.getInt64Ty();
1189
3
  Type *I8PtrTy = Builder.getInt8PtrTy();
1190
3
  Value *Length = MI.getLength();
1191
3
  assert(!dyn_cast<ConstantInt>(Length));
1192
3
  Builder.CreateCall(
1193
3
      Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
1194
3
      {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1195
3
       Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty),
1196
3
       Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)});
1197
3
  ++CurCtrId;
1198
3
}
1199
1200
17
/// Handle one memory intrinsic according to the visitor's current mode:
/// count it, instrument it, or remember it as an annotation candidate.
/// Intrinsics with a constant length are ignored in every mode.
void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) {
  if (!PGOInstrMemOP)
    return;

  // Do not instrument constant-length calls. Only the type matters here, so
  // use isa<> rather than dyn_cast<> as a boolean test.
  if (isa<ConstantInt>(MI.getLength()))
    return;

  switch (Mode) {
  case VM_counting:
    NMemIs++;
    return;
  case VM_instrument:
    instrumentOneMemIntrinsic(MI);
    return;
  case VM_annotate:
    Candidates.push_back(&MI);
    return;
  }
  llvm_unreachable("Unknown visiting mode");
}
1221
1222
// Traverse all valuesites and annotate the instructions for all value kind.
1223
49
void PGOUseFunc::annotateValueSites() {
1224
49
  if (DisableValueProfiling)
1225
0
    return;
1226
49
1227
49
  // Create the PGOFuncName meta data.
1228
49
  createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1229
49
1230
147
  for (uint32_t Kind = IPVK_First; 
Kind <= IPVK_Last147
;
++Kind98
)
1231
98
    annotateValueSites(Kind);
1232
49
}
1233
1234
// Annotate the instructions for a specific value kind.
1235
98
void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1236
98
  unsigned ValueSiteIndex = 0;
1237
98
  auto &ValueSites = FuncInfo.ValueSites[Kind];
1238
98
  unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1239
98
  if (
NumValueSites != ValueSites.size()98
) {
1240
0
    auto &Ctx = M->getContext();
1241
0
    Ctx.diagnose(DiagnosticInfoPGOProfile(
1242
0
        M->getName().data(),
1243
0
        Twine("Inconsistent number of value sites for kind = ") + Twine(Kind) +
1244
0
            " in " + F.getName().str(),
1245
0
        DS_Warning));
1246
0
    return;
1247
0
  }
1248
98
1249
98
  
for (auto &I : ValueSites) 98
{
1250
6
    DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1251
6
                 << "): Index = " << ValueSiteIndex << " out of "
1252
6
                 << NumValueSites << "\n");
1253
6
    annotateValueSite(*M, *I, ProfileRecord,
1254
6
                      static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1255
4
                      Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1256
2
                                             : MaxNumAnnotations);
1257
6
    ValueSiteIndex++;
1258
6
  }
1259
98
}
1260
} // end anonymous namespace
1261
1262
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
1263
// aware this is an ir_level profile so it can set the version flag.
1264
57
static void createIRLevelProfileFlagVariable(Module &M) {
1265
57
  Type *IntTy64 = Type::getInt64Ty(M.getContext());
1266
57
  uint64_t ProfileVersion = (
INSTR_PROF_RAW_VERSION57
|
VARIANT_MASK_IR_PROF57
);
1267
57
  auto IRLevelVersionVariable = new GlobalVariable(
1268
57
      M, IntTy64, true, GlobalVariable::ExternalLinkage,
1269
57
      Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)),
1270
57
      INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
1271
57
  IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
1272
57
  Triple TT(M.getTargetTriple());
1273
57
  if (!TT.supportsCOMDAT())
1274
8
    IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage);
1275
57
  else
1276
49
    IRLevelVersionVariable->setComdat(M.getOrInsertComdat(
1277
49
        StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR))));
1278
57
}
1279
1280
// Collect the set of members for each Comdat in module M and store
1281
// in ComdatMembers.
1282
static void collectComdatMembers(
1283
    Module &M,
1284
101
    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1285
101
  if (!DoComdatRenaming)
1286
97
    return;
1287
4
  for (Function &F : M)
1288
24
    
if (Comdat *24
C24
= F.getComdat())
1289
20
      ComdatMembers.insert(std::make_pair(C, &F));
1290
4
  for (GlobalVariable &GV : M.globals())
1291
8
    
if (Comdat *8
C8
= GV.getComdat())
1292
8
      ComdatMembers.insert(std::make_pair(C, &GV));
1293
4
  for (GlobalAlias &GA : M.aliases())
1294
0
    
if (Comdat *0
C0
= GA.getComdat())
1295
0
      ComdatMembers.insert(std::make_pair(C, &GA));
1296
101
}
1297
1298
static bool InstrumentAllFunctions(
1299
    Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1300
57
    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
1301
57
  createIRLevelProfileFlagVariable(M);
1302
57
  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1303
57
  collectComdatMembers(M, ComdatMembers);
1304
57
1305
192
  for (auto &F : M) {
1306
192
    if (F.isDeclaration())
1307
102
      continue;
1308
90
    auto *BPI = LookupBPI(F);
1309
90
    auto *BFI = LookupBFI(F);
1310
90
    instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers);
1311
90
  }
1312
57
  return true;
1313
57
}
1314
1315
31
/// Legacy pass manager entry point for profile generation. Unless the module
/// is skipped, instruments all functions using the per-function analyses
/// obtained through getAnalysis<>.
bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
  if (skipModule(M))
    return false;

  auto GetBPI = [this](Function &Fn) -> BranchProbabilityInfo * {
    return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(Fn).getBPI();
  };
  auto GetBFI = [this](Function &Fn) -> BlockFrequencyInfo * {
    return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(Fn).getBFI();
  };
  return InstrumentAllFunctions(M, GetBPI, GetBFI);
}
1327
1328
/// New pass manager entry point for profile generation. Reports that no
/// analyses are preserved when InstrumentAllFunctions() returns true, and
/// that all are preserved otherwise.
PreservedAnalyses PGOInstrumentationGen::run(Module &M,
                                             ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  auto GetBPI = [&FAM](Function &Fn) -> BranchProbabilityInfo * {
    return &FAM.getResult<BranchProbabilityAnalysis>(Fn);
  };
  auto GetBFI = [&FAM](Function &Fn) -> BlockFrequencyInfo * {
    return &FAM.getResult<BlockFrequencyAnalysis>(Fn);
  };

  const bool Changed = InstrumentAllFunctions(M, GetBPI, GetBFI);
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
1345
1346
static bool annotateAllFunctions(
1347
    Module &M, StringRef ProfileFileName,
1348
    function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1349
48
    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
1350
48
  DEBUG(dbgs() << "Read in profile counters: ");
1351
48
  auto &Ctx = M.getContext();
1352
48
  // Read the counter array from file.
1353
48
  auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName);
1354
48
  if (Error 
E48
= ReaderOrErr.takeError()) {
1355
2
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1356
2
      Ctx.diagnose(
1357
2
          DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1358
2
    });
1359
2
    return false;
1360
2
  }
1361
46
1362
46
  std::unique_ptr<IndexedInstrProfReader> PGOReader =
1363
46
      std::move(ReaderOrErr.get());
1364
46
  if (
!PGOReader46
) {
1365
0
    Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1366
0
                                          StringRef("Cannot get PGOReader")));
1367
0
    return false;
1368
0
  }
1369
46
  // TODO: might need to change the warning once the clang option is finalized.
1370
46
  
if (46
!PGOReader->isIRLevelProfile()46
) {
1371
2
    Ctx.diagnose(DiagnosticInfoPGOProfile(
1372
2
        ProfileFileName.data(), "Not an IR level instrumentation profile"));
1373
2
    return false;
1374
2
  }
1375
44
1376
44
  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1377
44
  collectComdatMembers(M, ComdatMembers);
1378
44
  std::vector<Function *> HotFunctions;
1379
44
  std::vector<Function *> ColdFunctions;
1380
83
  for (auto &F : M) {
1381
83
    if (F.isDeclaration())
1382
26
      continue;
1383
57
    auto *BPI = LookupBPI(F);
1384
57
    auto *BFI = LookupBFI(F);
1385
57
    PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
1386
57
    if (!Func.readCounters(PGOReader.get()))
1387
8
      continue;
1388
49
    Func.populateCounters();
1389
49
    Func.setBranchWeights();
1390
49
    Func.annotateValueSites();
1391
49
    PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
1392
49
    if (FreqAttr == PGOUseFunc::FFA_Cold)
1393
0
      ColdFunctions.push_back(&F);
1394
49
    else 
if (49
FreqAttr == PGOUseFunc::FFA_Hot49
)
1395
39
      HotFunctions.push_back(&F);
1396
49
    if (PGOViewCounts != PGOVCT_None &&
1397
0
        (ViewBlockFreqFuncName.empty() ||
1398
49
         
F.getName().equals(ViewBlockFreqFuncName)0
)) {
1399
0
      LoopInfo LI{DominatorTree(F)};
1400
0
      std::unique_ptr<BranchProbabilityInfo> NewBPI =
1401
0
          llvm::make_unique<BranchProbabilityInfo>(F, LI);
1402
0
      std::unique_ptr<BlockFrequencyInfo> NewBFI =
1403
0
          llvm::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
1404
0
      if (PGOViewCounts == PGOVCT_Graph)
1405
0
        NewBFI->view();
1406
0
      else 
if (0
PGOViewCounts == PGOVCT_Text0
) {
1407
0
        dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
1408
0
        NewBFI->print(dbgs());
1409
0
      }
1410
0
    }
1411
49
    if (PGOViewRawCounts != PGOVCT_None &&
1412
0
        (ViewBlockFreqFuncName.empty() ||
1413
49
         
F.getName().equals(ViewBlockFreqFuncName)0
)) {
1414
0
      if (PGOViewRawCounts == PGOVCT_Graph)
1415
0
        
if (0
ViewBlockFreqFuncName.empty()0
)
1416
0
          WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1417
0
        else
1418
0
          ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1419
0
      else 
if (0
PGOViewRawCounts == PGOVCT_Text0
) {
1420
0
        dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
1421
0
        Func.dumpInfo();
1422
0
      }
1423
0
    }
1424
83
  }
1425
44
  M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext()));
1426
44
  // Set function hotness attribute from the profile.
1427
44
  // We have to apply these attributes at the end because their presence
1428
44
  // can affect the BranchProbabilityInfo of any callers, resulting in an
1429
44
  // inconsistent MST between prof-gen and prof-use.
1430
39
  for (auto &F : HotFunctions) {
1431
39
    F->addFnAttr(llvm::Attribute::InlineHint);
1432
39
    DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
1433
39
                 << "\n");
1434
39
  }
1435
0
  for (auto &F : ColdFunctions) {
1436
0
    F->addFnAttr(llvm::Attribute::Cold);
1437
0
    DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n");
1438
0
  }
1439
48
  return true;
1440
48
}
1441
1442
/// Remember the profile file to read. A non-empty PGOTestProfileFile option
/// overrides the \p Filename passed in.
PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename)
    : ProfileFileName(std::move(Filename)) {
  if (PGOTestProfileFile.empty())
    return;
  ProfileFileName = PGOTestProfileFile;
}
1447
1448
/// New pass manager entry point for profile use. Reports that no analyses
/// are preserved when annotateAllFunctions() returns true, and that all are
/// preserved otherwise.
PreservedAnalyses PGOInstrumentationUse::run(Module &M,
                                             ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  auto GetBPI = [&FAM](Function &Fn) -> BranchProbabilityInfo * {
    return &FAM.getResult<BranchProbabilityAnalysis>(Fn);
  };
  auto GetBFI = [&FAM](Function &Fn) -> BlockFrequencyInfo * {
    return &FAM.getResult<BlockFrequencyAnalysis>(Fn);
  };

  const bool Annotated = annotateAllFunctions(M, ProfileFileName, GetBPI, GetBFI);
  return Annotated ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
1465
1466
24
/// Legacy pass manager entry point for profile use. Unless the module is
/// skipped, annotates all functions from ProfileFileName using the
/// per-function analyses obtained through getAnalysis<>.
bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
  if (skipModule(M))
    return false;

  auto GetBPI = [this](Function &Fn) -> BranchProbabilityInfo * {
    return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(Fn).getBPI();
  };
  auto GetBFI = [this](Function &Fn) -> BlockFrequencyInfo * {
    return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(Fn).getBFI();
  };

  return annotateAllFunctions(M, ProfileFileName, GetBPI, GetBFI);
}
1479
1480
namespace llvm {
1481
void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts,
1482
62
                     uint64_t MaxCount) {
1483
62
  MDBuilder MDB(M->getContext());
1484
62
  assert(MaxCount > 0 && "Bad max count");
1485
62
  uint64_t Scale = calculateCountScale(MaxCount);
1486
62
  SmallVector<unsigned, 4> Weights;
1487
62
  for (const auto &ECI : EdgeCounts)
1488
141
    Weights.push_back(scaleBranchCount(ECI, Scale));
1489
62
1490
62
  DEBUG(dbgs() << "Weight is: ";
1491
62
        for (const auto &W : Weights) { dbgs() << W << " "; }
1492
62
        dbgs() << "\n";);
1493
62
  TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1494
62
  if (
EmitBranchProbability62
) {
1495
2
    std::string BrCondStr = getBranchCondString(TI);
1496
2
    if (BrCondStr.empty())
1497
0
      return;
1498
2
1499
2
    unsigned WSum =
1500
2
        std::accumulate(Weights.begin(), Weights.end(), 0,
1501
4
                        [](unsigned w1, unsigned w2) { return w1 + w2; });
1502
2
    uint64_t TotalCount =
1503
2
        std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), 0,
1504
4
                        [](uint64_t c1, uint64_t c2) { return c1 + c2; });
1505
2
    BranchProbability BP(Weights[0], WSum);
1506
2
    std::string BranchProbStr;
1507
2
    raw_string_ostream OS(BranchProbStr);
1508
2
    OS << BP;
1509
2
    OS << " (total count : " << TotalCount << ")";
1510
2
    OS.flush();
1511
2
    Function *F = TI->getParent()->getParent();
1512
2
    OptimizationRemarkEmitter ORE(F);
1513
2
    ORE.emit(OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
1514
2
             << BrCondStr << " is true with probability : " << BranchProbStr);
1515
2
  }
1516
62
}
1517
1518
template <> struct GraphTraits<PGOUseFunc *> {
1519
  typedef const BasicBlock *NodeRef;
1520
  typedef succ_const_iterator ChildIteratorType;
1521
  typedef pointer_iterator<Function::const_iterator> nodes_iterator;
1522
1523
0
  static NodeRef getEntryNode(const PGOUseFunc *G) {
1524
0
    return &G->getFunc().front();
1525
0
  }
1526
0
  static ChildIteratorType child_begin(const NodeRef N) {
1527
0
    return succ_begin(N);
1528
0
  }
1529
0
  static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
1530
0
  static nodes_iterator nodes_begin(const PGOUseFunc *G) {
1531
0
    return nodes_iterator(G->getFunc().begin());
1532
0
  }
1533
0
  static nodes_iterator nodes_end(const PGOUseFunc *G) {
1534
0
    return nodes_iterator(G->getFunc().end());
1535
0
  }
1536
};
1537
1538
0
static std::string getSimpleNodeName(const BasicBlock *Node) {
1539
0
  if (!Node->getName().empty())
1540
0
    return Node->getName();
1541
0
1542
0
  std::string SimpleNodeName;
1543
0
  raw_string_ostream OS(SimpleNodeName);
1544
0
  Node->printAsOperand(OS, false);
1545
0
  return OS.str();
1546
0
}
1547
1548
template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
1549
  explicit DOTGraphTraits(bool isSimple = false)
1550
0
      : DefaultDOTGraphTraits(isSimple) {}
1551
1552
0
  static std::string getGraphName(const PGOUseFunc *G) {
1553
0
    return G->getFunc().getName();
1554
0
  }
1555
1556
0
  std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
1557
0
    std::string Result;
1558
0
    raw_string_ostream OS(Result);
1559
0
1560
0
    OS << getSimpleNodeName(Node) << ":\\l";
1561
0
    UseBBInfo *BI = Graph->findBBInfo(Node);
1562
0
    OS << "Count : ";
1563
0
    if (
BI && 0
BI->CountValid0
)
1564
0
      OS << BI->CountValue << "\\l";
1565
0
    else
1566
0
      OS << "Unknown\\l";
1567
0
1568
0
    if (!PGOInstrSelect)
1569
0
      return Result;
1570
0
1571
0
    
for (auto BI = Node->begin(); 0
BI != Node->end()0
;
++BI0
) {
1572
0
      auto *I = &*BI;
1573
0
      if (!isa<SelectInst>(I))
1574
0
        continue;
1575
0
      // Display scaled counts for SELECT instruction:
1576
0
      OS << "SELECT : { T = ";
1577
0
      uint64_t TC, FC;
1578
0
      bool HasProf = I->extractProfMetadata(TC, FC);
1579
0
      if (!HasProf)
1580
0
        OS << "Unknown, F = Unknown }\\l";
1581
0
      else
1582
0
        OS << TC << ", F = " << FC << " }\\l";
1583
0
    }
1584
0
    return Result;
1585
0
  }
1586
};
1587
} // namespace llvm