Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements PGO instrumentation using a minimum spanning tree based
10
// on the following paper:
11
//   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12
//   for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13
//   Issue 3, pp 313-322
14
// The idea of the algorithm is based on the fact that for each node (except for
15
// the entry and exit), the sum of incoming edge counts equals the sum of
16
// outgoing edge counts. The count of an edge on the spanning tree can be derived from
17
// those edges not on the spanning tree. Knuth proves this method instruments
18
// the minimum number of edges.
19
//
20
// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21
// edges have higher frequencies (more likely to execute). The idea is to
22
// instrument those less frequently executed edges to reduce the runtime
23
// overhead of instrumented binaries.
24
//
25
// This file contains two passes:
26
// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27
// count profile, and generates the instrumentation for indirect call
28
// profiling.
29
// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30
// annotates the branch weights. It also reads the indirect call value
31
// profiling records and annotates the indirect call instructions.
32
//
33
// To get precise counter information, these two passes need to be invoked at
34
// the same compilation point (so they see the same IR). For pass
35
// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36
// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37
// the profile is opened at module level and passed to each PGOUseFunc instance.
38
// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39
// in class FuncPGOInstrumentation.
40
//
41
// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42
// BBInfo contains auxiliary information for each BB. These two classes are used
43
// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44
// classes of PGOEdge and BBInfo, respectively. They contain extra data structures
45
// used in populating profile counters.
46
// The MST implementation is in Class CFGMST (CFGMST.h).
47
//
48
//===----------------------------------------------------------------------===//
49
50
#include "CFGMST.h"
51
#include "llvm/ADT/APInt.h"
52
#include "llvm/ADT/ArrayRef.h"
53
#include "llvm/ADT/STLExtras.h"
54
#include "llvm/ADT/SmallVector.h"
55
#include "llvm/ADT/Statistic.h"
56
#include "llvm/ADT/StringRef.h"
57
#include "llvm/ADT/Triple.h"
58
#include "llvm/ADT/Twine.h"
59
#include "llvm/ADT/iterator.h"
60
#include "llvm/ADT/iterator_range.h"
61
#include "llvm/Analysis/BlockFrequencyInfo.h"
62
#include "llvm/Analysis/BranchProbabilityInfo.h"
63
#include "llvm/Analysis/CFG.h"
64
#include "llvm/Analysis/IndirectCallVisitor.h"
65
#include "llvm/Analysis/LoopInfo.h"
66
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
67
#include "llvm/Analysis/ProfileSummaryInfo.h"
68
#include "llvm/IR/Attributes.h"
69
#include "llvm/IR/BasicBlock.h"
70
#include "llvm/IR/CFG.h"
71
#include "llvm/IR/CallSite.h"
72
#include "llvm/IR/Comdat.h"
73
#include "llvm/IR/Constant.h"
74
#include "llvm/IR/Constants.h"
75
#include "llvm/IR/DiagnosticInfo.h"
76
#include "llvm/IR/Dominators.h"
77
#include "llvm/IR/Function.h"
78
#include "llvm/IR/GlobalAlias.h"
79
#include "llvm/IR/GlobalValue.h"
80
#include "llvm/IR/GlobalVariable.h"
81
#include "llvm/IR/IRBuilder.h"
82
#include "llvm/IR/InstVisitor.h"
83
#include "llvm/IR/InstrTypes.h"
84
#include "llvm/IR/Instruction.h"
85
#include "llvm/IR/Instructions.h"
86
#include "llvm/IR/IntrinsicInst.h"
87
#include "llvm/IR/Intrinsics.h"
88
#include "llvm/IR/LLVMContext.h"
89
#include "llvm/IR/MDBuilder.h"
90
#include "llvm/IR/Module.h"
91
#include "llvm/IR/PassManager.h"
92
#include "llvm/IR/ProfileSummary.h"
93
#include "llvm/IR/Type.h"
94
#include "llvm/IR/Value.h"
95
#include "llvm/Pass.h"
96
#include "llvm/ProfileData/InstrProf.h"
97
#include "llvm/ProfileData/InstrProfReader.h"
98
#include "llvm/Support/BranchProbability.h"
99
#include "llvm/Support/Casting.h"
100
#include "llvm/Support/CommandLine.h"
101
#include "llvm/Support/DOTGraphTraits.h"
102
#include "llvm/Support/Debug.h"
103
#include "llvm/Support/Error.h"
104
#include "llvm/Support/ErrorHandling.h"
105
#include "llvm/Support/GraphWriter.h"
106
#include "llvm/Support/JamCRC.h"
107
#include "llvm/Support/raw_ostream.h"
108
#include "llvm/Transforms/Instrumentation.h"
109
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
110
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
111
#include <algorithm>
112
#include <cassert>
113
#include <cstdint>
114
#include <memory>
115
#include <numeric>
116
#include <string>
117
#include <unordered_map>
118
#include <utility>
119
#include <vector>
120
121
using namespace llvm;
122
using ProfileCount = Function::ProfileCount;
123
124
4
#define DEBUG_TYPE "pgo-instrumentation"
125
126
STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
127
STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
128
STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
129
STATISTIC(NumOfPGOEdge, "Number of edges.");
130
STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
131
STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
132
STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
133
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
134
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
135
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
136
STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
137
STATISTIC(NumOfCSPGOSelectInsts,
138
          "Number of select instruction instrumented in CSPGO.");
139
STATISTIC(NumOfCSPGOMemIntrinsics,
140
          "Number of mem intrinsics instrumented in CSPGO.");
141
STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
142
STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
143
STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
144
STATISTIC(NumOfCSPGOFunc,
145
          "Number of functions having valid profile counts in CSPGO.");
146
STATISTIC(NumOfCSPGOMismatch,
147
          "Number of functions having mismatch profile in CSPGO.");
148
STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
149
150
// Command line option to specify the file to read profile from. This is
151
// mainly used for testing.
152
static cl::opt<std::string>
153
    PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
154
                       cl::value_desc("filename"),
155
                       cl::desc("Specify the path of profile data file. This is "
156
                                "mainly for test purpose."));
157
static cl::opt<std::string> PGOTestProfileRemappingFile(
158
    "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
159
    cl::value_desc("filename"),
160
    cl::desc("Specify the path of profile remapping file. This is mainly for "
161
             "test purpose."));
162
163
// Command line option to disable value profiling. The default is false:
164
// i.e. value profiling is enabled by default. This is for debug purpose.
165
static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
166
                                           cl::Hidden,
167
                                           cl::desc("Disable Value Profiling"));
168
169
// Command line option to set the maximum number of VP annotations to write to
170
// the metadata for a single indirect call callsite.
171
static cl::opt<unsigned> MaxNumAnnotations(
172
    "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
173
    cl::desc("Max number of annotations for a single indirect "
174
             "call callsite"));
175
176
// Command line option to set the maximum number of value annotations
177
// to write to the metadata for a single memop intrinsic.
178
static cl::opt<unsigned> MaxNumMemOPAnnotations(
179
    "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
180
    cl::desc("Max number of precise value annotations for a single memop "
181
             "intrinsic"));
182
183
// Command line option to control appending FunctionHash to the name of a COMDAT
184
// function. This is to avoid the hash mismatch caused by the preinliner.
185
static cl::opt<bool> DoComdatRenaming(
186
    "do-comdat-renaming", cl::init(false), cl::Hidden,
187
    cl::desc("Append function hash to the name of COMDAT function to avoid "
188
             "function hash mismatch due to the preinliner"));
189
190
// Command line option to enable/disable the warning about missing profile
191
// information.
192
static cl::opt<bool>
193
    PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
194
                   cl::desc("Use this option to turn on/off "
195
                            "warnings about missing profile data for "
196
                            "functions."));
197
198
// Command line option to enable/disable the warning about a hash mismatch in
199
// the profile data.
200
static cl::opt<bool>
201
    NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
202
                      cl::desc("Use this option to turn off/on "
203
                               "warnings about profile cfg mismatch."));
204
205
// Command line option to enable/disable the warning about a hash mismatch in
206
// the profile data for Comdat functions, which often turns out to be false
207
// positive due to the pre-instrumentation inline.
208
static cl::opt<bool>
209
    NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
210
                            cl::Hidden,
211
                            cl::desc("The option is used to turn on/off "
212
                                     "warnings about hash mismatch for comdat "
213
                                     "functions."));
214
215
// Command line option to enable/disable select instruction instrumentation.
216
static cl::opt<bool>
217
    PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
218
                   cl::desc("Use this option to turn on/off SELECT "
219
                            "instruction instrumentation. "));
220
221
// Command line option to turn on CFG dot or text dump of raw profile counts
222
static cl::opt<PGOViewCountsType> PGOViewRawCounts(
223
    "pgo-view-raw-counts", cl::Hidden,
224
    cl::desc("A boolean option to show CFG dag or text "
225
             "with raw profile counts from "
226
             "profile data. See also option "
227
             "-pgo-view-counts. To limit graph "
228
             "display to only one function, use "
229
             "filtering option -view-bfi-func-name."),
230
    cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
231
               clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
232
               clEnumValN(PGOVCT_Text, "text", "show in text.")));
233
234
// Command line option to enable/disable memop intrinsic call.size profiling.
235
static cl::opt<bool>
236
    PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
237
                  cl::desc("Use this option to turn on/off "
238
                           "memory intrinsic size profiling."));
239
240
// Emit branch probability as optimization remarks.
241
static cl::opt<bool>
242
    EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
243
                          cl::desc("When this option is on, the annotated "
244
                                   "branch probability will be emitted as "
245
                                   "optimization remarks: -{Rpass|"
246
                                   "pass-remarks}=pgo-instrumentation"));
247
248
// Command line option to turn on CFG dot dump after profile annotation.
249
// Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts
250
extern cl::opt<PGOViewCountsType> PGOViewCounts;
251
252
// Command line option to specify the name of the function for CFG dump
253
// Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
254
extern cl::opt<std::string> ViewBlockFreqFuncName;
255
256
// Return a string describing the branch condition that can be
257
// used in static branch probability heuristics:
258
4
static std::string getBranchCondString(Instruction *TI) {
259
4
  BranchInst *BI = dyn_cast<BranchInst>(TI);
260
4
  if (!BI || !BI->isConditional())
261
0
    return std::string();
262
4
263
4
  Value *Cond = BI->getCondition();
264
4
  ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
265
4
  if (!CI)
266
0
    return std::string();
267
4
268
4
  std::string result;
269
4
  raw_string_ostream OS(result);
270
4
  OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
271
4
  CI->getOperand(0)->getType()->print(OS, true);
272
4
273
4
  Value *RHS = CI->getOperand(1);
274
4
  ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
275
4
  if (CV) {
276
4
    if (CV->isZero())
277
4
      OS << "_Zero";
278
0
    else if (CV->isOne())
279
0
      OS << "_One";
280
0
    else if (CV->isMinusOne())
281
0
      OS << "_MinusOne";
282
0
    else
283
0
      OS << "_Const";
284
4
  }
285
4
  OS.flush();
286
4
  return result;
287
4
}
288
289
namespace {
290
291
/// The select instruction visitor plays three roles specified
292
/// by the mode. In \c VM_counting mode, it simply counts the number of
293
/// select instructions. In \c VM_instrument mode, it inserts code to count
294
/// the number of times TrueValue of select is taken. In \c VM_annotate mode,
295
/// it reads the profile data and annotates the select instruction with metadata.
296
enum VisitMode { VM_counting, VM_instrument, VM_annotate };
297
class PGOUseFunc;
298
299
/// Instruction Visitor class to visit select instructions.
300
struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
301
  Function &F;
302
  unsigned NSIs = 0;             // Number of select instructions instrumented.
303
  VisitMode Mode = VM_counting;  // Visiting mode.
304
  unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
305
  unsigned TotalNumCtrs = 0;     // Total number of counters
306
  GlobalVariable *FuncNameVar = nullptr;
307
  uint64_t FuncHash = 0;
308
  PGOUseFunc *UseFunc = nullptr;
309
310
198
  SelectInstVisitor(Function &Func) : F(Func) {}
311
312
198
  void countSelects(Function &Func) {
313
198
    NSIs = 0;
314
198
    Mode = VM_counting;
315
198
    visit(Func);
316
198
  }
317
318
  // Visit the IR stream and instrument all select instructions. \p
319
  // Ind is a pointer to the counter index variable; \p TotalNC
320
  // is the total number of counters; \p FNV is the pointer to the
321
  // PGO function name var; \p FHash is the function hash.
322
  void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
323
101
                         GlobalVariable *FNV, uint64_t FHash) {
324
101
    Mode = VM_instrument;
325
101
    CurCtrIdx = Ind;
326
101
    TotalNumCtrs = TotalNC;
327
101
    FuncHash = FHash;
328
101
    FuncNameVar = FNV;
329
101
    visit(Func);
330
101
  }
331
332
  // Visit the IR stream and annotate all select instructions.
333
72
  void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
334
72
    Mode = VM_annotate;
335
72
    UseFunc = UF;
336
72
    CurCtrIdx = Ind;
337
72
    visit(Func);
338
72
  }
339
340
  void instrumentOneSelectInst(SelectInst &SI);
341
  void annotateOneSelectInst(SelectInst &SI);
342
343
  // Visit \p SI instruction and perform tasks according to visit mode.
344
  void visitSelectInst(SelectInst &SI);
345
346
  // Return the number of select instructions. This needs to be called after
347
  // countSelects().
348
578
  unsigned getNumOfSelectInsts() const { return NSIs; }
349
};
350
351
/// Instruction Visitor class to visit memory intrinsic calls.
352
struct MemIntrinsicVisitor : public InstVisitor<MemIntrinsicVisitor> {
353
  Function &F;
354
  unsigned NMemIs = 0;          // Number of memIntrinsics instrumented.
355
  VisitMode Mode = VM_counting; // Visiting mode.
356
  unsigned CurCtrId = 0;        // Current counter index.
357
  unsigned TotalNumCtrs = 0;    // Total number of counters
358
  GlobalVariable *FuncNameVar = nullptr;
359
  uint64_t FuncHash = 0;
360
  PGOUseFunc *UseFunc = nullptr;
361
  std::vector<Instruction *> Candidates;
362
363
198
  MemIntrinsicVisitor(Function &Func) : F(Func) {}
364
365
198
  void countMemIntrinsics(Function &Func) {
366
198
    NMemIs = 0;
367
198
    Mode = VM_counting;
368
198
    visit(Func);
369
198
  }
370
371
  void instrumentMemIntrinsics(Function &Func, unsigned TotalNC,
372
101
                               GlobalVariable *FNV, uint64_t FHash) {
373
101
    Mode = VM_instrument;
374
101
    TotalNumCtrs = TotalNC;
375
101
    FuncHash = FHash;
376
101
    FuncNameVar = FNV;
377
101
    visit(Func);
378
101
  }
379
380
198
  std::vector<Instruction *> findMemIntrinsics(Function &Func) {
381
198
    Candidates.clear();
382
198
    Mode = VM_annotate;
383
198
    visit(Func);
384
198
    return Candidates;
385
198
  }
386
387
  // Visit the IR stream and annotate all mem intrinsic call instructions.
388
  void instrumentOneMemIntrinsic(MemIntrinsic &MI);
389
390
  // Visit \p MI instruction and perform tasks according to visit mode.
391
  void visitMemIntrinsic(MemIntrinsic &SI);
392
393
198
  unsigned getNumOfMemIntrinsics() const { return NMemIs; }
394
};
395
396
class PGOInstrumentationGenLegacyPass : public ModulePass {
397
public:
398
  static char ID;
399
400
  PGOInstrumentationGenLegacyPass(bool IsCS = false)
401
42
      : ModulePass(ID), IsCS(IsCS) {
402
42
    initializePGOInstrumentationGenLegacyPassPass(
403
42
        *PassRegistry::getPassRegistry());
404
42
  }
405
406
6
  StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
407
408
private:
409
  // Whether this is context-sensitive instrumentation.
410
  bool IsCS;
411
  bool runOnModule(Module &M) override;
412
413
42
  void getAnalysisUsage(AnalysisUsage &AU) const override {
414
42
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
415
42
  }
416
};
417
418
class PGOInstrumentationUseLegacyPass : public ModulePass {
419
public:
420
  static char ID;
421
422
  // Provide the profile filename as the parameter.
423
  PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
424
53
      : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
425
53
    if (!PGOTestProfileFile.empty())
426
29
      ProfileFileName = PGOTestProfileFile;
427
53
    initializePGOInstrumentationUseLegacyPassPass(
428
53
        *PassRegistry::getPassRegistry());
429
53
  }
430
431
18
  StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
432
433
private:
434
  std::string ProfileFileName;
435
  // Whether this is context-sensitive instrumentation use.
436
  bool IsCS;
437
438
  bool runOnModule(Module &M) override;
439
440
53
  void getAnalysisUsage(AnalysisUsage &AU) const override {
441
53
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
442
53
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
443
53
  }
444
};
445
446
class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
447
public:
448
  static char ID;
449
6
  StringRef getPassName() const override {
450
6
    return "PGOInstrumentationGenCreateVarPass";
451
6
  }
452
  PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
453
6
      : ModulePass(ID), InstrProfileOutput(CSInstrName) {
454
6
    initializePGOInstrumentationGenCreateVarLegacyPassPass(
455
6
        *PassRegistry::getPassRegistry());
456
6
  }
457
458
private:
459
6
  bool runOnModule(Module &M) override {
460
6
    createProfileFileNameVar(M, InstrProfileOutput);
461
6
    createIRLevelProfileFlagVar(M, true);
462
6
    return false;
463
6
  }
464
  std::string InstrProfileOutput;
465
};
466
467
} // end anonymous namespace
468
469
char PGOInstrumentationGenLegacyPass::ID = 0;
470
471
11.0k
INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
472
11.0k
                      "PGO instrumentation.", false, false)
473
11.0k
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
474
11.0k
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
475
11.0k
INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
476
                    "PGO instrumentation.", false, false)
477
478
13
ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
479
13
  return new PGOInstrumentationGenLegacyPass(IsCS);
480
13
}
481
482
char PGOInstrumentationUseLegacyPass::ID = 0;
483
484
11.0k
INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
485
11.0k
                      "Read PGO instrumentation profile.", false, false)
486
11.0k
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
487
11.0k
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
488
11.0k
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
489
11.0k
INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
490
                    "Read PGO instrumentation profile.", false, false)
491
492
ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
493
24
                                                        bool IsCS) {
494
24
  return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
495
24
}
496
497
char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
498
499
INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
500
                "pgo-instr-gen-create-var",
501
                "Create PGO instrumentation version variable for CSPGO.", false,
502
                false)
503
504
ModulePass *
505
6
llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
506
6
  return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName);
507
6
}
508
509
namespace {
510
511
/// An MST based instrumentation for PGO
512
///
513
/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
514
/// in the function level.
515
struct PGOEdge {
516
  // This class implements the CFG edges. Note the CFG can be a multi-graph.
517
  // So there might be multiple edges with same SrcBB and DestBB.
518
  const BasicBlock *SrcBB;
519
  const BasicBlock *DestBB;
520
  uint64_t Weight;
521
  bool InMST = false;
522
  bool Removed = false;
523
  bool IsCritical = false;
524
525
  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
526
1.26k
      : SrcBB(Src), DestBB(Dest), Weight(W) {}
527
528
  // Return the information string of an edge.
529
0
  const std::string infoString() const {
530
0
    return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
531
0
            (IsCritical ? "c" : " ") + "  W=" + Twine(Weight)).str();
532
0
  }
533
};
534
535
// This class stores the auxiliary information for each BB.
536
struct BBInfo {
537
  BBInfo *Group;
538
  uint32_t Index;
539
  uint32_t Rank = 0;
540
541
992
  BBInfo(unsigned IX) : Group(this), Index(IX) {}
542
543
  // Return the information string of this object.
544
0
  const std::string infoString() const {
545
0
    return (Twine("Index=") + Twine(Index)).str();
546
0
  }
547
548
  // Empty function -- only applicable to UseBBInfo.
549
605
  void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
550
551
  // Empty function -- only applicable to UseBBInfo.
552
605
  void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
553
};
554
555
// This class holds the per-function code shared by PGOInstrumentationGen and PGOInstrumentationUse. It owns the MST of the function CFG; note the CFG can be a multi-graph.
556
template <class Edge, class BBInfo> class FuncPGOInstrumentation {
557
private:
558
  Function &F;
559
560
  // Whether this is context-sensitive instrumentation.
561
  bool IsCS;
562
563
  // A map that stores the Comdat group in function F.
564
  std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
565
566
  void computeCFGHash();
567
  void renameComdatFunction();
568
569
public:
570
  std::vector<std::vector<Instruction *>> ValueSites;
571
  SelectInstVisitor SIVisitor;
572
  MemIntrinsicVisitor MIVisitor;
573
  std::string FuncName;
574
  GlobalVariable *FuncNameVar;
575
576
  // CFG hash value for this function.
577
  uint64_t FunctionHash = 0;
578
579
  // The Minimum Spanning Tree of function CFG.
580
  CFGMST<Edge, BBInfo> MST;
581
582
  // Collect all the BBs that will be instrumented, and store them in
583
  // InstrumentBBs.
584
  void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
585
586
  // Give an edge, find the BB that will be instrumented.
587
  // Return nullptr if there is no BB to be instrumented.
588
  BasicBlock *getInstrBB(Edge *E);
589
590
  // Return the auxiliary BB information.
591
4.53k
  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::getBBInfo(llvm::BasicBlock const*) const
Line
Count
Source
591
1.21k
  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::getBBInfo(llvm::BasicBlock const*) const
Line
Count
Source
591
3.32k
  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
592
593
  // Return the auxiliary BB information if available.
594
2.49k
  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::findBBInfo(llvm::BasicBlock const*) const
Line
Count
Source
594
2.11k
  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::findBBInfo(llvm::BasicBlock const*) const
Line
Count
Source
594
378
  BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
595
596
  // Dump edges and BB information.
597
0
  void dumpInfo(std::string Str = "") const {
598
0
    MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
599
0
                              Twine(FunctionHash) + "\t" + Str);
600
0
  }
601
602
  FuncPGOInstrumentation(
603
      Function &Func,
604
      std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
605
      bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
606
      BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
607
      : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers),
608
        ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func),
609
198
        MST(F, BPI, BFI) {
610
198
    // This should be done before CFG hash computation.
611
198
    SIVisitor.countSelects(Func);
612
198
    MIVisitor.countMemIntrinsics(Func);
613
198
    if (!IsCS) {
614
186
      NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
615
186
      NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
616
186
      NumOfPGOBB += MST.BBInfos.size();
617
186
      ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
618
186
    } else {
619
12
      NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
620
12
      NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
621
12
      NumOfCSPGOBB += MST.BBInfos.size();
622
12
    }
623
198
    ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
624
198
625
198
    FuncName = getPGOFuncName(F);
626
198
    computeCFGHash();
627
198
    if (!ComdatMembers.empty())
628
24
      renameComdatFunction();
629
198
    LLVM_DEBUG(dumpInfo("after CFGMST"));
630
198
631
1.21k
    for (auto &E : MST.AllEdges) {
632
1.21k
      if (E->Removed)
633
0
        continue;
634
1.21k
      IsCS ? 
NumOfCSPGOEdge++96
:
NumOfPGOEdge++1.12k
;
635
1.21k
      if (!E->InMST)
636
448
        IsCS ? 
NumOfCSPGOInstrument++33
:
NumOfPGOInstrument++415
;
637
1.21k
    }
638
198
639
198
    if (CreateGlobalVar)
640
101
      FuncNameVar = createPGOFuncNameVar(F, FuncName);
641
198
  }
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::FuncPGOInstrumentation(llvm::Function&, std::__1::unordered_multimap<llvm::Comdat*, llvm::GlobalValue*, std::__1::hash<llvm::Comdat*>, std::__1::equal_to<llvm::Comdat*>, std::__1::allocator<std::__1::pair<llvm::Comdat* const, llvm::GlobalValue*> > >&, bool, llvm::BranchProbabilityInfo*, llvm::BlockFrequencyInfo*, bool)
Line
Count
Source
609
101
        MST(F, BPI, BFI) {
610
101
    // This should be done before CFG hash computation.
611
101
    SIVisitor.countSelects(Func);
612
101
    MIVisitor.countMemIntrinsics(Func);
613
101
    if (!IsCS) {
614
97
      NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
615
97
      NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
616
97
      NumOfPGOBB += MST.BBInfos.size();
617
97
      ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
618
97
    } else {
619
4
      NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
620
4
      NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
621
4
      NumOfCSPGOBB += MST.BBInfos.size();
622
4
    }
623
101
    ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
624
101
625
101
    FuncName = getPGOFuncName(F);
626
101
    computeCFGHash();
627
101
    if (!ComdatMembers.empty())
628
24
      renameComdatFunction();
629
101
    LLVM_DEBUG(dumpInfo("after CFGMST"));
630
101
631
587
    for (auto &E : MST.AllEdges) {
632
587
      if (E->Removed)
633
0
        continue;
634
587
      IsCS ? 
NumOfCSPGOEdge++23
:
NumOfPGOEdge++564
;
635
587
      if (!E->InMST)
636
221
        IsCS ? 
NumOfCSPGOInstrument++8
:
NumOfPGOInstrument++213
;
637
587
    }
638
101
639
101
    if (CreateGlobalVar)
640
101
      FuncNameVar = createPGOFuncNameVar(F, FuncName);
641
101
  }
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::FuncPGOInstrumentation(llvm::Function&, std::__1::unordered_multimap<llvm::Comdat*, llvm::GlobalValue*, std::__1::hash<llvm::Comdat*>, std::__1::equal_to<llvm::Comdat*>, std::__1::allocator<std::__1::pair<llvm::Comdat* const, llvm::GlobalValue*> > >&, bool, llvm::BranchProbabilityInfo*, llvm::BlockFrequencyInfo*, bool)
Line
Count
Source
609
97
        MST(F, BPI, BFI) {
610
97
    // This should be done before CFG hash computation.
611
97
    SIVisitor.countSelects(Func);
612
97
    MIVisitor.countMemIntrinsics(Func);
613
97
    if (!IsCS) {
614
89
      NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
615
89
      NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
616
89
      NumOfPGOBB += MST.BBInfos.size();
617
89
      ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
618
89
    } else {
619
8
      NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
620
8
      NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
621
8
      NumOfCSPGOBB += MST.BBInfos.size();
622
8
    }
623
97
    ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
624
97
625
97
    FuncName = getPGOFuncName(F);
626
97
    computeCFGHash();
627
97
    if (!ComdatMembers.empty())
628
0
      renameComdatFunction();
629
97
    LLVM_DEBUG(dumpInfo("after CFGMST"));
630
97
631
631
    for (auto &E : MST.AllEdges) {
632
631
      if (E->Removed)
633
0
        continue;
634
631
      IsCS ? 
NumOfCSPGOEdge++73
:
NumOfPGOEdge++558
;
635
631
      if (!E->InMST)
636
227
        IsCS ? 
NumOfCSPGOInstrument++25
:
NumOfPGOInstrument++202
;
637
631
    }
638
97
639
97
    if (CreateGlobalVar)
640
0
      FuncNameVar = createPGOFuncNameVar(F, FuncName);
641
97
  }
642
};
643
644
} // end anonymous namespace
645
646
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
647
// value of each BB in the CFG. The higher 32 bits record the number of edges.
648
template <class Edge, class BBInfo>
649
198
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
650
198
  std::vector<char> Indexes;
651
198
  JamCRC JC;
652
774
  for (auto &BB : F) {
653
774
    const Instruction *TI = BB.getTerminator();
654
1.58k
    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; 
++I809
) {
655
809
      BasicBlock *Succ = TI->getSuccessor(I);
656
809
      auto BI = findBBInfo(Succ);
657
809
      if (BI == nullptr)
658
0
        continue;
659
809
      uint32_t Index = BI->Index;
660
4.04k
      for (int J = 0; J < 4; 
J++3.23k
)
661
3.23k
        Indexes.push_back((char)(Index >> (J * 8)));
662
809
    }
663
774
  }
664
198
  JC.update(Indexes);
665
198
666
198
  // Hash format for context sensitive profile. Reserve 4 bits for other
667
198
  // information.
668
198
  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
669
198
                 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
670
198
                 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
671
198
                 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
672
198
  // Reserve bit 60-63 for other information purpose.
673
198
  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
674
198
  if (IsCS)
675
12
    NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
676
198
  LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
677
198
                    << " CRC = " << JC.getCRC()
678
198
                    << ", Selects = " << SIVisitor.getNumOfSelectInsts()
679
198
                    << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
680
198
                    << ValueSites[IPVK_IndirectCallTarget].size()
681
198
                    << ", Hash = " << FunctionHash << "\n";);
682
198
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::computeCFGHash()
Line
Count
Source
649
101
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
650
101
  std::vector<char> Indexes;
651
101
  JamCRC JC;
652
368
  for (auto &BB : F) {
653
368
    const Instruction *TI = BB.getTerminator();
654
746
    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; 
++I378
) {
655
378
      BasicBlock *Succ = TI->getSuccessor(I);
656
378
      auto BI = findBBInfo(Succ);
657
378
      if (BI == nullptr)
658
0
        continue;
659
378
      uint32_t Index = BI->Index;
660
1.89k
      for (int J = 0; J < 4; 
J++1.51k
)
661
1.51k
        Indexes.push_back((char)(Index >> (J * 8)));
662
378
    }
663
368
  }
664
101
  JC.update(Indexes);
665
101
666
101
  // Hash format for context sensitive profile. Reserve 4 bits for other
667
101
  // information.
668
101
  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
669
101
                 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
670
101
                 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
671
101
                 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
672
101
  // Reserve bit 60-63 for other information purpose.
673
101
  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
674
101
  if (IsCS)
675
4
    NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
676
101
  LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
677
101
                    << " CRC = " << JC.getCRC()
678
101
                    << ", Selects = " << SIVisitor.getNumOfSelectInsts()
679
101
                    << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
680
101
                    << ValueSites[IPVK_IndirectCallTarget].size()
681
101
                    << ", Hash = " << FunctionHash << "\n";);
682
101
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::computeCFGHash()
Line
Count
Source
649
97
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
650
97
  std::vector<char> Indexes;
651
97
  JamCRC JC;
652
406
  for (auto &BB : F) {
653
406
    const Instruction *TI = BB.getTerminator();
654
837
    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; 
++I431
) {
655
431
      BasicBlock *Succ = TI->getSuccessor(I);
656
431
      auto BI = findBBInfo(Succ);
657
431
      if (BI == nullptr)
658
0
        continue;
659
431
      uint32_t Index = BI->Index;
660
2.15k
      for (int J = 0; J < 4; 
J++1.72k
)
661
1.72k
        Indexes.push_back((char)(Index >> (J * 8)));
662
431
    }
663
406
  }
664
97
  JC.update(Indexes);
665
97
666
97
  // Hash format for context sensitive profile. Reserve 4 bits for other
667
97
  // information.
668
97
  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
669
97
                 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
670
97
                 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
671
97
                 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
672
97
  // Reserve bit 60-63 for other information purpose.
673
97
  FunctionHash &= 0x0FFFFFFFFFFFFFFF;
674
97
  if (IsCS)
675
8
    NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
676
97
  LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
677
97
                    << " CRC = " << JC.getCRC()
678
97
                    << ", Selects = " << SIVisitor.getNumOfSelectInsts()
679
97
                    << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
680
97
                    << ValueSites[IPVK_IndirectCallTarget].size()
681
97
                    << ", Hash = " << FunctionHash << "\n";);
682
97
}
683
684
// Check if we can safely rename this Comdat function.
685
static bool canRenameComdat(
686
    Function &F,
687
24
    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
688
24
  if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
689
4
    return false;
690
20
691
20
  // FIXME: Current only handle those Comdat groups that only containing one
692
20
  // function and function aliases.
693
20
  // (1) For a Comdat group containing multiple functions, we need to have a
694
20
  // unique postfix based on the hashes for each function. There is a
695
20
  // non-trivial code refactoring to do this efficiently.
696
20
  // (2) Variables can not be renamed, so we can not rename Comdat function in a
697
20
  // group including global vars.
698
20
  Comdat *C = F.getComdat();
699
24
  for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
700
24
    if (dyn_cast<GlobalAlias>(CM.second))
701
0
      continue;
702
24
    Function *FM = dyn_cast<Function>(CM.second);
703
24
    if (FM != &F)
704
12
      return false;
705
24
  }
706
20
  
return true8
;
707
20
}
708
709
// Append the CFGHash to the Comdat function name.
710
template <class Edge, class BBInfo>
711
24
void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
712
24
  if (!canRenameComdat(F, ComdatMembers))
713
16
    return;
714
8
  std::string OrigName = F.getName().str();
715
8
  std::string NewFuncName =
716
8
      Twine(F.getName() + "." + Twine(FunctionHash)).str();
717
8
  F.setName(Twine(NewFuncName));
718
8
  GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
719
8
  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
720
8
  Comdat *NewComdat;
721
8
  Module *M = F.getParent();
722
8
  // For AvailableExternallyLinkage functions, change the linkage to
723
8
  // LinkOnceODR and put them into comdat. This is because after renaming, there
724
8
  // is no backup external copy available for the function.
725
8
  if (!F.hasComdat()) {
726
4
    assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
727
4
    NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
728
4
    F.setLinkage(GlobalValue::LinkOnceODRLinkage);
729
4
    F.setComdat(NewComdat);
730
4
    return;
731
4
  }
732
4
733
4
  // This function belongs to a single function Comdat group.
734
4
  Comdat *OrigComdat = F.getComdat();
735
4
  std::string NewComdatName =
736
4
      Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
737
4
  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
738
4
  NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
739
4
740
4
  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
741
4
    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) {
742
0
      // For aliases, change the name directly.
743
0
      assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
744
0
      std::string OrigGAName = GA->getName().str();
745
0
      GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
746
0
      GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA);
747
0
      continue;
748
0
    }
749
4
    // Must be a function.
750
4
    Function *CF = dyn_cast<Function>(CM.second);
751
4
    assert(CF);
752
4
    CF->setComdat(NewComdat);
753
4
  }
754
4
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::renameComdatFunction()
Line
Count
Source
711
24
void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
712
24
  if (!canRenameComdat(F, ComdatMembers))
713
16
    return;
714
8
  std::string OrigName = F.getName().str();
715
8
  std::string NewFuncName =
716
8
      Twine(F.getName() + "." + Twine(FunctionHash)).str();
717
8
  F.setName(Twine(NewFuncName));
718
8
  GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
719
8
  FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
720
8
  Comdat *NewComdat;
721
8
  Module *M = F.getParent();
722
8
  // For AvailableExternallyLinkage functions, change the linkage to
723
8
  // LinkOnceODR and put them into comdat. This is because after renaming, there
724
8
  // is no backup external copy available for the function.
725
8
  if (!F.hasComdat()) {
726
4
    assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
727
4
    NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
728
4
    F.setLinkage(GlobalValue::LinkOnceODRLinkage);
729
4
    F.setComdat(NewComdat);
730
4
    return;
731
4
  }
732
4
733
4
  // This function belongs to a single function Comdat group.
734
4
  Comdat *OrigComdat = F.getComdat();
735
4
  std::string NewComdatName =
736
4
      Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
737
4
  NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
738
4
  NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
739
4
740
4
  for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
741
4
    if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) {
742
0
      // For aliases, change the name directly.
743
0
      assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
744
0
      std::string OrigGAName = GA->getName().str();
745
0
      GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
746
0
      GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA);
747
0
      continue;
748
0
    }
749
4
    // Must be a function.
750
4
    Function *CF = dyn_cast<Function>(CM.second);
751
4
    assert(CF);
752
4
    CF->setComdat(NewComdat);
753
4
  }
754
4
}
Unexecuted instantiation: PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::renameComdatFunction()
755
756
// Collect all the BBs that will be instruments and return them in
757
// InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
758
template <class Edge, class BBInfo>
759
void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
760
182
    std::vector<BasicBlock *> &InstrumentBBs) {
761
182
  // Use a worklist as we will update the vector during the iteration.
762
182
  std::vector<Edge *> EdgeList;
763
182
  EdgeList.reserve(MST.AllEdges.size());
764
182
  for (auto &E : MST.AllEdges)
765
1.18k
    EdgeList.push_back(E.get());
766
182
767
1.18k
  for (auto &E : EdgeList) {
768
1.18k
    BasicBlock *InstrBB = getInstrBB(E);
769
1.18k
    if (InstrBB)
770
424
      InstrumentBBs.push_back(InstrBB);
771
1.18k
  }
772
182
773
182
  // Set up InEdges/OutEdges for all BBs.
774
1.23k
  for (auto &E : MST.AllEdges) {
775
1.23k
    if (E->Removed)
776
22
      continue;
777
1.20k
    const BasicBlock *SrcBB = E->SrcBB;
778
1.20k
    const BasicBlock *DestBB = E->DestBB;
779
1.20k
    BBInfo &SrcInfo = getBBInfo(SrcBB);
780
1.20k
    BBInfo &DestInfo = getBBInfo(DestBB);
781
1.20k
    SrcInfo.addOutEdge(E.get());
782
1.20k
    DestInfo.addInEdge(E.get());
783
1.20k
  }
784
182
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::getInstrumentBBs(std::__1::vector<llvm::BasicBlock*, std::__1::allocator<llvm::BasicBlock*> >&)
Line
Count
Source
760
101
    std::vector<BasicBlock *> &InstrumentBBs) {
761
101
  // Use a worklist as we will update the vector during the iteration.
762
101
  std::vector<Edge *> EdgeList;
763
101
  EdgeList.reserve(MST.AllEdges.size());
764
101
  for (auto &E : MST.AllEdges)
765
587
    EdgeList.push_back(E.get());
766
101
767
587
  for (auto &E : EdgeList) {
768
587
    BasicBlock *InstrBB = getInstrBB(E);
769
587
    if (InstrBB)
770
217
      InstrumentBBs.push_back(InstrBB);
771
587
  }
772
101
773
101
  // Set up InEdges/OutEdges for all BBs.
774
623
  for (auto &E : MST.AllEdges) {
775
623
    if (E->Removed)
776
18
      continue;
777
605
    const BasicBlock *SrcBB = E->SrcBB;
778
605
    const BasicBlock *DestBB = E->DestBB;
779
605
    BBInfo &SrcInfo = getBBInfo(SrcBB);
780
605
    BBInfo &DestInfo = getBBInfo(DestBB);
781
605
    SrcInfo.addOutEdge(E.get());
782
605
    DestInfo.addInEdge(E.get());
783
605
  }
784
101
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::getInstrumentBBs(std::__1::vector<llvm::BasicBlock*, std::__1::allocator<llvm::BasicBlock*> >&)
Line
Count
Source
760
81
    std::vector<BasicBlock *> &InstrumentBBs) {
761
81
  // Use a worklist as we will update the vector during the iteration.
762
81
  std::vector<Edge *> EdgeList;
763
81
  EdgeList.reserve(MST.AllEdges.size());
764
81
  for (auto &E : MST.AllEdges)
765
599
    EdgeList.push_back(E.get());
766
81
767
599
  for (auto &E : EdgeList) {
768
599
    BasicBlock *InstrBB = getInstrBB(E);
769
599
    if (InstrBB)
770
207
      InstrumentBBs.push_back(InstrBB);
771
599
  }
772
81
773
81
  // Set up InEdges/OutEdges for all BBs.
774
607
  for (auto &E : MST.AllEdges) {
775
607
    if (E->Removed)
776
4
      continue;
777
603
    const BasicBlock *SrcBB = E->SrcBB;
778
603
    const BasicBlock *DestBB = E->DestBB;
779
603
    BBInfo &SrcInfo = getBBInfo(SrcBB);
780
603
    BBInfo &DestInfo = getBBInfo(DestBB);
781
603
    SrcInfo.addOutEdge(E.get());
782
603
    DestInfo.addInEdge(E.get());
783
603
  }
784
81
}
785
786
// Given a CFG E to be instrumented, find which BB to place the instrumented
787
// code. The function will split the critical edge if necessary.
788
template <class Edge, class BBInfo>
789
1.18k
BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
790
1.18k
  if (E->InMST || 
E->Removed432
)
791
754
    return nullptr;
792
432
793
432
  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
794
432
  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
795
432
  // For a fake edge, instrument the real BB.
796
432
  if (SrcBB == nullptr)
797
112
    return DestBB;
798
320
  if (DestBB == nullptr)
799
11
    return SrcBB;
800
309
801
309
  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
802
305
    // There are basic blocks (such as catchswitch) cannot be instrumented.
803
305
    // If the returned first insertion point is the end of BB, skip this BB.
804
305
    if (BB->getFirstInsertionPt() == BB->end())
805
4
      return nullptr;
806
301
    return BB;
807
301
  };
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::getInstrBB((anonymous namespace)::PGOEdge*)::'lambda'(llvm::BasicBlock*)::operator()(llvm::BasicBlock*) const
Line
Count
Source
801
143
  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
802
143
    // There are basic blocks (such as catchswitch) cannot be instrumented.
803
143
    // If the returned first insertion point is the end of BB, skip this BB.
804
143
    if (BB->getFirstInsertionPt() == BB->end())
805
2
      return nullptr;
806
141
    return BB;
807
141
  };
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::getInstrBB((anonymous namespace)::PGOUseEdge*)::'lambda'(llvm::BasicBlock*)::operator()(llvm::BasicBlock*) const
Line
Count
Source
801
162
  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
802
162
    // There are basic blocks (such as catchswitch) cannot be instrumented.
803
162
    // If the returned first insertion point is the end of BB, skip this BB.
804
162
    if (BB->getFirstInsertionPt() == BB->end())
805
2
      return nullptr;
806
160
    return BB;
807
160
  };
808
309
809
309
  // Instrument the SrcBB if it has a single successor,
810
309
  // otherwise, the DestBB if this is not a critical edge.
811
309
  Instruction *TI = SrcBB->getTerminator();
812
309
  if (TI->getNumSuccessors() <= 1)
813
187
    return canInstrument(SrcBB);
814
122
  if (!E->IsCritical)
815
96
    return canInstrument(DestBB);
816
26
817
26
  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
818
26
  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
819
26
  if (!InstrBB) {
820
4
    LLVM_DEBUG(
821
4
        dbgs() << "Fail to split critical edge: not instrument this edge.\n");
822
4
    return nullptr;
823
4
  }
824
22
  // For a critical edge, we have to split. Instrument the newly
825
22
  // created BB.
826
22
  IsCS ? 
NumOfCSPGOSplit++0
: NumOfPGOSplit++;
827
22
  LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
828
22
                    << " --> " << getBBInfo(DestBB).Index << "\n");
829
22
  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
830
22
  MST.addEdge(SrcBB, InstrBB, 0);
831
22
  // Second one: Add new edge of InstrBB->DestBB.
832
22
  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
833
22
  NewEdge1.InMST = true;
834
22
  E->Removed = true;
835
22
836
22
  return canInstrument(InstrBB);
837
22
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOEdge, (anonymous namespace)::BBInfo>::getInstrBB((anonymous namespace)::PGOEdge*)
Line
Count
Source
789
587
BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
790
587
  if (E->InMST || 
E->Removed221
)
791
366
    return nullptr;
792
221
793
221
  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
794
221
  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
795
221
  // For a fake edge, instrument the real BB.
796
221
  if (SrcBB == nullptr)
797
69
    return DestBB;
798
152
  if (DestBB == nullptr)
799
7
    return SrcBB;
800
145
801
145
  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
802
145
    // There are basic blocks (such as catchswitch) cannot be instrumented.
803
145
    // If the returned first insertion point is the end of BB, skip this BB.
804
145
    if (BB->getFirstInsertionPt() == BB->end())
805
145
      return nullptr;
806
145
    return BB;
807
145
  };
808
145
809
145
  // Instrument the SrcBB if it has a single successor,
810
145
  // otherwise, the DestBB if this is not a critical edge.
811
145
  Instruction *TI = SrcBB->getTerminator();
812
145
  if (TI->getNumSuccessors() <= 1)
813
83
    return canInstrument(SrcBB);
814
62
  if (!E->IsCritical)
815
42
    return canInstrument(DestBB);
816
20
817
20
  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
818
20
  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
819
20
  if (!InstrBB) {
820
2
    LLVM_DEBUG(
821
2
        dbgs() << "Fail to split critical edge: not instrument this edge.\n");
822
2
    return nullptr;
823
2
  }
824
18
  // For a critical edge, we have to split. Instrument the newly
825
18
  // created BB.
826
18
  IsCS ? 
NumOfCSPGOSplit++0
: NumOfPGOSplit++;
827
18
  LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
828
18
                    << " --> " << getBBInfo(DestBB).Index << "\n");
829
18
  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
830
18
  MST.addEdge(SrcBB, InstrBB, 0);
831
18
  // Second one: Add new edge of InstrBB->DestBB.
832
18
  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
833
18
  NewEdge1.InMST = true;
834
18
  E->Removed = true;
835
18
836
18
  return canInstrument(InstrBB);
837
18
}
PGOInstrumentation.cpp:(anonymous namespace)::FuncPGOInstrumentation<(anonymous namespace)::PGOUseEdge, (anonymous namespace)::UseBBInfo>::getInstrBB((anonymous namespace)::PGOUseEdge*)
Line
Count
Source
789
599
BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
790
599
  if (E->InMST || 
E->Removed211
)
791
388
    return nullptr;
792
211
793
211
  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
794
211
  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
795
211
  // For a fake edge, instrument the real BB.
796
211
  if (SrcBB == nullptr)
797
43
    return DestBB;
798
168
  if (DestBB == nullptr)
799
4
    return SrcBB;
800
164
801
164
  auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
802
164
    // There are basic blocks (such as catchswitch) cannot be instrumented.
803
164
    // If the returned first insertion point is the end of BB, skip this BB.
804
164
    if (BB->getFirstInsertionPt() == BB->end())
805
164
      return nullptr;
806
164
    return BB;
807
164
  };
808
164
809
164
  // Instrument the SrcBB if it has a single successor,
810
164
  // otherwise, the DestBB if this is not a critical edge.
811
164
  Instruction *TI = SrcBB->getTerminator();
812
164
  if (TI->getNumSuccessors() <= 1)
813
104
    return canInstrument(SrcBB);
814
60
  if (!E->IsCritical)
815
54
    return canInstrument(DestBB);
816
6
817
6
  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
818
6
  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
819
6
  if (!InstrBB) {
820
2
    LLVM_DEBUG(
821
2
        dbgs() << "Fail to split critical edge: not instrument this edge.\n");
822
2
    return nullptr;
823
2
  }
824
4
  // For a critical edge, we have to split. Instrument the newly
825
4
  // created BB.
826
4
  IsCS ? 
NumOfCSPGOSplit++0
: NumOfPGOSplit++;
827
4
  LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
828
4
                    << " --> " << getBBInfo(DestBB).Index << "\n");
829
4
  // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
830
4
  MST.addEdge(SrcBB, InstrBB, 0);
831
4
  // Second one: Add new edge of InstrBB->DestBB.
832
4
  Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
833
4
  NewEdge1.InMST = true;
834
4
  E->Removed = true;
835
4
836
4
  return canInstrument(InstrBB);
837
4
}
838
839
// Visit all edge and instrument the edges not in MST, and do value profiling.
840
// Critical edges will be split.
841
static void instrumentOneFunc(
842
    Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
843
    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
844
101
    bool IsCS) {
845
101
  // Split indirectbr critical edges here before computing the MST rather than
846
101
  // later in getInstrBB() to avoid invalidating it.
847
101
  SplitIndirectBrCriticalEdges(F, BPI, BFI);
848
101
849
101
  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
850
101
                                                   BFI, IsCS);
851
101
  std::vector<BasicBlock *> InstrumentBBs;
852
101
  FuncInfo.getInstrumentBBs(InstrumentBBs);
853
101
  unsigned NumCounters =
854
101
      InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
855
101
856
101
  uint32_t I = 0;
857
101
  Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
858
217
  for (auto *InstrBB : InstrumentBBs) {
859
217
    IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
860
217
    assert(Builder.GetInsertPoint() != InstrBB->end() &&
861
217
           "Cannot get the Instrumentation point");
862
217
    Builder.CreateCall(
863
217
        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
864
217
        {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
865
217
         Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
866
217
         Builder.getInt32(I++)});
867
217
  }
868
101
869
101
  // Now instrument select instructions:
870
101
  FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
871
101
                                       FuncInfo.FunctionHash);
872
101
  assert(I == NumCounters);
873
101
874
101
  if (DisableValueProfiling)
875
0
    return;
876
101
877
101
  unsigned NumIndirectCalls = 0;
878
101
  for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) {
879
6
    CallSite CS(I);
880
6
    Value *Callee = CS.getCalledValue();
881
6
    LLVM_DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = "
882
6
                      << NumIndirectCalls << "\n");
883
6
    IRBuilder<> Builder(I);
884
6
    assert(Builder.GetInsertPoint() != I->getParent()->end() &&
885
6
           "Cannot get the Instrumentation point");
886
6
    Builder.CreateCall(
887
6
        Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
888
6
        {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
889
6
         Builder.getInt64(FuncInfo.FunctionHash),
890
6
         Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()),
891
6
         Builder.getInt32(IPVK_IndirectCallTarget),
892
6
         Builder.getInt32(NumIndirectCalls++)});
893
6
  }
894
101
  NumOfPGOICall += NumIndirectCalls;
895
101
896
101
  // Now instrument memop intrinsic calls.
897
101
  FuncInfo.MIVisitor.instrumentMemIntrinsics(
898
101
      F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash);
899
101
}
900
901
namespace {
902
903
// This class represents a CFG edge in profile use compilation.
904
struct PGOUseEdge : public PGOEdge {
905
  bool CountValid = false;
906
  uint64_t CountValue = 0;
907
908
  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
909
639
      : PGOEdge(Src, Dest, W) {}
910
911
  // Set edge count value
912
580
  void setEdgeCount(uint64_t Value) {
913
580
    CountValue = Value;
914
580
    CountValid = true;
915
580
  }
916
917
  // Return the information string for this object.
918
0
  const std::string infoString() const {
919
0
    if (!CountValid)
920
0
      return PGOEdge::infoString();
921
0
    return (Twine(PGOEdge::infoString()) + "  Count=" + Twine(CountValue))
922
0
        .str();
923
0
  }
924
};
925
926
using DirectEdges = SmallVector<PGOUseEdge *, 2>;
927
928
// This class stores the auxiliary information for each BB.
929
struct UseBBInfo : public BBInfo {
930
  uint64_t CountValue = 0;
931
  bool CountValid;
932
  int32_t UnknownCountInEdge = 0;
933
  int32_t UnknownCountOutEdge = 0;
934
  DirectEdges InEdges;
935
  DirectEdges OutEdges;
936
937
505
  UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
938
939
  UseBBInfo(unsigned IX, uint64_t C)
940
0
      : BBInfo(IX), CountValue(C), CountValid(true) {}
941
942
  // Set the profile count value for this BB.
943
205
  void setBBInfoCount(uint64_t Value) {
944
205
    CountValue = Value;
945
205
    CountValid = true;
946
205
  }
947
948
  // Return the information string of this object.
949
0
  const std::string infoString() const {
950
0
    if (!CountValid)
951
0
      return BBInfo::infoString();
952
0
    return (Twine(BBInfo::infoString()) + "  Count=" + Twine(CountValue)).str();
953
0
  }
954
955
  // Add an OutEdge and update the edge count.
956
603
  void addOutEdge(PGOUseEdge *E) {
957
603
    OutEdges.push_back(E);
958
603
    UnknownCountOutEdge++;
959
603
  }
960
961
  // Add an InEdge and update the edge count.
962
603
  void addInEdge(PGOUseEdge *E) {
963
603
    InEdges.push_back(E);
964
603
    UnknownCountInEdge++;
965
603
  }
966
};
967
968
} // end anonymous namespace
969
970
// Sum up the count values for all the edges.
971
549
static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
972
549
  uint64_t Total = 0;
973
793
  for (auto &E : Edges) {
974
793
    if (E->Removed)
975
0
      continue;
976
793
    Total += E->CountValue;
977
793
  }
978
549
  return Total;
979
549
}
980
981
namespace {
982
983
class PGOUseFunc {
984
public:
985
  PGOUseFunc(Function &Func, Module *Modu,
986
             std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
987
             BranchProbabilityInfo *BPI = nullptr,
988
             BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false)
989
      : F(Func), M(Modu), BFI(BFIin),
990
        FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
991
97
        FreqAttr(FFA_Normal), IsCS(IsCS) {}
992
993
  // Read counts for the instrumented BB from profile.
994
  bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
995
996
  // Populate the counts for all BBs.
997
  void populateCounters();
998
999
  // Set the branch weights based on the count values.
1000
  void setBranchWeights();
1001
1002
  // Annotate the value profile call sites for all value kind.
1003
  void annotateValueSites();
1004
1005
  // Annotate the value profile call sites for one value kind.
1006
  void annotateValueSites(uint32_t Kind);
1007
1008
  // Annotate the irreducible loop header weights.
1009
  void annotateIrrLoopHeaderWeights();
1010
1011
  // The hotness of the function from the profile count.
1012
  enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1013
1014
  // Return the function hotness from the profile.
1015
72
  FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1016
1017
  // Return the function hash.
1018
0
  uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1019
1020
  // Return the profile record for this function;
1021
6
  InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1022
1023
  // Return the auxiliary BB information.
1024
2.12k
  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
1025
2.12k
    return FuncInfo.getBBInfo(BB);
1026
2.12k
  }
1027
1028
  // Return the auxiliary BB information if available.
1029
1.68k
  UseBBInfo *findBBInfo(const BasicBlock *BB) const {
1030
1.68k
    return FuncInfo.findBBInfo(BB);
1031
1.68k
  }
1032
1033
0
  Function &getFunc() const { return F; }
1034
1035
0
  void dumpInfo(std::string Str = "") const {
1036
0
    FuncInfo.dumpInfo(Str);
1037
0
  }
1038
1039
7
  uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1040
private:
1041
  Function &F;
1042
  Module *M;
1043
  BlockFrequencyInfo *BFI;
1044
1045
  // This member stores the shared information with class PGOGenFunc.
1046
  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
1047
1048
  // The maximum count value in the profile. This is only used in PGO use
1049
  // compilation.
1050
  uint64_t ProgramMaxCount;
1051
1052
  // Position of counter that remains to be read.
1053
  uint32_t CountPosition = 0;
1054
1055
  // Total size of the profile count for this function.
1056
  uint32_t ProfileCountSize = 0;
1057
1058
  // ProfileRecord for this function.
1059
  InstrProfRecord ProfileRecord;
1060
1061
  // Function hotness info derived from profile.
1062
  FuncFreqAttr FreqAttr;
1063
1064
  // Is to use the context sensitive profile.
1065
  bool IsCS;
1066
1067
  // Find the Instrumented BB and set the value. Return false on error.
1068
  bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1069
1070
  // Set the edge counter value for the unknown edge -- there should be only
1071
  // one unknown edge.
1072
  void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1073
1074
  // Return FuncName string;
1075
0
  const std::string getFuncName() const { return FuncInfo.FuncName; }
1076
1077
  // Set the hot/cold inline hints based on the count values.
1078
  // FIXME: This function should be removed once the functionality in
1079
  // the inliner is implemented.
1080
72
  void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1081
72
    if (ProgramMaxCount == 0)
1082
2
      return;
1083
70
    // Threshold of the hot functions.
1084
70
    const BranchProbability HotFunctionThreshold(1, 100);
1085
70
    // Threshold of the cold functions.
1086
70
    const BranchProbability ColdFunctionThreshold(2, 10000);
1087
70
    if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount))
1088
52
      FreqAttr = FFA_Hot;
1089
18
    else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount))
1090
4
      FreqAttr = FFA_Cold;
1091
70
  }
1092
};
1093
1094
} // end anonymous namespace
1095
1096
// Visit all the edges and assign the count value for the instrumented
1097
// edges and the BB. Return false on error.
1098
bool PGOUseFunc::setInstrumentedCounts(
1099
81
    const std::vector<uint64_t> &CountFromProfile) {
1100
81
1101
81
  std::vector<BasicBlock *> InstrumentBBs;
1102
81
  FuncInfo.getInstrumentBBs(InstrumentBBs);
1103
81
  unsigned NumCounters =
1104
81
      InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1105
81
  // The number of counters here should match the number of counters
1106
81
  // in profile. Return if they mismatch.
1107
81
  if (NumCounters != CountFromProfile.size()) {
1108
2
    return false;
1109
2
  }
1110
79
  // Set the profile count to the Instrumented BBs.
1111
79
  uint32_t I = 0;
1112
205
  for (BasicBlock *InstrBB : InstrumentBBs) {
1113
205
    uint64_t CountValue = CountFromProfile[I++];
1114
205
    UseBBInfo &Info = getBBInfo(InstrBB);
1115
205
    Info.setBBInfoCount(CountValue);
1116
205
  }
1117
79
  ProfileCountSize = CountFromProfile.size();
1118
79
  CountPosition = I;
1119
79
1120
79
  // Set the edge count and update the count of unknown edges for BBs.
1121
209
  auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1122
209
    E->setEdgeCount(Value);
1123
209
    this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1124
209
    this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1125
209
  };
1126
79
1127
79
  // Set the profile count the Instrumented edges. There are BBs that not in
1128
79
  // MST but not instrumented. Need to set the edge count value so that we can
1129
79
  // populate the profile counts later.
1130
603
  for (auto &E : FuncInfo.MST.AllEdges) {
1131
603
    if (E->Removed || 
E->InMST599
)
1132
394
      continue;
1133
209
    const BasicBlock *SrcBB = E->SrcBB;
1134
209
    UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1135
209
1136
209
    // If only one out-edge, the edge profile count should be the same as BB
1137
209
    // profile count.
1138
209
    if (SrcInfo.CountValid && 
SrcInfo.OutEdges.size() == 1121
)
1139
108
      setEdgeCount(E.get(), SrcInfo.CountValue);
1140
101
    else {
1141
101
      const BasicBlock *DestBB = E->DestBB;
1142
101
      UseBBInfo &DestInfo = getBBInfo(DestBB);
1143
101
      // If only one in-edge, the edge profile count should be the same as BB
1144
101
      // profile count.
1145
101
      if (DestInfo.CountValid && 
DestInfo.InEdges.size() == 197
)
1146
97
        setEdgeCount(E.get(), DestInfo.CountValue);
1147
101
    }
1148
209
    if (E->CountValid)
1149
205
      continue;
1150
4
    // E's count should have been set from profile. If not, this meenas E skips
1151
4
    // the instrumentation. We set the count to 0.
1152
4
    setEdgeCount(E.get(), 0);
1153
4
  }
1154
79
  return true;
1155
79
}
1156
1157
// Set the count value for the unknown edge. There should be one and only one
1158
// unknown edge in Edges vector.
1159
371
void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1160
399
  for (auto &E : Edges) {
1161
399
    if (E->CountValid)
1162
28
      continue;
1163
371
    E->setEdgeCount(Value);
1164
371
1165
371
    getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1166
371
    getBBInfo(E->DestBB).UnknownCountInEdge--;
1167
371
    return;
1168
371
  }
1169
371
  
llvm_unreachable0
("Cannot find the unknown count edge");
1170
371
}
1171
1172
// Read the profile from ProfileFileName and assign the value to the
1173
// instrumented BB and the edges. This function also updates ProgramMaxCount.
1174
// Return true if the profile are successfully read, and false on errors.
1175
97
bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
1176
97
  auto &Ctx = M->getContext();
1177
97
  Expected<InstrProfRecord> Result =
1178
97
      PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
1179
97
  if (Error E = Result.takeError()) {
1180
16
    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1181
16
      auto Err = IPE.get();
1182
16
      bool SkipWarning = false;
1183
16
      LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1184
16
                        << FuncInfo.FuncName << ": ");
1185
16
      if (Err == instrprof_error::unknown_function) {
1186
14
        IsCS ? 
NumOfCSPGOMissing++1
:
NumOfPGOMissing++13
;
1187
14
        SkipWarning = !PGOWarnMissing;
1188
14
        LLVM_DEBUG(dbgs() << "unknown function");
1189
14
      } else 
if (2
Err == instrprof_error::hash_mismatch2
||
1190
2
                 
Err == instrprof_error::malformed0
) {
1191
2
        IsCS ? 
NumOfCSPGOMismatch++0
: NumOfPGOMismatch++;
1192
2
        SkipWarning =
1193
2
            NoPGOWarnMismatch ||
1194
2
            (NoPGOWarnMismatchComdat &&
1195
2
             (F.hasComdat() ||
1196
2
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1197
2
        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
1198
2
      }
1199
16
1200
16
      LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1201
16
      if (SkipWarning)
1202
12
        return;
1203
4
1204
4
      std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
1205
4
                        std::string(" Hash = ") +
1206
4
                        std::to_string(FuncInfo.FunctionHash);
1207
4
1208
4
      Ctx.diagnose(
1209
4
          DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1210
4
    });
1211
16
    return false;
1212
16
  }
1213
81
  ProfileRecord = std::move(Result.get());
1214
81
  std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1215
81
1216
81
  IsCS ? 
NumOfCSPGOFunc++7
:
NumOfPGOFunc++74
;
1217
81
  LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1218
81
  uint64_t ValueSum = 0;
1219
296
  for (unsigned I = 0, S = CountFromProfile.size(); I < S; 
I++215
) {
1220
215
    LLVM_DEBUG(dbgs() << "  " << I << ": " << CountFromProfile[I] << "\n");
1221
215
    ValueSum += CountFromProfile[I];
1222
215
  }
1223
81
  AllZeros = (ValueSum == 0);
1224
81
1225
81
  LLVM_DEBUG(dbgs() << "SUM =  " << ValueSum << "\n");
1226
81
1227
81
  getBBInfo(nullptr).UnknownCountOutEdge = 2;
1228
81
  getBBInfo(nullptr).UnknownCountInEdge = 2;
1229
81
1230
81
  if (!setInstrumentedCounts(CountFromProfile)) {
1231
2
    LLVM_DEBUG(
1232
2
        dbgs() << "Inconsistent number of counts, skipping this function");
1233
2
    Ctx.diagnose(DiagnosticInfoPGOProfile(
1234
2
        M->getName().data(),
1235
2
        Twine("Inconsistent number of counts in ") + F.getName().str()
1236
2
        + Twine(": the profile may be stale or there is a function name collision."),
1237
2
        DS_Warning));
1238
2
    return false;
1239
2
  }
1240
79
  ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1241
79
  return true;
1242
79
}
1243
1244
// Populate the counters from instrumented BBs to all BBs.
1245
// In the end of this operation, all BBs should have a valid count value.
1246
72
void PGOUseFunc::populateCounters() {
1247
72
  bool Changes = true;
1248
72
  unsigned NumPasses = 0;
1249
280
  while (Changes) {
1250
208
    NumPasses++;
1251
208
    Changes = false;
1252
208
1253
208
    // For efficient traversal, it's better to start from the end as most
1254
208
    // of the instrumented edges are at the end.
1255
1.30k
    for (auto &BB : reverse(F)) {
1256
1.30k
      UseBBInfo *Count = findBBInfo(&BB);
1257
1.30k
      if (Count == nullptr)
1258
0
        continue;
1259
1.30k
      if (!Count->CountValid) {
1260
284
        if (Count->UnknownCountOutEdge == 0) {
1261
88
          Count->CountValue = sumEdgeCount(Count->OutEdges);
1262
88
          Count->CountValid = true;
1263
88
          Changes = true;
1264
196
        } else if (Count->UnknownCountInEdge == 0) {
1265
90
          Count->CountValue = sumEdgeCount(Count->InEdges);
1266
90
          Count->CountValid = true;
1267
90
          Changes = true;
1268
90
        }
1269
284
      }
1270
1.30k
      if (Count->CountValid) {
1271
1.20k
        if (Count->UnknownCountOutEdge == 1) {
1272
178
          uint64_t Total = 0;
1273
178
          uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1274
178
          // If the one of the successor block can early terminate (no-return),
1275
178
          // we can end up with situation where out edge sum count is larger as
1276
178
          // the source BB's count is collected by a post-dominated block.
1277
178
          if (Count->CountValue > OutSum)
1278
162
            Total = Count->CountValue - OutSum;
1279
178
          setEdgeCount(Count->OutEdges, Total);
1280
178
          Changes = true;
1281
178
        }
1282
1.20k
        if (Count->UnknownCountInEdge == 1) {
1283
193
          uint64_t Total = 0;
1284
193
          uint64_t InSum = sumEdgeCount(Count->InEdges);
1285
193
          if (Count->CountValue > InSum)
1286
178
            Total = Count->CountValue - InSum;
1287
193
          setEdgeCount(Count->InEdges, Total);
1288
193
          Changes = true;
1289
193
        }
1290
1.20k
      }
1291
1.30k
    }
1292
208
  }
1293
72
1294
72
  LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1295
#ifndef NDEBUG
1296
  // Assert every BB has a valid counter.
1297
  for (auto &BB : F) {
1298
    auto BI = findBBInfo(&BB);
1299
    if (BI == nullptr)
1300
      continue;
1301
    assert(BI->CountValid && "BB count is not valid");
1302
  }
1303
#endif
1304
  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1305
72
  F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1306
72
  uint64_t FuncMaxCount = FuncEntryCount;
1307
371
  for (auto &BB : F) {
1308
371
    auto BI = findBBInfo(&BB);
1309
371
    if (BI == nullptr)
1310
0
      continue;
1311
371
    FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1312
371
  }
1313
72
  markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1314
72
1315
72
  // Now annotate select instructions
1316
72
  FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
1317
72
  assert(CountPosition == ProfileCountSize);
1318
72
1319
72
  LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1320
72
}
1321
1322
// Assign the scaled count values to the BB with multiple out edges.
1323
72
void PGOUseFunc::setBranchWeights() {
1324
72
  // Generate MD_prof metadata for every branch instruction.
1325
72
  LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1326
72
                    << " IsCS=" << IsCS << "\n");
1327
371
  for (auto &BB : F) {
1328
371
    Instruction *TI = BB.getTerminator();
1329
371
    if (TI->getNumSuccessors() < 2)
1330
269
      continue;
1331
102
    if (!(isa<BranchInst>(TI) || 
isa<SwitchInst>(TI)21
||
1332
102
          
isa<IndirectBrInst>(TI)15
))
1333
10
      continue;
1334
92
1335
92
    if (getBBInfo(&BB).CountValue == 0)
1336
0
      continue;
1337
92
1338
92
    // We have a non-zero Branch BB.
1339
92
    const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1340
92
    unsigned Size = BBCountInfo.OutEdges.size();
1341
92
    SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1342
92
    uint64_t MaxCount = 0;
1343
299
    for (unsigned s = 0; s < Size; 
s++207
) {
1344
207
      const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1345
207
      const BasicBlock *SrcBB = E->SrcBB;
1346
207
      const BasicBlock *DestBB = E->DestBB;
1347
207
      if (DestBB == nullptr)
1348
0
        continue;
1349
207
      unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1350
207
      uint64_t EdgeCount = E->CountValue;
1351
207
      if (EdgeCount > MaxCount)
1352
119
        MaxCount = EdgeCount;
1353
207
      EdgeCounts[SuccNum] = EdgeCount;
1354
207
    }
1355
92
    setProfMetadata(M, TI, EdgeCounts, MaxCount);
1356
92
  }
1357
72
}
1358
1359
359
// Returns true if at least one predecessor of BB ends in an indirectbr
// instruction.
static bool isIndirectBrTarget(BasicBlock *BB) {
  for (BasicBlock *Pred : predecessors(BB))
    if (isa<IndirectBrInst>(Pred->getTerminator()))
      return true;
  return false;
}
1366
1367
72
void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1368
72
  LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1369
72
  // Find irr loop headers
1370
371
  for (auto &BB : F) {
1371
371
    // As a heuristic also annotate indrectbr targets as they have a high chance
1372
371
    // to become an irreducible loop header after the indirectbr tail
1373
371
    // duplication.
1374
371
    if (BFI->isIrrLoopHeader(&BB) || 
isIndirectBrTarget(&BB)359
) {
1375
29
      Instruction *TI = BB.getTerminator();
1376
29
      const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1377
29
      setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1378
29
    }
1379
371
  }
1380
72
}
1381
1382
2
// Insert, right before SI, a call to the instrprof_increment_step intrinsic
// whose step is the select condition zero-extended to i64, using the counter
// slot *CurCtrIdx. The counter index is advanced afterwards.
void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
  Module *M = F.getParent();
  IRBuilder<> Builder(&SI);
  Type *Int64Ty = Builder.getInt64Ty();
  Type *I8PtrTy = Builder.getInt8PtrTy();

  // The condition widened to i64 serves as the increment step.
  auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
  Builder.CreateCall(
      Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
      {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
       Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
       Builder.getInt32(*CurCtrIdx), Step});

  *CurCtrIdx += 1;
}
1395
1396
6
// Consume the next profile counter as the "true" count of SI, derive the
// "false" count from the count of SI's parent block, and attach both as
// profile metadata when at least one of them is non-zero.
void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
  std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
  assert(*CurCtrIdx < CountFromProfile.size() &&
         "Out of bound access of counters");

  // Slot 0 holds the true count, slot 1 the false count.
  uint64_t SCounts[2];
  SCounts[0] = CountFromProfile[*CurCtrIdx];
  ++(*CurCtrIdx);

  // The block count is the total; it stays zero when the block has no BBInfo.
  uint64_t TotalCount = 0;
  if (auto BI = UseFunc->findBBInfo(SI.getParent()))
    TotalCount = BI->CountValue;

  // Clamp to zero rather than wrapping if the true count exceeds the total.
  if (TotalCount > SCounts[0])
    SCounts[1] = TotalCount - SCounts[0];
  else
    SCounts[1] = 0;

  const uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
  if (MaxCount)
    setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
}
1413
1414
54
// Visitor callback for select instructions. Does nothing when select
// instrumentation is disabled or the condition is a vector; otherwise counts,
// instruments or annotates SI according to the current visiting mode.
void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
  // FIXME: selects with a vector condition are not handled yet.
  if (!PGOInstrSelect || SI.getCondition()->getType()->isVectorTy())
    return;

  switch (Mode) {
  case VM_counting:
    NSIs++;
    return;
  case VM_instrument:
    instrumentOneSelectInst(SI);
    return;
  case VM_annotate:
    annotateOneSelectInst(SI);
    return;
  }

  llvm_unreachable("Unknown visiting mode");
}
1435
1436
3
// Insert, right before MI, a call to the instrprof_value_profile intrinsic
// that records the (non-constant) length of the memory intrinsic under the
// IPVK_MemOPSize kind at site CurCtrId. The site index is advanced afterwards.
void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {
  Module *M = F.getParent();
  IRBuilder<> Builder(&MI);
  Type *Int64Ty = Builder.getInt64Ty();
  Type *I8PtrTy = Builder.getInt8PtrTy();

  // Callers are expected to have filtered out constant lengths already.
  Value *Length = MI.getLength();
  assert(!isa<ConstantInt>(Length));

  Builder.CreateCall(
      Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
      {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
       Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty),
       Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)});

  CurCtrId += 1;
}
1450
1451
19
// Visitor callback for memory intrinsics. Does nothing when memop profiling
// is disabled or when the length is a compile-time constant; otherwise
// counts, instruments, or records MI as an annotation candidate according to
// the current visiting mode.
void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) {
  if (!PGOInstrMemOP)
    return;

  // Do not instrument calls with a constant length. Only the type is tested
  // here, so isa<> is the right query; dyn_cast<> would produce a pointer
  // that is never used.
  Value *Length = MI.getLength();
  if (isa<ConstantInt>(Length))
    return;

  switch (Mode) {
  case VM_counting:
    NMemIs++;
    return;
  case VM_instrument:
    instrumentOneMemIntrinsic(MI);
    return;
  case VM_annotate:
    Candidates.push_back(&MI);
    return;
  }
  llvm_unreachable("Unknown visiting mode");
}
1472
1473
// Traverse all valuesites and annotate the instructions for all value kind.
1474
72
void PGOUseFunc::annotateValueSites() {
1475
72
  if (DisableValueProfiling)
1476
0
    return;
1477
72
1478
72
  // Create the PGOFuncName meta data.
1479
72
  createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1480
72
1481
216
  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; 
++Kind144
)
1482
144
    annotateValueSites(Kind);
1483
72
}
1484
1485
// Table of description strings, one per VALUE_PROF_KIND entry expanded from
// InstrProfData.inc (the macro keeps only the Descr argument). It is indexed
// by the value-profile kind when diagnostics are built.
static const char *ValueProfKindDescr[] = {
1486
#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
1487
#include "llvm/ProfileData/InstrProfData.inc"
1488
};
1489
1490
// Annotate the instructions for a specific value kind.
1491
144
void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1492
144
  assert(Kind <= IPVK_Last);
1493
144
  unsigned ValueSiteIndex = 0;
1494
144
  auto &ValueSites = FuncInfo.ValueSites[Kind];
1495
144
  unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1496
144
  if (NumValueSites != ValueSites.size()) {
1497
1
    auto &Ctx = M->getContext();
1498
1
    Ctx.diagnose(DiagnosticInfoPGOProfile(
1499
1
        M->getName().data(),
1500
1
        Twine("Inconsistent number of value sites for ") +
1501
1
            Twine(ValueProfKindDescr[Kind]) +
1502
1
            Twine(" profiling in \"") + F.getName().str() +
1503
1
            Twine("\", possibly due to the use of a stale profile."),
1504
1
        DS_Warning));
1505
1
    return;
1506
1
  }
1507
143
1508
143
  for (auto &I : ValueSites) {
1509
6
    LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1510
6
                      << "): Index = " << ValueSiteIndex << " out of "
1511
6
                      << NumValueSites << "\n");
1512
6
    annotateValueSite(*M, *I, ProfileRecord,
1513
6
                      static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1514
6
                      Kind == IPVK_MemOPSize ? 
MaxNumMemOPAnnotations4
1515
6
                                             : 
MaxNumAnnotations2
);
1516
6
    ValueSiteIndex++;
1517
6
  }
1518
143
}
1519
1520
// Collect the set of members for each Comdat in module M and store
1521
// in ComdatMembers.
1522
static void collectComdatMembers(
1523
    Module &M,
1524
179
    std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1525
179
  if (!DoComdatRenaming)
1526
175
    return;
1527
4
  for (Function &F : M)
1528
24
    if (Comdat *C = F.getComdat())
1529
20
      ComdatMembers.insert(std::make_pair(C, &F));
1530
4
  for (GlobalVariable &GV : M.globals())
1531
8
    if (Comdat *C = GV.getComdat())
1532
8
      ComdatMembers.insert(std::make_pair(C, &GV));
1533
4
  for (GlobalAlias &GA : M.aliases())
1534
0
    if (Comdat *C = GA.getComdat())
1535
0
      ComdatMembers.insert(std::make_pair(C, &GA));
1536
4
}
1537
1538
static bool InstrumentAllFunctions(
1539
    Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1540
79
    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1541
79
  // For the context-sensitve instrumentation, we should have a separated pass
1542
79
  // (before LTO/ThinLTO linking) to create these variables.
1543
79
  if (!IsCS)
1544
64
    createIRLevelProfileFlagVar(M, /* IsCS */ false);
1545
79
  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1546
79
  collectComdatMembers(M, ComdatMembers);
1547
79
1548
229
  for (auto &F : M) {
1549
229
    if (F.isDeclaration())
1550
128
      continue;
1551
101
    auto *BPI = LookupBPI(F);
1552
101
    auto *BFI = LookupBFI(F);
1553
101
    instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS);
1554
101
  }
1555
79
  return true;
1556
79
}
1557
1558
// Create the profile file name variable (from CSInstrName) and the IR-level
// profile flag variable for context-sensitive instrumentation. Reports all
// analyses as preserved.
PreservedAnalyses
PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
  createProfileFileNameVar(M, CSInstrName);
  createIRLevelProfileFlagVar(M, /* IsCS */ true);

  return PreservedAnalyses::all();
}
1564
1565
42
// Legacy pass manager entry point: instrument all functions of M unless the
// module is skipped, fetching BPI/BFI from the legacy wrapper passes.
bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
  if (skipModule(M))
    return false;

  auto GetBPI = [this](Function &Fn) {
    return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(Fn).getBPI();
  };
  auto GetBFI = [this](Function &Fn) {
    return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(Fn).getBFI();
  };

  return InstrumentAllFunctions(M, GetBPI, GetBFI, IsCS);
}
1577
1578
// New pass manager entry point: instrument all functions of M, fetching
// BPI/BFI from the function analysis manager. Reports no analyses preserved
// when InstrumentAllFunctions returns true, and all preserved otherwise.
PreservedAnalyses PGOInstrumentationGen::run(Module &M,
                                             ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  auto GetBPI = [&FAM](Function &Fn) {
    return &FAM.getResult<BranchProbabilityAnalysis>(Fn);
  };
  auto GetBFI = [&FAM](Function &Fn) {
    return &FAM.getResult<BlockFrequencyAnalysis>(Fn);
  };

  const bool Instrumented = InstrumentAllFunctions(M, GetBPI, GetBFI, IsCS);
  if (Instrumented)
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}
1594
1595
static bool annotateAllFunctions(
1596
    Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
1597
    function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1598
106
    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1599
106
  LLVM_DEBUG(dbgs() << "Read in profile counters: ");
1600
106
  auto &Ctx = M.getContext();
1601
106
  // Read the counter array from file.
1602
106
  auto ReaderOrErr =
1603
106
      IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
1604
106
  if (Error E = ReaderOrErr.takeError()) {
1605
2
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1606
2
      Ctx.diagnose(
1607
2
          DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1608
2
    });
1609
2
    return false;
1610
2
  }
1611
104
1612
104
  std::unique_ptr<IndexedInstrProfReader> PGOReader =
1613
104
      std::move(ReaderOrErr.get());
1614
104
  if (!PGOReader) {
1615
0
    Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1616
0
                                          StringRef("Cannot get PGOReader")));
1617
0
    return false;
1618
0
  }
1619
104
  if (!PGOReader->hasCSIRLevelProfile() && 
IsCS79
)
1620
2
    return false;
1621
102
1622
102
  // TODO: might need to change the warning once the clang option is finalized.
1623
102
  if (!PGOReader->isIRLevelProfile()) {
1624
2
    Ctx.diagnose(DiagnosticInfoPGOProfile(
1625
2
        ProfileFileName.data(), "Not an IR level instrumentation profile"));
1626
2
    return false;
1627
2
  }
1628
100
1629
100
  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1630
100
  collectComdatMembers(M, ComdatMembers);
1631
100
  std::vector<Function *> HotFunctions;
1632
100
  std::vector<Function *> ColdFunctions;
1633
152
  for (auto &F : M) {
1634
152
    if (F.isDeclaration())
1635
55
      continue;
1636
97
    auto *BPI = LookupBPI(F);
1637
97
    auto *BFI = LookupBFI(F);
1638
97
    // Split indirectbr critical edges here before computing the MST rather than
1639
97
    // later in getInstrBB() to avoid invalidating it.
1640
97
    SplitIndirectBrCriticalEdges(F, BPI, BFI);
1641
97
    PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS);
1642
97
    bool AllZeros = false;
1643
97
    if (!Func.readCounters(PGOReader.get(), AllZeros))
1644
18
      continue;
1645
79
    if (AllZeros) {
1646
7
      F.setEntryCount(ProfileCount(0, Function::PCT_Real));
1647
7
      if (Func.getProgramMaxCount() != 0)
1648
5
        ColdFunctions.push_back(&F);
1649
7
      continue;
1650
7
    }
1651
72
    Func.populateCounters();
1652
72
    Func.setBranchWeights();
1653
72
    Func.annotateValueSites();
1654
72
    Func.annotateIrrLoopHeaderWeights();
1655
72
    PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
1656
72
    if (FreqAttr == PGOUseFunc::FFA_Cold)
1657
4
      ColdFunctions.push_back(&F);
1658
68
    else if (FreqAttr == PGOUseFunc::FFA_Hot)
1659
52
      HotFunctions.push_back(&F);
1660
72
    if (PGOViewCounts != PGOVCT_None &&
1661
72
        
(0
ViewBlockFreqFuncName.empty()0
||
1662
0
         F.getName().equals(ViewBlockFreqFuncName))) {
1663
0
      LoopInfo LI{DominatorTree(F)};
1664
0
      std::unique_ptr<BranchProbabilityInfo> NewBPI =
1665
0
          llvm::make_unique<BranchProbabilityInfo>(F, LI);
1666
0
      std::unique_ptr<BlockFrequencyInfo> NewBFI =
1667
0
          llvm::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
1668
0
      if (PGOViewCounts == PGOVCT_Graph)
1669
0
        NewBFI->view();
1670
0
      else if (PGOViewCounts == PGOVCT_Text) {
1671
0
        dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
1672
0
        NewBFI->print(dbgs());
1673
0
      }
1674
0
    }
1675
72
    if (PGOViewRawCounts != PGOVCT_None &&
1676
72
        
(0
ViewBlockFreqFuncName.empty()0
||
1677
0
         F.getName().equals(ViewBlockFreqFuncName))) {
1678
0
      if (PGOViewRawCounts == PGOVCT_Graph)
1679
0
        if (ViewBlockFreqFuncName.empty())
1680
0
          WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1681
0
        else
1682
0
          ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1683
0
      else if (PGOViewRawCounts == PGOVCT_Text) {
1684
0
        dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
1685
0
        Func.dumpInfo();
1686
0
      }
1687
0
    }
1688
72
  }
1689
100
  M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
1690
100
                      IsCS ? 
ProfileSummary::PSK_CSInstr12
1691
100
                           : 
ProfileSummary::PSK_Instr88
);
1692
100
1693
100
  // Set function hotness attribute from the profile.
1694
100
  // We have to apply these attributes at the end because their presence
1695
100
  // can affect the BranchProbabilityInfo of any callers, resulting in an
1696
100
  // inconsistent MST between prof-gen and prof-use.
1697
100
  for (auto &F : HotFunctions) {
1698
52
    F->addFnAttr(Attribute::InlineHint);
1699
52
    LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
1700
52
                      << "\n");
1701
52
  }
1702
100
  for (auto &F : ColdFunctions) {
1703
9
    F->addFnAttr(Attribute::Cold);
1704
9
    LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
1705
9
                      << "\n");
1706
9
  }
1707
100
  return true;
1708
100
}
1709
1710
// Construct the profile-use pass from the given profile and remapping file
// names. A non-empty PGOTestProfileFile / PGOTestProfileRemappingFile option
// overrides the corresponding constructor argument.
PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
                                             std::string RemappingFilename,
                                             bool IsCS)
    : ProfileFileName(std::move(Filename)),
      ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
  if (!PGOTestProfileFile.empty()) {
    ProfileFileName = PGOTestProfileFile;
  }
  if (!PGOTestProfileRemappingFile.empty()) {
    ProfileRemappingFileName = PGOTestProfileRemappingFile;
  }
}
1720
1721
// New pass manager entry point: annotate all functions of M from the profile,
// fetching BPI/BFI from the function analysis manager. Reports no analyses
// preserved when annotation succeeded, and all preserved otherwise.
PreservedAnalyses PGOInstrumentationUse::run(Module &M,
                                             ModuleAnalysisManager &AM) {
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  auto GetBPI = [&FAM](Function &Fn) {
    return &FAM.getResult<BranchProbabilityAnalysis>(Fn);
  };
  auto GetBFI = [&FAM](Function &Fn) {
    return &FAM.getResult<BlockFrequencyAnalysis>(Fn);
  };

  const bool Annotated = annotateAllFunctions(
      M, ProfileFileName, ProfileRemappingFileName, GetBPI, GetBFI, IsCS);
  if (Annotated)
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}
1739
1740
53
// Legacy pass manager entry point: annotate all functions of M from the
// profile unless the module is skipped, fetching BPI/BFI from the legacy
// wrapper passes. No remapping file is used on this path.
bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
  if (skipModule(M))
    return false;

  auto GetBPI = [this](Function &Fn) {
    return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(Fn).getBPI();
  };
  auto GetBFI = [this](Function &Fn) {
    return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(Fn).getBFI();
  };

  return annotateAllFunctions(M, ProfileFileName, "", GetBPI, GetBFI, IsCS);
}
1754
1755
0
static std::string getSimpleNodeName(const BasicBlock *Node) {
1756
0
  if (!Node->getName().empty())
1757
0
    return Node->getName();
1758
0
1759
0
  std::string SimpleNodeName;
1760
0
  raw_string_ostream OS(SimpleNodeName);
1761
0
  Node->printAsOperand(OS, false);
1762
0
  return OS.str();
1763
0
}
1764
1765
void llvm::setProfMetadata(Module *M, Instruction *TI,
1766
                           ArrayRef<uint64_t> EdgeCounts,
1767
108
                           uint64_t MaxCount) {
1768
108
  MDBuilder MDB(M->getContext());
1769
108
  assert(MaxCount > 0 && "Bad max count");
1770
108
  uint64_t Scale = calculateCountScale(MaxCount);
1771
108
  SmallVector<unsigned, 4> Weights;
1772
108
  for (const auto &ECI : EdgeCounts)
1773
241
    Weights.push_back(scaleBranchCount(ECI, Scale));
1774
108
1775
108
  LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
1776
108
                                           : Weights) {
1777
108
    dbgs() << W << " ";
1778
108
  } dbgs() << "\n";);
1779
108
  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1780
108
  if (EmitBranchProbability) {
1781
4
    std::string BrCondStr = getBranchCondString(TI);
1782
4
    if (BrCondStr.empty())
1783
0
      return;
1784
4
1785
4
    uint64_t WSum =
1786
4
        std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
1787
8
                        [](uint64_t w1, uint64_t w2) { return w1 + w2; });
1788
4
    uint64_t TotalCount =
1789
4
        std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
1790
8
                        [](uint64_t c1, uint64_t c2) { return c1 + c2; });
1791
4
    Scale = calculateCountScale(WSum);
1792
4
    BranchProbability BP(scaleBranchCount(Weights[0], Scale),
1793
4
                         scaleBranchCount(WSum, Scale));
1794
4
    std::string BranchProbStr;
1795
4
    raw_string_ostream OS(BranchProbStr);
1796
4
    OS << BP;
1797
4
    OS << " (total count : " << TotalCount << ")";
1798
4
    OS.flush();
1799
4
    Function *F = TI->getParent()->getParent();
1800
4
    OptimizationRemarkEmitter ORE(F);
1801
4
    ORE.emit([&]() {
1802
4
      return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
1803
4
             << BrCondStr << " is true with probability : " << BranchProbStr;
1804
4
    });
1805
4
  }
1806
108
}
1807
1808
namespace llvm {
1809
1810
29
// Attach MD_irr_loop metadata carrying Count as the irreducible loop header
// weight to the instruction TI.
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
  MDBuilder MDB(M->getContext());
  MDNode *HeaderWeight = MDB.createIrrLoopHeaderWeight(Count);
  TI->setMetadata(llvm::LLVMContext::MD_irr_loop, HeaderWeight);
}
1815
1816
template <> struct GraphTraits<PGOUseFunc *> {
1817
  using NodeRef = const BasicBlock *;
1818
  using ChildIteratorType = succ_const_iterator;
1819
  using nodes_iterator = pointer_iterator<Function::const_iterator>;
1820
1821
0
  static NodeRef getEntryNode(const PGOUseFunc *G) {
1822
0
    return &G->getFunc().front();
1823
0
  }
1824
1825
0
  static ChildIteratorType child_begin(const NodeRef N) {
1826
0
    return succ_begin(N);
1827
0
  }
1828
1829
0
  static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
1830
1831
0
  static nodes_iterator nodes_begin(const PGOUseFunc *G) {
1832
0
    return nodes_iterator(G->getFunc().begin());
1833
0
  }
1834
1835
0
  static nodes_iterator nodes_end(const PGOUseFunc *G) {
1836
0
    return nodes_iterator(G->getFunc().end());
1837
0
  }
1838
};
1839
1840
template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
1841
  explicit DOTGraphTraits(bool isSimple = false)
1842
0
      : DefaultDOTGraphTraits(isSimple) {}
1843
1844
0
  static std::string getGraphName(const PGOUseFunc *G) {
1845
0
    return G->getFunc().getName();
1846
0
  }
1847
1848
0
  std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
1849
0
    std::string Result;
1850
0
    raw_string_ostream OS(Result);
1851
0
1852
0
    OS << getSimpleNodeName(Node) << ":\\l";
1853
0
    UseBBInfo *BI = Graph->findBBInfo(Node);
1854
0
    OS << "Count : ";
1855
0
    if (BI && BI->CountValid)
1856
0
      OS << BI->CountValue << "\\l";
1857
0
    else
1858
0
      OS << "Unknown\\l";
1859
0
1860
0
    if (!PGOInstrSelect)
1861
0
      return Result;
1862
0
1863
0
    for (auto BI = Node->begin(); BI != Node->end(); ++BI) {
1864
0
      auto *I = &*BI;
1865
0
      if (!isa<SelectInst>(I))
1866
0
        continue;
1867
0
      // Display scaled counts for SELECT instruction:
1868
0
      OS << "SELECT : { T = ";
1869
0
      uint64_t TC, FC;
1870
0
      bool HasProf = I->extractProfMetadata(TC, FC);
1871
0
      if (!HasProf)
1872
0
        OS << "Unknown, F = Unknown }\\l";
1873
0
      else
1874
0
        OS << TC << ", F = " << FC << " }\\l";
1875
0
    }
1876
0
    return Result;
1877
0
  }
1878
};
1879
1880
} // end namespace llvm