Coverage Report

Created: 2022-05-17 06:19

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Instrumentation-based profile-guided optimization
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CodeGenPGO.h"
14
#include "CodeGenFunction.h"
15
#include "CoverageMappingGen.h"
16
#include "clang/AST/RecursiveASTVisitor.h"
17
#include "clang/AST/StmtVisitor.h"
18
#include "llvm/IR/Intrinsics.h"
19
#include "llvm/IR/MDBuilder.h"
20
#include "llvm/Support/CommandLine.h"
21
#include "llvm/Support/Endian.h"
22
#include "llvm/Support/FileSystem.h"
23
#include "llvm/Support/MD5.h"
24
25
// Hidden boolean command-line option named "enable-value-profiling"; it is
// initialized to false.
static llvm::cl::opt<bool>
26
    EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore,
27
                         llvm::cl::desc("Enable value profiling"),
28
                         llvm::cl::Hidden, llvm::cl::init(false));
29
30
using namespace clang;
31
using namespace CodeGen;
32
33
void CodeGenPGO::setFuncName(StringRef Name,
34
600
                             llvm::GlobalValue::LinkageTypes Linkage) {
35
600
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
36
600
  FuncName = llvm::getPGOFuncName(
37
600
      Name, Linkage, CGM.getCodeGenOpts().MainFileName,
38
600
      PGOReader ? 
PGOReader->getVersion()187
:
llvm::IndexedInstrProf::Version413
);
39
40
  // If we're generating a profile, create a variable for the name.
41
600
  if (CGM.getCodeGenOpts().hasProfileClangInstr())
42
413
    FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
43
600
}
44
45
579
/// Set the PGO name from \p Fn's own name and linkage, then record that name
/// on \p Fn as PGOFuncName metadata.
void CodeGenPGO::setFuncName(llvm::Function *Fn) {
  const auto Name = Fn->getName();
  const auto Linkage = Fn->getLinkage();
  setFuncName(Name, Linkage);

  // Create PGOFuncName meta data.
  llvm::createPGOFuncNameMetadata(*Fn, FuncName);
}
50
51
/// Versions of the PGO hash algorithm, listed oldest first.
enum PGOHashVersion : unsigned {
  PGO_HASH_V1 = 0,
  PGO_HASH_V2 = 1,
  PGO_HASH_V3 = 2,

  // Alias for the newest version; repoint it whenever a version is added.
  PGO_HASH_LATEST = PGO_HASH_V3
};
60
61
namespace {
62
/// Stable hasher for PGO region counters.
63
///
64
/// PGOHash produces a stable hash of a given function's control flow.
65
///
66
/// Changing the output of this hash will invalidate all previously generated
67
/// profiles -- i.e., don't do it.
68
///
69
/// \note  When this hash does eventually change (years?), we still need to
70
/// support old hashes.  We'll need to pull in the version number from the
71
/// profile data format and use the matching hash function.
72
class PGOHash {
73
  uint64_t Working;
74
  unsigned Count;
75
  PGOHashVersion HashVersion;
76
  llvm::MD5 MD5;
77
78
  static const int NumBitsPerType = 6;
79
  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
80
  static const unsigned TooBig = 1u << NumBitsPerType;
81
82
public:
83
  /// Hash values for AST nodes.
84
  ///
85
  /// Distinct values for AST nodes that have region counters attached.
86
  ///
87
  /// These values must be stable.  All new members must be added at the end,
88
  /// and no members should be removed.  Changing the enumeration value for an
89
  /// AST node will affect the hash of every function that contains that node.
90
  enum HashType : unsigned char {
91
    None = 0,
92
    LabelStmt = 1,
93
    WhileStmt,
94
    DoStmt,
95
    ForStmt,
96
    CXXForRangeStmt,
97
    ObjCForCollectionStmt,
98
    SwitchStmt,
99
    CaseStmt,
100
    DefaultStmt,
101
    IfStmt,
102
    CXXTryStmt,
103
    CXXCatchStmt,
104
    ConditionalOperator,
105
    BinaryOperatorLAnd,
106
    BinaryOperatorLOr,
107
    BinaryConditionalOperator,
108
    // The preceding values are available with PGO_HASH_V1.
109
110
    EndOfScope,
111
    IfThenBranch,
112
    IfElseBranch,
113
    GotoStmt,
114
    IndirectGotoStmt,
115
    BreakStmt,
116
    ContinueStmt,
117
    ReturnStmt,
118
    ThrowExpr,
119
    UnaryOperatorLNot,
120
    BinaryOperatorLT,
121
    BinaryOperatorGT,
122
    BinaryOperatorLE,
123
    BinaryOperatorGE,
124
    BinaryOperatorEQ,
125
    BinaryOperatorNE,
126
    // The preceding values are available since PGO_HASH_V2.
127
128
    // Keep this last.  It's for the static assert that follows.
129
    LastHashType
130
  };
131
  static_assert(LastHashType <= TooBig, "Too many types in HashType");
132
133
  PGOHash(PGOHashVersion HashVersion)
134
579
      : Working(0), Count(0), HashVersion(HashVersion) {}
135
  void combine(HashType Type);
136
  uint64_t finalize();
137
23.3k
  PGOHashVersion getHashVersion() const { return HashVersion; }
138
};
139
const int PGOHash::NumBitsPerType;
140
const unsigned PGOHash::NumTypesPerWord;
141
const unsigned PGOHash::TooBig;
142
143
/// Get the PGO hash version used in the given indexed profile.
144
static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
145
187
                                        CodeGenModule &CGM) {
146
187
  if (PGOReader->getVersion() <= 4)
147
24
    return PGO_HASH_V1;
148
163
  if (PGOReader->getVersion() <= 5)
149
37
    return PGO_HASH_V2;
150
126
  return PGO_HASH_V3;
151
163
}
152
153
/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
154
struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
155
  using Base = RecursiveASTVisitor<MapRegionCounters>;
156
157
  /// The next counter value to assign.
158
  unsigned NextCounter;
159
  /// The function hash.
160
  PGOHash Hash;
161
  /// The map of statements to counters.
162
  llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
163
  /// The profile version.
164
  uint64_t ProfileVersion;
165
166
  /// Start numbering counters at zero, hash with \p HashVersion, and record
  /// statement-to-counter assignments in \p CounterMap.
  MapRegionCounters(PGOHashVersion HashVersion, uint64_t ProfileVersion,
                    llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
      : NextCounter(0),
        Hash(HashVersion),
        CounterMap(CounterMap),
        ProfileVersion(ProfileVersion) {}
170
171
  // Blocks and lambdas are handled as separate functions, so we need not
172
  // traverse them in the parent context.
173
2
  bool TraverseBlockExpr(BlockExpr *BE) { return true; }
174
4
  /// Walk each capture of \p LE together with its initializer; the lambda
  /// body itself is not traversed.
  bool TraverseLambdaExpr(LambdaExpr *LE) {
    for (auto CaptureAndInit : zip(LE->captures(), LE->capture_inits())) {
      auto *Capture = &std::get<0>(CaptureAndInit);
      auto *Init = std::get<1>(CaptureAndInit);
      TraverseLambdaCapture(LE, Capture, Init);
    }
    return true;
  }
180
6
  /// Captured statements are not descended into; traversal just continues.
  bool TraverseCapturedStmt(CapturedStmt *CS) {
    return true;
  }
181
182
1.35k
  bool VisitDecl(const Decl *D) {
183
1.35k
    switch (D->getKind()) {
184
771
    default:
185
771
      break;
186
771
    case Decl::Function:
187
515
    case Decl::CXXMethod:
188
547
    case Decl::CXXConstructor:
189
566
    case Decl::CXXDestructor:
190
570
    case Decl::CXXConversion:
191
573
    case Decl::ObjCMethod:
192
575
    case Decl::Block:
193
581
    case Decl::Captured:
194
581
      CounterMap[D->getBody()] = NextCounter++;
195
581
      break;
196
1.35k
    }
197
1.35k
    return true;
198
1.35k
  }
199
200
  /// If \p S gets a fresh counter, update the counter mappings. Return the
201
  /// V1 hash of \p S.
202
11.9k
  PGOHash::HashType updateCounterMappings(Stmt *S) {
203
11.9k
    auto Type = getHashType(PGO_HASH_V1, S);
204
11.9k
    if (Type != PGOHash::None)
205
1.28k
      CounterMap[S] = NextCounter++;
206
11.9k
    return Type;
207
11.9k
  }
208
209
  /// The RHS of all logical operators gets a fresh counter in order to count
210
  /// how many times the RHS evaluates to true or false, depending on the
211
  /// semantics of the operator. This is only valid for ">= v7" of the profile
212
  /// version so that we facilitate backward compatibility.
213
1.15k
  bool VisitBinaryOperator(BinaryOperator *S) {
214
1.15k
    if (ProfileVersion >= llvm::IndexedInstrProf::Version7)
215
918
      if (S->isLogicalOp() &&
216
918
          
CodeGenFunction::isInstrumentedCondition(S->getRHS())153
)
217
131
        CounterMap[S->getRHS()] = NextCounter++;
218
1.15k
    return Base::VisitBinaryOperator(S);
219
1.15k
  }
220
221
  /// Include \p S in the function hash.
222
11.9k
  bool VisitStmt(Stmt *S) {
223
11.9k
    auto Type = updateCounterMappings(S);
224
11.9k
    if (Hash.getHashVersion() != PGO_HASH_V1)
225
10.6k
      Type = getHashType(Hash.getHashVersion(), S);
226
11.9k
    if (Type != PGOHash::None)
227
2.12k
      Hash.combine(Type);
228
11.9k
    return true;
229
11.9k
  }
230
231
431
  /// Traverse an if statement, marking in the hash where the "then" and
  /// "else" branches begin and where the statement ends.
  bool TraverseIfStmt(IfStmt *If) {
    // The V1 hash has no branch markers, so the default traversal suffices.
    if (Hash.getHashVersion() == PGO_HASH_V1)
      return Base::TraverseIfStmt(If);

    // Otherwise visit the statement itself, then each child, tagging the
    // branch a child belongs to before descending into it.
    VisitStmt(If);
    auto *const ThenBranch = If->getThen();
    auto *const ElseBranch = If->getElse();
    for (Stmt *Child : If->children()) {
      if (Child == nullptr)
        continue;
      if (Child == ThenBranch)
        Hash.combine(PGOHash::IfThenBranch);
      else if (Child == ElseBranch)
        Hash.combine(PGOHash::IfElseBranch);
      TraverseStmt(Child);
    }
    Hash.combine(PGOHash::EndOfScope);
    return true;
  }
250
251
// For a nestable statement type \p N whose nesting impacts profile stability,
// generate a Traverse##N override that runs the default traversal and then,
// unless the V1 hash is in use, records the end of the statement in the hash.
#define DEFINE_NESTABLE_TRAVERSAL(N)                                           \
  bool Traverse##N(N *S) {                                                     \
    Base::Traverse##N(S);                                                      \
    const bool TracksScopeEnd = Hash.getHashVersion() != PGO_HASH_V1;          \
    if (TracksScopeEnd)                                                        \
      Hash.combine(PGOHash::EndOfScope);                                       \
    return true;                                                               \
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXCatchStmt(clang::CXXCatchStmt*)
Line
Count
Source
255
26
  bool Traverse##N(N *S) {                                                     \
256
26
    Base::Traverse##N(S);                                                      \
257
26
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
26
      Hash.combine(PGOHash::EndOfScope);                                       \
259
26
    return true;                                                               \
260
26
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXForRangeStmt(clang::CXXForRangeStmt*)
Line
Count
Source
255
13
  bool Traverse##N(N *S) {                                                     \
256
13
    Base::Traverse##N(S);                                                      \
257
13
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
13
      Hash.combine(PGOHash::EndOfScope);                                       \
259
13
    return true;                                                               \
260
13
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXTryStmt(clang::CXXTryStmt*)
Line
Count
Source
255
24
  bool Traverse##N(N *S) {                                                     \
256
24
    Base::Traverse##N(S);                                                      \
257
24
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
24
      Hash.combine(PGOHash::EndOfScope);                                       \
259
24
    return true;                                                               \
260
24
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseDoStmt(clang::DoStmt*)
Line
Count
Source
255
35
  bool Traverse##N(N *S) {                                                     \
256
35
    Base::Traverse##N(S);                                                      \
257
35
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
35
      
Hash.combine(PGOHash::EndOfScope)29
; \
259
35
    return true;                                                               \
260
35
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseForStmt(clang::ForStmt*)
Line
Count
Source
255
195
  bool Traverse##N(N *S) {                                                     \
256
195
    Base::Traverse##N(S);                                                      \
257
195
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
195
      
Hash.combine(PGOHash::EndOfScope)173
; \
259
195
    return true;                                                               \
260
195
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseObjCForCollectionStmt(clang::ObjCForCollectionStmt*)
Line
Count
Source
255
11
  bool Traverse##N(N *S) {                                                     \
256
11
    Base::Traverse##N(S);                                                      \
257
11
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
11
      Hash.combine(PGOHash::EndOfScope);                                       \
259
11
    return true;                                                               \
260
11
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseWhileStmt(clang::WhileStmt*)
Line
Count
Source
255
76
  bool Traverse##N(N *S) {                                                     \
256
76
    Base::Traverse##N(S);                                                      \
257
76
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
76
      
Hash.combine(PGOHash::EndOfScope)64
; \
259
76
    return true;                                                               \
260
76
  }
261
262
  // Generate the Traverse* overrides for the loop, try and catch statement
  // kinds listed below.
  DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
263
  DEFINE_NESTABLE_TRAVERSAL(DoStmt)
264
  DEFINE_NESTABLE_TRAVERSAL(ForStmt)
265
  DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
266
  DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
267
  DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
268
  DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
269
270
  /// Get version \p HashVersion of the PGO hash for \p S.
271
22.5k
  PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
272
22.5k
    switch (S->getStmtClass()) {
273
18.4k
    default:
274
18.4k
      break;
275
18.4k
    case Stmt::LabelStmtClass:
276
96
      return PGOHash::LabelStmt;
277
140
    case Stmt::WhileStmtClass:
278
140
      return PGOHash::WhileStmt;
279
64
    case Stmt::DoStmtClass:
280
64
      return PGOHash::DoStmt;
281
368
    case Stmt::ForStmtClass:
282
368
      return PGOHash::ForStmt;
283
26
    case Stmt::CXXForRangeStmtClass:
284
26
      return PGOHash::CXXForRangeStmt;
285
22
    case Stmt::ObjCForCollectionStmtClass:
286
22
      return PGOHash::ObjCForCollectionStmt;
287
94
    case Stmt::SwitchStmtClass:
288
94
      return PGOHash::SwitchStmt;
289
200
    case Stmt::CaseStmtClass:
290
200
      return PGOHash::CaseStmt;
291
58
    case Stmt::DefaultStmtClass:
292
58
      return PGOHash::DefaultStmt;
293
794
    case Stmt::IfStmtClass:
294
794
      return PGOHash::IfStmt;
295
48
    case Stmt::CXXTryStmtClass:
296
48
      return PGOHash::CXXTryStmt;
297
52
    case Stmt::CXXCatchStmtClass:
298
52
      return PGOHash::CXXCatchStmt;
299
38
    case Stmt::ConditionalOperatorClass:
300
38
      return PGOHash::ConditionalOperator;
301
12
    case Stmt::BinaryConditionalOperatorClass:
302
12
      return PGOHash::BinaryConditionalOperator;
303
2.09k
    case Stmt::BinaryOperatorClass: {
304
2.09k
      const BinaryOperator *BO = cast<BinaryOperator>(S);
305
2.09k
      if (BO->getOpcode() == BO_LAnd)
306
192
        return PGOHash::BinaryOperatorLAnd;
307
1.89k
      if (BO->getOpcode() == BO_LOr)
308
162
        return PGOHash::BinaryOperatorLOr;
309
1.73k
      if (HashVersion >= PGO_HASH_V2) {
310
811
        switch (BO->getOpcode()) {
311
456
        default:
312
456
          break;
313
456
        case BO_LT:
314
203
          return PGOHash::BinaryOperatorLT;
315
34
        case BO_GT:
316
34
          return PGOHash::BinaryOperatorGT;
317
12
        case BO_LE:
318
12
          return PGOHash::BinaryOperatorLE;
319
11
        case BO_GE:
320
11
          return PGOHash::BinaryOperatorGE;
321
83
        case BO_EQ:
322
83
          return PGOHash::BinaryOperatorEQ;
323
12
        case BO_NE:
324
12
          return PGOHash::BinaryOperatorNE;
325
811
        }
326
811
      }
327
1.38k
      break;
328
1.73k
    }
329
22.5k
    }
330
331
19.8k
    if (HashVersion >= PGO_HASH_V2) {
332
9.18k
      switch (S->getStmtClass()) {
333
8.44k
      default:
334
8.44k
        break;
335
8.44k
      case Stmt::GotoStmtClass:
336
37
        return PGOHash::GotoStmt;
337
2
      case Stmt::IndirectGotoStmtClass:
338
2
        return PGOHash::IndirectGotoStmt;
339
68
      case Stmt::BreakStmtClass:
340
68
        return PGOHash::BreakStmt;
341
19
      case Stmt::ContinueStmtClass:
342
19
        return PGOHash::ContinueStmt;
343
309
      case Stmt::ReturnStmtClass:
344
309
        return PGOHash::ReturnStmt;
345
17
      case Stmt::CXXThrowExprClass:
346
17
        return PGOHash::ThrowExpr;
347
292
      case Stmt::UnaryOperatorClass: {
348
292
        const UnaryOperator *UO = cast<UnaryOperator>(S);
349
292
        if (UO->getOpcode() == UO_LNot)
350
37
          return PGOHash::UnaryOperatorLNot;
351
255
        break;
352
292
      }
353
9.18k
      }
354
9.18k
    }
355
356
19.3k
    return PGOHash::None;
357
19.8k
  }
358
};
359
360
/// A StmtVisitor that propagates the raw counts through the AST and
361
/// records the count at statements where the value may change.
362
struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
363
  /// PGO state.
364
  CodeGenPGO &PGO;
365
366
  /// A flag that is set when the current count should be recorded on the
367
  /// next statement, such as at the exit of a loop.
368
  bool RecordNextStmtCount;
369
370
  /// The count at the current location in the traversal.
371
  uint64_t CurrentCount;
372
373
  /// The map of statements to count values.
374
  llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
375
376
  /// One BreakContinueStack entry: the counts of breaks and continues seen
  /// inside a loop (or switch). Both counts start at zero.
  struct BreakContinue {
    uint64_t BreakCount = 0;
    uint64_t ContinueCount = 0;
    BreakContinue() = default;
  };
382
  SmallVector<BreakContinue, 8> BreakContinueStack;
383
384
  /// Create a visitor that reads region counts from \p PGO and stores the
  /// counts it computes in \p CountMap.
  ///
  /// No statement count is pending at construction, and the current count
  /// starts at zero so that it never holds an indeterminate value when a
  /// statement is visited before any count has been set.
  ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
                      CodeGenPGO &PGO)
      : PGO(PGO), RecordNextStmtCount(false), CurrentCount(0),
        CountMap(CountMap) {}
387
388
4.70k
  void RecordStmtCount(const Stmt *S) {
389
4.70k
    if (RecordNextStmtCount) {
390
401
      CountMap[S] = CurrentCount;
391
401
      RecordNextStmtCount = false;
392
401
    }
393
4.70k
  }
394
395
  /// Set and return the current count.
396
1.49k
  uint64_t setCount(uint64_t Count) {
397
1.49k
    CurrentCount = Count;
398
1.49k
    return Count;
399
1.49k
  }
400
401
4.05k
  void VisitStmt(const Stmt *S) {
402
4.05k
    RecordStmtCount(S);
403
4.05k
    for (const Stmt *Child : S->children())
404
3.27k
      if (Child)
405
3.27k
        this->Visit(Child);
406
4.05k
  }
407
408
183
  void VisitFunctionDecl(const FunctionDecl *D) {
409
    // Counter tracks entry to the function body.
410
183
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
411
183
    CountMap[D->getBody()] = BodyCount;
412
183
    Visit(D->getBody());
413
183
  }
414
415
  // Skip lambda expressions. We visit these as FunctionDecls when we're
416
  // generating them and aren't interested in the body when generating a
417
  // parent context.
418
1
  void VisitLambdaExpr(const LambdaExpr *LE) {}
419
420
2
  void VisitCapturedDecl(const CapturedDecl *D) {
421
    // Counter tracks entry to the capture body.
422
2
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
423
2
    CountMap[D->getBody()] = BodyCount;
424
2
    Visit(D->getBody());
425
2
  }
426
427
1
  void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
428
    // Counter tracks entry to the method body.
429
1
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
430
1
    CountMap[D->getBody()] = BodyCount;
431
1
    Visit(D->getBody());
432
1
  }
433
434
1
  void VisitBlockDecl(const BlockDecl *D) {
435
    // Counter tracks entry to the block body.
436
1
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
437
1
    CountMap[D->getBody()] = BodyCount;
438
1
    Visit(D->getBody());
439
1
  }
440
441
74
  void VisitReturnStmt(const ReturnStmt *S) {
442
74
    RecordStmtCount(S);
443
74
    if (S->getRetValue())
444
63
      Visit(S->getRetValue());
445
74
    CurrentCount = 0;
446
74
    RecordNextStmtCount = true;
447
74
  }
448
449
8
  void VisitCXXThrowExpr(const CXXThrowExpr *E) {
450
8
    RecordStmtCount(E);
451
8
    if (E->getSubExpr())
452
8
      Visit(E->getSubExpr());
453
8
    CurrentCount = 0;
454
8
    RecordNextStmtCount = true;
455
8
  }
456
457
34
  void VisitGotoStmt(const GotoStmt *S) {
458
34
    RecordStmtCount(S);
459
34
    CurrentCount = 0;
460
34
    RecordNextStmtCount = true;
461
34
  }
462
463
34
  void VisitLabelStmt(const LabelStmt *S) {
464
34
    RecordNextStmtCount = false;
465
    // Counter tracks the block following the label.
466
34
    uint64_t BlockCount = setCount(PGO.getRegionCount(S));
467
34
    CountMap[S] = BlockCount;
468
34
    Visit(S->getSubStmt());
469
34
  }
470
471
56
  void VisitBreakStmt(const BreakStmt *S) {
472
56
    RecordStmtCount(S);
473
56
    assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
474
0
    BreakContinueStack.back().BreakCount += CurrentCount;
475
56
    CurrentCount = 0;
476
56
    RecordNextStmtCount = true;
477
56
  }
478
479
15
  void VisitContinueStmt(const ContinueStmt *S) {
480
15
    RecordStmtCount(S);
481
15
    assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
482
0
    BreakContinueStack.back().ContinueCount += CurrentCount;
483
15
    CurrentCount = 0;
484
15
    RecordNextStmtCount = true;
485
15
  }
486
487
36
  void VisitWhileStmt(const WhileStmt *S) {
488
36
    RecordStmtCount(S);
489
36
    uint64_t ParentCount = CurrentCount;
490
491
36
    BreakContinueStack.push_back(BreakContinue());
492
    // Visit the body region first so the break/continue adjustments can be
493
    // included when visiting the condition.
494
36
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
495
36
    CountMap[S->getBody()] = CurrentCount;
496
36
    Visit(S->getBody());
497
36
    uint64_t BackedgeCount = CurrentCount;
498
499
    // ...then go back and propagate counts through the condition. The count
500
    // at the start of the condition is the sum of the incoming edges,
501
    // the backedge from the end of the loop body, and the edges from
502
    // continue statements.
503
36
    BreakContinue BC = BreakContinueStack.pop_back_val();
504
36
    uint64_t CondCount =
505
36
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
506
36
    CountMap[S->getCond()] = CondCount;
507
36
    Visit(S->getCond());
508
36
    setCount(BC.BreakCount + CondCount - BodyCount);
509
36
    RecordNextStmtCount = true;
510
36
  }
511
512
22
  void VisitDoStmt(const DoStmt *S) {
513
22
    RecordStmtCount(S);
514
22
    uint64_t LoopCount = PGO.getRegionCount(S);
515
516
22
    BreakContinueStack.push_back(BreakContinue());
517
    // The count doesn't include the fallthrough from the parent scope. Add it.
518
22
    uint64_t BodyCount = setCount(LoopCount + CurrentCount);
519
22
    CountMap[S->getBody()] = BodyCount;
520
22
    Visit(S->getBody());
521
22
    uint64_t BackedgeCount = CurrentCount;
522
523
22
    BreakContinue BC = BreakContinueStack.pop_back_val();
524
    // The count at the start of the condition is equal to the count at the
525
    // end of the body, plus any continues.
526
22
    uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
527
22
    CountMap[S->getCond()] = CondCount;
528
22
    Visit(S->getCond());
529
22
    setCount(BC.BreakCount + CondCount - LoopCount);
530
22
    RecordNextStmtCount = true;
531
22
  }
532
533
85
  void VisitForStmt(const ForStmt *S) {
534
85
    RecordStmtCount(S);
535
85
    if (S->getInit())
536
81
      Visit(S->getInit());
537
538
85
    uint64_t ParentCount = CurrentCount;
539
540
85
    BreakContinueStack.push_back(BreakContinue());
541
    // Visit the body region first. (This is basically the same as a while
542
    // loop; see further comments in VisitWhileStmt.)
543
85
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
544
85
    CountMap[S->getBody()] = BodyCount;
545
85
    Visit(S->getBody());
546
85
    uint64_t BackedgeCount = CurrentCount;
547
85
    BreakContinue BC = BreakContinueStack.pop_back_val();
548
549
    // The increment is essentially part of the body but it needs to include
550
    // the count for all the continue statements.
551
85
    if (S->getInc()) {
552
85
      uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
553
85
      CountMap[S->getInc()] = IncCount;
554
85
      Visit(S->getInc());
555
85
    }
556
557
    // ...then go back and propagate counts through the condition.
558
85
    uint64_t CondCount =
559
85
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
560
85
    if (S->getCond()) {
561
85
      CountMap[S->getCond()] = CondCount;
562
85
      Visit(S->getCond());
563
85
    }
564
85
    setCount(BC.BreakCount + CondCount - BodyCount);
565
85
    RecordNextStmtCount = true;
566
85
  }
567
568
9
  void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
569
9
    RecordStmtCount(S);
570
9
    if (S->getInit())
571
0
      Visit(S->getInit());
572
9
    Visit(S->getLoopVarStmt());
573
9
    Visit(S->getRangeStmt());
574
9
    Visit(S->getBeginStmt());
575
9
    Visit(S->getEndStmt());
576
577
9
    uint64_t ParentCount = CurrentCount;
578
9
    BreakContinueStack.push_back(BreakContinue());
579
    // Visit the body region first. (This is basically the same as a while
580
    // loop; see further comments in VisitWhileStmt.)
581
9
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
582
9
    CountMap[S->getBody()] = BodyCount;
583
9
    Visit(S->getBody());
584
9
    uint64_t BackedgeCount = CurrentCount;
585
9
    BreakContinue BC = BreakContinueStack.pop_back_val();
586
587
    // The increment is essentially part of the body but it needs to include
588
    // the count for all the continue statements.
589
9
    uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
590
9
    CountMap[S->getInc()] = IncCount;
591
9
    Visit(S->getInc());
592
593
    // ...then go back and propagate counts through the condition.
594
9
    uint64_t CondCount =
595
9
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
596
9
    CountMap[S->getCond()] = CondCount;
597
9
    Visit(S->getCond());
598
9
    setCount(BC.BreakCount + CondCount - BodyCount);
599
9
    RecordNextStmtCount = true;
600
9
  }
601
602
5
  void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
603
5
    RecordStmtCount(S);
604
5
    Visit(S->getElement());
605
5
    uint64_t ParentCount = CurrentCount;
606
5
    BreakContinueStack.push_back(BreakContinue());
607
    // Counter tracks the body of the loop.
608
5
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
609
5
    CountMap[S->getBody()] = BodyCount;
610
5
    Visit(S->getBody());
611
5
    uint64_t BackedgeCount = CurrentCount;
612
5
    BreakContinue BC = BreakContinueStack.pop_back_val();
613
614
5
    setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
615
5
             BodyCount);
616
5
    RecordNextStmtCount = true;
617
5
  }
618
619
27
  void VisitSwitchStmt(const SwitchStmt *S) {
620
27
    RecordStmtCount(S);
621
27
    if (S->getInit())
622
0
      Visit(S->getInit());
623
27
    Visit(S->getCond());
624
27
    CurrentCount = 0;
625
27
    BreakContinueStack.push_back(BreakContinue());
626
27
    Visit(S->getBody());
627
    // If the switch is inside a loop, add the continue counts.
628
27
    BreakContinue BC = BreakContinueStack.pop_back_val();
629
27
    if (!BreakContinueStack.empty())
630
21
      BreakContinueStack.back().ContinueCount += BC.ContinueCount;
631
    // Counter tracks the exit block of the switch.
632
27
    setCount(PGO.getRegionCount(S));
633
27
    RecordNextStmtCount = true;
634
27
  }
635
636
96
  void VisitSwitchCase(const SwitchCase *S) {
637
96
    RecordNextStmtCount = false;
638
    // Counter for this particular case. This counts only jumps from the
639
    // switch header and does not include fallthrough from the case before
640
    // this one.
641
96
    uint64_t CaseCount = PGO.getRegionCount(S);
642
96
    setCount(CurrentCount + CaseCount);
643
    // We need the count without fallthrough in the mapping, so it's more useful
644
    // for branch probabilities.
645
96
    CountMap[S] = CaseCount;
646
96
    RecordNextStmtCount = true;
647
96
    Visit(S->getSubStmt());
648
96
  }
649
650
205
  void VisitIfStmt(const IfStmt *S) {
651
205
    RecordStmtCount(S);
652
653
205
    if (S->isConsteval()) {
654
0
      const Stmt *Stm = S->isNegatedConsteval() ? S->getThen() : S->getElse();
655
0
      if (Stm)
656
0
        Visit(Stm);
657
0
      return;
658
0
    }
659
660
205
    uint64_t ParentCount = CurrentCount;
661
205
    if (S->getInit())
662
0
      Visit(S->getInit());
663
205
    Visit(S->getCond());
664
665
    // Counter tracks the "then" part of an if statement. The count for
666
    // the "else" part, if it exists, will be calculated from this counter.
667
205
    uint64_t ThenCount = setCount(PGO.getRegionCount(S));
668
205
    CountMap[S->getThen()] = ThenCount;
669
205
    Visit(S->getThen());
670
205
    uint64_t OutCount = CurrentCount;
671
672
205
    uint64_t ElseCount = ParentCount - ThenCount;
673
205
    if (S->getElse()) {
674
32
      setCount(ElseCount);
675
32
      CountMap[S->getElse()] = ElseCount;
676
32
      Visit(S->getElse());
677
32
      OutCount += CurrentCount;
678
32
    } else
679
173
      OutCount += ElseCount;
680
205
    setCount(OutCount);
681
205
    RecordNextStmtCount = true;
682
205
  }
683
684
12
  void VisitCXXTryStmt(const CXXTryStmt *S) {
685
12
    RecordStmtCount(S);
686
12
    Visit(S->getTryBlock());
687
24
    for (unsigned I = 0, E = S->getNumHandlers(); I < E; 
++I12
)
688
12
      Visit(S->getHandler(I));
689
    // Counter tracks the continuation block of the try statement.
690
12
    setCount(PGO.getRegionCount(S));
691
12
    RecordNextStmtCount = true;
692
12
  }
693
694
12
  void VisitCXXCatchStmt(const CXXCatchStmt *S) {
695
12
    RecordNextStmtCount = false;
696
    // Counter tracks the catch statement's handler block.
697
12
    uint64_t CatchCount = setCount(PGO.getRegionCount(S));
698
12
    CountMap[S] = CatchCount;
699
12
    Visit(S->getHandlerBlock());
700
12
  }
701
702
9
  void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
703
9
    RecordStmtCount(E);
704
9
    uint64_t ParentCount = CurrentCount;
705
9
    Visit(E->getCond());
706
707
    // Counter tracks the "true" part of a conditional operator. The
708
    // count in the "false" part will be calculated from this counter.
709
9
    uint64_t TrueCount = setCount(PGO.getRegionCount(E));
710
9
    CountMap[E->getTrueExpr()] = TrueCount;
711
9
    Visit(E->getTrueExpr());
712
9
    uint64_t OutCount = CurrentCount;
713
714
9
    uint64_t FalseCount = setCount(ParentCount - TrueCount);
715
9
    CountMap[E->getFalseExpr()] = FalseCount;
716
9
    Visit(E->getFalseExpr());
717
9
    OutCount += CurrentCount;
718
719
9
    setCount(OutCount);
720
9
    RecordNextStmtCount = true;
721
9
  }
722
723
25
  void VisitBinLAnd(const BinaryOperator *E) {
724
25
    RecordStmtCount(E);
725
25
    uint64_t ParentCount = CurrentCount;
726
25
    Visit(E->getLHS());
727
    // Counter tracks the right hand side of a logical and operator.
728
25
    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
729
25
    CountMap[E->getRHS()] = RHSCount;
730
25
    Visit(E->getRHS());
731
25
    setCount(ParentCount + RHSCount - CurrentCount);
732
25
    RecordNextStmtCount = true;
733
25
  }
734
735
24
  void VisitBinLOr(const BinaryOperator *E) {
736
24
    RecordStmtCount(E);
737
24
    uint64_t ParentCount = CurrentCount;
738
24
    Visit(E->getLHS());
739
    // Counter tracks the right hand side of a logical or operator.
740
24
    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
741
24
    CountMap[E->getRHS()] = RHSCount;
742
24
    Visit(E->getRHS());
743
24
    setCount(ParentCount + RHSCount - CurrentCount);
744
24
    RecordNextStmtCount = true;
745
24
  }
746
};
747
} // end anonymous namespace
748
749
3.25k
void PGOHash::combine(HashType Type) {
750
  // Check that we never combine 0 and only have six bits.
751
3.25k
  assert(Type && "Hash is invalid: unexpected type 0");
752
0
  assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
753
754
  // Pass through MD5 if enough work has built up.
755
3.25k
  if (Count && 
Count % NumTypesPerWord == 02.79k
) {
756
132
    using namespace llvm::support;
757
132
    uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
758
132
    MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
759
132
    Working = 0;
760
132
  }
761
762
  // Accumulate the current type.
763
3.25k
  ++Count;
764
3.25k
  Working = Working << NumBitsPerType | Type;
765
3.25k
}
766
767
579
uint64_t PGOHash::finalize() {
768
  // Use Working as the hash directly if we never used MD5.
769
579
  if (Count <= NumTypesPerWord)
770
    // No need to byte swap here, since none of the math was endian-dependent.
771
    // This number will be byte-swapped as required on endianness transitions,
772
    // so we will see the same value on the other side.
773
501
    return Working;
774
775
  // Check for remaining work in Working.
776
78
  if (Working) {
777
    // Keep the buggy behavior from v1 and v2 for backward-compatibility. This
778
    // is buggy because it converts a uint64_t into an array of uint8_t.
779
78
    if (HashVersion < PGO_HASH_V3) {
780
13
      MD5.update({(uint8_t)Working});
781
65
    } else {
782
65
      using namespace llvm::support;
783
65
      uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
784
65
      MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
785
65
    }
786
78
  }
787
788
  // Finalize the MD5 and return the hash.
789
78
  llvm::MD5::MD5Result Result;
790
78
  MD5.final(Result);
791
78
  return Result.low();
792
579
}
793
794
298k
void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
795
298k
  const Decl *D = GD.getDecl();
796
298k
  if (!D->hasBody())
797
130
    return;
798
799
  // Skip CUDA/HIP kernel launch stub functions.
800
298k
  if (CGM.getLangOpts().CUDA && 
!CGM.getLangOpts().CUDAIsDevice614
&&
801
298k
      
D->hasAttr<CUDAGlobalAttr>()317
)
802
69
    return;
803
804
298k
  bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
805
298k
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
806
298k
  if (!InstrumentRegions && 
!PGOReader297k
)
807
297k
    return;
808
641
  if (D->isImplicit())
809
17
    return;
810
  // Constructors and destructors may be represented by several functions in IR.
811
  // If so, instrument only base variant, others are implemented by delegation
812
  // to the base one, it would be counted twice otherwise.
813
624
  if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
814
607
    if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D))
815
53
      if (GD.getCtorType() != Ctor_Base &&
816
53
          
CodeGenFunction::IsConstructorDelegationValid(CCD)26
)
817
21
        return;
818
607
  }
819
603
  if (isa<CXXDestructorDecl>(D) && 
GD.getDtorType() != Dtor_Base39
)
820
20
    return;
821
822
583
  CGM.ClearUnusedCoverageMapping(D);
823
583
  if (Fn->hasFnAttribute(llvm::Attribute::NoProfile))
824
4
    return;
825
826
579
  setFuncName(Fn);
827
828
579
  mapRegionCounters(D);
829
579
  if (CGM.getCodeGenOpts().CoverageMapping)
830
231
    emitCounterRegionMapping(D);
831
579
  if (PGOReader) {
832
187
    SourceManager &SM = CGM.getContext().getSourceManager();
833
187
    loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
834
187
    computeRegionCounts(D);
835
187
    applyFunctionAttributes(PGOReader, Fn);
836
187
  }
837
579
}
838
839
579
void CodeGenPGO::mapRegionCounters(const Decl *D) {
840
  // Use the latest hash version when inserting instrumentation, but use the
841
  // version in the indexed profile if we're reading PGO data.
842
579
  PGOHashVersion HashVersion = PGO_HASH_LATEST;
843
579
  uint64_t ProfileVersion = llvm::IndexedInstrProf::Version;
844
579
  if (auto *PGOReader = CGM.getPGOReader()) {
845
187
    HashVersion = getPGOHashVersion(PGOReader, CGM);
846
187
    ProfileVersion = PGOReader->getVersion();
847
187
  }
848
849
579
  RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
850
579
  MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap);
851
579
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
852
568
    Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
853
11
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
854
3
    Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
855
8
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
856
2
    Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
857
6
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
858
6
    Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
859
579
  assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
860
0
  NumRegionCounters = Walker.NextCounter;
861
579
  FunctionHash = Walker.Hash.finalize();
862
579
}
863
864
257
bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
865
257
  if (!D->getBody())
866
0
    return true;
867
868
  // Skip host-only functions in the CUDA device compilation and device-only
869
  // functions in the host compilation. Just roughly filter them out based on
870
  // the function attributes. If there are effectively host-only or device-only
871
  // ones, their coverage mapping may still be generated.
872
257
  if (CGM.getLangOpts().CUDA &&
873
257
      
(8
(8
CGM.getLangOpts().CUDAIsDevice8
&&
!D->hasAttr<CUDADeviceAttr>()0
&&
874
8
        
!D->hasAttr<CUDAGlobalAttr>()0
) ||
875
8
       (!CGM.getLangOpts().CUDAIsDevice &&
876
8
        (D->hasAttr<CUDAGlobalAttr>() ||
877
8
         
(6
!D->hasAttr<CUDAHostAttr>()6
&&
D->hasAttr<CUDADeviceAttr>()4
)))))
878
4
    return true;
879
880
  // Don't map the functions in system headers.
881
253
  const auto &SM = CGM.getContext().getSourceManager();
882
253
  auto Loc = D->getBody()->getBeginLoc();
883
253
  return SM.isInSystemHeader(Loc);
884
257
}
885
886
231
void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
887
231
  if (skipRegionMappingForDecl(D))
888
0
    return;
889
890
231
  std::string CoverageMapping;
891
231
  llvm::raw_string_ostream OS(CoverageMapping);
892
231
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
893
231
                                CGM.getContext().getSourceManager(),
894
231
                                CGM.getLangOpts(), RegionCounterMap.get());
895
231
  MappingGen.emitCounterMapping(D, OS);
896
231
  OS.flush();
897
898
231
  if (CoverageMapping.empty())
899
1
    return;
900
901
230
  CGM.getCoverageMapping()->addFunctionMappingRecord(
902
230
      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
903
230
}
904
905
void
906
CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
907
26
                                    llvm::GlobalValue::LinkageTypes Linkage) {
908
26
  if (skipRegionMappingForDecl(D))
909
5
    return;
910
911
21
  std::string CoverageMapping;
912
21
  llvm::raw_string_ostream OS(CoverageMapping);
913
21
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
914
21
                                CGM.getContext().getSourceManager(),
915
21
                                CGM.getLangOpts());
916
21
  MappingGen.emitEmptyMapping(D, OS);
917
21
  OS.flush();
918
919
21
  if (CoverageMapping.empty())
920
0
    return;
921
922
21
  setFuncName(Name, Linkage);
923
21
  CGM.getCoverageMapping()->addFunctionMappingRecord(
924
21
      FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
925
21
}
926
927
187
void CodeGenPGO::computeRegionCounts(const Decl *D) {
928
187
  StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
929
187
  ComputeRegionCounts Walker(*StmtCountMap, *this);
930
187
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
931
183
    Walker.VisitFunctionDecl(FD);
932
4
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
933
1
    Walker.VisitObjCMethodDecl(MD);
934
3
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
935
1
    Walker.VisitBlockDecl(BD);
936
2
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
937
2
    Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
938
187
}
939
940
void
941
CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
942
187
                                    llvm::Function *Fn) {
943
187
  if (!haveRegionCounts())
944
15
    return;
945
946
172
  uint64_t FunctionCount = getRegionCount(nullptr);
947
172
  Fn->setEntryCount(FunctionCount);
948
172
}
949
950
void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
951
1.12k
                                      llvm::Value *StepV) {
952
1.12k
  if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
953
28
    return;
954
1.10k
  if (!Builder.GetInsertBlock())
955
5
    return;
956
957
1.09k
  unsigned Counter = (*RegionCounterMap)[S];
958
1.09k
  auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
959
960
1.09k
  llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
961
1.09k
                         Builder.getInt64(FunctionHash),
962
1.09k
                         Builder.getInt32(NumRegionCounters),
963
1.09k
                         Builder.getInt32(Counter), StepV};
964
1.09k
  if (!StepV)
965
1.09k
    Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
966
1.09k
                       makeArrayRef(Args, 4));
967
1
  else
968
1
    Builder.CreateCall(
969
1
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
970
1
        makeArrayRef(Args));
971
1.09k
}
972
973
35.1k
void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) {
974
35.1k
  if (CGM.getCodeGenOpts().hasProfileClangInstr())
975
121
    M.addModuleFlag(llvm::Module::Warning, "EnableValueProfiling",
976
121
                    uint32_t(EnableValueProfiling));
977
35.1k
}
978
979
// This method either inserts a call to the profile run-time during
980
// instrumentation or puts profile data into metadata for PGO use.
981
void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
982
21.7k
    llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
983
984
21.7k
  if (!EnableValueProfiling)
985
21.7k
    return;
986
987
4
  if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
988
0
    return;
989
990
4
  if (isa<llvm::Constant>(ValuePtr))
991
1
    return;
992
993
3
  bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
994
3
  if (InstrumentValueSites && RegionCounterMap) {
995
3
    auto BuilderInsertPoint = Builder.saveIP();
996
3
    Builder.SetInsertPoint(ValueSite);
997
3
    llvm::Value *Args[5] = {
998
3
        llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
999
3
        Builder.getInt64(FunctionHash),
1000
3
        Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
1001
3
        Builder.getInt32(ValueKind),
1002
3
        Builder.getInt32(NumValueSites[ValueKind]++)
1003
3
    };
1004
3
    Builder.CreateCall(
1005
3
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
1006
3
    Builder.restoreIP(BuilderInsertPoint);
1007
3
    return;
1008
3
  }
1009
1010
0
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
1011
0
  if (PGOReader && haveRegionCounts()) {
1012
    // We record the top most called three functions at each call site.
1013
    // Profile metadata contains "VP" string identifying this metadata
1014
    // as value profiling data, then a uint32_t value for the value profiling
1015
    // kind, a uint64_t value for the total number of times the call is
1016
    // executed, followed by the function hash and execution count (uint64_t)
1017
    // pairs for each function.
1018
0
    if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
1019
0
      return;
1020
1021
0
    llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
1022
0
                            (llvm::InstrProfValueKind)ValueKind,
1023
0
                            NumValueSites[ValueKind]);
1024
1025
0
    NumValueSites[ValueKind]++;
1026
0
  }
1027
0
}
1028
1029
void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
1030
187
                                  bool IsInMainFile) {
1031
187
  CGM.getPGOStats().addVisited(IsInMainFile);
1032
187
  RegionCounts.clear();
1033
187
  llvm::Expected<llvm::InstrProfRecord> RecordExpected =
1034
187
      PGOReader->getInstrProfRecord(FuncName, FunctionHash);
1035
187
  if (auto E = RecordExpected.takeError()) {
1036
15
    auto IPE = llvm::InstrProfError::take(std::move(E));
1037
15
    if (IPE == llvm::instrprof_error::unknown_function)
1038
6
      CGM.getPGOStats().addMissing(IsInMainFile);
1039
9
    else if (IPE == llvm::instrprof_error::hash_mismatch)
1040
9
      CGM.getPGOStats().addMismatched(IsInMainFile);
1041
0
    else if (IPE == llvm::instrprof_error::malformed)
1042
      // TODO: Consider a more specific warning for this case.
1043
0
      CGM.getPGOStats().addMismatched(IsInMainFile);
1044
15
    return;
1045
15
  }
1046
172
  ProfRecord =
1047
172
      std::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
1048
172
  RegionCounts = ProfRecord->Counts;
1049
172
}
1050
1051
/// Calculate what to divide by to scale weights.
1052
///
1053
/// Given the maximum weight, calculate a divisor that will scale all the
1054
/// weights to strictly less than UINT32_MAX.
1055
363
static uint64_t calculateWeightScale(uint64_t MaxWeight) {
1056
363
  return MaxWeight < UINT32_MAX ? 
1360
:
MaxWeight / UINT32_MAX + 13
;
1057
363
}
1058
1059
/// Scale an individual branch weight (and add 1).
1060
///
1061
/// Scale a 64-bit weight down to 32-bits using \c Scale.
1062
///
1063
/// According to Laplace's Rule of Succession, it is better to compute the
1064
/// weight based on the count plus 1, so universally add 1 to the value.
1065
///
1066
/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
1067
/// greater than \c Weight.
1068
815
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
1069
815
  assert(Scale && "scale by 0?");
1070
0
  uint64_t Scaled = Weight / Scale + 1;
1071
815
  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
1072
0
  return Scaled;
1073
815
}
1074
1075
llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
1076
152k
                                                    uint64_t FalseCount) const {
1077
  // Check for empty weights.
1078
152k
  if (!TrueCount && 
!FalseCount151k
)
1079
151k
    return nullptr;
1080
1081
  // Calculate how to scale down to 32-bits.
1082
327
  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
1083
1084
327
  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1085
327
  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
1086
327
                                      scaleBranchWeight(FalseCount, Scale));
1087
152k
}
1088
1089
llvm::MDNode *
1090
38
CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) const {
1091
  // We need at least two elements to create meaningful weights.
1092
38
  if (Weights.size() < 2)
1093
0
    return nullptr;
1094
1095
  // Check for empty weights.
1096
38
  uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
1097
38
  if (MaxWeight == 0)
1098
2
    return nullptr;
1099
1100
  // Calculate how to scale down to 32-bits.
1101
36
  uint64_t Scale = calculateWeightScale(MaxWeight);
1102
1103
36
  SmallVector<uint32_t, 16> ScaledWeights;
1104
36
  ScaledWeights.reserve(Weights.size());
1105
36
  for (uint64_t W : Weights)
1106
161
    ScaledWeights.push_back(scaleBranchWeight(W, Scale));
1107
1108
36
  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1109
36
  return MDHelper.createBranchWeights(ScaledWeights);
1110
38
}
1111
1112
llvm::MDNode *
1113
CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
1114
18.6k
                                             uint64_t LoopCount) const {
1115
18.6k
  if (!PGO.haveRegionCounts())
1116
18.5k
    return nullptr;
1117
141
  Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
1118
141
  if (!CondCount || *CondCount == 0)
1119
34
    return nullptr;
1120
107
  return createProfileWeights(LoopCount,
1121
107
                              std::max(*CondCount, LoopCount) - LoopCount);
1122
141
}