Coverage Report

Created: 2022-01-18 06:27

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Instrumentation-based profile-guided optimization
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CodeGenPGO.h"
14
#include "CodeGenFunction.h"
15
#include "CoverageMappingGen.h"
16
#include "clang/AST/RecursiveASTVisitor.h"
17
#include "clang/AST/StmtVisitor.h"
18
#include "llvm/IR/Intrinsics.h"
19
#include "llvm/IR/MDBuilder.h"
20
#include "llvm/Support/CommandLine.h"
21
#include "llvm/Support/Endian.h"
22
#include "llvm/Support/FileSystem.h"
23
#include "llvm/Support/MD5.h"
24
25
static llvm::cl::opt<bool>
26
    EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore,
27
                         llvm::cl::desc("Enable value profiling"),
28
                         llvm::cl::Hidden, llvm::cl::init(false));
29
30
using namespace clang;
31
using namespace CodeGen;
32
33
void CodeGenPGO::setFuncName(StringRef Name,
34
600
                             llvm::GlobalValue::LinkageTypes Linkage) {
35
600
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
36
600
  FuncName = llvm::getPGOFuncName(
37
600
      Name, Linkage, CGM.getCodeGenOpts().MainFileName,
38
600
      PGOReader ? PGOReader->getVersion() /* region count: 190 */ : llvm::IndexedInstrProf::Version /* region count: 410 */);
39
40
  // If we're generating a profile, create a variable for the name.
41
600
  if (CGM.getCodeGenOpts().hasProfileClangInstr())
42
410
    FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
43
600
}
44
45
580
void CodeGenPGO::setFuncName(llvm::Function *Fn) {
46
580
  setFuncName(Fn->getName(), Fn->getLinkage());
47
  // Create PGOFuncName meta data.
48
580
  llvm::createPGOFuncNameMetadata(*Fn, FuncName);
49
580
}
50
51
/// The version of the PGO hash algorithm.
52
enum PGOHashVersion : unsigned {
53
  PGO_HASH_V1,
54
  PGO_HASH_V2,
55
  PGO_HASH_V3,
56
57
  // Keep this set to the latest hash version.
58
  PGO_HASH_LATEST = PGO_HASH_V3
59
};
60
61
namespace {
62
/// Stable hasher for PGO region counters.
63
///
64
/// PGOHash produces a stable hash of a given function's control flow.
65
///
66
/// Changing the output of this hash will invalidate all previously generated
67
/// profiles -- i.e., don't do it.
68
///
69
/// \note  When this hash does eventually change (years?), we still need to
70
/// support old hashes.  We'll need to pull in the version number from the
71
/// profile data format and use the matching hash function.
72
class PGOHash {
73
  uint64_t Working;
74
  unsigned Count;
75
  PGOHashVersion HashVersion;
76
  llvm::MD5 MD5;
77
78
  static const int NumBitsPerType = 6;
79
  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
80
  static const unsigned TooBig = 1u << NumBitsPerType;
81
82
public:
83
  /// Hash values for AST nodes.
84
  ///
85
  /// Distinct values for AST nodes that have region counters attached.
86
  ///
87
  /// These values must be stable.  All new members must be added at the end,
88
  /// and no members should be removed.  Changing the enumeration value for an
89
  /// AST node will affect the hash of every function that contains that node.
90
  enum HashType : unsigned char {
91
    None = 0,
92
    LabelStmt = 1,
93
    WhileStmt,
94
    DoStmt,
95
    ForStmt,
96
    CXXForRangeStmt,
97
    ObjCForCollectionStmt,
98
    SwitchStmt,
99
    CaseStmt,
100
    DefaultStmt,
101
    IfStmt,
102
    CXXTryStmt,
103
    CXXCatchStmt,
104
    ConditionalOperator,
105
    BinaryOperatorLAnd,
106
    BinaryOperatorLOr,
107
    BinaryConditionalOperator,
108
    // The preceding values are available with PGO_HASH_V1.
109
110
    EndOfScope,
111
    IfThenBranch,
112
    IfElseBranch,
113
    GotoStmt,
114
    IndirectGotoStmt,
115
    BreakStmt,
116
    ContinueStmt,
117
    ReturnStmt,
118
    ThrowExpr,
119
    UnaryOperatorLNot,
120
    BinaryOperatorLT,
121
    BinaryOperatorGT,
122
    BinaryOperatorLE,
123
    BinaryOperatorGE,
124
    BinaryOperatorEQ,
125
    BinaryOperatorNE,
126
    // The preceding values are available since PGO_HASH_V2.
127
128
    // Keep this last.  It's for the static assert that follows.
129
    LastHashType
130
  };
131
  static_assert(LastHashType <= TooBig, "Too many types in HashType");
132
133
  PGOHash(PGOHashVersion HashVersion)
134
580
      : Working(0), Count(0), HashVersion(HashVersion) {}
135
  void combine(HashType Type);
136
  uint64_t finalize();
137
22.1k
  PGOHashVersion getHashVersion() const { return HashVersion; }
138
};
139
const int PGOHash::NumBitsPerType;
140
const unsigned PGOHash::NumTypesPerWord;
141
const unsigned PGOHash::TooBig;
142
143
/// Get the PGO hash version used in the given indexed profile.
144
static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
145
190
                                        CodeGenModule &CGM) {
146
190
  if (PGOReader->getVersion() <= 4)
147
24
    return PGO_HASH_V1;
148
166
  if (PGOReader->getVersion() <= 5)
149
37
    return PGO_HASH_V2;
150
129
  return PGO_HASH_V3;
151
166
}
152
153
/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
154
struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
155
  using Base = RecursiveASTVisitor<MapRegionCounters>;
156
157
  /// The next counter value to assign.
158
  unsigned NextCounter;
159
  /// The function hash.
160
  PGOHash Hash;
161
  /// The map of statements to counters.
162
  llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
163
  /// The profile version.
164
  uint64_t ProfileVersion;
165
166
  MapRegionCounters(PGOHashVersion HashVersion, uint64_t ProfileVersion,
167
                    llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
168
      : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap),
169
580
        ProfileVersion(ProfileVersion) {}
170
171
  // Blocks and lambdas are handled as separate functions, so we need not
172
  // traverse them in the parent context.
173
2
  bool TraverseBlockExpr(BlockExpr *BE) { return true; }
174
4
  bool TraverseLambdaExpr(LambdaExpr *LE) {
175
    // Traverse the captures, but not the body.
176
4
    for (auto C : zip(LE->captures(), LE->capture_inits()))
177
2
      TraverseLambdaCapture(LE, &std::get<0>(C), std::get<1>(C));
178
4
    return true;
179
4
  }
180
6
  bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
181
182
1.35k
  bool VisitDecl(const Decl *D) {
183
1.35k
    switch (D->getKind()) {
184
770
    default:
185
770
      break;
186
770
    case Decl::Function:
187
516
    case Decl::CXXMethod:
188
548
    case Decl::CXXConstructor:
189
567
    case Decl::CXXDestructor:
190
571
    case Decl::CXXConversion:
191
574
    case Decl::ObjCMethod:
192
576
    case Decl::Block:
193
582
    case Decl::Captured:
194
582
      CounterMap[D->getBody()] = NextCounter++;
195
582
      break;
196
1.35k
    }
197
1.35k
    return true;
198
1.35k
  }
199
200
  /// If \p S gets a fresh counter, update the counter mappings. Return the
201
  /// V1 hash of \p S.
202
11.3k
  PGOHash::HashType updateCounterMappings(Stmt *S) {
203
11.3k
    auto Type = getHashType(PGO_HASH_V1, S);
204
11.3k
    if (Type != PGOHash::None)
205
1.24k
      CounterMap[S] = NextCounter++;
206
11.3k
    return Type;
207
11.3k
  }
208
209
  /// The RHS of all logical operators gets a fresh counter in order to count
210
  /// how many times the RHS evaluates to true or false, depending on the
211
  /// semantics of the operator. This is only valid for ">= v7" of the profile
212
  /// version so that we facilitate backward compatibility.
213
1.09k
  bool VisitBinaryOperator(BinaryOperator *S) {
214
1.09k
    if (ProfileVersion >= llvm::IndexedInstrProf::Version7)
215
861
      if (S->isLogicalOp() &&
216
861
          CodeGenFunction::isInstrumentedCondition(S->getRHS()) /* region count: 153 */)
217
131
        CounterMap[S->getRHS()] = NextCounter++;
218
1.09k
    return Base::VisitBinaryOperator(S);
219
1.09k
  }
220
221
  /// Include \p S in the function hash.
222
11.3k
  bool VisitStmt(Stmt *S) {
223
11.3k
    auto Type = updateCounterMappings(S);
224
11.3k
    if (Hash.getHashVersion() != PGO_HASH_V1)
225
10.0k
      Type = getHashType(Hash.getHashVersion(), S);
226
11.3k
    if (Type != PGOHash::None)
227
2.04k
      Hash.combine(Type);
228
11.3k
    return true;
229
11.3k
  }
230
231
420
  bool TraverseIfStmt(IfStmt *If) {
232
    // If we used the V1 hash, use the default traversal.
233
420
    if (Hash.getHashVersion() == PGO_HASH_V1)
234
68
      return Base::TraverseIfStmt(If);
235
236
    // Otherwise, keep track of which branch we're in while traversing.
237
352
    VisitStmt(If);
238
764
    for (Stmt *CS : If->children()) {
239
764
      if (!CS)
240
0
        continue;
241
764
      if (CS == If->getThen())
242
352
        Hash.combine(PGOHash::IfThenBranch);
243
412
      else if (CS == If->getElse())
244
58
        Hash.combine(PGOHash::IfElseBranch);
245
764
      TraverseStmt(CS);
246
764
    }
247
352
    Hash.combine(PGOHash::EndOfScope);
248
352
    return true;
249
420
  }
250
251
// If the statement type \p N is nestable, and its nesting impacts profile
252
// stability, define a custom traversal which tracks the end of the statement
253
// in the hash (provided we're not using the V1 hash).
254
#define DEFINE_NESTABLE_TRAVERSAL(N)                                           \
255
379
  bool Traverse##N(N *S) {                                                     \
256
379
    Base::Traverse##N(S);                                                      \
257
379
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
379
      Hash.combine(PGOHash::EndOfScope) /* region count: 339 */;               \
259
379
    return true;                                                               \
260
379
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXCatchStmt(clang::CXXCatchStmt*)
Line
Count
Source
255
26
  bool Traverse##N(N *S) {                                                     \
256
26
    Base::Traverse##N(S);                                                      \
257
26
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
26
      Hash.combine(PGOHash::EndOfScope);                                       \
259
26
    return true;                                                               \
260
26
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXForRangeStmt(clang::CXXForRangeStmt*)
Line
Count
Source
255
13
  bool Traverse##N(N *S) {                                                     \
256
13
    Base::Traverse##N(S);                                                      \
257
13
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
13
      Hash.combine(PGOHash::EndOfScope);                                       \
259
13
    return true;                                                               \
260
13
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXTryStmt(clang::CXXTryStmt*)
Line
Count
Source
255
24
  bool Traverse##N(N *S) {                                                     \
256
24
    Base::Traverse##N(S);                                                      \
257
24
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
24
      Hash.combine(PGOHash::EndOfScope);                                       \
259
24
    return true;                                                               \
260
24
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseDoStmt(clang::DoStmt*)
Line
Count
Source
255
35
  bool Traverse##N(N *S) {                                                     \
256
35
    Base::Traverse##N(S);                                                      \
257
35
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
35
      Hash.combine(PGOHash::EndOfScope) /* region count: 29 */;                \
259
35
    return true;                                                               \
260
35
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseForStmt(clang::ForStmt*)
Line
Count
Source
255
194
  bool Traverse##N(N *S) {                                                     \
256
194
    Base::Traverse##N(S);                                                      \
257
194
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
194
      Hash.combine(PGOHash::EndOfScope) /* region count: 172 */;               \
259
194
    return true;                                                               \
260
194
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseObjCForCollectionStmt(clang::ObjCForCollectionStmt*)
Line
Count
Source
255
11
  bool Traverse##N(N *S) {                                                     \
256
11
    Base::Traverse##N(S);                                                      \
257
11
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
11
      Hash.combine(PGOHash::EndOfScope);                                       \
259
11
    return true;                                                               \
260
11
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseWhileStmt(clang::WhileStmt*)
Line
Count
Source
255
76
  bool Traverse##N(N *S) {                                                     \
256
76
    Base::Traverse##N(S);                                                      \
257
76
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
76
      Hash.combine(PGOHash::EndOfScope) /* region count: 64 */;                \
259
76
    return true;                                                               \
260
76
  }
261
262
  DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
263
  DEFINE_NESTABLE_TRAVERSAL(DoStmt)
264
  DEFINE_NESTABLE_TRAVERSAL(ForStmt)
265
  DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
266
  DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
267
  DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
268
  DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
269
270
  /// Get version \p HashVersion of the PGO hash for \p S.
271
21.3k
  PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
272
21.3k
    switch (S->getStmtClass()) {
273
17.4k
    default:
274
17.4k
      break;
275
17.4k
    case Stmt::LabelStmtClass:
276
96
      return PGOHash::LabelStmt;
277
140
    case Stmt::WhileStmtClass:
278
140
      return PGOHash::WhileStmt;
279
64
    case Stmt::DoStmtClass:
280
64
      return PGOHash::DoStmt;
281
366
    case Stmt::ForStmtClass:
282
366
      return PGOHash::ForStmt;
283
26
    case Stmt::CXXForRangeStmtClass:
284
26
      return PGOHash::CXXForRangeStmt;
285
22
    case Stmt::ObjCForCollectionStmtClass:
286
22
      return PGOHash::ObjCForCollectionStmt;
287
86
    case Stmt::SwitchStmtClass:
288
86
      return PGOHash::SwitchStmt;
289
172
    case Stmt::CaseStmtClass:
290
172
      return PGOHash::CaseStmt;
291
50
    case Stmt::DefaultStmtClass:
292
50
      return PGOHash::DefaultStmt;
293
772
    case Stmt::IfStmtClass:
294
772
      return PGOHash::IfStmt;
295
48
    case Stmt::CXXTryStmtClass:
296
48
      return PGOHash::CXXTryStmt;
297
52
    case Stmt::CXXCatchStmtClass:
298
52
      return PGOHash::CXXCatchStmt;
299
38
    case Stmt::ConditionalOperatorClass:
300
38
      return PGOHash::ConditionalOperator;
301
12
    case Stmt::BinaryConditionalOperatorClass:
302
12
      return PGOHash::BinaryConditionalOperator;
303
1.97k
    case Stmt::BinaryOperatorClass: {
304
1.97k
      const BinaryOperator *BO = cast<BinaryOperator>(S);
305
1.97k
      if (BO->getOpcode() == BO_LAnd)
306
192
        return PGOHash::BinaryOperatorLAnd;
307
1.78k
      if (BO->getOpcode() == BO_LOr)
308
162
        return PGOHash::BinaryOperatorLOr;
309
1.62k
      if (HashVersion >= PGO_HASH_V2) {
310
755
        switch (BO->getOpcode()) {
311
412
        default:
312
412
          break;
313
412
        case BO_LT:
314
202
          return PGOHash::BinaryOperatorLT;
315
34
        case BO_GT:
316
34
          return PGOHash::BinaryOperatorGT;
317
12
        case BO_LE:
318
12
          return PGOHash::BinaryOperatorLE;
319
11
        case BO_GE:
320
11
          return PGOHash::BinaryOperatorGE;
321
72
        case BO_EQ:
322
72
          return PGOHash::BinaryOperatorEQ;
323
12
        case BO_NE:
324
12
          return PGOHash::BinaryOperatorNE;
325
755
        }
326
755
      }
327
1.28k
      break;
328
1.62k
    }
329
21.3k
    }
330
331
18.6k
    if (HashVersion >= PGO_HASH_V2) {
332
8.62k
      switch (S->getStmtClass()) {
333
7.91k
      default:
334
7.91k
        break;
335
7.91k
      case Stmt::GotoStmtClass:
336
37
        return PGOHash::GotoStmt;
337
2
      case Stmt::IndirectGotoStmtClass:
338
2
        return PGOHash::IndirectGotoStmt;
339
59
      case Stmt::BreakStmtClass:
340
59
        return PGOHash::BreakStmt;
341
19
      case Stmt::ContinueStmtClass:
342
19
        return PGOHash::ContinueStmt;
343
305
      case Stmt::ReturnStmtClass:
344
305
        return PGOHash::ReturnStmt;
345
17
      case Stmt::CXXThrowExprClass:
346
17
        return PGOHash::ThrowExpr;
347
271
      case Stmt::UnaryOperatorClass: {
348
271
        const UnaryOperator *UO = cast<UnaryOperator>(S);
349
271
        if (UO->getOpcode() == UO_LNot)
350
17
          return PGOHash::UnaryOperatorLNot;
351
254
        break;
352
271
      }
353
8.62k
      }
354
8.62k
    }
355
356
18.2k
    return PGOHash::None;
357
18.6k
  }
358
};
359
360
/// A StmtVisitor that propagates the raw counts through the AST and
361
/// records the count at statements where the value may change.
362
struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
363
  /// PGO state.
364
  CodeGenPGO &PGO;
365
366
  /// A flag that is set when the current count should be recorded on the
367
  /// next statement, such as at the exit of a loop.
368
  bool RecordNextStmtCount;
369
370
  /// The count at the current location in the traversal.
371
  uint64_t CurrentCount;
372
373
  /// The map of statements to count values.
374
  llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
375
376
  /// BreakContinueStack - Keep counts of breaks and continues inside loops.
377
  struct BreakContinue {
378
    uint64_t BreakCount;
379
    uint64_t ContinueCount;
380
179
    BreakContinue() : BreakCount(0), ContinueCount(0) {}
381
  };
382
  SmallVector<BreakContinue, 8> BreakContinueStack;
383
384
  ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
385
                      CodeGenPGO &PGO)
386
190
      : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
387
388
4.16k
  void RecordStmtCount(const Stmt *S) {
389
4.16k
    if (RecordNextStmtCount) {
390
374
      CountMap[S] = CurrentCount;
391
374
      RecordNextStmtCount = false;
392
374
    }
393
4.16k
  }
394
395
  /// Set and return the current count.
396
1.43k
  uint64_t setCount(uint64_t Count) {
397
1.43k
    CurrentCount = Count;
398
1.43k
    return Count;
399
1.43k
  }
400
401
3.55k
  void VisitStmt(const Stmt *S) {
402
3.55k
    RecordStmtCount(S);
403
3.55k
    for (const Stmt *Child : S->children())
404
2.78k
      if (Child)
405
2.78k
        this->Visit(Child);
406
3.55k
  }
407
408
186
  void VisitFunctionDecl(const FunctionDecl *D) {
409
    // Counter tracks entry to the function body.
410
186
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
411
186
    CountMap[D->getBody()] = BodyCount;
412
186
    Visit(D->getBody());
413
186
  }
414
415
  // Skip lambda expressions. We visit these as FunctionDecls when we're
416
  // generating them and aren't interested in the body when generating a
417
  // parent context.
418
1
  void VisitLambdaExpr(const LambdaExpr *LE) {}
419
420
2
  void VisitCapturedDecl(const CapturedDecl *D) {
421
    // Counter tracks entry to the capture body.
422
2
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
423
2
    CountMap[D->getBody()] = BodyCount;
424
2
    Visit(D->getBody());
425
2
  }
426
427
1
  void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
428
    // Counter tracks entry to the method body.
429
1
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
430
1
    CountMap[D->getBody()] = BodyCount;
431
1
    Visit(D->getBody());
432
1
  }
433
434
1
  void VisitBlockDecl(const BlockDecl *D) {
435
    // Counter tracks entry to the block body.
436
1
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
437
1
    CountMap[D->getBody()] = BodyCount;
438
1
    Visit(D->getBody());
439
1
  }
440
441
71
  void VisitReturnStmt(const ReturnStmt *S) {
442
71
    RecordStmtCount(S);
443
71
    if (S->getRetValue())
444
60
      Visit(S->getRetValue());
445
71
    CurrentCount = 0;
446
71
    RecordNextStmtCount = true;
447
71
  }
448
449
8
  void VisitCXXThrowExpr(const CXXThrowExpr *E) {
450
8
    RecordStmtCount(E);
451
8
    if (E->getSubExpr())
452
8
      Visit(E->getSubExpr());
453
8
    CurrentCount = 0;
454
8
    RecordNextStmtCount = true;
455
8
  }
456
457
34
  void VisitGotoStmt(const GotoStmt *S) {
458
34
    RecordStmtCount(S);
459
34
    CurrentCount = 0;
460
34
    RecordNextStmtCount = true;
461
34
  }
462
463
34
  void VisitLabelStmt(const LabelStmt *S) {
464
34
    RecordNextStmtCount = false;
465
    // Counter tracks the block following the label.
466
34
    uint64_t BlockCount = setCount(PGO.getRegionCount(S));
467
34
    CountMap[S] = BlockCount;
468
34
    Visit(S->getSubStmt());
469
34
  }
470
471
47
  void VisitBreakStmt(const BreakStmt *S) {
472
47
    RecordStmtCount(S);
473
47
    assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
474
0
    BreakContinueStack.back().BreakCount += CurrentCount;
475
47
    CurrentCount = 0;
476
47
    RecordNextStmtCount = true;
477
47
  }
478
479
15
  void VisitContinueStmt(const ContinueStmt *S) {
480
15
    RecordStmtCount(S);
481
15
    assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
482
0
    BreakContinueStack.back().ContinueCount += CurrentCount;
483
15
    CurrentCount = 0;
484
15
    RecordNextStmtCount = true;
485
15
  }
486
487
36
  void VisitWhileStmt(const WhileStmt *S) {
488
36
    RecordStmtCount(S);
489
36
    uint64_t ParentCount = CurrentCount;
490
491
36
    BreakContinueStack.push_back(BreakContinue());
492
    // Visit the body region first so the break/continue adjustments can be
493
    // included when visiting the condition.
494
36
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
495
36
    CountMap[S->getBody()] = CurrentCount;
496
36
    Visit(S->getBody());
497
36
    uint64_t BackedgeCount = CurrentCount;
498
499
    // ...then go back and propagate counts through the condition. The count
500
    // at the start of the condition is the sum of the incoming edges,
501
    // the backedge from the end of the loop body, and the edges from
502
    // continue statements.
503
36
    BreakContinue BC = BreakContinueStack.pop_back_val();
504
36
    uint64_t CondCount =
505
36
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
506
36
    CountMap[S->getCond()] = CondCount;
507
36
    Visit(S->getCond());
508
36
    setCount(BC.BreakCount + CondCount - BodyCount);
509
36
    RecordNextStmtCount = true;
510
36
  }
511
512
22
  void VisitDoStmt(const DoStmt *S) {
513
22
    RecordStmtCount(S);
514
22
    uint64_t LoopCount = PGO.getRegionCount(S);
515
516
22
    BreakContinueStack.push_back(BreakContinue());
517
    // The count doesn't include the fallthrough from the parent scope. Add it.
518
22
    uint64_t BodyCount = setCount(LoopCount + CurrentCount);
519
22
    CountMap[S->getBody()] = BodyCount;
520
22
    Visit(S->getBody());
521
22
    uint64_t BackedgeCount = CurrentCount;
522
523
22
    BreakContinue BC = BreakContinueStack.pop_back_val();
524
    // The count at the start of the condition is equal to the count at the
525
    // end of the body, plus any continues.
526
22
    uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
527
22
    CountMap[S->getCond()] = CondCount;
528
22
    Visit(S->getCond());
529
22
    setCount(BC.BreakCount + CondCount - LoopCount);
530
22
    RecordNextStmtCount = true;
531
22
  }
532
533
84
  void VisitForStmt(const ForStmt *S) {
534
84
    RecordStmtCount(S);
535
84
    if (S->getInit())
536
80
      Visit(S->getInit());
537
538
84
    uint64_t ParentCount = CurrentCount;
539
540
84
    BreakContinueStack.push_back(BreakContinue());
541
    // Visit the body region first. (This is basically the same as a while
542
    // loop; see further comments in VisitWhileStmt.)
543
84
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
544
84
    CountMap[S->getBody()] = BodyCount;
545
84
    Visit(S->getBody());
546
84
    uint64_t BackedgeCount = CurrentCount;
547
84
    BreakContinue BC = BreakContinueStack.pop_back_val();
548
549
    // The increment is essentially part of the body but it needs to include
550
    // the count for all the continue statements.
551
84
    if (S->getInc()) {
552
84
      uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
553
84
      CountMap[S->getInc()] = IncCount;
554
84
      Visit(S->getInc());
555
84
    }
556
557
    // ...then go back and propagate counts through the condition.
558
84
    uint64_t CondCount =
559
84
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
560
84
    if (S->getCond()) {
561
84
      CountMap[S->getCond()] = CondCount;
562
84
      Visit(S->getCond());
563
84
    }
564
84
    setCount(BC.BreakCount + CondCount - BodyCount);
565
84
    RecordNextStmtCount = true;
566
84
  }
567
568
9
  void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
569
9
    RecordStmtCount(S);
570
9
    if (S->getInit())
571
0
      Visit(S->getInit());
572
9
    Visit(S->getLoopVarStmt());
573
9
    Visit(S->getRangeStmt());
574
9
    Visit(S->getBeginStmt());
575
9
    Visit(S->getEndStmt());
576
577
9
    uint64_t ParentCount = CurrentCount;
578
9
    BreakContinueStack.push_back(BreakContinue());
579
    // Visit the body region first. (This is basically the same as a while
580
    // loop; see further comments in VisitWhileStmt.)
581
9
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
582
9
    CountMap[S->getBody()] = BodyCount;
583
9
    Visit(S->getBody());
584
9
    uint64_t BackedgeCount = CurrentCount;
585
9
    BreakContinue BC = BreakContinueStack.pop_back_val();
586
587
    // The increment is essentially part of the body but it needs to include
588
    // the count for all the continue statements.
589
9
    uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
590
9
    CountMap[S->getInc()] = IncCount;
591
9
    Visit(S->getInc());
592
593
    // ...then go back and propagate counts through the condition.
594
9
    uint64_t CondCount =
595
9
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
596
9
    CountMap[S->getCond()] = CondCount;
597
9
    Visit(S->getCond());
598
9
    setCount(BC.BreakCount + CondCount - BodyCount);
599
9
    RecordNextStmtCount = true;
600
9
  }
601
602
5
  void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
603
5
    RecordStmtCount(S);
604
5
    Visit(S->getElement());
605
5
    uint64_t ParentCount = CurrentCount;
606
5
    BreakContinueStack.push_back(BreakContinue());
607
    // Counter tracks the body of the loop.
608
5
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
609
5
    CountMap[S->getBody()] = BodyCount;
610
5
    Visit(S->getBody());
611
5
    uint64_t BackedgeCount = CurrentCount;
612
5
    BreakContinue BC = BreakContinueStack.pop_back_val();
613
614
5
    setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
615
5
             BodyCount);
616
5
    RecordNextStmtCount = true;
617
5
  }
618
619
23
  void VisitSwitchStmt(const SwitchStmt *S) {
620
23
    RecordStmtCount(S);
621
23
    if (S->getInit())
622
0
      Visit(S->getInit());
623
23
    Visit(S->getCond());
624
23
    CurrentCount = 0;
625
23
    BreakContinueStack.push_back(BreakContinue());
626
23
    Visit(S->getBody());
627
    // If the switch is inside a loop, add the continue counts.
628
23
    BreakContinue BC = BreakContinueStack.pop_back_val();
629
23
    if (!BreakContinueStack.empty())
630
17
      BreakContinueStack.back().ContinueCount += BC.ContinueCount;
631
    // Counter tracks the exit block of the switch.
632
23
    setCount(PGO.getRegionCount(S));
633
23
    RecordNextStmtCount = true;
634
23
  }
635
636
78
  void VisitSwitchCase(const SwitchCase *S) {
637
78
    RecordNextStmtCount = false;
638
    // Counter for this particular case. This counts only jumps from the
639
    // switch header and does not include fallthrough from the case before
640
    // this one.
641
78
    uint64_t CaseCount = PGO.getRegionCount(S);
642
78
    setCount(CurrentCount + CaseCount);
643
    // We need the count without fallthrough in the mapping, so it's more useful
644
    // for branch probabilities.
645
78
    CountMap[S] = CaseCount;
646
78
    RecordNextStmtCount = true;
647
78
    Visit(S->getSubStmt());
648
78
  }
649
650
194
  void VisitIfStmt(const IfStmt *S) {
651
194
    RecordStmtCount(S);
652
653
194
    if (S->isConsteval()) {
654
0
      const Stmt *Stm = S->isNegatedConsteval() ? S->getThen() : S->getElse();
655
0
      if (Stm)
656
0
        Visit(Stm);
657
0
      return;
658
0
    }
659
660
194
    uint64_t ParentCount = CurrentCount;
661
194
    if (S->getInit())
662
0
      Visit(S->getInit());
663
194
    Visit(S->getCond());
664
665
    // Counter tracks the "then" part of an if statement. The count for
666
    // the "else" part, if it exists, will be calculated from this counter.
667
194
    uint64_t ThenCount = setCount(PGO.getRegionCount(S));
668
194
    CountMap[S->getThen()] = ThenCount;
669
194
    Visit(S->getThen());
670
194
    uint64_t OutCount = CurrentCount;
671
672
194
    uint64_t ElseCount = ParentCount - ThenCount;
673
194
    if (S->getElse()) {
674
21
      setCount(ElseCount);
675
21
      CountMap[S->getElse()] = ElseCount;
676
21
      Visit(S->getElse());
677
21
      OutCount += CurrentCount;
678
21
    } else
679
173
      OutCount += ElseCount;
680
194
    setCount(OutCount);
681
194
    RecordNextStmtCount = true;
682
194
  }
683
684
12
  void VisitCXXTryStmt(const CXXTryStmt *S) {
685
12
    RecordStmtCount(S);
686
12
    Visit(S->getTryBlock());
687
24
    for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I /* region count: 12 */)
688
12
      Visit(S->getHandler(I));
689
    // Counter tracks the continuation block of the try statement.
690
12
    setCount(PGO.getRegionCount(S));
691
12
    RecordNextStmtCount = true;
692
12
  }
693
694
12
  void VisitCXXCatchStmt(const CXXCatchStmt *S) {
695
12
    RecordNextStmtCount = false;
696
    // Counter tracks the catch statement's handler block.
697
12
    uint64_t CatchCount = setCount(PGO.getRegionCount(S));
698
12
    CountMap[S] = CatchCount;
699
12
    Visit(S->getHandlerBlock());
700
12
  }
701
702
9
  void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
703
9
    RecordStmtCount(E);
704
9
    uint64_t ParentCount = CurrentCount;
705
9
    Visit(E->getCond());
706
707
    // Counter tracks the "true" part of a conditional operator. The
708
    // count in the "false" part will be calculated from this counter.
709
9
    uint64_t TrueCount = setCount(PGO.getRegionCount(E));
710
9
    CountMap[E->getTrueExpr()] = TrueCount;
711
9
    Visit(E->getTrueExpr());
712
9
    uint64_t OutCount = CurrentCount;
713
714
9
    uint64_t FalseCount = setCount(ParentCount - TrueCount);
715
9
    CountMap[E->getFalseExpr()] = FalseCount;
716
9
    Visit(E->getFalseExpr());
717
9
    OutCount += CurrentCount;
718
719
9
    setCount(OutCount);
720
9
    RecordNextStmtCount = true;
721
9
  }
722
723
25
  void VisitBinLAnd(const BinaryOperator *E) {
724
25
    RecordStmtCount(E);
725
25
    uint64_t ParentCount = CurrentCount;
726
25
    Visit(E->getLHS());
727
    // Counter tracks the right hand side of a logical and operator.
728
25
    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
729
25
    CountMap[E->getRHS()] = RHSCount;
730
25
    Visit(E->getRHS());
731
25
    setCount(ParentCount + RHSCount - CurrentCount);
732
25
    RecordNextStmtCount = true;
733
25
  }
734
735
24
  void VisitBinLOr(const BinaryOperator *E) {
736
24
    RecordStmtCount(E);
737
24
    uint64_t ParentCount = CurrentCount;
738
24
    Visit(E->getLHS());
739
    // Counter tracks the right hand side of a logical or operator.
740
24
    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
741
24
    CountMap[E->getRHS()] = RHSCount;
742
24
    Visit(E->getRHS());
743
24
    setCount(ParentCount + RHSCount - CurrentCount);
744
24
    RecordNextStmtCount = true;
745
24
  }
746
};
747
} // end anonymous namespace
748
749
3.14k
/// Fold one more hash type into the running hash.
///
/// Types are packed NumBitsPerType bits at a time into Working. Each time
/// NumTypesPerWord types have accumulated, the packed word is fed to MD5 in
/// little-endian byte order and Working restarts from zero.
void PGOHash::combine(HashType Type) {
  // Check that we never combine 0 and only have six bits.
  assert(Type && "Hash is invalid: unexpected type 0");
  assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");

  // Pass through MD5 if enough work has built up.
  const bool WordIsFull = Count && Count % NumTypesPerWord == 0;
  if (WordIsFull) {
    using namespace llvm::support;
    uint64_t LEWord = endian::byte_swap<uint64_t, little>(Working);
    MD5.update(llvm::makeArrayRef(reinterpret_cast<uint8_t *>(&LEWord),
                                  sizeof(LEWord)));
    Working = 0;
  }

  // Accumulate the current type.
  ++Count;
  Working = (Working << NumBitsPerType) | Type;
}
766
767
580
uint64_t PGOHash::finalize() {
768
  // Use Working as the hash directly if we never used MD5.
769
580
  if (Count <= NumTypesPerWord)
770
    // No need to byte swap here, since none of the math was endian-dependent.
771
    // This number will be byte-swapped as required on endianness transitions,
772
    // so we will see the same value on the other side.
773
505
    return Working;
774
775
  // Check for remaining work in Working.
776
75
  if (Working) {
777
    // Keep the buggy behavior from v1 and v2 for backward-compatibility. This
778
    // is buggy because it converts a uint64_t into an array of uint8_t.
779
75
    if (HashVersion < PGO_HASH_V3) {
780
13
      MD5.update({(uint8_t)Working});
781
62
    } else {
782
62
      using namespace llvm::support;
783
62
      uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
784
62
      MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
785
62
    }
786
75
  }
787
788
  // Finalize the MD5 and return the hash.
789
75
  llvm::MD5::MD5Result Result;
790
75
  MD5.final(Result);
791
75
  return Result.low();
792
580
}
793
794
295k
/// Decide whether the function emitted for \p GD takes part in profiling and,
/// if it does, map its region counters, emit its coverage mapping when
/// enabled, and load/apply profile data when a profile reader is present.
void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
  const Decl *D = GD.getDecl();
  if (!D->hasBody())
    return;

  // Skip CUDA/HIP kernel launch stub functions.
  const auto &LangOpts = CGM.getLangOpts();
  const bool IsHostSideCUDA = LangOpts.CUDA && !LangOpts.CUDAIsDevice;
  if (IsHostSideCUDA && D->hasAttr<CUDAGlobalAttr>())
    return;

  // Nothing to do unless we are instrumenting or reading a profile.
  const auto &CGOpts = CGM.getCodeGenOpts();
  const bool Instrumenting = CGOpts.hasProfileClangInstr();
  llvm::IndexedInstrProfReader *Reader = CGM.getPGOReader();
  if (!(Instrumenting || Reader))
    return;

  if (D->isImplicit())
    return;

  // Constructors and destructors may be represented by several functions in
  // IR. If so, instrument only base variant, others are implemented by
  // delegation to the base one, it would be counted twice otherwise.
  if (CGM.getTarget().getCXXABI().hasConstructorVariants())
    if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(D))
      if (GD.getCtorType() != Ctor_Base &&
          CodeGenFunction::IsConstructorDelegationValid(Ctor))
        return;
  if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base)
    return;

  CGM.ClearUnusedCoverageMapping(D);

  // Functions marked noprofile are left alone from here on.
  if (Fn->hasFnAttribute(llvm::Attribute::NoProfile))
    return;

  setFuncName(Fn);
  mapRegionCounters(D);

  if (CGOpts.CoverageMapping)
    emitCounterRegionMapping(D);

  if (!Reader)
    return;

  // Profile-use path: pull in the recorded counts and apply them.
  SourceManager &SM = CGM.getContext().getSourceManager();
  const bool InMainFile = SM.isInMainFile(D->getLocation());
  loadRegionCounts(Reader, InMainFile);
  computeRegionCounts(D);
  applyFunctionAttributes(Reader, Fn);
}
838
839
580
void CodeGenPGO::mapRegionCounters(const Decl *D) {
840
  // Use the latest hash version when inserting instrumentation, but use the
841
  // version in the indexed profile if we're reading PGO data.
842
580
  PGOHashVersion HashVersion = PGO_HASH_LATEST;
843
580
  uint64_t ProfileVersion = llvm::IndexedInstrProf::Version;
844
580
  if (auto *PGOReader = CGM.getPGOReader()) {
845
190
    HashVersion = getPGOHashVersion(PGOReader, CGM);
846
190
    ProfileVersion = PGOReader->getVersion();
847
190
  }
848
849
580
  RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
850
580
  MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap);
851
580
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
852
569
    Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
853
11
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
854
3
    Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
855
8
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
856
2
    Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
857
6
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
858
6
    Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
859
580
  assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
860
0
  NumRegionCounters = Walker.NextCounter;
861
580
  FunctionHash = Walker.Hash.finalize();
862
580
}
863
864
254
bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
865
254
  if (!D->getBody())
866
0
    return true;
867
868
  // Skip host-only functions in the CUDA device compilation and device-only
869
  // functions in the host compilation. Just roughly filter them out based on
870
  // the function attributes. If there are effectively host-only or device-only
871
  // ones, their coverage mapping may still be generated.
872
254
  if (CGM.getLangOpts().CUDA &&
873
254
      
(8
(8
CGM.getLangOpts().CUDAIsDevice8
&&
!D->hasAttr<CUDADeviceAttr>()0
&&
874
8
        
!D->hasAttr<CUDAGlobalAttr>()0
) ||
875
8
       (!CGM.getLangOpts().CUDAIsDevice &&
876
8
        (D->hasAttr<CUDAGlobalAttr>() ||
877
8
         
(6
!D->hasAttr<CUDAHostAttr>()6
&&
D->hasAttr<CUDADeviceAttr>()4
)))))
878
4
    return true;
879
880
  // Don't map the functions in system headers.
881
250
  const auto &SM = CGM.getContext().getSourceManager();
882
250
  auto Loc = D->getBody()->getBeginLoc();
883
250
  return SM.isInSystemHeader(Loc);
884
254
}
885
886
229
void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
887
229
  if (skipRegionMappingForDecl(D))
888
0
    return;
889
890
229
  std::string CoverageMapping;
891
229
  llvm::raw_string_ostream OS(CoverageMapping);
892
229
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
893
229
                                CGM.getContext().getSourceManager(),
894
229
                                CGM.getLangOpts(), RegionCounterMap.get());
895
229
  MappingGen.emitCounterMapping(D, OS);
896
229
  OS.flush();
897
898
229
  if (CoverageMapping.empty())
899
1
    return;
900
901
228
  CGM.getCoverageMapping()->addFunctionMappingRecord(
902
228
      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
903
228
}
904
905
void
906
CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
907
25
                                    llvm::GlobalValue::LinkageTypes Linkage) {
908
25
  if (skipRegionMappingForDecl(D))
909
5
    return;
910
911
20
  std::string CoverageMapping;
912
20
  llvm::raw_string_ostream OS(CoverageMapping);
913
20
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
914
20
                                CGM.getContext().getSourceManager(),
915
20
                                CGM.getLangOpts());
916
20
  MappingGen.emitEmptyMapping(D, OS);
917
20
  OS.flush();
918
919
20
  if (CoverageMapping.empty())
920
0
    return;
921
922
20
  setFuncName(Name, Linkage);
923
20
  CGM.getCoverageMapping()->addFunctionMappingRecord(
924
20
      FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
925
20
}
926
927
190
void CodeGenPGO::computeRegionCounts(const Decl *D) {
928
190
  StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
929
190
  ComputeRegionCounts Walker(*StmtCountMap, *this);
930
190
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
931
186
    Walker.VisitFunctionDecl(FD);
932
4
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
933
1
    Walker.VisitObjCMethodDecl(MD);
934
3
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
935
1
    Walker.VisitBlockDecl(BD);
936
2
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
937
2
    Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
938
190
}
939
940
void
941
CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
942
190
                                    llvm::Function *Fn) {
943
190
  if (!haveRegionCounts())
944
15
    return;
945
946
175
  uint64_t FunctionCount = getRegionCount(nullptr);
947
175
  Fn->setEntryCount(FunctionCount);
948
175
}
949
950
void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
951
1.12k
                                      llvm::Value *StepV) {
952
1.12k
  if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
953
28
    return;
954
1.09k
  if (!Builder.GetInsertBlock())
955
5
    return;
956
957
1.09k
  unsigned Counter = (*RegionCounterMap)[S];
958
1.09k
  auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
959
960
1.09k
  llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
961
1.09k
                         Builder.getInt64(FunctionHash),
962
1.09k
                         Builder.getInt32(NumRegionCounters),
963
1.09k
                         Builder.getInt32(Counter), StepV};
964
1.09k
  if (!StepV)
965
1.09k
    Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
966
1.09k
                       makeArrayRef(Args, 4));
967
1
  else
968
1
    Builder.CreateCall(
969
1
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
970
1
        makeArrayRef(Args));
971
1.09k
}
972
973
34.5k
/// When clang instrumentation is enabled, record the value of the
/// EnableValueProfiling option as the "EnableValueProfiling" module flag of
/// \p M (with Warning merge behavior).
void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) {
  if (!CGM.getCodeGenOpts().hasProfileClangInstr())
    return;

  const uint32_t FlagValue = uint32_t(EnableValueProfiling);
  M.addModuleFlag(llvm::Module::Warning, "EnableValueProfiling", FlagValue);
}
978
979
// This method either inserts a call to the profile run-time during
980
// instrumentation or puts profile data into metadata for PGO use.
981
void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
982
21.9k
    llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
983
984
21.9k
  if (!EnableValueProfiling)
985
21.8k
    return;
986
987
4
  if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
988
0
    return;
989
990
4
  if (isa<llvm::Constant>(ValuePtr))
991
1
    return;
992
993
3
  bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
994
3
  if (InstrumentValueSites && RegionCounterMap) {
995
3
    auto BuilderInsertPoint = Builder.saveIP();
996
3
    Builder.SetInsertPoint(ValueSite);
997
3
    llvm::Value *Args[5] = {
998
3
        llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
999
3
        Builder.getInt64(FunctionHash),
1000
3
        Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
1001
3
        Builder.getInt32(ValueKind),
1002
3
        Builder.getInt32(NumValueSites[ValueKind]++)
1003
3
    };
1004
3
    Builder.CreateCall(
1005
3
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
1006
3
    Builder.restoreIP(BuilderInsertPoint);
1007
3
    return;
1008
3
  }
1009
1010
0
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
1011
0
  if (PGOReader && haveRegionCounts()) {
1012
    // We record the top most called three functions at each call site.
1013
    // Profile metadata contains "VP" string identifying this metadata
1014
    // as value profiling data, then a uint32_t value for the value profiling
1015
    // kind, a uint64_t value for the total number of times the call is
1016
    // executed, followed by the function hash and execution count (uint64_t)
1017
    // pairs for each function.
1018
0
    if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
1019
0
      return;
1020
1021
0
    llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
1022
0
                            (llvm::InstrProfValueKind)ValueKind,
1023
0
                            NumValueSites[ValueKind]);
1024
1025
0
    NumValueSites[ValueKind]++;
1026
0
  }
1027
0
}
1028
1029
void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
1030
190
                                  bool IsInMainFile) {
1031
190
  CGM.getPGOStats().addVisited(IsInMainFile);
1032
190
  RegionCounts.clear();
1033
190
  llvm::Expected<llvm::InstrProfRecord> RecordExpected =
1034
190
      PGOReader->getInstrProfRecord(FuncName, FunctionHash);
1035
190
  if (auto E = RecordExpected.takeError()) {
1036
15
    auto IPE = llvm::InstrProfError::take(std::move(E));
1037
15
    if (IPE == llvm::instrprof_error::unknown_function)
1038
6
      CGM.getPGOStats().addMissing(IsInMainFile);
1039
9
    else if (IPE == llvm::instrprof_error::hash_mismatch)
1040
9
      CGM.getPGOStats().addMismatched(IsInMainFile);
1041
0
    else if (IPE == llvm::instrprof_error::malformed)
1042
      // TODO: Consider a more specific warning for this case.
1043
0
      CGM.getPGOStats().addMismatched(IsInMainFile);
1044
15
    return;
1045
15
  }
1046
175
  ProfRecord =
1047
175
      std::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
1048
175
  RegionCounts = ProfRecord->Counts;
1049
175
}
1050
1051
/// Calculate what to divide by to scale weights.
///
/// Given the maximum weight, return a divisor such that every weight no
/// larger than \p MaxWeight, once divided by it, is strictly less than
/// UINT32_MAX. Weights already below UINT32_MAX need no scaling, so the
/// divisor is 1 in that case.
static uint64_t calculateWeightScale(uint64_t MaxWeight) {
  if (MaxWeight < UINT32_MAX)
    return 1;
  return MaxWeight / UINT32_MAX + 1;
}
1058
1059
/// Scale an individual branch weight (and add 1).
///
/// Scale a 64-bit weight down to 32-bits using \c Scale.
///
/// According to Laplace's Rule of Succession, it is better to compute the
/// weight based on the count plus 1, so universally add 1 to the value.
///
/// \pre \c Scale is non-zero and was calculated by \a calculateWeightScale()
/// from a maximum weight that is at least \c Weight, so that the scaled
/// value plus one still fits in 32 bits.
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
  assert(Scale && "scale by 0?");
  const uint64_t ScaledPlusOne = Weight / Scale + 1;
  assert(ScaledPlusOne <= UINT32_MAX && "overflow 32-bits");
  return static_cast<uint32_t>(ScaledPlusOne);
}
1074
1075
/// Build branch-weight metadata for a two-way branch from 64-bit counts.
///
/// \returns null when both counts are zero; otherwise a branch-weights node
/// whose weights are the counts scaled into 32 bits, each with one added.
llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
                                                    uint64_t FalseCount) const {
  // Check for empty weights.
  const bool BothZero = !TrueCount && !FalseCount;
  if (BothZero)
    return nullptr;

  // Calculate how to scale down to 32-bits.
  const uint64_t Divisor =
      calculateWeightScale(std::max(TrueCount, FalseCount));
  const uint32_t TrueWeight = scaleBranchWeight(TrueCount, Divisor);
  const uint32_t FalseWeight = scaleBranchWeight(FalseCount, Divisor);

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  return MDHelper.createBranchWeights(TrueWeight, FalseWeight);
}
1088
1089
/// Build branch-weight metadata for a multi-way branch from 64-bit counts.
///
/// \returns null when fewer than two weights are given or when every weight
/// is zero; otherwise a branch-weights node whose weights are the inputs
/// scaled into 32 bits, each with one added.
llvm::MDNode *
CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) const {
  // We need at least two elements to create meaningful weights.
  if (Weights.size() < 2)
    return nullptr;

  // Check for empty weights.
  const uint64_t Largest = *std::max_element(Weights.begin(), Weights.end());
  if (Largest == 0)
    return nullptr;

  // Calculate how to scale down to 32-bits.
  const uint64_t Divisor = calculateWeightScale(Largest);

  SmallVector<uint32_t, 16> Scaled;
  Scaled.reserve(Weights.size());
  for (uint64_t Weight : Weights)
    Scaled.push_back(scaleBranchWeight(Weight, Divisor));

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  return MDHelper.createBranchWeights(Scaled);
}
1111
1112
/// Build branch-weight metadata for a loop condition.
///
/// The "true" weight is \p LoopCount and the "false" weight is the amount by
/// which the recorded count of \p Cond exceeds \p LoopCount (zero if it does
/// not exceed it).
///
/// \returns null when no region counts are loaded, or when \p Cond has no
/// recorded count or a count of zero.
llvm::MDNode *
CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
                                             uint64_t LoopCount) const {
  if (!PGO.haveRegionCounts())
    return nullptr;

  const Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
  if (!CondCount || *CondCount == 0)
    return nullptr;

  // Clamp at zero so the subtraction can never wrap.
  const uint64_t ExitCount =
      *CondCount > LoopCount ? *CondCount - LoopCount : 0;
  return createProfileWeights(LoopCount, ExitCount);
}