Coverage Report

Created: 2021-08-24 07:12

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Instrumentation-based profile-guided optimization
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CodeGenPGO.h"
14
#include "CodeGenFunction.h"
15
#include "CoverageMappingGen.h"
16
#include "clang/AST/RecursiveASTVisitor.h"
17
#include "clang/AST/StmtVisitor.h"
18
#include "llvm/IR/Intrinsics.h"
19
#include "llvm/IR/MDBuilder.h"
20
#include "llvm/Support/CommandLine.h"
21
#include "llvm/Support/Endian.h"
22
#include "llvm/Support/FileSystem.h"
23
#include "llvm/Support/MD5.h"
24
25
static llvm::cl::opt<bool>
26
    EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore,
27
                         llvm::cl::desc("Enable value profiling"),
28
                         llvm::cl::Hidden, llvm::cl::init(false));
29
30
using namespace clang;
31
using namespace CodeGen;
32
33
void CodeGenPGO::setFuncName(StringRef Name,
34
604
                             llvm::GlobalValue::LinkageTypes Linkage) {
35
604
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
36
604
  FuncName = llvm::getPGOFuncName(
37
604
      Name, Linkage, CGM.getCodeGenOpts().MainFileName,
38
604
      PGOReader ? 
PGOReader->getVersion()194
:
llvm::IndexedInstrProf::Version410
);
39
40
  // If we're generating a profile, create a variable for the name.
41
604
  if (CGM.getCodeGenOpts().hasProfileClangInstr())
42
410
    FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
43
604
}
44
45
584
void CodeGenPGO::setFuncName(llvm::Function *Fn) {
46
584
  setFuncName(Fn->getName(), Fn->getLinkage());
47
  // Create PGOFuncName meta data.
48
584
  llvm::createPGOFuncNameMetadata(*Fn, FuncName);
49
584
}
50
51
/// The version of the PGO hash algorithm.
52
enum PGOHashVersion : unsigned {
53
  PGO_HASH_V1,
54
  PGO_HASH_V2,
55
  PGO_HASH_V3,
56
57
  // Keep this set to the latest hash version.
58
  PGO_HASH_LATEST = PGO_HASH_V3
59
};
60
61
namespace {
62
/// Stable hasher for PGO region counters.
63
///
64
/// PGOHash produces a stable hash of a given function's control flow.
65
///
66
/// Changing the output of this hash will invalidate all previously generated
67
/// profiles -- i.e., don't do it.
68
///
69
/// \note  When this hash does eventually change (years?), we still need to
70
/// support old hashes.  We'll need to pull in the version number from the
71
/// profile data format and use the matching hash function.
72
class PGOHash {
73
  uint64_t Working;
74
  unsigned Count;
75
  PGOHashVersion HashVersion;
76
  llvm::MD5 MD5;
77
78
  static const int NumBitsPerType = 6;
79
  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
80
  static const unsigned TooBig = 1u << NumBitsPerType;
81
82
public:
83
  /// Hash values for AST nodes.
84
  ///
85
  /// Distinct values for AST nodes that have region counters attached.
86
  ///
87
  /// These values must be stable.  All new members must be added at the end,
88
  /// and no members should be removed.  Changing the enumeration value for an
89
  /// AST node will affect the hash of every function that contains that node.
90
  enum HashType : unsigned char {
91
    None = 0,
92
    LabelStmt = 1,
93
    WhileStmt,
94
    DoStmt,
95
    ForStmt,
96
    CXXForRangeStmt,
97
    ObjCForCollectionStmt,
98
    SwitchStmt,
99
    CaseStmt,
100
    DefaultStmt,
101
    IfStmt,
102
    CXXTryStmt,
103
    CXXCatchStmt,
104
    ConditionalOperator,
105
    BinaryOperatorLAnd,
106
    BinaryOperatorLOr,
107
    BinaryConditionalOperator,
108
    // The preceding values are available with PGO_HASH_V1.
109
110
    EndOfScope,
111
    IfThenBranch,
112
    IfElseBranch,
113
    GotoStmt,
114
    IndirectGotoStmt,
115
    BreakStmt,
116
    ContinueStmt,
117
    ReturnStmt,
118
    ThrowExpr,
119
    UnaryOperatorLNot,
120
    BinaryOperatorLT,
121
    BinaryOperatorGT,
122
    BinaryOperatorLE,
123
    BinaryOperatorGE,
124
    BinaryOperatorEQ,
125
    BinaryOperatorNE,
126
    // The preceding values are available since PGO_HASH_V2.
127
128
    // Keep this last.  It's for the static assert that follows.
129
    LastHashType
130
  };
131
  static_assert(LastHashType <= TooBig, "Too many types in HashType");
132
133
  PGOHash(PGOHashVersion HashVersion)
134
584
      : Working(0), Count(0), HashVersion(HashVersion), MD5() {}
135
  void combine(HashType Type);
136
  uint64_t finalize();
137
22.2k
  PGOHashVersion getHashVersion() const { return HashVersion; }
138
};
139
const int PGOHash::NumBitsPerType;
140
const unsigned PGOHash::NumTypesPerWord;
141
const unsigned PGOHash::TooBig;
142
143
/// Get the PGO hash version used in the given indexed profile.
144
static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
145
194
                                        CodeGenModule &CGM) {
146
194
  if (PGOReader->getVersion() <= 4)
147
24
    return PGO_HASH_V1;
148
170
  if (PGOReader->getVersion() <= 5)
149
37
    return PGO_HASH_V2;
150
133
  return PGO_HASH_V3;
151
170
}
152
153
/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
154
struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
155
  using Base = RecursiveASTVisitor<MapRegionCounters>;
156
157
  /// The next counter value to assign.
158
  unsigned NextCounter;
159
  /// The function hash.
160
  PGOHash Hash;
161
  /// The map of statements to counters.
162
  llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
163
  /// The profile version.
164
  uint64_t ProfileVersion;
165
166
  MapRegionCounters(PGOHashVersion HashVersion, uint64_t ProfileVersion,
167
                    llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
168
      : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap),
169
584
        ProfileVersion(ProfileVersion) {}
170
171
  // Blocks and lambdas are handled as separate functions, so we need not
172
  // traverse them in the parent context.
173
2
  bool TraverseBlockExpr(BlockExpr *BE) { return true; }
174
4
  bool TraverseLambdaExpr(LambdaExpr *LE) {
175
    // Traverse the captures, but not the body.
176
4
    for (auto C : zip(LE->captures(), LE->capture_inits()))
177
2
      TraverseLambdaCapture(LE, &std::get<0>(C), std::get<1>(C));
178
4
    return true;
179
4
  }
180
6
  bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
181
182
1.36k
  bool VisitDecl(const Decl *D) {
183
1.36k
    switch (D->getKind()) {
184
774
    default:
185
774
      break;
186
774
    case Decl::Function:
187
520
    case Decl::CXXMethod:
188
552
    case Decl::CXXConstructor:
189
571
    case Decl::CXXDestructor:
190
575
    case Decl::CXXConversion:
191
578
    case Decl::ObjCMethod:
192
580
    case Decl::Block:
193
586
    case Decl::Captured:
194
586
      CounterMap[D->getBody()] = NextCounter++;
195
586
      break;
196
1.36k
    }
197
1.36k
    return true;
198
1.36k
  }
199
200
  /// If \p S gets a fresh counter, update the counter mappings. Return the
201
  /// V1 hash of \p S.
202
11.3k
  PGOHash::HashType updateCounterMappings(Stmt *S) {
203
11.3k
    auto Type = getHashType(PGO_HASH_V1, S);
204
11.3k
    if (Type != PGOHash::None)
205
1.25k
      CounterMap[S] = NextCounter++;
206
11.3k
    return Type;
207
11.3k
  }
208
209
  /// The RHS of all logical operators gets a fresh counter in order to count
210
  /// how many times the RHS evaluates to true or false, depending on the
211
  /// semantics of the operator. This is only valid for ">= v7" of the profile
212
  /// version so that we facilitate backward compatibility.
213
1.10k
  bool VisitBinaryOperator(BinaryOperator *S) {
214
1.10k
    if (ProfileVersion >= llvm::IndexedInstrProf::Version7)
215
873
      if (S->isLogicalOp() &&
216
873
          
CodeGenFunction::isInstrumentedCondition(S->getRHS())153
)
217
131
        CounterMap[S->getRHS()] = NextCounter++;
218
1.10k
    return Base::VisitBinaryOperator(S);
219
1.10k
  }
220
221
  /// Include \p S in the function hash.
222
11.3k
  bool VisitStmt(Stmt *S) {
223
11.3k
    auto Type = updateCounterMappings(S);
224
11.3k
    if (Hash.getHashVersion() != PGO_HASH_V1)
225
10.0k
      Type = getHashType(Hash.getHashVersion(), S);
226
11.3k
    if (Type != PGOHash::None)
227
2.05k
      Hash.combine(Type);
228
11.3k
    return true;
229
11.3k
  }
230
231
420
  bool TraverseIfStmt(IfStmt *If) {
232
    // If we used the V1 hash, use the default traversal.
233
420
    if (Hash.getHashVersion() == PGO_HASH_V1)
234
68
      return Base::TraverseIfStmt(If);
235
236
    // Otherwise, keep track of which branch we're in while traversing.
237
352
    VisitStmt(If);
238
764
    for (Stmt *CS : If->children()) {
239
764
      if (!CS)
240
0
        continue;
241
764
      if (CS == If->getThen())
242
352
        Hash.combine(PGOHash::IfThenBranch);
243
412
      else if (CS == If->getElse())
244
58
        Hash.combine(PGOHash::IfElseBranch);
245
764
      TraverseStmt(CS);
246
764
    }
247
352
    Hash.combine(PGOHash::EndOfScope);
248
352
    return true;
249
420
  }
250
251
// If the statement type \p N is nestable, and its nesting impacts profile
252
// stability, define a custom traversal which tracks the end of the statement
253
// in the hash (provided we're not using the V1 hash).
254
#define DEFINE_NESTABLE_TRAVERSAL(N)                                           \
255
383
  bool Traverse##N(N *S) {                                                     \
256
383
    Base::Traverse##N(S);                                                      \
257
383
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
383
      
Hash.combine(PGOHash::EndOfScope)343
; \
259
383
    return true;                                                               \
260
383
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXCatchStmt(clang::CXXCatchStmt*)
Line
Count
Source
255
26
  bool Traverse##N(N *S) {                                                     \
256
26
    Base::Traverse##N(S);                                                      \
257
26
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
26
      Hash.combine(PGOHash::EndOfScope);                                       \
259
26
    return true;                                                               \
260
26
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXForRangeStmt(clang::CXXForRangeStmt*)
Line
Count
Source
255
13
  bool Traverse##N(N *S) {                                                     \
256
13
    Base::Traverse##N(S);                                                      \
257
13
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
13
      Hash.combine(PGOHash::EndOfScope);                                       \
259
13
    return true;                                                               \
260
13
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXTryStmt(clang::CXXTryStmt*)
Line
Count
Source
255
24
  bool Traverse##N(N *S) {                                                     \
256
24
    Base::Traverse##N(S);                                                      \
257
24
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
24
      Hash.combine(PGOHash::EndOfScope);                                       \
259
24
    return true;                                                               \
260
24
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseDoStmt(clang::DoStmt*)
Line
Count
Source
255
35
  bool Traverse##N(N *S) {                                                     \
256
35
    Base::Traverse##N(S);                                                      \
257
35
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
35
      
Hash.combine(PGOHash::EndOfScope)29
; \
259
35
    return true;                                                               \
260
35
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseForStmt(clang::ForStmt*)
Line
Count
Source
255
198
  bool Traverse##N(N *S) {                                                     \
256
198
    Base::Traverse##N(S);                                                      \
257
198
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
198
      
Hash.combine(PGOHash::EndOfScope)176
; \
259
198
    return true;                                                               \
260
198
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseObjCForCollectionStmt(clang::ObjCForCollectionStmt*)
Line
Count
Source
255
11
  bool Traverse##N(N *S) {                                                     \
256
11
    Base::Traverse##N(S);                                                      \
257
11
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
11
      Hash.combine(PGOHash::EndOfScope);                                       \
259
11
    return true;                                                               \
260
11
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseWhileStmt(clang::WhileStmt*)
Line
Count
Source
255
76
  bool Traverse##N(N *S) {                                                     \
256
76
    Base::Traverse##N(S);                                                      \
257
76
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
76
      
Hash.combine(PGOHash::EndOfScope)64
; \
259
76
    return true;                                                               \
260
76
  }
261
262
  DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
263
  DEFINE_NESTABLE_TRAVERSAL(DoStmt)
264
  DEFINE_NESTABLE_TRAVERSAL(ForStmt)
265
  DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
266
  DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
267
  DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
268
  DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
269
270
  /// Get version \p HashVersion of the PGO hash for \p S.
271
21.4k
  PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
272
21.4k
    switch (S->getStmtClass()) {
273
17.5k
    default:
274
17.5k
      break;
275
17.5k
    case Stmt::LabelStmtClass:
276
96
      return PGOHash::LabelStmt;
277
140
    case Stmt::WhileStmtClass:
278
140
      return PGOHash::WhileStmt;
279
64
    case Stmt::DoStmtClass:
280
64
      return PGOHash::DoStmt;
281
374
    case Stmt::ForStmtClass:
282
374
      return PGOHash::ForStmt;
283
26
    case Stmt::CXXForRangeStmtClass:
284
26
      return PGOHash::CXXForRangeStmt;
285
22
    case Stmt::ObjCForCollectionStmtClass:
286
22
      return PGOHash::ObjCForCollectionStmt;
287
86
    case Stmt::SwitchStmtClass:
288
86
      return PGOHash::SwitchStmt;
289
172
    case Stmt::CaseStmtClass:
290
172
      return PGOHash::CaseStmt;
291
50
    case Stmt::DefaultStmtClass:
292
50
      return PGOHash::DefaultStmt;
293
772
    case Stmt::IfStmtClass:
294
772
      return PGOHash::IfStmt;
295
48
    case Stmt::CXXTryStmtClass:
296
48
      return PGOHash::CXXTryStmt;
297
52
    case Stmt::CXXCatchStmtClass:
298
52
      return PGOHash::CXXCatchStmt;
299
38
    case Stmt::ConditionalOperatorClass:
300
38
      return PGOHash::ConditionalOperator;
301
12
    case Stmt::BinaryConditionalOperatorClass:
302
12
      return PGOHash::BinaryConditionalOperator;
303
1.99k
    case Stmt::BinaryOperatorClass: {
304
1.99k
      const BinaryOperator *BO = cast<BinaryOperator>(S);
305
1.99k
      if (BO->getOpcode() == BO_LAnd)
306
192
        return PGOHash::BinaryOperatorLAnd;
307
1.80k
      if (BO->getOpcode() == BO_LOr)
308
162
        return PGOHash::BinaryOperatorLOr;
309
1.64k
      if (HashVersion >= PGO_HASH_V2) {
310
763
        switch (BO->getOpcode()) {
311
416
        default:
312
416
          break;
313
416
        case BO_LT:
314
206
          return PGOHash::BinaryOperatorLT;
315
34
        case BO_GT:
316
34
          return PGOHash::BinaryOperatorGT;
317
12
        case BO_LE:
318
12
          return PGOHash::BinaryOperatorLE;
319
11
        case BO_GE:
320
11
          return PGOHash::BinaryOperatorGE;
321
72
        case BO_EQ:
322
72
          return PGOHash::BinaryOperatorEQ;
323
12
        case BO_NE:
324
12
          return PGOHash::BinaryOperatorNE;
325
763
        }
326
763
      }
327
1.29k
      break;
328
1.64k
    }
329
21.4k
    }
330
331
18.8k
    if (HashVersion >= PGO_HASH_V2) {
332
8.69k
      switch (S->getStmtClass()) {
333
7.97k
      default:
334
7.97k
        break;
335
7.97k
      case Stmt::GotoStmtClass:
336
37
        return PGOHash::GotoStmt;
337
2
      case Stmt::IndirectGotoStmtClass:
338
2
        return PGOHash::IndirectGotoStmt;
339
59
      case Stmt::BreakStmtClass:
340
59
        return PGOHash::BreakStmt;
341
19
      case Stmt::ContinueStmtClass:
342
19
        return PGOHash::ContinueStmt;
343
309
      case Stmt::ReturnStmtClass:
344
309
        return PGOHash::ReturnStmt;
345
17
      case Stmt::CXXThrowExprClass:
346
17
        return PGOHash::ThrowExpr;
347
275
      case Stmt::UnaryOperatorClass: {
348
275
        const UnaryOperator *UO = cast<UnaryOperator>(S);
349
275
        if (UO->getOpcode() == UO_LNot)
350
17
          return PGOHash::UnaryOperatorLNot;
351
258
        break;
352
275
      }
353
8.69k
      }
354
8.69k
    }
355
356
18.3k
    return PGOHash::None;
357
18.8k
  }
358
};
359
360
/// A StmtVisitor that propagates the raw counts through the AST and
361
/// records the count at statements where the value may change.
362
struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
363
  /// PGO state.
364
  CodeGenPGO &PGO;
365
366
  /// A flag that is set when the current count should be recorded on the
367
  /// next statement, such as at the exit of a loop.
368
  bool RecordNextStmtCount;
369
370
  /// The count at the current location in the traversal.
371
  uint64_t CurrentCount;
372
373
  /// The map of statements to count values.
374
  llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
375
376
  /// BreakContinueStack - Keep counts of breaks and continues inside loops.
377
  struct BreakContinue {
378
    uint64_t BreakCount;
379
    uint64_t ContinueCount;
380
183
    BreakContinue() : BreakCount(0), ContinueCount(0) {}
381
  };
382
  SmallVector<BreakContinue, 8> BreakContinueStack;
383
384
  ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
385
                      CodeGenPGO &PGO)
386
194
      : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
387
388
4.24k
  void RecordStmtCount(const Stmt *S) {
389
4.24k
    if (RecordNextStmtCount) {
390
378
      CountMap[S] = CurrentCount;
391
378
      RecordNextStmtCount = false;
392
378
    }
393
4.24k
  }
394
395
  /// Set and return the current count.
396
1.45k
  uint64_t setCount(uint64_t Count) {
397
1.45k
    CurrentCount = Count;
398
1.45k
    return Count;
399
1.45k
  }
400
401
3.61k
  void VisitStmt(const Stmt *S) {
402
3.61k
    RecordStmtCount(S);
403
3.61k
    for (const Stmt *Child : S->children())
404
2.83k
      if (Child)
405
2.83k
        this->Visit(Child);
406
3.61k
  }
407
408
190
  void VisitFunctionDecl(const FunctionDecl *D) {
409
    // Counter tracks entry to the function body.
410
190
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
411
190
    CountMap[D->getBody()] = BodyCount;
412
190
    Visit(D->getBody());
413
190
  }
414
415
  // Skip lambda expressions. We visit these as FunctionDecls when we're
416
  // generating them and aren't interested in the body when generating a
417
  // parent context.
418
1
  void VisitLambdaExpr(const LambdaExpr *LE) {}
419
420
2
  void VisitCapturedDecl(const CapturedDecl *D) {
421
    // Counter tracks entry to the capture body.
422
2
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
423
2
    CountMap[D->getBody()] = BodyCount;
424
2
    Visit(D->getBody());
425
2
  }
426
427
1
  void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
428
    // Counter tracks entry to the method body.
429
1
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
430
1
    CountMap[D->getBody()] = BodyCount;
431
1
    Visit(D->getBody());
432
1
  }
433
434
1
  void VisitBlockDecl(const BlockDecl *D) {
435
    // Counter tracks entry to the block body.
436
1
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
437
1
    CountMap[D->getBody()] = BodyCount;
438
1
    Visit(D->getBody());
439
1
  }
440
441
75
  void VisitReturnStmt(const ReturnStmt *S) {
442
75
    RecordStmtCount(S);
443
75
    if (S->getRetValue())
444
64
      Visit(S->getRetValue());
445
75
    CurrentCount = 0;
446
75
    RecordNextStmtCount = true;
447
75
  }
448
449
8
  void VisitCXXThrowExpr(const CXXThrowExpr *E) {
450
8
    RecordStmtCount(E);
451
8
    if (E->getSubExpr())
452
8
      Visit(E->getSubExpr());
453
8
    CurrentCount = 0;
454
8
    RecordNextStmtCount = true;
455
8
  }
456
457
34
  void VisitGotoStmt(const GotoStmt *S) {
458
34
    RecordStmtCount(S);
459
34
    CurrentCount = 0;
460
34
    RecordNextStmtCount = true;
461
34
  }
462
463
34
  void VisitLabelStmt(const LabelStmt *S) {
464
34
    RecordNextStmtCount = false;
465
    // Counter tracks the block following the label.
466
34
    uint64_t BlockCount = setCount(PGO.getRegionCount(S));
467
34
    CountMap[S] = BlockCount;
468
34
    Visit(S->getSubStmt());
469
34
  }
470
471
47
  void VisitBreakStmt(const BreakStmt *S) {
472
47
    RecordStmtCount(S);
473
47
    assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
474
0
    BreakContinueStack.back().BreakCount += CurrentCount;
475
47
    CurrentCount = 0;
476
47
    RecordNextStmtCount = true;
477
47
  }
478
479
15
  void VisitContinueStmt(const ContinueStmt *S) {
480
15
    RecordStmtCount(S);
481
15
    assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
482
0
    BreakContinueStack.back().ContinueCount += CurrentCount;
483
15
    CurrentCount = 0;
484
15
    RecordNextStmtCount = true;
485
15
  }
486
487
36
  void VisitWhileStmt(const WhileStmt *S) {
488
36
    RecordStmtCount(S);
489
36
    uint64_t ParentCount = CurrentCount;
490
491
36
    BreakContinueStack.push_back(BreakContinue());
492
    // Visit the body region first so the break/continue adjustments can be
493
    // included when visiting the condition.
494
36
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
495
36
    CountMap[S->getBody()] = CurrentCount;
496
36
    Visit(S->getBody());
497
36
    uint64_t BackedgeCount = CurrentCount;
498
499
    // ...then go back and propagate counts through the condition. The count
500
    // at the start of the condition is the sum of the incoming edges,
501
    // the backedge from the end of the loop body, and the edges from
502
    // continue statements.
503
36
    BreakContinue BC = BreakContinueStack.pop_back_val();
504
36
    uint64_t CondCount =
505
36
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
506
36
    CountMap[S->getCond()] = CondCount;
507
36
    Visit(S->getCond());
508
36
    setCount(BC.BreakCount + CondCount - BodyCount);
509
36
    RecordNextStmtCount = true;
510
36
  }
511
512
22
  void VisitDoStmt(const DoStmt *S) {
513
22
    RecordStmtCount(S);
514
22
    uint64_t LoopCount = PGO.getRegionCount(S);
515
516
22
    BreakContinueStack.push_back(BreakContinue());
517
    // The count doesn't include the fallthrough from the parent scope. Add it.
518
22
    uint64_t BodyCount = setCount(LoopCount + CurrentCount);
519
22
    CountMap[S->getBody()] = BodyCount;
520
22
    Visit(S->getBody());
521
22
    uint64_t BackedgeCount = CurrentCount;
522
523
22
    BreakContinue BC = BreakContinueStack.pop_back_val();
524
    // The count at the start of the condition is equal to the count at the
525
    // end of the body, plus any continues.
526
22
    uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
527
22
    CountMap[S->getCond()] = CondCount;
528
22
    Visit(S->getCond());
529
22
    setCount(BC.BreakCount + CondCount - LoopCount);
530
22
    RecordNextStmtCount = true;
531
22
  }
532
533
88
  void VisitForStmt(const ForStmt *S) {
534
88
    RecordStmtCount(S);
535
88
    if (S->getInit())
536
84
      Visit(S->getInit());
537
538
88
    uint64_t ParentCount = CurrentCount;
539
540
88
    BreakContinueStack.push_back(BreakContinue());
541
    // Visit the body region first. (This is basically the same as a while
542
    // loop; see further comments in VisitWhileStmt.)
543
88
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
544
88
    CountMap[S->getBody()] = BodyCount;
545
88
    Visit(S->getBody());
546
88
    uint64_t BackedgeCount = CurrentCount;
547
88
    BreakContinue BC = BreakContinueStack.pop_back_val();
548
549
    // The increment is essentially part of the body but it needs to include
550
    // the count for all the continue statements.
551
88
    if (S->getInc()) {
552
88
      uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
553
88
      CountMap[S->getInc()] = IncCount;
554
88
      Visit(S->getInc());
555
88
    }
556
557
    // ...then go back and propagate counts through the condition.
558
88
    uint64_t CondCount =
559
88
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
560
88
    if (S->getCond()) {
561
88
      CountMap[S->getCond()] = CondCount;
562
88
      Visit(S->getCond());
563
88
    }
564
88
    setCount(BC.BreakCount + CondCount - BodyCount);
565
88
    RecordNextStmtCount = true;
566
88
  }
567
568
9
  void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
569
9
    RecordStmtCount(S);
570
9
    if (S->getInit())
571
0
      Visit(S->getInit());
572
9
    Visit(S->getLoopVarStmt());
573
9
    Visit(S->getRangeStmt());
574
9
    Visit(S->getBeginStmt());
575
9
    Visit(S->getEndStmt());
576
577
9
    uint64_t ParentCount = CurrentCount;
578
9
    BreakContinueStack.push_back(BreakContinue());
579
    // Visit the body region first. (This is basically the same as a while
580
    // loop; see further comments in VisitWhileStmt.)
581
9
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
582
9
    CountMap[S->getBody()] = BodyCount;
583
9
    Visit(S->getBody());
584
9
    uint64_t BackedgeCount = CurrentCount;
585
9
    BreakContinue BC = BreakContinueStack.pop_back_val();
586
587
    // The increment is essentially part of the body but it needs to include
588
    // the count for all the continue statements.
589
9
    uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
590
9
    CountMap[S->getInc()] = IncCount;
591
9
    Visit(S->getInc());
592
593
    // ...then go back and propagate counts through the condition.
594
9
    uint64_t CondCount =
595
9
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
596
9
    CountMap[S->getCond()] = CondCount;
597
9
    Visit(S->getCond());
598
9
    setCount(BC.BreakCount + CondCount - BodyCount);
599
9
    RecordNextStmtCount = true;
600
9
  }
601
602
5
  void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
603
5
    RecordStmtCount(S);
604
5
    Visit(S->getElement());
605
5
    uint64_t ParentCount = CurrentCount;
606
5
    BreakContinueStack.push_back(BreakContinue());
607
    // Counter tracks the body of the loop.
608
5
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
609
5
    CountMap[S->getBody()] = BodyCount;
610
5
    Visit(S->getBody());
611
5
    uint64_t BackedgeCount = CurrentCount;
612
5
    BreakContinue BC = BreakContinueStack.pop_back_val();
613
614
5
    setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
615
5
             BodyCount);
616
5
    RecordNextStmtCount = true;
617
5
  }
618
619
23
  void VisitSwitchStmt(const SwitchStmt *S) {
620
23
    RecordStmtCount(S);
621
23
    if (S->getInit())
622
0
      Visit(S->getInit());
623
23
    Visit(S->getCond());
624
23
    CurrentCount = 0;
625
23
    BreakContinueStack.push_back(BreakContinue());
626
23
    Visit(S->getBody());
627
    // If the switch is inside a loop, add the continue counts.
628
23
    BreakContinue BC = BreakContinueStack.pop_back_val();
629
23
    if (!BreakContinueStack.empty())
630
17
      BreakContinueStack.back().ContinueCount += BC.ContinueCount;
631
    // Counter tracks the exit block of the switch.
632
23
    setCount(PGO.getRegionCount(S));
633
23
    RecordNextStmtCount = true;
634
23
  }
635
636
78
  void VisitSwitchCase(const SwitchCase *S) {
637
78
    RecordNextStmtCount = false;
638
    // Counter for this particular case. This counts only jumps from the
639
    // switch header and does not include fallthrough from the case before
640
    // this one.
641
78
    uint64_t CaseCount = PGO.getRegionCount(S);
642
78
    setCount(CurrentCount + CaseCount);
643
    // We need the count without fallthrough in the mapping, so it's more useful
644
    // for branch probabilities.
645
78
    CountMap[S] = CaseCount;
646
78
    RecordNextStmtCount = true;
647
78
    Visit(S->getSubStmt());
648
78
  }
649
650
194
  void VisitIfStmt(const IfStmt *S) {
651
194
    RecordStmtCount(S);
652
194
    uint64_t ParentCount = CurrentCount;
653
194
    if (S->getInit())
654
0
      Visit(S->getInit());
655
194
    Visit(S->getCond());
656
657
    // Counter tracks the "then" part of an if statement. The count for
658
    // the "else" part, if it exists, will be calculated from this counter.
659
194
    uint64_t ThenCount = setCount(PGO.getRegionCount(S));
660
194
    CountMap[S->getThen()] = ThenCount;
661
194
    Visit(S->getThen());
662
194
    uint64_t OutCount = CurrentCount;
663
664
194
    uint64_t ElseCount = ParentCount - ThenCount;
665
194
    if (S->getElse()) {
666
21
      setCount(ElseCount);
667
21
      CountMap[S->getElse()] = ElseCount;
668
21
      Visit(S->getElse());
669
21
      OutCount += CurrentCount;
670
21
    } else
671
173
      OutCount += ElseCount;
672
194
    setCount(OutCount);
673
194
    RecordNextStmtCount = true;
674
194
  }
675
676
12
  void VisitCXXTryStmt(const CXXTryStmt *S) {
677
12
    RecordStmtCount(S);
678
12
    Visit(S->getTryBlock());
679
24
    for (unsigned I = 0, E = S->getNumHandlers(); I < E; 
++I12
)
680
12
      Visit(S->getHandler(I));
681
    // Counter tracks the continuation block of the try statement.
682
12
    setCount(PGO.getRegionCount(S));
683
12
    RecordNextStmtCount = true;
684
12
  }
685
686
12
  void VisitCXXCatchStmt(const CXXCatchStmt *S) {
687
12
    RecordNextStmtCount = false;
688
    // Counter tracks the catch statement's handler block.
689
12
    uint64_t CatchCount = setCount(PGO.getRegionCount(S));
690
12
    CountMap[S] = CatchCount;
691
12
    Visit(S->getHandlerBlock());
692
12
  }
693
694
9
  void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
695
9
    RecordStmtCount(E);
696
9
    uint64_t ParentCount = CurrentCount;
697
9
    Visit(E->getCond());
698
699
    // Counter tracks the "true" part of a conditional operator. The
700
    // count in the "false" part will be calculated from this counter.
701
9
    uint64_t TrueCount = setCount(PGO.getRegionCount(E));
702
9
    CountMap[E->getTrueExpr()] = TrueCount;
703
9
    Visit(E->getTrueExpr());
704
9
    uint64_t OutCount = CurrentCount;
705
706
9
    uint64_t FalseCount = setCount(ParentCount - TrueCount);
707
9
    CountMap[E->getFalseExpr()] = FalseCount;
708
9
    Visit(E->getFalseExpr());
709
9
    OutCount += CurrentCount;
710
711
9
    setCount(OutCount);
712
9
    RecordNextStmtCount = true;
713
9
  }
714
715
25
  void VisitBinLAnd(const BinaryOperator *E) {
716
25
    RecordStmtCount(E);
717
25
    uint64_t ParentCount = CurrentCount;
718
25
    Visit(E->getLHS());
719
    // Counter tracks the right hand side of a logical and operator.
720
25
    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
721
25
    CountMap[E->getRHS()] = RHSCount;
722
25
    Visit(E->getRHS());
723
25
    setCount(ParentCount + RHSCount - CurrentCount);
724
25
    RecordNextStmtCount = true;
725
25
  }
726
727
24
  void VisitBinLOr(const BinaryOperator *E) {
728
24
    RecordStmtCount(E);
729
24
    uint64_t ParentCount = CurrentCount;
730
24
    Visit(E->getLHS());
731
    // Counter tracks the right hand side of a logical or operator.
732
24
    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
733
24
    CountMap[E->getRHS()] = RHSCount;
734
24
    Visit(E->getRHS());
735
24
    setCount(ParentCount + RHSCount - CurrentCount);
736
24
    RecordNextStmtCount = true;
737
24
  }
738
};
739
} // end anonymous namespace
740
741
3.16k
void PGOHash::combine(HashType Type) {
742
  // Check that we never combine 0 and only have six bits.
743
3.16k
  assert(Type && "Hash is invalid: unexpected type 0");
744
0
  assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
745
746
  // Pass through MD5 if enough work has built up.
747
3.16k
  if (Count && 
Count % NumTypesPerWord == 02.69k
) {
748
129
    using namespace llvm::support;
749
129
    uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
750
129
    MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
751
129
    Working = 0;
752
129
  }
753
754
  // Accumulate the current type.
755
3.16k
  ++Count;
756
3.16k
  Working = Working << NumBitsPerType | Type;
757
3.16k
}
758
759
584
uint64_t PGOHash::finalize() {
760
  // Use Working as the hash directly if we never used MD5.
761
584
  if (Count <= NumTypesPerWord)
762
    // No need to byte swap here, since none of the math was endian-dependent.
763
    // This number will be byte-swapped as required on endianness transitions,
764
    // so we will see the same value on the other side.
765
509
    return Working;
766
767
  // Check for remaining work in Working.
768
75
  if (Working) {
769
    // Keep the buggy behavior from v1 and v2 for backward-compatibility. This
770
    // is buggy because it converts a uint64_t into an array of uint8_t.
771
75
    if (HashVersion < PGO_HASH_V3) {
772
13
      MD5.update({(uint8_t)Working});
773
62
    } else {
774
62
      using namespace llvm::support;
775
62
      uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
776
62
      MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
777
62
    }
778
75
  }
779
780
  // Finalize the MD5 and return the hash.
781
75
  llvm::MD5::MD5Result Result;
782
75
  MD5.final(Result);
783
75
  return Result.low();
784
584
}
785
786
359k
void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
787
359k
  const Decl *D = GD.getDecl();
788
359k
  if (!D->hasBody())
789
128
    return;
790
791
  // Skip CUDA/HIP kernel launch stub functions.
792
359k
  if (CGM.getLangOpts().CUDA && 
!CGM.getLangOpts().CUDAIsDevice1.01k
&&
793
359k
      
D->hasAttr<CUDAGlobalAttr>()286
)
794
63
    return;
795
796
359k
  bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
797
359k
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
798
359k
  if (!InstrumentRegions && 
!PGOReader358k
)
799
358k
    return;
800
646
  if (D->isImplicit())
801
17
    return;
802
  // Constructors and destructors may be represented by several functions in IR.
803
  // If so, instrument only base variant, others are implemented by delegation
804
  // to the base one, it would be counted twice otherwise.
805
629
  if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
806
612
    if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D))
807
53
      if (GD.getCtorType() != Ctor_Base &&
808
53
          
CodeGenFunction::IsConstructorDelegationValid(CCD)26
)
809
21
        return;
810
612
  }
811
608
  if (isa<CXXDestructorDecl>(D) && 
GD.getDtorType() != Dtor_Base39
)
812
20
    return;
813
814
588
  CGM.ClearUnusedCoverageMapping(D);
815
588
  if (Fn->hasFnAttribute(llvm::Attribute::NoProfile))
816
4
    return;
817
818
584
  setFuncName(Fn);
819
820
584
  mapRegionCounters(D);
821
584
  if (CGM.getCodeGenOpts().CoverageMapping)
822
229
    emitCounterRegionMapping(D);
823
584
  if (PGOReader) {
824
194
    SourceManager &SM = CGM.getContext().getSourceManager();
825
194
    loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
826
194
    computeRegionCounts(D);
827
194
    applyFunctionAttributes(PGOReader, Fn);
828
194
  }
829
584
}
830
831
584
void CodeGenPGO::mapRegionCounters(const Decl *D) {
832
  // Use the latest hash version when inserting instrumentation, but use the
833
  // version in the indexed profile if we're reading PGO data.
834
584
  PGOHashVersion HashVersion = PGO_HASH_LATEST;
835
584
  uint64_t ProfileVersion = llvm::IndexedInstrProf::Version;
836
584
  if (auto *PGOReader = CGM.getPGOReader()) {
837
194
    HashVersion = getPGOHashVersion(PGOReader, CGM);
838
194
    ProfileVersion = PGOReader->getVersion();
839
194
  }
840
841
584
  RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
842
584
  MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap);
843
584
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
844
573
    Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
845
11
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
846
3
    Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
847
8
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
848
2
    Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
849
6
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
850
6
    Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
851
584
  assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
852
0
  NumRegionCounters = Walker.NextCounter;
853
584
  FunctionHash = Walker.Hash.finalize();
854
584
}
855
856
254
bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
857
254
  if (!D->getBody())
858
0
    return true;
859
860
  // Skip host-only functions in the CUDA device compilation and device-only
861
  // functions in the host compilation. Just roughly filter them out based on
862
  // the function attributes. If there are effectively host-only or device-only
863
  // ones, their coverage mapping may still be generated.
864
254
  if (CGM.getLangOpts().CUDA &&
865
254
      
(8
(8
CGM.getLangOpts().CUDAIsDevice8
&&
!D->hasAttr<CUDADeviceAttr>()0
&&
866
8
        
!D->hasAttr<CUDAGlobalAttr>()0
) ||
867
8
       (!CGM.getLangOpts().CUDAIsDevice &&
868
8
        (D->hasAttr<CUDAGlobalAttr>() ||
869
8
         
(6
!D->hasAttr<CUDAHostAttr>()6
&&
D->hasAttr<CUDADeviceAttr>()4
)))))
870
4
    return true;
871
872
  // Don't map the functions in system headers.
873
250
  const auto &SM = CGM.getContext().getSourceManager();
874
250
  auto Loc = D->getBody()->getBeginLoc();
875
250
  return SM.isInSystemHeader(Loc);
876
254
}
877
878
229
void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
879
229
  if (skipRegionMappingForDecl(D))
880
0
    return;
881
882
229
  std::string CoverageMapping;
883
229
  llvm::raw_string_ostream OS(CoverageMapping);
884
229
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
885
229
                                CGM.getContext().getSourceManager(),
886
229
                                CGM.getLangOpts(), RegionCounterMap.get());
887
229
  MappingGen.emitCounterMapping(D, OS);
888
229
  OS.flush();
889
890
229
  if (CoverageMapping.empty())
891
1
    return;
892
893
228
  CGM.getCoverageMapping()->addFunctionMappingRecord(
894
228
      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
895
228
}
896
897
void
898
CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
899
25
                                    llvm::GlobalValue::LinkageTypes Linkage) {
900
25
  if (skipRegionMappingForDecl(D))
901
5
    return;
902
903
20
  std::string CoverageMapping;
904
20
  llvm::raw_string_ostream OS(CoverageMapping);
905
20
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
906
20
                                CGM.getContext().getSourceManager(),
907
20
                                CGM.getLangOpts());
908
20
  MappingGen.emitEmptyMapping(D, OS);
909
20
  OS.flush();
910
911
20
  if (CoverageMapping.empty())
912
0
    return;
913
914
20
  setFuncName(Name, Linkage);
915
20
  CGM.getCoverageMapping()->addFunctionMappingRecord(
916
20
      FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
917
20
}
918
919
194
void CodeGenPGO::computeRegionCounts(const Decl *D) {
920
194
  StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
921
194
  ComputeRegionCounts Walker(*StmtCountMap, *this);
922
194
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
923
190
    Walker.VisitFunctionDecl(FD);
924
4
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
925
1
    Walker.VisitObjCMethodDecl(MD);
926
3
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
927
1
    Walker.VisitBlockDecl(BD);
928
2
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
929
2
    Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
930
194
}
931
932
void
933
CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
934
194
                                    llvm::Function *Fn) {
935
194
  if (!haveRegionCounts())
936
15
    return;
937
938
179
  uint64_t FunctionCount = getRegionCount(nullptr);
939
179
  Fn->setEntryCount(FunctionCount);
940
179
}
941
942
void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
943
1.12k
                                      llvm::Value *StepV) {
944
1.12k
  if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
945
28
    return;
946
1.09k
  if (!Builder.GetInsertBlock())
947
5
    return;
948
949
1.09k
  unsigned Counter = (*RegionCounterMap)[S];
950
1.09k
  auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
951
952
1.09k
  llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
953
1.09k
                         Builder.getInt64(FunctionHash),
954
1.09k
                         Builder.getInt32(NumRegionCounters),
955
1.09k
                         Builder.getInt32(Counter), StepV};
956
1.09k
  if (!StepV)
957
1.09k
    Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
958
1.09k
                       makeArrayRef(Args, 4));
959
1
  else
960
1
    Builder.CreateCall(
961
1
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
962
1
        makeArrayRef(Args));
963
1.09k
}
964
965
36.5k
void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) {
966
36.5k
  if (CGM.getCodeGenOpts().hasProfileClangInstr())
967
123
    M.addModuleFlag(llvm::Module::Warning, "EnableValueProfiling",
968
123
                    uint32_t(EnableValueProfiling));
969
36.5k
}
970
971
// This method either inserts a call to the profile run-time during
972
// instrumentation or puts profile data into metadata for PGO use.
973
void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
974
21.9k
    llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
975
976
21.9k
  if (!EnableValueProfiling)
977
21.9k
    return;
978
979
4
  if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
980
0
    return;
981
982
4
  if (isa<llvm::Constant>(ValuePtr))
983
1
    return;
984
985
3
  bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
986
3
  if (InstrumentValueSites && RegionCounterMap) {
987
3
    auto BuilderInsertPoint = Builder.saveIP();
988
3
    Builder.SetInsertPoint(ValueSite);
989
3
    llvm::Value *Args[5] = {
990
3
        llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
991
3
        Builder.getInt64(FunctionHash),
992
3
        Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
993
3
        Builder.getInt32(ValueKind),
994
3
        Builder.getInt32(NumValueSites[ValueKind]++)
995
3
    };
996
3
    Builder.CreateCall(
997
3
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
998
3
    Builder.restoreIP(BuilderInsertPoint);
999
3
    return;
1000
3
  }
1001
1002
0
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
1003
0
  if (PGOReader && haveRegionCounts()) {
1004
    // We record the top most called three functions at each call site.
1005
    // Profile metadata contains "VP" string identifying this metadata
1006
    // as value profiling data, then a uint32_t value for the value profiling
1007
    // kind, a uint64_t value for the total number of times the call is
1008
    // executed, followed by the function hash and execution count (uint64_t)
1009
    // pairs for each function.
1010
0
    if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
1011
0
      return;
1012
1013
0
    llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
1014
0
                            (llvm::InstrProfValueKind)ValueKind,
1015
0
                            NumValueSites[ValueKind]);
1016
1017
0
    NumValueSites[ValueKind]++;
1018
0
  }
1019
0
}
1020
1021
void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
1022
194
                                  bool IsInMainFile) {
1023
194
  CGM.getPGOStats().addVisited(IsInMainFile);
1024
194
  RegionCounts.clear();
1025
194
  llvm::Expected<llvm::InstrProfRecord> RecordExpected =
1026
194
      PGOReader->getInstrProfRecord(FuncName, FunctionHash);
1027
194
  if (auto E = RecordExpected.takeError()) {
1028
15
    auto IPE = llvm::InstrProfError::take(std::move(E));
1029
15
    if (IPE == llvm::instrprof_error::unknown_function)
1030
6
      CGM.getPGOStats().addMissing(IsInMainFile);
1031
9
    else if (IPE == llvm::instrprof_error::hash_mismatch)
1032
9
      CGM.getPGOStats().addMismatched(IsInMainFile);
1033
0
    else if (IPE == llvm::instrprof_error::malformed)
1034
      // TODO: Consider a more specific warning for this case.
1035
0
      CGM.getPGOStats().addMismatched(IsInMainFile);
1036
15
    return;
1037
15
  }
1038
179
  ProfRecord =
1039
179
      std::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
1040
179
  RegionCounts = ProfRecord->Counts;
1041
179
}
1042
1043
/// Calculate what to divide by to scale weights.
1044
///
1045
/// Given the maximum weight, calculate a divisor that will scale all the
1046
/// weights to strictly less than UINT32_MAX.
1047
352
static uint64_t calculateWeightScale(uint64_t MaxWeight) {
1048
352
  return MaxWeight < UINT32_MAX ? 
1349
:
MaxWeight / UINT32_MAX + 13
;
1049
352
}
1050
1051
/// Scale an individual branch weight (and add 1).
1052
///
1053
/// Scale a 64-bit weight down to 32-bits using \c Scale.
1054
///
1055
/// According to Laplace's Rule of Succession, it is better to compute the
1056
/// weight based on the count plus 1, so universally add 1 to the value.
1057
///
1058
/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
1059
/// greater than \c Weight.
1060
782
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
1061
782
  assert(Scale && "scale by 0?");
1062
0
  uint64_t Scaled = Weight / Scale + 1;
1063
782
  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
1064
0
  return Scaled;
1065
782
}
1066
1067
llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
1068
151k
                                                    uint64_t FalseCount) const {
1069
  // Check for empty weights.
1070
151k
  if (!TrueCount && 
!FalseCount150k
)
1071
150k
    return nullptr;
1072
1073
  // Calculate how to scale down to 32-bits.
1074
319
  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
1075
1076
319
  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1077
319
  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
1078
319
                                      scaleBranchWeight(FalseCount, Scale));
1079
151k
}
1080
1081
llvm::MDNode *
1082
35
CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) const {
1083
  // We need at least two elements to create meaningful weights.
1084
35
  if (Weights.size() < 2)
1085
0
    return nullptr;
1086
1087
  // Check for empty weights.
1088
35
  uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
1089
35
  if (MaxWeight == 0)
1090
2
    return nullptr;
1091
1092
  // Calculate how to scale down to 32-bits.
1093
33
  uint64_t Scale = calculateWeightScale(MaxWeight);
1094
1095
33
  SmallVector<uint32_t, 16> ScaledWeights;
1096
33
  ScaledWeights.reserve(Weights.size());
1097
33
  for (uint64_t W : Weights)
1098
144
    ScaledWeights.push_back(scaleBranchWeight(W, Scale));
1099
1100
33
  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1101
33
  return MDHelper.createBranchWeights(ScaledWeights);
1102
35
}
1103
1104
llvm::MDNode *
1105
CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
1106
18.7k
                                             uint64_t LoopCount) const {
1107
18.7k
  if (!PGO.haveRegionCounts())
1108
18.5k
    return nullptr;
1109
144
  Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
1110
144
  if (!CondCount || *CondCount == 0)
1111
34
    return nullptr;
1112
110
  return createProfileWeights(LoopCount,
1113
110
                              std::max(*CondCount, LoopCount) - LoopCount);
1114
144
}