Coverage Report

Created: 2021-01-19 06:58

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Instrumentation-based profile-guided optimization
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "CodeGenPGO.h"
14
#include "CodeGenFunction.h"
15
#include "CoverageMappingGen.h"
16
#include "clang/AST/RecursiveASTVisitor.h"
17
#include "clang/AST/StmtVisitor.h"
18
#include "llvm/IR/Intrinsics.h"
19
#include "llvm/IR/MDBuilder.h"
20
#include "llvm/Support/CommandLine.h"
21
#include "llvm/Support/Endian.h"
22
#include "llvm/Support/FileSystem.h"
23
#include "llvm/Support/MD5.h"
24
25
// Hidden boolean command-line option named "enable-value-profiling".  It is
// initialised to false and may be given zero or more times on the command
// line.  Its readers are not visible in this part of the file.
static llvm::cl::opt<bool>
26
    EnableValueProfiling("enable-value-profiling", llvm::cl::ZeroOrMore,
27
                         llvm::cl::desc("Enable value profiling"),
28
                         llvm::cl::Hidden, llvm::cl::init(false));
29
30
using namespace clang;
31
using namespace CodeGen;
32
33
void CodeGenPGO::setFuncName(StringRef Name,
34
566
                             llvm::GlobalValue::LinkageTypes Linkage) {
35
566
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
36
566
  FuncName = llvm::getPGOFuncName(
37
566
      Name, Linkage, CGM.getCodeGenOpts().MainFileName,
38
376
      PGOReader ? 
PGOReader->getVersion()190
: llvm::IndexedInstrProf::Version);
39
40
  // If we're generating a profile, create a variable for the name.
41
566
  if (CGM.getCodeGenOpts().hasProfileClangInstr())
42
376
    FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
43
566
}
44
45
546
/// Convenience overload: derive the PGO name from \p Fn's own name and
/// linkage, then attach that name to \p Fn as PGOFuncName metadata.
void CodeGenPGO::setFuncName(llvm::Function *Fn) {
  llvm::Function &F = *Fn;
  setFuncName(F.getName(), F.getLinkage());
  // FuncName was just updated by the call above.
  llvm::createPGOFuncNameMetadata(F, FuncName);
}
50
51
/// The version of the PGO hash algorithm.
///
/// Versions are ordered: code compares them with relational operators, so a
/// newer version must always have a larger value than an older one.
enum PGOHashVersion : unsigned {
  PGO_HASH_V1 = 0,
  PGO_HASH_V2 = 1,
  PGO_HASH_V3 = 2,

  // Alias for the newest version; update it whenever a version is added.
  PGO_HASH_LATEST = PGO_HASH_V3
};
60
61
namespace {
62
/// Stable hasher for PGO region counters.
63
///
64
/// PGOHash produces a stable hash of a given function's control flow.
65
///
66
/// Changing the output of this hash will invalidate all previously generated
67
/// profiles -- i.e., don't do it.
68
///
69
/// \note  When this hash does eventually change (years?), we still need to
70
/// support old hashes.  We'll need to pull in the version number from the
71
/// profile data format and use the matching hash function.
72
class PGOHash {
73
  uint64_t Working;
74
  unsigned Count;
75
  PGOHashVersion HashVersion;
76
  llvm::MD5 MD5;
77
78
  static const int NumBitsPerType = 6;
79
  static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
80
  static const unsigned TooBig = 1u << NumBitsPerType;
81
82
public:
83
  /// Hash values for AST nodes.
84
  ///
85
  /// Distinct values for AST nodes that have region counters attached.
86
  ///
87
  /// These values must be stable.  All new members must be added at the end,
88
  /// and no members should be removed.  Changing the enumeration value for an
89
  /// AST node will affect the hash of every function that contains that node.
90
  enum HashType : unsigned char {
91
    None = 0,
92
    LabelStmt = 1,
93
    WhileStmt,
94
    DoStmt,
95
    ForStmt,
96
    CXXForRangeStmt,
97
    ObjCForCollectionStmt,
98
    SwitchStmt,
99
    CaseStmt,
100
    DefaultStmt,
101
    IfStmt,
102
    CXXTryStmt,
103
    CXXCatchStmt,
104
    ConditionalOperator,
105
    BinaryOperatorLAnd,
106
    BinaryOperatorLOr,
107
    BinaryConditionalOperator,
108
    // The preceding values are available with PGO_HASH_V1.
109
110
    EndOfScope,
111
    IfThenBranch,
112
    IfElseBranch,
113
    GotoStmt,
114
    IndirectGotoStmt,
115
    BreakStmt,
116
    ContinueStmt,
117
    ReturnStmt,
118
    ThrowExpr,
119
    UnaryOperatorLNot,
120
    BinaryOperatorLT,
121
    BinaryOperatorGT,
122
    BinaryOperatorLE,
123
    BinaryOperatorGE,
124
    BinaryOperatorEQ,
125
    BinaryOperatorNE,
126
    // The preceding values are available since PGO_HASH_V2.
127
128
    // Keep this last.  It's for the static assert that follows.
129
    LastHashType
130
  };
131
  static_assert(LastHashType <= TooBig, "Too many types in HashType");
132
133
  PGOHash(PGOHashVersion HashVersion)
134
546
      : Working(0), Count(0), HashVersion(HashVersion), MD5() {}
135
  void combine(HashType Type);
136
  uint64_t finalize();
137
21.2k
  PGOHashVersion getHashVersion() const { return HashVersion; }
138
};
139
const int PGOHash::NumBitsPerType;
140
const unsigned PGOHash::NumTypesPerWord;
141
const unsigned PGOHash::TooBig;
142
143
/// Get the PGO hash version used in the given indexed profile.
144
static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
145
190
                                        CodeGenModule &CGM) {
146
190
  if (PGOReader->getVersion() <= 4)
147
24
    return PGO_HASH_V1;
148
166
  if (PGOReader->getVersion() <= 5)
149
37
    return PGO_HASH_V2;
150
129
  return PGO_HASH_V3;
151
129
}
152
153
/// A RecursiveASTVisitor that fills a map of statements to PGO counters.
154
struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
155
  using Base = RecursiveASTVisitor<MapRegionCounters>;
156
157
  /// The next counter value to assign.
158
  unsigned NextCounter;
159
  /// The function hash.
160
  PGOHash Hash;
161
  /// The map of statements to counters.
162
  llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
163
  /// The profile version.
164
  uint64_t ProfileVersion;
165
166
  MapRegionCounters(PGOHashVersion HashVersion, uint64_t ProfileVersion,
167
                    llvm::DenseMap<const Stmt *, unsigned> &CounterMap)
168
      : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap),
169
546
        ProfileVersion(ProfileVersion) {}
170
171
  // Blocks and lambdas are handled as separate functions, so we need not
172
  // traverse them in the parent context.
173
2
  bool TraverseBlockExpr(BlockExpr *BE) { return true; }
174
4
  bool TraverseLambdaExpr(LambdaExpr *LE) {
175
    // Traverse the captures, but not the body.
176
4
    for (auto C : zip(LE->captures(), LE->capture_inits()))
177
2
      TraverseLambdaCapture(LE, &std::get<0>(C), std::get<1>(C));
178
4
    return true;
179
4
  }
180
5
  bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
181
182
1.28k
  bool VisitDecl(const Decl *D) {
183
1.28k
    switch (D->getKind()) {
184
740
    default:
185
740
      break;
186
458
    case Decl::Function:
187
483
    case Decl::CXXMethod:
188
515
    case Decl::CXXConstructor:
189
534
    case Decl::CXXDestructor:
190
538
    case Decl::CXXConversion:
191
541
    case Decl::ObjCMethod:
192
543
    case Decl::Block:
193
548
    case Decl::Captured:
194
548
      CounterMap[D->getBody()] = NextCounter++;
195
548
      break;
196
1.28k
    }
197
1.28k
    return true;
198
1.28k
  }
199
200
  /// If \p S gets a fresh counter, update the counter mappings. Return the
201
  /// V1 hash of \p S.
202
10.8k
  PGOHash::HashType updateCounterMappings(Stmt *S) {
203
10.8k
    auto Type = getHashType(PGO_HASH_V1, S);
204
10.8k
    if (Type != PGOHash::None)
205
1.21k
      CounterMap[S] = NextCounter++;
206
10.8k
    return Type;
207
10.8k
  }
208
209
  /// The RHS of all logical operators gets a fresh counter in order to count
210
  /// how many times the RHS evaluates to true or false, depending on the
211
  /// semantics of the operator. This is only valid for ">= v7" of the profile
212
  /// version so that we facilitate backward compatibility.
213
1.05k
  bool VisitBinaryOperator(BinaryOperator *S) {
214
1.05k
    if (ProfileVersion >= llvm::IndexedInstrProf::Version7)
215
818
      if (S->isLogicalOp() &&
216
153
          CodeGenFunction::isInstrumentedCondition(S->getRHS()))
217
131
        CounterMap[S->getRHS()] = NextCounter++;
218
1.05k
    return Base::VisitBinaryOperator(S);
219
1.05k
  }
220
221
  /// Include \p S in the function hash.
222
10.8k
  bool VisitStmt(Stmt *S) {
223
10.8k
    auto Type = updateCounterMappings(S);
224
10.8k
    if (Hash.getHashVersion() != PGO_HASH_V1)
225
9.59k
      Type = getHashType(Hash.getHashVersion(), S);
226
10.8k
    if (Type != PGOHash::None)
227
1.94k
      Hash.combine(Type);
228
10.8k
    return true;
229
10.8k
  }
230
231
406
  bool TraverseIfStmt(IfStmt *If) {
232
    // If we used the V1 hash, use the default traversal.
233
406
    if (Hash.getHashVersion() == PGO_HASH_V1)
234
68
      return Base::TraverseIfStmt(If);
235
236
    // Otherwise, keep track of which branch we're in while traversing.
237
338
    VisitStmt(If);
238
731
    for (Stmt *CS : If->children()) {
239
731
      if (!CS)
240
0
        continue;
241
731
      if (CS == If->getThen())
242
338
        Hash.combine(PGOHash::IfThenBranch);
243
393
      else if (CS == If->getElse())
244
53
        Hash.combine(PGOHash::IfElseBranch);
245
731
      TraverseStmt(CS);
246
731
    }
247
338
    Hash.combine(PGOHash::EndOfScope);
248
338
    return true;
249
338
  }
250
251
// If the statement type \p N is nestable, and its nesting impacts profile
252
// stability, define a custom traversal which tracks the end of the statement
253
// in the hash (provided we're not using the V1 hash).
254
#define DEFINE_NESTABLE_TRAVERSAL(N)                                           \
255
361
  bool Traverse##N(N *S) {                                                     \
256
361
    Base::Traverse##N(S);                                                      \
257
361
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
321
      Hash.combine(PGOHash::EndOfScope);                                       \
259
361
    return true;                                                               \
260
361
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXCatchStmt(clang::CXXCatchStmt*)
Line
Count
Source
255
26
  bool Traverse##N(N *S) {                                                     \
256
26
    Base::Traverse##N(S);                                                      \
257
26
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
26
      Hash.combine(PGOHash::EndOfScope);                                       \
259
26
    return true;                                                               \
260
26
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXForRangeStmt(clang::CXXForRangeStmt*)
Line
Count
Source
255
12
  bool Traverse##N(N *S) {                                                     \
256
12
    Base::Traverse##N(S);                                                      \
257
12
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
12
      Hash.combine(PGOHash::EndOfScope);                                       \
259
12
    return true;                                                               \
260
12
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseCXXTryStmt(clang::CXXTryStmt*)
Line
Count
Source
255
24
  bool Traverse##N(N *S) {                                                     \
256
24
    Base::Traverse##N(S);                                                      \
257
24
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
24
      Hash.combine(PGOHash::EndOfScope);                                       \
259
24
    return true;                                                               \
260
24
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseDoStmt(clang::DoStmt*)
Line
Count
Source
255
34
  bool Traverse##N(N *S) {                                                     \
256
34
    Base::Traverse##N(S);                                                      \
257
34
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
28
      Hash.combine(PGOHash::EndOfScope);                                       \
259
34
    return true;                                                               \
260
34
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseForStmt(clang::ForStmt*)
Line
Count
Source
255
184
  bool Traverse##N(N *S) {                                                     \
256
184
    Base::Traverse##N(S);                                                      \
257
184
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
162
      Hash.combine(PGOHash::EndOfScope);                                       \
259
184
    return true;                                                               \
260
184
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseObjCForCollectionStmt(clang::ObjCForCollectionStmt*)
Line
Count
Source
255
11
  bool Traverse##N(N *S) {                                                     \
256
11
    Base::Traverse##N(S);                                                      \
257
11
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
11
      Hash.combine(PGOHash::EndOfScope);                                       \
259
11
    return true;                                                               \
260
11
  }
CodeGenPGO.cpp:(anonymous namespace)::MapRegionCounters::TraverseWhileStmt(clang::WhileStmt*)
Line
Count
Source
255
70
  bool Traverse##N(N *S) {                                                     \
256
70
    Base::Traverse##N(S);                                                      \
257
70
    if (Hash.getHashVersion() != PGO_HASH_V1)                                  \
258
58
      Hash.combine(PGOHash::EndOfScope);                                       \
259
70
    return true;                                                               \
260
70
  }
261
262
  DEFINE_NESTABLE_TRAVERSAL(WhileStmt)
263
  DEFINE_NESTABLE_TRAVERSAL(DoStmt)
264
  DEFINE_NESTABLE_TRAVERSAL(ForStmt)
265
  DEFINE_NESTABLE_TRAVERSAL(CXXForRangeStmt)
266
  DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
267
  DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
268
  DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
269
270
  /// Get version \p HashVersion of the PGO hash for \p S.
271
20.4k
  PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
272
20.4k
    switch (S->getStmtClass()) {
273
16.7k
    default:
274
16.7k
      break;
275
96
    case Stmt::LabelStmtClass:
276
96
      return PGOHash::LabelStmt;
277
128
    case Stmt::WhileStmtClass:
278
128
      return PGOHash::WhileStmt;
279
62
    case Stmt::DoStmtClass:
280
62
      return PGOHash::DoStmt;
281
346
    case Stmt::ForStmtClass:
282
346
      return PGOHash::ForStmt;
283
24
    case Stmt::CXXForRangeStmtClass:
284
24
      return PGOHash::CXXForRangeStmt;
285
22
    case Stmt::ObjCForCollectionStmtClass:
286
22
      return PGOHash::ObjCForCollectionStmt;
287
86
    case Stmt::SwitchStmtClass:
288
86
      return PGOHash::SwitchStmt;
289
172
    case Stmt::CaseStmtClass:
290
172
      return PGOHash::CaseStmt;
291
50
    case Stmt::DefaultStmtClass:
292
50
      return PGOHash::DefaultStmt;
293
744
    case Stmt::IfStmtClass:
294
744
      return PGOHash::IfStmt;
295
48
    case Stmt::CXXTryStmtClass:
296
48
      return PGOHash::CXXTryStmt;
297
52
    case Stmt::CXXCatchStmtClass:
298
52
      return PGOHash::CXXCatchStmt;
299
38
    case Stmt::ConditionalOperatorClass:
300
38
      return PGOHash::ConditionalOperator;
301
12
    case Stmt::BinaryConditionalOperatorClass:
302
12
      return PGOHash::BinaryConditionalOperator;
303
1.89k
    case Stmt::BinaryOperatorClass: {
304
1.89k
      const BinaryOperator *BO = cast<BinaryOperator>(S);
305
1.89k
      if (BO->getOpcode() == BO_LAnd)
306
192
        return PGOHash::BinaryOperatorLAnd;
307
1.70k
      if (BO->getOpcode() == BO_LOr)
308
162
        return PGOHash::BinaryOperatorLOr;
309
1.53k
      if (HashVersion >= PGO_HASH_V2) {
310
712
        switch (BO->getOpcode()) {
311
399
        default:
312
399
          break;
313
190
        case BO_LT:
314
190
          return PGOHash::BinaryOperatorLT;
315
32
        case BO_GT:
316
32
          return PGOHash::BinaryOperatorGT;
317
12
        case BO_LE:
318
12
          return PGOHash::BinaryOperatorLE;
319
11
        case BO_GE:
320
11
          return PGOHash::BinaryOperatorGE;
321
56
        case BO_EQ:
322
56
          return PGOHash::BinaryOperatorEQ;
323
12
        case BO_NE:
324
12
          return PGOHash::BinaryOperatorNE;
325
1.22k
        }
326
1.22k
      }
327
1.22k
      break;
328
1.22k
    }
329
17.9k
    }
330
331
17.9k
    if (HashVersion >= PGO_HASH_V2) {
332
8.26k
      switch (S->getStmtClass()) {
333
7.61k
      default:
334
7.61k
        break;
335
37
      case Stmt::GotoStmtClass:
336
37
        return PGOHash::GotoStmt;
337
2
      case Stmt::IndirectGotoStmtClass:
338
2
        return PGOHash::IndirectGotoStmt;
339
58
      case Stmt::BreakStmtClass:
340
58
        return PGOHash::BreakStmt;
341
19
      case Stmt::ContinueStmtClass:
342
19
        return PGOHash::ContinueStmt;
343
263
      case Stmt::ReturnStmtClass:
344
263
        return PGOHash::ReturnStmt;
345
17
      case Stmt::CXXThrowExprClass:
346
17
        return PGOHash::ThrowExpr;
347
257
      case Stmt::UnaryOperatorClass: {
348
257
        const UnaryOperator *UO = cast<UnaryOperator>(S);
349
257
        if (UO->getOpcode() == UO_LNot)
350
17
          return PGOHash::UnaryOperatorLNot;
351
240
        break;
352
240
      }
353
8.26k
      }
354
8.26k
    }
355
356
17.5k
    return PGOHash::None;
357
17.5k
  }
358
};
359
360
/// A StmtVisitor that propagates the raw counts through the AST and
361
/// records the count at statements where the value may change.
362
struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
363
  /// PGO state.
364
  CodeGenPGO &PGO;
365
366
  /// A flag that is set when the current count should be recorded on the
367
  /// next statement, such as at the exit of a loop.
368
  bool RecordNextStmtCount;
369
370
  /// The count at the current location in the traversal.
371
  uint64_t CurrentCount;
372
373
  /// The map of statements to count values.
374
  llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
375
376
  /// BreakContinueStack - Keep counts of breaks and continues inside loops.
377
  struct BreakContinue {
378
    uint64_t BreakCount;
379
    uint64_t ContinueCount;
380
179
    BreakContinue() : BreakCount(0), ContinueCount(0) {}
381
  };
382
  SmallVector<BreakContinue, 8> BreakContinueStack;
383
384
  ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
385
                      CodeGenPGO &PGO)
386
190
      : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
387
388
4.16k
  void RecordStmtCount(const Stmt *S) {
389
4.16k
    if (RecordNextStmtCount) {
390
374
      CountMap[S] = CurrentCount;
391
374
      RecordNextStmtCount = false;
392
374
    }
393
4.16k
  }
394
395
  /// Set and return the current count.
396
1.43k
  uint64_t setCount(uint64_t Count) {
397
1.43k
    CurrentCount = Count;
398
1.43k
    return Count;
399
1.43k
  }
400
401
3.54k
  void VisitStmt(const Stmt *S) {
402
3.54k
    RecordStmtCount(S);
403
3.54k
    for (const Stmt *Child : S->children())
404
2.78k
      if (Child)
405
2.78k
        this->Visit(Child);
406
3.54k
  }
407
408
186
  void VisitFunctionDecl(const FunctionDecl *D) {
409
    // Counter tracks entry to the function body.
410
186
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
411
186
    CountMap[D->getBody()] = BodyCount;
412
186
    Visit(D->getBody());
413
186
  }
414
415
  // Skip lambda expressions. We visit these as FunctionDecls when we're
416
  // generating them and aren't interested in the body when generating a
417
  // parent context.
418
1
  void VisitLambdaExpr(const LambdaExpr *LE) {}
419
420
2
  void VisitCapturedDecl(const CapturedDecl *D) {
421
    // Counter tracks entry to the capture body.
422
2
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
423
2
    CountMap[D->getBody()] = BodyCount;
424
2
    Visit(D->getBody());
425
2
  }
426
427
1
  void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
428
    // Counter tracks entry to the method body.
429
1
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
430
1
    CountMap[D->getBody()] = BodyCount;
431
1
    Visit(D->getBody());
432
1
  }
433
434
1
  void VisitBlockDecl(const BlockDecl *D) {
435
    // Counter tracks entry to the block body.
436
1
    uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
437
1
    CountMap[D->getBody()] = BodyCount;
438
1
    Visit(D->getBody());
439
1
  }
440
441
71
  void VisitReturnStmt(const ReturnStmt *S) {
442
71
    RecordStmtCount(S);
443
71
    if (S->getRetValue())
444
60
      Visit(S->getRetValue());
445
71
    CurrentCount = 0;
446
71
    RecordNextStmtCount = true;
447
71
  }
448
449
8
  void VisitCXXThrowExpr(const CXXThrowExpr *E) {
450
8
    RecordStmtCount(E);
451
8
    if (E->getSubExpr())
452
8
      Visit(E->getSubExpr());
453
8
    CurrentCount = 0;
454
8
    RecordNextStmtCount = true;
455
8
  }
456
457
34
  void VisitGotoStmt(const GotoStmt *S) {
458
34
    RecordStmtCount(S);
459
34
    CurrentCount = 0;
460
34
    RecordNextStmtCount = true;
461
34
  }
462
463
34
  void VisitLabelStmt(const LabelStmt *S) {
464
34
    RecordNextStmtCount = false;
465
    // Counter tracks the block following the label.
466
34
    uint64_t BlockCount = setCount(PGO.getRegionCount(S));
467
34
    CountMap[S] = BlockCount;
468
34
    Visit(S->getSubStmt());
469
34
  }
470
471
47
  void VisitBreakStmt(const BreakStmt *S) {
472
47
    RecordStmtCount(S);
473
47
    assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
474
47
    BreakContinueStack.back().BreakCount += CurrentCount;
475
47
    CurrentCount = 0;
476
47
    RecordNextStmtCount = true;
477
47
  }
478
479
15
  void VisitContinueStmt(const ContinueStmt *S) {
480
15
    RecordStmtCount(S);
481
15
    assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
482
15
    BreakContinueStack.back().ContinueCount += CurrentCount;
483
15
    CurrentCount = 0;
484
15
    RecordNextStmtCount = true;
485
15
  }
486
487
36
  void VisitWhileStmt(const WhileStmt *S) {
488
36
    RecordStmtCount(S);
489
36
    uint64_t ParentCount = CurrentCount;
490
491
36
    BreakContinueStack.push_back(BreakContinue());
492
    // Visit the body region first so the break/continue adjustments can be
493
    // included when visiting the condition.
494
36
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
495
36
    CountMap[S->getBody()] = CurrentCount;
496
36
    Visit(S->getBody());
497
36
    uint64_t BackedgeCount = CurrentCount;
498
499
    // ...then go back and propagate counts through the condition. The count
500
    // at the start of the condition is the sum of the incoming edges,
501
    // the backedge from the end of the loop body, and the edges from
502
    // continue statements.
503
36
    BreakContinue BC = BreakContinueStack.pop_back_val();
504
36
    uint64_t CondCount =
505
36
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
506
36
    CountMap[S->getCond()] = CondCount;
507
36
    Visit(S->getCond());
508
36
    setCount(BC.BreakCount + CondCount - BodyCount);
509
36
    RecordNextStmtCount = true;
510
36
  }
511
512
22
  void VisitDoStmt(const DoStmt *S) {
513
22
    RecordStmtCount(S);
514
22
    uint64_t LoopCount = PGO.getRegionCount(S);
515
516
22
    BreakContinueStack.push_back(BreakContinue());
517
    // The count doesn't include the fallthrough from the parent scope. Add it.
518
22
    uint64_t BodyCount = setCount(LoopCount + CurrentCount);
519
22
    CountMap[S->getBody()] = BodyCount;
520
22
    Visit(S->getBody());
521
22
    uint64_t BackedgeCount = CurrentCount;
522
523
22
    BreakContinue BC = BreakContinueStack.pop_back_val();
524
    // The count at the start of the condition is equal to the count at the
525
    // end of the body, plus any continues.
526
22
    uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
527
22
    CountMap[S->getCond()] = CondCount;
528
22
    Visit(S->getCond());
529
22
    setCount(BC.BreakCount + CondCount - LoopCount);
530
22
    RecordNextStmtCount = true;
531
22
  }
532
533
84
  void VisitForStmt(const ForStmt *S) {
534
84
    RecordStmtCount(S);
535
84
    if (S->getInit())
536
80
      Visit(S->getInit());
537
538
84
    uint64_t ParentCount = CurrentCount;
539
540
84
    BreakContinueStack.push_back(BreakContinue());
541
    // Visit the body region first. (This is basically the same as a while
542
    // loop; see further comments in VisitWhileStmt.)
543
84
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
544
84
    CountMap[S->getBody()] = BodyCount;
545
84
    Visit(S->getBody());
546
84
    uint64_t BackedgeCount = CurrentCount;
547
84
    BreakContinue BC = BreakContinueStack.pop_back_val();
548
549
    // The increment is essentially part of the body but it needs to include
550
    // the count for all the continue statements.
551
84
    if (S->getInc()) {
552
84
      uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
553
84
      CountMap[S->getInc()] = IncCount;
554
84
      Visit(S->getInc());
555
84
    }
556
557
    // ...then go back and propagate counts through the condition.
558
84
    uint64_t CondCount =
559
84
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
560
84
    if (S->getCond()) {
561
84
      CountMap[S->getCond()] = CondCount;
562
84
      Visit(S->getCond());
563
84
    }
564
84
    setCount(BC.BreakCount + CondCount - BodyCount);
565
84
    RecordNextStmtCount = true;
566
84
  }
567
568
9
  void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
569
9
    RecordStmtCount(S);
570
9
    if (S->getInit())
571
0
      Visit(S->getInit());
572
9
    Visit(S->getLoopVarStmt());
573
9
    Visit(S->getRangeStmt());
574
9
    Visit(S->getBeginStmt());
575
9
    Visit(S->getEndStmt());
576
577
9
    uint64_t ParentCount = CurrentCount;
578
9
    BreakContinueStack.push_back(BreakContinue());
579
    // Visit the body region first. (This is basically the same as a while
580
    // loop; see further comments in VisitWhileStmt.)
581
9
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
582
9
    CountMap[S->getBody()] = BodyCount;
583
9
    Visit(S->getBody());
584
9
    uint64_t BackedgeCount = CurrentCount;
585
9
    BreakContinue BC = BreakContinueStack.pop_back_val();
586
587
    // The increment is essentially part of the body but it needs to include
588
    // the count for all the continue statements.
589
9
    uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
590
9
    CountMap[S->getInc()] = IncCount;
591
9
    Visit(S->getInc());
592
593
    // ...then go back and propagate counts through the condition.
594
9
    uint64_t CondCount =
595
9
        setCount(ParentCount + BackedgeCount + BC.ContinueCount);
596
9
    CountMap[S->getCond()] = CondCount;
597
9
    Visit(S->getCond());
598
9
    setCount(BC.BreakCount + CondCount - BodyCount);
599
9
    RecordNextStmtCount = true;
600
9
  }
601
602
5
  void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
603
5
    RecordStmtCount(S);
604
5
    Visit(S->getElement());
605
5
    uint64_t ParentCount = CurrentCount;
606
5
    BreakContinueStack.push_back(BreakContinue());
607
    // Counter tracks the body of the loop.
608
5
    uint64_t BodyCount = setCount(PGO.getRegionCount(S));
609
5
    CountMap[S->getBody()] = BodyCount;
610
5
    Visit(S->getBody());
611
5
    uint64_t BackedgeCount = CurrentCount;
612
5
    BreakContinue BC = BreakContinueStack.pop_back_val();
613
614
5
    setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
615
5
             BodyCount);
616
5
    RecordNextStmtCount = true;
617
5
  }
618
619
23
  void VisitSwitchStmt(const SwitchStmt *S) {
620
23
    RecordStmtCount(S);
621
23
    if (S->getInit())
622
0
      Visit(S->getInit());
623
23
    Visit(S->getCond());
624
23
    CurrentCount = 0;
625
23
    BreakContinueStack.push_back(BreakContinue());
626
23
    Visit(S->getBody());
627
    // If the switch is inside a loop, add the continue counts.
628
23
    BreakContinue BC = BreakContinueStack.pop_back_val();
629
23
    if (!BreakContinueStack.empty())
630
17
      BreakContinueStack.back().ContinueCount += BC.ContinueCount;
631
    // Counter tracks the exit block of the switch.
632
23
    setCount(PGO.getRegionCount(S));
633
23
    RecordNextStmtCount = true;
634
23
  }
635
636
78
  void VisitSwitchCase(const SwitchCase *S) {
637
78
    RecordNextStmtCount = false;
638
    // Counter for this particular case. This counts only jumps from the
639
    // switch header and does not include fallthrough from the case before
640
    // this one.
641
78
    uint64_t CaseCount = PGO.getRegionCount(S);
642
78
    setCount(CurrentCount + CaseCount);
643
    // We need the count without fallthrough in the mapping, so it's more useful
644
    // for branch probabilities.
645
78
    CountMap[S] = CaseCount;
646
78
    RecordNextStmtCount = true;
647
78
    Visit(S->getSubStmt());
648
78
  }
649
650
194
  void VisitIfStmt(const IfStmt *S) {
651
194
    RecordStmtCount(S);
652
194
    uint64_t ParentCount = CurrentCount;
653
194
    if (S->getInit())
654
0
      Visit(S->getInit());
655
194
    Visit(S->getCond());
656
657
    // Counter tracks the "then" part of an if statement. The count for
658
    // the "else" part, if it exists, will be calculated from this counter.
659
194
    uint64_t ThenCount = setCount(PGO.getRegionCount(S));
660
194
    CountMap[S->getThen()] = ThenCount;
661
194
    Visit(S->getThen());
662
194
    uint64_t OutCount = CurrentCount;
663
664
194
    uint64_t ElseCount = ParentCount - ThenCount;
665
194
    if (S->getElse()) {
666
21
      setCount(ElseCount);
667
21
      CountMap[S->getElse()] = ElseCount;
668
21
      Visit(S->getElse());
669
21
      OutCount += CurrentCount;
670
21
    } else
671
173
      OutCount += ElseCount;
672
194
    setCount(OutCount);
673
194
    RecordNextStmtCount = true;
674
194
  }
675
676
12
  void VisitCXXTryStmt(const CXXTryStmt *S) {
677
12
    RecordStmtCount(S);
678
12
    Visit(S->getTryBlock());
679
24
    for (unsigned I = 0, E = S->getNumHandlers(); I < E; 
++I12
)
680
12
      Visit(S->getHandler(I));
681
    // Counter tracks the continuation block of the try statement.
682
12
    setCount(PGO.getRegionCount(S));
683
12
    RecordNextStmtCount = true;
684
12
  }
685
686
12
  void VisitCXXCatchStmt(const CXXCatchStmt *S) {
687
12
    RecordNextStmtCount = false;
688
    // Counter tracks the catch statement's handler block.
689
12
    uint64_t CatchCount = setCount(PGO.getRegionCount(S));
690
12
    CountMap[S] = CatchCount;
691
12
    Visit(S->getHandlerBlock());
692
12
  }
693
694
9
  void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
695
9
    RecordStmtCount(E);
696
9
    uint64_t ParentCount = CurrentCount;
697
9
    Visit(E->getCond());
698
699
    // Counter tracks the "true" part of a conditional operator. The
700
    // count in the "false" part will be calculated from this counter.
701
9
    uint64_t TrueCount = setCount(PGO.getRegionCount(E));
702
9
    CountMap[E->getTrueExpr()] = TrueCount;
703
9
    Visit(E->getTrueExpr());
704
9
    uint64_t OutCount = CurrentCount;
705
706
9
    uint64_t FalseCount = setCount(ParentCount - TrueCount);
707
9
    CountMap[E->getFalseExpr()] = FalseCount;
708
9
    Visit(E->getFalseExpr());
709
9
    OutCount += CurrentCount;
710
711
9
    setCount(OutCount);
712
9
    RecordNextStmtCount = true;
713
9
  }
714
715
25
  void VisitBinLAnd(const BinaryOperator *E) {
716
25
    RecordStmtCount(E);
717
25
    uint64_t ParentCount = CurrentCount;
718
25
    Visit(E->getLHS());
719
    // Counter tracks the right hand side of a logical and operator.
720
25
    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
721
25
    CountMap[E->getRHS()] = RHSCount;
722
25
    Visit(E->getRHS());
723
25
    setCount(ParentCount + RHSCount - CurrentCount);
724
25
    RecordNextStmtCount = true;
725
25
  }
726
727
24
  void VisitBinLOr(const BinaryOperator *E) {
728
24
    RecordStmtCount(E);
729
24
    uint64_t ParentCount = CurrentCount;
730
24
    Visit(E->getLHS());
731
    // Counter tracks the right hand side of a logical or operator.
732
24
    uint64_t RHSCount = setCount(PGO.getRegionCount(E));
733
24
    CountMap[E->getRHS()] = RHSCount;
734
24
    Visit(E->getRHS());
735
24
    setCount(ParentCount + RHSCount - CurrentCount);
736
24
    RecordNextStmtCount = true;
737
24
  }
738
};
739
} // end anonymous namespace
740
741
2.99k
/// Fold one more hash type into the running hash.
///
/// Types are packed into the 64-bit word \c Working, NumBitsPerType bits at a
/// time. Whenever a multiple of NumTypesPerWord types has already been
/// accumulated, the full word is fed to MD5 (in little-endian byte order) and
/// \c Working is reset before the new type is added.
void PGOHash::combine(HashType Type) {
  // Check that we never combine 0 and only have six bits.
  assert(Type && "Hash is invalid: unexpected type 0");
  assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");

  // Pass through MD5 if enough work has built up.
  if (Count && Count % NumTypesPerWord == 0) {
    using namespace llvm::support;
    uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
    MD5.update(llvm::makeArrayRef(reinterpret_cast<uint8_t *>(&Swapped),
                                  sizeof(Swapped)));
    Working = 0;
  }

  // Accumulate the current type.
  ++Count;
  Working = Working << NumBitsPerType | Type;
}
758
759
546
uint64_t PGOHash::finalize() {
760
  // Use Working as the hash directly if we never used MD5.
761
546
  if (Count <= NumTypesPerWord)
762
    // No need to byte swap here, since none of the math was endian-dependent.
763
    // This number will be byte-swapped as required on endianness transitions,
764
    // so we will see the same value on the other side.
765
477
    return Working;
766
767
  // Check for remaining work in Working.
768
69
  if (Working) {
769
    // Keep the buggy behavior from v1 and v2 for backward-compatibility. This
770
    // is buggy because it converts a uint64_t into an array of uint8_t.
771
69
    if (HashVersion < PGO_HASH_V3) {
772
13
      MD5.update({(uint8_t)Working});
773
56
    } else {
774
56
      using namespace llvm::support;
775
56
      uint64_t Swapped = endian::byte_swap<uint64_t, little>(Working);
776
56
      MD5.update(llvm::makeArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
777
56
    }
778
69
  }
779
780
  // Finalize the MD5 and return the hash.
781
69
  llvm::MD5::MD5Result Result;
782
69
  MD5.final(Result);
783
69
  return Result.low();
784
69
}
785
786
279k
/// Assign region counters to the body of \p GD and, when a profile reader is
/// available, load and apply the recorded counts to \p Fn.
///
/// Nothing is done for declarations without a body, CUDA/HIP host-side kernel
/// stubs, implicit declarations, or constructor/destructor variants other than
/// the base one (see the comments below), nor when neither instrumentation nor
/// a profile reader is active.
void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
  const Decl *D = GD.getDecl();
  if (!D->hasBody())
    return;

  // Skip CUDA/HIP kernel launch stub functions.
  if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice &&
      D->hasAttr<CUDAGlobalAttr>())
    return;

  bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
  if (!InstrumentRegions && !PGOReader)
    return;
  if (D->isImplicit())
    return;
  // Constructors and destructors may be represented by several functions in IR.
  // If so, instrument only base variant, others are implemented by delegation
  // to the base one, it would be counted twice otherwise.
  if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
    if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D))
      if (GD.getCtorType() != Ctor_Base &&
          CodeGenFunction::IsConstructorDelegationValid(CCD))
        return;
  }
  if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base)
    return;

  CGM.ClearUnusedCoverageMapping(D);
  setFuncName(Fn);

  mapRegionCounters(D);
  if (CGM.getCodeGenOpts().CoverageMapping)
    emitCounterRegionMapping(D);
  if (PGOReader) {
    // Profile-use mode: read the recorded counts and attach them.
    SourceManager &SM = CGM.getContext().getSourceManager();
    loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
    computeRegionCounts(D);
    applyFunctionAttributes(PGOReader, Fn);
  }
}
827
828
546
void CodeGenPGO::mapRegionCounters(const Decl *D) {
829
  // Use the latest hash version when inserting instrumentation, but use the
830
  // version in the indexed profile if we're reading PGO data.
831
546
  PGOHashVersion HashVersion = PGO_HASH_LATEST;
832
546
  uint64_t ProfileVersion = llvm::IndexedInstrProf::Version;
833
546
  if (auto *PGOReader = CGM.getPGOReader()) {
834
190
    HashVersion = getPGOHashVersion(PGOReader, CGM);
835
190
    ProfileVersion = PGOReader->getVersion();
836
190
  }
837
838
546
  RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
839
546
  MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap);
840
546
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
841
536
    Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
842
10
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
843
3
    Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
844
7
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
845
2
    Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
846
5
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
847
5
    Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
848
546
  assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
849
546
  NumRegionCounters = Walker.NextCounter;
850
546
  FunctionHash = Walker.Hash.finalize();
851
546
}
852
853
225
bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
854
225
  if (!D->getBody())
855
0
    return true;
856
857
  // Skip host-only functions in the CUDA device compilation and device-only
858
  // functions in the host compilation. Just roughly filter them out based on
859
  // the function attributes. If there are effectively host-only or device-only
860
  // ones, their coverage mapping may still be generated.
861
225
  if (CGM.getLangOpts().CUDA &&
862
8
      ((CGM.getLangOpts().CUDAIsDevice && 
!D->hasAttr<CUDADeviceAttr>()0
&&
863
0
        !D->hasAttr<CUDAGlobalAttr>()) ||
864
8
       (!CGM.getLangOpts().CUDAIsDevice &&
865
8
        (D->hasAttr<CUDAGlobalAttr>() ||
866
6
         (!D->hasAttr<CUDAHostAttr>() && 
D->hasAttr<CUDADeviceAttr>()4
)))))
867
4
    return true;
868
869
  // Don't map the functions in system headers.
870
221
  const auto &SM = CGM.getContext().getSourceManager();
871
221
  auto Loc = D->getBody()->getBeginLoc();
872
221
  return SM.isInSystemHeader(Loc);
873
221
}
874
875
200
void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
876
200
  if (skipRegionMappingForDecl(D))
877
0
    return;
878
879
200
  std::string CoverageMapping;
880
200
  llvm::raw_string_ostream OS(CoverageMapping);
881
200
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
882
200
                                CGM.getContext().getSourceManager(),
883
200
                                CGM.getLangOpts(), RegionCounterMap.get());
884
200
  MappingGen.emitCounterMapping(D, OS);
885
200
  OS.flush();
886
887
200
  if (CoverageMapping.empty())
888
1
    return;
889
890
199
  CGM.getCoverageMapping()->addFunctionMappingRecord(
891
199
      FuncNameVar, FuncName, FunctionHash, CoverageMapping);
892
199
}
893
894
void
895
CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
896
25
                                    llvm::GlobalValue::LinkageTypes Linkage) {
897
25
  if (skipRegionMappingForDecl(D))
898
5
    return;
899
900
20
  std::string CoverageMapping;
901
20
  llvm::raw_string_ostream OS(CoverageMapping);
902
20
  CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
903
20
                                CGM.getContext().getSourceManager(),
904
20
                                CGM.getLangOpts());
905
20
  MappingGen.emitEmptyMapping(D, OS);
906
20
  OS.flush();
907
908
20
  if (CoverageMapping.empty())
909
0
    return;
910
911
20
  setFuncName(Name, Linkage);
912
20
  CGM.getCoverageMapping()->addFunctionMappingRecord(
913
20
      FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
914
20
}
915
916
190
void CodeGenPGO::computeRegionCounts(const Decl *D) {
917
190
  StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
918
190
  ComputeRegionCounts Walker(*StmtCountMap, *this);
919
190
  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
920
186
    Walker.VisitFunctionDecl(FD);
921
4
  else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
922
1
    Walker.VisitObjCMethodDecl(MD);
923
3
  else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
924
1
    Walker.VisitBlockDecl(BD);
925
2
  else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
926
2
    Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
927
190
}
928
929
void
930
CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
931
190
                                    llvm::Function *Fn) {
932
190
  if (!haveRegionCounts())
933
15
    return;
934
935
175
  uint64_t FunctionCount = getRegionCount(nullptr);
936
175
  Fn->setEntryCount(FunctionCount);
937
175
}
938
939
void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, const Stmt *S,
940
1.06k
                                      llvm::Value *StepV) {
941
1.06k
  if (!CGM.getCodeGenOpts().hasProfileClangInstr() || !RegionCounterMap)
942
28
    return;
943
1.03k
  if (!Builder.GetInsertBlock())
944
5
    return;
945
946
1.02k
  unsigned Counter = (*RegionCounterMap)[S];
947
1.02k
  auto *I8PtrTy = llvm::Type::getInt8PtrTy(CGM.getLLVMContext());
948
949
1.02k
  llvm::Value *Args[] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
950
1.02k
                         Builder.getInt64(FunctionHash),
951
1.02k
                         Builder.getInt32(NumRegionCounters),
952
1.02k
                         Builder.getInt32(Counter), StepV};
953
1.02k
  if (!StepV)
954
1.02k
    Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
955
1.02k
                       makeArrayRef(Args, 4));
956
1
  else
957
1
    Builder.CreateCall(
958
1
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step),
959
1
        makeArrayRef(Args));
960
1.02k
}
961
962
// This method either inserts a call to the profile run-time during
963
// instrumentation or puts profile data into metadata for PGO use.
964
void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
965
18.6k
    llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
966
967
18.6k
  if (!EnableValueProfiling)
968
18.6k
    return;
969
970
4
  if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
971
0
    return;
972
973
4
  if (isa<llvm::Constant>(ValuePtr))
974
1
    return;
975
976
3
  bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
977
3
  if (InstrumentValueSites && RegionCounterMap) {
978
3
    auto BuilderInsertPoint = Builder.saveIP();
979
3
    Builder.SetInsertPoint(ValueSite);
980
3
    llvm::Value *Args[5] = {
981
3
        llvm::ConstantExpr::getBitCast(FuncNameVar, Builder.getInt8PtrTy()),
982
3
        Builder.getInt64(FunctionHash),
983
3
        Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
984
3
        Builder.getInt32(ValueKind),
985
3
        Builder.getInt32(NumValueSites[ValueKind]++)
986
3
    };
987
3
    Builder.CreateCall(
988
3
        CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
989
3
    Builder.restoreIP(BuilderInsertPoint);
990
3
    return;
991
3
  }
992
993
0
  llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
994
0
  if (PGOReader && haveRegionCounts()) {
995
    // We record the top most called three functions at each call site.
996
    // Profile metadata contains "VP" string identifying this metadata
997
    // as value profiling data, then a uint32_t value for the value profiling
998
    // kind, a uint64_t value for the total number of times the call is
999
    // executed, followed by the function hash and execution count (uint64_t)
1000
    // pairs for each function.
1001
0
    if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
1002
0
      return;
1003
1004
0
    llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
1005
0
                            (llvm::InstrProfValueKind)ValueKind,
1006
0
                            NumValueSites[ValueKind]);
1007
1008
0
    NumValueSites[ValueKind]++;
1009
0
  }
1010
0
}
1011
1012
void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
1013
190
                                  bool IsInMainFile) {
1014
190
  CGM.getPGOStats().addVisited(IsInMainFile);
1015
190
  RegionCounts.clear();
1016
190
  llvm::Expected<llvm::InstrProfRecord> RecordExpected =
1017
190
      PGOReader->getInstrProfRecord(FuncName, FunctionHash);
1018
190
  if (auto E = RecordExpected.takeError()) {
1019
15
    auto IPE = llvm::InstrProfError::take(std::move(E));
1020
15
    if (IPE == llvm::instrprof_error::unknown_function)
1021
6
      CGM.getPGOStats().addMissing(IsInMainFile);
1022
9
    else if (IPE == llvm::instrprof_error::hash_mismatch)
1023
9
      CGM.getPGOStats().addMismatched(IsInMainFile);
1024
0
    else if (IPE == llvm::instrprof_error::malformed)
1025
      // TODO: Consider a more specific warning for this case.
1026
0
      CGM.getPGOStats().addMismatched(IsInMainFile);
1027
15
    return;
1028
15
  }
1029
175
  ProfRecord =
1030
175
      std::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
1031
175
  RegionCounts = ProfRecord->Counts;
1032
175
}
1033
1034
/// Calculate what to divide by to scale weights.
///
/// Given the maximum weight, calculate a divisor that will scale all the
/// weights to strictly less than UINT32_MAX.
///
/// \returns 1 when \p MaxWeight is already below UINT32_MAX, otherwise
/// \p MaxWeight / UINT32_MAX + 1.
static uint64_t calculateWeightScale(uint64_t MaxWeight) {
  return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
}
1041
1042
/// Scale an individual branch weight (and add 1).
///
/// Scale a 64-bit weight down to 32-bits using \c Scale.
///
/// According to Laplace's Rule of Succession, it is better to compute the
/// weight based on the count plus 1, so universally add 1 to the value.
///
/// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
/// less than \c Weight.
///
/// \returns \p Weight / \p Scale + 1, narrowed to 32 bits.
static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
  assert(Scale && "scale by 0?");
  uint64_t Scaled = Weight / Scale + 1;
  assert(Scaled <= UINT32_MAX && "overflow 32-bits");
  // The assertion above guarantees the value fits; make the narrowing explicit.
  return static_cast<uint32_t>(Scaled);
}
1057
1058
/// Build branch-weight metadata for a two-way branch.
///
/// \returns nullptr when both counts are zero; otherwise a branch-weights node
/// whose two weights are \p TrueCount and \p FalseCount after being scaled
/// down by a common divisor and incremented by one (see scaleBranchWeight()).
llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
                                                    uint64_t FalseCount) const {
  // Check for empty weights.
  if (!TrueCount && !FalseCount)
    return nullptr;

  // Calculate how to scale down to 32-bits.
  uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
                                      scaleBranchWeight(FalseCount, Scale));
}
1071
1072
/// Build branch-weight metadata for a multi-way branch.
///
/// \returns nullptr when fewer than two weights are given or when every weight
/// is zero; otherwise a branch-weights node holding each entry of \p Weights
/// after being scaled down by a common divisor and incremented by one (see
/// scaleBranchWeight()).
llvm::MDNode *
CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) const {
  // We need at least two elements to create meaningful weights.
  if (Weights.size() < 2)
    return nullptr;

  // Check for empty weights.
  uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
  if (MaxWeight == 0)
    return nullptr;

  // Calculate how to scale down to 32-bits.
  uint64_t Scale = calculateWeightScale(MaxWeight);

  SmallVector<uint32_t, 16> ScaledWeights;
  ScaledWeights.reserve(Weights.size());
  for (uint64_t W : Weights)
    ScaledWeights.push_back(scaleBranchWeight(W, Scale));

  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
  return MDHelper.createBranchWeights(ScaledWeights);
}
1094
1095
/// Build branch-weight metadata for a loop condition.
///
/// \returns nullptr when no region counts are loaded or when the count for
/// \p Cond is unavailable or zero. Otherwise the weights are created from
/// \p LoopCount as the "true" count and the remainder of the condition count
/// as the "false" count.
llvm::MDNode *
CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
                                             uint64_t LoopCount) const {
  if (!PGO.haveRegionCounts())
    return nullptr;
  Optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
  if (!CondCount || *CondCount == 0)
    return nullptr;
  // Taking the max first clamps the subtraction at zero when LoopCount exceeds
  // the condition count, so the unsigned difference cannot wrap.
  return createProfileWeights(LoopCount,
                              std::max(*CondCount, LoopCount) - LoopCount);
}