Coverage Report

Created: 2018-10-23 15:26

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/lld/ELF/CallGraphSort.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- CallGraphSort.cpp --------------------------------------------------===//
2
//
3
//                             The LLVM Linker
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
///
10
/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
11
/// for Large-Scale Data-Center Applications
12
/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
13
///
14
/// The goal of this algorithm is to improve runtime performance of the final
15
/// executable by arranging code sections such that page table and i-cache
16
/// misses are minimized.
17
///
18
/// Definitions:
19
/// * Cluster
20
///   * An ordered list of input sections which are layed out as a unit. At the
21
///     beginning of the algorithm each input section has its own cluster and
22
///     the weight of the cluster is the sum of the weight of all incomming
23
///     edges.
24
/// * Call-Chain Clustering (C³) Heuristic
25
///   * Defines when and how clusters are combined. Pick the highest weighted
26
///     input section then add it to its most likely predecessor if it wouldn't
27
///     penalize it too much.
28
/// * Density
29
///   * The weight of the cluster divided by the size of the cluster. This is a
30
///     proxy for the ammount of execution time spent per byte of the cluster.
31
///
32
/// It does so given a call graph profile by the following:
33
/// * Build a weighted call graph from the call graph profile
34
/// * Sort input sections by weight
35
/// * For each input section starting with the highest weight
36
///   * Find its most likely predecessor cluster
37
///   * Check if the combined cluster would be too large, or would have too low
38
///     a density.
39
///   * If not, then combine the clusters.
40
/// * Sort non-empty clusters by density
41
///
42
//===----------------------------------------------------------------------===//
43
44
#include "CallGraphSort.h"
45
#include "OutputSections.h"
46
#include "SymbolTable.h"
47
#include "Symbols.h"
48
49
using namespace llvm;
50
using namespace lld;
51
using namespace lld::elf;
52
53
namespace {
54
struct Edge {
55
  int From;
56
  uint64_t Weight;
57
};
58
59
struct Cluster {
60
48
  Cluster(int Sec, size_t S) : Sections{Sec}, Size(S) {}
61
62
250
  double getDensity() const {
63
250
    if (Size == 0)
64
0
      return 0;
65
250
    return double(Weight) / double(Size);
66
250
  }
67
68
  std::vector<int> Sections;
69
  size_t Size = 0;
70
  uint64_t Weight = 0;
71
  uint64_t InitialWeight = 0;
72
  Edge BestPred = {-1, 0};
73
};
74
75
class CallGraphSort {
76
public:
77
  CallGraphSort();
78
79
  DenseMap<const InputSectionBase *, int> run();
80
81
private:
82
  std::vector<Cluster> Clusters;
83
  std::vector<const InputSectionBase *> Sections;
84
85
  void groupClusters();
86
};
87
88
// Maximum ammount the combined cluster density can be worse than the original
89
// cluster to consider merging.
90
constexpr int MAX_DENSITY_DEGRADATION = 8;
91
92
// Maximum cluster size in bytes.
93
constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
94
} // end anonymous namespace
95
96
typedef std::pair<const InputSectionBase *, const InputSectionBase *>
97
    SectionPair;
98
99
// Take the edge list in Config->CallGraphProfile, resolve symbol names to
100
// Symbols, and generate a graph between InputSections with the provided
101
// weights.
102
10
CallGraphSort::CallGraphSort() {
103
10
  MapVector<SectionPair, uint64_t> &Profile = Config->CallGraphProfile;
104
10
  DenseMap<const InputSectionBase *, int> SecToCluster;
105
10
106
84
  auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
107
84
    auto Res = SecToCluster.insert(std::make_pair(IS, Clusters.size()));
108
84
    if (Res.second) {
109
48
      Sections.push_back(IS);
110
48
      Clusters.emplace_back(Clusters.size(), IS->getSize());
111
48
    }
112
84
    return Res.first->second;
113
84
  };
114
10
115
10
  // Create the graph.
116
43
  for (std::pair<SectionPair, uint64_t> &C : Profile) {
117
43
    const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
118
43
    const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
119
43
    uint64_t Weight = C.second;
120
43
121
43
    // Ignore edges between input sections belonging to different output
122
43
    // sections.  This is done because otherwise we would end up with clusters
123
43
    // containing input sections that can't actually be placed adjacently in the
124
43
    // output.  This messes with the cluster size and density calculations.  We
125
43
    // would also end up moving input sections in other output sections without
126
43
    // moving them closer to what calls them.
127
43
    if (FromSB->getOutputSection() != ToSB->getOutputSection())
128
1
      continue;
129
42
130
42
    int From = GetOrCreateNode(FromSB);
131
42
    int To = GetOrCreateNode(ToSB);
132
42
133
42
    Clusters[To].Weight += Weight;
134
42
135
42
    if (From == To)
136
5
      continue;
137
37
138
37
    // Remember the best edge.
139
37
    Cluster &ToC = Clusters[To];
140
37
    if (ToC.BestPred.From == -1 || 
ToC.BestPred.Weight < Weight14
) {
141
25
      ToC.BestPred.From = From;
142
25
      ToC.BestPred.Weight = Weight;
143
25
    }
144
37
  }
145
10
  for (Cluster &C : Clusters)
146
48
    C.InitialWeight = C.Weight;
147
10
}
148
149
// It's bad to merge clusters which would degrade the density too much.
150
20
static bool isNewDensityBad(Cluster &A, Cluster &B) {
151
20
  double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
152
20
  return NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION;
153
20
}
154
155
19
static void mergeClusters(Cluster &Into, Cluster &From) {
156
19
  Into.Sections.insert(Into.Sections.end(), From.Sections.begin(),
157
19
                       From.Sections.end());
158
19
  Into.Size += From.Size;
159
19
  Into.Weight += From.Weight;
160
19
  From.Sections.clear();
161
19
  From.Size = 0;
162
19
  From.Weight = 0;
163
19
}
164
165
// Group InputSections into clusters using the Call-Chain Clustering heuristic
166
// then sort the clusters by density.
167
10
void CallGraphSort::groupClusters() {
168
10
  std::vector<int> SortedSecs(Clusters.size());
169
10
  std::vector<Cluster *> SecToCluster(Clusters.size());
170
10
171
58
  for (size_t I = 0; I < Clusters.size(); 
++I48
) {
172
48
    SortedSecs[I] = I;
173
48
    SecToCluster[I] = &Clusters[I];
174
48
  }
175
10
176
85
  std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
177
85
    return Clusters[B].getDensity() < Clusters[A].getDensity();
178
85
  });
179
10
180
48
  for (int SI : SortedSecs) {
181
48
    // Clusters[SI] is the same as SecToClusters[SI] here because it has not
182
48
    // been merged into another cluster yet.
183
48
    Cluster &C = Clusters[SI];
184
48
185
48
    // Don't consider merging if the edge is unlikely.
186
48
    if (C.BestPred.From == -1 || 
C.BestPred.Weight * 10 <= C.InitialWeight23
)
187
26
      continue;
188
22
189
22
    Cluster *PredC = SecToCluster[C.BestPred.From];
190
22
    if (PredC == &C)
191
1
      continue;
192
21
193
21
    if (C.Size + PredC->Size > MAX_CLUSTER_SIZE)
194
1
      continue;
195
20
196
20
    if (isNewDensityBad(*PredC, C))
197
1
      continue;
198
19
199
19
    // NOTE: Consider using a disjoint-set to track section -> cluster mapping
200
19
    // if this is ever slow.
201
19
    for (int SI : C.Sections)
202
22
      SecToCluster[SI] = PredC;
203
19
204
19
    mergeClusters(*PredC, C);
205
19
  }
206
10
207
10
  // Remove empty or dead nodes. Invalidates all cluster indices.
208
48
  llvm::erase_if(Clusters, [](const Cluster &C) {
209
48
    return C.Size == 0 || 
C.Sections.empty()29
;
210
48
  });
211
10
212
10
  // Sort by density.
213
10
  std::stable_sort(Clusters.begin(), Clusters.end(),
214
30
                   [](const Cluster &A, const Cluster &B) {
215
30
                     return A.getDensity() > B.getDensity();
216
30
                   });
217
10
}
218
219
10
DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
220
10
  groupClusters();
221
10
222
10
  // Generate order.
223
10
  DenseMap<const InputSectionBase *, int> OrderMap;
224
10
  ssize_t CurOrder = 1;
225
10
226
10
  for (const Cluster &C : Clusters)
227
29
    for (int SecIndex : C.Sections)
228
48
      OrderMap[Sections[SecIndex]] = CurOrder++;
229
10
230
10
  return OrderMap;
231
10
}
232
233
// Sort sections by the profile data provided by -callgraph-profile-file
234
//
235
// This first builds a call graph based on the profile data then merges sections
236
// according to the C³ huristic. All clusters are then sorted by a density
237
// metric to further improve locality.
238
10
DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
239
10
  return CallGraphSort().run();
240
10
}