Coverage Report

Created: 2019-05-19 14:56

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/lld/ELF/CallGraphSort.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- CallGraphSort.cpp --------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
10
/// for Large-Scale Data-Center Applications
11
/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
12
///
13
/// The goal of this algorithm is to improve runtime performance of the final
14
/// executable by arranging code sections such that page table and i-cache
15
/// misses are minimized.
16
///
17
/// Definitions:
18
/// * Cluster
19
///   * An ordered list of input sections which are layed out as a unit. At the
20
///     beginning of the algorithm each input section has its own cluster and
21
///     the weight of the cluster is the sum of the weight of all incomming
22
///     edges.
23
/// * Call-Chain Clustering (C³) Heuristic
24
///   * Defines when and how clusters are combined. Pick the highest weighted
25
///     input section then add it to its most likely predecessor if it wouldn't
26
///     penalize it too much.
27
/// * Density
28
///   * The weight of the cluster divided by the size of the cluster. This is a
29
///     proxy for the ammount of execution time spent per byte of the cluster.
30
///
31
/// It does so given a call graph profile by the following:
32
/// * Build a weighted call graph from the call graph profile
33
/// * Sort input sections by weight
34
/// * For each input section starting with the highest weight
35
///   * Find its most likely predecessor cluster
36
///   * Check if the combined cluster would be too large, or would have too low
37
///     a density.
38
///   * If not, then combine the clusters.
39
/// * Sort non-empty clusters by density
40
///
41
//===----------------------------------------------------------------------===//
42
43
#include "CallGraphSort.h"
44
#include "OutputSections.h"
45
#include "SymbolTable.h"
46
#include "Symbols.h"
47
48
using namespace llvm;
49
using namespace lld;
50
using namespace lld::elf;
51
52
namespace {
53
struct Edge {
54
  int From;
55
  uint64_t Weight;
56
};
57
58
struct Cluster {
59
54
  Cluster(int Sec, size_t S) : Sections{Sec}, Size(S) {}
60
61
282
  double getDensity() const {
62
282
    if (Size == 0)
63
0
      return 0;
64
282
    return double(Weight) / double(Size);
65
282
  }
66
67
  std::vector<int> Sections;
68
  size_t Size = 0;
69
  uint64_t Weight = 0;
70
  uint64_t InitialWeight = 0;
71
  Edge BestPred = {-1, 0};
72
};
73
74
class CallGraphSort {
75
public:
76
  CallGraphSort();
77
78
  DenseMap<const InputSectionBase *, int> run();
79
80
private:
81
  std::vector<Cluster> Clusters;
82
  std::vector<const InputSectionBase *> Sections;
83
84
  void groupClusters();
85
};
86
87
// Maximum ammount the combined cluster density can be worse than the original
88
// cluster to consider merging.
89
constexpr int MAX_DENSITY_DEGRADATION = 8;
90
91
// Maximum cluster size in bytes.
92
constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
93
} // end anonymous namespace
94
95
using SectionPair =
96
    std::pair<const InputSectionBase *, const InputSectionBase *>;
97
98
// Take the edge list in Config->CallGraphProfile, resolve symbol names to
99
// Symbols, and generate a graph between InputSections with the provided
100
// weights.
101
10
CallGraphSort::CallGraphSort() {
102
10
  MapVector<SectionPair, uint64_t> &Profile = Config->CallGraphProfile;
103
10
  DenseMap<const InputSectionBase *, int> SecToCluster;
104
10
105
96
  auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
106
96
    auto Res = SecToCluster.insert(std::make_pair(IS, Clusters.size()));
107
96
    if (Res.second) {
108
54
      Sections.push_back(IS);
109
54
      Clusters.emplace_back(Clusters.size(), IS->getSize());
110
54
    }
111
96
    return Res.first->second;
112
96
  };
113
10
114
10
  // Create the graph.
115
49
  for (std::pair<SectionPair, uint64_t> &C : Profile) {
116
49
    const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
117
49
    const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
118
49
    uint64_t Weight = C.second;
119
49
120
49
    // Ignore edges between input sections belonging to different output
121
49
    // sections.  This is done because otherwise we would end up with clusters
122
49
    // containing input sections that can't actually be placed adjacently in the
123
49
    // output.  This messes with the cluster size and density calculations.  We
124
49
    // would also end up moving input sections in other output sections without
125
49
    // moving them closer to what calls them.
126
49
    if (FromSB->getOutputSection() != ToSB->getOutputSection())
127
1
      continue;
128
48
129
48
    int From = GetOrCreateNode(FromSB);
130
48
    int To = GetOrCreateNode(ToSB);
131
48
132
48
    Clusters[To].Weight += Weight;
133
48
134
48
    if (From == To)
135
3
      continue;
136
45
137
45
    // Remember the best edge.
138
45
    Cluster &ToC = Clusters[To];
139
45
    if (ToC.BestPred.From == -1 || 
ToC.BestPred.Weight < Weight16
) {
140
33
      ToC.BestPred.From = From;
141
33
      ToC.BestPred.Weight = Weight;
142
33
    }
143
45
  }
144
10
  for (Cluster &C : Clusters)
145
54
    C.InitialWeight = C.Weight;
146
10
}
147
148
// It's bad to merge clusters which would degrade the density too much.
149
24
static bool isNewDensityBad(Cluster &A, Cluster &B) {
150
24
  double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
151
24
  return NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION;
152
24
}
153
154
23
static void mergeClusters(Cluster &Into, Cluster &From) {
155
23
  Into.Sections.insert(Into.Sections.end(), From.Sections.begin(),
156
23
                       From.Sections.end());
157
23
  Into.Size += From.Size;
158
23
  Into.Weight += From.Weight;
159
23
  From.Sections.clear();
160
23
  From.Size = 0;
161
23
  From.Weight = 0;
162
23
}
163
164
// Group InputSections into clusters using the Call-Chain Clustering heuristic
165
// then sort the clusters by density.
166
10
void CallGraphSort::groupClusters() {
167
10
  std::vector<int> SortedSecs(Clusters.size());
168
10
  std::vector<Cluster *> SecToCluster(Clusters.size());
169
10
170
64
  for (size_t I = 0; I < Clusters.size(); 
++I54
) {
171
54
    SortedSecs[I] = I;
172
54
    SecToCluster[I] = &Clusters[I];
173
54
  }
174
10
175
97
  llvm::stable_sort(SortedSecs, [&](int A, int B) {
176
97
    return Clusters[A].getDensity() > Clusters[B].getDensity();
177
97
  });
178
10
179
54
  for (int SI : SortedSecs) {
180
54
    // Clusters[SI] is the same as SecToClusters[SI] here because it has not
181
54
    // been merged into another cluster yet.
182
54
    Cluster &C = Clusters[SI];
183
54
184
54
    // Don't consider merging if the edge is unlikely.
185
54
    if (C.BestPred.From == -1 || 
C.BestPred.Weight * 10 <= C.InitialWeight29
)
186
26
      continue;
187
28
188
28
    Cluster *PredC = SecToCluster[C.BestPred.From];
189
28
    if (PredC == &C)
190
3
      continue;
191
25
192
25
    if (C.Size + PredC->Size > MAX_CLUSTER_SIZE)
193
1
      continue;
194
24
195
24
    if (isNewDensityBad(*PredC, C))
196
1
      continue;
197
23
198
23
    // NOTE: Consider using a disjoint-set to track section -> cluster mapping
199
23
    // if this is ever slow.
200
23
    for (int SI : C.Sections)
201
26
      SecToCluster[SI] = PredC;
202
23
203
23
    mergeClusters(*PredC, C);
204
23
  }
205
10
206
10
  // Remove empty or dead nodes. Invalidates all cluster indices.
207
54
  llvm::erase_if(Clusters, [](const Cluster &C) {
208
54
    return C.Size == 0 || 
C.Sections.empty()31
;
209
54
  });
210
10
211
10
  // Sort by density.
212
32
  llvm::stable_sort(Clusters, [](const Cluster &A, const Cluster &B) {
213
32
    return A.getDensity() > B.getDensity();
214
32
  });
215
10
}
216
217
10
DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
218
10
  groupClusters();
219
10
220
10
  // Generate order.
221
10
  DenseMap<const InputSectionBase *, int> OrderMap;
222
10
  ssize_t CurOrder = 1;
223
10
224
10
  for (const Cluster &C : Clusters)
225
31
    for (int SecIndex : C.Sections)
226
54
      OrderMap[Sections[SecIndex]] = CurOrder++;
227
10
228
10
  if (!Config->PrintSymbolOrder.empty()) {
229
2
    std::error_code EC;
230
2
    raw_fd_ostream OS(Config->PrintSymbolOrder, EC, sys::fs::F_None);
231
2
    if (EC) {
232
0
      error("cannot open " + Config->PrintSymbolOrder + ": " + EC.message());
233
0
      return OrderMap;
234
0
    }
235
2
236
2
    // Print the symbols ordered by C3, in the order of increasing CurOrder
237
2
    // Instead of sorting all the OrderMap, just repeat the loops above.
238
2
    for (const Cluster &C : Clusters)
239
4
      for (int SecIndex : C.Sections)
240
8
        // Search all the symbols in the file of the section
241
8
        // and find out a Defined symbol with name that is within the section.
242
8
        for (Symbol *Sym: Sections[SecIndex]->File->getSymbols())
243
40
          if (!Sym->isSection()) // Filter out section-type symbols here.
244
40
            if (auto *D = dyn_cast<Defined>(Sym))
245
32
              if (Sections[SecIndex] == D->Section)
246
8
                OS << Sym->getName() << "\n";
247
2
  }
248
10
249
10
  return OrderMap;
250
10
}
251
252
// Sort sections by the profile data provided by -callgraph-profile-file
253
//
254
// This first builds a call graph based on the profile data then merges sections
255
// according to the C³ huristic. All clusters are then sorted by a density
256
// metric to further improve locality.
257
10
DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
258
10
  return CallGraphSort().run();
259
10
}