Coverage Report

Created: 2018-08-19 14:04

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/lld/ELF/CallGraphSort.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- CallGraphSort.cpp --------------------------------------------------===//
2
//
3
//                             The LLVM Linker
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
///
10
/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
11
/// for Large-Scale Data-Center Applications
12
/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
13
///
14
/// The goal of this algorithm is to improve runtime performance of the final
15
/// executable by arranging code sections such that page table and i-cache
16
/// misses are minimized.
17
///
18
/// Definitions:
19
/// * Cluster
20
///   * An ordered list of input sections which are layed out as a unit. At the
21
///     beginning of the algorithm each input section has its own cluster and
22
///     the weight of the cluster is the sum of the weight of all incomming
23
///     edges.
24
/// * Call-Chain Clustering (C³) Heuristic
25
///   * Defines when and how clusters are combined. Pick the highest weighted
26
///     input section then add it to its most likely predecessor if it wouldn't
27
///     penalize it too much.
28
/// * Density
29
///   * The weight of the cluster divided by the size of the cluster. This is a
30
///     proxy for the ammount of execution time spent per byte of the cluster.
31
///
32
/// It does so given a call graph profile by the following:
33
/// * Build a weighted call graph from the call graph profile
34
/// * Sort input sections by weight
35
/// * For each input section starting with the highest weight
36
///   * Find its most likely predecessor cluster
37
///   * Check if the combined cluster would be too large, or would have too low
38
///     a density.
39
///   * If not, then combine the clusters.
40
/// * Sort non-empty clusters by density
41
///
42
//===----------------------------------------------------------------------===//
43
44
#include "CallGraphSort.h"
45
#include "OutputSections.h"
46
#include "SymbolTable.h"
47
#include "Symbols.h"
48
49
using namespace llvm;
50
using namespace lld;
51
using namespace lld::elf;
52
53
namespace {
54
struct Edge {
55
  int From;
56
  uint64_t Weight;
57
};
58
59
struct Cluster {
60
42
  Cluster(int Sec, size_t S) {
61
42
    Sections.push_back(Sec);
62
42
    Size = S;
63
42
  }
64
65
235
  double getDensity() const {
66
235
    if (Size == 0)
67
0
      return 0;
68
235
    return double(Weight) / double(Size);
69
235
  }
70
71
  std::vector<int> Sections;
72
  size_t Size = 0;
73
  uint64_t Weight = 0;
74
  uint64_t InitialWeight = 0;
75
  std::vector<Edge> Preds;
76
};
77
78
class CallGraphSort {
79
public:
80
  CallGraphSort();
81
82
  DenseMap<const InputSectionBase *, int> run();
83
84
private:
85
  std::vector<Cluster> Clusters;
86
  std::vector<const InputSectionBase *> Sections;
87
88
  void groupClusters();
89
};
90
91
// Maximum ammount the combined cluster density can be worse than the original
92
// cluster to consider merging.
93
constexpr int MAX_DENSITY_DEGRADATION = 8;
94
95
// Maximum cluster size in bytes.
96
constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
97
} // end anonymous namespace
98
99
// Take the edge list in Config->CallGraphProfile, resolve symbol names to
100
// Symbols, and generate a graph between InputSections with the provided
101
// weights.
102
7
CallGraphSort::CallGraphSort() {
103
7
  llvm::MapVector<std::pair<const InputSectionBase *, const InputSectionBase *>,
104
7
                  uint64_t> &Profile = Config->CallGraphProfile;
105
7
  DenseMap<const InputSectionBase *, int> SecToCluster;
106
7
107
72
  auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
108
72
    auto Res = SecToCluster.insert(std::make_pair(IS, Clusters.size()));
109
72
    if (Res.second) {
110
42
      Sections.push_back(IS);
111
42
      Clusters.emplace_back(Clusters.size(), IS->getSize());
112
42
    }
113
72
    return Res.first->second;
114
72
  };
115
7
116
7
  // Create the graph.
117
37
  for (const auto &C : Profile) {
118
37
    const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
119
37
    const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
120
37
    uint64_t Weight = C.second;
121
37
122
37
    // Ignore edges between input sections belonging to different output
123
37
    // sections.  This is done because otherwise we would end up with clusters
124
37
    // containing input sections that can't actually be placed adjacently in the
125
37
    // output.  This messes with the cluster size and density calculations.  We
126
37
    // would also end up moving input sections in other output sections without
127
37
    // moving them closer to what calls them.
128
37
    if (FromSB->getOutputSection() != ToSB->getOutputSection())
129
1
      continue;
130
36
131
36
    int From = GetOrCreateNode(FromSB);
132
36
    int To = GetOrCreateNode(ToSB);
133
36
134
36
    Clusters[To].Weight += Weight;
135
36
136
36
    if (From == To)
137
3
      continue;
138
33
139
33
    // Add an edge
140
33
    Clusters[To].Preds.push_back({From, Weight});
141
33
  }
142
7
  for (Cluster &C : Clusters)
143
42
    C.InitialWeight = C.Weight;
144
7
}
145
146
// It's bad to merge clusters which would degrade the density too much.
147
17
static bool isNewDensityBad(Cluster &A, Cluster &B) {
148
17
  double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
149
17
  if (NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION)
150
1
    return true;
151
16
  return false;
152
16
}
153
154
16
static void mergeClusters(Cluster &Into, Cluster &From) {
155
16
  Into.Sections.insert(Into.Sections.end(), From.Sections.begin(),
156
16
                       From.Sections.end());
157
16
  Into.Size += From.Size;
158
16
  Into.Weight += From.Weight;
159
16
  From.Sections.clear();
160
16
  From.Size = 0;
161
16
  From.Weight = 0;
162
16
}
163
164
// Group InputSections into clusters using the Call-Chain Clustering heuristic
165
// then sort the clusters by density.
166
7
void CallGraphSort::groupClusters() {
167
7
  std::vector<int> SortedSecs(Clusters.size());
168
7
  std::vector<Cluster *> SecToCluster(Clusters.size());
169
7
170
49
  for (size_t I = 0; I < Clusters.size(); 
++I42
) {
171
42
    SortedSecs[I] = I;
172
42
    SecToCluster[I] = &Clusters[I];
173
42
  }
174
7
175
79
  std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
176
79
    return Clusters[B].getDensity() < Clusters[A].getDensity();
177
79
  });
178
7
179
42
  for (int SI : SortedSecs) {
180
42
    // Clusters[SI] is the same as SecToClusters[SI] here because it has not
181
42
    // been merged into another cluster yet.
182
42
    Cluster &C = Clusters[SI];
183
42
184
42
    int BestPred = -1;
185
42
    uint64_t BestWeight = 0;
186
42
187
42
    for (Edge &E : C.Preds) {
188
33
      if (BestPred == -1 || 
E.Weight > BestWeight13
) {
189
22
        BestPred = E.From;
190
22
        BestWeight = E.Weight;
191
22
      }
192
33
    }
193
42
194
42
    // don't consider merging if the edge is unlikely.
195
42
    if (BestWeight * 10 <= C.InitialWeight)
196
23
      continue;
197
19
198
19
    Cluster *PredC = SecToCluster[BestPred];
199
19
    if (PredC == &C)
200
1
      continue;
201
18
202
18
    if (C.Size + PredC->Size > MAX_CLUSTER_SIZE)
203
1
      continue;
204
17
205
17
    if (isNewDensityBad(*PredC, C))
206
1
      continue;
207
16
208
16
    // NOTE: Consider using a disjoint-set to track section -> cluster mapping
209
16
    // if this is ever slow.
210
16
    for (int SI : C.Sections)
211
18
      SecToCluster[SI] = PredC;
212
16
213
16
    mergeClusters(*PredC, C);
214
16
  }
215
7
216
7
  // Remove empty or dead nodes. Invalidates all cluster indices.
217
42
  llvm::erase_if(Clusters, [](const Cluster &C) {
218
42
    return C.Size == 0 || 
C.Sections.empty()26
;
219
42
  });
220
7
221
7
  // Sort by density.
222
7
  std::stable_sort(Clusters.begin(), Clusters.end(),
223
30
                   [](const Cluster &A, const Cluster &B) {
224
30
                     return A.getDensity() > B.getDensity();
225
30
                   });
226
7
}
227
228
7
DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
229
7
  groupClusters();
230
7
231
7
  // Generate order.
232
7
  llvm::DenseMap<const InputSectionBase *, int> OrderMap;
233
7
  ssize_t CurOrder = 1;
234
7
235
7
  for (const Cluster &C : Clusters)
236
26
    for (int SecIndex : C.Sections)
237
42
      OrderMap[Sections[SecIndex]] = CurOrder++;
238
7
239
7
  return OrderMap;
240
7
}
241
242
// Sort sections by the profile data provided by -callgraph-profile-file
243
//
244
// This first builds a call graph based on the profile data then merges sections
245
// according to the C³ huristic. All clusters are then sorted by a density
246
// metric to further improve locality.
247
7
DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
248
7
  return CallGraphSort().run();
249
7
}