Coverage Report

Created: 2018-06-18 20:01

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/lld/ELF/CallGraphSort.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- CallGraphSort.cpp --------------------------------------------------===//
2
//
3
//                             The LLVM Linker
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
///
10
/// Implementation of Call-Chain Clustering from: Optimizing Function Placement
11
/// for Large-Scale Data-Center Applications
12
/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
13
///
14
/// The goal of this algorithm is to improve runtime performance of the final
15
/// executable by arranging code sections such that page table and i-cache
16
/// misses are minimized.
17
///
18
/// Definitions:
19
/// * Cluster
20
///   * An ordered list of input sections which are layed out as a unit. At the
21
///     beginning of the algorithm each input section has its own cluster and
22
///     the weight of the cluster is the sum of the weight of all incomming
23
///     edges.
24
/// * Call-Chain Clustering (C³) Heuristic
25
///   * Defines when and how clusters are combined. Pick the highest weighted
26
///     input section then add it to its most likely predecessor if it wouldn't
27
///     penalize it too much.
28
/// * Density
29
///   * The weight of the cluster divided by the size of the cluster. This is a
30
///     proxy for the ammount of execution time spent per byte of the cluster.
31
///
32
/// It does so given a call graph profile by the following:
33
/// * Build a weighted call graph from the call graph profile
34
/// * Sort input sections by weight
35
/// * For each input section starting with the highest weight
36
///   * Find its most likely predecessor cluster
37
///   * Check if the combined cluster would be too large, or would have too low
38
///     a density.
39
///   * If not, then combine the clusters.
40
/// * Sort non-empty clusters by density
41
///
42
//===----------------------------------------------------------------------===//
43
44
#include "CallGraphSort.h"
45
#include "OutputSections.h"
46
#include "SymbolTable.h"
47
#include "Symbols.h"
48
49
using namespace llvm;
50
using namespace lld;
51
using namespace lld::elf;
52
53
namespace {
54
struct Edge {
55
  int From;
56
  uint64_t Weight;
57
};
58
59
struct Cluster {
60
37
  Cluster(int Sec, size_t S) {
61
37
    Sections.push_back(Sec);
62
37
    Size = S;
63
37
  }
64
65
219
  double getDensity() const {
66
219
    if (Size == 0)
67
0
      return 0;
68
219
    return double(Weight) / double(Size);
69
219
  }
70
71
  std::vector<int> Sections;
72
  size_t Size = 0;
73
  uint64_t Weight = 0;
74
  uint64_t InitialWeight = 0;
75
  std::vector<Edge> Preds;
76
};
77
78
class CallGraphSort {
79
public:
80
  CallGraphSort();
81
82
  DenseMap<const InputSectionBase *, int> run();
83
84
private:
85
  std::vector<Cluster> Clusters;
86
  std::vector<const InputSectionBase *> Sections;
87
88
  void groupClusters();
89
};
90
91
// Maximum ammount the combined cluster density can be worse than the original
92
// cluster to consider merging.
93
constexpr int MAX_DENSITY_DEGRADATION = 8;
94
95
// Maximum cluster size in bytes.
96
constexpr uint64_t MAX_CLUSTER_SIZE = 1024 * 1024;
97
} // end anonymous namespace
98
99
// Take the edge list in Config->CallGraphProfile, resolve symbol names to
100
// Symbols, and generate a graph between InputSections with the provided
101
// weights.
102
5
CallGraphSort::CallGraphSort() {
103
5
  llvm::MapVector<std::pair<const InputSectionBase *, const InputSectionBase *>,
104
5
                  uint64_t> &Profile = Config->CallGraphProfile;
105
5
  DenseMap<const InputSectionBase *, int> SecToCluster;
106
5
107
62
  auto GetOrCreateNode = [&](const InputSectionBase *IS) -> int {
108
62
    auto Res = SecToCluster.insert(std::make_pair(IS, Clusters.size()));
109
62
    if (Res.second) {
110
37
      Sections.push_back(IS);
111
37
      Clusters.emplace_back(Clusters.size(), IS->getSize());
112
37
    }
113
62
    return Res.first->second;
114
62
  };
115
5
116
5
  // Create the graph.
117
32
  for (const auto &C : Profile) {
118
32
    const auto *FromSB = cast<InputSectionBase>(C.first.first->Repl);
119
32
    const auto *ToSB = cast<InputSectionBase>(C.first.second->Repl);
120
32
    uint64_t Weight = C.second;
121
32
122
32
    // Ignore edges between input sections belonging to different output
123
32
    // sections.  This is done because otherwise we would end up with clusters
124
32
    // containing input sections that can't actually be placed adjacently in the
125
32
    // output.  This messes with the cluster size and density calculations.  We
126
32
    // would also end up moving input sections in other output sections without
127
32
    // moving them closer to what calls them.
128
32
    if (FromSB->getOutputSection() != ToSB->getOutputSection())
129
1
      continue;
130
31
131
31
    int From = GetOrCreateNode(FromSB);
132
31
    int To = GetOrCreateNode(ToSB);
133
31
134
31
    Clusters[To].Weight += Weight;
135
31
136
31
    if (From == To)
137
2
      continue;
138
29
139
29
    // Add an edge
140
29
    Clusters[To].Preds.push_back({From, Weight});
141
29
  }
142
5
  for (Cluster &C : Clusters)
143
37
    C.InitialWeight = C.Weight;
144
5
}
145
146
// It's bad to merge clusters which would degrade the density too much.
147
15
static bool isNewDensityBad(Cluster &A, Cluster &B) {
148
15
  double NewDensity = double(A.Weight + B.Weight) / double(A.Size + B.Size);
149
15
  if (NewDensity < A.getDensity() / MAX_DENSITY_DEGRADATION)
150
1
    return true;
151
14
  return false;
152
14
}
153
154
14
static void mergeClusters(Cluster &Into, Cluster &From) {
155
14
  Into.Sections.insert(Into.Sections.end(), From.Sections.begin(),
156
14
                       From.Sections.end());
157
14
  Into.Size += From.Size;
158
14
  Into.Weight += From.Weight;
159
14
  From.Sections.clear();
160
14
  From.Size = 0;
161
14
  From.Weight = 0;
162
14
}
163
164
// Group InputSections into clusters using the Call-Chain Clustering heuristic
165
// then sort the clusters by density.
166
5
void CallGraphSort::groupClusters() {
167
5
  std::vector<int> SortedSecs(Clusters.size());
168
5
  std::vector<Cluster *> SecToCluster(Clusters.size());
169
5
170
42
  for (int SI = 0, SE = Clusters.size(); SI != SE; 
++SI37
) {
171
37
    SortedSecs[SI] = SI;
172
37
    SecToCluster[SI] = &Clusters[SI];
173
37
  }
174
5
175
73
  std::stable_sort(SortedSecs.begin(), SortedSecs.end(), [&](int A, int B) {
176
73
    return Clusters[B].getDensity() < Clusters[A].getDensity();
177
73
  });
178
5
179
37
  for (int SI : SortedSecs) {
180
37
    // Clusters[SI] is the same as SecToClusters[SI] here because it has not
181
37
    // been merged into another cluster yet.
182
37
    Cluster &C = Clusters[SI];
183
37
184
37
    int BestPred = -1;
185
37
    uint64_t BestWeight = 0;
186
37
187
37
    for (Edge &E : C.Preds) {
188
29
      if (BestPred == -1 || 
E.Weight > BestWeight12
) {
189
18
        BestPred = E.From;
190
18
        BestWeight = E.Weight;
191
18
      }
192
29
    }
193
37
194
37
    // don't consider merging if the edge is unlikely.
195
37
    if (BestWeight * 10 <= C.InitialWeight)
196
21
      continue;
197
16
198
16
    Cluster *PredC = SecToCluster[BestPred];
199
16
    if (PredC == &C)
200
0
      continue;
201
16
202
16
    if (C.Size + PredC->Size > MAX_CLUSTER_SIZE)
203
1
      continue;
204
15
205
15
    if (isNewDensityBad(*PredC, C))
206
1
      continue;
207
14
208
14
    // NOTE: Consider using a disjoint-set to track section -> cluster mapping
209
14
    // if this is ever slow.
210
14
    for (int SI : C.Sections)
211
16
      SecToCluster[SI] = PredC;
212
14
213
14
    mergeClusters(*PredC, C);
214
14
  }
215
5
216
5
  // Remove empty or dead nodes. Invalidates all cluster indices.
217
37
  llvm::erase_if(Clusters, [](const Cluster &C) {
218
37
    return C.Size == 0 || 
C.Sections.empty()23
;
219
37
  });
220
5
221
5
  // Sort by density.
222
5
  std::stable_sort(Clusters.begin(), Clusters.end(),
223
29
                   [](const Cluster &A, const Cluster &B) {
224
29
                     return A.getDensity() > B.getDensity();
225
29
                   });
226
5
}
227
228
5
DenseMap<const InputSectionBase *, int> CallGraphSort::run() {
229
5
  groupClusters();
230
5
231
5
  // Generate order.
232
5
  llvm::DenseMap<const InputSectionBase *, int> OrderMap;
233
5
  ssize_t CurOrder = 1;
234
5
235
5
  for (const Cluster &C : Clusters)
236
23
    for (int SecIndex : C.Sections)
237
37
      OrderMap[Sections[SecIndex]] = CurOrder++;
238
5
239
5
  return OrderMap;
240
5
}
241
242
// Sort sections by the profile data provided by -callgraph-profile-file
243
//
244
// This first builds a call graph based on the profile data then merges sections
245
// according to the C³ huristic. All clusters are then sorted by a density
246
// metric to further improve locality.
247
5
DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() {
248
5
  return CallGraphSort().run();
249
5
}