Coverage Report

Created: 2018-08-19 21:11

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/lld/COFF/ICF.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ICF.cpp ------------------------------------------------------------===//
2
//
3
//                             The LLVM Linker
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// ICF is short for Identical Code Folding. That is a size optimization to
11
// identify and merge two or more read-only sections (typically functions)
12
// that happened to have the same contents. It usually reduces output size
13
// by a few percent.
14
//
15
// On Windows, ICF is enabled by default.
16
//
17
// See ELF/ICF.cpp for the details about the algortihm.
18
//
19
//===----------------------------------------------------------------------===//
20
21
#include "ICF.h"
22
#include "Chunks.h"
23
#include "Symbols.h"
24
#include "lld/Common/ErrorHandler.h"
25
#include "lld/Common/Timer.h"
26
#include "llvm/ADT/Hashing.h"
27
#include "llvm/Support/Debug.h"
28
#include "llvm/Support/Parallel.h"
29
#include "llvm/Support/raw_ostream.h"
30
#include "llvm/Support/xxhash.h"
31
#include <algorithm>
32
#include <atomic>
33
#include <vector>
34
35
using namespace llvm;
36
37
namespace lld {
38
namespace coff {
39
40
static Timer ICFTimer("ICF", Timer::root());
41
42
class ICF {
43
public:
44
  void run(ArrayRef<Chunk *> V);
45
46
private:
47
  void segregate(size_t Begin, size_t End, bool Constant);
48
49
  bool assocEquals(const SectionChunk *A, const SectionChunk *B);
50
51
  bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
52
  bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
53
54
  uint32_t getHash(SectionChunk *C);
55
  bool isEligible(SectionChunk *C);
56
57
  size_t findBoundary(size_t Begin, size_t End);
58
59
  void forEachClassRange(size_t Begin, size_t End,
60
                         std::function<void(size_t, size_t)> Fn);
61
62
  void forEachClass(std::function<void(size_t, size_t)> Fn);
63
64
  std::vector<SectionChunk *> Chunks;
65
  int Cnt = 0;
66
  std::atomic<bool> Repeat = {false};
67
};
68
69
// Returns true if section S is subject of ICF.
70
//
71
// Microsoft's documentation
72
// (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April
73
// 2017) says that /opt:icf folds both functions and read-only data.
74
// Despite that, the MSVC linker folds only functions. We found
75
// a few instances of programs that are not safe for data merging.
76
// Therefore, we merge only functions just like the MSVC tool. However, we also
77
// merge read-only sections in a couple of cases where the address of the
78
// section is insignificant to the user program and the behaviour matches that
79
// of the Visual C++ linker.
80
976
bool ICF::isEligible(SectionChunk *C) {
81
976
  // Non-comdat chunks, dead chunks, and writable chunks are not elegible.
82
976
  bool Writable = C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
83
976
  if (!C->isCOMDAT() || 
!C->isLive()188
||
Writable164
)
84
817
    return false;
85
159
86
159
  // Code sections are eligible.
87
159
  if (C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE)
88
101
    return true;
89
58
90
58
  // .pdata and .xdata unwind info sections are eligible.
91
58
  StringRef OutSecName = C->getSectionName().split('$').first;
92
58
  if (OutSecName == ".pdata" || 
OutSecName == ".xdata"42
)
93
33
    return true;
94
25
95
25
  // So are vtables.
96
25
  return C->Sym && 
C->Sym->getName().startswith("??_7")20
;
97
25
}
98
99
// Split an equivalence class into smaller classes.
100
220
void ICF::segregate(size_t Begin, size_t End, bool Constant) {
101
442
  while (Begin < End) {
102
222
    // Divide [Begin, End) into two. Let Mid be the start index of the
103
222
    // second group.
104
222
    auto Bound = std::stable_partition(
105
222
        Chunks.begin() + Begin + 1, Chunks.begin() + End, [&](SectionChunk *S) {
106
52
          if (Constant)
107
27
            return equalsConstant(Chunks[Begin], S);
108
25
          return equalsVariable(Chunks[Begin], S);
109
25
        });
110
222
    size_t Mid = Bound - Chunks.begin();
111
222
112
222
    // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an
113
222
    // equivalence class ID because every group ends with a unique index.
114
494
    for (size_t I = Begin; I < Mid; 
++I272
)
115
272
      Chunks[I]->Class[(Cnt + 1) % 2] = Mid;
116
222
117
222
    // If we created a group, we need to iterate the main loop again.
118
222
    if (Mid != End)
119
2
      Repeat = true;
120
222
121
222
    Begin = Mid;
122
222
  }
123
220
}
124
125
// Returns true if two sections' associative children are equal.
126
51
bool ICF::assocEquals(const SectionChunk *A, const SectionChunk *B) {
127
102
  auto ChildClasses = [&](const SectionChunk *SC) {
128
102
    std::vector<uint32_t> Classes;
129
102
    for (const SectionChunk *C : SC->children())
130
22
      if (!C->SectionName.startswith(".debug") &&
131
22
          
C->SectionName != ".gfids$y"16
&&
C->SectionName != ".gljmp$y"14
)
132
12
        Classes.push_back(C->Class[Cnt % 2]);
133
102
    return Classes;
134
102
  };
135
51
  return ChildClasses(A) == ChildClasses(B);
136
51
}
137
138
// Compare "non-moving" part of two sections, namely everything
139
// except relocation targets.
140
27
bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
141
27
  if (A->Relocs.size() != B->Relocs.size())
142
0
    return false;
143
27
144
27
  // Compare relocations.
145
27
  auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
146
14
    if (R1.Type != R2.Type ||
147
14
        R1.VirtualAddress != R2.VirtualAddress) {
148
0
      return false;
149
0
    }
150
14
    Symbol *B1 = A->File->getSymbol(R1.SymbolTableIndex);
151
14
    Symbol *B2 = B->File->getSymbol(R2.SymbolTableIndex);
152
14
    if (B1 == B2)
153
6
      return true;
154
8
    if (auto *D1 = dyn_cast<DefinedRegular>(B1))
155
8
      if (auto *D2 = dyn_cast<DefinedRegular>(B2))
156
8
        return D1->getValue() == D2->getValue() &&
157
8
               D1->getChunk()->Class[Cnt % 2] == D2->getChunk()->Class[Cnt % 2];
158
0
    return false;
159
0
  };
160
27
  if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq))
161
1
    return false;
162
26
163
26
  // Compare section attributes and contents.
164
26
  return A->getOutputCharacteristics() == B->getOutputCharacteristics() &&
165
26
         A->SectionName == B->SectionName &&
166
26
         A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
167
26
         A->Checksum == B->Checksum && A->getContents() == B->getContents() &&
168
26
         assocEquals(A, B);
169
26
}
170
171
// Compare "moving" part of two sections, namely relocation targets.
172
25
bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
173
25
  // Compare relocations.
174
25
  auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
175
9
    Symbol *B1 = A->File->getSymbol(R1.SymbolTableIndex);
176
9
    Symbol *B2 = B->File->getSymbol(R2.SymbolTableIndex);
177
9
    if (B1 == B2)
178
4
      return true;
179
5
    if (auto *D1 = dyn_cast<DefinedRegular>(B1))
180
5
      if (auto *D2 = dyn_cast<DefinedRegular>(B2))
181
5
        return D1->getChunk()->Class[Cnt % 2] == D2->getChunk()->Class[Cnt % 2];
182
0
    return false;
183
0
  };
184
25
  return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(),
185
25
                    Eq) &&
186
25
         assocEquals(A, B);
187
25
}
188
189
// Find the first Chunk after Begin that has a different class from Begin.
190
331
size_t ICF::findBoundary(size_t Begin, size_t End) {
191
408
  for (size_t I = Begin + 1; I < End; 
++I77
)
192
219
    if (Chunks[Begin]->Class[Cnt % 2] != Chunks[I]->Class[Cnt % 2])
193
142
      return I;
194
331
  
return End189
;
195
331
}
196
197
void ICF::forEachClassRange(size_t Begin, size_t End,
198
936
                            std::function<void(size_t, size_t)> Fn) {
199
1.26k
  while (Begin < End) {
200
331
    size_t Mid = findBoundary(Begin, End);
201
331
    Fn(Begin, Mid);
202
331
    Begin = Mid;
203
331
  }
204
936
}
205
206
// Call Fn on each class group.
207
936
void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
208
936
  // If the number of sections are too small to use threading,
209
936
  // call Fn sequentially.
210
936
  if (Chunks.size() < 1024) {
211
936
    forEachClassRange(0, Chunks.size(), Fn);
212
936
    ++Cnt;
213
936
    return;
214
936
  }
215
0
216
0
  // Shard into non-overlapping intervals, and call Fn in parallel.
217
0
  // The sharding must be completed before any calls to Fn are made
218
0
  // so that Fn can modify the Chunks in its shard without causing data
219
0
  // races.
220
0
  const size_t NumShards = 256;
221
0
  size_t Step = Chunks.size() / NumShards;
222
0
  size_t Boundaries[NumShards + 1];
223
0
  Boundaries[0] = 0;
224
0
  Boundaries[NumShards] = Chunks.size();
225
0
  for_each_n(parallel::par, size_t(1), NumShards, [&](size_t I) {
226
0
    Boundaries[I] = findBoundary((I - 1) * Step, Chunks.size());
227
0
  });
228
0
  for_each_n(parallel::par, size_t(1), NumShards + 1, [&](size_t I) {
229
0
    if (Boundaries[I - 1] < Boundaries[I]) {
230
0
      forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn);
231
0
    }
232
0
  });
233
0
  ++Cnt;
234
0
}
235
236
// Merge identical COMDAT sections.
237
// Two sections are considered the same if their section headers,
238
// contents and relocations are all the same.
239
312
void ICF::run(ArrayRef<Chunk *> Vec) {
240
312
  ScopedTimer T(ICFTimer);
241
312
242
312
  // Collect only mergeable sections and group by hash value.
243
312
  uint32_t NextId = 1;
244
988
  for (Chunk *C : Vec) {
245
988
    if (auto *SC = dyn_cast<SectionChunk>(C)) {
246
976
      if (isEligible(SC))
247
136
        Chunks.push_back(SC);
248
840
      else
249
840
        SC->Class[0] = NextId++;
250
976
    }
251
988
  }
252
312
253
312
  // Make sure that ICF doesn't merge sections that are being handled by string
254
312
  // tail merging.
255
312
  for (auto &P : MergeChunk::Instances)
256
2
    for (SectionChunk *SC : P.second->Sections)
257
5
      SC->Class[0] = NextId++;
258
312
259
312
  // Initially, we use hash values to partition sections.
260
312
  for_each(parallel::par, Chunks.begin(), Chunks.end(), [&](SectionChunk *SC) {
261
136
    // Set MSB to 1 to avoid collisions with non-hash classs.
262
136
    SC->Class[0] = xxHash64(SC->getContents()) | (1 << 31);
263
136
  });
264
312
265
312
  // From now on, sections in Chunks are ordered so that sections in
266
312
  // the same group are consecutive in the vector.
267
312
  std::stable_sort(Chunks.begin(), Chunks.end(),
268
312
                   [](SectionChunk *A, SectionChunk *B) {
269
116
                     return A->Class[0] < B->Class[0];
270
116
                   });
271
312
272
312
  // Compare static contents and assign unique IDs for each static content.
273
312
  forEachClass([&](size_t Begin, size_t End) 
{ segregate(Begin, End, true); }109
);
274
312
275
312
  // Split groups by comparing relocations until convergence is obtained.
276
312
  do {
277
312
    Repeat = false;
278
312
    forEachClass(
279
312
        [&](size_t Begin, size_t End) 
{ segregate(Begin, End, false); }111
);
280
312
  } while (Repeat);
281
312
282
312
  log("ICF needed " + Twine(Cnt) + " iterations");
283
312
284
312
  // Merge sections in the same classs.
285
312
  forEachClass([&](size_t Begin, size_t End) {
286
111
    if (End - Begin == 1)
287
91
      return;
288
20
289
20
    log("Selected " + Chunks[Begin]->getDebugName());
290
45
    for (size_t I = Begin + 1; I < End; 
++I25
) {
291
25
      log("  Removed " + Chunks[I]->getDebugName());
292
25
      Chunks[Begin]->replace(Chunks[I]);
293
25
    }
294
20
  });
295
312
}
296
297
// Entry point to ICF.
298
312
void doICF(ArrayRef<Chunk *> Chunks) { ICF().run(Chunks); }
299
300
} // namespace coff
301
} // namespace lld