Coverage Report

Created: 2018-10-23 15:26

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/lld/COFF/ICF.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- ICF.cpp ------------------------------------------------------------===//
2
//
3
//                             The LLVM Linker
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// ICF is short for Identical Code Folding. That is a size optimization to
11
// identify and merge two or more read-only sections (typically functions)
12
// that happened to have the same contents. It usually reduces output size
13
// by a few percent.
14
//
15
// On Windows, ICF is enabled by default.
16
//
17
// See ELF/ICF.cpp for the details about the algorithm.
18
//
19
//===----------------------------------------------------------------------===//
20
21
#include "ICF.h"
22
#include "Chunks.h"
23
#include "Symbols.h"
24
#include "lld/Common/ErrorHandler.h"
25
#include "lld/Common/Timer.h"
26
#include "llvm/ADT/Hashing.h"
27
#include "llvm/Support/Debug.h"
28
#include "llvm/Support/Parallel.h"
29
#include "llvm/Support/raw_ostream.h"
30
#include "llvm/Support/xxhash.h"
31
#include <algorithm>
32
#include <atomic>
33
#include <vector>
34
35
using namespace llvm;
36
37
namespace lld {
38
namespace coff {
39
40
// Timer labelled "ICF", constructed with Timer::root() as its second argument
// (presumably its parent timer -- confirm against lld/Common/Timer.h). It is
// handed to the ScopedTimer at the top of ICF::run.
static Timer ICFTimer("ICF", Timer::root());
41
42
class ICF {
43
public:
44
  void run(ArrayRef<Chunk *> V);
45
46
private:
47
  void segregate(size_t Begin, size_t End, bool Constant);
48
49
  bool assocEquals(const SectionChunk *A, const SectionChunk *B);
50
51
  bool equalsConstant(const SectionChunk *A, const SectionChunk *B);
52
  bool equalsVariable(const SectionChunk *A, const SectionChunk *B);
53
54
  uint32_t getHash(SectionChunk *C);
55
  bool isEligible(SectionChunk *C);
56
57
  size_t findBoundary(size_t Begin, size_t End);
58
59
  void forEachClassRange(size_t Begin, size_t End,
60
                         std::function<void(size_t, size_t)> Fn);
61
62
  void forEachClass(std::function<void(size_t, size_t)> Fn);
63
64
  std::vector<SectionChunk *> Chunks;
65
  int Cnt = 0;
66
  std::atomic<bool> Repeat = {false};
67
};
68
69
// Returns true if section S is subject of ICF.
70
//
71
// Microsoft's documentation
72
// (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April
73
// 2017) says that /opt:icf folds both functions and read-only data.
74
// Despite that, the MSVC linker folds only functions. We found
75
// a few instances of programs that are not safe for data merging.
76
// Therefore, we merge only functions just like the MSVC tool. However, we also
77
// merge read-only sections in a couple of cases where the address of the
78
// section is insignificant to the user program and the behaviour matches that
79
// of the Visual C++ linker.
80
1.32k
bool ICF::isEligible(SectionChunk *C) {
81
1.32k
  // Non-comdat chunks, dead chunks, and writable chunks are not elegible.
82
1.32k
  bool Writable = C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE;
83
1.32k
  if (!C->isCOMDAT() || 
!C->Live207
||
Writable185
)
84
1.14k
    return false;
85
180
86
180
  // Code sections are eligible.
87
180
  if (C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE)
88
101
    return true;
89
79
90
79
  // .pdata and .xdata unwind info sections are eligible.
91
79
  StringRef OutSecName = C->getSectionName().split('$').first;
92
79
  if (OutSecName == ".pdata" || 
OutSecName == ".xdata"62
)
93
35
    return true;
94
44
95
44
  // So are vtables.
96
44
  if (C->Sym && 
C->Sym->getName().startswith("??_7")33
)
97
2
    return true;
98
42
99
42
  // Anything else not in an address-significance table is eligible.
100
42
  return !C->KeepUnique;
101
42
}
102
103
// Split an equivalence class into smaller classes.
104
226
void ICF::segregate(size_t Begin, size_t End, bool Constant) {
105
454
  while (Begin < End) {
106
228
    // Divide [Begin, End) into two. Let Mid be the start index of the
107
228
    // second group.
108
228
    auto Bound = std::stable_partition(
109
228
        Chunks.begin() + Begin + 1, Chunks.begin() + End, [&](SectionChunk *S) {
110
54
          if (Constant)
111
28
            return equalsConstant(Chunks[Begin], S);
112
26
          return equalsVariable(Chunks[Begin], S);
113
26
        });
114
228
    size_t Mid = Bound - Chunks.begin();
115
228
116
228
    // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an
117
228
    // equivalence class ID because every group ends with a unique index.
118
508
    for (size_t I = Begin; I < Mid; 
++I280
)
119
280
      Chunks[I]->Class[(Cnt + 1) % 2] = Mid;
120
228
121
228
    // If we created a group, we need to iterate the main loop again.
122
228
    if (Mid != End)
123
2
      Repeat = true;
124
228
125
228
    Begin = Mid;
126
228
  }
127
226
}
128
129
// Returns true if two sections' associative children are equal.
130
53
bool ICF::assocEquals(const SectionChunk *A, const SectionChunk *B) {
131
106
  auto ChildClasses = [&](const SectionChunk *SC) {
132
106
    std::vector<uint32_t> Classes;
133
106
    for (const SectionChunk *C : SC->children())
134
22
      if (!C->SectionName.startswith(".debug") &&
135
22
          
C->SectionName != ".gfids$y"16
&&
C->SectionName != ".gljmp$y"14
)
136
12
        Classes.push_back(C->Class[Cnt % 2]);
137
106
    return Classes;
138
106
  };
139
53
  return ChildClasses(A) == ChildClasses(B);
140
53
}
141
142
// Compare "non-moving" part of two sections, namely everything
143
// except relocation targets.
144
28
bool ICF::equalsConstant(const SectionChunk *A, const SectionChunk *B) {
145
28
  if (A->Relocs.size() != B->Relocs.size())
146
0
    return false;
147
28
148
28
  // Compare relocations.
149
28
  auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
150
14
    if (R1.Type != R2.Type ||
151
14
        R1.VirtualAddress != R2.VirtualAddress) {
152
0
      return false;
153
0
    }
154
14
    Symbol *B1 = A->File->getSymbol(R1.SymbolTableIndex);
155
14
    Symbol *B2 = B->File->getSymbol(R2.SymbolTableIndex);
156
14
    if (B1 == B2)
157
6
      return true;
158
8
    if (auto *D1 = dyn_cast<DefinedRegular>(B1))
159
8
      if (auto *D2 = dyn_cast<DefinedRegular>(B2))
160
8
        return D1->getValue() == D2->getValue() &&
161
8
               D1->getChunk()->Class[Cnt % 2] == D2->getChunk()->Class[Cnt % 2];
162
0
    return false;
163
0
  };
164
28
  if (!std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(), Eq))
165
1
    return false;
166
27
167
27
  // Compare section attributes and contents.
168
27
  return A->getOutputCharacteristics() == B->getOutputCharacteristics() &&
169
27
         A->SectionName == B->SectionName &&
170
27
         A->Header->SizeOfRawData == B->Header->SizeOfRawData &&
171
27
         A->Checksum == B->Checksum && A->getContents() == B->getContents() &&
172
27
         assocEquals(A, B);
173
27
}
174
175
// Compare "moving" part of two sections, namely relocation targets.
176
26
bool ICF::equalsVariable(const SectionChunk *A, const SectionChunk *B) {
177
26
  // Compare relocations.
178
26
  auto Eq = [&](const coff_relocation &R1, const coff_relocation &R2) {
179
9
    Symbol *B1 = A->File->getSymbol(R1.SymbolTableIndex);
180
9
    Symbol *B2 = B->File->getSymbol(R2.SymbolTableIndex);
181
9
    if (B1 == B2)
182
4
      return true;
183
5
    if (auto *D1 = dyn_cast<DefinedRegular>(B1))
184
5
      if (auto *D2 = dyn_cast<DefinedRegular>(B2))
185
5
        return D1->getChunk()->Class[Cnt % 2] == D2->getChunk()->Class[Cnt % 2];
186
0
    return false;
187
0
  };
188
26
  return std::equal(A->Relocs.begin(), A->Relocs.end(), B->Relocs.begin(),
189
26
                    Eq) &&
190
26
         assocEquals(A, B);
191
26
}
192
193
// Find the first Chunk after Begin that has a different class from Begin.
194
340
size_t ICF::findBoundary(size_t Begin, size_t End) {
195
420
  for (size_t I = Begin + 1; I < End; 
++I80
)
196
228
    if (Chunks[Begin]->Class[Cnt % 2] != Chunks[I]->Class[Cnt % 2])
197
148
      return I;
198
340
  
return End192
;
199
340
}
200
201
void ICF::forEachClassRange(size_t Begin, size_t End,
202
1.07k
                            std::function<void(size_t, size_t)> Fn) {
203
1.41k
  while (Begin < End) {
204
340
    size_t Mid = findBoundary(Begin, End);
205
340
    Fn(Begin, Mid);
206
340
    Begin = Mid;
207
340
  }
208
1.07k
}
209
210
// Call Fn on each class group.
211
1.07k
void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
212
1.07k
  // If the number of sections are too small to use threading,
213
1.07k
  // call Fn sequentially.
214
1.07k
  if (Chunks.size() < 1024) {
215
1.07k
    forEachClassRange(0, Chunks.size(), Fn);
216
1.07k
    ++Cnt;
217
1.07k
    return;
218
1.07k
  }
219
0
220
0
  // Shard into non-overlapping intervals, and call Fn in parallel.
221
0
  // The sharding must be completed before any calls to Fn are made
222
0
  // so that Fn can modify the Chunks in its shard without causing data
223
0
  // races.
224
0
  const size_t NumShards = 256;
225
0
  size_t Step = Chunks.size() / NumShards;
226
0
  size_t Boundaries[NumShards + 1];
227
0
  Boundaries[0] = 0;
228
0
  Boundaries[NumShards] = Chunks.size();
229
0
  for_each_n(parallel::par, size_t(1), NumShards, [&](size_t I) {
230
0
    Boundaries[I] = findBoundary((I - 1) * Step, Chunks.size());
231
0
  });
232
0
  for_each_n(parallel::par, size_t(1), NumShards + 1, [&](size_t I) {
233
0
    if (Boundaries[I - 1] < Boundaries[I]) {
234
0
      forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn);
235
0
    }
236
0
  });
237
0
  ++Cnt;
238
0
}
239
240
// Merge identical COMDAT sections.
241
// Two sections are considered the same if their section headers,
242
// contents and relocations are all the same.
243
359
void ICF::run(ArrayRef<Chunk *> Vec) {
244
359
  ScopedTimer T(ICFTimer);
245
359
246
359
  // Collect only mergeable sections and group by hash value.
247
359
  uint32_t NextId = 1;
248
1.34k
  for (Chunk *C : Vec) {
249
1.34k
    if (auto *SC = dyn_cast<SectionChunk>(C)) {
250
1.32k
      if (isEligible(SC))
251
140
        Chunks.push_back(SC);
252
1.18k
      else
253
1.18k
        SC->Class[0] = NextId++;
254
1.32k
    }
255
1.34k
  }
256
359
257
359
  // Make sure that ICF doesn't merge sections that are being handled by string
258
359
  // tail merging.
259
359
  for (auto &P : MergeChunk::Instances)
260
3
    for (SectionChunk *SC : P.second->Sections)
261
7
      SC->Class[0] = NextId++;
262
359
263
359
  // Initially, we use hash values to partition sections.
264
359
  for_each(parallel::par, Chunks.begin(), Chunks.end(), [&](SectionChunk *SC) {
265
140
    // Set MSB to 1 to avoid collisions with non-hash classs.
266
140
    SC->Class[0] = xxHash64(SC->getContents()) | (1 << 31);
267
140
  });
268
359
269
359
  // From now on, sections in Chunks are ordered so that sections in
270
359
  // the same group are consecutive in the vector.
271
359
  std::stable_sort(Chunks.begin(), Chunks.end(),
272
359
                   [](SectionChunk *A, SectionChunk *B) {
273
120
                     return A->Class[0] < B->Class[0];
274
120
                   });
275
359
276
359
  // Compare static contents and assign unique IDs for each static content.
277
359
  forEachClass([&](size_t Begin, size_t End) 
{ segregate(Begin, End, true); }112
);
278
359
279
359
  // Split groups by comparing relocations until convergence is obtained.
280
359
  do {
281
359
    Repeat = false;
282
359
    forEachClass(
283
359
        [&](size_t Begin, size_t End) 
{ segregate(Begin, End, false); }114
);
284
359
  } while (Repeat);
285
359
286
359
  log("ICF needed " + Twine(Cnt) + " iterations");
287
359
288
359
  // Merge sections in the same classs.
289
359
  forEachClass([&](size_t Begin, size_t End) {
290
114
    if (End - Begin == 1)
291
93
      return;
292
21
293
21
    log("Selected " + Chunks[Begin]->getDebugName());
294
47
    for (size_t I = Begin + 1; I < End; 
++I26
) {
295
26
      log("  Removed " + Chunks[I]->getDebugName());
296
26
      Chunks[Begin]->replace(Chunks[I]);
297
26
    }
298
21
  });
299
359
}
300
301
// Entry point to ICF.
302
359
void doICF(ArrayRef<Chunk *> Chunks) { ICF().run(Chunks); }
303
304
} // namespace coff
305
} // namespace lld