Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/lld/include/lld/Common/Threads.h
Line
Count
Source
1
//===- Threads.h ------------------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// LLD supports threads to distribute workloads to multiple cores. Using
10
// multicore is most effective when more than one core are idle. At the
11
// last step of a build, it is often the case that a linker is the only
12
// active process on a computer. So, we are naturally interested in using
13
// threads wisely to reduce latency to deliver results to users.
14
//
15
// That said, we don't want to do "too clever" things using threads.
16
// Complex multi-threaded algorithms are sometimes extremely hard to
17
// reason about and can easily mess up the entire design.
18
//
19
// Fortunately, when a linker links large programs (when the link time is
20
// most critical), it spends most of the time to work on massive number of
21
// small pieces of data of the same kind, and there are opportunities for
22
// large parallelism there. Here are examples:
23
//
24
//  - We have hundreds of thousands of input sections that need to be
25
//    copied to a result file at the last step of link. Once we fix a file
26
//    layout, each section can be copied to its destination and its
27
//    relocations can be applied independently.
28
//
29
//  - We have tens of millions of small strings when constructing a
30
//    mergeable string section.
31
//
32
// For the cases such as the former, we can just use parallelForEach
33
// instead of std::for_each (or a plain for loop). Because tasks are
34
// completely independent from each other, we can run them in parallel
35
// without any coordination between them. That's very easy to understand
36
// and reason about.
37
//
38
// For the cases such as the latter, we can use parallel algorithms to
39
// deal with massive data. We have to write code for a tailored algorithm
40
// for each problem, but the complexity of multi-threading is isolated in
41
// a single pass and doesn't affect the linker's overall design.
42
//
43
// The above approach seems to be working fairly well. As an example, when
44
// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to
45
// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my
46
// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from
47
// 12.66 seconds to 7.95 seconds). Because of the Amdahl's law, the
48
// speedup is not linear, but as you add more cores, it gets faster.
49
//
50
// On a final note, if you are trying to optimize, keep the axiom "don't
51
// guess, measure!" in mind. Some important passes of the linker are not
52
// that slow. For example, resolving all symbols is not a very heavy pass,
53
// although it would be very hard to parallelize it. You want to first
54
// identify a slow pass and then optimize it.
55
//
56
//===----------------------------------------------------------------------===//
57
58
#ifndef LLD_COMMON_THREADS_H
59
#define LLD_COMMON_THREADS_H
60
61
#include "llvm/Support/Parallel.h"
62
#include <functional>
63
64
namespace lld {
65
66
extern bool threadsEnabled;
67
68
8.40k
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
8.40k
  if (threadsEnabled)
70
8.39k
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
16
  else
72
16
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
8.40k
}
ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::coff::SectionChunk*, std::__1::allocator<lld::coff::SectionChunk*> >&, lld::coff::ICF::run(llvm::ArrayRef<lld::coff::Chunk*>)::$_6>(std::__1::vector<lld::coff::SectionChunk*, std::__1::allocator<lld::coff::SectionChunk*> >&&&, lld::coff::ICF::run(llvm::ArrayRef<lld::coff::Chunk*>)::$_6)
Line
Count
Source
68
436
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
436
  if (threadsEnabled)
70
436
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
436
}
ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::coff::SectionChunk*, std::__1::allocator<lld::coff::SectionChunk*> >&, lld::coff::ICF::run(llvm::ArrayRef<lld::coff::Chunk*>)::$_7>(std::__1::vector<lld::coff::SectionChunk*, std::__1::allocator<lld::coff::SectionChunk*> >&&&, lld::coff::ICF::run(llvm::ArrayRef<lld::coff::Chunk*>)::$_7)
Line
Count
Source
68
872
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
872
  if (threadsEnabled)
70
872
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
872
}
Writer.cpp:void lld::parallelForEach<std::__1::vector<lld::coff::Chunk*, std::__1::allocator<lld::coff::Chunk*> >&, (anonymous namespace)::Writer::writeSections()::$_6>(std::__1::vector<lld::coff::Chunk*, std::__1::allocator<lld::coff::Chunk*> >&&&, (anonymous namespace)::Writer::writeSections()::$_6)
Line
Count
Source
68
1.21k
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
1.21k
  if (threadsEnabled)
70
1.21k
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
4
  else
72
4
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
1.21k
}
Driver.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputFile*, std::__1::allocator<lld::elf::InputFile*> >&, wrapSymbols(llvm::ArrayRef<WrappedSymbol>)::$_11>(std::__1::vector<lld::elf::InputFile*, std::__1::allocator<lld::elf::InputFile*> >&&&, wrapSymbols(llvm::ArrayRef<WrappedSymbol>)::$_11)
Line
Count
Source
68
13
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
13
  if (threadsEnabled)
70
13
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
13
}
ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, false> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, false> >::run()::'lambda'(lld::elf::InputSection*))
Line
Count
Source
68
3
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
3
  if (threadsEnabled)
70
3
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
3
}
ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, false> >::run()::'lambda0'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, false> >::run()::'lambda0'(lld::elf::InputSection*))
Line
Count
Source
68
6
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
6
  if (threadsEnabled)
70
6
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
6
}
Unexecuted instantiation: ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, false> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, false> >::run()::'lambda'(lld::elf::InputSection*))
Unexecuted instantiation: ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, false> >::run()::'lambda0'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, false> >::run()::'lambda0'(lld::elf::InputSection*))
ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run()::'lambda'(lld::elf::InputSection*))
Line
Count
Source
68
58
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
58
  if (threadsEnabled)
70
58
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
58
}
ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run()::'lambda0'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run()::'lambda0'(lld::elf::InputSection*))
Line
Count
Source
68
116
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
116
  if (threadsEnabled)
70
116
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
116
}
Unexecuted instantiation: ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, true> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, true> >::run()::'lambda'(lld::elf::InputSection*))
Unexecuted instantiation: ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, true> >::run()::'lambda0'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, true> >::run()::'lambda0'(lld::elf::InputSection*))
SyntheticSections.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::GdbIndexSection::GdbSymbol, std::__1::allocator<lld::elf::GdbIndexSection::GdbSymbol> >&, lld::elf::GdbIndexSection::writeTo(unsigned char*)::$_14>(std::__1::vector<lld::elf::GdbIndexSection::GdbSymbol, std::__1::allocator<lld::elf::GdbIndexSection::GdbSymbol> >&&&, lld::elf::GdbIndexSection::writeTo(unsigned char*)::$_14)
Line
Count
Source
68
15
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
15
  if (threadsEnabled)
70
15
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
15
}
SyntheticSections.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::MergeInputSection*, std::__1::allocator<lld::elf::MergeInputSection*> >&, lld::elf::MergeNoTailSection::finalizeContents()::$_16>(std::__1::vector<lld::elf::MergeInputSection*, std::__1::allocator<lld::elf::MergeInputSection*> >&&&, lld::elf::MergeNoTailSection::finalizeContents()::$_16)
Line
Count
Source
68
2.74k
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
2.74k
  if (threadsEnabled)
70
2.73k
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
6
  else
72
6
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
2.74k
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, false> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, false> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
68
376
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
376
  if (threadsEnabled)
70
376
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
376
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, false> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, false> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
68
157
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
157
  if (threadsEnabled)
70
156
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
1
  else
72
1
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
157
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, true> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, true> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
68
2.11k
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
2.11k
  if (threadsEnabled)
70
2.10k
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
5
  else
72
5
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
2.11k
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, true> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, true> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
68
108
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
108
  if (threadsEnabled)
70
108
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
108
}
Driver.cpp:void lld::parallelForEach<std::__1::vector<lld::wasm::ObjFile*, std::__1::allocator<lld::wasm::ObjFile*> >&, wrapSymbols(llvm::ArrayRef<WrappedSymbol>)::$_6>(std::__1::vector<lld::wasm::ObjFile*, std::__1::allocator<lld::wasm::ObjFile*> >&&&, wrapSymbols(llvm::ArrayRef<WrappedSymbol>)::$_6)
Line
Count
Source
68
2
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
2
  if (threadsEnabled)
70
2
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
2
}
Writer.cpp:void lld::parallelForEach<std::__1::vector<lld::wasm::OutputSection*, std::__1::allocator<lld::wasm::OutputSection*> >&, (anonymous namespace)::Writer::writeSections()::$_0>(std::__1::vector<lld::wasm::OutputSection*, std::__1::allocator<lld::wasm::OutputSection*> >&&&, (anonymous namespace)::Writer::writeSections()::$_0)
Line
Count
Source
68
172
template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
69
172
  if (threadsEnabled)
70
172
    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
71
0
  else
72
0
    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
73
172
}
74
75
inline void parallelForEachN(size_t begin, size_t end,
76
354k
                             llvm::function_ref<void(size_t)> fn) {
77
354k
  if (threadsEnabled)
78
354k
    for_each_n(llvm::parallel::par, begin, end, fn);
79
62
  else
80
62
    for_each_n(llvm::parallel::seq, begin, end, fn);
81
354k
}
82
83
167
template <typename R, class FuncTy> void parallelSort(R &&range, FuncTy fn) {
84
167
  if (threadsEnabled)
85
166
    sort(llvm::parallel::par, std::begin(range), std::end(range), fn);
86
1
  else
87
1
    sort(llvm::parallel::seq, std::begin(range), std::end(range), fn);
88
167
}
PDB.cpp:void lld::parallelSort<std::__1::vector<llvm::codeview::PublicSym32, std::__1::allocator<llvm::codeview::PublicSym32> >&, (anonymous namespace)::PDBLinker::addObjectsToPDB()::$_4>(std::__1::vector<llvm::codeview::PublicSym32, std::__1::allocator<llvm::codeview::PublicSym32> >&&&, (anonymous namespace)::PDBLinker::addObjectsToPDB()::$_4)
Line
Count
Source
83
107
template <typename R, class FuncTy> void parallelSort(R &&range, FuncTy fn) {
84
107
  if (threadsEnabled)
85
106
    sort(llvm::parallel::par, std::begin(range), std::end(range), fn);
86
1
  else
87
1
    sort(llvm::parallel::seq, std::begin(range), std::end(range), fn);
88
107
}
Writer.cpp:void lld::parallelSort<llvm::MutableArrayRef<(anonymous namespace)::Writer::sortExceptionTable()::Entry>, (anonymous namespace)::Writer::sortExceptionTable()::$_7>(llvm::MutableArrayRef<(anonymous namespace)::Writer::sortExceptionTable()::Entry>&&, (anonymous namespace)::Writer::sortExceptionTable()::$_7)
Line
Count
Source
83
59
template <typename R, class FuncTy> void parallelSort(R &&range, FuncTy fn) {
84
59
  if (threadsEnabled)
85
59
    sort(llvm::parallel::par, std::begin(range), std::end(range), fn);
86
0
  else
87
0
    sort(llvm::parallel::seq, std::begin(range), std::end(range), fn);
88
59
}
Writer.cpp:void lld::parallelSort<llvm::MutableArrayRef<(anonymous namespace)::Writer::sortExceptionTable()::Entry>, (anonymous namespace)::Writer::sortExceptionTable()::$_8>(llvm::MutableArrayRef<(anonymous namespace)::Writer::sortExceptionTable()::Entry>&&, (anonymous namespace)::Writer::sortExceptionTable()::$_8)
Line
Count
Source
83
1
template <typename R, class FuncTy> void parallelSort(R &&range, FuncTy fn) {
84
1
  if (threadsEnabled)
85
1
    sort(llvm::parallel::par, std::begin(range), std::end(range), fn);
86
0
  else
87
0
    sort(llvm::parallel::seq, std::begin(range), std::end(range), fn);
88
1
}
89
90
} // namespace lld
91
92
#endif