Coverage Report

Created: 2018-10-20 12:32

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/lld/include/lld/Common/Threads.h
Line
Count
Source
1
//===- Threads.h ------------------------------------------------*- C++ -*-===//
2
//
3
//                             The LLVM Linker
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// LLD supports threads to distribute workloads to multiple cores. Using
11
// multicore is most effective when more than one core is idle. At the
12
// last step of a build, it is often the case that a linker is the only
13
// active process on a computer. So, we are naturally interested in using
14
// threads wisely to reduce latency to deliver results to users.
15
//
16
// That said, we don't want to do "too clever" things using threads.
17
// Complex multi-threaded algorithms are sometimes extremely hard to
18
// reason about and can easily mess up the entire design.
19
//
20
// Fortunately, when a linker links large programs (when the link time is
21
// most critical), it spends most of its time working on a massive number of
22
// small pieces of data of the same kind, and there are opportunities for
23
// large parallelism there. Here are examples:
24
//
25
//  - We have hundreds of thousands of input sections that need to be
26
//    copied to a result file at the last step of link. Once we fix a file
27
//    layout, each section can be copied to its destination and its
28
//    relocations can be applied independently.
29
//
30
//  - We have tens of millions of small strings when constructing a
31
//    mergeable string section.
32
//
33
// For the cases such as the former, we can just use parallelForEach
34
// instead of std::for_each (or a plain for loop). Because tasks are
35
// completely independent from each other, we can run them in parallel
36
// without any coordination between them. That's very easy to understand
37
// and reason about.
38
//
39
// For the cases such as the latter, we can use parallel algorithms to
40
// deal with massive data. We have to write code for a tailored algorithm
41
// for each problem, but the complexity of multi-threading is isolated in
42
// a single pass and doesn't affect the linker's overall design.
43
//
44
// The above approach seems to be working fairly well. As an example, when
45
// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to
46
// 75% compared to a single core (from 12.66 seconds to 9.55 seconds) on my
47
// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from
48
// 12.66 seconds to 7.95 seconds). Because of Amdahl's law, the
49
// speedup is not linear, but as you add more cores, it gets faster.
50
//
51
// On a final note, if you are trying to optimize, keep the axiom "don't
52
// guess, measure!" in mind. Some important passes of the linker are not
53
// that slow. For example, resolving all symbols is not a very heavy pass,
54
// although it would be very hard to parallelize it. You want to first
55
// identify a slow pass and then optimize it.
56
//
57
//===----------------------------------------------------------------------===//
58
59
#ifndef LLD_COMMON_THREADS_H
60
#define LLD_COMMON_THREADS_H
61
62
#include "llvm/Support/Parallel.h"
63
#include <functional>
64
65
namespace lld {
66
67
extern bool ThreadsEnabled;
68
69
4.95k
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
4.95k
  if (ThreadsEnabled)
71
4.94k
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
10
  else
73
10
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
4.95k
}
Unexecuted instantiation: Driver.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputFile*, std::__1::allocator<lld::elf::InputFile*> >&, void wrapSymbols<llvm::object::ELFType<(llvm::support::endianness)1, false> >(llvm::ArrayRef<WrappedSymbol>)::'lambda'(lld::elf::InputFile*)>(std::__1::vector<lld::elf::InputFile*, std::__1::allocator<lld::elf::InputFile*> >&&&, void wrapSymbols<llvm::object::ELFType<(llvm::support::endianness)1, false> >(llvm::ArrayRef<WrappedSymbol>)::'lambda'(lld::elf::InputFile*))
Unexecuted instantiation: Driver.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputFile*, std::__1::allocator<lld::elf::InputFile*> >&, void wrapSymbols<llvm::object::ELFType<(llvm::support::endianness)0, false> >(llvm::ArrayRef<WrappedSymbol>)::'lambda'(lld::elf::InputFile*)>(std::__1::vector<lld::elf::InputFile*, std::__1::allocator<lld::elf::InputFile*> >&&&, void wrapSymbols<llvm::object::ELFType<(llvm::support::endianness)0, false> >(llvm::ArrayRef<WrappedSymbol>)::'lambda'(lld::elf::InputFile*))
Driver.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputFile*, std::__1::allocator<lld::elf::InputFile*> >&, void wrapSymbols<llvm::object::ELFType<(llvm::support::endianness)1, true> >(llvm::ArrayRef<WrappedSymbol>)::'lambda'(lld::elf::InputFile*)>(std::__1::vector<lld::elf::InputFile*, std::__1::allocator<lld::elf::InputFile*> >&&&, void wrapSymbols<llvm::object::ELFType<(llvm::support::endianness)1, true> >(llvm::ArrayRef<WrappedSymbol>)::'lambda'(lld::elf::InputFile*))
Line
Count
Source
69
12
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
12
  if (ThreadsEnabled)
71
12
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
12
}
Unexecuted instantiation: Driver.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputFile*, std::__1::allocator<lld::elf::InputFile*> >&, void wrapSymbols<llvm::object::ELFType<(llvm::support::endianness)0, true> >(llvm::ArrayRef<WrappedSymbol>)::'lambda'(lld::elf::InputFile*)>(std::__1::vector<lld::elf::InputFile*, std::__1::allocator<lld::elf::InputFile*> >&&&, void wrapSymbols<llvm::object::ELFType<(llvm::support::endianness)0, true> >(llvm::ArrayRef<WrappedSymbol>)::'lambda'(lld::elf::InputFile*))
ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, false> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, false> >::run()::'lambda'(lld::elf::InputSection*))
Line
Count
Source
69
3
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
3
  if (ThreadsEnabled)
71
3
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
3
}
Unexecuted instantiation: ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, false> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, false> >::run()::'lambda'(lld::elf::InputSection*))
ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run()::'lambda'(lld::elf::InputSection*))
Line
Count
Source
69
59
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
59
  if (ThreadsEnabled)
71
59
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
59
}
Unexecuted instantiation: ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, true> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, true> >::run()::'lambda'(lld::elf::InputSection*))
SyntheticSections.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::GdbIndexSection::GdbSymbol, std::__1::allocator<lld::elf::GdbIndexSection::GdbSymbol> >&, lld::elf::GdbIndexSection::writeTo(unsigned char*)::$_15>(std::__1::vector<lld::elf::GdbIndexSection::GdbSymbol, std::__1::allocator<lld::elf::GdbIndexSection::GdbSymbol> >&&&, lld::elf::GdbIndexSection::writeTo(unsigned char*)::$_15)
Line
Count
Source
69
9
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
9
  if (ThreadsEnabled)
71
9
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
9
}
SyntheticSections.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::MergeInputSection*, std::__1::allocator<lld::elf::MergeInputSection*> >&, lld::elf::MergeNoTailSection::finalizeContents()::$_17>(std::__1::vector<lld::elf::MergeInputSection*, std::__1::allocator<lld::elf::MergeInputSection*> >&&&, lld::elf::MergeNoTailSection::finalizeContents()::$_17)
Line
Count
Source
69
2.31k
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
2.31k
  if (ThreadsEnabled)
71
2.31k
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
5
  else
73
5
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
2.31k
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, false> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, false> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
69
277
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
277
  if (ThreadsEnabled)
71
277
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
277
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, false> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, false> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
69
119
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
119
  if (ThreadsEnabled)
71
118
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
1
  else
73
1
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
119
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, true> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, true> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
69
1.81k
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
1.81k
  if (ThreadsEnabled)
71
1.81k
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
4
  else
73
4
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
1.81k
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, true> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, true> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
69
105
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
105
  if (ThreadsEnabled)
71
105
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
105
}
OutputSections.cpp:void lld::parallelForEach<llvm::ArrayRef<lld::wasm::InputFunction*>&, lld::wasm::CodeSection::writeTo(unsigned char*)::$_0>(llvm::ArrayRef<lld::wasm::InputFunction*>&&&, lld::wasm::CodeSection::writeTo(unsigned char*)::$_0)
Line
Count
Source
69
97
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
97
  if (ThreadsEnabled)
71
97
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
97
}
OutputSections.cpp:void lld::parallelForEach<llvm::ArrayRef<lld::wasm::OutputSegment*>&, lld::wasm::DataSection::writeTo(unsigned char*)::$_1>(llvm::ArrayRef<lld::wasm::OutputSegment*>&&&, lld::wasm::DataSection::writeTo(unsigned char*)::$_1)
Line
Count
Source
69
21
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
21
  if (ThreadsEnabled)
71
21
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
21
}
OutputSections.cpp:void lld::parallelForEach<llvm::ArrayRef<lld::wasm::InputSection*>&, lld::wasm::CustomSection::writeTo(unsigned char*)::$_2>(llvm::ArrayRef<lld::wasm::InputSection*>&&&, lld::wasm::CustomSection::writeTo(unsigned char*)::$_2)
Line
Count
Source
69
18
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
18
  if (ThreadsEnabled)
71
18
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
18
}
Writer.cpp:void lld::parallelForEach<std::__1::vector<lld::wasm::OutputSection*, std::__1::allocator<lld::wasm::OutputSection*> >&, (anonymous namespace)::Writer::writeSections()::$_0>(std::__1::vector<lld::wasm::OutputSection*, std::__1::allocator<lld::wasm::OutputSection*> >&&&, (anonymous namespace)::Writer::writeSections()::$_0)
Line
Count
Source
69
98
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
98
  if (ThreadsEnabled)
71
98
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
98
}
75
76
inline void parallelForEachN(size_t Begin, size_t End,
77
350k
                             std::function<void(size_t)> Fn) {
78
350k
  if (ThreadsEnabled)
79
350k
    for_each_n(llvm::parallel::par, Begin, End, Fn);
80
54
  else
81
54
    for_each_n(llvm::parallel::seq, Begin, End, Fn);
82
350k
}
83
84
} // namespace lld
85
86
#endif