Coverage Report

Created: 2018-06-25 02:00

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/lld/include/lld/Common/Threads.h
Line
Count
Source
1
//===- Threads.h ------------------------------------------------*- C++ -*-===//
2
//
3
//                             The LLVM Linker
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// LLD supports threads to distribute workloads to multiple cores. Using
11
// multicore is most effective when more than one core are idle. At the
12
// last step of a build, it is often the case that a linker is the only
13
// active process on a computer. So, we are naturally interested in using
14
// threads wisely to reduce latency to deliver results to users.
15
//
16
// That said, we don't want to do "too clever" things using threads.
17
// Complex multi-threaded algorithms are sometimes extremely hard to
18
// reason about and can easily mess up the entire design.
19
//
20
// Fortunately, when a linker links large programs (when the link time is
21
// most critical), it spends most of the time to work on massive number of
22
// small pieces of data of the same kind, and there are opportunities for
23
// large parallelism there. Here are examples:
24
//
25
//  - We have hundreds of thousands of input sections that need to be
26
//    copied to a result file at the last step of link. Once we fix a file
27
//    layout, each section can be copied to its destination and its
28
//    relocations can be applied independently.
29
//
30
//  - We have tens of millions of small strings when constructing a
31
//    mergeable string section.
32
//
33
// For the cases such as the former, we can just use parallelForEach
34
// instead of std::for_each (or a plain for loop). Because tasks are
35
// completely independent from each other, we can run them in parallel
36
// without any coordination between them. That's very easy to understand
37
// and reason about.
38
//
39
// For the cases such as the latter, we can use parallel algorithms to
40
// deal with massive data. We have to write code for a tailored algorithm
41
// for each problem, but the complexity of multi-threading is isolated in
42
// a single pass and doesn't affect the linker's overall design.
43
//
44
// The above approach seems to be working fairly well. As an example, when
45
// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to
46
// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my
47
// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from
48
// 12.66 seconds to 7.95 seconds). Because of the Amdahl's law, the
49
// speedup is not linear, but as you add more cores, it gets faster.
50
//
51
// On a final note, if you are trying to optimize, keep the axiom "don't
52
// guess, measure!" in mind. Some important passes of the linker are not
53
// that slow. For example, resolving all symbols is not a very heavy pass,
54
// although it would be very hard to parallelize it. You want to first
55
// identify a slow pass and then optimize it.
56
//
57
//===----------------------------------------------------------------------===//
58
59
#ifndef LLD_COMMON_THREADS_H
60
#define LLD_COMMON_THREADS_H
61
62
#include "llvm/Support/Parallel.h"
63
#include <functional>
64
65
namespace lld {
66
67
extern bool ThreadsEnabled;
68
69
6.43k
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
6.43k
  if (ThreadsEnabled)
71
6.42k
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
15
  else
73
15
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
6.43k
}
ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, false> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, false> >::run()::'lambda'(lld::elf::InputSection*))
Line
Count
Source
69
3
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
3
  if (ThreadsEnabled)
71
3
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
3
}
Unexecuted instantiation: ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, false> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, false> >::run()::'lambda'(lld::elf::InputSection*))
ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run()::'lambda'(lld::elf::InputSection*))
Line
Count
Source
69
39
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
39
  if (ThreadsEnabled)
71
39
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
39
}
Unexecuted instantiation: ICF.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, true> >::run()::'lambda'(lld::elf::InputSection*)>(std::__1::vector<lld::elf::InputSection*, std::__1::allocator<lld::elf::InputSection*> >&&&, (anonymous namespace)::ICF<llvm::object::ELFType<(llvm::support::endianness)0, true> >::run()::'lambda'(lld::elf::InputSection*))
SyntheticSections.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::MergeInputSection*, std::__1::allocator<lld::elf::MergeInputSection*> >&, lld::elf::MergeNoTailSection::finalizeContents()::$_17>(std::__1::vector<lld::elf::MergeInputSection*, std::__1::allocator<lld::elf::MergeInputSection*> >&&&, lld::elf::MergeNoTailSection::finalizeContents()::$_17)
Line
Count
Source
69
2.12k
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
2.12k
  if (ThreadsEnabled)
71
2.12k
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
5
  else
73
5
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
2.12k
}
SyntheticSections.cpp:void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, lld::elf::decompressSections()::$_18>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, lld::elf::decompressSections()::$_18)
Line
Count
Source
69
2.13k
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
2.13k
  if (ThreadsEnabled)
71
2.12k
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
5
  else
73
5
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
2.13k
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, false> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, false> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
69
256
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
256
  if (ThreadsEnabled)
71
256
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
256
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, false> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, false> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
69
119
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
119
  if (ThreadsEnabled)
71
118
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
1
  else
73
1
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
119
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, true> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)1, true> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
69
1.68k
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
1.68k
  if (ThreadsEnabled)
71
1.68k
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
4
  else
73
4
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
1.68k
}
void lld::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, true> >()::'lambda'(lld::elf::InputSectionBase*)>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, void lld::elf::splitSections<llvm::object::ELFType<(llvm::support::endianness)0, true> >()::'lambda'(lld::elf::InputSectionBase*))
Line
Count
Source
69
74
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
70
74
  if (ThreadsEnabled)
71
74
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
72
0
  else
73
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
74
74
}
Unexecuted instantiation: OutputSections.cpp:void lld::parallelForEach<llvm::ArrayRef<lld::wasm::InputFunction*>&, lld::wasm::CodeSection::writeTo(unsigned char*)::$_0>(llvm::ArrayRef<lld::wasm::InputFunction*>&&&, lld::wasm::CodeSection::writeTo(unsigned char*)::$_0)
Unexecuted instantiation: OutputSections.cpp:void lld::parallelForEach<llvm::ArrayRef<lld::wasm::OutputSegment*>&, lld::wasm::DataSection::writeTo(unsigned char*)::$_1>(llvm::ArrayRef<lld::wasm::OutputSegment*>&&&, lld::wasm::DataSection::writeTo(unsigned char*)::$_1)
Unexecuted instantiation: OutputSections.cpp:void lld::parallelForEach<llvm::ArrayRef<lld::wasm::InputSection*>&, lld::wasm::CustomSection::writeTo(unsigned char*)::$_2>(llvm::ArrayRef<lld::wasm::InputSection*>&&&, lld::wasm::CustomSection::writeTo(unsigned char*)::$_2)
Unexecuted instantiation: Writer.cpp:void lld::parallelForEach<std::__1::vector<lld::wasm::OutputSection*, std::__1::allocator<lld::wasm::OutputSection*> >&, (anonymous namespace)::Writer::writeSections()::$_0>(std::__1::vector<lld::wasm::OutputSection*, std::__1::allocator<lld::wasm::OutputSection*> >&&&, (anonymous namespace)::Writer::writeSections()::$_0)
75
76
inline void parallelForEachN(size_t Begin, size_t End,
77
215k
                             std::function<void(size_t)> Fn) {
78
215k
  if (ThreadsEnabled)
79
215k
    for_each_n(llvm::parallel::par, Begin, End, Fn);
80
54
  else
81
54
    for_each_n(llvm::parallel::seq, Begin, End, Fn);
82
215k
}
83
84
} // namespace lld
85
86
#endif