Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/lld/ELF/Threads.h
Line
Count
Source
1
//===- Threads.h ------------------------------------------------*- C++ -*-===//
2
//
3
//                             The LLVM Linker
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
// LLD supports threads to distribute workloads to multiple cores. Using
11
// multicore is most effective when more than one core is idle. At the
12
// last step of a build, it is often the case that a linker is the only
13
// active process on a computer. So, we are naturally interested in using
14
// threads wisely to reduce latency to deliver results to users.
15
//
16
// That said, we don't want to do "too clever" things using threads.
17
// Complex multi-threaded algorithms are sometimes extremely hard to
18
// reason about and can easily mess up the entire design.
19
//
20
// Fortunately, when a linker links large programs (when the link time is
21
// most critical), it spends most of its time working on a massive number of
22
// small pieces of data of the same kind, and there are opportunities for
23
// large parallelism there. Here are examples:
24
//
25
//  - We have hundreds of thousands of input sections that need to be
26
//    copied to a result file at the last step of link. Once we fix a file
27
//    layout, each section can be copied to its destination and its
28
//    relocations can be applied independently.
29
//
30
//  - We have tens of millions of small strings when constructing a
31
//    mergeable string section.
32
//
33
// For the cases such as the former, we can just use parallelForEach
34
// instead of std::for_each (or a plain for loop). Because tasks are
35
// completely independent from each other, we can run them in parallel
36
// without any coordination between them. That's very easy to understand
37
// and reason about.
38
//
39
// For the cases such as the latter, we can use parallel algorithms to
40
// deal with massive data. We have to write code for a tailored algorithm
41
// for each problem, but the complexity of multi-threading is isolated in
42
// a single pass and doesn't affect the linker's overall design.
43
//
44
// The above approach seems to be working fairly well. As an example, when
45
// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to
46
// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my
47
// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from
48
// 12.66 seconds to 7.95 seconds). Because of Amdahl's law, the
49
// speedup is not linear, but as you add more cores, it gets faster.
50
//
51
// On a final note, if you are trying to optimize, keep the axiom "don't
52
// guess, measure!" in mind. Some important passes of the linker are not
53
// that slow. For example, resolving all symbols is not a very heavy pass,
54
// although it would be very hard to parallelize it. You want to first
55
// identify a slow pass and then optimize it.
56
//
57
//===----------------------------------------------------------------------===//
58
59
#ifndef LLD_ELF_THREADS_H
60
#define LLD_ELF_THREADS_H
61
62
#include "Config.h"
63
64
#include "llvm/Support/Parallel.h"
65
#include <functional>
66
67
namespace lld {
68
namespace elf {
69
70
3.12k
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
71
3.12k
  if (Config->Threads)
72
3.11k
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
73
3.12k
  else
74
8
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
75
3.12k
}
SyntheticSections.cpp:void lld::elf::parallelForEach<std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&, lld::elf::decompressAndMergeSections()::$_13>(std::__1::vector<lld::elf::InputSectionBase*, std::__1::allocator<lld::elf::InputSectionBase*> >&&&, lld::elf::decompressAndMergeSections()::$_13)
Line
Count
Source
70
1.59k
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
71
1.59k
  if (Config->Threads)
72
1.59k
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
73
1.59k
  else
74
4
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
75
1.59k
}
Writer.cpp:void lld::elf::parallelForEach<std::__1::vector<lld::elf::OutputSection*, std::__1::allocator<lld::elf::OutputSection*> >&, (anonymous namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)1, false> >::run()::'lambda'(lld::elf::OutputSection*)>(std::__1::vector<lld::elf::OutputSection*, std::__1::allocator<lld::elf::OutputSection*> >&&&, (anonymous namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)1, false> >::run()::'lambda'(lld::elf::OutputSection*))
Line
Count
Source
70
189
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
71
189
  if (Config->Threads)
72
189
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
73
189
  else
74
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
75
189
}
Writer.cpp:void lld::elf::parallelForEach<std::__1::vector<lld::elf::OutputSection*, std::__1::allocator<lld::elf::OutputSection*> >&, (anonymous namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)0, false> >::run()::'lambda'(lld::elf::OutputSection*)>(std::__1::vector<lld::elf::OutputSection*, std::__1::allocator<lld::elf::OutputSection*> >&&&, (anonymous namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)0, false> >::run()::'lambda'(lld::elf::OutputSection*))
Line
Count
Source
70
93
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
71
93
  if (Config->Threads)
72
93
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
73
93
  else
74
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
75
93
}
Writer.cpp:void lld::elf::parallelForEach<std::__1::vector<lld::elf::OutputSection*, std::__1::allocator<lld::elf::OutputSection*> >&, (anonymous namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run()::'lambda'(lld::elf::OutputSection*)>(std::__1::vector<lld::elf::OutputSection*, std::__1::allocator<lld::elf::OutputSection*> >&&&, (anonymous namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)1, true> >::run()::'lambda'(lld::elf::OutputSection*))
Line
Count
Source
70
1.20k
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
71
1.20k
  if (Config->Threads)
72
1.19k
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
73
1.20k
  else
74
4
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
75
1.20k
}
Writer.cpp:void lld::elf::parallelForEach<std::__1::vector<lld::elf::OutputSection*, std::__1::allocator<lld::elf::OutputSection*> >&, (anonymous namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)0, true> >::run()::'lambda'(lld::elf::OutputSection*)>(std::__1::vector<lld::elf::OutputSection*, std::__1::allocator<lld::elf::OutputSection*> >&&&, (anonymous namespace)::Writer<llvm::object::ELFType<(llvm::support::endianness)0, true> >::run()::'lambda'(lld::elf::OutputSection*))
Line
Count
Source
70
44
template <typename R, class FuncTy> void parallelForEach(R &&Range, FuncTy Fn) {
71
44
  if (Config->Threads)
72
44
    for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn);
73
44
  else
74
0
    for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn);
75
44
}
76
77
inline void parallelForEachN(size_t Begin, size_t End,
78
208k
                             std::function<void(size_t)> Fn) {
79
208k
  if (Config->Threads)
80
208k
    for_each_n(llvm::parallel::par, Begin, End, Fn);
81
208k
  else
82
32
    for_each_n(llvm::parallel::seq, Begin, End, Fn);
83
208k
}
84
} // namespace elf
85
} // namespace lld
86
87
#endif