/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Serialization/MultiOnDiskHashTable.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===- MultiOnDiskHashTable.h - Merged set of hash tables -------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file provides a hash table data structure suitable for incremental and |
10 | | // distributed storage across a set of files. |
11 | | // |
12 | | // Multiple hash tables from different files are implicitly merged to improve |
13 | | // performance, and on reload the merged table will override those from other |
14 | | // files. |
15 | | // |
16 | | //===----------------------------------------------------------------------===// |
17 | | |
18 | | #ifndef LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H |
19 | | #define LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H |
20 | | |
21 | | #include "llvm/ADT/DenseMap.h" |
22 | | #include "llvm/ADT/DenseSet.h" |
23 | | #include "llvm/ADT/PointerUnion.h" |
24 | | #include "llvm/ADT/STLExtras.h" |
25 | | #include "llvm/ADT/SmallVector.h" |
26 | | #include "llvm/ADT/TinyPtrVector.h" |
27 | | #include "llvm/ADT/iterator_range.h" |
28 | | #include "llvm/Support/Endian.h" |
29 | | #include "llvm/Support/EndianStream.h" |
30 | | #include "llvm/Support/OnDiskHashTable.h" |
31 | | #include "llvm/Support/raw_ostream.h" |
32 | | #include <algorithm> |
33 | | #include <cstdint> |
34 | | #include <vector> |
35 | | |
36 | | namespace clang { |
37 | | namespace serialization { |
38 | | |
39 | | /// A collection of on-disk hash tables, merged when relevant for performance. |
40 | | template<typename Info> class MultiOnDiskHashTable { |
41 | | public: |
42 | | /// A handle to a file, used when overriding tables. |
43 | | using file_type = typename Info::file_type; |
44 | | |
45 | | /// A pointer to an on-disk representation of the hash table. |
46 | | using storage_type = const unsigned char *; |
47 | | |
48 | | using external_key_type = typename Info::external_key_type; |
49 | | using internal_key_type = typename Info::internal_key_type; |
50 | | using data_type = typename Info::data_type; |
51 | | using data_type_builder = typename Info::data_type_builder; |
52 | | using hash_value_type = unsigned; |
53 | | |
54 | | private: |
55 | | /// The generator is permitted to read our merged table. |
56 | | template<typename ReaderInfo, typename WriterInfo> |
57 | | friend class MultiOnDiskHashTableGenerator; |
58 | | |
59 | | /// A hash table stored on disk. |
60 | | struct OnDiskTable { |
61 | | using HashTable = llvm::OnDiskIterableChainedHashTable<Info>; |
62 | | |
63 | | file_type File; |
64 | | HashTable Table; |
65 | | |
66 | | OnDiskTable(file_type File, unsigned NumBuckets, unsigned NumEntries, |
67 | | storage_type Buckets, storage_type Payload, storage_type Base, |
68 | | const Info &InfoObj) |
69 | 1.32M | : File(File), |
70 | 1.32M | Table(NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj) {} |
71 | | }; |
72 | | |
73 | | struct MergedTable { |
74 | | std::vector<file_type> Files; |
75 | | llvm::DenseMap<internal_key_type, data_type> Data; |
76 | | }; |
77 | | |
78 | | using Table = llvm::PointerUnion<OnDiskTable *, MergedTable *>; |
79 | | using TableVector = llvm::TinyPtrVector<void *>; |
80 | | |
81 | | /// The current set of on-disk and merged tables. |
82 | | /// We manually store the opaque value of the Table because TinyPtrVector |
83 | | /// can't cope with holding a PointerUnion directly. |
84 | | /// There can be at most one MergedTable in this vector, and if present, |
85 | | /// it is the first table. |
86 | | TableVector Tables; |
87 | | |
88 | | /// Files corresponding to overridden tables that we've not yet |
89 | | /// discarded. |
90 | | llvm::TinyPtrVector<file_type> PendingOverrides; |
91 | | |
92 | | struct AsOnDiskTable { |
93 | | using result_type = OnDiskTable *; |
94 | | |
95 | 1.36M | result_type operator()(void *P) const { |
96 | 1.36M | return Table::getFromOpaqueValue(P).template get<OnDiskTable *>(); |
97 | 1.36M | } |
98 | | }; |
99 | | |
100 | | using table_iterator = |
101 | | llvm::mapped_iterator<TableVector::iterator, AsOnDiskTable>; |
102 | | using table_range = llvm::iterator_range<table_iterator>; |
103 | | |
104 | | /// The current set of on-disk tables. |
105 | 1.30M | table_range tables() { |
106 | 1.30M | auto Begin = Tables.begin(), End = Tables.end(); |
107 | 1.30M | if (getMergedTable()) |
108 | 113k | ++Begin; |
109 | 1.30M | return llvm::make_range(llvm::map_iterator(Begin, AsOnDiskTable()), |
110 | 1.30M | llvm::map_iterator(End, AsOnDiskTable())); |
111 | 1.30M | } |
112 | | |
113 | 2.60M | MergedTable *getMergedTable() const { |
114 | | // If we already have a merged table, it's the first one. |
115 | 2.60M | return Tables.empty() ? nullptr583k : Table::getFromOpaqueValue(*Tables.begin()) |
116 | 2.02M | .template dyn_cast<MergedTable*>(); |
117 | 2.60M | } |
118 | | |
119 | | /// Delete all our current on-disk tables. |
120 | 524k | void clear() { |
121 | 524k | for (auto *T : tables()) |
122 | 324k | delete T; |
123 | 524k | if (auto *M = getMergedTable()) |
124 | 2.07k | delete M; |
125 | 524k | Tables.clear(); |
126 | 524k | } |
127 | | |
128 | 4.22k | void removeOverriddenTables() { |
129 | 4.22k | llvm::DenseSet<file_type> Files; |
130 | 4.22k | Files.insert(PendingOverrides.begin(), PendingOverrides.end()); |
131 | | // Explicitly capture Files to work around an MSVC 2015 rejects-valid bug. |
132 | 1.00M | auto ShouldRemove = [&Files](void *T) -> bool { |
133 | 1.00M | auto *ODT = Table::getFromOpaqueValue(T).template get<OnDiskTable *>(); |
134 | 1.00M | bool Remove = Files.count(ODT->File); |
135 | 1.00M | if (Remove) |
136 | 979k | delete ODT; |
137 | 1.00M | return Remove; |
138 | 1.00M | }; |
139 | 4.22k | Tables.erase(std::remove_if(tables().begin().getCurrent(), Tables.end(), |
140 | 4.22k | ShouldRemove), |
141 | 4.22k | Tables.end()); |
142 | 4.22k | PendingOverrides.clear(); |
143 | 4.22k | } |
144 | | |
145 | 2.18k | void condense() { |
146 | 2.18k | MergedTable *Merged = getMergedTable(); |
147 | 2.18k | if (!Merged) |
148 | 2.08k | Merged = new MergedTable; |
149 | | |
150 | | // Read in all the tables and merge them together. |
151 | | // FIXME: Be smarter about which tables we merge. |
152 | 19.1k | for (auto *ODT : tables()) { |
153 | 19.1k | auto &HT = ODT->Table; |
154 | 19.1k | Info &InfoObj = HT.getInfoObj(); |
155 | | |
156 | 5.37M | for (auto I = HT.data_begin(), E = HT.data_end(); I != E; ++I5.35M ) { |
157 | 5.35M | auto *LocalPtr = I.getItem(); |
158 | | |
159 | | // FIXME: Don't rely on the OnDiskHashTable format here. |
160 | 5.35M | auto L = InfoObj.ReadKeyDataLength(LocalPtr); |
161 | 5.35M | const internal_key_type &Key = InfoObj.ReadKey(LocalPtr, L.first); |
162 | 5.35M | data_type_builder ValueBuilder(Merged->Data[Key]); |
163 | 5.35M | InfoObj.ReadDataInto(Key, LocalPtr + L.first, L.second, |
164 | 5.35M | ValueBuilder); |
165 | 5.35M | } |
166 | | |
167 | 19.1k | Merged->Files.push_back(ODT->File); |
168 | 19.1k | delete ODT; |
169 | 19.1k | } |
170 | | |
171 | 2.18k | Tables.clear(); |
172 | 2.18k | Tables.push_back(Table(Merged).getOpaqueValue()); |
173 | 2.18k | } |
174 | | |
175 | | public: |
176 | 233k | MultiOnDiskHashTable() = default; |
177 | | |
178 | | MultiOnDiskHashTable(MultiOnDiskHashTable &&O) |
179 | 291k | : Tables(std::move(O.Tables)), |
180 | 291k | PendingOverrides(std::move(O.PendingOverrides)) { |
181 | 291k | O.Tables.clear(); |
182 | 291k | } |
183 | | |
184 | | MultiOnDiskHashTable &operator=(MultiOnDiskHashTable &&O) { |
185 | | if (&O == this) |
186 | | return *this; |
187 | | clear(); |
188 | | Tables = std::move(O.Tables); |
189 | | O.Tables.clear(); |
190 | | PendingOverrides = std::move(O.PendingOverrides); |
191 | | return *this; |
192 | | } |
193 | | |
194 | 524k | ~MultiOnDiskHashTable() { clear(); } |
195 | | |
196 | | /// Add the table \p Data loaded from file \p File. |
197 | 1.32M | void add(file_type File, storage_type Data, Info InfoObj = Info()) { |
198 | 1.32M | using namespace llvm::support; |
199 | | |
200 | 1.32M | storage_type Ptr = Data; |
201 | | |
202 | 1.32M | uint32_t BucketOffset = |
203 | 1.32M | endian::readNext<uint32_t, llvm::endianness::little, unaligned>(Ptr); |
204 | | |
205 | | // Read the list of overridden files. |
206 | 1.32M | uint32_t NumFiles = |
207 | 1.32M | endian::readNext<uint32_t, llvm::endianness::little, unaligned>(Ptr); |
208 | | // FIXME: Add a reserve() to TinyPtrVector so that we don't need to make |
209 | | // an additional copy. |
210 | 1.32M | llvm::SmallVector<file_type, 16> OverriddenFiles; |
211 | 1.32M | OverriddenFiles.reserve(NumFiles); |
212 | 4.91M | for (/**/; NumFiles != 0; --NumFiles3.58M ) |
213 | 3.58M | OverriddenFiles.push_back(InfoObj.ReadFileRef(Ptr)); |
214 | 1.32M | PendingOverrides.insert(PendingOverrides.end(), OverriddenFiles.begin(), |
215 | 1.32M | OverriddenFiles.end()); |
216 | | |
217 | | // Read the OnDiskChainedHashTable header. |
218 | 1.32M | storage_type Buckets = Data + BucketOffset; |
219 | 1.32M | auto NumBucketsAndEntries = |
220 | 1.32M | OnDiskTable::HashTable::readNumBucketsAndEntries(Buckets); |
221 | | |
222 | | // Register the table. |
223 | 1.32M | Table NewTable = new OnDiskTable(File, NumBucketsAndEntries.first, |
224 | 1.32M | NumBucketsAndEntries.second, |
225 | 1.32M | Buckets, Ptr, Data, std::move(InfoObj)); |
226 | 1.32M | Tables.push_back(NewTable.getOpaqueValue()); |
227 | 1.32M | } |
228 | | |
229 | | /// Find and read the lookup results for \p EKey. |
230 | 770k | data_type find(const external_key_type &EKey) { |
231 | 770k | data_type Result; |
232 | | |
233 | 770k | if (!PendingOverrides.empty()) |
234 | 4.22k | removeOverriddenTables(); |
235 | | |
236 | 770k | if (Tables.size() > static_cast<unsigned>(Info::MaxTables)) |
237 | 2.18k | condense(); |
238 | | |
239 | 770k | internal_key_type Key = Info::GetInternalKey(EKey); |
240 | 770k | auto KeyHash = Info::ComputeHash(Key); |
241 | | |
242 | 770k | if (MergedTable *M = getMergedTable()) { |
243 | 110k | auto It = M->Data.find(Key); |
244 | 110k | if (It != M->Data.end()) |
245 | 20.7k | Result = It->second; |
246 | 110k | } |
247 | | |
248 | 770k | data_type_builder ResultBuilder(Result); |
249 | | |
250 | 1.02M | for (auto *ODT : tables()) { |
251 | 1.02M | auto &HT = ODT->Table; |
252 | 1.02M | auto It = HT.find_hashed(Key, KeyHash); |
253 | 1.02M | if (It != HT.end()) |
254 | 167k | HT.getInfoObj().ReadDataInto(Key, It.getDataPtr(), It.getDataLen(), |
255 | 167k | ResultBuilder); |
256 | 1.02M | } |
257 | | |
258 | 770k | return Result; |
259 | 770k | } |
260 | | |
261 | | /// Read all the lookup results into a single value. This only makes |
262 | | /// sense if merging values across keys is meaningful. |
263 | 75 | data_type findAll() { |
264 | 75 | data_type Result; |
265 | 75 | data_type_builder ResultBuilder(Result); |
266 | | |
267 | 75 | if (!PendingOverrides.empty()) |
268 | 0 | removeOverriddenTables(); |
269 | | |
270 | 75 | if (MergedTable *M = getMergedTable()) { |
271 | 0 | for (auto &KV : M->Data) |
272 | 0 | Info::MergeDataInto(KV.second, ResultBuilder); |
273 | 0 | } |
274 | | |
275 | 82 | for (auto *ODT : tables()) { |
276 | 82 | auto &HT = ODT->Table; |
277 | 82 | Info &InfoObj = HT.getInfoObj(); |
278 | 408 | for (auto I = HT.data_begin(), E = HT.data_end(); I != E; ++I326 ) { |
279 | 326 | auto *LocalPtr = I.getItem(); |
280 | | |
281 | | // FIXME: Don't rely on the OnDiskHashTable format here. |
282 | 326 | auto L = InfoObj.ReadKeyDataLength(LocalPtr); |
283 | 326 | const internal_key_type &Key = InfoObj.ReadKey(LocalPtr, L.first); |
284 | 326 | InfoObj.ReadDataInto(Key, LocalPtr + L.first, L.second, ResultBuilder); |
285 | 326 | } |
286 | 82 | } |
287 | | |
288 | 75 | return Result; |
289 | 75 | } |
290 | | }; |
291 | | |
292 | | /// Writer for the on-disk hash table. |
293 | | template<typename ReaderInfo, typename WriterInfo> |
294 | | class MultiOnDiskHashTableGenerator { |
295 | | using BaseTable = MultiOnDiskHashTable<ReaderInfo>; |
296 | | using Generator = llvm::OnDiskChainedHashTableGenerator<WriterInfo>; |
297 | | |
298 | | Generator Gen; |
299 | | |
300 | | public: |
301 | 61.1k | MultiOnDiskHashTableGenerator() : Gen() {} |
302 | | |
303 | | void insert(typename WriterInfo::key_type_ref Key, |
304 | 579k | typename WriterInfo::data_type_ref Data, WriterInfo &Info) { |
305 | 579k | Gen.insert(Key, Data, Info); |
306 | 579k | } |
307 | | |
308 | | void emit(llvm::SmallVectorImpl<char> &Out, WriterInfo &Info, |
309 | 61.1k | const BaseTable *Base) { |
310 | 61.1k | using namespace llvm::support; |
311 | | |
312 | 61.1k | llvm::raw_svector_ostream OutStream(Out); |
313 | | |
314 | | // Write our header information. |
315 | 61.1k | { |
316 | 61.1k | endian::Writer Writer(OutStream, llvm::endianness::little); |
317 | | |
318 | | // Reserve four bytes for the bucket offset. |
319 | 61.1k | Writer.write<uint32_t>(0); |
320 | | |
321 | 61.1k | if (auto *Merged = Base ? Base->getMergedTable() : nullptr) { |
322 | | // Write list of overridden files. |
323 | 1.11k | Writer.write<uint32_t>(Merged->Files.size()); |
324 | 1.11k | for (const auto &F : Merged->Files) |
325 | 10.7k | Info.EmitFileRef(OutStream, F); |
326 | | |
327 | | // Add all merged entries from Base to the generator. |
328 | 2.16M | for (auto &KV : Merged->Data) { |
329 | 2.16M | if (!Gen.contains(KV.first, Info)) |
330 | 2.15M | Gen.insert(KV.first, Info.ImportData(KV.second), Info); |
331 | 2.16M | } |
332 | 60.0k | } else { |
333 | 60.0k | Writer.write<uint32_t>(0); |
334 | 60.0k | } |
335 | 61.1k | } |
336 | | |
337 | | // Write the table itself. |
338 | 61.1k | uint32_t BucketOffset = Gen.Emit(OutStream, Info); |
339 | | |
340 | | // Replace the first four bytes with the bucket offset. |
341 | 61.1k | endian::write32le(Out.data(), BucketOffset); |
342 | 61.1k | } |
343 | | }; |
344 | | |
345 | | } // namespace serialization |
346 | | } // namespace clang |
347 | | |
348 | | #endif // LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H |