/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Serialization/MultiOnDiskHashTable.h
Line | Count | Source |
1 | | //===- MultiOnDiskHashTable.h - Merged set of hash tables -------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file provides a hash table data structure suitable for incremental and |
10 | | // distributed storage across a set of files. |
11 | | // |
12 | | // Multiple hash tables from different files are implicitly merged to improve |
13 | | // performance, and on reload the merged table will override those from other |
14 | | // files. |
15 | | // |
16 | | //===----------------------------------------------------------------------===// |
17 | | |
18 | | #ifndef LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H |
19 | | #define LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H |
20 | | |
21 | | #include "llvm/ADT/DenseMap.h" |
22 | | #include "llvm/ADT/DenseSet.h" |
23 | | #include "llvm/ADT/PointerUnion.h" |
24 | | #include "llvm/ADT/STLExtras.h" |
25 | | #include "llvm/ADT/SmallVector.h" |
26 | | #include "llvm/ADT/TinyPtrVector.h" |
27 | | #include "llvm/ADT/iterator_range.h" |
28 | | #include "llvm/Support/Endian.h" |
29 | | #include "llvm/Support/EndianStream.h" |
30 | | #include "llvm/Support/OnDiskHashTable.h" |
31 | | #include "llvm/Support/raw_ostream.h" |
32 | | #include <algorithm> |
33 | | #include <cstdint> |
34 | | #include <vector> |
35 | | |
36 | | namespace clang { |
37 | | namespace serialization { |
38 | | |
39 | | /// A collection of on-disk hash tables, merged when relevant for performance. |
40 | | template<typename Info> class MultiOnDiskHashTable { |
41 | | public: |
42 | | /// A handle to a file, used when overriding tables. |
43 | | using file_type = typename Info::file_type; |
44 | | |
45 | | /// A pointer to an on-disk representation of the hash table. |
46 | | using storage_type = const unsigned char *; |
47 | | |
48 | | using external_key_type = typename Info::external_key_type; |
49 | | using internal_key_type = typename Info::internal_key_type; |
50 | | using data_type = typename Info::data_type; |
51 | | using data_type_builder = typename Info::data_type_builder; |
52 | | using hash_value_type = unsigned; |
53 | | |
54 | | private: |
55 | | /// The generator is permitted to read our merged table. |
56 | | template<typename ReaderInfo, typename WriterInfo> |
57 | | friend class MultiOnDiskHashTableGenerator; |
58 | | |
59 | | /// A hash table stored on disk. |
60 | | struct OnDiskTable { |
61 | | using HashTable = llvm::OnDiskIterableChainedHashTable<Info>; |
62 | | |
63 | | file_type File; |
64 | | HashTable Table; |
65 | | |
66 | | OnDiskTable(file_type File, unsigned NumBuckets, unsigned NumEntries, |
67 | | storage_type Buckets, storage_type Payload, storage_type Base, |
68 | | const Info &InfoObj) |
69 | 1.49M | : File(File), |
70 | 1.49M | Table(NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj) {} |
71 | | }; |
72 | | |
73 | | struct MergedTable { |
74 | | std::vector<file_type> Files; |
75 | | llvm::DenseMap<internal_key_type, data_type> Data; |
76 | | }; |
77 | | |
78 | | using Table = llvm::PointerUnion<OnDiskTable *, MergedTable *>; |
79 | | using TableVector = llvm::TinyPtrVector<void *>; |
80 | | |
81 | | /// The current set of on-disk and merged tables. |
82 | | /// We manually store the opaque value of the Table because TinyPtrVector |
83 | | /// can't cope with holding a PointerUnion directly. |
84 | | /// There can be at most one MergedTable in this vector, and if present, |
85 | | /// it is the first table. |
86 | | TableVector Tables; |
87 | | |
88 | | /// Files corresponding to overridden tables that we've not yet |
89 | | /// discarded. |
90 | | llvm::TinyPtrVector<file_type> PendingOverrides; |
91 | | |
92 | | struct AsOnDiskTable { |
93 | | using result_type = OnDiskTable *; |
94 | | |
95 | 1.33M | result_type operator()(void *P) const { |
96 | 1.33M | return Table::getFromOpaqueValue(P).template get<OnDiskTable *>(); |
97 | 1.33M | } |
98 | | }; |
99 | | |
100 | | using table_iterator = |
101 | | llvm::mapped_iterator<TableVector::iterator, AsOnDiskTable>; |
102 | | using table_range = llvm::iterator_range<table_iterator>; |
103 | | |
104 | | /// The current set of on-disk tables. |
105 | 1.44M | table_range tables() { |
106 | 1.44M | auto Begin = Tables.begin(), End = Tables.end(); |
107 | 1.44M | if (getMergedTable()) |
108 | 164k | ++Begin; |
109 | 1.44M | return llvm::make_range(llvm::map_iterator(Begin, AsOnDiskTable()), |
110 | 1.44M | llvm::map_iterator(End, AsOnDiskTable())); |
111 | 1.44M | } |
112 | | |
113 | 2.89M | MergedTable *getMergedTable() const { |
114 | | // If we already have a merged table, it's the first one. |
115 | 2.89M | return Tables.empty() ? nullptr : Table::getFromOpaqueValue(*Tables.begin()) |
116 | 2.15M | .template dyn_cast<MergedTable*>(); |
117 | 2.89M | } |
118 | | |
119 | | /// Delete all our current on-disk tables. |
120 | 655k | void clear() { |
121 | 655k | for (auto *T : tables()) |
122 | 374k | delete T; |
123 | 655k | if (auto *M = getMergedTable()) |
124 | 2.56k | delete M; |
125 | 655k | Tables.clear(); |
126 | 655k | } |
127 | | |
128 | 4.65k | void removeOverriddenTables() { |
129 | 4.65k | llvm::DenseSet<file_type> Files; |
130 | 4.65k | Files.insert(PendingOverrides.begin(), PendingOverrides.end()); |
131 | | // Explicitly capture Files to work around an MSVC 2015 rejects-valid bug. |
132 | 1.12M | auto ShouldRemove = [&Files](void *T) -> bool { |
133 | 1.12M | auto *ODT = Table::getFromOpaqueValue(T).template get<OnDiskTable *>(); |
134 | 1.12M | bool Remove = Files.count(ODT->File); |
135 | 1.12M | if (Remove) |
136 | 1.07M | delete ODT; |
137 | 1.12M | return Remove; |
138 | 1.12M | }; |
139 | 4.65k | Tables.erase(std::remove_if(tables().begin().getCurrent(), Tables.end(), |
140 | 4.65k | ShouldRemove), |
141 | 4.65k | Tables.end()); |
142 | 4.65k | PendingOverrides.clear(); |
143 | 4.65k | } |
144 | | |
145 | 2.65k | void condense() { |
146 | 2.65k | MergedTable *Merged = getMergedTable(); |
147 | 2.65k | if (!Merged) |
148 | 2.56k | Merged = new MergedTable; |
149 | | |
150 | | // Read in all the tables and merge them together. |
151 | | // FIXME: Be smarter about which tables we merge. |
152 | 43.4k | for (auto *ODT : tables()) { |
153 | 43.4k | auto &HT = ODT->Table; |
154 | 43.4k | Info &InfoObj = HT.getInfoObj(); |
155 | | |
156 | 19.0M | for (auto I = HT.data_begin(), E = HT.data_end(); I != E; ++I) { |
157 | 18.9M | auto *LocalPtr = I.getItem(); |
158 | | |
159 | | // FIXME: Don't rely on the OnDiskHashTable format here. |
160 | 18.9M | auto L = InfoObj.ReadKeyDataLength(LocalPtr); |
161 | 18.9M | const internal_key_type &Key = InfoObj.ReadKey(LocalPtr, L.first); |
162 | 18.9M | data_type_builder ValueBuilder(Merged->Data[Key]); |
163 | 18.9M | InfoObj.ReadDataInto(Key, LocalPtr + L.first, L.second, |
164 | 18.9M | ValueBuilder); |
165 | 18.9M | } |
166 | | |
167 | 43.4k | Merged->Files.push_back(ODT->File); |
168 | 43.4k | delete ODT; |
169 | 43.4k | } |
170 | | |
171 | 2.65k | Tables.clear(); |
172 | 2.65k | Tables.push_back(Table(Merged).getOpaqueValue()); |
173 | 2.65k | } |
174 | | |
175 | | public: |
176 | 286k | MultiOnDiskHashTable() = default; |
177 | | |
178 | | MultiOnDiskHashTable(MultiOnDiskHashTable &&O) |
179 | 370k | : Tables(std::move(O.Tables)), |
180 | 370k | PendingOverrides(std::move(O.PendingOverrides)) { |
181 | 370k | O.Tables.clear(); |
182 | 370k | } |
183 | | |
184 | | MultiOnDiskHashTable &operator=(MultiOnDiskHashTable &&O) { |
185 | | if (&O == this) |
186 | | return *this; |
187 | | clear(); |
188 | | Tables = std::move(O.Tables); |
189 | | O.Tables.clear(); |
190 | | PendingOverrides = std::move(O.PendingOverrides); |
191 | | return *this; |
192 | | } |
193 | | |
194 | 655k | ~MultiOnDiskHashTable() { clear(); } |
195 | | |
196 | | /// Add the table \p Data loaded from file \p File. |
197 | 1.49M | void add(file_type File, storage_type Data, Info InfoObj = Info()) { |
198 | 1.49M | using namespace llvm::support; |
199 | | |
200 | 1.49M | storage_type Ptr = Data; |
201 | | |
202 | 1.49M | uint32_t BucketOffset = endian::readNext<uint32_t, little, unaligned>(Ptr); |
203 | | |
204 | | // Read the list of overridden files. |
205 | 1.49M | uint32_t NumFiles = endian::readNext<uint32_t, little, unaligned>(Ptr); |
206 | | // FIXME: Add a reserve() to TinyPtrVector so that we don't need to make |
207 | | // an additional copy. |
208 | 1.49M | llvm::SmallVector<file_type, 16> OverriddenFiles; |
209 | 1.49M | OverriddenFiles.reserve(NumFiles); |
210 | 5.50M | for (/**/; NumFiles != 0; --NumFiles) |
211 | 4.00M | OverriddenFiles.push_back(InfoObj.ReadFileRef(Ptr)); |
212 | 1.49M | PendingOverrides.insert(PendingOverrides.end(), OverriddenFiles.begin(), |
213 | 1.49M | OverriddenFiles.end()); |
214 | | |
215 | | // Read the OnDiskChainedHashTable header. |
216 | 1.49M | storage_type Buckets = Data + BucketOffset; |
217 | 1.49M | auto NumBucketsAndEntries = |
218 | 1.49M | OnDiskTable::HashTable::readNumBucketsAndEntries(Buckets); |
219 | | |
220 | | // Register the table. |
221 | 1.49M | Table NewTable = new OnDiskTable(File, NumBucketsAndEntries.first, |
222 | 1.49M | NumBucketsAndEntries.second, |
223 | 1.49M | Buckets, Ptr, Data, std::move(InfoObj)); |
224 | 1.49M | Tables.push_back(NewTable.getOpaqueValue()); |
225 | 1.49M | } |
226 | | |
227 | | /// Find and read the lookup results for \p EKey. |
228 | 786k | data_type find(const external_key_type &EKey) { |
229 | 786k | data_type Result; |
230 | | |
231 | 786k | if (!PendingOverrides.empty()) |
232 | 4.65k | removeOverriddenTables(); |
233 | | |
234 | 786k | if (Tables.size() > static_cast<unsigned>(Info::MaxTables)) |
235 | 2.65k | condense(); |
236 | | |
237 | 786k | internal_key_type Key = Info::GetInternalKey(EKey); |
238 | 786k | auto KeyHash = Info::ComputeHash(Key); |
239 | | |
240 | 786k | if (MergedTable *M = getMergedTable()) { |
241 | 161k | auto It = M->Data.find(Key); |
242 | 161k | if (It != M->Data.end()) |
243 | 46.6k | Result = It->second; |
244 | 161k | } |
245 | | |
246 | 786k | data_type_builder ResultBuilder(Result); |
247 | | |
248 | 918k | for (auto *ODT : tables()) { |
249 | 918k | auto &HT = ODT->Table; |
250 | 918k | auto It = HT.find_hashed(Key, KeyHash); |
251 | 918k | if (It != HT.end()) |
252 | 154k | HT.getInfoObj().ReadDataInto(Key, It.getDataPtr(), It.getDataLen(), |
253 | 154k | ResultBuilder); |
254 | 918k | } |
255 | | |
256 | 786k | return Result; |
257 | 786k | } |
258 | | |
259 | | /// Read all the lookup results into a single value. This only makes |
260 | | /// sense if merging values across keys is meaningful. |
261 | 75 | data_type findAll() { |
262 | 75 | data_type Result; |
263 | 75 | data_type_builder ResultBuilder(Result); |
264 | | |
265 | 75 | if (!PendingOverrides.empty()) |
266 | 0 | removeOverriddenTables(); |
267 | | |
268 | 75 | if (MergedTable *M = getMergedTable()) { |
269 | 0 | for (auto &KV : M->Data) |
270 | 0 | Info::MergeDataInto(KV.second, ResultBuilder); |
271 | 0 | } |
272 | | |
273 | 82 | for (auto *ODT : tables()) { |
274 | 82 | auto &HT = ODT->Table; |
275 | 82 | Info &InfoObj = HT.getInfoObj(); |
276 | 408 | for (auto I = HT.data_begin(), E = HT.data_end(); I != E; ++I) { |
277 | 326 | auto *LocalPtr = I.getItem(); |
278 | | |
279 | | // FIXME: Don't rely on the OnDiskHashTable format here. |
280 | 326 | auto L = InfoObj.ReadKeyDataLength(LocalPtr); |
281 | 326 | const internal_key_type &Key = InfoObj.ReadKey(LocalPtr, L.first); |
282 | 326 | InfoObj.ReadDataInto(Key, LocalPtr + L.first, L.second, ResultBuilder); |
283 | 326 | } |
284 | 82 | } |
285 | | |
286 | 75 | return Result; |
287 | 75 | } |
288 | | }; |
289 | | |
290 | | /// Writer for the on-disk hash table. |
291 | | template<typename ReaderInfo, typename WriterInfo> |
292 | | class MultiOnDiskHashTableGenerator { |
293 | | using BaseTable = MultiOnDiskHashTable<ReaderInfo>; |
294 | | using Generator = llvm::OnDiskChainedHashTableGenerator<WriterInfo>; |
295 | | |
296 | | Generator Gen; |
297 | | |
298 | | public: |
299 | 61.0k | MultiOnDiskHashTableGenerator() : Gen() {} |
300 | | |
301 | | void insert(typename WriterInfo::key_type_ref Key, |
302 | 578k | typename WriterInfo::data_type_ref Data, WriterInfo &Info) { |
303 | 578k | Gen.insert(Key, Data, Info); |
304 | 578k | } |
305 | | |
306 | | void emit(llvm::SmallVectorImpl<char> &Out, WriterInfo &Info, |
307 | 61.0k | const BaseTable *Base) { |
308 | 61.0k | using namespace llvm::support; |
309 | | |
310 | 61.0k | llvm::raw_svector_ostream OutStream(Out); |
311 | | |
312 | | // Write our header information. |
313 | 61.0k | { |
314 | 61.0k | endian::Writer Writer(OutStream, little); |
315 | | |
316 | | // Reserve four bytes for the bucket offset. |
317 | 61.0k | Writer.write<uint32_t>(0); |
318 | | |
319 | 61.0k | if (auto *Merged = Base ? Base->getMergedTable() : nullptr) { |
320 | | // Write list of overridden files. |
321 | 1.10k | Writer.write<uint32_t>(Merged->Files.size()); |
322 | 1.10k | for (const auto &F : Merged->Files) |
323 | 11.2k | Info.EmitFileRef(OutStream, F); |
324 | | |
325 | | // Add all merged entries from Base to the generator. |
326 | 2.21M | for (auto &KV : Merged->Data) { |
327 | 2.21M | if (!Gen.contains(KV.first, Info)) |
328 | 2.19M | Gen.insert(KV.first, Info.ImportData(KV.second), Info); |
329 | 2.21M | } |
330 | 59.9k | } else { |
331 | 59.9k | Writer.write<uint32_t>(0); |
332 | 59.9k | } |
333 | 61.0k | } |
334 | | |
335 | | // Write the table itself. |
336 | 61.0k | uint32_t BucketOffset = Gen.Emit(OutStream, Info); |
337 | | |
338 | | // Replace the first four bytes with the bucket offset. |
339 | 61.0k | endian::write32le(Out.data(), BucketOffset); |
340 | 61.0k | } |
341 | | }; |
342 | | |
343 | | } // namespace serialization |
344 | | } // namespace clang |
345 | | |
346 | | #endif // LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H |
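
The byte layout that MultiOnDiskHashTableGenerator::emit() writes and MultiOnDiskHashTable::add() parses is implicit in the listing above: a four-byte little-endian bucket-offset placeholder, a four-byte count of overridden files followed by the file references, then the OnDiskChainedHashTable payload, with the placeholder backpatched once the real bucket offset is known. The standalone sketch below reproduces that framing convention with plain standard-library code; the writeU32LE/readU32LE helpers and the length-prefixed string file references are illustrative stand-ins for llvm::support::endian and Info::EmitFileRef/ReadFileRef, not the real trait.

#include <cstdint>
#include <string>
#include <vector>

// Little-endian helpers standing in for llvm::support::endian.
static void writeU32LE(std::vector<uint8_t> &Out, uint32_t V) {
  for (int I = 0; I < 4; ++I)
    Out.push_back(uint8_t(V >> (8 * I)));
}
static uint32_t readU32LE(const uint8_t *&Ptr) {
  uint32_t V = 0;
  for (int I = 0; I < 4; ++I)
    V |= uint32_t(Ptr[I]) << (8 * I);
  Ptr += 4;
  return V;
}

// Writer side, mirroring MultiOnDiskHashTableGenerator::emit(): reserve the
// bucket-offset word, write the overridden-file list, append the table
// payload, then backpatch the reserved word.
std::vector<uint8_t> emitFrame(const std::vector<std::string> &OverriddenFiles,
                               const std::vector<uint8_t> &TablePayload,
                               uint32_t BucketOffsetInPayload) {
  std::vector<uint8_t> Out;
  writeU32LE(Out, 0); // placeholder for the bucket offset
  writeU32LE(Out, uint32_t(OverriddenFiles.size()));
  for (const std::string &F : OverriddenFiles) { // stand-in for EmitFileRef
    writeU32LE(Out, uint32_t(F.size()));
    Out.insert(Out.end(), F.begin(), F.end());
  }
  uint32_t PayloadStart = uint32_t(Out.size());
  Out.insert(Out.end(), TablePayload.begin(), TablePayload.end());
  // Backpatch the first four bytes, as emit() does with endian::write32le.
  uint32_t BucketOffset = PayloadStart + BucketOffsetInPayload;
  for (int I = 0; I < 4; ++I)
    Out[I] = uint8_t(BucketOffset >> (8 * I));
  return Out;
}

// Reader side, mirroring MultiOnDiskHashTable::add(): read the bucket offset
// and the overridden-file list, then hand the rest to the hash table reader.
void parseFrame(const std::vector<uint8_t> &Data,
                std::vector<std::string> &OverriddenFiles,
                uint32_t &BucketOffset, const uint8_t *&Payload) {
  const uint8_t *Ptr = Data.data();
  BucketOffset = readU32LE(Ptr);
  for (uint32_t N = readU32LE(Ptr); N != 0; --N) { // stand-in for ReadFileRef
    uint32_t Len = readU32LE(Ptr);
    OverriddenFiles.emplace_back(reinterpret_cast<const char *>(Ptr), Len);
    Ptr += Len;
  }
  Payload = Ptr; // start of the OnDiskChainedHashTable data
}

Round-tripping a table payload through emitFrame() and parseFrame() returns the same overridden-file list plus a bucket offset pointing back into the payload, which is all add() needs before it constructs an OnDiskTable.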
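
The reload behavior described in the file header, where a merged table overrides tables from other files, falls out of removeOverriddenTables() and condense(): a newly added table names the files it supersedes, those tables are dropped lazily before the next lookup, and once more than Info::MaxTables per-file tables accumulate they are folded into one in-memory merged map whose source files are remembered for the next emit(). Below is a minimal model of that control flow, assuming C++20; FileID, Key, Value, and the string-concatenating merge are hypothetical simplifications of the real Info trait and data_type_builder.

#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

using FileID = std::string;
using Key = std::string;
using Value = std::string;

struct MiniMultiTable {
  // One map per still-live on-disk table, tagged with its source file.
  std::vector<std::pair<FileID, std::map<Key, Value>>> Tables;
  // Files whose tables have been superseded but not yet discarded.
  std::set<FileID> PendingOverrides;
  // The condensed table, if any, plus the files it absorbed.
  std::map<Key, Value> Merged;
  std::vector<FileID> MergedFiles;
  static constexpr size_t MaxTables = 4; // stands in for Info::MaxTables

  void add(FileID File, std::map<Key, Value> Table,
           std::vector<FileID> Overrides) {
    PendingOverrides.insert(Overrides.begin(), Overrides.end());
    Tables.emplace_back(std::move(File), std::move(Table));
  }

  // Drop every table whose source file has been overridden (C++20 erase_if).
  void removeOverriddenTables() {
    std::erase_if(Tables, [&](const auto &T) {
      return PendingOverrides.count(T.first) != 0;
    });
    PendingOverrides.clear();
  }

  // Fold all remaining per-file tables into the merged map, remembering
  // which files were absorbed so a later emit() can list them as overridden.
  void condense() {
    for (auto &[File, Table] : Tables) {
      for (auto &[K, V] : Table)
        Merged[K] += V; // stand-in for data_type_builder merging
      MergedFiles.push_back(File);
    }
    Tables.clear();
  }

  // find(): lazily drop overridden tables, condense when too many tables
  // remain, then combine the merged result with every remaining table.
  Value find(const Key &K) {
    if (!PendingOverrides.empty())
      removeOverriddenTables();
    if (Tables.size() > MaxTables)
      condense();
    Value Result;
    if (auto It = Merged.find(K); It != Merged.end())
      Result += It->second;
    for (auto &Entry : Tables)
      if (auto It = Entry.second.find(K); It != Entry.second.end())
        Result += It->second;
    return Result;
  }
};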