/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h
Line | Count | Source |
1 | | //===- TypeHashing.h ---------------------------------------------*- C++-*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H |
10 | | #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H |
11 | | |
12 | | #include "llvm/ADT/DenseMapInfo.h" |
13 | | #include "llvm/ADT/Hashing.h" |
14 | | |
15 | | #include "llvm/DebugInfo/CodeView/CodeView.h" |
16 | | #include "llvm/DebugInfo/CodeView/TypeCollection.h" |
17 | | #include "llvm/DebugInfo/CodeView/TypeIndex.h" |
18 | | |
19 | | #include "llvm/Support/FormatProviders.h" |
20 | | |
21 | | #include <type_traits> |
22 | | |
23 | | namespace llvm { |
24 | | namespace codeview { |
25 | | |
26 | | /// A locally hashed type represents a straightforward hash code of a serialized |
27 | | /// record. The record is simply serialized, and then the bytes are hashed by |
28 | | /// a standard algorithm. This is sufficient for the case of de-duplicating |
29 | | /// records within a single sequence of types, because if two records both have |
30 | | /// a back-reference to the same type in the same stream, they will both have |
31 | | /// the same numeric value for the TypeIndex of the back reference. |
32 | | struct LocallyHashedType { |
33 | | hash_code Hash; |
34 | | ArrayRef<uint8_t> RecordData; |
35 | | |
36 | | /// Given a type, compute its local hash. |
37 | | static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData); |
38 | | |
39 | | /// Given a sequence of types, compute all of the local hashes. |
40 | | template <typename Range> |
41 | | static std::vector<LocallyHashedType> hashTypes(Range &&Records) { |
42 | | std::vector<LocallyHashedType> Hashes; |
43 | | Hashes.reserve(std::distance(std::begin(Records), std::end(Records))); |
44 | | for (const auto &R : Records) |
45 | | Hashes.push_back(hashType(R)); |
46 | | |
47 | | return Hashes; |
48 | | } |
49 | | |
50 | | static std::vector<LocallyHashedType> |
51 | | hashTypeCollection(TypeCollection &Types) { |
52 | | std::vector<LocallyHashedType> Hashes; |
53 | | Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { |
54 | | Hashes.push_back(hashType(Type.RecordData)); |
55 | | }); |
56 | | return Hashes; |
57 | | } |
58 | | }; |
59 | | |
60 | | enum class GlobalTypeHashAlg : uint16_t { |
61 | | SHA1 = 0, // standard 20-byte SHA1 hash |
62 | | SHA1_8 // last 8-bytes of standard SHA1 hash |
63 | | }; |
64 | | |
65 | | /// A globally hashed type represents a hash value that is sufficient to |
66 | | /// uniquely identify a record across multiple type streams or type sequences. |
67 | | /// This works by, for any given record A which references B, replacing the |
68 | | /// TypeIndex that refers to B with a previously-computed global hash for B. As |
69 | | /// this is a recursive algorithm (e.g. the global hash of B also depends on the |
70 | | /// global hashes of the types that B refers to), a global hash can uniquely |
71 | | /// identify identify that A occurs in another stream that has a completely |
72 | | /// different graph structure. Although the hash itself is slower to compute, |
73 | | /// probing is much faster with a globally hashed type, because the hash itself |
74 | | /// is considered "as good as" the original type. Since type records can be |
75 | | /// quite large, this makes the equality comparison of the hash much faster than |
76 | | /// equality comparison of a full record. |
77 | | struct GloballyHashedType { |
78 | | GloballyHashedType() = default; |
79 | | GloballyHashedType(StringRef H) |
80 | 3.60k | : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {} Unexecuted instantiation: llvm::codeview::GloballyHashedType::GloballyHashedType(llvm::StringRef) llvm::codeview::GloballyHashedType::GloballyHashedType(llvm::StringRef) Line | Count | Source | 80 | 3.60k | : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {} |
|
81 | 217k | GloballyHashedType(ArrayRef<uint8_t> H) { |
82 | 217k | assert(H.size() == 8); |
83 | 217k | ::memcpy(Hash.data(), H.data(), 8); |
84 | 217k | } |
85 | | std::array<uint8_t, 8> Hash; |
86 | | |
87 | 3.37k | bool empty() const { return *(const uint64_t*)Hash.data() == 0; } |
88 | | |
89 | | /// Given a sequence of bytes representing a record, compute a global hash for |
90 | | /// this record. Due to the nature of global hashes incorporating the hashes |
91 | | /// of referenced records, this function requires a list of types and ids |
92 | | /// that RecordData might reference, indexable by TypeIndex. |
93 | | static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData, |
94 | | ArrayRef<GloballyHashedType> PreviousTypes, |
95 | | ArrayRef<GloballyHashedType> PreviousIds); |
96 | | |
97 | | /// Given a sequence of bytes representing a record, compute a global hash for |
98 | | /// this record. Due to the nature of global hashes incorporating the hashes |
99 | | /// of referenced records, this function requires a list of types and ids |
100 | | /// that RecordData might reference, indexable by TypeIndex. |
101 | | static GloballyHashedType hashType(CVType Type, |
102 | | ArrayRef<GloballyHashedType> PreviousTypes, |
103 | | ArrayRef<GloballyHashedType> PreviousIds) { |
104 | | return hashType(Type.RecordData, PreviousTypes, PreviousIds); |
105 | | } |
106 | | |
107 | | /// Given a sequence of combined type and ID records, compute global hashes |
108 | | /// for each of them, returning the results in a vector of hashed types. |
109 | | template <typename Range> |
110 | | static std::vector<GloballyHashedType> hashTypes(Range &&Records) { |
111 | | std::vector<GloballyHashedType> Hashes; |
112 | | bool UnresolvedRecords = false; |
113 | | for (const auto &R : Records) { |
114 | | GloballyHashedType H = hashType(R, Hashes, Hashes); |
115 | | if (H.empty()) |
116 | | UnresolvedRecords = true; |
117 | | Hashes.push_back(H); |
118 | | } |
119 | | |
120 | | // In some rare cases, there might be records with forward references in the |
121 | | // stream. Several passes might be needed to fully hash each record in the |
122 | | // Type stream. However this occurs on very small OBJs generated by MASM, |
123 | | // with a dozen records at most. Therefore this codepath isn't |
124 | | // time-critical, as it isn't taken in 99% of cases. |
125 | | while (UnresolvedRecords) { |
126 | | UnresolvedRecords = false; |
127 | | auto HashIt = Hashes.begin(); |
128 | | for (const auto &R : Records) { |
129 | | if (HashIt->empty()) { |
130 | | GloballyHashedType H = hashType(R, Hashes, Hashes); |
131 | | if (H.empty()) |
132 | | UnresolvedRecords = true; |
133 | | else |
134 | | *HashIt = H; |
135 | | } |
136 | | ++HashIt; |
137 | | } |
138 | | } |
139 | | |
140 | | return Hashes; |
141 | | } |
142 | | |
143 | | /// Given a sequence of combined type and ID records, compute global hashes |
144 | | /// for each of them, returning the results in a vector of hashed types. |
145 | | template <typename Range> |
146 | | static std::vector<GloballyHashedType> |
147 | | hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) { |
148 | | std::vector<GloballyHashedType> IdHashes; |
149 | | for (const auto &R : Records) |
150 | | IdHashes.push_back(hashType(R, TypeHashes, IdHashes)); |
151 | | |
152 | | return IdHashes; |
153 | | } |
154 | | |
155 | | static std::vector<GloballyHashedType> |
156 | | hashTypeCollection(TypeCollection &Types) { |
157 | | std::vector<GloballyHashedType> Hashes; |
158 | | Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) { |
159 | | Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes)); |
160 | | }); |
161 | | return Hashes; |
162 | | } |
163 | | }; |
164 | | #if defined(_MSC_VER) |
165 | | // is_trivially_copyable is not available in older versions of libc++, but it is |
166 | | // available in all supported versions of MSVC, so at least this gives us some |
167 | | // coverage. |
168 | | static_assert(std::is_trivially_copyable<GloballyHashedType>::value, |
169 | | "GloballyHashedType must be trivially copyable so that we can " |
170 | | "reinterpret_cast arrays of hash data to arrays of " |
171 | | "GloballyHashedType"); |
172 | | #endif |
173 | | } // namespace codeview |
174 | | |
175 | | template <> struct DenseMapInfo<codeview::LocallyHashedType> { |
176 | | static codeview::LocallyHashedType Empty; |
177 | | static codeview::LocallyHashedType Tombstone; |
178 | | |
179 | | static codeview::LocallyHashedType getEmptyKey() { return Empty; } |
180 | | |
181 | | static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; } |
182 | | |
183 | | static unsigned getHashValue(codeview::LocallyHashedType Val) { |
184 | | return Val.Hash; |
185 | | } |
186 | | |
187 | | static bool isEqual(codeview::LocallyHashedType LHS, |
188 | | codeview::LocallyHashedType RHS) { |
189 | | if (LHS.Hash != RHS.Hash) |
190 | | return false; |
191 | | return LHS.RecordData == RHS.RecordData; |
192 | | } |
193 | | }; |
194 | | |
195 | | template <> struct DenseMapInfo<codeview::GloballyHashedType> { |
196 | | static codeview::GloballyHashedType Empty; |
197 | | static codeview::GloballyHashedType Tombstone; |
198 | | |
199 | 7.69k | static codeview::GloballyHashedType getEmptyKey() { return Empty; } |
200 | | |
201 | 4.33k | static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; } |
202 | | |
203 | 4.12k | static unsigned getHashValue(codeview::GloballyHashedType Val) { |
204 | 4.12k | return *reinterpret_cast<const unsigned *>(Val.Hash.data()); |
205 | 4.12k | } |
206 | | |
207 | | static bool isEqual(codeview::GloballyHashedType LHS, |
208 | 32.5k | codeview::GloballyHashedType RHS) { |
209 | 32.5k | return LHS.Hash == RHS.Hash; |
210 | 32.5k | } |
211 | | }; |
212 | | |
213 | | template <> struct format_provider<codeview::LocallyHashedType> { |
214 | | public: |
215 | | static void format(const codeview::LocallyHashedType &V, |
216 | | llvm::raw_ostream &Stream, StringRef Style) { |
217 | | write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8); |
218 | | } |
219 | | }; |
220 | | |
221 | | template <> struct format_provider<codeview::GloballyHashedType> { |
222 | | public: |
223 | | static void format(const codeview::GloballyHashedType &V, |
224 | 31 | llvm::raw_ostream &Stream, StringRef Style) { |
225 | 248 | for (uint8_t B : V.Hash) { |
226 | 248 | write_hex(Stream, B, HexPrintStyle::Upper, 2); |
227 | 248 | } |
228 | 31 | } |
229 | | }; |
230 | | |
231 | | } // namespace llvm |
232 | | |
233 | | #endif |