/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/include/clang/Serialization/SourceLocationEncoding.h
Line | Count | Source |
1 | | //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // Source locations are stored pervasively in the AST, making up a third of |
10 | | // the size of typical serialized files. Storing them efficiently is important. |
11 | | // |
12 | | // We use integers optimized by VBR-encoding, because: |
13 | | // - when abbreviations cannot be used, VBR6 encoding is our only choice |
14 | | // - in the worst case a SourceLocation can be ~any 32-bit number, but in |
15 | | // practice they are highly predictable |
16 | | // |
17 | | // We encode the integer so that likely values encode as small numbers that |
18 | | // turn into few VBR chunks: |
19 | | // - the invalid sentinel location is a very common value: it encodes as 0 |
20 | | // - the "macro or not" bit is stored at the bottom of the integer |
21 | | // (rather than at the top, as in memory), so macro locations can have |
22 | | // small representations. |
23 | | // - related locations (e.g. of a left and right paren pair) are usually |
24 | | // similar, so when encoding a sequence of locations we store only |
25 | | // differences between successive elements. |
26 | | // |
27 | | //===----------------------------------------------------------------------===// |
28 | | |
29 | | #include <climits> |
30 | | #include "clang/Basic/SourceLocation.h" |
31 | | |
32 | | #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H |
33 | | #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H |
34 | | |
35 | | namespace clang { |
36 | | class SourceLocationSequence; |
37 | | |
38 | | /// Serialized encoding of SourceLocations without context. |
39 | | /// Optimized to have small unsigned values (=> small after VBR encoding). |
40 | | /// |
41 | | // Macro locations have the top bit set, we rotate by one so it is the low bit. |
42 | | class SourceLocationEncoding { |
43 | | using UIntTy = SourceLocation::UIntTy; |
44 | | constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy); |
45 | | |
46 | 26.0M | static UIntTy encodeRaw(UIntTy Raw) { |
47 | 26.0M | return (Raw << 1) | (Raw >> (UIntBits - 1)); |
48 | 26.0M | } |
49 | 75.1M | static UIntTy decodeRaw(UIntTy Raw) { |
50 | 75.1M | return (Raw >> 1) | (Raw << (UIntBits - 1)); |
51 | 75.1M | } |
52 | | friend SourceLocationSequence; |
53 | | |
54 | | public: |
55 | | static uint64_t encode(SourceLocation Loc, |
56 | | SourceLocationSequence * = nullptr); |
57 | | static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr); |
58 | | }; |
59 | | |
60 | | /// Serialized encoding of a sequence of SourceLocations. |
61 | | /// |
62 | | /// Optimized to produce small values when locations with the sequence are |
63 | | /// similar. Each element can be delta-encoded against the last nonzero element. |
64 | | /// |
65 | | /// Sequences should be started by creating a SourceLocationSequence::State, |
66 | | /// and then passed around as SourceLocationSequence*. Example: |
67 | | /// |
68 | | /// // establishes a sequence |
69 | | /// void EmitTopLevelThing() { |
70 | | /// SourceLocationSequence::State Seq; |
71 | | /// EmitContainedThing(Seq); |
72 | | /// EmitRecursiveThing(Seq); |
73 | | /// } |
74 | | /// |
75 | | /// // optionally part of a sequence |
76 | | /// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) { |
77 | | /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); |
78 | | /// } |
79 | | /// |
80 | | /// // establishes a sequence if there isn't one already |
81 | | /// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) { |
82 | | /// SourceLocationSequence::State Seq(ParentSeq); |
83 | | /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); |
84 | | /// EmitRecursiveThing(Seq); |
85 | | /// } |
86 | | /// |
87 | | class SourceLocationSequence { |
88 | | using UIntTy = SourceLocation::UIntTy; |
89 | | using EncodedTy = uint64_t; |
90 | | constexpr static auto UIntBits = SourceLocationEncoding::UIntBits; |
91 | | static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!"); |
92 | | |
93 | | // Prev stores the rotated last nonzero location. |
94 | | UIntTy &Prev; |
95 | | |
96 | | // Zig-zag encoding turns small signed integers into small unsigned integers. |
97 | | // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ... |
98 | 5.47M | static UIntTy zigZag(UIntTy V) { |
99 | 5.47M | UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1)722k : UIntTy(0)4.74M ; |
100 | 5.47M | return Sign ^ (V << 1); |
101 | 5.47M | } |
102 | 28.6M | static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); } |
103 | | |
104 | 21.8M | SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {} |
105 | | |
106 | 12.0M | EncodedTy encodeRaw(UIntTy Raw) { |
107 | 12.0M | if (Raw == 0) |
108 | 2.56M | return 0; |
109 | 9.44M | UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw); |
110 | 9.44M | if (Prev == 0) |
111 | 3.97M | return Prev = Rotated; |
112 | 5.47M | UIntTy Delta = Rotated - Prev; |
113 | 5.47M | Prev = Rotated; |
114 | | // Exactly one 33 bit value is possible! (1 << 32). |
115 | | // This is because we have two representations of zero: trivial & relative. |
116 | 5.47M | return 1 + EncodedTy{zigZag(Delta)}; |
117 | 9.44M | } |
118 | 53.6M | UIntTy decodeRaw(EncodedTy Encoded) { |
119 | 53.6M | if (Encoded == 0) |
120 | 7.11M | return 0; |
121 | 46.5M | if (Prev == 0) |
122 | 17.8M | return SourceLocationEncoding::decodeRaw(Prev = Encoded); |
123 | 28.6M | return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1)); |
124 | 46.5M | } |
125 | | |
126 | | public: |
127 | 53.6M | SourceLocation decode(EncodedTy Encoded) { |
128 | 53.6M | return SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); |
129 | 53.6M | } |
130 | 12.0M | EncodedTy encode(SourceLocation Loc) { |
131 | 12.0M | return encodeRaw(Loc.getRawEncoding()); |
132 | 12.0M | } |
133 | | |
134 | | class State; |
135 | | }; |
136 | | |
137 | | /// This object establishes a SourceLocationSequence. |
138 | | class SourceLocationSequence::State { |
139 | | UIntTy Prev = 0; |
140 | | SourceLocationSequence Seq; |
141 | | |
142 | | public: |
143 | | // If Parent is provided and non-null, then this root becomes part of that |
144 | | // enclosing sequence instead of establishing a new one. |
145 | | State(SourceLocationSequence *Parent = nullptr) |
146 | 21.8M | : Seq(Parent ? Parent->Prev : Prev) {} |
147 | | |
148 | | // Implicit conversion for uniform use of roots vs propagated sequences. |
149 | 52.7M | operator SourceLocationSequence *() { return &Seq; } |
150 | | }; |
151 | | |
152 | | inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc, |
153 | 28.6M | SourceLocationSequence *Seq) { |
154 | 28.6M | return Seq ? Seq->encode(Loc)12.0M : encodeRaw(Loc.getRawEncoding())16.6M ; |
155 | 28.6M | } |
156 | | inline SourceLocation |
157 | 82.2M | SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) { |
158 | 82.2M | return Seq ? Seq->decode(Encoded)53.6M |
159 | 82.2M | : SourceLocation::getFromRawEncoding(decodeRaw(Encoded))28.5M ; |
160 | 82.2M | } |
161 | | |
162 | | } // namespace clang |
163 | | #endif |