/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Format/Macros.h
Line | Count | Source |
1 | | //===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file contains the main building blocks of macro support in |
11 | | /// clang-format. |
12 | | /// |
13 | | /// In order to not violate the requirement that clang-format can format files |
14 | | /// in isolation, clang-format's macro support uses expansions users provide |
15 | | /// as part of clang-format's style configuration. |
16 | | /// |
17 | | /// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support |
18 | | /// one level of expansion (\see MacroExpander for a full description of what |
19 | | /// is supported). |
20 | | /// |
21 | | /// As part of parsing, clang-format uses the MacroExpander to expand the |
22 | | /// spelled token streams into expanded token streams when it encounters a |
23 | | /// macro call. The UnwrappedLineParser continues to parse UnwrappedLines |
24 | | /// from the expanded token stream. |
25 | | /// After the expanded unwrapped lines are parsed, the MacroCallReconstructor |
26 | | /// matches the spelled token stream into unwrapped lines that best resemble the |
27 | | /// structure of the expanded unwrapped lines. These reconstructed unwrapped |
28 | | /// lines are aliasing the tokens in the expanded token stream, so that token |
29 | | /// annotations will be reused when formatting the spelled macro calls. |
30 | | /// |
31 | | /// When formatting, clang-format annotates and formats the expanded unwrapped |
32 | | /// lines first, determining the token types. Next, it formats the spelled |
33 | | /// unwrapped lines, keeping the token types fixed, while allowing other |
34 | | /// formatting decisions to change. |
35 | | /// |
36 | | //===----------------------------------------------------------------------===// |
37 | | |
38 | | #ifndef CLANG_LIB_FORMAT_MACROS_H |
39 | | #define CLANG_LIB_FORMAT_MACROS_H |
40 | | |
41 | | #include <list> |
42 | | #include <map> |
43 | | #include <string> |
44 | | #include <vector> |
45 | | |
46 | | #include "FormatToken.h" |
47 | | #include "llvm/ADT/ArrayRef.h" |
48 | | #include "llvm/ADT/DenseMap.h" |
49 | | #include "llvm/ADT/SmallVector.h" |
50 | | #include "llvm/ADT/StringRef.h" |
51 | | |
52 | | namespace clang { |
53 | | namespace format { |
54 | | |
55 | | struct UnwrappedLine; |
56 | | struct UnwrappedLineNode; |
57 | | |
58 | | /// Takes a set of macro definitions as strings and allows expanding calls to |
59 | | /// those macros. |
60 | | /// |
61 | | /// For example: |
62 | | /// Definition: A(x, y)=x + y |
63 | | /// Call : A(int a = 1, 2) |
64 | | /// Expansion : int a = 1 + 2 |
65 | | /// |
66 | | /// Expansion does not check arity of the definition. |
67 | | /// If fewer arguments than expected are provided, the remaining parameters |
68 | | /// are considered empty: |
69 | | /// Call : A(a) |
70 | | /// Expansion: a + |
71 | | /// If more arguments than expected are provided, they will be discarded. |
72 | | /// |
73 | | /// The expander does not support: |
74 | | /// - recursive expansion |
75 | | /// - stringification |
76 | | /// - concatenation |
77 | | /// - variadic macros |
78 | | /// |
79 | | /// Furthermore, only a single expansion of each macro argument is supported, |
80 | | /// so that we cannot get conflicting formatting decisions from different |
81 | | /// expansions. |
82 | | /// Definition: A(x)=x+x |
83 | | /// Call : A(id) |
84 | | /// Expansion : id+x |
85 | | /// |
86 | | class MacroExpander { |
87 | | public: |
88 | | using ArgsList = llvm::ArrayRef<llvm::SmallVector<FormatToken *, 8>>; |
89 | | |
90 | | /// Construct a macro expander from a set of macro definitions. |
91 | | /// Macro definitions must be encoded as UTF-8. |
92 | | /// |
93 | | /// Each entry in \p Macros must conform to the following simple |
94 | | /// macro-definition language: |
95 | | /// <definition> ::= <id> <expansion> | <id> "(" <params> ")" <expansion> |
96 | | /// <params> ::= <id-list> | "" |
97 | | /// <id-list> ::= <id> | <id> "," <params> |
98 | | /// <expansion> ::= "=" <tail> | <eof> |
99 | | /// <tail> ::= <tok> <tail> | <eof> |
100 | | /// |
101 | | /// Macros that cannot be parsed will be silently discarded. |
102 | | /// |
103 | | MacroExpander(const std::vector<std::string> &Macros, |
104 | | clang::SourceManager &SourceMgr, const FormatStyle &Style, |
105 | | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, |
106 | | IdentifierTable &IdentTable); |
107 | | ~MacroExpander(); |
108 | | |
109 | | /// Returns whether any macro \p Name is defined, regardless of overloads. |
110 | | bool defined(llvm::StringRef Name) const; |
111 | | |
112 | | /// Returns whetherh there is an object-like overload, i.e. where the macro |
113 | | /// has no arguments and should not consume subsequent parentheses. |
114 | | bool objectLike(llvm::StringRef Name) const; |
115 | | |
116 | | /// Returns whether macro \p Name provides an overload with the given arity. |
117 | | bool hasArity(llvm::StringRef Name, unsigned Arity) const; |
118 | | |
119 | | /// Returns the expanded stream of format tokens for \p ID, where |
120 | | /// each element in \p Args is a positional argument to the macro call. |
121 | | /// If \p Args is not set, the object-like overload is used. |
122 | | /// If \p Args is set, the overload with the arity equal to \c Args.size() is |
123 | | /// used. |
124 | | llvm::SmallVector<FormatToken *, 8> |
125 | | expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const; |
126 | | |
127 | | private: |
128 | | struct Definition; |
129 | | class DefinitionParser; |
130 | | |
131 | | void parseDefinition(const std::string &Macro); |
132 | | |
133 | | clang::SourceManager &SourceMgr; |
134 | | const FormatStyle &Style; |
135 | | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; |
136 | | IdentifierTable &IdentTable; |
137 | | SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers; |
138 | | llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike; |
139 | | llvm::StringMap<Definition> ObjectLike; |
140 | | }; |
141 | | |
142 | | /// Converts a sequence of UnwrappedLines containing expanded macros into a |
143 | | /// single UnwrappedLine containing the macro calls. This UnwrappedLine may be |
144 | | /// broken into child lines, in a way that best conveys the structure of the |
145 | | /// expanded code. |
146 | | /// |
147 | | /// In the simplest case, a spelled UnwrappedLine contains one macro, and after |
148 | | /// expanding it we have one expanded UnwrappedLine. In general, macro |
149 | | /// expansions can span UnwrappedLines, and multiple macros can contribute |
150 | | /// tokens to the same line. We keep consuming expanded lines until: |
151 | | /// * all expansions that started have finished (we're not chopping any macros |
152 | | /// in half) |
153 | | /// * *and* we've reached the end of a *spelled* unwrapped line. |
154 | | /// |
155 | | /// A single UnwrappedLine represents this chunk of code. |
156 | | /// |
157 | | /// After this point, the state of the spelled/expanded stream is "in sync" |
158 | | /// (both at the start of an UnwrappedLine, with no macros open), so the |
159 | | /// Reconstructor can be thrown away and parsing can continue. |
160 | | /// |
161 | | /// Given a mapping from the macro name identifier token in the macro call |
162 | | /// to the tokens of the macro call, for example: |
163 | | /// CLASSA -> CLASSA({public: void x();}) |
164 | | /// |
165 | | /// When getting the formatted lines of the expansion via the \c addLine method |
166 | | /// (each '->' specifies a call to \c addLine ): |
167 | | /// -> class A { |
168 | | /// -> public: |
169 | | /// -> void x(); |
170 | | /// -> }; |
171 | | /// |
172 | | /// Creates the tree of unwrapped lines containing the macro call tokens so that |
173 | | /// the macro call tokens fit the semantic structure of the expanded formatted |
174 | | /// lines: |
175 | | /// -> CLASSA({ |
176 | | /// -> public: |
177 | | /// -> void x(); |
178 | | /// -> }) |
179 | | class MacroCallReconstructor { |
180 | | public: |
181 | | /// Create an Reconstructor whose resulting \p UnwrappedLine will start at |
182 | | /// \p Level, using the map from name identifier token to the corresponding |
183 | | /// tokens of the spelled macro call. |
184 | | MacroCallReconstructor( |
185 | | unsigned Level, |
186 | | const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> |
187 | | &ActiveExpansions); |
188 | | |
189 | | /// For the given \p Line, match all occurences of tokens expanded from a |
190 | | /// macro to unwrapped lines in the spelled macro call so that the resulting |
191 | | /// tree of unwrapped lines best resembles the structure of unwrapped lines |
192 | | /// passed in via \c addLine. |
193 | | void addLine(const UnwrappedLine &Line); |
194 | | |
195 | | /// Check whether at the current state there is no open macro expansion |
196 | | /// that needs to be processed to finish an macro call. |
197 | | /// Only when \c finished() is true, \c takeResult() can be called to retrieve |
198 | | /// the resulting \c UnwrappedLine. |
199 | | /// If there are multiple subsequent macro calls within an unwrapped line in |
200 | | /// the spelled token stream, the calling code may also continue to call |
201 | | /// \c addLine() when \c finished() is true. |
202 | 979 | bool finished() const { return ActiveExpansions.empty(); } |
203 | | |
204 | | /// Retrieve the formatted \c UnwrappedLine containing the orginal |
205 | | /// macro calls, formatted according to the expanded token stream received |
206 | | /// via \c addLine(). |
207 | | /// Generally, this line tries to have the same structure as the expanded, |
208 | | /// formatted unwrapped lines handed in via \c addLine(), with the exception |
209 | | /// that for multiple top-level lines, each subsequent line will be the |
210 | | /// child of the last token in its predecessor. This representation is chosen |
211 | | /// because it is a precondition to the formatter that we get what looks like |
212 | | /// a single statement in a single \c UnwrappedLine (i.e. matching parens). |
213 | | /// |
214 | | /// If a token in a macro argument is a child of a token in the expansion, |
215 | | /// the parent will be the corresponding token in the macro call. |
216 | | /// For example: |
217 | | /// #define C(a, b) class C { a b |
218 | | /// C(int x;, int y;) |
219 | | /// would expand to |
220 | | /// class C { int x; int y; |
221 | | /// where in a formatted line "int x;" and "int y;" would both be new separate |
222 | | /// lines. |
223 | | /// |
224 | | /// In the result, "int x;" will be a child of the opening parenthesis in "C(" |
225 | | /// and "int y;" will be a child of the "," token: |
226 | | /// C ( |
227 | | /// \- int x; |
228 | | /// , |
229 | | /// \- int y; |
230 | | /// ) |
231 | | UnwrappedLine takeResult() &&; |
232 | | |
233 | | private: |
234 | | void add(FormatToken *Token, FormatToken *ExpandedParent, bool First); |
235 | | void prepareParent(FormatToken *ExpandedParent, bool First); |
236 | | FormatToken *getParentInResult(FormatToken *Parent); |
237 | | void reconstruct(FormatToken *Token); |
238 | | void startReconstruction(FormatToken *Token); |
239 | | bool reconstructActiveCallUntil(FormatToken *Token); |
240 | | void endReconstruction(FormatToken *Token); |
241 | | bool processNextReconstructed(); |
242 | | void finalize(); |
243 | | |
244 | | struct ReconstructedLine; |
245 | | |
246 | | void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr); |
247 | | UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level); |
248 | | void debug(const ReconstructedLine &Line, int Level); |
249 | | ReconstructedLine &parentLine(); |
250 | | ReconstructedLine *currentLine(); |
251 | | void debugParentMap() const; |
252 | | |
253 | | #ifndef NDEBUG |
254 | | enum ReconstructorState { |
255 | | Start, // No macro expansion was found in the input yet. |
256 | | InProgress, // During a macro reconstruction. |
257 | | Finalized, // Past macro reconstruction, the result is finalized. |
258 | | }; |
259 | | ReconstructorState State = Start; |
260 | | #endif |
261 | | |
262 | | // Node in which we build up the resulting unwrapped line; this type is |
263 | | // analogous to UnwrappedLineNode. |
264 | | struct LineNode { |
265 | 377 | LineNode() = default; |
266 | 3.57k | LineNode(FormatToken *Tok) : Tok(Tok) {} |
267 | | FormatToken *Tok = nullptr; |
268 | | llvm::SmallVector<std::unique_ptr<ReconstructedLine>> Children; |
269 | | }; |
270 | | |
271 | | // Line in which we build up the resulting unwrapped line. |
272 | | // FIXME: Investigate changing UnwrappedLine to a pointer type and using it |
273 | | // instead of rolling our own type. |
274 | | struct ReconstructedLine { |
275 | | llvm::SmallVector<std::unique_ptr<LineNode>> Tokens; |
276 | | }; |
277 | | |
278 | | // The line in which we collect the resulting reconstructed output. |
279 | | // To reduce special cases in the algorithm, the first level of the line |
280 | | // contains a single null token that has the reconstructed incoming |
281 | | // lines as children. |
282 | | // In the end, we stich the lines together so that each subsequent line |
283 | | // is a child of the last token of the previous line. This is necessary |
284 | | // in order to format the overall expression as a single logical line - |
285 | | // if we created separate lines, we'd format them with their own top-level |
286 | | // indent depending on the semantic structure, which is not desired. |
287 | | ReconstructedLine Result; |
288 | | |
289 | | // Stack of currently "open" lines, where each line's predecessor's last |
290 | | // token is the parent token for that line. |
291 | | llvm::SmallVector<ReconstructedLine *> ActiveReconstructedLines; |
292 | | |
293 | | // Maps from the expanded token to the token that takes its place in the |
294 | | // reconstructed token stream in terms of parent-child relationships. |
295 | | // Note that it might take multiple steps to arrive at the correct |
296 | | // parent in the output. |
297 | | // Given: #define C(a, b) []() { a; b; } |
298 | | // And a call: C(f(), g()) |
299 | | // The structure in the incoming formatted unwrapped line will be: |
300 | | // []() { |
301 | | // |- f(); |
302 | | // \- g(); |
303 | | // } |
304 | | // with f and g being children of the opening brace. |
305 | | // In the reconstructed call: |
306 | | // C(f(), g()) |
307 | | // \- f() |
308 | | // \- g() |
309 | | // We want f to be a child of the opening parenthesis and g to be a child |
310 | | // of the comma token in the macro call. |
311 | | // Thus, we map |
312 | | // { -> ( |
313 | | // and add |
314 | | // ( -> , |
315 | | // once we're past the comma in the reconstruction. |
316 | | llvm::DenseMap<FormatToken *, FormatToken *> |
317 | | SpelledParentToReconstructedParent; |
318 | | |
319 | | // Keeps track of a single expansion while we're reconstructing tokens it |
320 | | // generated. |
321 | | struct Expansion { |
322 | | // The identifier token of the macro call. |
323 | | FormatToken *ID; |
324 | | // Our current position in the reconstruction. |
325 | | std::list<UnwrappedLineNode>::iterator SpelledI; |
326 | | // The end of the reconstructed token sequence. |
327 | | std::list<UnwrappedLineNode>::iterator SpelledE; |
328 | | }; |
329 | | |
330 | | // Stack of macro calls for which we're in the middle of an expansion. |
331 | | llvm::SmallVector<Expansion> ActiveExpansions; |
332 | | |
333 | | struct MacroCallState { |
334 | | MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken, |
335 | | FormatToken *MacroCallLParen); |
336 | | |
337 | | ReconstructedLine *Line; |
338 | | |
339 | | // The last token in the parent line or expansion, or nullptr if the macro |
340 | | // expansion is on a top-level line. |
341 | | // |
342 | | // For example, in the macro call: |
343 | | // auto f = []() { ID(1); }; |
344 | | // The MacroCallState for ID will have '{' as ParentLastToken. |
345 | | // |
346 | | // In the macro call: |
347 | | // ID(ID(void f())); |
348 | | // The MacroCallState of the outer ID will have nullptr as ParentLastToken, |
349 | | // while the MacroCallState for the inner ID will have the '(' of the outer |
350 | | // ID as ParentLastToken. |
351 | | // |
352 | | // In the macro call: |
353 | | // ID2(a, ID(b)); |
354 | | // The MacroCallState of ID will have ',' as ParentLastToken. |
355 | | FormatToken *ParentLastToken; |
356 | | |
357 | | // The l_paren of this MacroCallState's macro call. |
358 | | FormatToken *MacroCallLParen; |
359 | | }; |
360 | | |
361 | | // Keeps track of the lines into which the opening brace/parenthesis & |
362 | | // argument separating commas for each level in the macro call go in order to |
363 | | // put the corresponding closing brace/parenthesis into the same line in the |
364 | | // output and keep track of which parents in the expanded token stream map to |
365 | | // which tokens in the reconstructed stream. |
366 | | // When an opening brace/parenthesis has children, we want the structure of |
367 | | // the output line to be: |
368 | | // |- MACRO |
369 | | // |- ( |
370 | | // | \- <argument> |
371 | | // |- , |
372 | | // | \- <argument> |
373 | | // \- ) |
374 | | llvm::SmallVector<MacroCallState> MacroCallStructure; |
375 | | |
376 | | // Level the generated UnwrappedLine will be at. |
377 | | const unsigned Level; |
378 | | |
379 | | // Maps from identifier of the macro call to an unwrapped line containing |
380 | | // all tokens of the macro call. |
381 | | const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> |
382 | | &IdToReconstructed; |
383 | | }; |
384 | | |
385 | | } // namespace format |
386 | | } // namespace clang |
387 | | |
388 | | #endif |