Coverage Report

Created: 2022-07-16 07:03

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Format/Macros.h
Line
Count
Source
1
//===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This file contains the main building blocks of macro support in
11
/// clang-format.
12
///
13
/// In order to not violate the requirement that clang-format can format files
14
/// in isolation, clang-format's macro support uses expansions users provide
15
/// as part of clang-format's style configuration.
16
///
17
/// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support
18
/// one level of expansion (\see MacroExpander for a full description of what
19
/// is supported).
20
///
21
/// As part of parsing, clang-format uses the MacroExpander to expand the
22
/// spelled token streams into expanded token streams when it encounters a
23
/// macro call. The UnwrappedLineParser continues to parse UnwrappedLines
24
/// from the expanded token stream.
25
/// After the expanded unwrapped lines are parsed, the MacroCallReconstructor
26
/// matches the spelled token stream into unwrapped lines that best resemble the
27
/// structure of the expanded unwrapped lines. These reconstructed unwrapped
28
/// lines are aliasing the tokens in the expanded token stream, so that token
29
/// annotations will be reused when formatting the spelled macro calls.
30
///
31
/// When formatting, clang-format annotates and formats the expanded unwrapped
32
/// lines first, determining the token types. Next, it formats the spelled
33
/// unwrapped lines, keeping the token types fixed, while allowing other
34
/// formatting decisions to change.
35
///
36
//===----------------------------------------------------------------------===//
37
38
#ifndef CLANG_LIB_FORMAT_MACROS_H
39
#define CLANG_LIB_FORMAT_MACROS_H
40
41
#include <list>
42
#include <map>
43
#include <string>
44
#include <vector>
45
46
#include "FormatToken.h"
47
#include "llvm/ADT/ArrayRef.h"
48
#include "llvm/ADT/DenseMap.h"
49
#include "llvm/ADT/SmallVector.h"
50
#include "llvm/ADT/StringRef.h"
51
52
namespace clang {
53
namespace format {
54
55
// Forward declarations of types that are only declared, not defined, in this
// header.
struct UnwrappedLine;
56
struct UnwrappedLineNode;
57
58
/// Takes a set of macro definitions as strings and allows expanding calls to
59
/// those macros.
60
///
61
/// For example:
62
/// Definition: A(x, y)=x + y
63
/// Call      : A(int a = 1, 2)
64
/// Expansion : int a = 1 + 2
65
///
66
/// Expansion does not check arity of the definition.
67
/// If fewer arguments than expected are provided, the remaining parameters
68
/// are considered empty:
69
/// Call     : A(a)
70
/// Expansion: a +
71
/// If more arguments than expected are provided, they will be discarded.
72
///
73
/// The expander does not support:
74
/// - recursive expansion
75
/// - stringification
76
/// - concatenation
77
/// - variadic macros
78
///
79
/// Furthermore, only a single expansion of each macro argument is supported,
80
/// so that we cannot get conflicting formatting decisions from different
81
/// expansions.
82
/// Definition: A(x)=x+x
83
/// Call      : A(id)
84
/// Expansion : id+x
85
///
86
class MacroExpander {
87
public:
88
  /// List of macro-call arguments: one token sequence per positional argument
  /// (see \c expand).
  using ArgsList = llvm::ArrayRef<llvm::SmallVector<FormatToken *, 8>>;
89
90
  /// Construct a macro expander from a set of macro definitions.
91
  /// Macro definitions must be encoded as UTF-8.
92
  ///
93
  /// Each entry in \p Macros must conform to the following simple
94
  /// macro-definition language:
95
  /// <definition> ::= <id> <expansion> | <id> "(" <params> ")" <expansion>
96
  /// <params>     ::= <id-list> | ""
97
  /// <id-list>    ::= <id> | <id> "," <params>
98
  /// <expansion>  ::= "=" <tail> | <eof>
99
  /// <tail>       ::= <tok> <tail> | <eof>
100
  ///
101
  /// Macros that cannot be parsed will be silently discarded.
102
  ///
  /// NOTE(review): \p SourceMgr, \p Style, \p Allocator and \p IdentTable have
  /// the same names and types as the reference members below; presumably they
  /// are retained and must outlive this object - confirm in the implementation.
103
  MacroExpander(const std::vector<std::string> &Macros,
104
                clang::SourceManager &SourceMgr, const FormatStyle &Style,
105
                llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
106
                IdentifierTable &IdentTable);
107
  /// Declared here without a body; the definition is not part of this header.
  ~MacroExpander();
108
109
  /// Returns whether a macro \p Name is defined.
110
  bool defined(llvm::StringRef Name) const;
111
112
  /// Returns whether the macro has no arguments and should not consume
113
  /// subsequent parentheses.
114
  bool objectLike(llvm::StringRef Name) const;
115
116
  /// Returns the expanded stream of format tokens for \p ID, where
117
  /// each element in \p Args is a positional argument to the macro call.
118
  llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID,
119
                                             ArgsList Args) const;
120
121
private:
122
  // Nested types that are only forward-declared in this header.
  struct Definition;
123
  class DefinitionParser;
124
125
  // NOTE(review): presumably parses Macro according to the grammar documented
  // on the constructor and records the result in Definitions - confirm in the
  // implementation.
  void parseDefinition(const std::string &Macro);
126
127
  // NOTE(review): the four references below are presumably bound to the
  // same-named constructor arguments - confirm in the implementation.
  clang::SourceManager &SourceMgr;
128
  const FormatStyle &Style;
129
  llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator;
130
  IdentifierTable &IdentTable;
131
  // Uniquely owned memory buffers. NOTE(review): presumably these back the text
  // of the parsed macro definitions - confirm.
  SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers;
132
  // Definitions keyed by string. NOTE(review): presumably keyed by macro name,
  // as queried by defined() and objectLike() - confirm.
  llvm::StringMap<Definition> Definitions;
133
};
134
135
/// Converts a sequence of UnwrappedLines containing expanded macros into a
136
/// single UnwrappedLine containing the macro calls.  This UnwrappedLine may be
137
/// broken into child lines, in a way that best conveys the structure of the
138
/// expanded code.
139
///
140
/// In the simplest case, a spelled UnwrappedLine contains one macro, and after
141
/// expanding it we have one expanded UnwrappedLine.  In general, macro
142
/// expansions can span UnwrappedLines, and multiple macros can contribute
143
/// tokens to the same line.  We keep consuming expanded lines until:
144
/// *   all expansions that started have finished (we're not chopping any macros
145
///     in half)
146
/// *   *and* we've reached the end of a *spelled* unwrapped line.
147
///
148
/// A single UnwrappedLine represents this chunk of code.
149
///
150
/// After this point, the state of the spelled/expanded stream is "in sync"
151
/// (both at the start of an UnwrappedLine, with no macros open), so the
152
/// MacroCallReconstructor can be thrown away and parsing can continue.
153
///
154
/// Given a mapping from the macro name identifier token in the macro call
155
/// to the tokens of the macro call, for example:
156
/// CLASSA -> CLASSA({public: void x();})
157
///
158
/// When getting the formatted lines of the expansion via the \c addLine method
159
/// (each '->' specifies a call to \c addLine ):
160
/// -> class A {
161
/// -> public:
162
/// ->   void x();
163
/// -> };
164
///
165
/// Creates the tree of unwrapped lines containing the macro call tokens so that
166
/// the macro call tokens fit the semantic structure of the expanded formatted
167
/// lines:
168
/// -> CLASSA({
169
/// -> public:
170
/// ->   void x();
171
/// -> })
172
class MacroCallReconstructor {
173
public:
174
  /// Create a Reconstructor whose resulting \c UnwrappedLine will start at
175
  /// \p Level, using the map from name identifier token to the corresponding
176
  /// tokens of the spelled macro call.
  ///
  /// NOTE(review): the parameter \p ActiveExpansions has the type of the
  /// \c IdToReconstructed member, while sharing its name with the unrelated
  /// \c ActiveExpansions member stack; presumably it initializes
  /// \c IdToReconstructed - confirm in the implementation.
177
  MacroCallReconstructor(
178
      unsigned Level,
179
      const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
180
          &ActiveExpansions);
181
182
  /// For the given \p Line, match all occurrences of tokens expanded from a
183
  /// macro to unwrapped lines in the spelled macro call so that the resulting
184
  /// tree of unwrapped lines best resembles the structure of unwrapped lines
185
  /// passed in via \c addLine.
186
  void addLine(const UnwrappedLine &Line);
187
188
  /// Check whether at the current state there is no open macro expansion
189
  /// that needs to be processed to finish a macro call.
190
  /// Only when \c finished() is true, \c takeResult() can be called to retrieve
191
  /// the resulting \c UnwrappedLine.
192
  /// If there are multiple subsequent macro calls within an unwrapped line in
193
  /// the spelled token stream, the calling code may also continue to call
194
  /// \c addLine() when \c finished() is true.
195
41
  bool finished() const { return ActiveExpansions.empty(); }
196
197
  /// Retrieve the formatted \c UnwrappedLine containing the original
198
  /// macro calls, formatted according to the expanded token stream received
199
  /// via \c addLine().
200
  /// Generally, this line tries to have the same structure as the expanded,
201
  /// formatted unwrapped lines handed in via \c addLine(), with the exception
202
  /// that for multiple top-level lines, each subsequent line will be the
203
  /// child of the last token in its predecessor. This representation is chosen
204
  /// because it is a precondition to the formatter that we get what looks like
205
  /// a single statement in a single \c UnwrappedLine (i.e. matching parens).
206
  ///
207
  /// If a token in a macro argument is a child of a token in the expansion,
208
  /// the parent will be the corresponding token in the macro call.
209
  /// For example:
210
  ///   #define C(a, b) class C { a b
211
  ///   C(int x;, int y;)
212
  /// would expand to
213
  ///   class C { int x; int y;
214
  /// where in a formatted line "int x;" and "int y;" would both be new separate
215
  /// lines.
216
  ///
217
  /// In the result, "int x;" will be a child of the opening parenthesis in "C("
218
  /// and "int y;" will be a child of the "," token:
219
  ///   C (
220
  ///     \- int x;
221
  ///     ,
222
  ///     \- int y;
223
  ///     )
  ///
  /// This member function is rvalue-ref-qualified, so it can only be invoked
  /// on an rvalue of this class.
224
  UnwrappedLine takeResult() &&;
225
226
private:
227
  // Helpers of the reconstruction algorithm. Only their declarations are
  // visible in this header.
  void add(FormatToken *Token, FormatToken *ExpandedParent, bool First);
228
  void prepareParent(FormatToken *ExpandedParent, bool First);
229
  FormatToken *getParentInResult(FormatToken *Parent);
230
  void reconstruct(FormatToken *Token);
231
  void startReconstruction(FormatToken *Token);
232
  bool reconstructActiveCallUntil(FormatToken *Token);
233
  void endReconstruction(FormatToken *Token);
234
  bool processNextReconstructed();
235
  void finalize();
236
237
  // Forward declaration; the definition follows further down in this class.
  struct ReconstructedLine;
238
239
  void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr);
240
  UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level);
241
  void debug(const ReconstructedLine &Line, int Level);
242
  ReconstructedLine &parentLine();
243
  ReconstructedLine *currentLine();
244
  void debugParentMap() const;
245
246
  // The state enum and the State member only exist when NDEBUG is not defined.
#ifndef NDEBUG
247
  enum ReconstructorState {
248
    Start,      // No macro expansion was found in the input yet.
249
    InProgress, // During a macro reconstruction.
250
    Finalized,  // Past macro reconstruction, the result is finalized.
251
  };
252
  ReconstructorState State = Start;
253
#endif
254
255
  // Node in which we build up the resulting unwrapped line; this type is
256
  // analogous to UnwrappedLineNode.
257
  struct LineNode {
258
16
    // Creates a node without a token: Tok keeps its nullptr default.
    LineNode() = default;
259
150
    // Creates a node for Tok; Children starts out empty.
    LineNode(FormatToken *Tok) : Tok(Tok) {}
260
    FormatToken *Tok = nullptr;
261
    // Child lines of this node, each uniquely owned by the node.
    llvm::SmallVector<std::unique_ptr<ReconstructedLine>> Children;
262
  };
263
264
  // Line in which we build up the resulting unwrapped line.
265
  // FIXME: Investigate changing UnwrappedLine to a pointer type and using it
266
  // instead of rolling our own type.
267
  struct ReconstructedLine {
268
    // The nodes that make up this line, each uniquely owned by the line.
    llvm::SmallVector<std::unique_ptr<LineNode>> Tokens;
269
  };
270
271
  // The line in which we collect the resulting reconstructed output.
272
  // To reduce special cases in the algorithm, the first level of the line
273
  // contains a single null token that has the reconstructed incoming
274
  // lines as children.
275
  // In the end, we stitch the lines together so that each subsequent line
276
  // is a child of the last token of the previous line. This is necessary
277
  // in order to format the overall expression as a single logical line -
278
  // if we created separate lines, we'd format them with their own top-level
279
  // indent depending on the semantic structure, which is not desired.
280
  ReconstructedLine Result;
281
282
  // Stack of currently "open" lines, where each line's predecessor's last
283
  // token is the parent token for that line.
284
  llvm::SmallVector<ReconstructedLine *> ActiveReconstructedLines;
285
286
  // Maps from the expanded token to the token that takes its place in the
287
  // reconstructed token stream in terms of parent-child relationships.
288
  // Note that it might take multiple steps to arrive at the correct
289
  // parent in the output.
290
  // Given: #define C(a, b) []() { a; b; }
291
  // And a call: C(f(), g())
292
  // The structure in the incoming formatted unwrapped line will be:
293
  // []() {
294
  //      |- f();
295
  //      \- g();
296
  // }
297
  // with f and g being children of the opening brace.
298
  // In the reconstructed call:
299
  // C(f(), g())
300
  //  \- f()
301
  //      \- g()
302
  // We want f to be a child of the opening parenthesis and g to be a child
303
  // of the comma token in the macro call.
304
  // Thus, we map
305
  // { -> (
306
  // and add
307
  // ( -> ,
308
  // once we're past the comma in the reconstruction.
309
  llvm::DenseMap<FormatToken *, FormatToken *>
310
      SpelledParentToReconstructedParent;
311
312
  // Keeps track of a single expansion while we're reconstructing tokens it
313
  // generated.
314
  struct Expansion {
315
    // The identifier token of the macro call.
316
    FormatToken *ID;
317
    // Our current position in the reconstruction.
318
    std::list<UnwrappedLineNode>::iterator SpelledI;
319
    // The end of the reconstructed token sequence.
320
    std::list<UnwrappedLineNode>::iterator SpelledE;
321
  };
322
323
  // Stack of macro calls for which we're in the middle of an expansion.
324
  llvm::SmallVector<Expansion> ActiveExpansions;
325
326
  struct MacroCallState {
327
    MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken,
328
                   FormatToken *MacroCallLParen);
329
330
    ReconstructedLine *Line;
331
332
    // The last token in the parent line or expansion, or nullptr if the macro
333
    // expansion is on a top-level line.
334
    //
335
    // For example, in the macro call:
336
    //   auto f = []() { ID(1); };
337
    // The MacroCallState for ID will have '{' as ParentLastToken.
338
    //
339
    // In the macro call:
340
    //   ID(ID(void f()));
341
    // The MacroCallState of the outer ID will have nullptr as ParentLastToken,
342
    // while the MacroCallState for the inner ID will have the '(' of the outer
343
    // ID as ParentLastToken.
344
    //
345
    // In the macro call:
346
    //   ID2(a, ID(b));
347
    // The MacroCallState of ID will have ',' as ParentLastToken.
348
    FormatToken *ParentLastToken;
349
350
    // The l_paren of this MacroCallState's macro call.
351
    FormatToken *MacroCallLParen;
352
  };
353
354
  // Keeps track of the lines into which the opening brace/parenthesis &
355
  // argument separating commas for each level in the macro call go in order to
356
  // put the corresponding closing brace/parenthesis into the same line in the
357
  // output and keep track of which parents in the expanded token stream map to
358
  // which tokens in the reconstructed stream.
359
  // When an opening brace/parenthesis has children, we want the structure of
360
  // the output line to be:
361
  // |- MACRO
362
  // |- (
363
  // |  \- <argument>
364
  // |- ,
365
  // |  \- <argument>
366
  // \- )
367
  llvm::SmallVector<MacroCallState> MacroCallStructure;
368
369
  // Level the generated UnwrappedLine will be at.
370
  const unsigned Level;
371
372
  // Maps from identifier of the macro call to an unwrapped line containing
373
  // all tokens of the macro call.
374
  const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>>
375
      &IdToReconstructed;
376
};
377
378
} // namespace format
379
} // namespace clang
380
381
#endif