/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Format/UnwrappedLineParser.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file contains the declaration of the UnwrappedLineParser, |
11 | | /// which turns a stream of tokens into UnwrappedLines. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H |
16 | | #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H |
17 | | |
18 | | #include "FormatToken.h" |
19 | | #include "clang/Basic/IdentifierTable.h" |
20 | | #include "clang/Format/Format.h" |
21 | | #include "llvm/Support/Regex.h" |
22 | | #include <list> |
23 | | #include <stack> |
24 | | |
25 | | namespace clang { |
26 | | namespace format { |
27 | | |
28 | | struct UnwrappedLineNode; |
29 | | |
30 | | /// An unwrapped line is a sequence of \c Token that we would like to |
31 | | /// put on a single line if there were no column limit. |
32 | | /// |
33 | | /// This is used as a main interface between the \c UnwrappedLineParser and the |
34 | | /// \c UnwrappedLineFormatter. The key property is that changing the formatting |
35 | | /// within an unwrapped line does not affect any other unwrapped lines. |
36 | | struct UnwrappedLine { |
37 | | UnwrappedLine(); /* Sets Level to 0 and both bool flags to false. */ |
38 | | |
39 | | // FIXME: Don't use std::list here. |
40 | | /// The \c Tokens comprising this \c UnwrappedLine. |
41 | | std::list<UnwrappedLineNode> Tokens; |
42 | | |
43 | | /// The indent level of the \c UnwrappedLine. |
44 | | unsigned Level; |
45 | | |
46 | | /// Whether this \c UnwrappedLine is part of a preprocessor directive. |
47 | | bool InPPDirective; |
48 | | |
49 | | bool MustBeDeclaration; /* NOTE(review): presumably whether this line must be a declaration - confirm in the parser. */ |
50 | | |
51 | | /// If this \c UnwrappedLine closes a block in a sequence of lines, |
52 | | /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding |
53 | | /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be |
54 | | /// \c kInvalidIndex. |
55 | | size_t MatchingOpeningBlockLineIndex = kInvalidIndex; |
56 | | |
57 | | /// If this \c UnwrappedLine opens a block, stores the index of the |
58 | | /// line with the corresponding closing brace. |
59 | | size_t MatchingClosingBlockLineIndex = kInvalidIndex; |
60 | | |
61 | | static const size_t kInvalidIndex = -1; /* -1 converted to size_t, i.e. the largest size_t value. */ |
62 | | |
63 | | unsigned FirstStartColumn = 0; |
64 | | }; |
65 | | |
66 | | class UnwrappedLineConsumer { /* Abstract interface: both member functions below are pure virtual. */ |
67 | | public: |
68 | 37.5k | virtual ~UnwrappedLineConsumer() {} |
69 | | virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; /* NOTE(review): presumably called once per parsed line - confirm in the .cpp. */ |
70 | | virtual void finishRun() = 0; /* NOTE(review): presumably marks the end of one parse pass - confirm in the .cpp. */ |
71 | | }; |
72 | | |
73 | | class FormatTokenSource; |
74 | | |
75 | | class UnwrappedLineParser { /* Turns a stream of tokens into UnwrappedLines; constructed with the consumer Callback. */ |
76 | | public: |
77 | | UnwrappedLineParser(const FormatStyle &Style, |
78 | | const AdditionalKeywords &Keywords, |
79 | | unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens, |
80 | | UnwrappedLineConsumer &Callback); |
81 | | |
82 | | void parse(); |
83 | | |
84 | | private: |
85 | | void reset(); |
86 | | void parseFile(); |
87 | | void parseLevel(bool HasOpeningBrace); |
88 | | void parseBlock(bool MustBeDeclaration, bool AddLevel = true, |
89 | | bool MunchSemi = true); |
90 | | void parseChildBlock(); |
91 | | void parsePPDirective(); |
92 | | void parsePPDefine(); |
93 | | void parsePPIf(bool IfDef); |
94 | | void parsePPElIf(); |
95 | | void parsePPElse(); |
96 | | void parsePPEndIf(); |
97 | | void parsePPUnknown(); |
98 | | void readTokenWithJavaScriptASI(); |
99 | | void parseStructuralElement(); |
100 | | bool tryToParseBracedList(); |
101 | | bool parseBracedList(bool ContinueOnSemicolons = false, bool IsEnum = false, |
102 | | tok::TokenKind ClosingBraceKind = tok::r_brace); |
103 | | void parseParens(); |
104 | | void parseSquare(bool LambdaIntroducer = false); |
105 | | void parseIfThenElse(); |
106 | | void parseTryCatch(); |
107 | | void parseForOrWhileLoop(); |
108 | | void parseDoWhile(); |
109 | | void parseLabel(bool LeftAlignLabel = false); |
110 | | void parseCaseLabel(); |
111 | | void parseSwitch(); |
112 | | void parseNamespace(); |
113 | | void parseNew(); |
114 | | void parseAccessSpecifier(); |
115 | | bool parseEnum(); |
116 | | void parseConcept(); |
117 | | void parseRequires(); |
118 | | void parseRequiresExpression(unsigned int OriginalLevel); |
119 | | void parseConstraintExpression(unsigned int OriginalLevel); |
120 | | void parseJavaEnumBody(); |
121 | | // Parses a record (aka class) as a top level element. If ParseAsExpr is true, |
122 | | // parses the record as a child block, i.e. if the class declaration is an |
123 | | // expression. |
124 | | void parseRecord(bool ParseAsExpr = false); |
125 | | void parseObjCLightweightGenerics(); |
126 | | void parseObjCMethod(); |
127 | | void parseObjCProtocolList(); |
128 | | void parseObjCUntilAtEnd(); |
129 | | void parseObjCInterfaceOrImplementation(); |
130 | | bool parseObjCProtocol(); |
131 | | void parseJavaScriptEs6ImportExport(); |
132 | | void parseStatementMacro(); |
133 | | void parseCSharpAttribute(); |
134 | | // Parse a C# generic type constraint: `where T : IComparable<T>`. |
135 | | // See: |
136 | | // https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/where-generic-type-constraint |
137 | | void parseCSharpGenericTypeConstraint(); |
138 | | bool tryToParseLambda(); |
139 | | bool tryToParseLambdaIntroducer(); |
140 | | bool tryToParsePropertyAccessor(); |
141 | | void tryToParseJSFunction(); |
142 | | bool tryToParseSimpleAttribute(); |
143 | | void addUnwrappedLine(); |
144 | | bool eof() const; |
145 | | // LevelDifference is the difference of levels after and before the current |
146 | | // token. For example: |
147 | | // - if the token is '{' and opens a block, LevelDifference is 1. |
148 | | // - if the token is '}' and closes a block, LevelDifference is -1. |
149 | | void nextToken(int LevelDifference = 0); |
150 | | void readToken(int LevelDifference = 0); |
151 | | |
152 | | // Decides which comment tokens should be added to the current line and which |
153 | | // should be added as comments before the next token. |
154 | | // |
155 | | // Comments specifies the sequence of comment tokens to analyze. They get |
156 | | // either pushed to the current line or added to the comments before the next |
157 | | // token. |
158 | | // |
159 | | // NextTok specifies the next token. A null pointer NextTok is supported, and |
160 | | // signifies either the absence of a next token, or that the next token |
161 | | // shouldn't be taken into account for the analysis. |
162 | | void distributeComments(const SmallVectorImpl<FormatToken *> &Comments, |
163 | | const FormatToken *NextTok); |
164 | | |
165 | | // Adds the comment preceding the next token to unwrapped lines. |
166 | | void flushComments(bool NewlineBeforeNext); |
167 | | void pushToken(FormatToken *Tok); |
168 | | void calculateBraceTypes(bool ExpectClassBody = false); |
169 | | |
170 | | // Marks a conditional compilation edge (for example, an '#if', '#ifdef', |
171 | | // '#else' or merge conflict marker). If 'Unreachable' is true, assumes |
172 | | // this branch either cannot be taken (for example '#if false'), or should |
173 | | // not be taken in this round. |
174 | | void conditionalCompilationCondition(bool Unreachable); |
175 | | void conditionalCompilationStart(bool Unreachable); |
176 | | void conditionalCompilationAlternative(); |
177 | | void conditionalCompilationEnd(); |
178 | | |
179 | | bool isOnNewLine(const FormatToken &FormatTok); |
180 | | |
181 | | // Compute hash of the current preprocessor branch. |
182 | | // This is used to identify the different branches, and thus track whether a |
183 | | // block opens and closes in the same branch. |
184 | | size_t computePPHash() const; |
185 | | |
186 | | // FIXME: We are constantly running into bugs where Line.Level is incorrectly |
187 | | // subtracted from beyond 0. Introduce a method to subtract from Line.Level |
188 | | // and use that everywhere in the Parser. |
189 | | std::unique_ptr<UnwrappedLine> Line; |
190 | | |
191 | | // Comments are sorted into unwrapped lines by whether they are in the same |
192 | | // line as the previous token, or not. If not, they belong to the next token. |
193 | | // Since the next token might already be in a new unwrapped line, we need to |
194 | | // store the comments belonging to that token. |
195 | | SmallVector<FormatToken *, 1> CommentsBeforeNextToken; |
196 | | FormatToken *FormatTok; |
197 | | bool MustBreakBeforeNextToken; |
198 | | |
199 | | // The parsed lines. Only added to through \c CurrentLines. |
200 | | SmallVector<UnwrappedLine, 8> Lines; |
201 | | |
202 | | // Preprocessor directives are parsed out-of-order from other unwrapped lines. |
203 | | // Thus, we need to keep a list of preprocessor directives to be reported |
204 | | // after an unwrapped line that has been started was finished. |
205 | | SmallVector<UnwrappedLine, 4> PreprocessorDirectives; |
206 | | |
207 | | // New unwrapped lines are added via CurrentLines. |
208 | | // Usually points to \c &Lines. While parsing a preprocessor directive when |
209 | | // there is an unfinished previous unwrapped line, will point to |
210 | | // \c &PreprocessorDirectives. |
211 | | SmallVectorImpl<UnwrappedLine> *CurrentLines; |
212 | | |
213 | | // We store for each line whether it must be a declaration depending on |
214 | | // whether we are in a compound statement or not. |
215 | | std::vector<bool> DeclarationScopeStack; |
216 | | |
217 | | const FormatStyle &Style; |
218 | | const AdditionalKeywords &Keywords; |
219 | | |
220 | | llvm::Regex CommentPragmasRegex; |
221 | | |
222 | | FormatTokenSource *Tokens; |
223 | | UnwrappedLineConsumer &Callback; |
224 | | |
225 | | // FIXME: This is a temporary measure until we have reworked the ownership |
226 | | // of the format tokens. The goal is to have the actual tokens created and |
227 | | // owned outside of and handed into the UnwrappedLineParser. |
228 | | ArrayRef<FormatToken *> AllTokens; |
229 | | |
230 | | // Represents preprocessor branch type, so we can find matching |
231 | | // #if/#else/#endif directives. |
232 | | enum PPBranchKind { |
233 | | PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0 |
234 | | PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0 |
235 | | }; |
236 | | |
237 | | struct PPBranch { |
238 | 1.67k | PPBranch(PPBranchKind Kind, size_t Line) : Kind(Kind), Line(Line) {} |
239 | | PPBranchKind Kind; |
240 | | size_t Line; |
241 | | }; |
242 | | |
243 | | // Keeps a stack of currently active preprocessor branching directives. |
244 | | SmallVector<PPBranch, 16> PPStack; |
245 | | |
246 | | // The \c UnwrappedLineParser re-parses the code for each combination |
247 | | // of preprocessor branches that can be taken. |
248 | | // To that end, we take the same branch (#if, #else, or one of the #elif |
249 | | // branches) for each nesting level of preprocessor branches. |
250 | | // \c PPBranchLevel stores the current nesting level of preprocessor |
251 | | // branches during one pass over the code. |
252 | | int PPBranchLevel; |
253 | | |
254 | | // Contains the current branch (#if, #else or one of the #elif branches) |
255 | | // for each nesting level. |
256 | | SmallVector<int, 8> PPLevelBranchIndex; |
257 | | |
258 | | // Contains the maximum number of branches at each nesting level. |
259 | | SmallVector<int, 8> PPLevelBranchCount; |
260 | | |
261 | | // Contains the number of branches per nesting level we are currently |
262 | | // in while parsing a preprocessor branch sequence. |
263 | | // This is used to update PPLevelBranchCount at the end of a branch |
264 | | // sequence. |
265 | | std::stack<int> PPChainBranchIndex; |
266 | | |
267 | | // Include guard search state. Used to fixup preprocessor indent levels |
268 | | // so that include guards do not participate in indentation. |
269 | | enum IncludeGuardState { |
270 | | IG_Inited, // Search started, looking for #ifndef. |
271 | | IG_IfNdefed, // #ifndef found, IncludeGuardToken points to condition. |
272 | | IG_Defined, // Matching #define found, checking other requirements. |
273 | | IG_Found, // All requirements met, need to fix indents. |
274 | | IG_Rejected, // Search failed or never started. |
275 | | }; |
276 | | |
277 | | // Current state of include guard search. |
278 | | IncludeGuardState IncludeGuard; |
279 | | |
280 | | // Points to the #ifndef condition for a potential include guard. Null unless |
281 | | // IncludeGuard == IG_IfNdefed. |
282 | | FormatToken *IncludeGuardToken; |
283 | | |
284 | | // Contains the first start column where the source begins. This is zero for |
285 | | // normal source code and may be nonzero when formatting a code fragment that |
286 | | // does not start at the beginning of the file. |
287 | | unsigned FirstStartColumn; |
288 | | |
289 | | friend class ScopedLineState; |
290 | | friend class CompoundStatementIndenter; |
291 | | }; |
292 | | |
293 | | struct UnwrappedLineNode { /* Element type of UnwrappedLine::Tokens: a token pointer plus a list of child lines. */ |
294 | 0 | UnwrappedLineNode() : Tok(nullptr) {} |
295 | 642k | UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} /* Not explicit, so a FormatToken* converts implicitly. */ |
296 | | |
297 | | FormatToken *Tok; |
298 | | SmallVector<UnwrappedLine, 0> Children; /* NOTE(review): presumably the lines of child blocks attached to Tok - confirm in the .cpp. */ |
299 | | }; |
300 | | |
301 | | inline UnwrappedLine::UnwrappedLine() /* Default state: level 0, InPPDirective and MustBeDeclaration both false. */ |
302 | | : Level(0), InPPDirective(false), MustBeDeclaration(false), |
303 | 86.6k | MatchingOpeningBlockLineIndex(kInvalidIndex) {} /* Repeats the in-class default; MatchingClosingBlockLineIndex and FirstStartColumn rely on their in-class defaults. */ |
304 | | |
305 | | } // end namespace format |
306 | | } // end namespace clang |
307 | | |
308 | | #endif |