/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Format/FormatTokenLexer.h
Line | Count | Source |
1 | | //===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | /// |
9 | | /// \file |
10 | | /// This file contains FormatTokenLexer, which tokenizes a source file |
11 | | /// into a token stream suitable for ClangFormat. |
12 | | /// |
13 | | //===----------------------------------------------------------------------===// |
14 | | |
15 | | #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H |
16 | | #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H |
17 | | |
18 | | #include "Encoding.h" |
19 | | #include "FormatToken.h" |
20 | | #include "clang/Basic/LangOptions.h" |
21 | | #include "clang/Basic/SourceLocation.h" |
22 | | #include "clang/Basic/SourceManager.h" |
23 | | #include "clang/Format/Format.h" |
24 | | #include "llvm/ADT/MapVector.h" |
25 | | #include "llvm/ADT/StringSet.h" |
26 | | #include "llvm/Support/Regex.h" |
27 | | |
28 | | #include <stack> |
29 | | |
30 | | namespace clang { |
31 | | namespace format { |
32 | | |
// States used by FormatTokenLexer, which keeps a std::stack of them in its
// StateStack member.
// NOTE(review): the per-enumerator notes below are inferred from the names and
// from the handleTemplateStrings() comment in this header -- confirm against
// the lexer implementation.
33 | | enum LexerState { |
// Presumably ordinary lexing, outside any special construct.
34 | | NORMAL, |
// Presumably lexing inside a JavaScript template string.
35 | | TEMPLATE_STRING, |
// Presumably a token has been set aside, to be fetched by getStashedToken().
36 | | TOKEN_STASHED, |
37 | | }; |
38 | | |
// Tokenizes a source file into a token stream suitable for ClangFormat.
// Only getKeywords() is defined inline; every other member function is
// declared here and defined out of line.
39 | | class FormatTokenLexer { |
40 | | public: |
// The parameter types mirror the reference members declared further down
// (SourceMgr, Style, Allocator, IdentTable).
// NOTE(review): presumably those members are bound to these arguments, in
// which case the arguments must outlive the lexer -- confirm against the
// constructor definition.
41 | | FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, |
42 | | const FormatStyle &Style, encoding::Encoding Encoding, |
43 | | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, |
44 | | IdentifierTable &IdentTable); |
45 | | |
// Returns a non-owning ArrayRef of FormatToken pointers.
// NOTE(review): presumably a view of the Tokens member once lexing has
// finished -- confirm against the definition.
46 | | ArrayRef<FormatToken *> lex(); |
47 | | |
// Read-only access to the Keywords member. The accessor itself is not
// const-qualified, so it cannot be called through a const FormatTokenLexer.
48 | 179k | const AdditionalKeywords &getKeywords() { return Keywords; } |
49 | | |
50 | | private: |
// NOTE(review): the tryMerge*/tryTransform* helpers below are only declared
// here. Presumably each inspects the most recently lexed tokens and, for the
// bool-returning ones, reports whether it merged or rewrote them -- confirm
// against the definitions.
51 | | void tryMergePreviousTokens(); |
52 | | |
53 | | bool tryMergeLessLess(); |
54 | | bool tryMergeNSStringLiteral(); |
55 | | bool tryMergeJSPrivateIdentifier(); |
56 | | bool tryMergeCSharpStringLiteral(); |
57 | | bool tryMergeCSharpKeywordVariables(); |
58 | | bool tryMergeNullishCoalescingEqual(); |
59 | | bool tryTransformCSharpForEach(); |
60 | | bool tryMergeForEach(); |
61 | | bool tryTransformTryUsageForC(); |
62 | | |
// NOTE(review): presumably merges the trailing tokens whose kinds match
// Kinds into a single token of type NewType -- confirm.
63 | | bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType); |
64 | | |
65 | | // Returns \c true if \p Tok can only be followed by an operand in JavaScript. |
66 | | bool precedesOperand(FormatToken *Tok); |
67 | | |
// NOTE(review): presumably reports whether a JavaScript regex literal may
// start directly after Prev -- confirm.
68 | | bool canPrecedeRegexLiteral(FormatToken *Prev); |
69 | | |
70 | | // Tries to parse a JavaScript Regex literal starting at the current token, |
71 | | // if that begins with a slash and is in a location where JavaScript allows |
72 | | // regex literals. Changes the current token to a regex literal and updates |
73 | | // its text if successful. |
74 | | void tryParseJSRegexLiteral(); |
75 | | |
76 | | // Handles JavaScript template strings. |
77 | | // |
78 | | // JavaScript template strings use backticks ('`') as delimiters, and allow |
79 | | // embedding expressions nested in ${expr-here}. Template strings can be |
80 | | // nested recursively, i.e. expressions can contain template strings in turn. |
81 | | // |
82 | | // The code below parses starting from a backtick, up to a closing backtick or |
83 | | // an opening ${. It also maintains a stack of lexing contexts to handle |
84 | | // nested template parts by balancing curly braces. |
85 | | void handleTemplateStrings(); |
86 | | |
87 | | void handleCSharpVerbatimAndInterpolatedStrings(); |
88 | | |
89 | | void tryParsePythonComment(); |
90 | | |
91 | | bool tryMerge_TMacro(); |
92 | | |
93 | | bool tryMergeConflictMarkers(); |
94 | | |
// NOTE(review): presumably shortens the current token to NewLen characters
// -- confirm whether the unit is bytes or columns.
95 | | void truncateToken(size_t NewLen); |
96 | | |
97 | | FormatToken *getStashedToken(); |
98 | | |
99 | | FormatToken *getNextToken(); |
100 | | |
// Data members. C++ initializes them in declaration order, whatever order the
// constructor's initializer list uses, so do not reorder them without checking
// the constructor definition.
101 | | FormatToken *FormatTok; |
102 | | bool IsFirstToken; |
// Stack of LexerState values; the handleTemplateStrings() comment above
// describes a stack of lexing contexts used for nested template parts.
103 | | std::stack<LexerState> StateStack; |
104 | | unsigned Column; |
105 | | unsigned TrailingWhitespace; |
// The underlying Lexer, owned by this object through the unique_ptr.
106 | | std::unique_ptr<Lexer> Lex; |
// Held by value, unlike the reference members below.
107 | | LangOptions LangOpts; |
// SourceMgr, Style, IdentTable and Allocator are reference members: the
// referenced objects are not owned by this class and must outlive it.
108 | | const SourceManager &SourceMgr; |
109 | | FileID ID; |
110 | | const FormatStyle &Style; |
111 | | IdentifierTable &IdentTable; |
112 | | AdditionalKeywords Keywords; |
113 | | encoding::Encoding Encoding; |
114 | | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; |
115 | | // Index (in 'Tokens') of the last token that starts a new line. |
116 | | unsigned FirstInLineIndex; |
117 | | SmallVector<FormatToken *, 16> Tokens; |
118 | | |
// Insertion-ordered map from identifier to token type.
// NOTE(review): presumably the macros named in the format style, each mapped
// to the token type it should be treated as -- confirm.
119 | | llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros; |
120 | | |
// NOTE(review): presumably true while lexing a region in which formatting
// has been switched off -- confirm.
121 | | bool FormattingDisabled; |
122 | | |
// NOTE(review): presumably built from the style's macro-block begin/end
// patterns -- confirm.
123 | | llvm::Regex MacroBlockBeginRegex; |
124 | | llvm::Regex MacroBlockEndRegex; |
125 | | |
126 | | // Targets that may appear inside a C# attribute. |
127 | | static const llvm::StringSet<> CSharpAttributeTargets; |
128 | | |
129 | | /// Handle Verilog-specific tokens. |
130 | | bool readRawTokenVerilogSpecific(Token &Tok); |
131 | | |
// NOTE(review): presumably reads the next raw token from Lex into Tok
// -- confirm.
132 | | void readRawToken(FormatToken &Tok); |
133 | | |
// NOTE(review): presumably re-creates Lex so that lexing resumes at Offset
// -- confirm.
134 | | void resetLexer(unsigned Offset); |
135 | | }; |
136 | | |
137 | | } // namespace format |
138 | | } // namespace clang |
139 | | |
140 | | #endif |