/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- ClangHighlighter.cpp ----------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "ClangHighlighter.h" |
10 | | |
11 | | #include "lldb/Host/FileSystem.h" |
12 | | #include "lldb/Target/Language.h" |
13 | | #include "lldb/Utility/AnsiTerminal.h" |
14 | | #include "lldb/Utility/StreamString.h" |
15 | | |
16 | | #include "clang/Basic/FileManager.h" |
17 | | #include "clang/Basic/SourceManager.h" |
18 | | #include "clang/Lex/Lexer.h" |
19 | | #include "llvm/ADT/StringSet.h" |
20 | | #include "llvm/Support/MemoryBuffer.h" |
21 | | #include <optional> |
22 | | |
23 | | using namespace lldb_private; |
24 | | |
25 | 56.5k | bool ClangHighlighter::isKeyword(llvm::StringRef token) const { |
26 | 56.5k | return keywords.contains(token); |
27 | 56.5k | } |
28 | | |
29 | 8.92k | ClangHighlighter::ClangHighlighter() { |
30 | 2.82M | #define KEYWORD(X, N) keywords.insert(#X); |
31 | 8.92k | #include "clang/Basic/TokenKinds.def" |
32 | 8.92k | } |
33 | | |
34 | | /// Determines which style should be applied to the given token. |
35 | | /// \param highlighter |
36 | | /// The current highlighter that should use the style. |
37 | | /// \param token |
38 | | /// The current token. |
39 | | /// \param tok_str |
40 | | /// The string in the source code the token represents. |
41 | | /// \param options |
42 | | /// The style we use for coloring the source code. |
43 | | /// \param in_pp_directive |
44 | | /// If we are currently in a preprocessor directive. NOTE: This is |
45 | | /// passed by reference and will be updated if the current token starts |
46 | | /// or ends a preprocessor directive. |
47 | | /// \return |
48 | | /// The ColorStyle that should be applied to the token. |
49 | | static HighlightStyle::ColorStyle |
50 | | determineClangStyle(const ClangHighlighter &highlighter, |
51 | | const clang::Token &token, llvm::StringRef tok_str, |
52 | 63.1k | const HighlightStyle &options, bool &in_pp_directive) { |
53 | 63.1k | using namespace clang; |
54 | | |
55 | 63.1k | if (token.is(tok::comment)) { |
56 | | // If we were in a preprocessor directive before, we now left it. |
57 | 1.78k | in_pp_directive = false; |
58 | 1.78k | return options.comment; |
59 | 61.4k | } else if (in_pp_directive || token.getKind() == tok::hash60.7k ) { |
60 | | // Let's assume that the rest of the line is a PP directive. |
61 | 861 | in_pp_directive = true; |
62 | | // Preprocessor directives are hard to match, so we have to hack this in. |
63 | 861 | return options.pp_directive; |
64 | 60.5k | } else if (tok::isStringLiteral(token.getKind())) |
65 | 1.46k | return options.string_literal; |
66 | 59.0k | else if (tok::isLiteral(token.getKind())) |
67 | 2.50k | return options.scalar_literal; |
68 | 56.5k | else if (highlighter.isKeyword(tok_str)) |
69 | 3.47k | return options.keyword; |
70 | 53.0k | else |
71 | 53.0k | switch (token.getKind()) { |
72 | 10.7k | case tok::raw_identifier: |
73 | 10.7k | case tok::identifier: |
74 | 10.7k | return options.identifier; |
75 | 1.12k | case tok::l_brace: |
76 | 2.40k | case tok::r_brace: |
77 | 2.40k | return options.braces; |
78 | 679 | case tok::l_square: |
79 | 1.36k | case tok::r_square: |
80 | 1.36k | return options.square_brackets; |
81 | 3.55k | case tok::l_paren: |
82 | 7.12k | case tok::r_paren: |
83 | 7.12k | return options.parentheses; |
84 | 2.07k | case tok::comma: |
85 | 2.07k | return options.comma; |
86 | 247 | case tok::coloncolon: |
87 | 430 | case tok::colon: |
88 | 430 | return options.colon; |
89 | | |
90 | 162 | case tok::amp: |
91 | 178 | case tok::ampamp: |
92 | 178 | case tok::ampequal: |
93 | 900 | case tok::star: |
94 | 904 | case tok::starequal: |
95 | 1.09k | case tok::plus: |
96 | 1.50k | case tok::plusplus: |
97 | 1.53k | case tok::plusequal: |
98 | 1.57k | case tok::minus: |
99 | 1.74k | case tok::arrow: |
100 | 1.74k | case tok::minusminus: |
101 | 1.74k | case tok::minusequal: |
102 | 1.74k | case tok::tilde: |
103 | 1.88k | case tok::exclaim: |
104 | 1.88k | case tok::exclaimequal: |
105 | 1.89k | case tok::slash: |
106 | 1.89k | case tok::slashequal: |
107 | 1.90k | case tok::percent: |
108 | 1.90k | case tok::percentequal: |
109 | 2.05k | case tok::less: |
110 | 2.08k | case tok::lessless: |
111 | 2.09k | case tok::lessequal: |
112 | 2.09k | case tok::lesslessequal: |
113 | 2.09k | case tok::spaceship: |
114 | 2.19k | case tok::greater: |
115 | 2.19k | case tok::greatergreater: |
116 | 2.20k | case tok::greaterequal: |
117 | 2.20k | case tok::greatergreaterequal: |
118 | 2.43k | case tok::caret: |
119 | 2.43k | case tok::caretequal: |
120 | 2.43k | case tok::pipe: |
121 | 2.43k | case tok::pipepipe: |
122 | 2.43k | case tok::pipeequal: |
123 | 2.43k | case tok::question: |
124 | 3.99k | case tok::equal: |
125 | 4.01k | case tok::equalequal: |
126 | 4.01k | return options.operators; |
127 | 24.8k | default: |
128 | 24.8k | break; |
129 | 53.0k | } |
130 | 24.8k | return HighlightStyle::ColorStyle(); |
131 | 63.1k | } |
132 | | |
133 | | void ClangHighlighter::Highlight(const HighlightStyle &options, |
134 | | llvm::StringRef line, |
135 | | std::optional<size_t> cursor_pos, |
136 | | llvm::StringRef previous_lines, |
137 | 10.0k | Stream &result) const { |
138 | 10.0k | using namespace clang; |
139 | | |
140 | 10.0k | FileSystemOptions file_opts; |
141 | 10.0k | FileManager file_mgr(file_opts, |
142 | 10.0k | FileSystem::Instance().GetVirtualFileSystem()); |
143 | | |
144 | | // The line might end in a backslash which would cause Clang to drop the |
145 | | // backslash and the terminating new line. This makes sense when parsing C++, |
146 | | // but when highlighting we care about preserving the backslash/newline. To |
147 | | // not lose this information we remove the new line here so that Clang knows |
148 | | // this is just a single line we are highlighting. We add back the newline |
149 | | // after tokenizing. |
150 | 10.0k | llvm::StringRef line_ending = ""; |
151 | | // There are a few legal line endings Clang recognizes and we need to |
152 | | // temporarily remove from the string. |
153 | 10.0k | if (line.consume_back("\r\n")) |
154 | 2 | line_ending = "\r\n"; |
155 | 10.0k | else if (line.consume_back("\n")) |
156 | 9.97k | line_ending = "\n"; |
157 | 31 | else if (line.consume_back("\r")) |
158 | 1 | line_ending = "\r"; |
159 | | |
160 | 10.0k | unsigned line_number = previous_lines.count('\n') + 1U; |
161 | | |
162 | | // Let's build the actual source code Clang needs and setup some utility |
163 | | // objects. |
164 | 10.0k | std::string full_source = previous_lines.str() + line.str(); |
165 | 10.0k | llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs()); |
166 | 10.0k | llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts( |
167 | 10.0k | new DiagnosticOptions()); |
168 | 10.0k | DiagnosticsEngine diags(diag_ids, diags_opts); |
169 | 10.0k | clang::SourceManager SM(diags, file_mgr); |
170 | 10.0k | auto buf = llvm::MemoryBuffer::getMemBuffer(full_source); |
171 | | |
172 | 10.0k | FileID FID = SM.createFileID(buf->getMemBufferRef()); |
173 | | |
174 | | // Let's just enable the latest ObjC and C++ which should get most tokens |
175 | | // right. |
176 | 10.0k | LangOptions Opts; |
177 | 10.0k | Opts.ObjC = true; |
178 | | // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too |
179 | 10.0k | Opts.CPlusPlus17 = true; |
180 | 10.0k | Opts.LineComment = true; |
181 | | |
182 | 10.0k | Lexer lex(FID, buf->getMemBufferRef(), SM, Opts); |
183 | | // The lexer should keep whitespace around. |
184 | 10.0k | lex.SetKeepWhitespaceMode(true); |
185 | | |
186 | | // Keeps track if we have entered a PP directive. |
187 | 10.0k | bool in_pp_directive = false; |
188 | | |
189 | | // True once we actually lexed the user provided line. |
190 | 10.0k | bool found_user_line = false; |
191 | | |
192 | | // True if we already highlighted the token under the cursor, false otherwise. |
193 | 10.0k | bool highlighted_cursor = false; |
194 | 10.0k | Token token; |
195 | 10.0k | bool exit = false; |
196 | 75.1k | while (!exit) { |
197 | | // Returns true if this is the last token we get from the lexer. |
198 | 65.1k | exit = lex.LexFromRawLexer(token); |
199 | | |
200 | 65.1k | bool invalid = false; |
201 | 65.1k | unsigned current_line_number = |
202 | 65.1k | SM.getSpellingLineNumber(token.getLocation(), &invalid); |
203 | 65.1k | if (current_line_number != line_number) |
204 | 0 | continue; |
205 | 65.1k | found_user_line = true; |
206 | | |
207 | | // We don't need to print any tokens without a spelling line number. |
208 | 65.1k | if (invalid) |
209 | 0 | continue; |
210 | | |
211 | | // Same as above but with the column number. |
212 | 65.1k | invalid = false; |
213 | 65.1k | unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid); |
214 | 65.1k | if (invalid) |
215 | 0 | continue; |
216 | | // Column numbers start at 1, but indexes in our string start at 0. |
217 | 65.1k | --start; |
218 | | |
219 | | // Annotations don't have a length, so let's skip them. |
220 | 65.1k | if (token.isAnnotation()) |
221 | 0 | continue; |
222 | | |
223 | | // Extract the token string from our source code. |
224 | 65.1k | llvm::StringRef tok_str = line.substr(start, token.getLength()); |
225 | | |
226 | | // If the token is just an empty string, we can skip all the work below. |
227 | 65.1k | if (tok_str.empty()) |
228 | 1.93k | continue; |
229 | | |
230 | | // If the cursor is inside this token, we have to apply the 'selected' |
231 | | // highlight style before applying the actual token color. |
232 | 63.1k | llvm::StringRef to_print = tok_str; |
233 | 63.1k | StreamString storage; |
234 | 63.1k | auto end = start + token.getLength(); |
235 | 63.1k | if (cursor_pos && end > *cursor_pos14.6k && !highlighted_cursor9.35k ) { |
236 | 1.49k | highlighted_cursor = true; |
237 | 1.49k | options.selected.Apply(storage, tok_str); |
238 | 1.49k | to_print = storage.GetString(); |
239 | 1.49k | } |
240 | | |
241 | | // See how we are supposed to highlight this token. |
242 | 63.1k | HighlightStyle::ColorStyle color = |
243 | 63.1k | determineClangStyle(*this, token, tok_str, options, in_pp_directive); |
244 | | |
245 | 63.1k | color.Apply(result, to_print); |
246 | 63.1k | } |
247 | | |
248 | | // Add the line ending we trimmed before tokenizing. |
249 | 10.0k | result << line_ending; |
250 | | |
251 | | // If we went over the whole file but couldn't find our own file, then |
252 | | // somehow our setup was wrong. When we're in release mode we just give the |
253 | | // user the normal line and pretend we don't know how to highlight it. In |
254 | | // debug mode we bail out with an assert as this should never happen. |
255 | 10.0k | if (!found_user_line) { |
256 | 0 | result << line; |
257 | 0 | assert(false && "We couldn't find the user line in the input file?"); |
258 | 0 | } |
259 | 10.0k | } |