/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===-- CPlusPlusNameParser.cpp -------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include "CPlusPlusNameParser.h" |
10 | | |
11 | | #include "clang/Basic/IdentifierTable.h" |
12 | | #include "clang/Basic/TokenKinds.h" |
13 | | #include "llvm/ADT/StringMap.h" |
14 | | #include "llvm/Support/Threading.h" |
15 | | #include <optional> |
16 | | |
17 | | using namespace lldb; |
18 | | using namespace lldb_private; |
19 | | using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction; |
20 | | using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName; |
21 | | namespace tok = clang::tok; |
22 | | |
23 | 664k | std::optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() { |
24 | 664k | m_next_token_index = 0; |
25 | 664k | std::optional<ParsedFunction> result(std::nullopt); |
26 | | |
27 | | // Try to parse the name as function without a return type specified e.g. |
28 | | // main(int, char*[]) |
29 | 664k | { |
30 | 664k | Bookmark start_position = SetBookmark(); |
31 | 664k | result = ParseFunctionImpl(false); |
32 | 664k | if (result && !HasMoreTokens()382 ) |
33 | 377 | return result; |
34 | 664k | } |
35 | | |
36 | | // Try to parse the name as function with function pointer return type e.g. |
37 | | // void (*get_func(const char*))() |
38 | 663k | result = ParseFuncPtr(true); |
39 | 663k | if (result) |
40 | 7 | return result; |
41 | | |
42 | | // Finally try to parse the name as a function with non-function return type |
43 | | // e.g. int main(int, char*[]) |
44 | 663k | result = ParseFunctionImpl(true); |
45 | 663k | if (HasMoreTokens()) |
46 | 663k | return std::nullopt; |
47 | 234 | return result; |
48 | 663k | } |
49 | | |
50 | 680k | std::optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() { |
51 | 680k | m_next_token_index = 0; |
52 | 680k | std::optional<ParsedNameRanges> name_ranges = ParseFullNameImpl(); |
53 | 680k | if (!name_ranges) |
54 | 19.9k | return std::nullopt; |
55 | 660k | if (HasMoreTokens()) |
56 | 1.43k | return std::nullopt; |
57 | 659k | ParsedName result; |
58 | 659k | result.basename = GetTextForRange(name_ranges->basename_range); |
59 | 659k | result.context = GetTextForRange(name_ranges->context_range); |
60 | 659k | return result; |
61 | 660k | } |
62 | | |
63 | 23.5M | bool CPlusPlusNameParser::HasMoreTokens() { |
64 | 23.5M | return m_next_token_index < m_tokens.size(); |
65 | 23.5M | } |
66 | | |
67 | 2.76M | void CPlusPlusNameParser::Advance() { ++m_next_token_index; } |
68 | | |
69 | 2 | void CPlusPlusNameParser::TakeBack() { --m_next_token_index; } |
70 | | |
71 | 2.69M | bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) { |
72 | 2.69M | if (!HasMoreTokens()) |
73 | 642k | return false; |
74 | | |
75 | 2.05M | if (!Peek().is(kind)) |
76 | 1.99M | return false; |
77 | | |
78 | 62.4k | Advance(); |
79 | 62.4k | return true; |
80 | 2.05M | } |
81 | | |
82 | 3.90M | template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { |
83 | 3.90M | if (!HasMoreTokens()) |
84 | 2.57M | return false; |
85 | | |
86 | 1.33M | if (!Peek().isOneOf(kinds...)) |
87 | 1.33M | return false; |
88 | | |
89 | 786 | Advance(); |
90 | 786 | return true; |
91 | 1.33M | } bool lldb_private::CPlusPlusNameParser::ConsumeToken<clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind>(clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind) Line | Count | Source | 82 | 2.63k | template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { | 83 | 2.63k | if (!HasMoreTokens()) | 84 | 1.31k | return false; | 85 | | | 86 | 1.32k | if (!Peek().isOneOf(kinds...)) | 87 | 639 | return false; | 88 | | | 89 | 683 | Advance(); | 90 | 683 | return true; | 91 | 1.32k | } |
bool lldb_private::CPlusPlusNameParser::ConsumeToken<clang::tok::TokenKind, clang::tok::TokenKind>(clang::tok::TokenKind, clang::tok::TokenKind) Line | Count | Source | 82 | 2.61M | template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { | 83 | 2.61M | if (!HasMoreTokens()) | 84 | 1.28M | return false; | 85 | | | 86 | 1.33M | if (!Peek().isOneOf(kinds...)) | 87 | 1.33M | return false; | 88 | | | 89 | 3 | Advance(); | 90 | 3 | return true; | 91 | 1.33M | } |
bool lldb_private::CPlusPlusNameParser::ConsumeToken<clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind>(clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind, clang::tok::TokenKind) Line | Count | Source | 82 | 1.28M | template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { | 83 | 1.28M | if (!HasMoreTokens()) | 84 | 1.28M | return false; | 85 | | | 86 | 3.34k | if (!Peek().isOneOf(kinds...)) | 87 | 3.24k | return false; | 88 | | | 89 | 100 | Advance(); | 90 | 100 | return true; | 91 | 3.34k | } |
|
92 | | |
93 | 9.34M | CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() { |
94 | 9.34M | return Bookmark(m_next_token_index); |
95 | 9.34M | } |
96 | | |
97 | 4.55M | size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; } |
98 | | |
99 | 7.58M | clang::Token &CPlusPlusNameParser::Peek() { |
100 | 7.58M | assert(HasMoreTokens()); |
101 | 7.58M | return m_tokens[m_next_token_index]; |
102 | 7.58M | } |
103 | | |
104 | | std::optional<ParsedFunction> |
105 | 1.32M | CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { |
106 | 1.32M | Bookmark start_position = SetBookmark(); |
107 | | |
108 | 1.32M | ParsedFunction result; |
109 | 1.32M | if (expect_return_type) { |
110 | 663k | size_t return_start = GetCurrentPosition(); |
111 | | // Consume return type if it's expected. |
112 | 663k | if (!ConsumeToken(tok::kw_auto) && !ConsumeTypename()663k ) |
113 | 19.9k | return std::nullopt; |
114 | | |
115 | 643k | size_t return_end = GetCurrentPosition(); |
116 | 643k | result.return_type = GetTextForRange(Range(return_start, return_end)); |
117 | 643k | } |
118 | | |
119 | 1.30M | auto maybe_name = ParseFullNameImpl(); |
120 | 1.30M | if (!maybe_name) { |
121 | 663k | return std::nullopt; |
122 | 663k | } |
123 | | |
124 | 644k | size_t argument_start = GetCurrentPosition(); |
125 | 644k | if (!ConsumeArguments()) { |
126 | 643k | return std::nullopt; |
127 | 643k | } |
128 | | |
129 | 623 | size_t qualifiers_start = GetCurrentPosition(); |
130 | 623 | SkipFunctionQualifiers(); |
131 | 623 | size_t end_position = GetCurrentPosition(); |
132 | | |
133 | 623 | result.name.basename = GetTextForRange(maybe_name->basename_range); |
134 | 623 | result.name.context = GetTextForRange(maybe_name->context_range); |
135 | 623 | result.arguments = GetTextForRange(Range(argument_start, qualifiers_start)); |
136 | 623 | result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); |
137 | 623 | start_position.Remove(); |
138 | 623 | return result; |
139 | 644k | } |
140 | | |
141 | | std::optional<ParsedFunction> |
142 | 663k | CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) { |
143 | | // This function parses a function definition |
144 | | // that returns a pointer type. |
145 | | // E.g., double (*(*func(long))(int))(float) |
146 | | |
147 | | // Step 1: |
148 | | // Remove the return type of the innermost |
149 | | // function pointer type. |
150 | | // |
151 | | // Leaves us with: |
152 | | // (*(*func(long))(int))(float) |
153 | 663k | Bookmark start_position = SetBookmark(); |
154 | 663k | if (expect_return_type) { |
155 | | // Consume return type. |
156 | 663k | if (!ConsumeTypename()) |
157 | 19.9k | return std::nullopt; |
158 | 663k | } |
159 | | |
160 | | // Step 2: |
161 | | // |
162 | | // Skip a pointer and parenthesis pair. |
163 | | // |
164 | | // Leaves us with: |
165 | | // (*func(long))(int))(float) |
166 | 643k | if (!ConsumeToken(tok::l_paren)) |
167 | 643k | return std::nullopt; |
168 | 28 | if (!ConsumePtrsAndRefs()) |
169 | 10 | return std::nullopt; |
170 | | |
171 | | // Step 3: |
172 | | // |
173 | | // Consume inner function name. This will fail unless |
174 | | // we stripped all the pointers on the left hand side |
175 | | // of the function name. |
176 | 18 | { |
177 | 18 | Bookmark before_inner_function_pos = SetBookmark(); |
178 | 18 | auto maybe_inner_function_name = ParseFunctionImpl(false); |
179 | 18 | if (maybe_inner_function_name) |
180 | 7 | if (ConsumeToken(tok::r_paren)) |
181 | 7 | if (ConsumeArguments()) { |
182 | 7 | SkipFunctionQualifiers(); |
183 | 7 | start_position.Remove(); |
184 | 7 | before_inner_function_pos.Remove(); |
185 | 7 | return maybe_inner_function_name; |
186 | 7 | } |
187 | 18 | } |
188 | | |
189 | | // Step 4: |
190 | | // |
191 | | // Parse the remaining string as a function pointer again. |
192 | | // This time don't consume the inner-most typename since |
193 | | // we're left with pointers only. This will strip another |
194 | | // layer of pointers until we're left with the innermost |
195 | | // function name/argument. I.e., func(long))(int))(float) |
196 | | // |
197 | | // Once we successfully stripped all pointers and gotten |
198 | | // the innermost function name from ParseFunctionImpl above, |
199 | | // we consume a single ')' and the arguments '(...)' that follows. |
200 | | // |
201 | | // Leaves us with: |
202 | | // )(float) |
203 | | // |
204 | | // This is the remnant of the outer function pointers' arguments. |
205 | | // Unwinding the recursive calls will remove the remaining |
206 | | // arguments. |
207 | 11 | auto maybe_inner_function_ptr_name = ParseFuncPtr(false); |
208 | 11 | if (maybe_inner_function_ptr_name) |
209 | 11 | if (ConsumeToken(tok::r_paren)) |
210 | 11 | if (ConsumeArguments()) { |
211 | 11 | SkipFunctionQualifiers(); |
212 | 11 | start_position.Remove(); |
213 | 11 | return maybe_inner_function_ptr_name; |
214 | 11 | } |
215 | | |
216 | 0 | return std::nullopt; |
217 | 11 | } |
218 | | |
219 | 645k | bool CPlusPlusNameParser::ConsumeArguments() { |
220 | 645k | return ConsumeBrackets(tok::l_paren, tok::r_paren); |
221 | 645k | } |
222 | | |
223 | 3.85k | bool CPlusPlusNameParser::ConsumeTemplateArgs() { |
224 | 3.85k | Bookmark start_position = SetBookmark(); |
225 | 3.85k | if (!HasMoreTokens() || Peek().getKind() != tok::less) |
226 | 0 | return false; |
227 | 3.85k | Advance(); |
228 | | |
229 | | // Consuming template arguments is a bit trickier than consuming function |
230 | | // arguments, because '<' '>' brackets are not always trivially balanced. In |
231 | | // some rare cases tokens '<' and '>' can appear inside template arguments as |
232 | | // arithmetic or shift operators not as template brackets. Examples: |
233 | | // std::enable_if<(10u)<(64), bool> |
234 | | // f<A<operator<(X,Y)::Subclass>> |
235 | | // Good thing that compiler makes sure that really ambiguous cases of '>' |
236 | | // usage should be enclosed within '()' brackets. |
237 | 3.85k | int template_counter = 1; |
238 | 3.85k | bool can_open_template = false; |
239 | 59.0k | while (HasMoreTokens() && template_counter > 058.7k ) { |
240 | 55.2k | tok::TokenKind kind = Peek().getKind(); |
241 | 55.2k | switch (kind) { |
242 | 2.19k | case tok::greatergreater: |
243 | 2.19k | template_counter -= 2; |
244 | 2.19k | can_open_template = false; |
245 | 2.19k | Advance(); |
246 | 2.19k | break; |
247 | 5.05k | case tok::greater: |
248 | 5.05k | --template_counter; |
249 | 5.05k | can_open_template = false; |
250 | 5.05k | Advance(); |
251 | 5.05k | break; |
252 | 5.60k | case tok::less: |
253 | | // '<' is an attempt to open a subteamplte |
254 | | // check if parser is at the point where it's actually possible, |
255 | | // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No |
256 | | // need to do the same for '>' because compiler actually makes sure that |
257 | | // '>' always surrounded by brackets to avoid ambiguity. |
258 | 5.60k | if (can_open_template) |
259 | 5.60k | ++template_counter; |
260 | 5.60k | can_open_template = false; |
261 | 5.60k | Advance(); |
262 | 5.60k | break; |
263 | 1 | case tok::kw_operator: // C++ operator overloading. |
264 | 1 | if (!ConsumeOperator()) |
265 | 0 | return false; |
266 | 1 | can_open_template = true; |
267 | 1 | break; |
268 | 17.0k | case tok::raw_identifier: |
269 | 17.0k | can_open_template = true; |
270 | 17.0k | Advance(); |
271 | 17.0k | break; |
272 | 48 | case tok::l_square: |
273 | | // Handle templates tagged with an ABI tag. |
274 | | // An example demangled/prettified version is: |
275 | | // func[abi:tag1][abi:tag2]<type[abi:tag3]>(int) |
276 | 48 | if (ConsumeAbiTag()) |
277 | 43 | can_open_template = true; |
278 | 5 | else if (ConsumeBrackets(tok::l_square, tok::r_square)) |
279 | 4 | can_open_template = false; |
280 | 1 | else |
281 | 1 | return false; |
282 | 47 | break; |
283 | 160 | case tok::l_paren: |
284 | 160 | if (!ConsumeArguments()) |
285 | 0 | return false; |
286 | 160 | can_open_template = false; |
287 | 160 | break; |
288 | 25.0k | default: |
289 | 25.0k | can_open_template = false; |
290 | 25.0k | Advance(); |
291 | 25.0k | break; |
292 | 55.2k | } |
293 | 55.2k | } |
294 | | |
295 | 3.85k | if (template_counter != 0) { |
296 | 22 | return false; |
297 | 22 | } |
298 | 3.82k | start_position.Remove(); |
299 | 3.82k | return true; |
300 | 3.85k | } |
301 | | |
302 | 609 | bool CPlusPlusNameParser::ConsumeAbiTag() { |
303 | 609 | Bookmark start_position = SetBookmark(); |
304 | 609 | if (!ConsumeToken(tok::l_square)) |
305 | 0 | return false; |
306 | | |
307 | 609 | if (HasMoreTokens() && Peek().is(tok::raw_identifier) && |
308 | 609 | Peek().getRawIdentifier() == "abi"599 ) |
309 | 598 | Advance(); |
310 | 11 | else |
311 | 11 | return false; |
312 | | |
313 | 598 | if (!ConsumeToken(tok::colon)) |
314 | 1 | return false; |
315 | | |
316 | | // Consume the actual tag string (and allow some special characters) |
317 | 1.19k | while (597 ConsumeToken(tok::raw_identifier, tok::comma, tok::period, |
318 | 1.19k | tok::numeric_constant)) |
319 | 602 | ; |
320 | | |
321 | 597 | if (!ConsumeToken(tok::r_square)) |
322 | 1 | return false; |
323 | | |
324 | 596 | start_position.Remove(); |
325 | 596 | return true; |
326 | 597 | } |
327 | | |
328 | 24 | bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { |
329 | 24 | Bookmark start_position = SetBookmark(); |
330 | 24 | if (!ConsumeToken(tok::l_paren)) { |
331 | 0 | return false; |
332 | 0 | } |
333 | 24 | constexpr llvm::StringLiteral g_anonymous("anonymous"); |
334 | 24 | if (HasMoreTokens() && Peek().is(tok::raw_identifier) && |
335 | 24 | Peek().getRawIdentifier() == g_anonymous13 ) { |
336 | 3 | Advance(); |
337 | 21 | } else { |
338 | 21 | return false; |
339 | 21 | } |
340 | | |
341 | 3 | if (!ConsumeToken(tok::kw_namespace)) { |
342 | 0 | return false; |
343 | 0 | } |
344 | | |
345 | 3 | if (!ConsumeToken(tok::r_paren)) { |
346 | 0 | return false; |
347 | 0 | } |
348 | 3 | start_position.Remove(); |
349 | 3 | return true; |
350 | 3 | } |
351 | | |
352 | 2 | bool CPlusPlusNameParser::ConsumeLambda() { |
353 | 2 | Bookmark start_position = SetBookmark(); |
354 | 2 | if (!ConsumeToken(tok::l_brace)) { |
355 | 0 | return false; |
356 | 0 | } |
357 | 2 | constexpr llvm::StringLiteral g_lambda("lambda"); |
358 | 2 | if (HasMoreTokens() && Peek().is(tok::raw_identifier) && |
359 | 2 | Peek().getRawIdentifier() == g_lambda) { |
360 | | // Put the matched brace back so we can use ConsumeBrackets |
361 | 2 | TakeBack(); |
362 | 2 | } else { |
363 | 0 | return false; |
364 | 0 | } |
365 | | |
366 | 2 | if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) { |
367 | 0 | return false; |
368 | 0 | } |
369 | | |
370 | 2 | start_position.Remove(); |
371 | 2 | return true; |
372 | 2 | } |
373 | | |
374 | | bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, |
375 | 650k | tok::TokenKind right) { |
376 | 650k | Bookmark start_position = SetBookmark(); |
377 | 650k | if (!HasMoreTokens() || Peek().getKind() != left8.50k ) |
378 | 643k | return false; |
379 | 7.08k | Advance(); |
380 | | |
381 | 7.08k | int counter = 1; |
382 | 27.4k | while (HasMoreTokens() && counter > 020.7k ) { |
383 | 20.4k | tok::TokenKind kind = Peek().getKind(); |
384 | 20.4k | if (kind == right) |
385 | 7.74k | --counter; |
386 | 12.6k | else if (kind == left) |
387 | 663 | ++counter; |
388 | 20.4k | Advance(); |
389 | 20.4k | } |
390 | | |
391 | 7.08k | assert(counter >= 0); |
392 | 7.08k | if (counter > 0) { |
393 | 1 | return false; |
394 | 1 | } |
395 | 7.07k | start_position.Remove(); |
396 | 7.07k | return true; |
397 | 7.08k | } |
398 | | |
399 | 60.4k | bool CPlusPlusNameParser::ConsumeOperator() { |
400 | 60.4k | Bookmark start_position = SetBookmark(); |
401 | 60.4k | if (!ConsumeToken(tok::kw_operator)) |
402 | 0 | return false; |
403 | | |
404 | 60.4k | if (!HasMoreTokens()) { |
405 | 19 | return false; |
406 | 19 | } |
407 | | |
408 | 60.4k | const auto &token = Peek(); |
409 | | |
410 | | // When clang generates debug info it adds template parameters to names. |
411 | | // Since clang doesn't add a space between the name and the template parameter |
412 | | // in some cases we are not generating valid C++ names e.g.: |
413 | | // |
414 | | // operator<<A::B> |
415 | | // |
416 | | // In some of these cases we will not parse them correctly. This fixes the |
417 | | // issue by detecting this case and inserting tok::less in place of |
418 | | // tok::lessless and returning successfully that we consumed the operator. |
419 | 60.4k | if (token.getKind() == tok::lessless) { |
420 | | // Make sure we have more tokens before attempting to look ahead one more. |
421 | 1.52k | if (m_next_token_index + 1 < m_tokens.size()) { |
422 | | // Look ahead two tokens. |
423 | 387 | clang::Token n_token = m_tokens[m_next_token_index + 1]; |
424 | | // If we find ( or < then this is indeed operator<< no need for fix. |
425 | 387 | if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less80 ) { |
426 | 20 | clang::Token tmp_tok; |
427 | 20 | tmp_tok.startToken(); |
428 | 20 | tmp_tok.setLength(1); |
429 | 20 | tmp_tok.setLocation(token.getLocation().getLocWithOffset(1)); |
430 | 20 | tmp_tok.setKind(tok::less); |
431 | | |
432 | 20 | m_tokens[m_next_token_index] = tmp_tok; |
433 | | |
434 | 20 | start_position.Remove(); |
435 | 20 | return true; |
436 | 20 | } |
437 | 387 | } |
438 | 1.52k | } |
439 | | |
440 | 60.4k | switch (token.getKind()) { |
441 | 5.41k | case tok::kw_new: |
442 | 10.9k | case tok::kw_delete: |
443 | | // This is 'new' or 'delete' operators. |
444 | 10.9k | Advance(); |
445 | | // Check for array new/delete. |
446 | 10.9k | if (HasMoreTokens() && Peek().is(tok::l_square)5.49k ) { |
447 | | // Consume the '[' and ']'. |
448 | 5.49k | if (!ConsumeBrackets(tok::l_square, tok::r_square)) |
449 | 0 | return false; |
450 | 5.49k | } |
451 | 10.9k | break; |
452 | | |
453 | 10.9k | #define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ |
454 | 49.4k | case tok::Token: \ |
455 | 49.4k | Advance(); \ |
456 | 49.4k | break; |
457 | 10.9k | #define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly) |
458 | 10.9k | #include "clang/Basic/OperatorKinds.def" |
459 | 0 | #undef OVERLOADED_OPERATOR |
460 | 0 | #undef OVERLOADED_OPERATOR_MULTI |
461 | | |
462 | 6 | case tok::l_paren: |
463 | | // Call operator consume '(' ... ')'. |
464 | 6 | if (ConsumeBrackets(tok::l_paren, tok::r_paren)) |
465 | 6 | break; |
466 | 0 | return false; |
467 | | |
468 | 14 | case tok::l_square: |
469 | | // This is a [] operator. |
470 | | // Consume the '[' and ']'. |
471 | 14 | if (ConsumeBrackets(tok::l_square, tok::r_square)) |
472 | 14 | break; |
473 | 0 | return false; |
474 | | |
475 | 87 | default: |
476 | | // This might be a cast operator. |
477 | 87 | if (ConsumeTypename()) |
478 | 87 | break; |
479 | 0 | return false; |
480 | 60.4k | } |
481 | 60.4k | start_position.Remove(); |
482 | 60.4k | return true; |
483 | 60.4k | } |
484 | | |
485 | 2.61M | void CPlusPlusNameParser::SkipTypeQualifiers() { |
486 | 2.61M | while (ConsumeToken(tok::kw_const, tok::kw_volatile)) |
487 | 3 | ; |
488 | 2.61M | } |
489 | | |
490 | 1.35k | void CPlusPlusNameParser::SkipFunctionQualifiers() { |
491 | 1.43k | while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)) |
492 | 81 | ; |
493 | 1.35k | } |
494 | | |
495 | 1.32M | bool CPlusPlusNameParser::ConsumeBuiltinType() { |
496 | 1.32M | bool result = false; |
497 | 1.32M | bool continue_parsing = true; |
498 | | // Built-in types can be made of a few keywords like 'unsigned long long |
499 | | // int'. This function consumes all built-in type keywords without checking |
500 | | // if they make sense like 'unsigned char void'. |
501 | 2.65M | while (continue_parsing && HasMoreTokens()1.32M ) { |
502 | 1.32M | switch (Peek().getKind()) { |
503 | 0 | case tok::kw_short: |
504 | 2 | case tok::kw_long: |
505 | 2 | case tok::kw___int64: |
506 | 2 | case tok::kw___int128: |
507 | 2 | case tok::kw_signed: |
508 | 4 | case tok::kw_unsigned: |
509 | 194 | case tok::kw_void: |
510 | 194 | case tok::kw_char: |
511 | 306 | case tok::kw_int: |
512 | 306 | case tok::kw_half: |
513 | 306 | case tok::kw_float: |
514 | 330 | case tok::kw_double: |
515 | 330 | case tok::kw___float128: |
516 | 330 | case tok::kw_wchar_t: |
517 | 369 | case tok::kw_bool: |
518 | 369 | case tok::kw_char16_t: |
519 | 369 | case tok::kw_char32_t: |
520 | 369 | result = true; |
521 | 369 | Advance(); |
522 | 369 | break; |
523 | 1.32M | default: |
524 | 1.32M | continue_parsing = false; |
525 | 1.32M | break; |
526 | 1.32M | } |
527 | 1.32M | } |
528 | 1.32M | return result; |
529 | 1.32M | } |
530 | | |
531 | 1.28M | void CPlusPlusNameParser::SkipPtrsAndRefs() { |
532 | | // Ignoring result. |
533 | 1.28M | ConsumePtrsAndRefs(); |
534 | 1.28M | } |
535 | | |
536 | 1.28M | bool CPlusPlusNameParser::ConsumePtrsAndRefs() { |
537 | 1.28M | bool found = false; |
538 | 1.28M | SkipTypeQualifiers(); |
539 | 1.28M | while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const, |
540 | 1.28M | tok::kw_volatile)) { |
541 | 100 | found = true; |
542 | 100 | SkipTypeQualifiers(); |
543 | 100 | } |
544 | 1.28M | return found; |
545 | 1.28M | } |
546 | | |
547 | 1.32M | bool CPlusPlusNameParser::ConsumeDecltype() { |
548 | 1.32M | Bookmark start_position = SetBookmark(); |
549 | 1.32M | if (!ConsumeToken(tok::kw_decltype)) |
550 | 1.32M | return false; |
551 | | |
552 | 44 | if (!ConsumeArguments()) |
553 | 0 | return false; |
554 | | |
555 | 44 | start_position.Remove(); |
556 | 44 | return true; |
557 | 44 | } |
558 | | |
559 | 1.32M | bool CPlusPlusNameParser::ConsumeTypename() { |
560 | 1.32M | Bookmark start_position = SetBookmark(); |
561 | 1.32M | SkipTypeQualifiers(); |
562 | 1.32M | if (!ConsumeBuiltinType() && !ConsumeDecltype()1.32M ) { |
563 | 1.32M | if (!ParseFullNameImpl()) |
564 | 39.8k | return false; |
565 | 1.32M | } |
566 | 1.28M | SkipPtrsAndRefs(); |
567 | 1.28M | start_position.Remove(); |
568 | 1.28M | return true; |
569 | 1.32M | } |
570 | | |
571 | | std::optional<CPlusPlusNameParser::ParsedNameRanges> |
572 | 3.31M | CPlusPlusNameParser::ParseFullNameImpl() { |
573 | | // Name parsing state machine. |
574 | 3.31M | enum class State { |
575 | 3.31M | Beginning, // start of the name |
576 | 3.31M | AfterTwoColons, // right after :: |
577 | 3.31M | AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+) |
578 | 3.31M | AfterTemplate, // right after template brackets (<something>) |
579 | 3.31M | AfterOperator, // right after name of C++ operator |
580 | 3.31M | }; |
581 | | |
582 | 3.31M | Bookmark start_position = SetBookmark(); |
583 | 3.31M | State state = State::Beginning; |
584 | 3.31M | bool continue_parsing = true; |
585 | 3.31M | std::optional<size_t> last_coloncolon_position; |
586 | | |
587 | 6.02M | while (continue_parsing && HasMoreTokens()5.93M ) { |
588 | 2.70M | const auto &token = Peek(); |
589 | 2.70M | switch (token.getKind()) { |
590 | 2.54M | case tok::raw_identifier: // Just a name. |
591 | 2.54M | if (state != State::Beginning && state != State::AfterTwoColons11.0k ) { |
592 | 33 | continue_parsing = false; |
593 | 33 | break; |
594 | 33 | } |
595 | 2.54M | Advance(); |
596 | 2.54M | state = State::AfterIdentifier; |
597 | 2.54M | break; |
598 | 610 | case tok::l_square: { |
599 | | // Handles types or functions that were tagged |
600 | | // with, e.g., |
601 | | // [[gnu::abi_tag("tag1","tag2")]] func() |
602 | | // and demangled/prettified into: |
603 | | // func[abi:tag1][abi:tag2]() |
604 | | |
605 | | // ABI tags only appear after a method or type name |
606 | 610 | const bool valid_state = |
607 | 610 | state == State::AfterIdentifier || state == State::AfterOperator80 ; |
608 | 610 | if (!valid_state || !ConsumeAbiTag()561 ) { |
609 | 57 | continue_parsing = false; |
610 | 57 | } |
611 | | |
612 | 610 | break; |
613 | 2.54M | } |
614 | 739 | case tok::l_paren: { |
615 | 739 | if (state == State::Beginning || state == State::AfterTwoColons717 ) { |
616 | | // (anonymous namespace) |
617 | 24 | if (ConsumeAnonymousNamespace()) { |
618 | 3 | state = State::AfterIdentifier; |
619 | 3 | break; |
620 | 3 | } |
621 | 24 | } |
622 | | |
623 | | // Type declared inside a function 'func()::Type' |
624 | 736 | if (state != State::AfterIdentifier && state != State::AfterTemplate647 && |
625 | 736 | state != State::AfterOperator381 ) { |
626 | 21 | continue_parsing = false; |
627 | 21 | break; |
628 | 21 | } |
629 | 715 | Bookmark l_paren_position = SetBookmark(); |
630 | | // Consume the '(' ... ') [const]'. |
631 | 715 | if (!ConsumeArguments()) { |
632 | 0 | continue_parsing = false; |
633 | 0 | break; |
634 | 0 | } |
635 | 715 | SkipFunctionQualifiers(); |
636 | | |
637 | | // Consume '::' |
638 | 715 | size_t coloncolon_position = GetCurrentPosition(); |
639 | 715 | if (!ConsumeToken(tok::coloncolon)) { |
640 | 713 | continue_parsing = false; |
641 | 713 | break; |
642 | 713 | } |
643 | 2 | l_paren_position.Remove(); |
644 | 2 | last_coloncolon_position = coloncolon_position; |
645 | 2 | state = State::AfterTwoColons; |
646 | 2 | break; |
647 | 715 | } |
648 | 2 | case tok::l_brace: |
649 | 2 | if (state == State::Beginning || state == State::AfterTwoColons) { |
650 | 2 | if (ConsumeLambda()) { |
651 | 2 | state = State::AfterIdentifier; |
652 | 2 | break; |
653 | 2 | } |
654 | 2 | } |
655 | 0 | continue_parsing = false; |
656 | 0 | break; |
657 | 11.8k | case tok::coloncolon: // Type nesting delimiter. |
658 | 11.8k | if (state != State::Beginning && state != State::AfterIdentifier11.8k && |
659 | 11.8k | state != State::AfterTemplate3.19k ) { |
660 | 0 | continue_parsing = false; |
661 | 0 | break; |
662 | 0 | } |
663 | 11.8k | last_coloncolon_position = GetCurrentPosition(); |
664 | 11.8k | Advance(); |
665 | 11.8k | state = State::AfterTwoColons; |
666 | 11.8k | break; |
667 | 3.85k | case tok::less: // Template brackets. |
668 | 3.85k | if (state != State::AfterIdentifier && state != State::AfterOperator99 ) { |
669 | 4 | continue_parsing = false; |
670 | 4 | break; |
671 | 4 | } |
672 | 3.85k | if (!ConsumeTemplateArgs()) { |
673 | 23 | continue_parsing = false; |
674 | 23 | break; |
675 | 23 | } |
676 | 3.82k | state = State::AfterTemplate; |
677 | 3.82k | break; |
678 | 60.4k | case tok::kw_operator: // C++ operator overloading. |
679 | 60.4k | if (state != State::Beginning && state != State::AfterTwoColons731 ) { |
680 | 0 | continue_parsing = false; |
681 | 0 | break; |
682 | 0 | } |
683 | 60.4k | if (!ConsumeOperator()) { |
684 | 19 | continue_parsing = false; |
685 | 19 | break; |
686 | 19 | } |
687 | 60.4k | state = State::AfterOperator; |
688 | 60.4k | break; |
689 | 45 | case tok::tilde: // Destructor. |
690 | 45 | if (state != State::Beginning && state != State::AfterTwoColons29 ) { |
691 | 0 | continue_parsing = false; |
692 | 0 | break; |
693 | 0 | } |
694 | 45 | Advance(); |
695 | 45 | if (ConsumeToken(tok::raw_identifier)) { |
696 | 45 | state = State::AfterIdentifier; |
697 | 45 | } else { |
698 | 0 | TakeBack(); |
699 | 0 | continue_parsing = false; |
700 | 0 | } |
701 | 45 | break; |
702 | 86.7k | default: |
703 | 86.7k | continue_parsing = false; |
704 | 86.7k | break; |
705 | 2.70M | } |
706 | 2.70M | } |
707 | | |
708 | 3.31M | if (state == State::AfterIdentifier || state == State::AfterOperator784k || |
709 | 3.31M | state == State::AfterTemplate724k ) { |
710 | 2.59M | ParsedNameRanges result; |
711 | 2.59M | if (last_coloncolon_position) { |
712 | 7.10k | result.context_range = |
713 | 7.10k | Range(start_position.GetSavedPosition(), *last_coloncolon_position); |
714 | 7.10k | result.basename_range = |
715 | 7.10k | Range(*last_coloncolon_position + 1, GetCurrentPosition()); |
716 | 2.58M | } else { |
717 | 2.58M | result.basename_range = |
718 | 2.58M | Range(start_position.GetSavedPosition(), GetCurrentPosition()); |
719 | 2.58M | } |
720 | 2.59M | start_position.Remove(); |
721 | 2.59M | return result; |
722 | 2.59M | } else { |
723 | 723k | return std::nullopt; |
724 | 723k | } |
725 | 3.31M | } |
726 | | |
727 | 1.96M | llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) { |
728 | 1.96M | if (range.empty()) |
729 | 658k | return llvm::StringRef(); |
730 | 1.30M | assert(range.begin_index < range.end_index); |
731 | 1.30M | assert(range.begin_index < m_tokens.size()); |
732 | 1.30M | assert(range.end_index <= m_tokens.size()); |
733 | 1.30M | clang::Token &first_token = m_tokens[range.begin_index]; |
734 | 1.30M | clang::Token &last_token = m_tokens[range.end_index - 1]; |
735 | 1.30M | clang::SourceLocation start_loc = first_token.getLocation(); |
736 | 1.30M | clang::SourceLocation end_loc = last_token.getLocation(); |
737 | 1.30M | unsigned start_pos = start_loc.getRawEncoding(); |
738 | 1.30M | unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength(); |
739 | 1.30M | return m_text.take_front(end_pos).drop_front(start_pos); |
740 | 1.30M | } |
741 | | |
742 | 1.34M | static const clang::LangOptions &GetLangOptions() { |
743 | 1.34M | static clang::LangOptions g_options; |
744 | 1.34M | static llvm::once_flag g_once_flag; |
745 | 1.34M | llvm::call_once(g_once_flag, []() { |
746 | 907 | g_options.LineComment = true; |
747 | 907 | g_options.C99 = true; |
748 | 907 | g_options.C11 = true; |
749 | 907 | g_options.CPlusPlus = true; |
750 | 907 | g_options.CPlusPlus11 = true; |
751 | 907 | g_options.CPlusPlus14 = true; |
752 | 907 | g_options.CPlusPlus17 = true; |
753 | 907 | g_options.CPlusPlus20 = true; |
754 | 907 | }); |
755 | 1.34M | return g_options; |
756 | 1.34M | } |
757 | | |
758 | 1.34M | static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() { |
759 | 1.34M | static llvm::StringMap<tok::TokenKind> g_map{ |
760 | 424M | #define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name}, |
761 | 1.34M | #include "clang/Basic/TokenKinds.def" |
762 | 1.34M | #undef KEYWORD |
763 | 1.34M | }; |
764 | 1.34M | return g_map; |
765 | 1.34M | } |
766 | | |
767 | 1.34M | void CPlusPlusNameParser::ExtractTokens() { |
768 | 1.34M | if (m_text.empty()) |
769 | 8 | return; |
770 | 1.34M | clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(), |
771 | 1.34M | m_text.data(), m_text.data() + m_text.size()); |
772 | 1.34M | const auto &kw_map = GetKeywordsMap(); |
773 | 1.34M | clang::Token token; |
774 | 3.00M | for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof); |
775 | 1.66M | lexer.LexFromRawLexer(token)) { |
776 | 1.66M | if (token.is(clang::tok::raw_identifier)) { |
777 | 1.43M | auto it = kw_map.find(token.getRawIdentifier()); |
778 | 1.43M | if (it != kw_map.end()) { |
779 | 45.9k | token.setKind(it->getValue()); |
780 | 45.9k | } |
781 | 1.43M | } |
782 | | |
783 | 1.66M | m_tokens.push_back(token); |
784 | 1.66M | } |
785 | 1.34M | } |