/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp
Line | Count | Source |
1 | | //===- Tokens.cpp - collect tokens from preprocessing ---------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | #include "clang/Tooling/Syntax/Tokens.h" |
9 | | |
10 | | #include "clang/Basic/Diagnostic.h" |
11 | | #include "clang/Basic/IdentifierTable.h" |
12 | | #include "clang/Basic/LLVM.h" |
13 | | #include "clang/Basic/LangOptions.h" |
14 | | #include "clang/Basic/SourceLocation.h" |
15 | | #include "clang/Basic/SourceManager.h" |
16 | | #include "clang/Basic/TokenKinds.h" |
17 | | #include "clang/Lex/PPCallbacks.h" |
18 | | #include "clang/Lex/Preprocessor.h" |
19 | | #include "clang/Lex/Token.h" |
20 | | #include "llvm/ADT/ArrayRef.h" |
21 | | #include "llvm/ADT/None.h" |
22 | | #include "llvm/ADT/Optional.h" |
23 | | #include "llvm/ADT/STLExtras.h" |
24 | | #include "llvm/Support/Debug.h" |
25 | | #include "llvm/Support/ErrorHandling.h" |
26 | | #include "llvm/Support/FormatVariadic.h" |
27 | | #include "llvm/Support/raw_ostream.h" |
28 | | #include <algorithm> |
29 | | #include <cassert> |
30 | | #include <iterator> |
31 | | #include <string> |
32 | | #include <utility> |
33 | | #include <vector> |
34 | | |
35 | | using namespace clang; |
36 | | using namespace clang::syntax; |
37 | | |
38 | | namespace { |
39 | | // Finds the smallest consecutive subsequence of Toks that covers R.
40 | | llvm::ArrayRef<syntax::Token> |
41 | | getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R, |
42 | 389 | const SourceManager &SM) { |
43 | 389 | if (R.isInvalid()) |
44 | 0 | return {}; |
45 | 389 | const syntax::Token *Begin = |
46 | 2.07k | llvm::partition_point(Toks, [&](const syntax::Token &T) { |
47 | 2.07k | return SM.isBeforeInTranslationUnit(T.location(), R.getBegin()); |
48 | 2.07k | }); |
49 | 389 | const syntax::Token *End = |
50 | 2.07k | llvm::partition_point(Toks, [&](const syntax::Token &T) { |
51 | 2.07k | return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location()); |
52 | 2.07k | }); |
53 | 389 | if (Begin > End) |
54 | 0 | return {}; |
55 | 389 | return {Begin, End}; |
56 | 389 | } |
57 | | |
58 | | // Finds the smallest expansion range that contains expanded tokens First and |
59 | | // Last, e.g.: |
60 | | // #define ID(x) x |
61 | | // ID(ID(ID(a1) a2)) |
62 | | // ~~ -> a1 |
63 | | // ~~ -> a2 |
64 | | // ~~~~~~~~~ -> a1 a2 |
65 | | SourceRange findCommonRangeForMacroArgs(const syntax::Token &First, |
66 | | const syntax::Token &Last, |
67 | 450 | const SourceManager &SM) { |
68 | 450 | SourceRange Res; |
69 | 450 | auto FirstLoc = First.location(), LastLoc = Last.location(); |
70 | | // Keep traversing up the spelling chain as long as tokens are part of the
71 | | // same expansion. |
72 | 845 | while (!FirstLoc.isFileID() && !LastLoc.isFileID()) {
73 | 457 | auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion(); |
74 | 457 | auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion(); |
75 | | // Stop if expansions have diverged. |
76 | 457 | if (ExpInfoFirst.getExpansionLocStart() != |
77 | 457 | ExpInfoLast.getExpansionLocStart()) |
78 | 61 | break; |
79 | | // Do not continue into macro bodies. |
80 | 396 | if (!ExpInfoFirst.isMacroArgExpansion() || |
81 | 396 | !ExpInfoLast.isMacroArgExpansion())
82 | 1 | break; |
83 | 395 | FirstLoc = SM.getImmediateSpellingLoc(FirstLoc); |
84 | 395 | LastLoc = SM.getImmediateSpellingLoc(LastLoc); |
85 | | // Update the result afterwards, as we want the tokens that triggered the |
86 | | // expansion. |
87 | 395 | Res = {FirstLoc, LastLoc}; |
88 | 395 | } |
89 | | // Normally mapping back to expansion location here only changes FileID, as |
90 | | // we've already found some tokens expanded from the same macro argument, and |
91 | | // they should map to a consecutive subset of spelled tokens. Unfortunately |
92 | | // SourceManager::isBeforeInTranslationUnit discriminates source locations
93 | | // based on their FileID in addition to offsets. So even though we are
94 | | // referring to the same tokens, SourceManager might tell us that one is before
95 | | // the other if they've got different FileIDs. |
96 | 450 | return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange(); |
97 | 450 | } |
98 | | |
99 | | } // namespace |
100 | | |
101 | | syntax::Token::Token(SourceLocation Location, unsigned Length, |
102 | | tok::TokenKind Kind) |
103 | 80.9k | : Location(Location), Length(Length), Kind(Kind) { |
104 | 80.9k | assert(Location.isValid()); |
105 | 80.9k | } |
106 | | |
107 | | syntax::Token::Token(const clang::Token &T) |
108 | 80.9k | : Token(T.getLocation(), T.getLength(), T.getKind()) { |
109 | 80.9k | assert(!T.isAnnotation()); |
110 | 80.9k | }
111 | | |
112 | 21.9k | llvm::StringRef syntax::Token::text(const SourceManager &SM) const { |
113 | 21.9k | bool Invalid = false; |
114 | 21.9k | const char *Start = SM.getCharacterData(location(), &Invalid); |
115 | 21.9k | assert(!Invalid); |
116 | 0 | return llvm::StringRef(Start, length()); |
117 | 21.9k | } |
118 | | |
119 | 137k | FileRange syntax::Token::range(const SourceManager &SM) const { |
120 | 137k | assert(location().isFileID() && "must be a spelled token"); |
121 | 0 | FileID File; |
122 | 137k | unsigned StartOffset; |
123 | 137k | std::tie(File, StartOffset) = SM.getDecomposedLoc(location()); |
124 | 137k | return FileRange(File, StartOffset, StartOffset + length()); |
125 | 137k | } |
126 | | |
127 | | FileRange syntax::Token::range(const SourceManager &SM, |
128 | | const syntax::Token &First, |
129 | 68.9k | const syntax::Token &Last) { |
130 | 68.9k | auto F = First.range(SM); |
131 | 68.9k | auto L = Last.range(SM); |
132 | 68.9k | assert(F.file() == L.file() && "tokens from different files"); |
133 | 0 | assert((F == L || F.endOffset() <= L.beginOffset()) && |
134 | 68.9k | "wrong order of tokens"); |
135 | 0 | return FileRange(F.file(), F.beginOffset(), L.endOffset()); |
136 | 68.9k | } |
137 | | |
138 | 90 | llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) { |
139 | 90 | return OS << T.str(); |
140 | 90 | } |
141 | | |
142 | | FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset) |
143 | 279k | : File(File), Begin(BeginOffset), End(EndOffset) { |
144 | 279k | assert(File.isValid()); |
145 | 0 | assert(BeginOffset <= EndOffset); |
146 | 279k | } |
147 | | |
148 | | FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc, |
149 | 0 | unsigned Length) { |
150 | 0 | assert(BeginLoc.isValid()); |
151 | 0 | assert(BeginLoc.isFileID()); |
152 | | |
153 | 0 | std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc); |
154 | 0 | End = Begin + Length; |
155 | 0 | } |
156 | | FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc, |
157 | 0 | SourceLocation EndLoc) { |
158 | 0 | assert(BeginLoc.isValid()); |
159 | 0 | assert(BeginLoc.isFileID()); |
160 | 0 | assert(EndLoc.isValid()); |
161 | 0 | assert(EndLoc.isFileID()); |
162 | 0 | assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc)); |
163 | 0 | assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc)); |
164 | | |
165 | 0 | std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc); |
166 | 0 | End = SM.getFileOffset(EndLoc); |
167 | 0 | } |
168 | | |
169 | | llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, |
170 | 0 | const FileRange &R) { |
171 | 0 | return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})", |
172 | 0 | R.file().getHashValue(), R.beginOffset(), |
173 | 0 | R.endOffset()); |
174 | 0 | } |
175 | | |
176 | 14 | llvm::StringRef FileRange::text(const SourceManager &SM) const { |
177 | 14 | bool Invalid = false; |
178 | 14 | StringRef Text = SM.getBufferData(File, &Invalid); |
179 | 14 | if (Invalid) |
180 | 0 | return ""; |
181 | 14 | assert(Begin <= Text.size()); |
182 | 0 | assert(End <= Text.size()); |
183 | 0 | return Text.substr(Begin, length()); |
184 | 14 | } |
185 | | |
186 | 40 | void TokenBuffer::indexExpandedTokens() { |
187 | | // No-op if the index is already created. |
188 | 40 | if (!ExpandedTokIndex.empty()) |
189 | 0 | return; |
190 | 40 | ExpandedTokIndex.reserve(ExpandedTokens.size()); |
191 | | // Index ExpandedTokens for faster lookups by SourceLocation. |
192 | 402 | for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I) {
193 | 362 | SourceLocation Loc = ExpandedTokens[I].location(); |
194 | 362 | if (Loc.isValid()) |
195 | 362 | ExpandedTokIndex[Loc] = I; |
196 | 362 | } |
197 | 40 | } |
198 | | |
199 | 2 | llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const { |
200 | 2 | if (R.isInvalid()) |
201 | 1 | return {}; |
202 | 1 | if (!ExpandedTokIndex.empty()) { |
203 | | // Quick lookup if `R` is a token range. |
204 | | // This is a huge win since the majority of users use ranges provided by an
205 | | // AST. Ranges in the AST are token ranges from the expanded token stream.
206 | 1 | const auto B = ExpandedTokIndex.find(R.getBegin()); |
207 | 1 | const auto E = ExpandedTokIndex.find(R.getEnd()); |
208 | 1 | if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) { |
209 | 1 | const Token *L = ExpandedTokens.data() + B->getSecond(); |
210 | | // Add 1 to End to make a half-open range. |
211 | 1 | const Token *R = ExpandedTokens.data() + E->getSecond() + 1; |
212 | 1 | if (L > R) |
213 | 0 | return {}; |
214 | 1 | return {L, R}; |
215 | 1 | } |
216 | 1 | } |
217 | | // Slow case. Use `isBeforeInTranslationUnit` to binary search for the |
218 | | // required range. |
219 | 0 | return getTokensCovering(expandedTokens(), R, *SourceMgr); |
220 | 1 | } |
221 | | |
222 | 42 | CharSourceRange FileRange::toCharRange(const SourceManager &SM) const { |
223 | 42 | return CharSourceRange( |
224 | 42 | SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)), |
225 | 42 | /*IsTokenRange=*/false); |
226 | 42 | } |
227 | | |
228 | | std::pair<const syntax::Token *, const TokenBuffer::Mapping *> |
229 | 135k | TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { |
230 | 135k | assert(Expanded); |
231 | 0 | assert(ExpandedTokens.data() <= Expanded && |
232 | 135k | Expanded < ExpandedTokens.data() + ExpandedTokens.size()); |
233 | | |
234 | 0 | auto FileIt = Files.find( |
235 | 135k | SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location()))); |
236 | 135k | assert(FileIt != Files.end() && "no file for an expanded token"); |
237 | | |
238 | 0 | const MarkedFile &File = FileIt->second; |
239 | | |
240 | 135k | unsigned ExpandedIndex = Expanded - ExpandedTokens.data(); |
241 | | // Find the first mapping that produced tokens after \p Expanded. |
242 | 135k | auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) { |
243 | 12.0k | return M.BeginExpanded <= ExpandedIndex; |
244 | 12.0k | }); |
245 | | // Our token could only be produced by the previous mapping. |
246 | 135k | if (It == File.Mappings.begin()) { |
247 | | // No previous mapping, no need to modify offsets. |
248 | 129k | return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], |
249 | 129k | /*Mapping=*/nullptr}; |
250 | 129k | } |
251 | 6.18k | --It; // 'It' now points to last mapping that started before our token. |
252 | | |
253 | | // Check if the token is part of the mapping. |
254 | 6.18k | if (ExpandedIndex < It->EndExpanded) |
255 | 2.41k | return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It}; |
256 | | |
257 | | // Not part of the mapping, use the index from previous mapping to compute the |
258 | | // corresponding spelled token. |
259 | 3.77k | return { |
260 | 3.77k | &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)], |
261 | 3.77k | /*Mapping=*/nullptr}; |
262 | 6.18k | } |
263 | | |
264 | | const TokenBuffer::Mapping * |
265 | | TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F, |
266 | 41 | const syntax::Token *Spelled) { |
267 | 41 | assert(F.SpelledTokens.data() <= Spelled); |
268 | 0 | unsigned SpelledI = Spelled - F.SpelledTokens.data(); |
269 | 41 | assert(SpelledI < F.SpelledTokens.size()); |
270 | | |
271 | 87 | auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) { |
272 | 87 | return M.BeginSpelled <= SpelledI; |
273 | 87 | }); |
274 | 41 | if (It == F.Mappings.begin()) |
275 | 6 | return nullptr; |
276 | 35 | --It; |
277 | 35 | return &*It; |
278 | 41 | } |
279 | | |
280 | | llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1> |
281 | 22 | TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const { |
282 | 22 | if (Spelled.empty()) |
283 | 0 | return {}; |
284 | 22 | const auto &File = fileForSpelled(Spelled); |
285 | | |
286 | 22 | auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front()); |
287 | 22 | unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data(); |
288 | 22 | assert(SpelledFrontI < File.SpelledTokens.size()); |
289 | 0 | unsigned ExpandedBegin; |
290 | 22 | if (!FrontMapping) { |
291 | | // No mapping that starts before the first token of Spelled, we don't have |
292 | | // to modify offsets. |
293 | 3 | ExpandedBegin = File.BeginExpanded + SpelledFrontI; |
294 | 19 | } else if (SpelledFrontI < FrontMapping->EndSpelled) { |
295 | | // This mapping applies to Spelled tokens. |
296 | 19 | if (SpelledFrontI != FrontMapping->BeginSpelled) { |
297 | | // Spelled tokens don't cover the entire mapping, returning empty result. |
298 | 3 | return {}; // FIXME: support macro arguments. |
299 | 3 | } |
300 | | // Spelled tokens start at the beginning of this mapping. |
301 | 16 | ExpandedBegin = FrontMapping->BeginExpanded; |
302 | 16 | } else { |
303 | | // Spelled tokens start after the mapping ends (they start in the hole |
304 | | // between 2 mappings, or between a mapping and end of the file). |
305 | 0 | ExpandedBegin = |
306 | 0 | FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled); |
307 | 0 | } |
308 | | |
309 | 19 | auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back()); |
310 | 19 | unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data(); |
311 | 19 | unsigned ExpandedEnd; |
312 | 19 | if (!BackMapping) { |
313 | | // No mapping that starts before the last token of Spelled, we don't have to |
314 | | // modify offsets. |
315 | 3 | ExpandedEnd = File.BeginExpanded + SpelledBackI + 1; |
316 | 16 | } else if (SpelledBackI < BackMapping->EndSpelled) { |
317 | | // This mapping applies to Spelled tokens. |
318 | 16 | if (SpelledBackI + 1 != BackMapping->EndSpelled) { |
319 | | // Spelled tokens don't cover the entire mapping, returning empty result. |
320 | 1 | return {}; // FIXME: support macro arguments. |
321 | 1 | } |
322 | 15 | ExpandedEnd = BackMapping->EndExpanded; |
323 | 15 | } else { |
324 | | // Spelled tokens end after the mapping ends. |
325 | 0 | ExpandedEnd = |
326 | 0 | BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1; |
327 | 0 | } |
328 | | |
329 | 18 | assert(ExpandedBegin < ExpandedTokens.size()); |
330 | 0 | assert(ExpandedEnd < ExpandedTokens.size()); |
331 | | // Avoid returning empty ranges. |
332 | 18 | if (ExpandedBegin == ExpandedEnd) |
333 | 3 | return {}; |
334 | 15 | return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin, |
335 | 15 | ExpandedTokens.data() + ExpandedEnd)}; |
336 | 18 | } |
337 | | |
338 | 82 | llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const { |
339 | 82 | auto It = Files.find(FID); |
340 | 82 | assert(It != Files.end()); |
341 | 0 | return It->second.SpelledTokens; |
342 | 82 | } |
343 | | |
344 | 0 | const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const { |
345 | 0 | assert(Loc.isFileID()); |
346 | 0 | const auto *Tok = llvm::partition_point( |
347 | 0 | spelledTokens(SourceMgr->getFileID(Loc)), |
348 | 0 | [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); |
349 | 0 | if (!Tok || Tok->location() != Loc) |
350 | 0 | return nullptr; |
351 | 0 | return Tok; |
352 | 0 | } |
353 | | |
354 | 0 | std::string TokenBuffer::Mapping::str() const { |
355 | 0 | return std::string( |
356 | 0 | llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})", |
357 | 0 | BeginSpelled, EndSpelled, BeginExpanded, EndExpanded)); |
358 | 0 | } |
359 | | |
360 | | llvm::Optional<llvm::ArrayRef<syntax::Token>> |
361 | 68.1k | TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const { |
362 | | // Mapping an empty range is ambiguous in case of empty mappings at either end |
363 | | // of the range, bail out in that case. |
364 | 68.1k | if (Expanded.empty()) |
365 | 269 | return llvm::None; |
366 | | |
367 | 67.9k | const syntax::Token *BeginSpelled; |
368 | 67.9k | const Mapping *BeginMapping; |
369 | 67.9k | std::tie(BeginSpelled, BeginMapping) = |
370 | 67.9k | spelledForExpandedToken(&Expanded.front()); |
371 | | |
372 | 67.9k | const syntax::Token *LastSpelled; |
373 | 67.9k | const Mapping *LastMapping; |
374 | 67.9k | std::tie(LastSpelled, LastMapping) = |
375 | 67.9k | spelledForExpandedToken(&Expanded.back()); |
376 | | |
377 | 67.9k | FileID FID = SourceMgr->getFileID(BeginSpelled->location()); |
378 | | // FIXME: Handle multi-file changes by trying to map onto a common root. |
379 | 67.9k | if (FID != SourceMgr->getFileID(LastSpelled->location())) |
380 | 0 | return llvm::None; |
381 | | |
382 | 67.9k | const MarkedFile &File = Files.find(FID)->second; |
383 | | |
384 | | // If both tokens are coming from a macro argument expansion, try to map to
385 | | // the smallest part of the macro argument. The BeginMapping && LastMapping check is
386 | | // only for performance, they are a prerequisite for Expanded.front() and |
387 | | // Expanded.back() being part of a macro arg expansion. |
388 | 67.9k | if (BeginMapping && LastMapping &&
389 | 67.9k | SourceMgr->isMacroArgExpansion(Expanded.front().location()) &&
390 | 67.9k | SourceMgr->isMacroArgExpansion(Expanded.back().location())) {
391 | 450 | auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(), |
392 | 450 | Expanded.back(), *SourceMgr); |
393 | | // It might be the case that tokens are arguments of different macro calls, |
394 | | // in that case we should continue with the logic below instead of returning |
395 | | // an empty range. |
396 | 450 | if (CommonRange.isValid()) |
397 | 389 | return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr); |
398 | 450 | } |
399 | | |
400 | | // Do not allow changes that don't cover the full expansion.
401 | 67.5k | unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data(); |
402 | 67.5k | unsigned EndExpanded = Expanded.end() - ExpandedTokens.data(); |
403 | 67.5k | if (BeginMapping && BeginExpanded != BeginMapping->BeginExpanded)
404 | 506 | return llvm::None; |
405 | 67.0k | if (LastMapping && LastMapping->EndExpanded != EndExpanded)
406 | 129 | return llvm::None; |
407 | | // All is good, return the result. |
408 | 66.8k | return llvm::makeArrayRef( |
409 | 66.8k | BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
410 | 66.8k | : BeginSpelled,
411 | 66.8k | LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
412 | 66.8k | : LastSpelled + 1);
413 | 67.0k | } |
414 | | |
415 | | TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F, |
416 | 13 | const Mapping &M) const { |
417 | 13 | Expansion E; |
418 | 13 | E.Spelled = llvm::makeArrayRef(F.SpelledTokens.data() + M.BeginSpelled, |
419 | 13 | F.SpelledTokens.data() + M.EndSpelled); |
420 | 13 | E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M.BeginExpanded, |
421 | 13 | ExpandedTokens.data() + M.EndExpanded); |
422 | 13 | return E; |
423 | 13 | } |
424 | | |
425 | | const TokenBuffer::MarkedFile & |
426 | 54 | TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const { |
427 | 54 | assert(!Spelled.empty()); |
428 | 0 | assert(Spelled.front().location().isFileID() && "not a spelled token"); |
429 | 0 | auto FileIt = Files.find(SourceMgr->getFileID(Spelled.front().location())); |
430 | 54 | assert(FileIt != Files.end() && "file not tracked by token buffer"); |
431 | 0 | const auto &File = FileIt->second; |
432 | 54 | assert(File.SpelledTokens.data() <= Spelled.data() && |
433 | 54 | Spelled.end() <= |
434 | 54 | (File.SpelledTokens.data() + File.SpelledTokens.size()) && |
435 | 54 | "Tokens not in spelled range"); |
436 | 0 | #ifndef NDEBUG |
437 | 0 | auto T1 = Spelled.back().location(); |
438 | 54 | auto T2 = File.SpelledTokens.back().location(); |
439 | 54 | assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2)); |
440 | 0 | #endif |
441 | 0 | return File; |
442 | 54 | } |
443 | | |
444 | | llvm::Optional<TokenBuffer::Expansion> |
445 | 28 | TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const { |
446 | 28 | assert(Spelled); |
447 | 0 | const auto &File = fileForSpelled(*Spelled); |
448 | | |
449 | 28 | unsigned SpelledIndex = Spelled - File.SpelledTokens.data(); |
450 | 56 | auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) { |
451 | 56 | return M.BeginSpelled < SpelledIndex; |
452 | 56 | }); |
453 | 28 | if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
454 | 22 | return llvm::None; |
455 | 6 | return makeExpansion(File, *M); |
456 | 28 | } |
457 | | |
458 | | std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping( |
459 | 4 | llvm::ArrayRef<syntax::Token> Spelled) const { |
460 | 4 | if (Spelled.empty()) |
461 | 0 | return {}; |
462 | 4 | const auto &File = fileForSpelled(Spelled); |
463 | | |
464 | | // Find the first overlapping range, and then copy until we stop overlapping. |
465 | 4 | unsigned SpelledBeginIndex = Spelled.begin() - File.SpelledTokens.data(); |
466 | 4 | unsigned SpelledEndIndex = Spelled.end() - File.SpelledTokens.data(); |
467 | 8 | auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) { |
468 | 8 | return M.EndSpelled <= SpelledBeginIndex; |
469 | 8 | }); |
470 | 4 | std::vector<TokenBuffer::Expansion> Expansions; |
471 | 11 | for (; M != File.Mappings.end() && M->BeginSpelled < SpelledEndIndex; ++M)
472 | 7 | Expansions.push_back(makeExpansion(File, *M)); |
473 | 4 | return Expansions; |
474 | 4 | } |
475 | | |
476 | | llvm::ArrayRef<syntax::Token> |
477 | | syntax::spelledTokensTouching(SourceLocation Loc, |
478 | 16 | llvm::ArrayRef<syntax::Token> Tokens) { |
479 | 16 | assert(Loc.isFileID()); |
480 | | |
481 | 0 | auto *Right = llvm::partition_point( |
482 | 42 | Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); |
483 | 16 | bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
484 | 16 | bool AcceptLeft =
485 | 16 | Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
486 | 16 | return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
487 | 16 | Right + (AcceptRight ? 1 : 0));
488 | 16 | } |
489 | | |
490 | | llvm::ArrayRef<syntax::Token> |
491 | | syntax::spelledTokensTouching(SourceLocation Loc, |
492 | 8 | const syntax::TokenBuffer &Tokens) { |
493 | 8 | return spelledTokensTouching( |
494 | 8 | Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); |
495 | 8 | } |
496 | | |
497 | | const syntax::Token * |
498 | | syntax::spelledIdentifierTouching(SourceLocation Loc, |
499 | 8 | llvm::ArrayRef<syntax::Token> Tokens) { |
500 | 9 | for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) { |
501 | 9 | if (Tok.kind() == tok::identifier) |
502 | 3 | return &Tok; |
503 | 9 | } |
504 | 5 | return nullptr; |
505 | 8 | } |
506 | | |
507 | | const syntax::Token * |
508 | | syntax::spelledIdentifierTouching(SourceLocation Loc, |
509 | 8 | const syntax::TokenBuffer &Tokens) { |
510 | 8 | return spelledIdentifierTouching( |
511 | 8 | Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); |
512 | 8 | } |
513 | | |
514 | | std::vector<const syntax::Token *> |
515 | 1 | TokenBuffer::macroExpansions(FileID FID) const { |
516 | 1 | auto FileIt = Files.find(FID); |
517 | 1 | assert(FileIt != Files.end() && "file not tracked by token buffer"); |
518 | 0 | auto &File = FileIt->second; |
519 | 1 | std::vector<const syntax::Token *> Expansions; |
520 | 1 | auto &Spelled = File.SpelledTokens; |
521 | 4 | for (auto Mapping : File.Mappings) { |
522 | 4 | const syntax::Token *Token = &Spelled[Mapping.BeginSpelled]; |
523 | 4 | if (Token->kind() == tok::TokenKind::identifier) |
524 | 3 | Expansions.push_back(Token); |
525 | 4 | } |
526 | 1 | return Expansions; |
527 | 1 | } |
528 | | |
529 | | std::vector<syntax::Token> syntax::tokenize(const FileRange &FR, |
530 | | const SourceManager &SM, |
531 | 3.91k | const LangOptions &LO) { |
532 | 3.91k | std::vector<syntax::Token> Tokens; |
533 | 3.91k | IdentifierTable Identifiers(LO); |
534 | 41.0k | auto AddToken = [&](clang::Token T) { |
535 | | // Fill the proper token kind for keywords, etc. |
536 | 41.0k | if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
537 | 41.0k | !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
538 | 16.1k | clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier()); |
539 | 16.1k | T.setIdentifierInfo(&II); |
540 | 16.1k | T.setKind(II.getTokenID()); |
541 | 16.1k | } |
542 | 41.0k | Tokens.push_back(syntax::Token(T)); |
543 | 41.0k | }; |
544 | | |
545 | 3.91k | auto SrcBuffer = SM.getBufferData(FR.file()); |
546 | 3.91k | Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(), |
547 | 3.91k | SrcBuffer.data() + FR.beginOffset(), |
548 | | // We can't make BufEnd point to FR.endOffset, as Lexer requires a |
549 | | // null terminated buffer. |
550 | 3.91k | SrcBuffer.data() + SrcBuffer.size()); |
551 | | |
552 | 3.91k | clang::Token T; |
553 | 43.1k | while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset())
554 | 39.1k | AddToken(T); |
555 | | // LexFromRawLexer returns true when it parses the last token of the file, add |
556 | | // it iff it starts within the range we are interested in. |
557 | 3.91k | if (SM.getFileOffset(T.getLocation()) < FR.endOffset()) |
558 | 1.82k | AddToken(T); |
559 | 3.91k | return Tokens; |
560 | 3.91k | } |
561 | | |
562 | | std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, |
563 | 3.91k | const LangOptions &LO) { |
564 | 3.91k | return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO); |
565 | 3.91k | } |
566 | | |
567 | | /// Records information required to construct mappings for the token buffer that
568 | | /// we are collecting. |
569 | | class TokenCollector::CollectPPExpansions : public PPCallbacks { |
570 | | public: |
571 | 2.20k | CollectPPExpansions(TokenCollector &C) : Collector(&C) {} |
572 | | |
573 | | /// Disabled instance will stop reporting anything to TokenCollector. |
574 | | /// This ensures that uses of the preprocessor after TokenCollector::consume() |
575 | | /// is called do not access the (possibly invalid) collector instance. |
576 | 2.20k | void disable() { Collector = nullptr; } |
577 | | |
578 | | void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD, |
579 | 260 | SourceRange Range, const MacroArgs *Args) override { |
580 | 260 | if (!Collector) |
581 | 0 | return; |
582 | 260 | const auto &SM = Collector->PP.getSourceManager(); |
583 | | // Only record top-level expansions that directly produce expanded tokens. |
584 | | // This excludes those where: |
585 | | // - the macro use is inside a macro body, |
586 | | // - the macro appears in an argument to another macro. |
587 | | // However macro expansion isn't really a tree, it's token rewrite rules, |
588 | | // so there are other cases, e.g. |
589 | | // #define B(X) X |
590 | | // #define A 1 + B |
591 | | // A(2) |
592 | | // Both A and B produce expanded tokens, though the macro name 'B' comes |
593 | | // from an expansion. The best we can do is merge the mappings for both. |
594 | | |
595 | | // The *last* token of any top-level macro expansion must be in a file. |
596 | | // (In the example above, see the closing paren of the expansion of B). |
597 | 260 | if (!Range.getEnd().isFileID()) |
598 | 2 | return; |
599 | | // If there's a current expansion that encloses this one, this one can't be |
600 | | // top-level. |
601 | 258 | if (LastExpansionEnd.isValid() && |
602 | 258 | !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
603 | 16 | return; |
604 | | |
605 | | // If the macro invocation (B) starts in a macro (A) but ends in a file, |
606 | | // we'll create a merged mapping for A + B by overwriting the endpoint for |
607 | | // A's startpoint. |
608 | 242 | if (!Range.getBegin().isFileID()) { |
609 | 1 | Range.setBegin(SM.getExpansionLoc(Range.getBegin())); |
610 | 1 | assert(Collector->Expansions.count(Range.getBegin()) && |
611 | 1 | "Overlapping macros should have same expansion location"); |
612 | 1 | } |
613 | | |
614 | 0 | Collector->Expansions[Range.getBegin()] = Range.getEnd(); |
615 | 242 | LastExpansionEnd = Range.getEnd(); |
616 | 242 | } |
617 | | // FIXME: handle directives like #pragma, #include, etc. |
618 | | private: |
619 | | TokenCollector *Collector; |
620 | | /// Used to detect recursive macro expansions. |
621 | | SourceLocation LastExpansionEnd; |
622 | | }; |
623 | | |
624 | | /// Fills in the TokenBuffer by tracing the run of a preprocessor. The |
625 | | /// implementation tracks the tokens, macro expansions and directives coming |
626 | | /// from the preprocessor and: |
627 | | /// - for each token, figures out if it is a part of an expanded token stream, |
628 | | /// spelled token stream or both. Stores the tokens appropriately. |
629 | | /// - records mappings from the spelled to expanded token ranges, e.g. for macro |
630 | | /// expansions. |
631 | | /// FIXME: also properly record: |
632 | | /// - #include directives, |
633 | | /// - #pragma, #line and other PP directives, |
634 | | /// - skipped pp regions, |
635 | | /// - ... |
636 | | |
637 | 2.20k | TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) { |
638 | | // Collect the expanded token stream during preprocessing. |
639 | 39.9k | PP.setTokenWatcher([this](const clang::Token &T) { |
640 | 39.9k | if (T.isAnnotation()) |
641 | 3 | return; |
642 | 39.9k | DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs() |
643 | 39.9k | << "Token: " |
644 | 39.9k | << syntax::Token(T).dumpForTests( |
645 | 39.9k | this->PP.getSourceManager()) |
646 | 39.9k | << "\n" |
647 | | |
648 | 39.9k | ); |
649 | 39.9k | Expanded.push_back(syntax::Token(T)); |
650 | 39.9k | }); |
651 | | // And locations of macro calls, to properly recover boundaries of those in |
652 | | // case of empty expansions. |
653 | 2.20k | auto CB = std::make_unique<CollectPPExpansions>(*this); |
654 | 2.20k | this->Collector = CB.get(); |
655 | 2.20k | PP.addPPCallbacks(std::move(CB)); |
656 | 2.20k | } |
657 | | |
658 | | /// Builds mappings and spelled tokens in the TokenBuffer based on the expanded |
659 | | /// token stream. |
660 | | class TokenCollector::Builder { |
661 | | public: |
662 | | Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions, |
663 | | const SourceManager &SM, const LangOptions &LangOpts) |
664 | | : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM), |
665 | 2.20k | LangOpts(LangOpts) { |
666 | 2.20k | Result.ExpandedTokens = std::move(Expanded); |
667 | 2.20k | } |
668 | | |
669 | 2.20k | TokenBuffer build() && { |
670 | 2.20k | assert(!Result.ExpandedTokens.empty()); |
671 | 0 | assert(Result.ExpandedTokens.back().kind() == tok::eof); |
672 | | |
673 | | // Tokenize every file that contributed tokens to the expanded stream. |
674 | 0 | buildSpelledTokens(); |
675 | | |
676 | | // The expanded token stream consists of runs of tokens that came from |
677 | | // the same source (a macro expansion, part of a file etc). |
678 | | // Between these runs are the logical positions of spelled tokens that |
679 | | // didn't expand to anything. |
680 | 4.49k | while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) { |
681 | | // Create empty mappings for spelled tokens that expanded to nothing here. |
682 | | // May advance NextSpelled, but NextExpanded is unchanged. |
683 | 2.28k | discard(); |
684 | | // Create mapping for a contiguous run of expanded tokens. |
685 | | // Advances NextExpanded past the run, and NextSpelled accordingly. |
686 | 2.28k | unsigned OldPosition = NextExpanded; |
687 | 2.28k | advance(); |
688 | 2.28k | if (NextExpanded == OldPosition) |
689 | 0 | diagnoseAdvanceFailure(); |
690 | 2.28k | } |
691 | | // If any tokens remain in any of the files, they didn't expand to anything. |
692 | | // Create empty mappings up until the end of the file. |
693 | 2.20k | for (const auto &File : Result.Files) |
694 | 2.21k | discard(File.first); |
695 | | |
696 | 2.20k | #ifndef NDEBUG |
697 | 2.21k | for (auto &pair : Result.Files) { |
698 | 2.21k | auto &mappings = pair.second.Mappings; |
699 | 2.21k | assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1, |
700 | 2.21k | const TokenBuffer::Mapping &M2) { |
701 | 2.21k | return M1.BeginSpelled < M2.BeginSpelled && |
702 | 2.21k | M1.EndSpelled < M2.EndSpelled && |
703 | 2.21k | M1.BeginExpanded < M2.BeginExpanded && |
704 | 2.21k | M1.EndExpanded < M2.EndExpanded; |
705 | 2.21k | })); |
706 | 2.21k | } |
707 | 2.20k | #endif |
708 | | |
709 | 2.20k | return std::move(Result); |
710 | 2.20k | } |
711 | | |
712 | | private: |
713 | | // Consume a sequence of spelled tokens that didn't expand to anything. |
714 | | // In the simplest case, skips spelled tokens until finding one that produced |
715 | | // the NextExpanded token, and creates an empty mapping for them. |
716 | | // If Drain is provided, skips remaining tokens from that file instead. |
717 | 4.49k | void discard(llvm::Optional<FileID> Drain = llvm::None) { |
718 | 4.49k | SourceLocation Target = |
719 | 4.49k | Drain ? SM.getLocForEndOfFile(*Drain)
720 | 4.49k | : SM.getExpansionLoc( |
721 | 2.28k | Result.ExpandedTokens[NextExpanded].location()); |
722 | 4.49k | FileID File = SM.getFileID(Target); |
723 | 4.49k | const auto &SpelledTokens = Result.Files[File].SpelledTokens; |
724 | 4.49k | auto &NextSpelled = this->NextSpelled[File]; |
725 | | |
726 | 4.49k | TokenBuffer::Mapping Mapping; |
727 | 4.49k | Mapping.BeginSpelled = NextSpelled; |
728 | | // When dropping trailing tokens from a file, the empty mapping should |
729 | | // be positioned within the file's expanded-token range (at the end). |
730 | 4.49k | Mapping.BeginExpanded = Mapping.EndExpanded = |
731 | 4.49k | Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
732 | | // We may want to split into several adjacent empty mappings. |
733 | | // FlushMapping() emits the current mapping and starts a new one. |
734 | 4.53k | auto FlushMapping = [&, this] { |
735 | 4.53k | Mapping.EndSpelled = NextSpelled; |
736 | 4.53k | if (Mapping.BeginSpelled != Mapping.EndSpelled) |
737 | 153 | Result.Files[File].Mappings.push_back(Mapping); |
738 | 4.53k | Mapping.BeginSpelled = NextSpelled; |
739 | 4.53k | }; |
740 | | |
741 | 6.10k | while (NextSpelled < SpelledTokens.size() && |
742 | 6.10k | SpelledTokens[NextSpelled].location() < Target) {
743 | | // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion) |
744 | | // then we want to partition our (empty) mapping. |
745 | | // [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target) |
746 | 1.60k | SourceLocation KnownEnd = |
747 | 1.60k | CollectedExpansions.lookup(SpelledTokens[NextSpelled].location()); |
748 | 1.60k | if (KnownEnd.isValid()) { |
749 | 19 | FlushMapping(); // Emits [Start, NextSpelled) |
750 | 45 | while (NextSpelled < SpelledTokens.size() && |
751 | 45 | SpelledTokens[NextSpelled].location() <= KnownEnd)
752 | 26 | ++NextSpelled; |
753 | 19 | FlushMapping(); // Emits [NextSpelled, KnownEnd] |
754 | | // Now the loop continues and will emit (KnownEnd, Target).
755 | 1.58k | } else { |
756 | 1.58k | ++NextSpelled; |
757 | 1.58k | } |
758 | 1.60k | } |
759 | 4.49k | FlushMapping(); |
760 | 4.49k | } |
761 | | |
762 | | // Consumes the NextExpanded token and others that are part of the same run. |
763 | | // Increases NextExpanded and NextSpelled by at least one, and adds a mapping |
764 | | // (unless this is a run of file tokens, which we represent with no mapping). |
765 | 2.28k | void advance() { |
766 | 2.28k | const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded]; |
767 | 2.28k | SourceLocation Expansion = SM.getExpansionLoc(Tok.location()); |
768 | 2.28k | FileID File = SM.getFileID(Expansion); |
769 | 2.28k | const auto &SpelledTokens = Result.Files[File].SpelledTokens; |
770 | 2.28k | auto &NextSpelled = this->NextSpelled[File]; |
771 | | |
772 | 2.28k | if (Tok.location().isFileID()) { |
773 | | // A run of file tokens continues while the expanded/spelled tokens match. |
774 | 39.0k | while (NextSpelled < SpelledTokens.size() && |
775 | 39.0k | NextExpanded < Result.ExpandedTokens.size() &&
776 | 39.0k | SpelledTokens[NextSpelled].location() == |
777 | 37.1k | Result.ExpandedTokens[NextExpanded].location()) { |
778 | 36.9k | ++NextSpelled; |
779 | 36.9k | ++NextExpanded; |
780 | 36.9k | } |
781 | | // We need no mapping for file tokens copied to the expanded stream. |
782 | 2.06k | } else { |
783 | | // We found a new macro expansion. We should have its spelling bounds. |
784 | 222 | auto End = CollectedExpansions.lookup(Expansion); |
785 | 222 | assert(End.isValid() && "Macro expansion wasn't captured?"); |
786 | | |
787 | | // Mapping starts here... |
788 | 0 | TokenBuffer::Mapping Mapping; |
789 | 222 | Mapping.BeginExpanded = NextExpanded; |
790 | 222 | Mapping.BeginSpelled = NextSpelled; |
791 | | // ... consumes spelled tokens within bounds we captured ... |
792 | 935 | while (NextSpelled < SpelledTokens.size() && |
793 | 935 | SpelledTokens[NextSpelled].location() <= End)
794 | 713 | ++NextSpelled; |
795 | | // ... consumes expanded tokens rooted at the same expansion ... |
796 | 971 | while (NextExpanded < Result.ExpandedTokens.size() && |
797 | 971 | SM.getExpansionLoc( |
798 | 971 | Result.ExpandedTokens[NextExpanded].location()) == Expansion) |
799 | 749 | ++NextExpanded; |
800 | | // ... and ends here. |
801 | 222 | Mapping.EndExpanded = NextExpanded; |
802 | 222 | Mapping.EndSpelled = NextSpelled; |
803 | 222 | Result.Files[File].Mappings.push_back(Mapping); |
804 | 222 | } |
805 | 2.28k | } |
806 | | |
807 | | // advance() is supposed to consume at least one token - if not, we crash. |
808 | 0 | void diagnoseAdvanceFailure() { |
809 | 0 | #ifndef NDEBUG |
810 | | // Show the failed-to-map token in context. |
811 | 0 | for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10; |
812 | 0 | I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) { |
813 | 0 | const char *L = |
814 | 0 | (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " "; |
815 | 0 | llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n"; |
816 | 0 | } |
817 | 0 | #endif |
818 | 0 | llvm_unreachable("Couldn't map expanded token to spelled tokens!"); |
819 | 0 | } |
820 | | |
821 | | /// Initializes TokenBuffer::Files and fills spelled tokens and expanded |
822 | | /// ranges for each of the files. |
823 | 2.20k | void buildSpelledTokens() { |
824 | 42.1k | for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
825 | 39.9k | const auto &Tok = Result.ExpandedTokens[I]; |
826 | 39.9k | auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location())); |
827 | 39.9k | auto It = Result.Files.try_emplace(FID); |
828 | 39.9k | TokenBuffer::MarkedFile &File = It.first->second; |
829 | | |
830 | | // The eof token should not be considered part of the main-file's range. |
831 | 39.9k | File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
832 | | |
833 | 39.9k | if (!It.second) |
834 | 37.7k | continue; // we have seen this file before. |
835 | | // This is the first time we see this file. |
836 | 2.21k | File.BeginExpanded = I; |
837 | 2.21k | File.SpelledTokens = tokenize(FID, SM, LangOpts); |
838 | 2.21k | } |
839 | 2.20k | } |
840 | | |
841 | | TokenBuffer Result; |
842 | | unsigned NextExpanded = 0; // cursor in ExpandedTokens |
843 | | llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens |
844 | | PPExpansions CollectedExpansions; |
845 | | const SourceManager &SM; |
846 | | const LangOptions &LangOpts; |
847 | | }; |
848 | | |
849 | 2.20k | TokenBuffer TokenCollector::consume() && { |
850 | 2.20k | PP.setTokenWatcher(nullptr); |
851 | 2.20k | Collector->disable(); |
852 | 2.20k | return Builder(std::move(Expanded), std::move(Expansions), |
853 | 2.20k | PP.getSourceManager(), PP.getLangOpts()) |
854 | 2.20k | .build(); |
855 | 2.20k | } |
856 | | |
857 | 90 | std::string syntax::Token::str() const { |
858 | 90 | return std::string(llvm::formatv("Token({0}, length = {1})", |
859 | 90 | tok::getTokenName(kind()), length())); |
860 | 90 | } |
861 | | |
862 | 0 | std::string syntax::Token::dumpForTests(const SourceManager &SM) const { |
863 | 0 | return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM), |
864 | 0 | tok::getTokenName(kind()), length())); |
865 | 0 | } |
866 | | |
867 | 15 | std::string TokenBuffer::dumpForTests() const { |
868 | 485 | auto PrintToken = [this](const syntax::Token &T) -> std::string { |
869 | 485 | if (T.kind() == tok::eof) |
870 | 12 | return "<eof>"; |
871 | 473 | return std::string(T.text(*SourceMgr)); |
872 | 485 | }; |
873 | | |
874 | 15 | auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS, |
875 | 32 | llvm::ArrayRef<syntax::Token> Tokens) { |
876 | 32 | if (Tokens.empty()) { |
877 | 4 | OS << "<empty>"; |
878 | 4 | return; |
879 | 4 | } |
880 | 28 | OS << Tokens[0].text(*SourceMgr); |
881 | 424 | for (unsigned I = 1; I < Tokens.size(); ++I) {
882 | 396 | if (Tokens[I].kind() == tok::eof) |
883 | 0 | continue; |
884 | 396 | OS << " " << PrintToken(Tokens[I]); |
885 | 396 | } |
886 | 28 | }; |
887 | | |
888 | 15 | std::string Dump; |
889 | 15 | llvm::raw_string_ostream OS(Dump); |
890 | | |
891 | 15 | OS << "expanded tokens:\n" |
892 | 15 | << " "; |
893 | | // (!) we do not show '<eof>'. |
894 | 15 | DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back()); |
895 | 15 | OS << "\n"; |
896 | | |
897 | 15 | std::vector<FileID> Keys; |
898 | 15 | for (auto F : Files) |
899 | 17 | Keys.push_back(F.first); |
900 | 15 | llvm::sort(Keys); |
901 | | |
902 | 17 | for (FileID ID : Keys) { |
903 | 17 | const MarkedFile &File = Files.find(ID)->second; |
904 | 17 | auto *Entry = SourceMgr->getFileEntryForID(ID); |
905 | 17 | if (!Entry) |
906 | 0 | continue; // Skip builtin files. |
907 | 17 | OS << llvm::formatv("file '{0}'\n", Entry->getName()) |
908 | 17 | << " spelled tokens:\n" |
909 | 17 | << " "; |
910 | 17 | DumpTokens(OS, File.SpelledTokens); |
911 | 17 | OS << "\n"; |
912 | | |
913 | 17 | if (File.Mappings.empty()) { |
914 | 4 | OS << " no mappings.\n"; |
915 | 4 | continue; |
916 | 4 | } |
917 | 13 | OS << " mappings:\n"; |
918 | 24 | for (auto &M : File.Mappings) { |
919 | 24 | OS << llvm::formatv( |
920 | 24 | " ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n", |
921 | 24 | PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled, |
922 | 24 | M.EndSpelled == File.SpelledTokens.size() |
923 | 24 | ? "<eof>"
924 | 24 | : PrintToken(File.SpelledTokens[M.EndSpelled]),
925 | 24 | M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]), |
926 | 24 | M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]), |
927 | 24 | M.EndExpanded); |
928 | 24 | } |
929 | 13 | } |
930 | 15 | return Dump; |
931 | 15 | } |
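
The collector/buffer pair covered above is typically driven from a frontend action: a TokenCollector is attached to the Preprocessor before parsing starts, and the TokenBuffer returned by consume() is then queried to map AST (expanded) ranges back to the spelled tokens of a file. A minimal usage sketch of that flow, using only the API visible in this file; the function name collectAndQuery, the RangeFromAST parameter, and the elided parse step are illustrative, not part of Tokens.cpp:

// Illustrative sketch only. Assumes `PP` is a fully configured
// clang::Preprocessor and that the parser runs between constructing the
// collector and calling consume().
#include "clang/Lex/Preprocessor.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>

void collectAndQuery(clang::Preprocessor &PP, clang::SourceRange RangeFromAST) {
  // Attach the collector before parsing so its token watcher and PPCallbacks
  // observe every expanded token and macro expansion.
  clang::syntax::TokenCollector Collector(PP);

  // ... run the parser / frontend action here ...

  // Build the buffer; consume() is &&-qualified, so move the collector.
  clang::syntax::TokenBuffer Tokens = std::move(Collector).consume();
  Tokens.indexExpandedTokens(); // optional: speeds up expandedTokens(Range)

  // Expanded tokens covering an AST-provided range.
  llvm::ArrayRef<clang::syntax::Token> Expanded =
      Tokens.expandedTokens(RangeFromAST);

  // Map back to the spelled tokens that produced them; this returns
  // llvm::None if the range covers only part of a macro expansion.
  if (auto Spelled = Tokens.spelledForExpanded(Expanded)) {
    for (const clang::syntax::Token &T : *Spelled)
      llvm::errs() << T.text(Tokens.sourceManager()) << " ";
  }
}

In real callers the parse step happens inside a FrontendAction rather than by hand, but the query pattern against the resulting TokenBuffer is the same as sketched here.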