Coverage Report

Created: 2020-02-25 14:32

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
#include "clang/Tooling/Syntax/Tokens.h"
9
10
#include "clang/Basic/Diagnostic.h"
11
#include "clang/Basic/IdentifierTable.h"
12
#include "clang/Basic/LLVM.h"
13
#include "clang/Basic/LangOptions.h"
14
#include "clang/Basic/SourceLocation.h"
15
#include "clang/Basic/SourceManager.h"
16
#include "clang/Basic/TokenKinds.h"
17
#include "clang/Lex/PPCallbacks.h"
18
#include "clang/Lex/Preprocessor.h"
19
#include "clang/Lex/Token.h"
20
#include "llvm/ADT/ArrayRef.h"
21
#include "llvm/ADT/None.h"
22
#include "llvm/ADT/Optional.h"
23
#include "llvm/ADT/STLExtras.h"
24
#include "llvm/Support/Debug.h"
25
#include "llvm/Support/ErrorHandling.h"
26
#include "llvm/Support/FormatVariadic.h"
27
#include "llvm/Support/raw_ostream.h"
28
#include <algorithm>
29
#include <cassert>
30
#include <iterator>
31
#include <string>
32
#include <utility>
33
#include <vector>
34
35
using namespace clang;
36
using namespace clang::syntax;
37
38
syntax::Token::Token(SourceLocation Location, unsigned Length,
39
                     tok::TokenKind Kind)
40
1.71k
    : Location(Location), Length(Length), Kind(Kind) {
41
1.71k
  assert(Location.isValid());
42
1.71k
}
43
44
syntax::Token::Token(const clang::Token &T)
45
1.71k
    : Token(T.getLocation(), T.getLength(), T.getKind()) {
46
1.71k
  assert(!T.isAnnotation());
47
1.71k
}
Unexecuted instantiation: clang::syntax::Token::Token(clang::Token const&)
clang::syntax::Token::Token(clang::Token const&)
Line
Count
Source
45
1.71k
    : Token(T.getLocation(), T.getLength(), T.getKind()) {
46
1.71k
  assert(!T.isAnnotation());
47
1.71k
}
48
49
1.63k
llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
50
1.63k
  bool Invalid = false;
51
1.63k
  const char *Start = SM.getCharacterData(location(), &Invalid);
52
1.63k
  assert(!Invalid);
53
1.63k
  return llvm::StringRef(Start, length());
54
1.63k
}
55
56
80
FileRange syntax::Token::range(const SourceManager &SM) const {
57
80
  assert(location().isFileID() && "must be a spelled token");
58
80
  FileID File;
59
80
  unsigned StartOffset;
60
80
  std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
61
80
  return FileRange(File, StartOffset, StartOffset + length());
62
80
}
63
64
FileRange syntax::Token::range(const SourceManager &SM,
65
                               const syntax::Token &First,
66
38
                               const syntax::Token &Last) {
67
38
  auto F = First.range(SM);
68
38
  auto L = Last.range(SM);
69
38
  assert(F.file() == L.file() && "tokens from different files");
70
38
  assert((F == L || F.endOffset() <= L.beginOffset()) && "wrong order of tokens");
71
38
  return FileRange(F.file(), F.beginOffset(), L.endOffset());
72
38
}
73
74
120
llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
75
120
  return OS << T.str();
76
120
}
77
78
FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
79
156
    : File(File), Begin(BeginOffset), End(EndOffset) {
80
156
  assert(File.isValid());
81
156
  assert(BeginOffset <= EndOffset);
82
156
}
83
84
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
85
0
                     unsigned Length) {
86
0
  assert(BeginLoc.isValid());
87
0
  assert(BeginLoc.isFileID());
88
0
89
0
  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
90
0
  End = Begin + Length;
91
0
}
92
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
93
0
                     SourceLocation EndLoc) {
94
0
  assert(BeginLoc.isValid());
95
0
  assert(BeginLoc.isFileID());
96
0
  assert(EndLoc.isValid());
97
0
  assert(EndLoc.isFileID());
98
0
  assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
99
0
  assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));
100
0
101
0
  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
102
0
  End = SM.getFileOffset(EndLoc);
103
0
}
104
105
llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
106
0
                                      const FileRange &R) {
107
0
  return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
108
0
                             R.file().getHashValue(), R.beginOffset(),
109
0
                             R.endOffset());
110
0
}
111
112
1
llvm::StringRef FileRange::text(const SourceManager &SM) const {
113
1
  bool Invalid = false;
114
1
  StringRef Text = SM.getBufferData(File, &Invalid);
115
1
  if (Invalid)
116
0
    return "";
117
1
  assert(Begin <= Text.size());
118
1
  assert(End <= Text.size());
119
1
  return Text.substr(Begin, length());
120
1
}
121
122
2
llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
123
2
  if (R.isInvalid())
124
1
    return {};
125
1
  const Token *Begin =
126
3
      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
127
3
        return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin());
128
3
      });
129
1
  const Token *End =
130
3
      llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) {
131
3
        return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location());
132
3
      });
133
1
  if (Begin > End)
134
0
    return {};
135
1
  return {Begin, End};
136
1
}
137
138
3
CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
139
3
  return CharSourceRange(
140
3
      SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
141
3
      /*IsTokenRange=*/false);
142
3
}
143
144
std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
145
1.15k
TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
146
1.15k
  assert(Expanded);
147
1.15k
  assert(ExpandedTokens.data() <= Expanded &&
148
1.15k
         Expanded < ExpandedTokens.data() + ExpandedTokens.size());
149
1.15k
150
1.15k
  auto FileIt = Files.find(
151
1.15k
      SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
152
1.15k
  assert(FileIt != Files.end() && "no file for an expanded token");
153
1.15k
154
1.15k
  const MarkedFile &File = FileIt->second;
155
1.15k
156
1.15k
  unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
157
1.15k
  // Find the first mapping that produced tokens after \p Expanded.
158
1.15k
  auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
159
279
    return M.BeginExpanded <= ExpandedIndex;
160
279
  });
161
1.15k
  // Our token could only be produced by the previous mapping.
162
1.15k
  if (It == File.Mappings.begin()) {
163
1.02k
    // No previous mapping, no need to modify offsets.
164
1.02k
    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr};
165
1.02k
  }
166
128
  --It; // 'It' now points to last mapping that started before our token.
167
128
168
128
  // Check if the token is part of the mapping.
169
128
  if (ExpandedIndex < It->EndExpanded)
170
62
    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It};
171
66
172
66
  // Not part of the mapping, use the index from previous mapping to compute the
173
66
  // corresponding spelled token.
174
66
  return {
175
66
      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
176
66
      /*Mapping*/ nullptr};
177
66
}
178
179
45
llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
180
45
  auto It = Files.find(FID);
181
45
  assert(It != Files.end());
182
45
  return It->second.SpelledTokens;
183
45
}
184
185
0
std::string TokenBuffer::Mapping::str() const {
186
0
  return std::string(
187
0
      llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
188
0
                    BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
189
0
}
190
191
llvm::Optional<llvm::ArrayRef<syntax::Token>>
192
579
TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
193
579
  // Mapping an empty range is ambiguous in case of empty mappings at either end
194
579
  // of the range, bail out in that case.
195
579
  if (Expanded.empty())
196
1
    return llvm::None;
197
578
198
578
  // FIXME: also allow changes uniquely mapping to macro arguments.
199
578
200
578
  const syntax::Token *BeginSpelled;
201
578
  const Mapping *BeginMapping;
202
578
  std::tie(BeginSpelled, BeginMapping) =
203
578
      spelledForExpandedToken(&Expanded.front());
204
578
205
578
  const syntax::Token *LastSpelled;
206
578
  const Mapping *LastMapping;
207
578
  std::tie(LastSpelled, LastMapping) =
208
578
      spelledForExpandedToken(&Expanded.back());
209
578
210
578
  FileID FID = SourceMgr->getFileID(BeginSpelled->location());
211
578
  // FIXME: Handle multi-file changes by trying to map onto a common root.
212
578
  if (FID != SourceMgr->getFileID(LastSpelled->location()))
213
0
    return llvm::None;
214
578
215
578
  const MarkedFile &File = Files.find(FID)->second;
216
578
217
578
  // Do not allow changes that cross macro expansion boundaries.
218
578
  unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
219
578
  unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
220
578
  if (BeginMapping && 
BeginMapping->BeginExpanded < BeginExpanded32
)
221
12
    return llvm::None;
222
566
  if (LastMapping && 
EndExpanded < LastMapping->EndExpanded18
)
223
5
    return llvm::None;
224
561
  // All is good, return the result.
225
561
  return llvm::makeArrayRef(
226
561
      BeginMapping ? 
File.SpelledTokens.data() + BeginMapping->BeginSpelled15
227
561
                   : 
BeginSpelled546
,
228
561
      LastMapping ? 
File.SpelledTokens.data() + LastMapping->EndSpelled13
229
561
                  : 
LastSpelled + 1548
);
230
561
}
231
232
llvm::Optional<TokenBuffer::Expansion>
233
28
TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
234
28
  assert(Spelled);
235
28
  assert(Spelled->location().isFileID() && "not a spelled token");
236
28
  auto FileIt = Files.find(SourceMgr->getFileID(Spelled->location()));
237
28
  assert(FileIt != Files.end() && "file not tracked by token buffer");
238
28
239
28
  auto &File = FileIt->second;
240
28
  assert(File.SpelledTokens.data() <= Spelled &&
241
28
         Spelled < (File.SpelledTokens.data() + File.SpelledTokens.size()));
242
28
243
28
  unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
244
56
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
245
56
    return M.BeginSpelled < SpelledIndex;
246
56
  });
247
28
  if (M == File.Mappings.end() || 
M->BeginSpelled != SpelledIndex16
)
248
22
    return llvm::None;
249
6
250
6
  Expansion E;
251
6
  E.Spelled = llvm::makeArrayRef(File.SpelledTokens.data() + M->BeginSpelled,
252
6
                                 File.SpelledTokens.data() + M->EndSpelled);
253
6
  E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M->BeginExpanded,
254
6
                                  ExpandedTokens.data() + M->EndExpanded);
255
6
  return E;
256
6
}
257
llvm::ArrayRef<syntax::Token>
258
syntax::spelledTokensTouching(SourceLocation Loc,
259
16
                              llvm::ArrayRef<syntax::Token> Tokens) {
260
16
  assert(Loc.isFileID());
261
16
262
16
  auto *Right = llvm::partition_point(
263
42
      Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
264
16
  bool AcceptRight = Right != Tokens.end() && 
Right->location() <= Loc14
;
265
16
  bool AcceptLeft =
266
16
      Right != Tokens.begin() && 
(Right - 1)->endLocation() >= Loc14
;
267
16
  return llvm::makeArrayRef(Right - (AcceptLeft ? 
112
:
04
),
268
16
                            Right + (AcceptRight ? 
18
:
08
));
269
16
}
270
271
llvm::ArrayRef<syntax::Token>
272
syntax::spelledTokensTouching(SourceLocation Loc,
273
8
                              const syntax::TokenBuffer &Tokens) {
274
8
  return spelledTokensTouching(
275
8
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
276
8
}
277
278
const syntax::Token *
279
syntax::spelledIdentifierTouching(SourceLocation Loc,
280
8
                                  llvm::ArrayRef<syntax::Token> Tokens) {
281
9
  for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
282
9
    if (Tok.kind() == tok::identifier)
283
3
      return &Tok;
284
9
  }
285
8
  
return nullptr5
;
286
8
}
287
288
const syntax::Token *
289
syntax::spelledIdentifierTouching(SourceLocation Loc,
290
8
                                  const syntax::TokenBuffer &Tokens) {
291
8
  return spelledIdentifierTouching(
292
8
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
293
8
}
294
295
std::vector<const syntax::Token *>
296
1
TokenBuffer::macroExpansions(FileID FID) const {
297
1
  auto FileIt = Files.find(FID);
298
1
  assert(FileIt != Files.end() && "file not tracked by token buffer");
299
1
  auto &File = FileIt->second;
300
1
  std::vector<const syntax::Token *> Expansions;
301
1
  auto &Spelled = File.SpelledTokens;
302
4
  for (auto Mapping : File.Mappings) {
303
4
    const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
304
4
    if (Token->kind() == tok::TokenKind::identifier)
305
3
      Expansions.push_back(Token);
306
4
  }
307
1
  return Expansions;
308
1
}
309
310
std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
311
67
                                            const LangOptions &LO) {
312
67
  std::vector<syntax::Token> Tokens;
313
67
  IdentifierTable Identifiers(LO);
314
1.01k
  auto AddToken = [&](clang::Token T) {
315
1.01k
    // Fill the proper token kind for keywords, etc.
316
1.01k
    if (T.getKind() == tok::raw_identifier && 
!T.needsCleaning()463
&&
317
1.01k
        
!T.hasUCN()462
) { // FIXME: support needsCleaning and hasUCN cases.
318
462
      clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
319
462
      T.setIdentifierInfo(&II);
320
462
      T.setKind(II.getTokenID());
321
462
    }
322
1.01k
    Tokens.push_back(syntax::Token(T));
323
1.01k
  };
324
67
325
67
  Lexer L(FID, SM.getBuffer(FID), SM, LO);
326
67
327
67
  clang::Token T;
328
1.06k
  while (!L.LexFromRawLexer(T))
329
1.00k
    AddToken(T);
330
67
  // 'eof' is only the last token if the input is null-terminated. Never store
331
67
  // it, for consistency.
332
67
  if (T.getKind() != tok::eof)
333
15
    AddToken(T);
334
67
  return Tokens;
335
67
}
336
337
/// Records information reqired to construct mappings for the token buffer that
338
/// we are collecting.
339
class TokenCollector::CollectPPExpansions : public PPCallbacks {
340
public:
341
59
  CollectPPExpansions(TokenCollector &C) : Collector(&C) {}
342
343
  /// Disabled instance will stop reporting anything to TokenCollector.
344
  /// This ensures that uses of the preprocessor after TokenCollector::consume()
345
  /// is called do not access the (possibly invalid) collector instance.
346
59
  void disable() { Collector = nullptr; }
347
348
  void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
349
55
                    SourceRange Range, const MacroArgs *Args) override {
350
55
    if (!Collector)
351
0
      return;
352
55
    // Only record top-level expansions, not those where:
353
55
    //   - the macro use is inside a macro body,
354
55
    //   - the macro appears in an argument to another macro.
355
55
    if (!MacroNameTok.getLocation().isFileID() ||
356
55
        
(53
LastExpansionEnd.isValid()53
&&
357
53
         Collector->PP.getSourceManager().isBeforeInTranslationUnit(
358
33
             Range.getBegin(), LastExpansionEnd)))
359
12
      return;
360
43
    Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd();
361
43
    LastExpansionEnd = Range.getEnd();
362
43
  }
363
  // FIXME: handle directives like #pragma, #include, etc.
364
private:
365
  TokenCollector *Collector;
366
  /// Used to detect recursive macro expansions.
367
  SourceLocation LastExpansionEnd;
368
};
369
370
/// Fills in the TokenBuffer by tracing the run of a preprocessor. The
371
/// implementation tracks the tokens, macro expansions and directives coming
372
/// from the preprocessor and:
373
/// - for each token, figures out if it is a part of an expanded token stream,
374
///   spelled token stream or both. Stores the tokens appropriately.
375
/// - records mappings from the spelled to expanded token ranges, e.g. for macro
376
///   expansions.
377
/// FIXME: also properly record:
378
///          - #include directives,
379
///          - #pragma, #line and other PP directives,
380
///          - skipped pp regions,
381
///          - ...
382
383
59
TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
384
59
  // Collect the expanded token stream during preprocessing.
385
703
  PP.setTokenWatcher([this](const clang::Token &T) {
386
703
    if (T.isAnnotation())
387
2
      return;
388
701
    DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
389
701
                                          << "Token: "
390
701
                                          << syntax::Token(T).dumpForTests(
391
701
                                                 this->PP.getSourceManager())
392
701
                                          << "\n"
393
701
394
701
    );
395
701
    Expanded.push_back(syntax::Token(T));
396
701
  });
397
59
  // And locations of macro calls, to properly recover boundaries of those in
398
59
  // case of empty expansions.
399
59
  auto CB = std::make_unique<CollectPPExpansions>(*this);
400
59
  this->Collector = CB.get();
401
59
  PP.addPPCallbacks(std::move(CB));
402
59
}
403
404
/// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
405
/// token stream.
406
class TokenCollector::Builder {
407
public:
408
  Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
409
          const SourceManager &SM, const LangOptions &LangOpts)
410
      : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
411
59
        LangOpts(LangOpts) {
412
59
    Result.ExpandedTokens = std::move(Expanded);
413
59
  }
414
415
59
  TokenBuffer build() && {
416
59
    buildSpelledTokens();
417
59
418
59
    // Walk over expanded tokens and spelled tokens in parallel, building the
419
59
    // mappings between those using source locations.
420
59
    // To correctly recover empty macro expansions, we also take locations
421
59
    // reported to PPCallbacks::MacroExpands into account as we do not have any
422
59
    // expanded tokens with source locations to guide us.
423
59
424
59
    // The 'eof' token is special, it is not part of spelled token stream. We
425
59
    // handle it separately at the end.
426
59
    assert(!Result.ExpandedTokens.empty());
427
59
    assert(Result.ExpandedTokens.back().kind() == tok::eof);
428
649
    for (unsigned I = 0; I < Result.ExpandedTokens.size() - 1; 
++I590
) {
429
590
      // (!) I might be updated by the following call.
430
590
      processExpandedToken(I);
431
590
    }
432
59
433
59
    // 'eof' not handled in the loop, do it here.
434
59
    assert(SM.getMainFileID() ==
435
59
           SM.getFileID(Result.ExpandedTokens.back().location()));
436
59
    fillGapUntil(Result.Files[SM.getMainFileID()],
437
59
                 Result.ExpandedTokens.back().location(),
438
59
                 Result.ExpandedTokens.size() - 1);
439
59
    Result.Files[SM.getMainFileID()].EndExpanded = Result.ExpandedTokens.size();
440
59
441
59
    // Some files might have unaccounted spelled tokens at the end, add an empty
442
59
    // mapping for those as they did not have expanded counterparts.
443
59
    fillGapsAtEndOfFiles();
444
59
445
59
    return std::move(Result);
446
59
  }
447
448
private:
449
  /// Process the next token in an expanded stream and move corresponding
450
  /// spelled tokens, record any mapping if needed.
451
  /// (!) \p I will be updated if this had to skip tokens, e.g. for macros.
452
590
  void processExpandedToken(unsigned &I) {
453
590
    auto L = Result.ExpandedTokens[I].location();
454
590
    if (L.isMacroID()) {
455
33
      processMacroExpansion(SM.getExpansionRange(L), I);
456
33
      return;
457
33
    }
458
557
    if (L.isFileID()) {
459
557
      auto FID = SM.getFileID(L);
460
557
      TokenBuffer::MarkedFile &File = Result.Files[FID];
461
557
462
557
      fillGapUntil(File, L, I);
463
557
464
557
      // Skip the token.
465
557
      assert(File.SpelledTokens[NextSpelled[FID]].location() == L &&
466
557
             "no corresponding token in the spelled stream");
467
557
      ++NextSpelled[FID];
468
557
      return;
469
557
    }
470
557
  }
471
472
  /// Skips expanded and spelled tokens of a macro expansion that covers \p
473
  /// SpelledRange. Add a corresponding mapping.
474
  /// (!) \p I will be the index of the last token in an expansion after this
475
  /// function returns.
476
33
  void processMacroExpansion(CharSourceRange SpelledRange, unsigned &I) {
477
33
    auto FID = SM.getFileID(SpelledRange.getBegin());
478
33
    assert(FID == SM.getFileID(SpelledRange.getEnd()));
479
33
    TokenBuffer::MarkedFile &File = Result.Files[FID];
480
33
481
33
    fillGapUntil(File, SpelledRange.getBegin(), I);
482
33
483
33
    // Skip all expanded tokens from the same macro expansion.
484
33
    unsigned BeginExpanded = I;
485
85
    for (; I + 1 < Result.ExpandedTokens.size(); 
++I52
) {
486
85
      auto NextL = Result.ExpandedTokens[I + 1].location();
487
85
      if (!NextL.isMacroID() ||
488
85
          
SM.getExpansionLoc(NextL) != SpelledRange.getBegin()54
)
489
33
        break;
490
85
    }
491
33
    unsigned EndExpanded = I + 1;
492
33
    consumeMapping(File, SM.getFileOffset(SpelledRange.getEnd()), BeginExpanded,
493
33
                   EndExpanded, NextSpelled[FID]);
494
33
  }
495
496
  /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
497
  /// ranges for each of the files.
498
59
  void buildSpelledTokens() {
499
760
    for (unsigned I = 0; I < Result.ExpandedTokens.size(); 
++I701
) {
500
701
      auto FID =
501
701
          SM.getFileID(SM.getExpansionLoc(Result.ExpandedTokens[I].location()));
502
701
      auto It = Result.Files.try_emplace(FID);
503
701
      TokenBuffer::MarkedFile &File = It.first->second;
504
701
505
701
      File.EndExpanded = I + 1;
506
701
      if (!It.second)
507
639
        continue; // we have seen this file before.
508
62
509
62
      // This is the first time we see this file.
510
62
      File.BeginExpanded = I;
511
62
      File.SpelledTokens = tokenize(FID, SM, LangOpts);
512
62
    }
513
59
  }
514
515
  void consumeEmptyMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
516
10
                           unsigned ExpandedIndex, unsigned &SpelledIndex) {
517
10
    consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex);
518
10
  }
519
520
  /// Consumes spelled tokens that form a macro expansion and adds an entry to
521
  /// the resulting token buffer.
522
  /// (!) SpelledIndex is updated in-place.
523
  void consumeMapping(TokenBuffer::MarkedFile &File, unsigned EndOffset,
524
                      unsigned BeginExpanded, unsigned EndExpanded,
525
43
                      unsigned &SpelledIndex) {
526
43
    // We need to record this mapping before continuing.
527
43
    unsigned MappingBegin = SpelledIndex;
528
43
    ++SpelledIndex;
529
43
530
43
    bool HitMapping =
531
43
        tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue();
532
43
    (void)HitMapping;
533
43
    assert(!HitMapping && "recursive macro expansion?");
534
43
535
43
    TokenBuffer::Mapping M;
536
43
    M.BeginExpanded = BeginExpanded;
537
43
    M.EndExpanded = EndExpanded;
538
43
    M.BeginSpelled = MappingBegin;
539
43
    M.EndSpelled = SpelledIndex;
540
43
541
43
    File.Mappings.push_back(M);
542
43
  }
543
544
  /// Consumes spelled tokens until location \p L is reached and adds a mapping
545
  /// covering the consumed tokens. The mapping will point to an empty expanded
546
  /// range at position \p ExpandedIndex.
547
  void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L,
548
709
                    unsigned ExpandedIndex) {
549
709
    assert(L.isFileID());
550
709
    FileID FID;
551
709
    unsigned Offset;
552
709
    std::tie(FID, Offset) = SM.getDecomposedLoc(L);
553
709
554
709
    unsigned &SpelledIndex = NextSpelled[FID];
555
709
    unsigned MappingBegin = SpelledIndex;
556
719
    while (true) {
557
719
      auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex);
558
719
      if (SpelledIndex != MappingBegin) {
559
28
        TokenBuffer::Mapping M;
560
28
        M.BeginSpelled = MappingBegin;
561
28
        M.EndSpelled = SpelledIndex;
562
28
        M.BeginExpanded = M.EndExpanded = ExpandedIndex;
563
28
        File.Mappings.push_back(M);
564
28
      }
565
719
      if (!EndLoc)
566
709
        break;
567
10
      consumeEmptyMapping(File, SM.getFileOffset(*EndLoc), ExpandedIndex,
568
10
                          SpelledIndex);
569
10
570
10
      MappingBegin = SpelledIndex;
571
10
    }
572
709
  };
573
574
  /// Consumes spelled tokens until it reaches Offset or a mapping boundary,
575
  /// i.e. a name of a macro expansion or the start '#' token of a PP directive.
576
  /// (!) NextSpelled is updated in place.
577
  ///
578
  /// returns None if \p Offset was reached, otherwise returns the end location
579
  /// of a mapping that starts at \p NextSpelled.
580
  llvm::Optional<SourceLocation>
581
  tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File, unsigned Offset,
582
762
                         unsigned &NextSpelled) {
583
1.16k
    for (; NextSpelled < File.SpelledTokens.size(); 
++NextSpelled405
) {
584
1.04k
      auto L = File.SpelledTokens[NextSpelled].location();
585
1.04k
      if (Offset <= SM.getFileOffset(L))
586
628
        return llvm::None; // reached the offset we are looking for.
587
415
      auto Mapping = CollectedExpansions.find(L.getRawEncoding());
588
415
      if (Mapping != CollectedExpansions.end())
589
10
        return Mapping->second; // found a mapping before the offset.
590
415
    }
591
762
    
return llvm::None124
; // no more tokens, we "reached" the offset.
592
762
  }
593
594
  /// Adds empty mappings for unconsumed spelled tokens at the end of each file.
595
59
  void fillGapsAtEndOfFiles() {
596
62
    for (auto &F : Result.Files) {
597
62
      if (F.second.SpelledTokens.empty())
598
2
        continue;
599
60
      fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(),
600
60
                   F.second.EndExpanded);
601
60
    }
602
59
  }
603
604
  TokenBuffer Result;
605
  /// For each file, a position of the next spelled token we will consume.
606
  llvm::DenseMap<FileID, unsigned> NextSpelled;
607
  PPExpansions CollectedExpansions;
608
  const SourceManager &SM;
609
  const LangOptions &LangOpts;
610
};
611
612
59
TokenBuffer TokenCollector::consume() && {
613
59
  PP.setTokenWatcher(nullptr);
614
59
  Collector->disable();
615
59
  return Builder(std::move(Expanded), std::move(Expansions),
616
59
                 PP.getSourceManager(), PP.getLangOpts())
617
59
      .build();
618
59
}
619
620
120
std::string syntax::Token::str() const {
621
120
  return std::string(llvm::formatv("Token({0}, length = {1})",
622
120
                                   tok::getTokenName(kind()), length()));
623
120
}
624
625
0
std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
626
0
  return std::string(
627
0
      llvm::formatv("{0}   {1}", tok::getTokenName(kind()), text(SM)));
628
0
}
629
630
14
std::string TokenBuffer::dumpForTests() const {
631
456
  auto PrintToken = [this](const syntax::Token &T) -> std::string {
632
456
    if (T.kind() == tok::eof)
633
11
      return "<eof>";
634
445
    return std::string(T.text(*SourceMgr));
635
445
  };
636
14
637
14
  auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
638
30
                                        llvm::ArrayRef<syntax::Token> Tokens) {
639
30
    if (Tokens.empty()) {
640
4
      OS << "<empty>";
641
4
      return;
642
4
    }
643
26
    OS << Tokens[0].text(*SourceMgr);
644
400
    for (unsigned I = 1; I < Tokens.size(); 
++I374
) {
645
374
      if (Tokens[I].kind() == tok::eof)
646
0
        continue;
647
374
      OS << " " << PrintToken(Tokens[I]);
648
374
    }
649
26
  };
650
14
651
14
  std::string Dump;
652
14
  llvm::raw_string_ostream OS(Dump);
653
14
654
14
  OS << "expanded tokens:\n"
655
14
     << "  ";
656
14
  // (!) we do not show '<eof>'.
657
14
  DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
658
14
  OS << "\n";
659
14
660
14
  std::vector<FileID> Keys;
661
14
  for (auto F : Files)
662
16
    Keys.push_back(F.first);
663
14
  llvm::sort(Keys);
664
14
665
16
  for (FileID ID : Keys) {
666
16
    const MarkedFile &File = Files.find(ID)->second;
667
16
    auto *Entry = SourceMgr->getFileEntryForID(ID);
668
16
    if (!Entry)
669
0
      continue; // Skip builtin files.
670
16
    OS << llvm::formatv("file '{0}'\n", Entry->getName())
671
16
       << "  spelled tokens:\n"
672
16
       << "    ";
673
16
    DumpTokens(OS, File.SpelledTokens);
674
16
    OS << "\n";
675
16
676
16
    if (File.Mappings.empty()) {
677
4
      OS << "  no mappings.\n";
678
4
      continue;
679
4
    }
680
12
    OS << "  mappings:\n";
681
22
    for (auto &M : File.Mappings) {
682
22
      OS << llvm::formatv(
683
22
          "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
684
22
          PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
685
22
          M.EndSpelled == File.SpelledTokens.size()
686
22
              ? 
"<eof>"6
687
22
              : 
PrintToken(File.SpelledTokens[M.EndSpelled])16
,
688
22
          M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
689
22
          M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
690
22
          M.EndExpanded);
691
22
    }
692
12
  }
693
14
  return OS.str();
694
14
}