Coverage Report

Created: 2021-08-24 07:12

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
#include "clang/Tooling/Syntax/Tokens.h"
9
10
#include "clang/Basic/Diagnostic.h"
11
#include "clang/Basic/IdentifierTable.h"
12
#include "clang/Basic/LLVM.h"
13
#include "clang/Basic/LangOptions.h"
14
#include "clang/Basic/SourceLocation.h"
15
#include "clang/Basic/SourceManager.h"
16
#include "clang/Basic/TokenKinds.h"
17
#include "clang/Lex/PPCallbacks.h"
18
#include "clang/Lex/Preprocessor.h"
19
#include "clang/Lex/Token.h"
20
#include "llvm/ADT/ArrayRef.h"
21
#include "llvm/ADT/None.h"
22
#include "llvm/ADT/Optional.h"
23
#include "llvm/ADT/STLExtras.h"
24
#include "llvm/Support/Debug.h"
25
#include "llvm/Support/ErrorHandling.h"
26
#include "llvm/Support/FormatVariadic.h"
27
#include "llvm/Support/raw_ostream.h"
28
#include <algorithm>
29
#include <cassert>
30
#include <iterator>
31
#include <string>
32
#include <utility>
33
#include <vector>
34
35
using namespace clang;
36
using namespace clang::syntax;
37
38
namespace {
39
// Finds the smallest consecutive subsequence of Toks that covers R.
40
llvm::ArrayRef<syntax::Token>
41
getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
42
389
                  const SourceManager &SM) {
43
389
  if (R.isInvalid())
44
0
    return {};
45
389
  const syntax::Token *Begin =
46
2.07k
      llvm::partition_point(Toks, [&](const syntax::Token &T) {
47
2.07k
        return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
48
2.07k
      });
49
389
  const syntax::Token *End =
50
2.07k
      llvm::partition_point(Toks, [&](const syntax::Token &T) {
51
2.07k
        return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
52
2.07k
      });
53
389
  if (Begin > End)
54
0
    return {};
55
389
  return {Begin, End};
56
389
}
57
58
// Finds the smallest expansion range that contains expanded tokens First and
59
// Last, e.g.:
60
// #define ID(x) x
61
// ID(ID(ID(a1) a2))
62
//          ~~       -> a1
63
//              ~~   -> a2
64
//       ~~~~~~~~~   -> a1 a2
65
SourceRange findCommonRangeForMacroArgs(const syntax::Token &First,
66
                                        const syntax::Token &Last,
67
450
                                        const SourceManager &SM) {
68
450
  SourceRange Res;
69
450
  auto FirstLoc = First.location(), LastLoc = Last.location();
70
  // Keep traversing up the spelling chain as long as tokens are part of the
71
  // same expansion.
72
845
  while (!FirstLoc.isFileID() && 
!LastLoc.isFileID()460
) {
73
457
    auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion();
74
457
    auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion();
75
    // Stop if expansions have diverged.
76
457
    if (ExpInfoFirst.getExpansionLocStart() !=
77
457
        ExpInfoLast.getExpansionLocStart())
78
61
      break;
79
    // Do not continue into macro bodies.
80
396
    if (!ExpInfoFirst.isMacroArgExpansion() ||
81
396
        
!ExpInfoLast.isMacroArgExpansion()395
)
82
1
      break;
83
395
    FirstLoc = SM.getImmediateSpellingLoc(FirstLoc);
84
395
    LastLoc = SM.getImmediateSpellingLoc(LastLoc);
85
    // Update the result afterwards, as we want the tokens that triggered the
86
    // expansion.
87
395
    Res = {FirstLoc, LastLoc};
88
395
  }
89
  // Normally mapping back to expansion location here only changes FileID, as
90
  // we've already found some tokens expanded from the same macro argument, and
91
  // they should map to a consecutive subset of spelled tokens. Unfortunately
92
  // SourceManager::isBeforeInTranslationUnit discriminates source locations
93
  // based on their FileID in addition to offsets. So even though we are
94
  // referring to same tokens, SourceManager might tell us that one is before
95
  // the other if they've got different FileIDs.
96
450
  return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange();
97
450
}
98
99
} // namespace
100
101
syntax::Token::Token(SourceLocation Location, unsigned Length,
102
                     tok::TokenKind Kind)
103
80.9k
    : Location(Location), Length(Length), Kind(Kind) {
104
80.9k
  assert(Location.isValid());
105
80.9k
}
106
107
syntax::Token::Token(const clang::Token &T)
108
80.9k
    : Token(T.getLocation(), T.getLength(), T.getKind()) {
109
80.9k
  assert(!T.isAnnotation());
110
80.9k
}
Unexecuted instantiation: clang::syntax::Token::Token(clang::Token const&)
clang::syntax::Token::Token(clang::Token const&)
Line
Count
Source
108
80.9k
    : Token(T.getLocation(), T.getLength(), T.getKind()) {
109
80.9k
  assert(!T.isAnnotation());
110
80.9k
}
111
112
21.9k
llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
113
21.9k
  bool Invalid = false;
114
21.9k
  const char *Start = SM.getCharacterData(location(), &Invalid);
115
21.9k
  assert(!Invalid);
116
0
  return llvm::StringRef(Start, length());
117
21.9k
}
118
119
137k
FileRange syntax::Token::range(const SourceManager &SM) const {
120
137k
  assert(location().isFileID() && "must be a spelled token");
121
0
  FileID File;
122
137k
  unsigned StartOffset;
123
137k
  std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
124
137k
  return FileRange(File, StartOffset, StartOffset + length());
125
137k
}
126
127
FileRange syntax::Token::range(const SourceManager &SM,
128
                               const syntax::Token &First,
129
68.9k
                               const syntax::Token &Last) {
130
68.9k
  auto F = First.range(SM);
131
68.9k
  auto L = Last.range(SM);
132
68.9k
  assert(F.file() == L.file() && "tokens from different files");
133
0
  assert((F == L || F.endOffset() <= L.beginOffset()) &&
134
68.9k
         "wrong order of tokens");
135
0
  return FileRange(F.file(), F.beginOffset(), L.endOffset());
136
68.9k
}
137
138
90
llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
139
90
  return OS << T.str();
140
90
}
141
142
FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
143
279k
    : File(File), Begin(BeginOffset), End(EndOffset) {
144
279k
  assert(File.isValid());
145
0
  assert(BeginOffset <= EndOffset);
146
279k
}
147
148
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
149
0
                     unsigned Length) {
150
0
  assert(BeginLoc.isValid());
151
0
  assert(BeginLoc.isFileID());
152
153
0
  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
154
0
  End = Begin + Length;
155
0
}
156
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
157
0
                     SourceLocation EndLoc) {
158
0
  assert(BeginLoc.isValid());
159
0
  assert(BeginLoc.isFileID());
160
0
  assert(EndLoc.isValid());
161
0
  assert(EndLoc.isFileID());
162
0
  assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
163
0
  assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));
164
165
0
  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
166
0
  End = SM.getFileOffset(EndLoc);
167
0
}
168
169
llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
170
0
                                      const FileRange &R) {
171
0
  return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
172
0
                             R.file().getHashValue(), R.beginOffset(),
173
0
                             R.endOffset());
174
0
}
175
176
14
llvm::StringRef FileRange::text(const SourceManager &SM) const {
177
14
  bool Invalid = false;
178
14
  StringRef Text = SM.getBufferData(File, &Invalid);
179
14
  if (Invalid)
180
0
    return "";
181
14
  assert(Begin <= Text.size());
182
0
  assert(End <= Text.size());
183
0
  return Text.substr(Begin, length());
184
14
}
185
186
40
void TokenBuffer::indexExpandedTokens() {
187
  // No-op if the index is already created.
188
40
  if (!ExpandedTokIndex.empty())
189
0
    return;
190
40
  ExpandedTokIndex.reserve(ExpandedTokens.size());
191
  // Index ExpandedTokens for faster lookups by SourceLocation.
192
402
  for (size_t I = 0, E = ExpandedTokens.size(); I != E; 
++I362
) {
193
362
    SourceLocation Loc = ExpandedTokens[I].location();
194
362
    if (Loc.isValid())
195
362
      ExpandedTokIndex[Loc] = I;
196
362
  }
197
40
}
198
199
2
llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
200
2
  if (R.isInvalid())
201
1
    return {};
202
1
  if (!ExpandedTokIndex.empty()) {
203
    // Quick lookup if `R` is a token range.
204
    // This is a huge win since majority of the users use ranges provided by an
205
    // AST. Ranges in AST are token ranges from expanded token stream.
206
1
    const auto B = ExpandedTokIndex.find(R.getBegin());
207
1
    const auto E = ExpandedTokIndex.find(R.getEnd());
208
1
    if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) {
209
1
      const Token *L = ExpandedTokens.data() + B->getSecond();
210
      // Add 1 to End to make a half-open range.
211
1
      const Token *R = ExpandedTokens.data() + E->getSecond() + 1;
212
1
      if (L > R)
213
0
        return {};
214
1
      return {L, R};
215
1
    }
216
1
  }
217
  // Slow case. Use `isBeforeInTranslationUnit` to binary search for the
218
  // required range.
219
0
  return getTokensCovering(expandedTokens(), R, *SourceMgr);
220
1
}
221
222
42
CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
223
42
  return CharSourceRange(
224
42
      SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
225
42
      /*IsTokenRange=*/false);
226
42
}
227
228
std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
229
135k
TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
230
135k
  assert(Expanded);
231
0
  assert(ExpandedTokens.data() <= Expanded &&
232
135k
         Expanded < ExpandedTokens.data() + ExpandedTokens.size());
233
234
0
  auto FileIt = Files.find(
235
135k
      SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
236
135k
  assert(FileIt != Files.end() && "no file for an expanded token");
237
238
0
  const MarkedFile &File = FileIt->second;
239
240
135k
  unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
241
  // Find the first mapping that produced tokens after \p Expanded.
242
135k
  auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
243
12.0k
    return M.BeginExpanded <= ExpandedIndex;
244
12.0k
  });
245
  // Our token could only be produced by the previous mapping.
246
135k
  if (It == File.Mappings.begin()) {
247
    // No previous mapping, no need to modify offsets.
248
129k
    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
249
129k
            /*Mapping=*/nullptr};
250
129k
  }
251
6.18k
  --It; // 'It' now points to last mapping that started before our token.
252
253
  // Check if the token is part of the mapping.
254
6.18k
  if (ExpandedIndex < It->EndExpanded)
255
2.41k
    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};
256
257
  // Not part of the mapping, use the index from previous mapping to compute the
258
  // corresponding spelled token.
259
3.77k
  return {
260
3.77k
      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
261
3.77k
      /*Mapping=*/nullptr};
262
6.18k
}
263
264
const TokenBuffer::Mapping *
265
TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
266
41
                                          const syntax::Token *Spelled) {
267
41
  assert(F.SpelledTokens.data() <= Spelled);
268
0
  unsigned SpelledI = Spelled - F.SpelledTokens.data();
269
41
  assert(SpelledI < F.SpelledTokens.size());
270
271
87
  auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
272
87
    return M.BeginSpelled <= SpelledI;
273
87
  });
274
41
  if (It == F.Mappings.begin())
275
6
    return nullptr;
276
35
  --It;
277
35
  return &*It;
278
41
}
279
280
llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
281
22
TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
282
22
  if (Spelled.empty())
283
0
    return {};
284
22
  const auto &File = fileForSpelled(Spelled);
285
286
22
  auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
287
22
  unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
288
22
  assert(SpelledFrontI < File.SpelledTokens.size());
289
0
  unsigned ExpandedBegin;
290
22
  if (!FrontMapping) {
291
    // No mapping that starts before the first token of Spelled, we don't have
292
    // to modify offsets.
293
3
    ExpandedBegin = File.BeginExpanded + SpelledFrontI;
294
19
  } else if (SpelledFrontI < FrontMapping->EndSpelled) {
295
    // This mapping applies to Spelled tokens.
296
19
    if (SpelledFrontI != FrontMapping->BeginSpelled) {
297
      // Spelled tokens don't cover the entire mapping, returning empty result.
298
3
      return {}; // FIXME: support macro arguments.
299
3
    }
300
    // Spelled tokens start at the beginning of this mapping.
301
16
    ExpandedBegin = FrontMapping->BeginExpanded;
302
16
  } else {
303
    // Spelled tokens start after the mapping ends (they start in the hole
304
    // between 2 mappings, or between a mapping and end of the file).
305
0
    ExpandedBegin =
306
0
        FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
307
0
  }
308
309
19
  auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
310
19
  unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
311
19
  unsigned ExpandedEnd;
312
19
  if (!BackMapping) {
313
    // No mapping that starts before the last token of Spelled, we don't have to
314
    // modify offsets.
315
3
    ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
316
16
  } else if (SpelledBackI < BackMapping->EndSpelled) {
317
    // This mapping applies to Spelled tokens.
318
16
    if (SpelledBackI + 1 != BackMapping->EndSpelled) {
319
      // Spelled tokens don't cover the entire mapping, returning empty result.
320
1
      return {}; // FIXME: support macro arguments.
321
1
    }
322
15
    ExpandedEnd = BackMapping->EndExpanded;
323
15
  } else {
324
    // Spelled tokens end after the mapping ends.
325
0
    ExpandedEnd =
326
0
        BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
327
0
  }
328
329
18
  assert(ExpandedBegin < ExpandedTokens.size());
330
0
  assert(ExpandedEnd < ExpandedTokens.size());
331
  // Avoid returning empty ranges.
332
18
  if (ExpandedBegin == ExpandedEnd)
333
3
    return {};
334
15
  return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
335
15
                             ExpandedTokens.data() + ExpandedEnd)};
336
18
}
337
338
82
llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
339
82
  auto It = Files.find(FID);
340
82
  assert(It != Files.end());
341
0
  return It->second.SpelledTokens;
342
82
}
343
344
0
const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const {
345
0
  assert(Loc.isFileID());
346
0
  const auto *Tok = llvm::partition_point(
347
0
      spelledTokens(SourceMgr->getFileID(Loc)),
348
0
      [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
349
0
  if (!Tok || Tok->location() != Loc)
350
0
    return nullptr;
351
0
  return Tok;
352
0
}
353
354
0
std::string TokenBuffer::Mapping::str() const {
355
0
  return std::string(
356
0
      llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
357
0
                    BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
358
0
}
359
360
llvm::Optional<llvm::ArrayRef<syntax::Token>>
361
68.1k
TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
362
  // Mapping an empty range is ambiguous in case of empty mappings at either end
363
  // of the range, bail out in that case.
364
68.1k
  if (Expanded.empty())
365
269
    return llvm::None;
366
367
67.9k
  const syntax::Token *BeginSpelled;
368
67.9k
  const Mapping *BeginMapping;
369
67.9k
  std::tie(BeginSpelled, BeginMapping) =
370
67.9k
      spelledForExpandedToken(&Expanded.front());
371
372
67.9k
  const syntax::Token *LastSpelled;
373
67.9k
  const Mapping *LastMapping;
374
67.9k
  std::tie(LastSpelled, LastMapping) =
375
67.9k
      spelledForExpandedToken(&Expanded.back());
376
377
67.9k
  FileID FID = SourceMgr->getFileID(BeginSpelled->location());
378
  // FIXME: Handle multi-file changes by trying to map onto a common root.
379
67.9k
  if (FID != SourceMgr->getFileID(LastSpelled->location()))
380
0
    return llvm::None;
381
382
67.9k
  const MarkedFile &File = Files.find(FID)->second;
383
384
  // If both tokens are coming from a macro argument expansion, try and map to
385
  // smallest part of the macro argument. BeginMapping && LastMapping check is
386
  // only for performance, they are a prerequisite for Expanded.front() and
387
  // Expanded.back() being part of a macro arg expansion.
388
67.9k
  if (BeginMapping && 
LastMapping1.24k
&&
389
67.9k
      
SourceMgr->isMacroArgExpansion(Expanded.front().location())1.14k
&&
390
67.9k
      
SourceMgr->isMacroArgExpansion(Expanded.back().location())478
) {
391
450
    auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(),
392
450
                                                   Expanded.back(), *SourceMgr);
393
    // It might be the case that tokens are arguments of different macro calls,
394
    // in that case we should continue with the logic below instead of returning
395
    // an empty range.
396
450
    if (CommonRange.isValid())
397
389
      return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr);
398
450
  }
399
400
  // Do not allow changes that don't cover the full expansion.
401
67.5k
  unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
402
67.5k
  unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
403
67.5k
  if (BeginMapping && 
BeginExpanded != BeginMapping->BeginExpanded852
)
404
506
    return llvm::None;
405
67.0k
  if (LastMapping && 
LastMapping->EndExpanded != EndExpanded276
)
406
129
    return llvm::None;
407
  // All is good, return the result.
408
66.8k
  return llvm::makeArrayRef(
409
66.8k
      BeginMapping ? 
File.SpelledTokens.data() + BeginMapping->BeginSpelled217
410
66.8k
                   : 
BeginSpelled66.6k
,
411
66.8k
      LastMapping ? 
File.SpelledTokens.data() + LastMapping->EndSpelled147
412
66.8k
                  : 
LastSpelled + 166.7k
);
413
67.0k
}
414
415
TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F,
416
13
                                                  const Mapping &M) const {
417
13
  Expansion E;
418
13
  E.Spelled = llvm::makeArrayRef(F.SpelledTokens.data() + M.BeginSpelled,
419
13
                                 F.SpelledTokens.data() + M.EndSpelled);
420
13
  E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M.BeginExpanded,
421
13
                                  ExpandedTokens.data() + M.EndExpanded);
422
13
  return E;
423
13
}
424
425
const TokenBuffer::MarkedFile &
426
54
TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
427
54
  assert(!Spelled.empty());
428
0
  assert(Spelled.front().location().isFileID() && "not a spelled token");
429
0
  auto FileIt = Files.find(SourceMgr->getFileID(Spelled.front().location()));
430
54
  assert(FileIt != Files.end() && "file not tracked by token buffer");
431
0
  const auto &File = FileIt->second;
432
54
  assert(File.SpelledTokens.data() <= Spelled.data() &&
433
54
         Spelled.end() <=
434
54
             (File.SpelledTokens.data() + File.SpelledTokens.size()) &&
435
54
         "Tokens not in spelled range");
436
0
#ifndef NDEBUG
437
0
  auto T1 = Spelled.back().location();
438
54
  auto T2 = File.SpelledTokens.back().location();
439
54
  assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
440
0
#endif
441
0
  return File;
442
54
}
443
444
llvm::Optional<TokenBuffer::Expansion>
445
28
TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
446
28
  assert(Spelled);
447
0
  const auto &File = fileForSpelled(*Spelled);
448
449
28
  unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
450
56
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
451
56
    return M.BeginSpelled < SpelledIndex;
452
56
  });
453
28
  if (M == File.Mappings.end() || 
M->BeginSpelled != SpelledIndex16
)
454
22
    return llvm::None;
455
6
  return makeExpansion(File, *M);
456
28
}
457
458
std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping(
459
4
    llvm::ArrayRef<syntax::Token> Spelled) const {
460
4
  if (Spelled.empty())
461
0
    return {};
462
4
  const auto &File = fileForSpelled(Spelled);
463
464
  // Find the first overlapping range, and then copy until we stop overlapping.
465
4
  unsigned SpelledBeginIndex = Spelled.begin() - File.SpelledTokens.data();
466
4
  unsigned SpelledEndIndex = Spelled.end() - File.SpelledTokens.data();
467
8
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
468
8
    return M.EndSpelled <= SpelledBeginIndex;
469
8
  });
470
4
  std::vector<TokenBuffer::Expansion> Expansions;
471
11
  for (; M != File.Mappings.end() && 
M->BeginSpelled < SpelledEndIndex8
;
++M7
)
472
7
    Expansions.push_back(makeExpansion(File, *M));
473
4
  return Expansions;
474
4
}
475
476
llvm::ArrayRef<syntax::Token>
477
syntax::spelledTokensTouching(SourceLocation Loc,
478
16
                              llvm::ArrayRef<syntax::Token> Tokens) {
479
16
  assert(Loc.isFileID());
480
481
0
  auto *Right = llvm::partition_point(
482
42
      Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
483
16
  bool AcceptRight = Right != Tokens.end() && 
Right->location() <= Loc14
;
484
16
  bool AcceptLeft =
485
16
      Right != Tokens.begin() && 
(Right - 1)->endLocation() >= Loc14
;
486
16
  return llvm::makeArrayRef(Right - (AcceptLeft ? 
112
:
04
),
487
16
                            Right + (AcceptRight ? 
18
:
08
));
488
16
}
489
490
llvm::ArrayRef<syntax::Token>
491
syntax::spelledTokensTouching(SourceLocation Loc,
492
8
                              const syntax::TokenBuffer &Tokens) {
493
8
  return spelledTokensTouching(
494
8
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
495
8
}
496
497
const syntax::Token *
498
syntax::spelledIdentifierTouching(SourceLocation Loc,
499
8
                                  llvm::ArrayRef<syntax::Token> Tokens) {
500
9
  for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
501
9
    if (Tok.kind() == tok::identifier)
502
3
      return &Tok;
503
9
  }
504
5
  return nullptr;
505
8
}
506
507
const syntax::Token *
508
syntax::spelledIdentifierTouching(SourceLocation Loc,
509
8
                                  const syntax::TokenBuffer &Tokens) {
510
8
  return spelledIdentifierTouching(
511
8
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
512
8
}
513
514
std::vector<const syntax::Token *>
515
1
TokenBuffer::macroExpansions(FileID FID) const {
516
1
  auto FileIt = Files.find(FID);
517
1
  assert(FileIt != Files.end() && "file not tracked by token buffer");
518
0
  auto &File = FileIt->second;
519
1
  std::vector<const syntax::Token *> Expansions;
520
1
  auto &Spelled = File.SpelledTokens;
521
4
  for (auto Mapping : File.Mappings) {
522
4
    const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
523
4
    if (Token->kind() == tok::TokenKind::identifier)
524
3
      Expansions.push_back(Token);
525
4
  }
526
1
  return Expansions;
527
1
}
528
529
std::vector<syntax::Token> syntax::tokenize(const FileRange &FR,
530
                                            const SourceManager &SM,
531
3.91k
                                            const LangOptions &LO) {
532
3.91k
  std::vector<syntax::Token> Tokens;
533
3.91k
  IdentifierTable Identifiers(LO);
534
41.0k
  auto AddToken = [&](clang::Token T) {
535
    // Fill the proper token kind for keywords, etc.
536
41.0k
    if (T.getKind() == tok::raw_identifier && 
!T.needsCleaning()16.1k
&&
537
41.0k
        
!T.hasUCN()16.1k
) { // FIXME: support needsCleaning and hasUCN cases.
538
16.1k
      clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
539
16.1k
      T.setIdentifierInfo(&II);
540
16.1k
      T.setKind(II.getTokenID());
541
16.1k
    }
542
41.0k
    Tokens.push_back(syntax::Token(T));
543
41.0k
  };
544
545
3.91k
  auto SrcBuffer = SM.getBufferData(FR.file());
546
3.91k
  Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(),
547
3.91k
          SrcBuffer.data() + FR.beginOffset(),
548
          // We can't make BufEnd point to FR.endOffset, as Lexer requires a
549
          // null terminated buffer.
550
3.91k
          SrcBuffer.data() + SrcBuffer.size());
551
552
3.91k
  clang::Token T;
553
43.1k
  while (!L.LexFromRawLexer(T) && 
L.getCurrentBufferOffset() < FR.endOffset()39.1k
)
554
39.1k
    AddToken(T);
555
  // LexFromRawLexer returns true when it parses the last token of the file, add
556
  // it iff it starts within the range we are interested in.
557
3.91k
  if (SM.getFileOffset(T.getLocation()) < FR.endOffset())
558
1.82k
    AddToken(T);
559
3.91k
  return Tokens;
560
3.91k
}
561
562
std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
563
3.91k
                                            const LangOptions &LO) {
564
3.91k
  return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO);
565
3.91k
}
566
567
/// Records information required to construct mappings for the token buffer that
568
/// we are collecting.
569
class TokenCollector::CollectPPExpansions : public PPCallbacks {
570
public:
571
2.20k
  CollectPPExpansions(TokenCollector &C) : Collector(&C) {}
572
573
  /// Disabled instance will stop reporting anything to TokenCollector.
574
  /// This ensures that uses of the preprocessor after TokenCollector::consume()
575
  /// is called do not access the (possibly invalid) collector instance.
576
2.20k
  void disable() { Collector = nullptr; }
577
578
  void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
579
260
                    SourceRange Range, const MacroArgs *Args) override {
580
260
    if (!Collector)
581
0
      return;
582
260
    const auto &SM = Collector->PP.getSourceManager();
583
    // Only record top-level expansions that directly produce expanded tokens.
584
    // This excludes those where:
585
    //   - the macro use is inside a macro body,
586
    //   - the macro appears in an argument to another macro.
587
    // However macro expansion isn't really a tree, it's token rewrite rules,
588
    // so there are other cases, e.g.
589
    //   #define B(X) X
590
    //   #define A 1 + B
591
    //   A(2)
592
    // Both A and B produce expanded tokens, though the macro name 'B' comes
593
    // from an expansion. The best we can do is merge the mappings for both.
594
595
    // The *last* token of any top-level macro expansion must be in a file.
596
    // (In the example above, see the closing paren of the expansion of B).
597
260
    if (!Range.getEnd().isFileID())
598
2
      return;
599
    // If there's a current expansion that encloses this one, this one can't be
600
    // top-level.
601
258
    if (LastExpansionEnd.isValid() &&
602
258
        
!SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd())134
)
603
16
      return;
604
605
    // If the macro invocation (B) starts in a macro (A) but ends in a file,
606
    // we'll create a merged mapping for A + B by overwriting the endpoint for
607
    // A's startpoint.
608
242
    if (!Range.getBegin().isFileID()) {
609
1
      Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
610
1
      assert(Collector->Expansions.count(Range.getBegin()) &&
611
1
             "Overlapping macros should have same expansion location");
612
1
    }
613
614
0
    Collector->Expansions[Range.getBegin()] = Range.getEnd();
615
242
    LastExpansionEnd = Range.getEnd();
616
242
  }
617
  // FIXME: handle directives like #pragma, #include, etc.
618
private:
619
  TokenCollector *Collector;
620
  /// Used to detect recursive macro expansions.
621
  SourceLocation LastExpansionEnd;
622
};
623
624
/// Fills in the TokenBuffer by tracing the run of a preprocessor. The
625
/// implementation tracks the tokens, macro expansions and directives coming
626
/// from the preprocessor and:
627
/// - for each token, figures out if it is a part of an expanded token stream,
628
///   spelled token stream or both. Stores the tokens appropriately.
629
/// - records mappings from the spelled to expanded token ranges, e.g. for macro
630
///   expansions.
631
/// FIXME: also properly record:
632
///          - #include directives,
633
///          - #pragma, #line and other PP directives,
634
///          - skipped pp regions,
635
///          - ...
636
637
2.20k
TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
638
  // Collect the expanded token stream during preprocessing.
639
39.9k
  PP.setTokenWatcher([this](const clang::Token &T) {
640
39.9k
    if (T.isAnnotation())
641
3
      return;
642
39.9k
    DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
643
39.9k
                                          << "Token: "
644
39.9k
                                          << syntax::Token(T).dumpForTests(
645
39.9k
                                                 this->PP.getSourceManager())
646
39.9k
                                          << "\n"
647
648
39.9k
    );
649
39.9k
    Expanded.push_back(syntax::Token(T));
650
39.9k
  });
651
  // And locations of macro calls, to properly recover boundaries of those in
652
  // case of empty expansions.
653
2.20k
  auto CB = std::make_unique<CollectPPExpansions>(*this);
654
2.20k
  this->Collector = CB.get();
655
2.20k
  PP.addPPCallbacks(std::move(CB));
656
2.20k
}
657
658
/// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
/// token stream.
class TokenCollector::Builder {
public:
  Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
          const SourceManager &SM, const LangOptions &LangOpts)
      : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
        LangOpts(LangOpts) {
    Result.ExpandedTokens = std::move(Expanded);
  }

  /// Consumes this builder and produces the finished TokenBuffer: spelled
  /// tokens per file plus the spelled<->expanded mappings.
  TokenBuffer build() && {
    assert(!Result.ExpandedTokens.empty());
    assert(Result.ExpandedTokens.back().kind() == tok::eof);

    // Tokenize every file that contributed tokens to the expanded stream.
    buildSpelledTokens();

    // The expanded token stream consists of runs of tokens that came from
    // the same source (a macro expansion, part of a file etc).
    // Between these runs are the logical positions of spelled tokens that
    // didn't expand to anything.
    while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
      // Create empty mappings for spelled tokens that expanded to nothing here.
      // May advance NextSpelled, but NextExpanded is unchanged.
      discard();
      // Create mapping for a contiguous run of expanded tokens.
      // Advances NextExpanded past the run, and NextSpelled accordingly.
      unsigned OldPosition = NextExpanded;
      advance();
      if (NextExpanded == OldPosition)
        diagnoseAdvanceFailure();
    }
    // If any tokens remain in any of the files, they didn't expand to anything.
    // Create empty mappings up until the end of the file.
    for (const auto &File : Result.Files)
      discard(File.first);

#ifndef NDEBUG
    // Sanity check: mappings within each file must be non-overlapping and
    // strictly increasing on both the spelled and the expanded side.
    for (auto &pair : Result.Files) {
      auto &mappings = pair.second.Mappings;
      assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1,
                                          const TokenBuffer::Mapping &M2) {
        return M1.BeginSpelled < M2.BeginSpelled &&
               M1.EndSpelled < M2.EndSpelled &&
               M1.BeginExpanded < M2.BeginExpanded &&
               M1.EndExpanded < M2.EndExpanded;
      }));
    }
#endif

    return std::move(Result);
  }

private:
  // Consume a sequence of spelled tokens that didn't expand to anything.
  // In the simplest case, skips spelled tokens until finding one that produced
  // the NextExpanded token, and creates an empty mapping for them.
  // If Drain is provided, skips remaining tokens from that file instead.
  void discard(llvm::Optional<FileID> Drain = llvm::None) {
    // Target is the spelled location we skip up to (exclusive): either the
    // end of the drained file or the expansion location of the next expanded
    // token.
    SourceLocation Target =
        Drain ? SM.getLocForEndOfFile(*Drain)
              : SM.getExpansionLoc(
                    Result.ExpandedTokens[NextExpanded].location());
    FileID File = SM.getFileID(Target);
    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
    auto &NextSpelled = this->NextSpelled[File];

    TokenBuffer::Mapping Mapping;
    Mapping.BeginSpelled = NextSpelled;
    // When dropping trailing tokens from a file, the empty mapping should
    // be positioned within the file's expanded-token range (at the end).
    Mapping.BeginExpanded = Mapping.EndExpanded =
        Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
    // We may want to split into several adjacent empty mappings.
    // FlushMapping() emits the current mapping and starts a new one.
    auto FlushMapping = [&, this] {
      Mapping.EndSpelled = NextSpelled;
      if (Mapping.BeginSpelled != Mapping.EndSpelled)
        Result.Files[File].Mappings.push_back(Mapping);
      Mapping.BeginSpelled = NextSpelled;
    };

    while (NextSpelled < SpelledTokens.size() &&
           SpelledTokens[NextSpelled].location() < Target) {
      // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
      // then we want to partition our (empty) mapping.
      //   [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
      SourceLocation KnownEnd =
          CollectedExpansions.lookup(SpelledTokens[NextSpelled].location());
      if (KnownEnd.isValid()) {
        FlushMapping(); // Emits [Start, NextSpelled)
        while (NextSpelled < SpelledTokens.size() &&
               SpelledTokens[NextSpelled].location() <= KnownEnd)
          ++NextSpelled;
        FlushMapping(); // Emits [NextSpelled, KnownEnd]
        // Now the loop continues and will emit (KnownEnd, Target).
      } else {
        ++NextSpelled;
      }
    }
    FlushMapping();
  }

  // Consumes the NextExpanded token and others that are part of the same run.
  // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
  // (unless this is a run of file tokens, which we represent with no mapping).
  void advance() {
    const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
    SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
    FileID File = SM.getFileID(Expansion);
    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
    auto &NextSpelled = this->NextSpelled[File];

    if (Tok.location().isFileID()) {
      // A run of file tokens continues while the expanded/spelled tokens match.
      while (NextSpelled < SpelledTokens.size() &&
             NextExpanded < Result.ExpandedTokens.size() &&
             SpelledTokens[NextSpelled].location() ==
                 Result.ExpandedTokens[NextExpanded].location()) {
        ++NextSpelled;
        ++NextExpanded;
      }
      // We need no mapping for file tokens copied to the expanded stream.
    } else {
      // We found a new macro expansion. We should have its spelling bounds.
      auto End = CollectedExpansions.lookup(Expansion);
      assert(End.isValid() && "Macro expansion wasn't captured?");

      // Mapping starts here...
      TokenBuffer::Mapping Mapping;
      Mapping.BeginExpanded = NextExpanded;
      Mapping.BeginSpelled = NextSpelled;
      // ... consumes spelled tokens within bounds we captured ...
      while (NextSpelled < SpelledTokens.size() &&
             SpelledTokens[NextSpelled].location() <= End)
        ++NextSpelled;
      // ... consumes expanded tokens rooted at the same expansion ...
      while (NextExpanded < Result.ExpandedTokens.size() &&
             SM.getExpansionLoc(
                 Result.ExpandedTokens[NextExpanded].location()) == Expansion)
        ++NextExpanded;
      // ... and ends here.
      Mapping.EndExpanded = NextExpanded;
      Mapping.EndSpelled = NextSpelled;
      Result.Files[File].Mappings.push_back(Mapping);
    }
  }

  // advance() is supposed to consume at least one token - if not, we crash.
  void diagnoseAdvanceFailure() {
#ifndef NDEBUG
    // Show the failed-to-map token in context: up to 10 tokens before and 5
    // after, with the offending token marked "!!".
    for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
         I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
      const char *L =
          (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : "   ";
      llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
    }
#endif
    llvm_unreachable("Couldn't map expanded token to spelled tokens!");
  }

  /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
  /// ranges for each of the files.
  void buildSpelledTokens() {
    for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
      const auto &Tok = Result.ExpandedTokens[I];
      auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
      auto It = Result.Files.try_emplace(FID);
      TokenBuffer::MarkedFile &File = It.first->second;

      // The eof token should not be considered part of the main-file's range.
      File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;

      if (!It.second)
        continue; // we have seen this file before.
      // This is the first time we see this file.
      File.BeginExpanded = I;
      File.SpelledTokens = tokenize(FID, SM, LangOpts);
    }
  }

  TokenBuffer Result;
  unsigned NextExpanded = 0;                    // cursor in ExpandedTokens
  llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
  PPExpansions CollectedExpansions;
  const SourceManager &SM;
  const LangOptions &LangOpts;
};
848
849
2.20k
TokenBuffer TokenCollector::consume() && {
  // Stop observing the preprocessor: detach the token watcher and disable the
  // expansion-collecting callbacks we registered earlier.
  PP.setTokenWatcher(nullptr);
  Collector->disable();
  // Hand the accumulated expanded tokens and expansion bounds to the builder,
  // which tokenizes the files and computes the spelled<->expanded mappings.
  Builder B(std::move(Expanded), std::move(Expansions), PP.getSourceManager(),
            PP.getLangOpts());
  return std::move(B).build();
}
856
857
90
std::string syntax::Token::str() const {
858
90
  return std::string(llvm::formatv("Token({0}, length = {1})",
859
90
                                   tok::getTokenName(kind()), length()));
860
90
}
861
862
0
std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
863
0
  return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM),
864
0
                                   tok::getTokenName(kind()), length()));
865
0
}
866
867
15
std::string TokenBuffer::dumpForTests() const {
868
485
  auto PrintToken = [this](const syntax::Token &T) -> std::string {
869
485
    if (T.kind() == tok::eof)
870
12
      return "<eof>";
871
473
    return std::string(T.text(*SourceMgr));
872
485
  };
873
874
15
  auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
875
32
                                        llvm::ArrayRef<syntax::Token> Tokens) {
876
32
    if (Tokens.empty()) {
877
4
      OS << "<empty>";
878
4
      return;
879
4
    }
880
28
    OS << Tokens[0].text(*SourceMgr);
881
424
    for (unsigned I = 1; I < Tokens.size(); 
++I396
) {
882
396
      if (Tokens[I].kind() == tok::eof)
883
0
        continue;
884
396
      OS << " " << PrintToken(Tokens[I]);
885
396
    }
886
28
  };
887
888
15
  std::string Dump;
889
15
  llvm::raw_string_ostream OS(Dump);
890
891
15
  OS << "expanded tokens:\n"
892
15
     << "  ";
893
  // (!) we do not show '<eof>'.
894
15
  DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
895
15
  OS << "\n";
896
897
15
  std::vector<FileID> Keys;
898
15
  for (auto F : Files)
899
17
    Keys.push_back(F.first);
900
15
  llvm::sort(Keys);
901
902
17
  for (FileID ID : Keys) {
903
17
    const MarkedFile &File = Files.find(ID)->second;
904
17
    auto *Entry = SourceMgr->getFileEntryForID(ID);
905
17
    if (!Entry)
906
0
      continue; // Skip builtin files.
907
17
    OS << llvm::formatv("file '{0}'\n", Entry->getName())
908
17
       << "  spelled tokens:\n"
909
17
       << "    ";
910
17
    DumpTokens(OS, File.SpelledTokens);
911
17
    OS << "\n";
912
913
17
    if (File.Mappings.empty()) {
914
4
      OS << "  no mappings.\n";
915
4
      continue;
916
4
    }
917
13
    OS << "  mappings:\n";
918
24
    for (auto &M : File.Mappings) {
919
24
      OS << llvm::formatv(
920
24
          "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
921
24
          PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
922
24
          M.EndSpelled == File.SpelledTokens.size()
923
24
              ? 
"<eof>"7
924
24
              : 
PrintToken(File.SpelledTokens[M.EndSpelled])17
,
925
24
          M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
926
24
          M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
927
24
          M.EndExpanded);
928
24
    }
929
13
  }
930
15
  return OS.str();
931
15
}