Coverage Report

Created: 2020-09-15 12:33

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Tooling/Syntax/Tokens.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
#include "clang/Tooling/Syntax/Tokens.h"
9
10
#include "clang/Basic/Diagnostic.h"
11
#include "clang/Basic/IdentifierTable.h"
12
#include "clang/Basic/LLVM.h"
13
#include "clang/Basic/LangOptions.h"
14
#include "clang/Basic/SourceLocation.h"
15
#include "clang/Basic/SourceManager.h"
16
#include "clang/Basic/TokenKinds.h"
17
#include "clang/Lex/PPCallbacks.h"
18
#include "clang/Lex/Preprocessor.h"
19
#include "clang/Lex/Token.h"
20
#include "llvm/ADT/ArrayRef.h"
21
#include "llvm/ADT/None.h"
22
#include "llvm/ADT/Optional.h"
23
#include "llvm/ADT/STLExtras.h"
24
#include "llvm/Support/Debug.h"
25
#include "llvm/Support/ErrorHandling.h"
26
#include "llvm/Support/FormatVariadic.h"
27
#include "llvm/Support/raw_ostream.h"
28
#include <algorithm>
29
#include <cassert>
30
#include <iterator>
31
#include <string>
32
#include <utility>
33
#include <vector>
34
35
using namespace clang;
36
using namespace clang::syntax;
37
38
namespace {
39
// Finds the smallest consecutive subsuquence of Toks that covers R.
40
llvm::ArrayRef<syntax::Token>
41
getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
42
13
                  const SourceManager &SM) {
43
13
  if (R.isInvalid())
44
1
    return {};
45
12
  const syntax::Token *Begin =
46
59
      llvm::partition_point(Toks, [&](const syntax::Token &T) {
47
59
        return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
48
59
      });
49
12
  const syntax::Token *End =
50
58
      llvm::partition_point(Toks, [&](const syntax::Token &T) {
51
58
        return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
52
58
      });
53
12
  if (Begin > End)
54
0
    return {};
55
12
  return {Begin, End};
56
12
}
57
58
// Finds the smallest expansion range that contains expanded tokens First and
59
// Last, e.g.:
60
// #define ID(x) x
61
// ID(ID(ID(a1) a2))
62
//          ~~       -> a1
63
//              ~~   -> a2
64
//       ~~~~~~~~~   -> a1 a2
65
SourceRange findCommonRangeForMacroArgs(const syntax::Token &First,
66
                                        const syntax::Token &Last,
67
16
                                        const SourceManager &SM) {
68
16
  SourceRange Res;
69
16
  auto FirstLoc = First.location(), LastLoc = Last.location();
70
  // Keep traversing up the spelling chain as longs as tokens are part of the
71
  // same expansion.
72
33
  while (!FirstLoc.isFileID() && 
!LastLoc.isFileID()26
) {
73
23
    auto ExpInfoFirst = SM.getSLocEntry(SM.getFileID(FirstLoc)).getExpansion();
74
23
    auto ExpInfoLast = SM.getSLocEntry(SM.getFileID(LastLoc)).getExpansion();
75
    // Stop if expansions have diverged.
76
23
    if (ExpInfoFirst.getExpansionLocStart() !=
77
23
        ExpInfoLast.getExpansionLocStart())
78
5
      break;
79
    // Do not continue into macro bodies.
80
18
    if (!ExpInfoFirst.isMacroArgExpansion() ||
81
17
        !ExpInfoLast.isMacroArgExpansion())
82
1
      break;
83
17
    FirstLoc = SM.getImmediateSpellingLoc(FirstLoc);
84
17
    LastLoc = SM.getImmediateSpellingLoc(LastLoc);
85
    // Update the result afterwards, as we want the tokens that triggered the
86
    // expansion.
87
17
    Res = {FirstLoc, LastLoc};
88
17
  }
89
  // Normally mapping back to expansion location here only changes FileID, as
90
  // we've already found some tokens expanded from the same macro argument, and
91
  // they should map to a consecutive subset of spelled tokens. Unfortunately
92
  // SourceManager::isBeforeInTranslationUnit discriminates sourcelocations
93
  // based on their FileID in addition to offsets. So even though we are
94
  // referring to same tokens, SourceManager might tell us that one is before
95
  // the other if they've got different FileIDs.
96
16
  return SM.getExpansionRange(CharSourceRange(Res, true)).getAsRange();
97
16
}
98
99
} // namespace
100
101
syntax::Token::Token(SourceLocation Location, unsigned Length,
102
                     tok::TokenKind Kind)
103
74.7k
    : Location(Location), Length(Length), Kind(Kind) {
104
74.7k
  assert(Location.isValid());
105
74.7k
}
106
107
syntax::Token::Token(const clang::Token &T)
108
74.7k
    : Token(T.getLocation(), T.getLength(), T.getKind()) {
109
74.7k
  assert(!T.isAnnotation());
110
74.7k
}
Unexecuted instantiation: clang::syntax::Token::Token(clang::Token const&)
clang::syntax::Token::Token(clang::Token const&)
Line
Count
Source
108
74.7k
    : Token(T.getLocation(), T.getLength(), T.getKind()) {
109
74.7k
  assert(!T.isAnnotation());
110
74.7k
}
111
112
19.1k
llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
113
19.1k
  bool Invalid = false;
114
19.1k
  const char *Start = SM.getCharacterData(location(), &Invalid);
115
19.1k
  assert(!Invalid);
116
19.1k
  return llvm::StringRef(Start, length());
117
19.1k
}
118
119
123k
FileRange syntax::Token::range(const SourceManager &SM) const {
120
123k
  assert(location().isFileID() && "must be a spelled token");
121
123k
  FileID File;
122
123k
  unsigned StartOffset;
123
123k
  std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
124
123k
  return FileRange(File, StartOffset, StartOffset + length());
125
123k
}
126
127
FileRange syntax::Token::range(const SourceManager &SM,
128
                               const syntax::Token &First,
129
61.8k
                               const syntax::Token &Last) {
130
61.8k
  auto F = First.range(SM);
131
61.8k
  auto L = Last.range(SM);
132
61.8k
  assert(F.file() == L.file() && "tokens from different files");
133
61.8k
  assert((F == L || F.endOffset() <= L.beginOffset()) &&
134
61.8k
         "wrong order of tokens");
135
61.8k
  return FileRange(F.file(), F.beginOffset(), L.endOffset());
136
61.8k
}
137
138
231
llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
139
231
  return OS << T.str();
140
231
}
141
142
FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
143
249k
    : File(File), Begin(BeginOffset), End(EndOffset) {
144
249k
  assert(File.isValid());
145
249k
  assert(BeginOffset <= EndOffset);
146
249k
}
147
148
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
149
0
                     unsigned Length) {
150
0
  assert(BeginLoc.isValid());
151
0
  assert(BeginLoc.isFileID());
152
0
153
0
  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
154
0
  End = Begin + Length;
155
0
}
156
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
157
0
                     SourceLocation EndLoc) {
158
0
  assert(BeginLoc.isValid());
159
0
  assert(BeginLoc.isFileID());
160
0
  assert(EndLoc.isValid());
161
0
  assert(EndLoc.isFileID());
162
0
  assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
163
0
  assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));
164
0
165
0
  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
166
0
  End = SM.getFileOffset(EndLoc);
167
0
}
168
169
llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
170
0
                                      const FileRange &R) {
171
0
  return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
172
0
                             R.file().getHashValue(), R.beginOffset(),
173
0
                             R.endOffset());
174
0
}
175
176
14
llvm::StringRef FileRange::text(const SourceManager &SM) const {
177
14
  bool Invalid = false;
178
14
  StringRef Text = SM.getBufferData(File, &Invalid);
179
14
  if (Invalid)
180
0
    return "";
181
14
  assert(Begin <= Text.size());
182
14
  assert(End <= Text.size());
183
14
  return Text.substr(Begin, length());
184
14
}
185
186
2
llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
187
2
  return getTokensCovering(expandedTokens(), R, *SourceMgr);
188
2
}
189
190
42
CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
191
42
  return CharSourceRange(
192
42
      SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
193
42
      /*IsTokenRange=*/false);
194
42
}
195
196
std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
197
121k
TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
198
121k
  assert(Expanded);
199
121k
  assert(ExpandedTokens.data() <= Expanded &&
200
121k
         Expanded < ExpandedTokens.data() + ExpandedTokens.size());
201
121k
202
121k
  auto FileIt = Files.find(
203
121k
      SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
204
121k
  assert(FileIt != Files.end() && "no file for an expanded token");
205
121k
206
121k
  const MarkedFile &File = FileIt->second;
207
121k
208
121k
  unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
209
  // Find the first mapping that produced tokens after \p Expanded.
210
3.31k
  auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
211
3.31k
    return M.BeginExpanded <= ExpandedIndex;
212
3.31k
  });
213
  // Our token could only be produced by the previous mapping.
214
121k
  if (It == File.Mappings.begin()) {
215
    // No previous mapping, no need to modify offsets.
216
119k
    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
217
119k
            /*Mapping=*/nullptr};
218
119k
  }
219
1.53k
  --It; // 'It' now points to last mapping that started before our token.
220
1.53k
221
  // Check if the token is part of the mapping.
222
1.53k
  if (ExpandedIndex < It->EndExpanded)
223
522
    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};
224
1.01k
225
  // Not part of the mapping, use the index from previous mapping to compute the
226
  // corresponding spelled token.
227
1.01k
  return {
228
1.01k
      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
229
1.01k
      /*Mapping=*/nullptr};
230
1.01k
}
231
232
const TokenBuffer::Mapping *
233
TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
234
41
                                          const syntax::Token *Spelled) {
235
41
  assert(F.SpelledTokens.data() <= Spelled);
236
41
  unsigned SpelledI = Spelled - F.SpelledTokens.data();
237
41
  assert(SpelledI < F.SpelledTokens.size());
238
41
239
87
  auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
240
87
    return M.BeginSpelled <= SpelledI;
241
87
  });
242
41
  if (It == F.Mappings.begin())
243
6
    return nullptr;
244
35
  --It;
245
35
  return &*It;
246
35
}
247
248
llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
249
22
TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
250
22
  if (Spelled.empty())
251
0
    return {};
252
22
  const auto &File = fileForSpelled(Spelled);
253
22
254
22
  auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
255
22
  unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
256
22
  assert(SpelledFrontI < File.SpelledTokens.size());
257
22
  unsigned ExpandedBegin;
258
22
  if (!FrontMapping) {
259
    // No mapping that starts before the first token of Spelled, we don't have
260
    // to modify offsets.
261
3
    ExpandedBegin = File.BeginExpanded + SpelledFrontI;
262
19
  } else if (SpelledFrontI < FrontMapping->EndSpelled) {
263
    // This mapping applies to Spelled tokens.
264
19
    if (SpelledFrontI != FrontMapping->BeginSpelled) {
265
      // Spelled tokens don't cover the entire mapping, returning empty result.
266
3
      return {}; // FIXME: support macro arguments.
267
3
    }
268
    // Spelled tokens start at the beginning of this mapping.
269
16
    ExpandedBegin = FrontMapping->BeginExpanded;
270
0
  } else {
271
    // Spelled tokens start after the mapping ends (they start in the hole
272
    // between 2 mappings, or between a mapping and end of the file).
273
0
    ExpandedBegin =
274
0
        FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
275
0
  }
276
22
277
19
  auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
278
19
  unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
279
19
  unsigned ExpandedEnd;
280
19
  if (!BackMapping) {
281
    // No mapping that starts before the last token of Spelled, we don't have to
282
    // modify offsets.
283
3
    ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
284
16
  } else if (SpelledBackI < BackMapping->EndSpelled) {
285
    // This mapping applies to Spelled tokens.
286
16
    if (SpelledBackI + 1 != BackMapping->EndSpelled) {
287
      // Spelled tokens don't cover the entire mapping, returning empty result.
288
1
      return {}; // FIXME: support macro arguments.
289
1
    }
290
15
    ExpandedEnd = BackMapping->EndExpanded;
291
0
  } else {
292
    // Spelled tokens end after the mapping ends.
293
0
    ExpandedEnd =
294
0
        BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
295
0
  }
296
19
297
18
  assert(ExpandedBegin < ExpandedTokens.size());
298
18
  assert(ExpandedEnd < ExpandedTokens.size());
299
  // Avoid returning empty ranges.
300
18
  if (ExpandedBegin == ExpandedEnd)
301
3
    return {};
302
15
  return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
303
15
                             ExpandedTokens.data() + ExpandedEnd)};
304
15
}
305
306
82
llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
307
82
  auto It = Files.find(FID);
308
82
  assert(It != Files.end());
309
82
  return It->second.SpelledTokens;
310
82
}
311
312
0
const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const {
313
0
  assert(Loc.isFileID());
314
0
  const auto *Tok = llvm::partition_point(
315
0
      spelledTokens(SourceMgr->getFileID(Loc)),
316
0
      [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
317
0
  if (!Tok || Tok->location() != Loc)
318
0
    return nullptr;
319
0
  return Tok;
320
0
}
321
322
0
std::string TokenBuffer::Mapping::str() const {
323
0
  return std::string(
324
0
      llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
325
0
                    BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
326
0
}
327
328
llvm::Optional<llvm::ArrayRef<syntax::Token>>
329
60.8k
TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
330
  // Mapping an empty range is ambiguous in case of empty mappings at either end
331
  // of the range, bail out in that case.
332
60.8k
  if (Expanded.empty())
333
112
    return llvm::None;
334
60.7k
335
60.7k
  const syntax::Token *BeginSpelled;
336
60.7k
  const Mapping *BeginMapping;
337
60.7k
  std::tie(BeginSpelled, BeginMapping) =
338
60.7k
      spelledForExpandedToken(&Expanded.front());
339
60.7k
340
60.7k
  const syntax::Token *LastSpelled;
341
60.7k
  const Mapping *LastMapping;
342
60.7k
  std::tie(LastSpelled, LastMapping) =
343
60.7k
      spelledForExpandedToken(&Expanded.back());
344
60.7k
345
60.7k
  FileID FID = SourceMgr->getFileID(BeginSpelled->location());
346
  // FIXME: Handle multi-file changes by trying to map onto a common root.
347
60.7k
  if (FID != SourceMgr->getFileID(LastSpelled->location()))
348
0
    return llvm::None;
349
60.7k
350
60.7k
  const MarkedFile &File = Files.find(FID)->second;
351
60.7k
352
  // If both tokens are coming from a macro argument expansion, try and map to
353
  // smallest part of the macro argument. BeginMapping && LastMapping check is
354
  // only for performance, they are a prerequisite for Expanded.front() and
355
  // Expanded.back() being part of a macro arg expansion.
356
60.7k
  if (BeginMapping && 
LastMapping275
&&
357
246
      SourceMgr->isMacroArgExpansion(Expanded.front().location()) &&
358
16
      SourceMgr->isMacroArgExpansion(Expanded.back().location())) {
359
16
    auto CommonRange = findCommonRangeForMacroArgs(Expanded.front(),
360
16
                                                   Expanded.back(), *SourceMgr);
361
    // It might be the case that tokens are arguments of different macro calls,
362
    // in that case we should continue with the logic below instead of returning
363
    // an empty range.
364
16
    if (CommonRange.isValid())
365
11
      return getTokensCovering(File.SpelledTokens, CommonRange, *SourceMgr);
366
60.7k
  }
367
60.7k
368
  // Do not allow changes that doesn't cover full expansion.
369
60.7k
  unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
370
60.7k
  unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
371
60.7k
  if (BeginMapping && 
BeginExpanded != BeginMapping->BeginExpanded264
)
372
128
    return llvm::None;
373
60.5k
  if (LastMapping && 
LastMapping->EndExpanded != EndExpanded108
)
374
45
    return llvm::None;
375
  // All is good, return the result.
376
60.5k
  return llvm::makeArrayRef(
377
91
      BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
378
60.4k
                   : BeginSpelled,
379
63
      LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
380
60.4k
                  : LastSpelled + 1);
381
60.5k
}
382
383
TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F,
384
13
                                                  const Mapping &M) const {
385
13
  Expansion E;
386
13
  E.Spelled = llvm::makeArrayRef(F.SpelledTokens.data() + M.BeginSpelled,
387
13
                                 F.SpelledTokens.data() + M.EndSpelled);
388
13
  E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M.BeginExpanded,
389
13
                                  ExpandedTokens.data() + M.EndExpanded);
390
13
  return E;
391
13
}
392
393
const TokenBuffer::MarkedFile &
394
54
TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
395
54
  assert(!Spelled.empty());
396
54
  assert(Spelled.front().location().isFileID() && "not a spelled token");
397
54
  auto FileIt = Files.find(SourceMgr->getFileID(Spelled.front().location()));
398
54
  assert(FileIt != Files.end() && "file not tracked by token buffer");
399
54
  const auto &File = FileIt->second;
400
54
  assert(File.SpelledTokens.data() <= Spelled.data() &&
401
54
         Spelled.end() <=
402
54
             (File.SpelledTokens.data() + File.SpelledTokens.size()) &&
403
54
         "Tokens not in spelled range");
404
54
#ifndef NDEBUG
405
54
  auto T1 = Spelled.back().location();
406
54
  auto T2 = File.SpelledTokens.back().location();
407
54
  assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
408
54
#endif
409
54
  return File;
410
54
}
411
412
llvm::Optional<TokenBuffer::Expansion>
413
28
TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
414
28
  assert(Spelled);
415
28
  const auto &File = fileForSpelled(*Spelled);
416
28
417
28
  unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
418
56
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
419
56
    return M.BeginSpelled < SpelledIndex;
420
56
  });
421
28
  if (M == File.Mappings.end() || 
M->BeginSpelled != SpelledIndex16
)
422
22
    return llvm::None;
423
6
  return makeExpansion(File, *M);
424
6
}
425
426
std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping(
427
4
    llvm::ArrayRef<syntax::Token> Spelled) const {
428
4
  if (Spelled.empty())
429
0
    return {};
430
4
  const auto &File = fileForSpelled(Spelled);
431
4
432
  // Find the first overlapping range, and then copy until we stop overlapping.
433
4
  unsigned SpelledBeginIndex = Spelled.begin() - File.SpelledTokens.data();
434
4
  unsigned SpelledEndIndex = Spelled.end() - File.SpelledTokens.data();
435
8
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
436
8
    return M.EndSpelled <= SpelledBeginIndex;
437
8
  });
438
4
  std::vector<TokenBuffer::Expansion> Expansions;
439
11
  for (; M != File.Mappings.end() && 
M->BeginSpelled < SpelledEndIndex8
;
++M7
)
440
7
    Expansions.push_back(makeExpansion(File, *M));
441
4
  return Expansions;
442
4
}
443
444
llvm::ArrayRef<syntax::Token>
445
syntax::spelledTokensTouching(SourceLocation Loc,
446
16
                              llvm::ArrayRef<syntax::Token> Tokens) {
447
16
  assert(Loc.isFileID());
448
16
449
16
  auto *Right = llvm::partition_point(
450
42
      Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
451
16
  bool AcceptRight = Right != Tokens.end() && 
Right->location() <= Loc14
;
452
16
  bool AcceptLeft =
453
16
      Right != Tokens.begin() && 
(Right - 1)->endLocation() >= Loc14
;
454
12
  return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 
04
),
455
8
                            Right + (AcceptRight ? 1 : 0));
456
16
}
457
458
llvm::ArrayRef<syntax::Token>
459
syntax::spelledTokensTouching(SourceLocation Loc,
460
8
                              const syntax::TokenBuffer &Tokens) {
461
8
  return spelledTokensTouching(
462
8
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
463
8
}
464
465
const syntax::Token *
466
syntax::spelledIdentifierTouching(SourceLocation Loc,
467
8
                                  llvm::ArrayRef<syntax::Token> Tokens) {
468
9
  for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
469
9
    if (Tok.kind() == tok::identifier)
470
3
      return &Tok;
471
9
  }
472
5
  return nullptr;
473
8
}
474
475
const syntax::Token *
476
syntax::spelledIdentifierTouching(SourceLocation Loc,
477
8
                                  const syntax::TokenBuffer &Tokens) {
478
8
  return spelledIdentifierTouching(
479
8
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
480
8
}
481
482
std::vector<const syntax::Token *>
483
1
TokenBuffer::macroExpansions(FileID FID) const {
484
1
  auto FileIt = Files.find(FID);
485
1
  assert(FileIt != Files.end() && "file not tracked by token buffer");
486
1
  auto &File = FileIt->second;
487
1
  std::vector<const syntax::Token *> Expansions;
488
1
  auto &Spelled = File.SpelledTokens;
489
4
  for (auto Mapping : File.Mappings) {
490
4
    const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
491
4
    if (Token->kind() == tok::TokenKind::identifier)
492
3
      Expansions.push_back(Token);
493
4
  }
494
1
  return Expansions;
495
1
}
496
497
std::vector<syntax::Token> syntax::tokenize(const FileRange &FR,
498
                                            const SourceManager &SM,
499
2.08k
                                            const LangOptions &LO) {
500
2.08k
  std::vector<syntax::Token> Tokens;
501
2.08k
  IdentifierTable Identifiers(LO);
502
36.8k
  auto AddToken = [&](clang::Token T) {
503
    // Fill the proper token kind for keywords, etc.
504
36.8k
    if (T.getKind() == tok::raw_identifier && 
!T.needsCleaning()14.7k
&&
505
14.7k
        !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
506
14.7k
      clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
507
14.7k
      T.setIdentifierInfo(&II);
508
14.7k
      T.setKind(II.getTokenID());
509
14.7k
    }
510
36.8k
    Tokens.push_back(syntax::Token(T));
511
36.8k
  };
512
2.08k
513
2.08k
  auto SrcBuffer = SM.getBufferData(FR.file());
514
2.08k
  Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(),
515
2.08k
          SrcBuffer.data() + FR.beginOffset(),
516
          // We can't make BufEnd point to FR.endOffset, as Lexer requires a
517
          // null terminated buffer.
518
2.08k
          SrcBuffer.data() + SrcBuffer.size());
519
2.08k
520
2.08k
  clang::Token T;
521
38.7k
  while (!L.LexFromRawLexer(T) && 
L.getCurrentBufferOffset() < FR.endOffset()36.6k
)
522
36.6k
    AddToken(T);
523
  // LexFromRawLexer returns true when it parses the last token of the file, add
524
  // it iff it starts within the range we are interested in.
525
2.08k
  if (SM.getFileOffset(T.getLocation()) < FR.endOffset())
526
217
    AddToken(T);
527
2.08k
  return Tokens;
528
2.08k
}
529
530
std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
531
2.07k
                                            const LangOptions &LO) {
532
2.07k
  return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO);
533
2.07k
}
534
535
/// Records information reqired to construct mappings for the token buffer that
536
/// we are collecting.
537
class TokenCollector::CollectPPExpansions : public PPCallbacks {
538
public:
539
1.91k
  CollectPPExpansions(TokenCollector &C) : Collector(&C) {}
540
541
  /// Disabled instance will stop reporting anything to TokenCollector.
542
  /// This ensures that uses of the preprocessor after TokenCollector::consume()
543
  /// is called do not access the (possibly invalid) collector instance.
544
1.91k
  void disable() { Collector = nullptr; }
545
546
  void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
547
148
                    SourceRange Range, const MacroArgs *Args) override {
548
148
    if (!Collector)
549
0
      return;
550
148
    const auto &SM = Collector->PP.getSourceManager();
551
    // Only record top-level expansions that directly produce expanded tokens.
552
    // This excludes those where:
553
    //   - the macro use is inside a macro body,
554
    //   - the macro appears in an argument to another macro.
555
    // However macro expansion isn't really a tree, it's token rewrite rules,
556
    // so there are other cases, e.g.
557
    //   #define B(X) X
558
    //   #define A 1 + B
559
    //   A(2)
560
    // Both A and B produce expanded tokens, though the macro name 'B' comes
561
    // from an expansion. The best we can do is merge the mappings for both.
562
148
563
    // The *last* token of any top-level macro expansion must be in a file.
564
    // (In the example above, see the closing paren of the expansion of B).
565
148
    if (!Range.getEnd().isFileID())
566
2
      return;
567
    // If there's a current expansion that encloses this one, this one can't be
568
    // top-level.
569
146
    if (LastExpansionEnd.isValid() &&
570
92
        !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
571
16
      return;
572
130
573
    // If the macro invocation (B) starts in a macro (A) but ends in a file,
574
    // we'll create a merged mapping for A + B by overwriting the endpoint for
575
    // A's startpoint.
576
130
    if (!Range.getBegin().isFileID()) {
577
1
      Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
578
1
      assert(Collector->Expansions.count(Range.getBegin().getRawEncoding()) &&
579
1
             "Overlapping macros should have same expansion location");
580
1
    }
581
130
582
130
    Collector->Expansions[Range.getBegin().getRawEncoding()] = Range.getEnd();
583
130
    LastExpansionEnd = Range.getEnd();
584
130
  }
585
  // FIXME: handle directives like #pragma, #include, etc.
586
private:
587
  TokenCollector *Collector;
588
  /// Used to detect recursive macro expansions.
589
  SourceLocation LastExpansionEnd;
590
};
591
592
/// Fills in the TokenBuffer by tracing the run of a preprocessor. The
593
/// implementation tracks the tokens, macro expansions and directives coming
594
/// from the preprocessor and:
595
/// - for each token, figures out if it is a part of an expanded token stream,
596
///   spelled token stream or both. Stores the tokens appropriately.
597
/// - records mappings from the spelled to expanded token ranges, e.g. for macro
598
///   expansions.
599
/// FIXME: also properly record:
600
///          - #include directives,
601
///          - #pragma, #line and other PP directives,
602
///          - skipped pp regions,
603
///          - ...
604
605
1.91k
TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
606
  // Collect the expanded token stream during preprocessing.
607
37.9k
  PP.setTokenWatcher([this](const clang::Token &T) {
608
37.9k
    if (T.isAnnotation())
609
2
      return;
610
37.9k
    DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
611
37.9k
                                          << "Token: "
612
37.9k
                                          << syntax::Token(T).dumpForTests(
613
37.9k
                                                 this->PP.getSourceManager())
614
37.9k
                                          << "\n"
615
37.9k
616
37.9k
    );
617
37.9k
    Expanded.push_back(syntax::Token(T));
618
37.9k
  });
619
  // And locations of macro calls, to properly recover boundaries of those in
620
  // case of empty expansions.
621
1.91k
  auto CB = std::make_unique<CollectPPExpansions>(*this);
622
1.91k
  this->Collector = CB.get();
623
1.91k
  PP.addPPCallbacks(std::move(CB));
624
1.91k
}
625
626
/// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
627
/// token stream.
628
class TokenCollector::Builder {
629
public:
630
  Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
631
          const SourceManager &SM, const LangOptions &LangOpts)
632
      : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
633
1.91k
        LangOpts(LangOpts) {
634
1.91k
    Result.ExpandedTokens = std::move(Expanded);
635
1.91k
  }
636
637
1.91k
  /// Assembles the final TokenBuffer: lexes spelled tokens for every
  /// contributing file, then walks the expanded stream pairing runs of
  /// expanded tokens with the spelled tokens that produced them.
  /// Rvalue-qualified: consumes this Builder and moves the result out.
  TokenBuffer build() && {
    assert(!Result.ExpandedTokens.empty());
    assert(Result.ExpandedTokens.back().kind() == tok::eof);

    // Tokenize every file that contributed tokens to the expanded stream.
    buildSpelledTokens();

    // The expanded token stream consists of runs of tokens that came from
    // the same source (a macro expansion, part of a file etc).
    // Between these runs are the logical positions of spelled tokens that
    // didn't expand to anything.
    while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
      // Create empty mappings for spelled tokens that expanded to nothing here.
      // May advance NextSpelled, but NextExpanded is unchanged.
      discard();
      // Create mapping for a contiguous run of expanded tokens.
      // Advances NextExpanded past the run, and NextSpelled accordingly.
      unsigned OldPosition = NextExpanded;
      advance();
      // advance() must consume at least one expanded token per call, otherwise
      // this loop would never terminate.
      if (NextExpanded == OldPosition)
        diagnoseAdvanceFailure();
    }
    // If any tokens remain in any of the files, they didn't expand to anything.
    // Create empty mappings up until the end of the file.
    for (const auto &File : Result.Files)
      discard(File.first);

#ifndef NDEBUG
    // Sanity check: within each file, mappings must be strictly ordered on
    // every one of their four bounds.
    for (auto &pair : Result.Files) {
      auto &mappings = pair.second.Mappings;
      assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1,
                                          const TokenBuffer::Mapping &M2) {
        return M1.BeginSpelled < M2.BeginSpelled &&
               M1.EndSpelled < M2.EndSpelled &&
               M1.BeginExpanded < M2.BeginExpanded &&
               M1.EndExpanded < M2.EndExpanded;
      }));
    }
#endif

    return std::move(Result);
  }
679
680
private:
  // Consume a sequence of spelled tokens that didn't expand to anything.
  // In the simplest case, skips spelled tokens until finding one that produced
  // the NextExpanded token, and creates an empty mapping for them.
  // If Drain is provided, skips remaining tokens from that file instead.
  void discard(llvm::Optional<FileID> Drain = llvm::None) {
    // Skip spelled tokens up to this location: either the end of the drained
    // file, or the (expansion) location of the next expanded token.
    SourceLocation Target =
        Drain ? SM.getLocForEndOfFile(*Drain)
              : SM.getExpansionLoc(
                    Result.ExpandedTokens[NextExpanded].location());
    FileID File = SM.getFileID(Target);
    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
    auto &NextSpelled = this->NextSpelled[File];

    TokenBuffer::Mapping Mapping;
    Mapping.BeginSpelled = NextSpelled;
    // When dropping trailing tokens from a file, the empty mapping should
    // be positioned within the file's expanded-token range (at the end).
    Mapping.BeginExpanded = Mapping.EndExpanded =
        Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
    // We may want to split into several adjacent empty mappings.
    // FlushMapping() emits the current mapping and starts a new one.
    auto FlushMapping = [&, this] {
      Mapping.EndSpelled = NextSpelled;
      // Degenerate (zero-width) mappings are not recorded.
      if (Mapping.BeginSpelled != Mapping.EndSpelled)
        Result.Files[File].Mappings.push_back(Mapping);
      Mapping.BeginSpelled = NextSpelled;
    };
    while (NextSpelled < SpelledTokens.size() &&
           SpelledTokens[NextSpelled].location() < Target) {
      // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
      // then we want to partition our (empty) mapping.
      //   [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
      SourceLocation KnownEnd = CollectedExpansions.lookup(
          SpelledTokens[NextSpelled].location().getRawEncoding());
      if (KnownEnd.isValid()) {
        FlushMapping(); // Emits [Start, NextSpelled)
        while (NextSpelled < SpelledTokens.size() &&
               SpelledTokens[NextSpelled].location() <= KnownEnd)
          ++NextSpelled;
        FlushMapping(); // Emits [NextSpelled, KnownEnd]
        // Now the loop continues and will emit (KnownEnd, Target).
      } else {
        ++NextSpelled;
      }
    }
    FlushMapping();
  }
729
730
  // Consumes the NextExpanded token and others that are part of the same run.
  // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
  // (unless this is a run of file tokens, which we represent with no mapping).
  void advance() {
    const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
    SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
    FileID File = SM.getFileID(Expansion);
    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
    auto &NextSpelled = this->NextSpelled[File];

    if (Tok.location().isFileID()) {
      // A run of file tokens continues while the expanded/spelled tokens match.
      while (NextSpelled < SpelledTokens.size() &&
             NextExpanded < Result.ExpandedTokens.size() &&
             SpelledTokens[NextSpelled].location() ==
                 Result.ExpandedTokens[NextExpanded].location()) {
        ++NextSpelled;
        ++NextExpanded;
      }
      // We need no mapping for file tokens copied to the expanded stream.
    } else {
      // We found a new macro expansion. We should have its spelling bounds.
      auto End = CollectedExpansions.lookup(Expansion.getRawEncoding());
      assert(End.isValid() && "Macro expansion wasn't captured?");

      // Mapping starts here...
      TokenBuffer::Mapping Mapping;
      Mapping.BeginExpanded = NextExpanded;
      Mapping.BeginSpelled = NextSpelled;
      // ... consumes spelled tokens within bounds we captured ...
      while (NextSpelled < SpelledTokens.size() &&
             SpelledTokens[NextSpelled].location() <= End)
        ++NextSpelled;
      // ... consumes expanded tokens rooted at the same expansion ...
      while (NextExpanded < Result.ExpandedTokens.size() &&
             SM.getExpansionLoc(
                 Result.ExpandedTokens[NextExpanded].location()) == Expansion)
        ++NextExpanded;
      // ... and ends here.
      Mapping.EndExpanded = NextExpanded;
      Mapping.EndSpelled = NextSpelled;
      Result.Files[File].Mappings.push_back(Mapping);
    }
  }
774
775
  // advance() is supposed to consume at least one token - if not, we crash.
776
0
  void diagnoseAdvanceFailure() {
777
0
#ifndef NDEBUG
778
    // Show the failed-to-map token in context.
779
0
    for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
780
0
         I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
781
0
      const char *L =
782
0
          (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : "   ";
783
0
      llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
784
0
    }
785
0
#endif
786
0
    llvm_unreachable("Couldn't map expanded token to spelled tokens!");
787
0
  }
788
789
  /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
790
  /// ranges for each of the files.
791
1.91k
  void buildSpelledTokens() {
792
39.8k
    for (unsigned I = 0; I < Result.ExpandedTokens.size(); 
++I37.9k
) {
793
37.9k
      const auto &Tok = Result.ExpandedTokens[I];
794
37.9k
      auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
795
37.9k
      auto It = Result.Files.try_emplace(FID);
796
37.9k
      TokenBuffer::MarkedFile &File = It.first->second;
797
37.9k
798
      // The eof token should not be considered part of the main-file's range.
799
35.9k
      File.EndExpanded = Tok.kind() == tok::eof ? 
I1.91k
: I + 1;
800
37.9k
801
37.9k
      if (!It.second)
802
35.9k
        continue; // we have seen this file before.
803
      // This is the first time we see this file.
804
1.92k
      File.BeginExpanded = I;
805
1.92k
      File.SpelledTokens = tokenize(FID, SM, LangOpts);
806
1.92k
    }
807
1.91k
  }
808
809
  // The buffer under construction; moved out by build().
  TokenBuffer Result;
  unsigned NextExpanded = 0;                    // cursor in ExpandedTokens
  llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
  // Maps a macro expansion's begin location (raw encoding) to its end
  // location; consulted in advance() and discard() to bound expansions.
  PPExpansions CollectedExpansions;
  const SourceManager &SM;
  const LangOptions &LangOpts;
};
816
817
1.91k
/// Finalizes collection: detaches from the preprocessor and assembles the
/// collected expanded tokens and expansion bounds into a TokenBuffer.
TokenBuffer TokenCollector::consume() && {
  // Stop observing tokens and macro expansions.
  PP.setTokenWatcher(nullptr);
  Collector->disable();
  // Hand everything we collected to the Builder, which pairs expanded tokens
  // with the spelled tokens that produced them.
  Builder B(std::move(Expanded), std::move(Expansions), PP.getSourceManager(),
            PP.getLangOpts());
  return std::move(B).build();
}
824
825
231
std::string syntax::Token::str() const {
826
231
  return std::string(llvm::formatv("Token({0}, length = {1})",
827
231
                                   tok::getTokenName(kind()), length()));
828
231
}
829
830
0
std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
831
0
  return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM),
832
0
                                   tok::getTokenName(kind()), length()));
833
0
}
834
835
15
std::string TokenBuffer::dumpForTests() const {
836
485
  auto PrintToken = [this](const syntax::Token &T) -> std::string {
837
485
    if (T.kind() == tok::eof)
838
12
      return "<eof>";
839
473
    return std::string(T.text(*SourceMgr));
840
473
  };
841
15
842
15
  auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
843
32
                                        llvm::ArrayRef<syntax::Token> Tokens) {
844
32
    if (Tokens.empty()) {
845
4
      OS << "<empty>";
846
4
      return;
847
4
    }
848
28
    OS << Tokens[0].text(*SourceMgr);
849
424
    for (unsigned I = 1; I < Tokens.size(); 
++I396
) {
850
396
      if (Tokens[I].kind() == tok::eof)
851
0
        continue;
852
396
      OS << " " << PrintToken(Tokens[I]);
853
396
    }
854
28
  };
855
15
856
15
  std::string Dump;
857
15
  llvm::raw_string_ostream OS(Dump);
858
15
859
15
  OS << "expanded tokens:\n"
860
15
     << "  ";
861
  // (!) we do not show '<eof>'.
862
15
  DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
863
15
  OS << "\n";
864
15
865
15
  std::vector<FileID> Keys;
866
15
  for (auto F : Files)
867
17
    Keys.push_back(F.first);
868
15
  llvm::sort(Keys);
869
15
870
17
  for (FileID ID : Keys) {
871
17
    const MarkedFile &File = Files.find(ID)->second;
872
17
    auto *Entry = SourceMgr->getFileEntryForID(ID);
873
17
    if (!Entry)
874
0
      continue; // Skip builtin files.
875
17
    OS << llvm::formatv("file '{0}'\n", Entry->getName())
876
17
       << "  spelled tokens:\n"
877
17
       << "    ";
878
17
    DumpTokens(OS, File.SpelledTokens);
879
17
    OS << "\n";
880
17
881
17
    if (File.Mappings.empty()) {
882
4
      OS << "  no mappings.\n";
883
4
      continue;
884
4
    }
885
13
    OS << "  mappings:\n";
886
24
    for (auto &M : File.Mappings) {
887
24
      OS << llvm::formatv(
888
24
          "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
889
24
          PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
890
24
          M.EndSpelled == File.SpelledTokens.size()
891
7
              ? "<eof>"
892
17
              : PrintToken(File.SpelledTokens[M.EndSpelled]),
893
24
          M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
894
24
          M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
895
24
          M.EndExpanded);
896
24
    }
897
13
  }
898
15
  return OS.str();
899
15
}