Coverage Report

Created: 2020-10-24 06:27

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Lex/Preprocessor.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
//  This file implements the Preprocessor interface.
10
//
11
//===----------------------------------------------------------------------===//
12
//
13
// Options to support:
14
//   -H       - Print the name of each header file used.
15
//   -d[DNI] - Dump various things.
16
//   -fworking-directory - #line's with preprocessor's working dir.
17
//   -fpreprocessed
18
//   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19
//   -W*
20
//   -w
21
//
22
// Messages to emit:
23
//   "Multiple include guards may be useful for:\n"
24
//
25
//===----------------------------------------------------------------------===//
26
27
#include "clang/Lex/Preprocessor.h"
28
#include "clang/Basic/Builtins.h"
29
#include "clang/Basic/FileManager.h"
30
#include "clang/Basic/FileSystemStatCache.h"
31
#include "clang/Basic/IdentifierTable.h"
32
#include "clang/Basic/LLVM.h"
33
#include "clang/Basic/LangOptions.h"
34
#include "clang/Basic/Module.h"
35
#include "clang/Basic/SourceLocation.h"
36
#include "clang/Basic/SourceManager.h"
37
#include "clang/Basic/TargetInfo.h"
38
#include "clang/Lex/CodeCompletionHandler.h"
39
#include "clang/Lex/ExternalPreprocessorSource.h"
40
#include "clang/Lex/HeaderSearch.h"
41
#include "clang/Lex/LexDiagnostic.h"
42
#include "clang/Lex/Lexer.h"
43
#include "clang/Lex/LiteralSupport.h"
44
#include "clang/Lex/MacroArgs.h"
45
#include "clang/Lex/MacroInfo.h"
46
#include "clang/Lex/ModuleLoader.h"
47
#include "clang/Lex/Pragma.h"
48
#include "clang/Lex/PreprocessingRecord.h"
49
#include "clang/Lex/PreprocessorLexer.h"
50
#include "clang/Lex/PreprocessorOptions.h"
51
#include "clang/Lex/ScratchBuffer.h"
52
#include "clang/Lex/Token.h"
53
#include "clang/Lex/TokenLexer.h"
54
#include "llvm/ADT/APInt.h"
55
#include "llvm/ADT/ArrayRef.h"
56
#include "llvm/ADT/DenseMap.h"
57
#include "llvm/ADT/STLExtras.h"
58
#include "llvm/ADT/SmallString.h"
59
#include "llvm/ADT/SmallVector.h"
60
#include "llvm/ADT/StringRef.h"
61
#include "llvm/ADT/StringSwitch.h"
62
#include "llvm/Support/Capacity.h"
63
#include "llvm/Support/ErrorHandling.h"
64
#include "llvm/Support/MemoryBuffer.h"
65
#include "llvm/Support/raw_ostream.h"
66
#include <algorithm>
67
#include <cassert>
68
#include <memory>
69
#include <string>
70
#include <utility>
71
#include <vector>
72
73
using namespace clang;
74
75
LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
76
77
11.5k
ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
78
79
Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
80
                           DiagnosticsEngine &diags, LangOptions &opts,
81
                           SourceManager &SM, HeaderSearch &Headers,
82
                           ModuleLoader &TheModuleLoader,
83
                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
84
                           TranslationUnitKind TUKind)
85
    : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
86
      FileMgr(Headers.getFileMgr()), SourceMgr(SM),
87
      ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
88
      TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
89
      // As the language options may have not been loaded yet (when
90
      // deserializing an ASTUnit), adding keywords to the identifier table is
91
      // deferred to Preprocessor::Initialize().
92
      Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
93
      TUKind(TUKind), SkipMainFilePreamble(0, true),
94
81.9k
      CurSubmoduleState(&NullSubmoduleState) {
95
81.9k
  OwnsHeaderSearch = OwnsHeaders;
96
97
  // Default to discarding comments.
98
81.9k
  KeepComments = false;
99
81.9k
  KeepMacroComments = false;
100
81.9k
  SuppressIncludeNotFoundError = false;
101
102
  // Macro expansion is enabled.
103
81.9k
  DisableMacroExpansion = false;
104
81.9k
  MacroExpansionInDirectivesOverride = false;
105
81.9k
  InMacroArgs = false;
106
81.9k
  ArgMacro = nullptr;
107
81.9k
  InMacroArgPreExpansion = false;
108
81.9k
  NumCachedTokenLexers = 0;
109
81.9k
  PragmasEnabled = true;
110
81.9k
  ParsingIfOrElifDirective = false;
111
81.9k
  PreprocessedOutput = false;
112
113
  // We haven't read anything from the external source.
114
81.9k
  ReadMacrosFromExternalSource = false;
115
116
81.9k
  BuiltinInfo = std::make_unique<Builtin::Context>();
117
118
  // "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
119
  // a macro. They get unpoisoned where it is allowed.
120
81.9k
  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
121
81.9k
  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
122
81.9k
  if (getLangOpts().CPlusPlus20) {
123
3.10k
    (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
124
3.10k
    SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
125
78.8k
  } else {
126
78.8k
    Ident__VA_OPT__ = nullptr;
127
78.8k
  }
128
129
  // Initialize the pragma handlers.
130
81.9k
  RegisterBuiltinPragmas();
131
132
  // Initialize builtin macros like __LINE__ and friends.
133
81.9k
  RegisterBuiltinMacros();
134
135
81.9k
  if(LangOpts.Borland) {
136
6
    Ident__exception_info        = getIdentifierInfo("_exception_info");
137
6
    Ident___exception_info       = getIdentifierInfo("__exception_info");
138
6
    Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
139
6
    Ident__exception_code        = getIdentifierInfo("_exception_code");
140
6
    Ident___exception_code       = getIdentifierInfo("__exception_code");
141
6
    Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
142
6
    Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
143
6
    Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
144
6
    Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
145
81.9k
  } else {
146
81.9k
    Ident__exception_info = Ident__exception_code = nullptr;
147
81.9k
    Ident__abnormal_termination = Ident___exception_info = nullptr;
148
81.9k
    Ident___exception_code = Ident___abnormal_termination = nullptr;
149
81.9k
    Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
150
81.9k
    Ident_AbnormalTermination = nullptr;
151
81.9k
  }
152
153
  // If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
154
81.9k
  if (usingPCHWithPragmaHdrStop())
155
8
    SkippingUntilPragmaHdrStop = true;
156
157
  // If using a PCH with a through header, start skipping tokens.
158
81.9k
  if (!this->PPOpts->PCHThroughHeader.empty() &&
159
32
      !this->PPOpts->ImplicitPCHInclude.empty())
160
17
    SkippingUntilPCHThroughHeader = true;
161
162
81.9k
  if (this->PPOpts->GeneratePreamble)
163
90
    PreambleConditionalStack.startRecording();
164
165
81.9k
  ExcludedConditionalDirectiveSkipMappings =
166
81.9k
      this->PPOpts->ExcludedConditionalDirectiveSkipMappings;
167
81.9k
  if (ExcludedConditionalDirectiveSkipMappings)
168
50
    ExcludedConditionalDirectiveSkipMappings->clear();
169
170
81.9k
  MaxTokens = LangOpts.MaxTokens;
171
81.9k
}
172
173
73.6k
Preprocessor::~Preprocessor() {
174
73.6k
  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
175
176
73.6k
  IncludeMacroStack.clear();
177
178
  // Destroy any macro definitions.
179
40.4M
  while (MacroInfoChain *I = MIChainHead) {
180
40.3M
    MIChainHead = I->Next;
181
40.3M
    I->~MacroInfoChain();
182
40.3M
  }
183
184
  // Free any cached macro expanders.
185
  // This populates MacroArgCache, so all TokenLexers need to be destroyed
186
  // before the code below that frees up the MacroArgCache list.
187
73.6k
  std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
188
73.6k
  CurTokenLexer.reset();
189
190
  // Free any cached MacroArgs.
191
105k
  for (MacroArgs *ArgList = MacroArgCache; ArgList;)
192
32.0k
    ArgList = ArgList->deallocate();
193
194
  // Delete the header search info, if we own it.
195
73.6k
  if (OwnsHeaderSearch)
196
73.4k
    delete &HeaderInfo;
197
73.6k
}
198
199
void Preprocessor::Initialize(const TargetInfo &Target,
200
81.9k
                              const TargetInfo *AuxTarget) {
201
81.9k
  assert((!this->Target || this->Target == &Target) &&
202
81.9k
         "Invalid override of target information");
203
81.9k
  this->Target = &Target;
204
205
81.9k
  assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
206
81.9k
         "Invalid override of aux target information.");
207
81.9k
  this->AuxTarget = AuxTarget;
208
209
  // Initialize information about built-ins.
210
81.9k
  BuiltinInfo->InitializeTarget(Target, AuxTarget);
211
81.9k
  HeaderInfo.setTarget(Target);
212
213
  // Populate the identifier table with info about keywords for the current language.
214
81.9k
  Identifiers.AddKeywords(LangOpts);
215
81.9k
}
216
217
2
void Preprocessor::InitializeForModelFile() {
218
2
  NumEnteredSourceFiles = 0;
219
220
  // Reset pragmas
221
2
  PragmaHandlersBackup = std::move(PragmaHandlers);
222
2
  PragmaHandlers = std::make_unique<PragmaNamespace>(StringRef());
223
2
  RegisterBuiltinPragmas();
224
225
  // Reset PredefinesFileID
226
2
  PredefinesFileID = FileID();
227
2
}
228
229
2
void Preprocessor::FinalizeForModelFile() {
230
2
  NumEnteredSourceFiles = 1;
231
232
2
  PragmaHandlers = std::move(PragmaHandlersBackup);
233
2
}
234
235
7
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
236
7
  llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
237
7
               << getSpelling(Tok) << "'";
238
239
7
  if (!DumpFlags) 
return0
;
240
241
7
  llvm::errs() << "\t";
242
7
  if (Tok.isAtStartOfLine())
243
3
    llvm::errs() << " [StartOfLine]";
244
7
  if (Tok.hasLeadingSpace())
245
0
    llvm::errs() << " [LeadingSpace]";
246
7
  if (Tok.isExpandDisabled())
247
0
    llvm::errs() << " [ExpandDisabled]";
248
7
  if (Tok.needsCleaning()) {
249
0
    const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
250
0
    llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
251
0
                 << "']";
252
0
  }
253
254
7
  llvm::errs() << "\tLoc=<";
255
7
  DumpLocation(Tok.getLocation());
256
7
  llvm::errs() << ">";
257
7
}
258
259
7
void Preprocessor::DumpLocation(SourceLocation Loc) const {
260
7
  Loc.print(llvm::errs(), SourceMgr);
261
7
}
262
263
0
void Preprocessor::DumpMacro(const MacroInfo &MI) const {
264
0
  llvm::errs() << "MACRO: ";
265
0
  for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
266
0
    DumpToken(MI.getReplacementToken(i));
267
0
    llvm::errs() << "  ";
268
0
  }
269
0
  llvm::errs() << "\n";
270
0
}
271
272
3
void Preprocessor::PrintStats() {
273
3
  llvm::errs() << "\n*** Preprocessor Stats:\n";
274
3
  llvm::errs() << NumDirectives << " directives found:\n";
275
3
  llvm::errs() << "  " << NumDefined << " #define.\n";
276
3
  llvm::errs() << "  " << NumUndefined << " #undef.\n";
277
3
  llvm::errs() << "  #include/#include_next/#import:\n";
278
3
  llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
279
3
  llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
280
3
  llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
281
3
  llvm::errs() << "  " << NumElse << " #else/#elif.\n";
282
3
  llvm::errs() << "  " << NumEndif << " #endif.\n";
283
3
  llvm::errs() << "  " << NumPragma << " #pragma.\n";
284
3
  llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
285
286
3
  llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
287
3
             << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
288
3
             << NumFastMacroExpanded << " on the fast path.\n";
289
3
  llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
290
3
             << " token paste (##) operations performed, "
291
3
             << NumFastTokenPaste << " on the fast path.\n";
292
293
3
  llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
294
295
3
  llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
296
3
  llvm::errs() << "\n  Macro Expanded Tokens: "
297
3
               << llvm::capacity_in_bytes(MacroExpandedTokens);
298
3
  llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
299
  // FIXME: List information for all submodules.
300
3
  llvm::errs() << "\n  Macros: "
301
3
               << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
302
3
  llvm::errs() << "\n  #pragma push_macro Info: "
303
3
               << llvm::capacity_in_bytes(PragmaPushMacroInfo);
304
3
  llvm::errs() << "\n  Poison Reasons: "
305
3
               << llvm::capacity_in_bytes(PoisonReasons);
306
3
  llvm::errs() << "\n  Comment Handlers: "
307
3
               << llvm::capacity_in_bytes(CommentHandlers) << "\n";
308
3
}
309
310
Preprocessor::macro_iterator
311
2.39k
Preprocessor::macro_begin(bool IncludeExternalMacros) const {
312
2.39k
  if (IncludeExternalMacros && 
ExternalSource2.38k
&&
313
440
      !ReadMacrosFromExternalSource) {
314
191
    ReadMacrosFromExternalSource = true;
315
191
    ExternalSource->ReadDefinedMacros();
316
191
  }
317
318
  // Make sure we cover all macros in visible modules.
319
2.39k
  for (const ModuleMacro &Macro : ModuleMacros)
320
5.22M
    CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
321
322
2.39k
  return CurSubmoduleState->Macros.begin();
323
2.39k
}
324
325
4
size_t Preprocessor::getTotalMemory() const {
326
4
  return BP.getTotalMemory()
327
4
    + llvm::capacity_in_bytes(MacroExpandedTokens)
328
4
    + Predefines.capacity() /* Predefines buffer. */
329
    // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
330
    // and ModuleMacros.
331
4
    + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
332
4
    + llvm::capacity_in_bytes(PragmaPushMacroInfo)
333
4
    + llvm::capacity_in_bytes(PoisonReasons)
334
4
    + llvm::capacity_in_bytes(CommentHandlers);
335
4
}
336
337
Preprocessor::macro_iterator
338
2.39k
Preprocessor::macro_end(bool IncludeExternalMacros) const {
339
2.39k
  if (IncludeExternalMacros && 
ExternalSource2.38k
&&
340
440
      !ReadMacrosFromExternalSource) {
341
0
    ReadMacrosFromExternalSource = true;
342
0
    ExternalSource->ReadDefinedMacros();
343
0
  }
344
345
2.39k
  return CurSubmoduleState->Macros.end();
346
2.39k
}
347
348
/// Compares macro tokens with a specified token value sequence.
349
static bool MacroDefinitionEquals(const MacroInfo *MI,
350
114k
                                  ArrayRef<TokenValue> Tokens) {
351
114k
  return Tokens.size() == MI->getNumTokens() &&
352
12.0k
      std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
353
114k
}
354
355
StringRef Preprocessor::getLastMacroWithSpelling(
356
                                    SourceLocation Loc,
357
301
                                    ArrayRef<TokenValue> Tokens) const {
358
301
  SourceLocation BestLocation;
359
301
  StringRef BestSpelling;
360
301
  for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
361
114k
       I != E; 
++I114k
) {
362
114k
    const MacroDirective::DefInfo
363
114k
      Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
364
114k
    if (!Def || 
!Def.getMacroInfo()114k
)
365
284
      continue;
366
114k
    if (!Def.getMacroInfo()->isObjectLike())
367
25
      continue;
368
114k
    if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
369
114k
      continue;
370
73
    SourceLocation Location = Def.getLocation();
371
    // Choose the macro defined latest.
372
73
    if (BestLocation.isInvalid() ||
373
13
        (Location.isValid() &&
374
69
         
SourceMgr.isBeforeInTranslationUnit(BestLocation, Location)13
)) {
375
69
      BestLocation = Location;
376
69
      BestSpelling = I->first->getName();
377
69
    }
378
73
  }
379
301
  return BestSpelling;
380
301
}
381
382
3.05M
void Preprocessor::recomputeCurLexerKind() {
383
3.05M
  if (CurLexer)
384
3.98k
    CurLexerKind = CLK_Lexer;
385
3.05M
  else if (CurTokenLexer)
386
9
    CurLexerKind = CLK_TokenLexer;
387
3.05M
  else
388
3.05M
    CurLexerKind = CLK_CachingLexer;
389
3.05M
}
390
391
bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
392
                                          unsigned CompleteLine,
393
1.22k
                                          unsigned CompleteColumn) {
394
1.22k
  assert(File);
395
1.22k
  assert(CompleteLine && CompleteColumn && "Starts from 1:1");
396
1.22k
  assert(!CodeCompletionFile && "Already set");
397
398
  // Load the actual file's contents.
399
1.22k
  Optional<llvm::MemoryBufferRef> Buffer =
400
1.22k
      SourceMgr.getMemoryBufferForFileOrNone(File);
401
1.22k
  if (!Buffer)
402
0
    return true;
403
404
  // Find the byte position of the truncation point.
405
1.22k
  const char *Position = Buffer->getBufferStart();
406
35.7k
  for (unsigned Line = 1; Line < CompleteLine; 
++Line34.5k
) {
407
886k
    for (; *Position; 
++Position852k
) {
408
886k
      if (*Position != '\r' && *Position != '\n')
409
852k
        continue;
410
411
      // Eat \r\n or \n\r as a single line.
412
34.5k
      if ((Position[1] == '\r' || Position[1] == '\n') &&
413
5.03k
          Position[0] != Position[1])
414
0
        ++Position;
415
34.5k
      ++Position;
416
34.5k
      break;
417
34.5k
    }
418
34.5k
  }
419
420
1.22k
  Position += CompleteColumn - 1;
421
422
  // If pointing inside the preamble, adjust the position at the beginning of
423
  // the file after the preamble.
424
1.22k
  if (SkipMainFilePreamble.first &&
425
75
      SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
426
75
    if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
427
5
      Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
428
75
  }
429
430
1.22k
  if (Position > Buffer->getBufferEnd())
431
1
    Position = Buffer->getBufferEnd();
432
433
1.22k
  CodeCompletionFile = File;
434
1.22k
  CodeCompletionOffset = Position - Buffer->getBufferStart();
435
436
1.22k
  auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
437
1.22k
      Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
438
1.22k
  char *NewBuf = NewBuffer->getBufferStart();
439
1.22k
  char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
440
1.22k
  *NewPos = '\0';
441
1.22k
  std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
442
1.22k
  SourceMgr.overrideFileContents(File, std::move(NewBuffer));
443
444
1.22k
  return false;
445
1.22k
}
446
447
void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
448
11
                                            bool IsAngled) {
449
11
  if (CodeComplete)
450
11
    CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
451
11
  setCodeCompletionReached();
452
11
}
453
454
50
void Preprocessor::CodeCompleteNaturalLanguage() {
455
50
  if (CodeComplete)
456
50
    CodeComplete->CodeCompleteNaturalLanguage();
457
50
  setCodeCompletionReached();
458
50
}
459
460
/// getSpelling - This method is used to get the spelling of a token into a
461
/// SmallVector. Note that the returned StringRef may not point to the
462
/// supplied buffer if a copy can be avoided.
463
StringRef Preprocessor::getSpelling(const Token &Tok,
464
                                          SmallVectorImpl<char> &Buffer,
465
12.2M
                                          bool *Invalid) const {
466
  // NOTE: this has to be checked *before* testing for an IdentifierInfo.
467
12.2M
  if (Tok.isNot(tok::raw_identifier) && 
!Tok.hasUCN()12.2M
) {
468
    // Try the fast path.
469
12.2M
    if (const IdentifierInfo *II = Tok.getIdentifierInfo())
470
124k
      return II->getName();
471
12.1M
  }
472
473
  // Resize the buffer if we need to copy into it.
474
12.1M
  if (Tok.needsCleaning())
475
9.95k
    Buffer.resize(Tok.getLength());
476
477
12.1M
  const char *Ptr = Buffer.data();
478
12.1M
  unsigned Len = getSpelling(Tok, Ptr, Invalid);
479
12.1M
  return StringRef(Ptr, Len);
480
12.1M
}
481
482
/// CreateString - Plop the specified string into a scratch buffer and return a
483
/// location for it.  If specified, the source location provides a source
484
/// location for the token.
485
void Preprocessor::CreateString(StringRef Str, Token &Tok,
486
                                SourceLocation ExpansionLocStart,
487
19.5M
                                SourceLocation ExpansionLocEnd) {
488
19.5M
  Tok.setLength(Str.size());
489
490
19.5M
  const char *DestPtr;
491
19.5M
  SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
492
493
19.5M
  if (ExpansionLocStart.isValid())
494
1.39M
    Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
495
1.39M
                                       ExpansionLocEnd, Str.size());
496
19.5M
  Tok.setLocation(Loc);
497
498
  // If this is a raw identifier or a literal token, set the pointer data.
499
19.5M
  if (Tok.is(tok::raw_identifier))
500
42.1k
    Tok.setRawIdentifierData(DestPtr);
501
19.4M
  else if (Tok.isLiteral())
502
18.8M
    Tok.setLiteralData(DestPtr);
503
19.5M
}
504
505
17.1k
SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
506
17.1k
  auto &SM = getSourceManager();
507
17.1k
  SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
508
17.1k
  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc);
509
17.1k
  bool Invalid = false;
510
17.1k
  StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
511
17.1k
  if (Invalid)
512
0
    return SourceLocation();
513
514
  // FIXME: We could consider re-using spelling for tokens we see repeatedly.
515
17.1k
  const char *DestPtr;
516
17.1k
  SourceLocation Spelling =
517
17.1k
      ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
518
17.1k
  return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
519
17.1k
}
520
521
18.3M
Module *Preprocessor::getCurrentModule() {
522
18.3M
  if (!getLangOpts().isCompilingModule())
523
16.4M
    return nullptr;
524
525
1.90M
  return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
526
1.90M
}
527
528
//===----------------------------------------------------------------------===//
529
// Preprocessor Initialization Methods
530
//===----------------------------------------------------------------------===//
531
532
/// EnterMainSourceFile - Enter the specified FileID as the main source file,
533
/// which implicitly adds the builtin defines etc.
534
81.6k
void Preprocessor::EnterMainSourceFile() {
535
  // We do not allow the preprocessor to reenter the main file.  Doing so will
536
  // cause FileID's to accumulate information from both runs (e.g. #line
537
  // information) and predefined macros aren't guaranteed to be set properly.
538
81.6k
  assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
539
81.6k
  FileID MainFileID = SourceMgr.getMainFileID();
540
541
  // If MainFileID is loaded it means we loaded an AST file, no need to enter
542
  // a main file.
543
81.6k
  if (!SourceMgr.isLoadedFileID(MainFileID)) {
544
    // Enter the main file source buffer.
545
81.6k
    EnterSourceFile(MainFileID, nullptr, SourceLocation());
546
547
    // If we've been asked to skip bytes in the main file (e.g., as part of a
548
    // precompiled preamble), do so now.
549
81.6k
    if (SkipMainFilePreamble.first > 0)
550
418
      CurLexer->SetByteOffset(SkipMainFilePreamble.first,
551
418
                              SkipMainFilePreamble.second);
552
553
    // Tell the header info that the main file was entered.  If the file is later
554
    // #imported, it won't be re-entered.
555
81.6k
    if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
556
71.4k
      HeaderInfo.IncrementIncludeCount(FE);
557
81.6k
  }
558
559
  // Preprocess Predefines to populate the initial preprocessor state.
560
81.6k
  std::unique_ptr<llvm::MemoryBuffer> SB =
561
81.6k
    llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
562
81.6k
  assert(SB && "Cannot create predefined source buffer");
563
81.6k
  FileID FID = SourceMgr.createFileID(std::move(SB));
564
81.6k
  assert(FID.isValid() && "Could not create FileID for predefines?");
565
81.6k
  setPredefinesFileID(FID);
566
567
  // Start parsing the predefines.
568
81.6k
  EnterSourceFile(FID, nullptr, SourceLocation());
569
570
81.6k
  if (!PPOpts->PCHThroughHeader.empty()) {
571
    // Lookup and save the FileID for the through header. If it isn't found
572
    // in the search path, it's a fatal error.
573
30
    const DirectoryLookup *CurDir;
574
30
    Optional<FileEntryRef> File = LookupFile(
575
30
        SourceLocation(), PPOpts->PCHThroughHeader,
576
30
        /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir,
577
30
        /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
578
30
        /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
579
30
        /*IsFrameworkFound=*/nullptr);
580
30
    if (!File) {
581
2
      Diag(SourceLocation(), diag::err_pp_through_header_not_found)
582
2
          << PPOpts->PCHThroughHeader;
583
2
      return;
584
2
    }
585
28
    setPCHThroughHeaderFileID(
586
28
        SourceMgr.createFileID(*File, SourceLocation(), SrcMgr::C_User));
587
28
  }
588
589
  // Skip tokens from the Predefines and if needed the main file.
590
81.6k
  if ((usingPCHWithThroughHeader() && 
SkippingUntilPCHThroughHeader15
) ||
591
81.6k
      (usingPCHWithPragmaHdrStop() && 
SkippingUntilPragmaHdrStop8
))
592
23
    SkipTokensWhileUsingPCH();
593
81.6k
}
594
595
28
void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
596
28
  assert(PCHThroughHeaderFileID.isInvalid() &&
597
28
         "PCHThroughHeaderFileID already set!");
598
28
  PCHThroughHeaderFileID = FID;
599
28
}
600
601
43
bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
602
43
  assert(PCHThroughHeaderFileID.isValid() &&
603
43
         "Invalid PCH through header FileID");
604
43
  return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
605
43
}
606
607
1.70M
bool Preprocessor::creatingPCHWithThroughHeader() {
608
1.70M
  return TUKind == TU_Prefix && 
!PPOpts->PCHThroughHeader.empty()7.35k
&&
609
40
         PCHThroughHeaderFileID.isValid();
610
1.70M
}
611
612
2.40M
bool Preprocessor::usingPCHWithThroughHeader() {
613
2.40M
  return TUKind != TU_Prefix && 
!PPOpts->PCHThroughHeader.empty()2.40M
&&
614
43
         PCHThroughHeaderFileID.isValid();
615
2.40M
}
616
617
7
bool Preprocessor::creatingPCHWithPragmaHdrStop() {
618
7
  return TUKind == TU_Prefix && 
PPOpts->PCHWithHdrStop2
;
619
7
}
620
621
163k
bool Preprocessor::usingPCHWithPragmaHdrStop() {
622
163k
  return TUKind != TU_Prefix && 
PPOpts->PCHWithHdrStop157k
;
623
163k
}
624
625
/// Skip tokens until after the #include of the through header or
626
/// until after a #pragma hdrstop is seen. Tokens in the predefines file
627
/// and the main file may be skipped. If the end of the predefines file
628
/// is reached, skipping continues into the main file. If the end of the
629
/// main file is reached, it's a fatal error.
630
23
void Preprocessor::SkipTokensWhileUsingPCH() {
631
23
  bool ReachedMainFileEOF = false;
632
23
  bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
633
23
  bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
634
23
  Token Tok;
635
197
  while (true) {
636
197
    bool InPredefines =
637
197
        (CurLexer && 
CurLexer->getFileID() == getPredefinesFileID()193
);
638
197
    switch (CurLexerKind) {
639
193
    case CLK_Lexer:
640
193
      CurLexer->Lex(Tok);
641
193
     break;
642
4
    case CLK_TokenLexer:
643
4
      CurTokenLexer->Lex(Tok);
644
4
      break;
645
0
    case CLK_CachingLexer:
646
0
      CachingLex(Tok);
647
0
      break;
648
0
    case CLK_LexAfterModuleImport:
649
0
      LexAfterModuleImport(Tok);
650
0
      break;
651
197
    }
652
197
    if (Tok.is(tok::eof) && 
!InPredefines4
) {
653
4
      ReachedMainFileEOF = true;
654
4
      break;
655
4
    }
656
193
    if (UsingPCHThroughHeader && 
!SkippingUntilPCHThroughHeader61
)
657
14
      break;
658
179
    if (UsingPragmaHdrStop && 
!SkippingUntilPragmaHdrStop132
)
659
5
      break;
660
179
  }
661
23
  if (ReachedMainFileEOF) {
662
4
    if (UsingPCHThroughHeader)
663
1
      Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
664
1
          << PPOpts->PCHThroughHeader << 1;
665
3
    else if (!PPOpts->PCHWithHdrStopCreate)
666
1
      Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
667
4
  }
668
23
}
669
670
81.6k
void Preprocessor::replayPreambleConditionalStack() {
671
  // Restore the conditional stack from the preamble, if there is one.
672
81.6k
  if (PreambleConditionalStack.isReplaying()) {
673
28
    assert(CurPPLexer &&
674
28
           "CurPPLexer is null when calling replayPreambleConditionalStack.");
675
28
    CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
676
28
    PreambleConditionalStack.doneReplaying();
677
28
    if (PreambleConditionalStack.reachedEOFWhileSkipping())
678
18
      SkipExcludedConditionalBlock(
679
18
          PreambleConditionalStack.SkipInfo->HashTokenLoc,
680
18
          PreambleConditionalStack.SkipInfo->IfTokenLoc,
681
18
          PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
682
18
          PreambleConditionalStack.SkipInfo->FoundElse,
683
18
          PreambleConditionalStack.SkipInfo->ElseLoc);
684
28
  }
685
81.6k
}
686
687
67.8k
void Preprocessor::EndSourceFile() {
688
  // Notify the client that we reached the end of the source file.
689
67.8k
  if (Callbacks)
690
66.3k
    Callbacks->EndOfMainFile();
691
67.8k
}
692
693
//===----------------------------------------------------------------------===//
694
// Lexer Event Handling.
695
//===----------------------------------------------------------------------===//
696
697
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
698
/// identifier information for the token and install it into the token,
699
/// updating the token kind accordingly.
700
513M
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
701
513M
  assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
702
703
  // Look up this token, see if it is a macro, or if it is a language keyword.
704
513M
  IdentifierInfo *II;
705
513M
  if (!Identifier.needsCleaning() && 
!Identifier.hasUCN()513M
) {
706
    // No cleaning needed, just use the characters from the lexed buffer.
707
513M
    II = getIdentifierInfo(Identifier.getRawIdentifier());
708
10.3k
  } else {
709
    // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
710
10.3k
    SmallString<64> IdentifierBuffer;
711
10.3k
    StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
712
713
10.3k
    if (Identifier.hasUCN()) {
714
155
      SmallString<64> UCNIdentifierBuffer;
715
155
      expandUCNs(UCNIdentifierBuffer, CleanedStr);
716
155
      II = getIdentifierInfo(UCNIdentifierBuffer);
717
10.1k
    } else {
718
10.1k
      II = getIdentifierInfo(CleanedStr);
719
10.1k
    }
720
10.3k
  }
721
722
  // Update the token info (identifier info and appropriate token kind).
723
513M
  Identifier.setIdentifierInfo(II);
724
513M
  if (getLangOpts().MSVCCompat && 
II->isCPlusPlusOperatorKeyword()9.38M
&&
725
31
      getSourceManager().isInSystemHeader(Identifier.getLocation()))
726
3
    Identifier.setKind(tok::identifier);
727
513M
  else
728
513M
    Identifier.setKind(II->getTokenID());
729
730
513M
  return II;
731
513M
}
732
733
85.0k
/// Associate the diagnostic \p DiagID with \p II in PoisonReasons, replacing
/// any previously recorded value. HandlePoisonedIdentifier reports this
/// diagnostic when the identifier is encountered.
void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
  unsigned &Reason = PoisonReasons[II];
  Reason = DiagID;
}
736
737
0
void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
738
0
  assert(Ident__exception_code && Ident__exception_info);
739
0
  assert(Ident___exception_code && Ident___exception_info);
740
0
  Ident__exception_code->setIsPoisoned(Poison);
741
0
  Ident___exception_code->setIsPoisoned(Poison);
742
0
  Ident_GetExceptionCode->setIsPoisoned(Poison);
743
0
  Ident__exception_info->setIsPoisoned(Poison);
744
0
  Ident___exception_info->setIsPoisoned(Poison);
745
0
  Ident_GetExceptionInfo->setIsPoisoned(Poison);
746
0
  Ident__abnormal_termination->setIsPoisoned(Poison);
747
0
  Ident___abnormal_termination->setIsPoisoned(Poison);
748
0
  Ident_AbnormalTermination->setIsPoisoned(Poison);
749
0
}
750
751
27.7k
/// Emit the diagnostic for a use of a poisoned identifier.
///
/// If a specific reason was registered in PoisonReasons for the token's
/// identifier, that diagnostic is reported with the identifier as argument;
/// otherwise the generic err_pp_used_poisoned_id error is reported.
void Preprocessor::HandlePoisonedIdentifier(Token &Identifier) {
  IdentifierInfo *PoisonedII = Identifier.getIdentifierInfo();
  assert(PoisonedII && "Can't handle identifiers without identifier info!");

  const auto Reason = PoisonReasons.find(PoisonedII);
  if (Reason != PoisonReasons.end()) {
    Diag(Identifier, Reason->second) << PoisonedII;
    return;
  }

  // No custom reason recorded: fall back to the generic error.
  Diag(Identifier, diag::err_pp_used_poisoned_id);
}
761
762
/// Map a future-compat keyword to the warning that should be reported for it.
///
/// Only C++ is handled: the identifier's name is matched against the
/// CXX11_KEYWORD / CXX20_KEYWORD entries of TokenKinds.def (plus "char8_t").
/// Any other language mode reaches llvm_unreachable.
///
/// \param II An identifier for which isFutureCompatKeyword() is true.
/// \param LangOpts The language options in effect.
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
                                          const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  if (!LangOpts.CPlusPlus)
    llvm_unreachable(
        "Keyword not known to come from a newer Standard or proposed Standard");

  return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS)                                             \
      .Case(#NAME, diag::warn_cxx11_keyword)
#define CXX20_KEYWORD(NAME, FLAGS)                                             \
      .Case(#NAME, diag::warn_cxx20_keyword)
#include "clang/Basic/TokenKinds.def"
      // char8_t is not modeled as a CXX20_KEYWORD because it's not
      // unconditionally enabled in C++20 mode. (It can be disabled
      // by -fno-char8_t.)
      .Case("char8_t", diag::warn_cxx20_keyword);
}
784
785
61.3k
/// Forward an out-of-date identifier to the external preprocessor source so
/// it can be refreshed.
///
/// \param II The identifier to refresh; II.isOutOfDate() must be true.
void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
  assert(II.isOutOfDate() && "not out of date");
  auto *Source = getExternalSource();
  Source->updateOutOfDateIdentifier(II);
}
789
790
/// HandleIdentifier - This callback is invoked when the lexer reads an
791
/// identifier.  This callback looks up the identifier in the map and/or
792
/// potentially macro expands it or turns it into a named token (like 'for').
793
///
794
/// Note that callers of this method are guarded by checking the
795
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
796
/// IdentifierInfo methods that compute these properties will need to change to
797
/// match.
798
143M
bool Preprocessor::HandleIdentifier(Token &Identifier) {
799
143M
  assert(Identifier.getIdentifierInfo() &&
800
143M
         "Can't handle identifiers without identifier info!");
801
802
143M
  IdentifierInfo &II = *Identifier.getIdentifierInfo();
803
804
  // If the information about this identifier is out of date, update it from
805
  // the external source.
806
  // We have to treat __VA_ARGS__ in a special way, since it gets
807
  // serialized with isPoisoned = true, but our preprocessor may have
808
  // unpoisoned it if we're defining a C99 macro.
809
143M
  if (II.isOutOfDate()) {
810
60.2k
    bool CurrentIsPoisoned = false;
811
60.2k
    const bool IsSpecialVariadicMacro =
812
60.2k
        &II == Ident__VA_ARGS__ || 
&II == Ident__VA_OPT__60.1k
;
813
60.2k
    if (IsSpecialVariadicMacro)
814
121
      CurrentIsPoisoned = II.isPoisoned();
815
816
60.2k
    updateOutOfDateIdentifier(II);
817
60.2k
    Identifier.setKind(II.getTokenID());
818
819
60.2k
    if (IsSpecialVariadicMacro)
820
121
      II.setIsPoisoned(CurrentIsPoisoned);
821
60.2k
  }
822
823
  // If this identifier was poisoned, and if it was not produced from a macro
824
  // expansion, emit an error.
825
143M
  if (II.isPoisoned() && 
CurPPLexer27.7k
) {
826
27.7k
    HandlePoisonedIdentifier(Identifier);
827
27.7k
  }
828
829
  // If this is a macro to be expanded, do it.
830
143M
  if (MacroDefinition MD = getMacroDefinition(&II)) {
831
143M
    auto *MI = MD.getMacroInfo();
832
143M
    assert(MI && "macro definition with no macro info?");
833
143M
    if (!DisableMacroExpansion) {
834
92.8M
      if (!Identifier.isExpandDisabled() && 
MI->isEnabled()92.8M
) {
835
        // C99 6.10.3p10: If the preprocessing token immediately after the
836
        // macro name isn't a '(', this macro should not be expanded.
837
92.8M
        if (!MI->isFunctionLike() || 
isNextPPTokenLParen()53.2M
)
838
87.0M
          return HandleMacroExpandedIdentifier(Identifier, MD);
839
2.56k
      } else {
840
        // C99 6.10.3.4p2 says that a disabled macro may never again be
841
        // expanded, even if it's in a context where it could be expanded in the
842
        // future.
843
2.56k
        Identifier.setFlag(Token::DisableExpand);
844
2.56k
        if (MI->isObjectLike() || 
isNextPPTokenLParen()574
)
845
2.14k
          Diag(Identifier, diag::pp_disabled_macro_expansion);
846
2.56k
      }
847
92.8M
    }
848
143M
  }
849
850
  // If this identifier is a keyword in a newer Standard or proposed Standard,
851
  // produce a warning. Don't warn if we're not considering macro expansion,
852
  // since this identifier might be the name of a macro.
853
  // FIXME: This warning is disabled in cases where it shouldn't be, like
854
  //   "#define constexpr constexpr", "int constexpr;"
855
56.8M
  if (II.isFutureCompatKeyword() && 
!DisableMacroExpansion1.00k
) {
856
166
    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
857
166
        << II.getName();
858
    // Don't diagnose this keyword again in this translation unit.
859
166
    II.setIsFutureCompatKeyword(false);
860
166
  }
861
862
  // If this is an extension token, diagnose its use.
863
  // We avoid diagnosing tokens that originate from macro definitions.
864
  // FIXME: This warning is disabled in cases where it shouldn't be,
865
  // like "#define TY typeof", "TY(1) x".
866
56.8M
  if (II.isExtensionToken() && 
!DisableMacroExpansion7.61k
)
867
5.94k
    Diag(Identifier, diag::ext_token_used);
868
869
  // If this is the 'import' contextual keyword following an '@', note
870
  // that the next token indicates a module name.
871
  //
872
  // Note that we do not treat 'import' as a contextual
873
  // keyword when we're in a caching lexer, because caching lexers only get
874
  // used in contexts where import declarations are disallowed.
875
  //
876
  // Likewise if this is the C++ Modules TS import keyword.
877
56.8M
  if (((LastTokenWasAt && 
II.isModulesImport()3.12k
) ||
878
56.8M
       Identifier.is(tok::kw_import)) &&
879
1.72k
      !InMacroArgs && 
!DisableMacroExpansion1.72k
&&
880
1.71k
      (getLangOpts().Modules || 
getLangOpts().DebuggerSupport41
) &&
881
1.71k
      CurLexerKind != CLK_CachingLexer) {
882
1.71k
    ModuleImportLoc = Identifier.getLocation();
883
1.71k
    ModuleImportPath.clear();
884
1.71k
    ModuleImportExpectsIdentifier = true;
885
1.71k
    CurLexerKind = CLK_LexAfterModuleImport;
886
1.71k
  }
887
56.8M
  return true;
888
143M
}
889
890
2.16G
/// Lex the next token into \p Result using whichever lexer kind is currently
/// active, then perform the per-token bookkeeping: code-completion identifier
/// capture, C++20 import-seq tracking, '@' tracking, token counting and the
/// OnToken callback.
void Preprocessor::Lex(Token &Result) {
  ++LexLevel;

  // Keep asking the active lexer until one of them actually produces a token;
  // looping here avoids recursion.
  bool HaveToken = false;
  while (!HaveToken) {
    switch (CurLexerKind) {
    case CLK_Lexer:
      HaveToken = CurLexer->Lex(Result);
      break;
    case CLK_TokenLexer:
      HaveToken = CurTokenLexer->Lex(Result);
      break;
    case CLK_CachingLexer:
      CachingLex(Result);
      HaveToken = true;
      break;
    case CLK_LexAfterModuleImport:
      HaveToken = LexAfterModuleImport(Result);
      break;
    }
  }

  // NOTE(review): this early exit returns without the matching --LexLevel
  // below; behavior is preserved as-is.
  if (Result.is(tok::unknown) && TheModuleLoader.HadFatalFailure)
    return;

  if (Result.is(tok::code_completion) && Result.getIdentifierInfo()) {
    // Remember the identifier before code completion token.
    setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
    setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
    // Set IdentifierInfo to null to avoid confusing code that handles both
    // identifiers and completion tokens.
    Result.setIdentifierInfo(nullptr);
  }

  // Update ImportSeqState to track our position within a C++20 import-seq
  // if this token is being produced as a result of phase 4 of translation.
  if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
      !Result.getFlag(Token::IsReinjected)) {
    switch (Result.getKind()) {
    case tok::l_paren:
    case tok::l_square:
    case tok::l_brace:
      ImportSeqState.handleOpenBracket();
      break;
    case tok::r_paren:
    case tok::r_square:
      ImportSeqState.handleCloseBracket();
      break;
    case tok::r_brace:
      ImportSeqState.handleCloseBrace();
      break;
    case tok::semi:
      ImportSeqState.handleSemi();
      break;
    case tok::header_name:
    case tok::annot_header_unit:
      ImportSeqState.handleHeaderName();
      break;
    case tok::kw_export:
      ImportSeqState.handleExport();
      break;
    case tok::identifier:
      // Any identifier other than 'import' is an ordinary token.
      if (!Result.getIdentifierInfo()->isModulesImport()) {
        ImportSeqState.handleMisc();
        break;
      }
      ImportSeqState.handleImport();
      if (ImportSeqState.afterImportSeq()) {
        // Switch to module-import lexing for the tokens that follow.
        ModuleImportLoc = Result.getLocation();
        ModuleImportPath.clear();
        ModuleImportExpectsIdentifier = true;
        CurLexerKind = CLK_LexAfterModuleImport;
      }
      break;
    default:
      ImportSeqState.handleMisc();
      break;
    }
  }

  LastTokenWasAt = Result.is(tok::at);
  --LexLevel;

  // Count the token and notify the OnToken observer, but only for
  // non-reinjected tokens produced at the outermost level (or at any level
  // when PreprocessToken is set).
  const bool IsOutermost = LexLevel == 0;
  if (!(IsOutermost || PreprocessToken) ||
      Result.getFlag(Token::IsReinjected))
    return;

  if (IsOutermost)
    ++TokenCount;
  if (OnToken)
    OnToken(Result);
}
978
979
/// Lex a header-name token (including one formed from header-name-tokens if
980
/// \p AllowConcatenation is \c true).
981
///
982
/// \param FilenameTok Filled in with the next token. On success, this will
983
///        be either a header_name token. On failure, it will be whatever other
984
///        token was found instead.
985
/// \param AllowMacroExpansion If \c true, allow the header name to be formed
986
///        by macro expansion (concatenating tokens as necessary if the first
987
///        token is a '<').
988
/// \return \c true if we reached EOD or EOF while looking for a > token in
989
///         a concatenated header name and diagnosed it. \c false otherwise.
990
2.38M
bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
991
  // Lex using header-name tokenization rules if tokens are being lexed from
992
  // a file. Just grab a token normally if we're in a macro expansion.
993
2.38M
  if (CurPPLexer)
994
2.38M
    CurPPLexer->LexIncludeFilename(FilenameTok);
995
21
  else
996
21
    Lex(FilenameTok);
997
998
  // This could be a <foo/bar.h> file coming from a macro expansion.  In this
999
  // case, glue the tokens together into an angle_string_literal token.
1000
2.38M
  SmallString<128> FilenameBuffer;
1001
2.38M
  if (FilenameTok.is(tok::less) && 
AllowMacroExpansion23
) {
1002
23
    bool StartOfLine = FilenameTok.isAtStartOfLine();
1003
23
    bool LeadingSpace = FilenameTok.hasLeadingSpace();
1004
23
    bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1005
1006
23
    SourceLocation Start = FilenameTok.getLocation();
1007
23
    SourceLocation End;
1008
23
    FilenameBuffer.push_back('<');
1009
1010
    // Consume tokens until we find a '>'.
1011
    // FIXME: A header-name could be formed starting or ending with an
1012
    // alternative token. It's not clear whether that's ill-formed in all
1013
    // cases.
1014
109
    while (FilenameTok.isNot(tok::greater)) {
1015
91
      Lex(FilenameTok);
1016
91
      if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
1017
5
        Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
1018
5
        Diag(Start, diag::note_matching) << tok::less;
1019
5
        return true;
1020
5
      }
1021
1022
86
      End = FilenameTok.getLocation();
1023
1024
      // FIXME: Provide code completion for #includes.
1025
86
      if (FilenameTok.is(tok::code_completion)) {
1026
0
        setCodeCompletionReached();
1027
0
        Lex(FilenameTok);
1028
0
        continue;
1029
0
      }
1030
1031
      // Append the spelling of this token to the buffer. If there was a space
1032
      // before it, add it now.
1033
86
      if (FilenameTok.hasLeadingSpace())
1034
4
        FilenameBuffer.push_back(' ');
1035
1036
      // Get the spelling of the token, directly into FilenameBuffer if
1037
      // possible.
1038
86
      size_t PreAppendSize = FilenameBuffer.size();
1039
86
      FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
1040
1041
86
      const char *BufPtr = &FilenameBuffer[PreAppendSize];
1042
86
      unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
1043
1044
      // If the token was spelled somewhere else, copy it into FilenameBuffer.
1045
86
      if (BufPtr != &FilenameBuffer[PreAppendSize])
1046
86
        memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
1047
1048
      // Resize FilenameBuffer to the correct size.
1049
86
      if (FilenameTok.getLength() != ActualLen)
1050
0
        FilenameBuffer.resize(PreAppendSize + ActualLen);
1051
86
    }
1052
1053
18
    FilenameTok.startToken();
1054
18
    FilenameTok.setKind(tok::header_name);
1055
18
    FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
1056
18
    FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
1057
18
    FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
1058
18
    CreateString(FilenameBuffer, FilenameTok, Start, End);
1059
2.38M
  } else if (FilenameTok.is(tok::string_literal) && 
AllowMacroExpansion72
) {
1060
    // Convert a string-literal token of the form " h-char-sequence "
1061
    // (produced by macro expansion) into a header-name token.
1062
    //
1063
    // The rules for header-names don't quite match the rules for
1064
    // string-literals, but all the places where they differ result in
1065
    // undefined behavior, so we can and do treat them the same.
1066
    //
1067
    // A string-literal with a prefix or suffix is not translated into a
1068
    // header-name. This could theoretically be observable via the C++20
1069
    // context-sensitive header-name formation rules.
1070
71
    StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
1071
71
    if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1072
71
      FilenameTok.setKind(tok::header_name);
1073
71
  }
1074
1075
2.38M
  return false;
1076
2.38M
}
1077
1078
/// Collect the tokens of a C++20 pp-import-suffix.
1079
39
void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
1080
  // FIXME: For error recovery, consider recognizing attribute syntax here
1081
  // and terminating / diagnosing a missing semicolon if we find anything
1082
  // else? (Can we leave that to the parser?)
1083
39
  unsigned BracketDepth = 0;
1084
104
  while (true) {
1085
104
    Toks.emplace_back();
1086
104
    Lex(Toks.back());
1087
1088
104
    switch (Toks.back().getKind()) {
1089
16
    case tok::l_paren: case tok::l_square: case tok::l_brace:
1090
16
      ++BracketDepth;
1091
16
      break;
1092
1093
28
    case tok::r_paren: case tok::r_square: case tok::r_brace:
1094
28
      if (BracketDepth == 0)
1095
12
        return;
1096
16
      --BracketDepth;
1097
16
      break;
1098
1099
26
    case tok::semi:
1100
26
      if (BracketDepth == 0)
1101
26
        return;
1102
0
    break;
1103
1104
1
    case tok::eof:
1105
1
      return;
1106
1107
33
    default:
1108
33
      break;
1109
104
    }
1110
104
  }
1111
39
}
1112
1113
1114
/// Lex a token following the 'import' contextual keyword.
1115
///
1116
///     pp-import: [C++20]
1117
///           import header-name pp-import-suffix[opt] ;
1118
///           import header-name-tokens pp-import-suffix[opt] ;
1119
/// [ObjC]    @ import module-name ;
1120
/// [Clang]   import module-name ;
1121
///
1122
///     header-name-tokens:
1123
///           string-literal
1124
///           < [any sequence of preprocessing-tokens other than >] >
1125
///
1126
///     module-name:
1127
///           module-name-qualifier[opt] identifier
1128
///
1129
///     module-name-qualifier
1130
///           module-name-qualifier[opt] identifier .
1131
///
1132
/// We respond to a pp-import by importing macros from the named module.
1133
3.99k
bool Preprocessor::LexAfterModuleImport(Token &Result) {
1134
  // Figure out what kind of lexer we actually have.
1135
3.99k
  recomputeCurLexerKind();
1136
1137
  // Lex the next token. The header-name lexing rules are used at the start of
1138
  // a pp-import.
1139
  //
1140
  // For now, we only support header-name imports in C++20 mode.
1141
  // FIXME: Should we allow this in all language modes that support an import
1142
  // declaration as an extension?
1143
3.99k
  if (ModuleImportPath.empty() && 
getLangOpts().CPlusPlusModules1.74k
) {
1144
32
    if (LexHeaderName(Result))
1145
0
      return true;
1146
3.96k
  } else {
1147
3.96k
    Lex(Result);
1148
3.96k
  }
1149
1150
  // Allocate a holding buffer for a sequence of tokens and introduce it into
1151
  // the token stream.
1152
3.99k
  auto EnterTokens = [this](ArrayRef<Token> Toks) {
1153
39
    auto ToksCopy = std::make_unique<Token[]>(Toks.size());
1154
39
    std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
1155
39
    EnterTokenStream(std::move(ToksCopy), Toks.size(),
1156
39
                     /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
1157
39
  };
1158
1159
  // Check for a header-name.
1160
3.99k
  SmallVector<Token, 32> Suffix;
1161
3.99k
  if (Result.is(tok::header_name)) {
1162
    // Enter the header-name token into the token stream; a Lex action cannot
1163
    // both return a token and cache tokens (doing so would corrupt the token
1164
    // cache if the call to Lex comes from CachingLex / PeekAhead).
1165
24
    Suffix.push_back(Result);
1166
1167
    // Consume the pp-import-suffix and expand any macros in it now. We'll add
1168
    // it back into the token stream later.
1169
24
    CollectPpImportSuffix(Suffix);
1170
24
    if (Suffix.back().isNot(tok::semi)) {
1171
      // This is not a pp-import after all.
1172
0
      EnterTokens(Suffix);
1173
0
      return false;
1174
0
    }
1175
1176
    // C++2a [cpp.module]p1:
1177
    //   The ';' preprocessing-token terminating a pp-import shall not have
1178
    //   been produced by macro replacement.
1179
24
    SourceLocation SemiLoc = Suffix.back().getLocation();
1180
24
    if (SemiLoc.isMacroID())
1181
1
      Diag(SemiLoc, diag::err_header_import_semi_in_macro);
1182
1183
    // Reconstitute the import token.
1184
24
    Token ImportTok;
1185
24
    ImportTok.startToken();
1186
24
    ImportTok.setKind(tok::kw_import);
1187
24
    ImportTok.setLocation(ModuleImportLoc);
1188
24
    ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
1189
24
    ImportTok.setLength(6);
1190
1191
24
    auto Action = HandleHeaderIncludeOrImport(
1192
24
        /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
1193
24
    switch (Action.Kind) {
1194
15
    case ImportAction::None:
1195
15
      break;
1196
1197
0
    case ImportAction::ModuleBegin:
1198
      // Let the parser know we're textually entering the module.
1199
0
      Suffix.emplace_back();
1200
0
      Suffix.back().startToken();
1201
0
      Suffix.back().setKind(tok::annot_module_begin);
1202
0
      Suffix.back().setLocation(SemiLoc);
1203
0
      Suffix.back().setAnnotationEndLoc(SemiLoc);
1204
0
      Suffix.back().setAnnotationValue(Action.ModuleForHeader);
1205
0
      LLVM_FALLTHROUGH;
1206
1207
9
    case ImportAction::ModuleImport:
1208
9
    case ImportAction::SkippedModuleImport:
1209
      // We chose to import (or textually enter) the file. Convert the
1210
      // header-name token into a header unit annotation token.
1211
9
      Suffix[0].setKind(tok::annot_header_unit);
1212
9
      Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
1213
9
      Suffix[0].setAnnotationValue(Action.ModuleForHeader);
1214
      // FIXME: Call the moduleImport callback?
1215
9
      break;
1216
0
    case ImportAction::Failure:
1217
0
      assert(TheModuleLoader.HadFatalFailure &&
1218
0
             "This should be an early exit only to a fatal error");
1219
0
      Result.setKind(tok::eof);
1220
0
      CurLexer->cutOffLexing();
1221
0
      EnterTokens(Suffix);
1222
0
      return true;
1223
24
    }
1224
1225
24
    EnterTokens(Suffix);
1226
24
    return false;
1227
24
  }
1228
1229
  // The token sequence
1230
  //
1231
  //   import identifier (. identifier)*
1232
  //
1233
  // indicates a module import directive. We already saw the 'import'
1234
  // contextual keyword, so now we're looking for the identifiers.
1235
3.97k
  if (ModuleImportExpectsIdentifier && 
Result.getKind() == tok::identifier1.99k
) {
1236
    // We expected to see an identifier here, and we did; continue handling
1237
    // identifiers.
1238
1.98k
    ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
1239
1.98k
                                              Result.getLocation()));
1240
1.98k
    ModuleImportExpectsIdentifier = false;
1241
1.98k
    CurLexerKind = CLK_LexAfterModuleImport;
1242
1.98k
    return true;
1243
1.98k
  }
1244
1245
  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
1246
  // see the next identifier. (We can also see a '[[' that begins an
1247
  // attribute-specifier-seq here under the C++ Modules TS.)
1248
1.99k
  if (!ModuleImportExpectsIdentifier && 
Result.getKind() == tok::period1.98k
) {
1249
270
    ModuleImportExpectsIdentifier = true;
1250
270
    CurLexerKind = CLK_LexAfterModuleImport;
1251
270
    return true;
1252
270
  }
1253
1254
  // If we didn't recognize a module name at all, this is not a (valid) import.
1255
1.72k
  if (ModuleImportPath.empty() || 
Result.is(tok::eof)1.71k
)
1256
8
    return true;
1257
1258
  // Consume the pp-import-suffix and expand any macros in it now, if we're not
1259
  // at the semicolon already.
1260
1.71k
  SourceLocation SemiLoc = Result.getLocation();
1261
1.71k
  if (Result.isNot(tok::semi)) {
1262
15
    Suffix.push_back(Result);
1263
15
    CollectPpImportSuffix(Suffix);
1264
15
    if (Suffix.back().isNot(tok::semi)) {
1265
      // This is not an import after all.
1266
13
      EnterTokens(Suffix);
1267
13
      return false;
1268
13
    }
1269
2
    SemiLoc = Suffix.back().getLocation();
1270
2
  }
1271
1272
  // Under the Modules TS, the dot is just part of the module name, and not
1273
  // a real hierarchy separator. Flatten such module names now.
1274
  //
1275
  // FIXME: Is this the right level to be performing this transformation?
1276
1.70k
  std::string FlatModuleName;
1277
1.70k
  if (getLangOpts().ModulesTS || 
getLangOpts().CPlusPlusModules1.64k
) {
1278
72
    for (auto &Piece : ModuleImportPath) {
1279
72
      if (!FlatModuleName.empty())
1280
7
        FlatModuleName += ".";
1281
72
      FlatModuleName += Piece.first->getName();
1282
72
    }
1283
65
    SourceLocation FirstPathLoc = ModuleImportPath[0].second;
1284
65
    ModuleImportPath.clear();
1285
65
    ModuleImportPath.push_back(
1286
65
        std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
1287
65
  }
1288
1289
1.70k
  Module *Imported = nullptr;
1290
1.70k
  if (getLangOpts().Modules) {
1291
1.66k
    Imported = TheModuleLoader.loadModule(ModuleImportLoc,
1292
1.66k
                                          ModuleImportPath,
1293
1.66k
                                          Module::Hidden,
1294
1.66k
                                          /*IsInclusionDirective=*/false);
1295
1.66k
    if (Imported)
1296
1.57k
      makeModuleVisible(Imported, SemiLoc);
1297
1.66k
  }
1298
1.70k
  if (Callbacks)
1299
1.69k
    Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
1300
1301
1.70k
  if (!Suffix.empty()) {
1302
2
    EnterTokens(Suffix);
1303
2
    return false;
1304
2
  }
1305
1.70k
  return true;
1306
1.70k
}
1307
1308
84.8k
/// Mark module \p M as visible at \p Loc in the current submodule state,
/// warning about any module conflict reported while doing so, and record it
/// as an import of the submodule currently being built (if there is one and
/// it is not \p M itself).
void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
  // Nothing to do when an individual module becomes visible.
  auto OnVisible = [](Module *) {};

  auto OnConflict = [&](ArrayRef<Module *> Path, Module *Conflict,
                        StringRef Message) {
    // FIXME: Include the path in the diagnostic.
    // FIXME: Include the import location for the conflicting module.
    Diag(ModuleImportLoc, diag::warn_module_conflict)
        << Path[0]->getFullModuleName()
        << Conflict->getFullModuleName()
        << Message;
  };

  CurSubmoduleState->VisibleModules.setVisible(M, Loc, OnVisible, OnConflict);

  // Add this module to the imports list of the currently-built submodule.
  if (BuildingSubmoduleStack.empty())
    return;
  Module *Building = BuildingSubmoduleStack.back().M;
  if (M != Building)
    Building->Imports.insert(M);
}
1324
1325
bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1326
                                          const char *DiagnosticTag,
1327
24.6k
                                          bool AllowMacroExpansion) {
1328
  // We need at least one string literal.
1329
24.6k
  if (Result.isNot(tok::string_literal)) {
1330
15
    Diag(Result, diag::err_expected_string_literal)
1331
15
      << /*Source='in...'*/0 << DiagnosticTag;
1332
15
    return false;
1333
15
  }
1334
1335
  // Lex string literal tokens, optionally with macro expansion.
1336
24.5k
  SmallVector<Token, 4> StrToks;
1337
24.6k
  do {
1338
24.6k
    StrToks.push_back(Result);
1339
1340
24.6k
    if (Result.hasUDSuffix())
1341
4
      Diag(Result, diag::err_invalid_string_udl);
1342
1343
24.6k
    if (AllowMacroExpansion)
1344
139
      Lex(Result);
1345
24.4k
    else
1346
24.4k
      LexUnexpandedToken(Result);
1347
24.6k
  } while (Result.is(tok::string_literal));
1348
1349
  // Concatenate and parse the strings.
1350
24.5k
  StringLiteralParser Literal(StrToks, *this);
1351
24.5k
  assert(Literal.isAscii() && "Didn't allow wide strings in");
1352
1353
24.5k
  if (Literal.hadError)
1354
0
    return false;
1355
1356
24.5k
  if (Literal.Pascal) {
1357
0
    Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
1358
0
      << /*Source='in...'*/0 << DiagnosticTag;
1359
0
    return false;
1360
0
  }
1361
1362
24.5k
  String = std::string(Literal.GetString());
1363
24.5k
  return true;
1364
24.5k
}
1365
1366
101
/// Parse the numeric_constant token \p Tok as a plain integer literal.
///
/// On success the value is stored in \p Value (via APInt::getLimitedValue on
/// a 64-bit APInt) and \p Tok is advanced to the next token. On failure
/// neither \p Tok nor \p Value is modified.
///
/// \returns \c false if the spelling is invalid, the literal has an error, is
///          not an integer literal, has a user-defined suffix, or
///          GetIntegerValue reports failure; \c true otherwise.
bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
  assert(Tok.is(tok::numeric_constant));

  SmallString<8> SpellingStorage;
  bool SpellingInvalid = false;
  StringRef Text = getSpelling(Tok, SpellingStorage, &SpellingInvalid);
  if (SpellingInvalid)
    return false;

  NumericLiteralParser Parser(Text, Tok.getLocation(), getSourceManager(),
                              getLangOpts(), getTargetInfo(),
                              getDiagnostics());
  const bool IsSimpleInteger =
      !Parser.hadError && Parser.isIntegerLiteral() && !Parser.hasUDSuffix();
  if (!IsSimpleInteger)
    return false;

  // GetIntegerValue returning true is treated as failure.
  llvm::APInt Parsed(64, 0);
  if (Parser.GetIntegerValue(Parsed))
    return false;

  // Consume the literal only once it is known to be valid.
  Lex(Tok);
  Value = Parsed.getLimitedValue();
  return true;
}
1385
1386
93.3k
/// Register \p Handler at the end of the list of comment handlers.
/// The handler must be non-null and must not already be registered.
void Preprocessor::addCommentHandler(CommentHandler *Handler) {
  assert(Handler && "NULL comment handler");
  assert(!llvm::is_contained(CommentHandlers, Handler) &&
         "Comment handler already registered");
  CommentHandlers.push_back(Handler);
}
1392
1393
93.2k
/// Unregister \p Handler from the list of comment handlers.
/// The handler must currently be registered.
void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
  auto Where = llvm::find(CommentHandlers, Handler);
  assert(Where != CommentHandlers.end() && "Comment handler not registered");
  CommentHandlers.erase(Where);
}
1399
1400
57.7M
/// Pass the comment range \p Comment to every registered comment handler.
///
/// If at least one handler returns \c true and comment retention is off, the
/// next token is lexed into \p result.
///
/// \returns \c true if a token was lexed into \p result; \c false otherwise.
bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
  // Every handler is invoked, even after one has already returned true.
  bool SawPendingTokens = false;
  for (CommentHandler *Handler : CommentHandlers)
    if (Handler->HandleComment(*this, Comment))
      SawPendingTokens = true;

  if (SawPendingTokens && !getCommentRetentionState()) {
    Lex(result);
    return true;
  }
  return false;
}
1413
1414
80.9k
// Out-of-line defaulted destructors for the loader/handler interface classes.
// NOTE(review): presumably defined in this file so each class's destructor
// (and vtable) is emitted in a single translation unit - confirm.
ModuleLoader::~ModuleLoader() = default;
1415
1416
93.2k
CommentHandler::~CommentHandler() = default;
1417
1418
64
EmptylineHandler::~EmptylineHandler() = default;
1419
1420
76.5k
CodeCompletionHandler::~CodeCompletionHandler() = default;
1421
1422
2.07k
void Preprocessor::createPreprocessingRecord() {
1423
2.07k
  if (Record)
1424
0
    return;
1425
1426
2.07k
  Record = new PreprocessingRecord(getSourceManager());
1427
2.07k
  addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
1428
2.07k
}