Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/clang/lib/Lex/Preprocessor.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
//  This file implements the Preprocessor interface.
10
//
11
//===----------------------------------------------------------------------===//
12
//
13
// Options to support:
14
//   -H       - Print the name of each header file used.
15
//   -d[DNI] - Dump various things.
16
//   -fworking-directory - #line's with preprocessor's working dir.
17
//   -fpreprocessed
18
//   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19
//   -W*
20
//   -w
21
//
22
// Messages to emit:
23
//   "Multiple include guards may be useful for:\n"
24
//
25
//===----------------------------------------------------------------------===//
26
27
#include "clang/Lex/Preprocessor.h"
28
#include "clang/Basic/FileManager.h"
29
#include "clang/Basic/FileSystemStatCache.h"
30
#include "clang/Basic/IdentifierTable.h"
31
#include "clang/Basic/LLVM.h"
32
#include "clang/Basic/LangOptions.h"
33
#include "clang/Basic/Module.h"
34
#include "clang/Basic/SourceLocation.h"
35
#include "clang/Basic/SourceManager.h"
36
#include "clang/Basic/TargetInfo.h"
37
#include "clang/Lex/CodeCompletionHandler.h"
38
#include "clang/Lex/ExternalPreprocessorSource.h"
39
#include "clang/Lex/HeaderSearch.h"
40
#include "clang/Lex/LexDiagnostic.h"
41
#include "clang/Lex/Lexer.h"
42
#include "clang/Lex/LiteralSupport.h"
43
#include "clang/Lex/MacroArgs.h"
44
#include "clang/Lex/MacroInfo.h"
45
#include "clang/Lex/ModuleLoader.h"
46
#include "clang/Lex/Pragma.h"
47
#include "clang/Lex/PreprocessingRecord.h"
48
#include "clang/Lex/PreprocessorLexer.h"
49
#include "clang/Lex/PreprocessorOptions.h"
50
#include "clang/Lex/ScratchBuffer.h"
51
#include "clang/Lex/Token.h"
52
#include "clang/Lex/TokenLexer.h"
53
#include "llvm/ADT/APInt.h"
54
#include "llvm/ADT/ArrayRef.h"
55
#include "llvm/ADT/DenseMap.h"
56
#include "llvm/ADT/SmallString.h"
57
#include "llvm/ADT/SmallVector.h"
58
#include "llvm/ADT/STLExtras.h"
59
#include "llvm/ADT/StringRef.h"
60
#include "llvm/ADT/StringSwitch.h"
61
#include "llvm/Support/Capacity.h"
62
#include "llvm/Support/ErrorHandling.h"
63
#include "llvm/Support/MemoryBuffer.h"
64
#include "llvm/Support/raw_ostream.h"
65
#include <algorithm>
66
#include <cassert>
67
#include <memory>
68
#include <string>
69
#include <utility>
70
#include <vector>
71
72
using namespace clang;
73
74
// Expands the LLVM_INSTANTIATE_REGISTRY macro for PragmaHandlerRegistry.
// NOTE(review): presumably this emits the registry's static storage in this
// translation unit -- confirm against llvm/Support/Registry.h.
LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
75
76
5.27k
// Out-of-line, compiler-generated destructor for ExternalPreprocessorSource.
// NOTE(review): presumably defined here rather than in the header to anchor
// the class's vtable in this file -- confirm against the class declaration.
ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
77
78
/// Construct a preprocessor.
///
/// Stores the supplied options, diagnostics engine, language options, source
/// manager, header search and module loader, resets all lexing state flags to
/// their defaults, poisons the variadic-macro identifiers, registers the
/// builtin pragmas and macros, and arms PCH / preamble handling according to
/// the preprocessor options.
///
/// \param OwnsHeaders when true, the destructor deletes \p Headers.
Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
                           DiagnosticsEngine &diags, LangOptions &opts,
                           SourceManager &SM, HeaderSearch &Headers,
                           ModuleLoader &TheModuleLoader,
                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
                           TranslationUnitKind TUKind)
    : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts),
      FileMgr(Headers.getFileMgr()), SourceMgr(SM),
      ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
      TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
      // Keywords are not added to the identifier table here: the language
      // options may not have been loaded yet (e.g. while deserializing an
      // ASTUnit), so that step is left to Preprocessor::Initialize().
      Identifiers(IILookup), PragmaHandlers(new PragmaNamespace(StringRef())),
      TUKind(TUKind), SkipMainFilePreamble(0, true),
      CurSubmoduleState(&NullSubmoduleState) {
  OwnsHeaderSearch = OwnsHeaders;

  // Comments are dropped unless a client asks for them.
  KeepComments = false;
  KeepMacroComments = false;
  SuppressIncludeNotFoundError = false;

  // Macro expansion starts out enabled, with no expansion in progress.
  DisableMacroExpansion = false;
  MacroExpansionInDirectivesOverride = false;
  InMacroArgs = false;
  ArgMacro = nullptr;
  InMacroArgPreExpansion = false;
  NumCachedTokenLexers = 0;
  PragmasEnabled = true;
  ParsingIfOrElifDirective = false;
  PreprocessedOutput = false;

  // Nothing has been pulled in from the external source so far.
  ReadMacrosFromExternalSource = false;

  // __VA_ARGS__ and __VA_OPT__ may only appear inside a macro expansion, so
  // they start out poisoned and are unpoisoned where their use is permitted.
  Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__");
  Ident__VA_ARGS__->setIsPoisoned();
  SetPoisonReason(Ident__VA_ARGS__, diag::ext_pp_bad_vaargs_use);

  if (!getLangOpts().CPlusPlus2a) {
    Ident__VA_OPT__ = nullptr;
  } else {
    Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__");
    Ident__VA_OPT__->setIsPoisoned();
    SetPoisonReason(Ident__VA_OPT__, diag::ext_pp_bad_vaopt_use);
  }

  // Install the builtin pragma handlers.
  RegisterBuiltinPragmas();

  // Install builtin macros such as __LINE__.
  RegisterBuiltinMacros();

  // The SEH-related identifiers are only looked up in Borland mode; otherwise
  // every one of them stays null.
  if (LangOpts.Borland) {
    Ident__exception_info = getIdentifierInfo("_exception_info");
    Ident___exception_info = getIdentifierInfo("__exception_info");
    Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
    Ident__exception_code = getIdentifierInfo("_exception_code");
    Ident___exception_code = getIdentifierInfo("__exception_code");
    Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
    Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
    Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
    Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
  } else {
    Ident__exception_info = nullptr;
    Ident___exception_info = nullptr;
    Ident_GetExceptionInfo = nullptr;
    Ident__exception_code = nullptr;
    Ident___exception_code = nullptr;
    Ident_GetExceptionCode = nullptr;
    Ident__abnormal_termination = nullptr;
    Ident___abnormal_termination = nullptr;
    Ident_AbnormalTermination = nullptr;
  }

  // A PCH that expects a #pragma hdrstop: skip tokens until it is seen.
  if (usingPCHWithPragmaHdrStop())
    SkippingUntilPragmaHdrStop = true;

  // A PCH with a through header: skip tokens until that header is included.
  const PreprocessorOptions &Options = *this->PPOpts;
  if (!Options.PCHThroughHeader.empty() && !Options.ImplicitPCHInclude.empty())
    SkippingUntilPCHThroughHeader = true;

  if (Options.GeneratePreamble)
    PreambleConditionalStack.startRecording();
}
162
163
34.0k
/// Tear down the preprocessor: clear the include stack, destroy the macro
/// info chain, release cached token lexers and macro argument lists, and
/// delete the header search object when this preprocessor owns it.
Preprocessor::~Preprocessor() {
  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");

  IncludeMacroStack.clear();

  // Run the destructor of every node on the macro definition chain.
  while (MIChainHead) {
    MacroInfoChain *Node = MIChainHead;
    MIChainHead = Node->Next;
    Node->~MacroInfoChain();
  }

  // Drop the cached macro expanders first. Doing so populates MacroArgCache,
  // so every TokenLexer has to be gone before that list is released below.
  for (unsigned Idx = 0; Idx != NumCachedTokenLexers; ++Idx)
    TokenLexerCache[Idx] = nullptr;
  CurTokenLexer.reset();

  // Release the cached MacroArgs; deallocate() hands back the next entry.
  MacroArgs *Cached = MacroArgCache;
  while (Cached)
    Cached = Cached->deallocate();

  // The header search info is only ours to delete if we were told we own it.
  if (OwnsHeaderSearch)
    delete &HeaderInfo;
}
188
189
void Preprocessor::Initialize(const TargetInfo &Target,
190
44.2k
                              const TargetInfo *AuxTarget) {
191
44.2k
  assert((!this->Target || this->Target == &Target) &&
192
44.2k
         "Invalid override of target information");
193
44.2k
  this->Target = &Target;
194
44.2k
195
44.2k
  assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
196
44.2k
         "Invalid override of aux target information.");
197
44.2k
  this->AuxTarget = AuxTarget;
198
44.2k
199
44.2k
  // Initialize information about built-ins.
200
44.2k
  BuiltinInfo.InitializeTarget(Target, AuxTarget);
201
44.2k
  HeaderInfo.setTarget(Target);
202
44.2k
203
44.2k
  // Populate the identifier table with info about keywords for the current language.
204
44.2k
  Identifiers.AddKeywords(LangOpts);
205
44.2k
}
206
207
2
void Preprocessor::InitializeForModelFile() {
208
2
  NumEnteredSourceFiles = 0;
209
2
210
2
  // Reset pragmas
211
2
  PragmaHandlersBackup = std::move(PragmaHandlers);
212
2
  PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
213
2
  RegisterBuiltinPragmas();
214
2
215
2
  // Reset PredefinesFileID
216
2
  PredefinesFileID = FileID();
217
2
}
218
219
2
void Preprocessor::FinalizeForModelFile() {
220
2
  NumEnteredSourceFiles = 1;
221
2
222
2
  PragmaHandlers = std::move(PragmaHandlersBackup);
223
2
}
224
225
7
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
226
7
  llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
227
7
               << getSpelling(Tok) << "'";
228
7
229
7
  if (!DumpFlags) 
return0
;
230
7
231
7
  llvm::errs() << "\t";
232
7
  if (Tok.isAtStartOfLine())
233
3
    llvm::errs() << " [StartOfLine]";
234
7
  if (Tok.hasLeadingSpace())
235
0
    llvm::errs() << " [LeadingSpace]";
236
7
  if (Tok.isExpandDisabled())
237
0
    llvm::errs() << " [ExpandDisabled]";
238
7
  if (Tok.needsCleaning()) {
239
0
    const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
240
0
    llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
241
0
                 << "']";
242
0
  }
243
7
244
7
  llvm::errs() << "\tLoc=<";
245
7
  DumpLocation(Tok.getLocation());
246
7
  llvm::errs() << ">";
247
7
}
248
249
7
/// Print \p Loc to stderr, resolved through this preprocessor's source
/// manager.
void Preprocessor::DumpLocation(SourceLocation Loc) const {
  llvm::raw_ostream &OS = llvm::errs();
  Loc.print(OS, SourceMgr);
}
252
253
0
void Preprocessor::DumpMacro(const MacroInfo &MI) const {
254
0
  llvm::errs() << "MACRO: ";
255
0
  for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
256
0
    DumpToken(MI.getReplacementToken(i));
257
0
    llvm::errs() << "  ";
258
0
  }
259
0
  llvm::errs() << "\n";
260
0
}
261
262
3
void Preprocessor::PrintStats() {
263
3
  llvm::errs() << "\n*** Preprocessor Stats:\n";
264
3
  llvm::errs() << NumDirectives << " directives found:\n";
265
3
  llvm::errs() << "  " << NumDefined << " #define.\n";
266
3
  llvm::errs() << "  " << NumUndefined << " #undef.\n";
267
3
  llvm::errs() << "  #include/#include_next/#import:\n";
268
3
  llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
269
3
  llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
270
3
  llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
271
3
  llvm::errs() << "  " << NumElse << " #else/#elif.\n";
272
3
  llvm::errs() << "  " << NumEndif << " #endif.\n";
273
3
  llvm::errs() << "  " << NumPragma << " #pragma.\n";
274
3
  llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
275
3
276
3
  llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
277
3
             << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
278
3
             << NumFastMacroExpanded << " on the fast path.\n";
279
3
  llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
280
3
             << " token paste (##) operations performed, "
281
3
             << NumFastTokenPaste << " on the fast path.\n";
282
3
283
3
  llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
284
3
285
3
  llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
286
3
  llvm::errs() << "\n  Macro Expanded Tokens: "
287
3
               << llvm::capacity_in_bytes(MacroExpandedTokens);
288
3
  llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
289
3
  // FIXME: List information for all submodules.
290
3
  llvm::errs() << "\n  Macros: "
291
3
               << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
292
3
  llvm::errs() << "\n  #pragma push_macro Info: "
293
3
               << llvm::capacity_in_bytes(PragmaPushMacroInfo);
294
3
  llvm::errs() << "\n  Poison Reasons: "
295
3
               << llvm::capacity_in_bytes(PoisonReasons);
296
3
  llvm::errs() << "\n  Comment Handlers: "
297
3
               << llvm::capacity_in_bytes(CommentHandlers) << "\n";
298
3
}
299
300
/// Return an iterator to the first macro of the current submodule state.
///
/// When \p IncludeExternalMacros is set and an external source exists whose
/// macros have not been read yet, they are read first. An entry is also
/// inserted for the identifier of every ModuleMacro before iterating.
Preprocessor::macro_iterator
Preprocessor::macro_begin(bool IncludeExternalMacros) const {
  const bool MustReadExternal =
      IncludeExternalMacros && ExternalSource && !ReadMacrosFromExternalSource;
  if (MustReadExternal) {
    ReadMacrosFromExternalSource = true;
    ExternalSource->ReadDefinedMacros();
  }

  // Ensure macros coming from visible modules are part of the iteration.
  for (const ModuleMacro &MM : ModuleMacros)
    CurSubmoduleState->Macros.insert(std::make_pair(MM.II, MacroState()));

  return CurSubmoduleState->Macros.begin();
}
314
315
4
size_t Preprocessor::getTotalMemory() const {
316
4
  return BP.getTotalMemory()
317
4
    + llvm::capacity_in_bytes(MacroExpandedTokens)
318
4
    + Predefines.capacity() /* Predefines buffer. */
319
4
    // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
320
4
    // and ModuleMacros.
321
4
    + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
322
4
    + llvm::capacity_in_bytes(PragmaPushMacroInfo)
323
4
    + llvm::capacity_in_bytes(PoisonReasons)
324
4
    + llvm::capacity_in_bytes(CommentHandlers);
325
4
}
326
327
/// Return the past-the-end iterator over the macros of the current submodule
/// state, reading the external source's macros first when
/// \p IncludeExternalMacros is set and they have not been read yet.
Preprocessor::macro_iterator
Preprocessor::macro_end(bool IncludeExternalMacros) const {
  const bool MustReadExternal =
      IncludeExternalMacros && ExternalSource && !ReadMacrosFromExternalSource;
  if (MustReadExternal) {
    ReadMacrosFromExternalSource = true;
    ExternalSource->ReadDefinedMacros();
  }

  return CurSubmoduleState->Macros.end();
}
337
338
/// Compares macro tokens with a specified token value sequence.
339
static bool MacroDefinitionEquals(const MacroInfo *MI,
340
56.1k
                                  ArrayRef<TokenValue> Tokens) {
341
56.1k
  return Tokens.size() == MI->getNumTokens() &&
342
56.1k
      
std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin())5.59k
;
343
56.1k
}
344
345
/// Find the name of the object-like macro whose definition in effect at
/// \p Loc consists exactly of \p Tokens. When several macros qualify, the one
/// defined latest in the translation unit wins. Returns an empty StringRef
/// when no macro matches.
StringRef Preprocessor::getLastMacroWithSpelling(
                                    SourceLocation Loc,
                                    ArrayRef<TokenValue> Tokens) const {
  SourceLocation LatestLoc;
  StringRef LatestName;

  const Preprocessor::macro_iterator End = macro_end();
  for (Preprocessor::macro_iterator It = macro_begin(); It != End; ++It) {
    const MacroDirective::DefInfo Def =
        It->second.findDirectiveAtLoc(Loc, SourceMgr);
    if (!Def)
      continue;

    // Only object-like macros with the exact token sequence are candidates.
    const MacroInfo *MI = Def.getMacroInfo();
    if (!MI || !MI->isObjectLike() || !MacroDefinitionEquals(MI, Tokens))
      continue;

    // Keep this candidate if it is the first one or was defined later than
    // the best one seen so far.
    const SourceLocation DefLoc = Def.getLocation();
    const bool IsLater =
        LatestLoc.isInvalid() ||
        (DefLoc.isValid() &&
         SourceMgr.isBeforeInTranslationUnit(LatestLoc, DefLoc));
    if (IsLater) {
      LatestLoc = DefLoc;
      LatestName = It->first->getName();
    }
  }

  return LatestName;
}
371
372
4.00M
void Preprocessor::recomputeCurLexerKind() {
373
4.00M
  if (CurLexer)
374
2.42k
    CurLexerKind = CLK_Lexer;
375
4.00M
  else if (CurTokenLexer)
376
9
    CurLexerKind = CLK_TokenLexer;
377
4.00M
  else
378
4.00M
    CurLexerKind = CLK_CachingLexer;
379
4.00M
}
380
381
bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
382
                                          unsigned CompleteLine,
383
1.10k
                                          unsigned CompleteColumn) {
384
1.10k
  assert(File);
385
1.10k
  assert(CompleteLine && CompleteColumn && "Starts from 1:1");
386
1.10k
  assert(!CodeCompletionFile && "Already set");
387
1.10k
388
1.10k
  using llvm::MemoryBuffer;
389
1.10k
390
1.10k
  // Load the actual file's contents.
391
1.10k
  bool Invalid = false;
392
1.10k
  const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
393
1.10k
  if (Invalid)
394
0
    return true;
395
1.10k
396
1.10k
  // Find the byte position of the truncation point.
397
1.10k
  const char *Position = Buffer->getBufferStart();
398
29.9k
  for (unsigned Line = 1; Line < CompleteLine; 
++Line28.8k
) {
399
725k
    for (; *Position; 
++Position696k
) {
400
725k
      if (*Position != '\r' && *Position != '\n')
401
696k
        continue;
402
28.8k
403
28.8k
      // Eat \r\n or \n\r as a single line.
404
28.8k
      if ((Position[1] == '\r' || Position[1] == '\n') &&
405
28.8k
          
Position[0] != Position[1]4.48k
)
406
0
        ++Position;
407
28.8k
      ++Position;
408
28.8k
      break;
409
28.8k
    }
410
28.8k
  }
411
1.10k
412
1.10k
  Position += CompleteColumn - 1;
413
1.10k
414
1.10k
  // If pointing inside the preamble, adjust the position at the beginning of
415
1.10k
  // the file after the preamble.
416
1.10k
  if (SkipMainFilePreamble.first &&
417
1.10k
      
SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File75
) {
418
75
    if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
419
5
      Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
420
75
  }
421
1.10k
422
1.10k
  if (Position > Buffer->getBufferEnd())
423
0
    Position = Buffer->getBufferEnd();
424
1.10k
425
1.10k
  CodeCompletionFile = File;
426
1.10k
  CodeCompletionOffset = Position - Buffer->getBufferStart();
427
1.10k
428
1.10k
  auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
429
1.10k
      Buffer->getBufferSize() + 1, Buffer->getBufferIdentifier());
430
1.10k
  char *NewBuf = NewBuffer->getBufferStart();
431
1.10k
  char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
432
1.10k
  *NewPos = '\0';
433
1.10k
  std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
434
1.10k
  SourceMgr.overrideFileContents(File, std::move(NewBuffer));
435
1.10k
436
1.10k
  return false;
437
1.10k
}
438
439
void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
440
9
                                            bool IsAngled) {
441
9
  if (CodeComplete)
442
9
    CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
443
9
  setCodeCompletionReached();
444
9
}
445
446
49
void Preprocessor::CodeCompleteNaturalLanguage() {
447
49
  if (CodeComplete)
448
49
    CodeComplete->CodeCompleteNaturalLanguage();
449
49
  setCodeCompletionReached();
450
49
}
451
452
/// getSpelling - This method is used to get the spelling of a token into a
453
/// SmallVector. Note that the returned StringRef may not point to the
454
/// supplied buffer if a copy can be avoided.
455
StringRef Preprocessor::getSpelling(const Token &Tok,
456
                                          SmallVectorImpl<char> &Buffer,
457
8.29M
                                          bool *Invalid) const {
458
8.29M
  // NOTE: this has to be checked *before* testing for an IdentifierInfo.
459
8.29M
  if (Tok.isNot(tok::raw_identifier) && 
!Tok.hasUCN()8.27M
) {
460
8.27M
    // Try the fast path.
461
8.27M
    if (const IdentifierInfo *II = Tok.getIdentifierInfo())
462
101k
      return II->getName();
463
8.18M
  }
464
8.18M
465
8.18M
  // Resize the buffer if we need to copy into it.
466
8.18M
  if (Tok.needsCleaning())
467
12.3k
    Buffer.resize(Tok.getLength());
468
8.18M
469
8.18M
  const char *Ptr = Buffer.data();
470
8.18M
  unsigned Len = getSpelling(Tok, Ptr, Invalid);
471
8.18M
  return StringRef(Ptr, Len);
472
8.18M
}
473
474
/// CreateString - Plop the specified string into a scratch buffer and return a
475
/// location for it.  If specified, the source location provides a source
476
/// location for the token.
477
void Preprocessor::CreateString(StringRef Str, Token &Tok,
478
                                SourceLocation ExpansionLocStart,
479
3.87M
                                SourceLocation ExpansionLocEnd) {
480
3.87M
  Tok.setLength(Str.size());
481
3.87M
482
3.87M
  const char *DestPtr;
483
3.87M
  SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
484
3.87M
485
3.87M
  if (ExpansionLocStart.isValid())
486
2.77M
    Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
487
2.77M
                                       ExpansionLocEnd, Str.size());
488
3.87M
  Tok.setLocation(Loc);
489
3.87M
490
3.87M
  // If this is a raw identifier or a literal token, set the pointer data.
491
3.87M
  if (Tok.is(tok::raw_identifier))
492
24.7k
    Tok.setRawIdentifierData(DestPtr);
493
3.85M
  else if (Tok.isLiteral())
494
3.81M
    Tok.setLiteralData(DestPtr);
495
3.87M
}
496
497
19.1k
/// Create a token-split location for the first \p Length characters of the
/// token at \p Loc. The characters are copied from the spelling buffer into
/// the scratch buffer. Returns an invalid SourceLocation if the buffer data
/// cannot be obtained.
SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
  SourceManager &SM = getSourceManager();

  // Locate the token's characters in the buffer it was spelled in.
  const SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
  const std::pair<FileID, unsigned> Decomposed =
      SM.getDecomposedLoc(SpellingLoc);

  bool Invalid = false;
  const StringRef Contents = SM.getBufferData(Decomposed.first, &Invalid);
  if (Invalid)
    return SourceLocation();

  // FIXME: We could consider re-using spelling for tokens we see repeatedly.
  const char *ScratchData;
  const SourceLocation ScratchLoc = ScratchBuf->getToken(
      Contents.data() + Decomposed.second, Length, ScratchData);
  return SM.createTokenSplitLoc(ScratchLoc, Loc, Loc.getLocWithOffset(Length));
}
512
513
498k
/// Return the module named by LangOptions::CurrentModule as found by the
/// header search, or null when the language options say no module is being
/// compiled.
Module *Preprocessor::getCurrentModule() {
  const LangOptions &LO = getLangOpts();
  return LO.isCompilingModule()
             ? getHeaderSearchInfo().lookupModule(LO.CurrentModule)
             : nullptr;
}
519
520
//===----------------------------------------------------------------------===//
521
// Preprocessor Initialization Methods
522
//===----------------------------------------------------------------------===//
523
524
/// EnterMainSourceFile - Enter the specified FileID as the main source file,
525
/// which implicitly adds the builtin defines etc.
526
44.0k
void Preprocessor::EnterMainSourceFile() {
527
44.0k
  // We do not allow the preprocessor to reenter the main file.  Doing so will
528
44.0k
  // cause FileID's to accumulate information from both runs (e.g. #line
529
44.0k
  // information) and predefined macros aren't guaranteed to be set properly.
530
44.0k
  assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
531
44.0k
  FileID MainFileID = SourceMgr.getMainFileID();
532
44.0k
533
44.0k
  // If MainFileID is loaded it means we loaded an AST file, no need to enter
534
44.0k
  // a main file.
535
44.0k
  if (!SourceMgr.isLoadedFileID(MainFileID)) {
536
43.9k
    // Enter the main file source buffer.
537
43.9k
    EnterSourceFile(MainFileID, nullptr, SourceLocation());
538
43.9k
539
43.9k
    // If we've been asked to skip bytes in the main file (e.g., as part of a
540
43.9k
    // precompiled preamble), do so now.
541
43.9k
    if (SkipMainFilePreamble.first > 0)
542
415
      CurLexer->SetByteOffset(SkipMainFilePreamble.first,
543
415
                              SkipMainFilePreamble.second);
544
43.9k
545
43.9k
    // Tell the header info that the main file was entered.  If the file is later
546
43.9k
    // #imported, it won't be re-entered.
547
43.9k
    if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
548
42.5k
      HeaderInfo.IncrementIncludeCount(FE);
549
43.9k
  }
550
44.0k
551
44.0k
  // Preprocess Predefines to populate the initial preprocessor state.
552
44.0k
  std::unique_ptr<llvm::MemoryBuffer> SB =
553
44.0k
    llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
554
44.0k
  assert(SB && "Cannot create predefined source buffer");
555
44.0k
  FileID FID = SourceMgr.createFileID(std::move(SB));
556
44.0k
  assert(FID.isValid() && "Could not create FileID for predefines?");
557
44.0k
  setPredefinesFileID(FID);
558
44.0k
559
44.0k
  // Start parsing the predefines.
560
44.0k
  EnterSourceFile(FID, nullptr, SourceLocation());
561
44.0k
562
44.0k
  if (!PPOpts->PCHThroughHeader.empty()) {
563
28
    // Lookup and save the FileID for the through header. If it isn't found
564
28
    // in the search path, it's a fatal error.
565
28
    const DirectoryLookup *CurDir;
566
28
    const FileEntry *File = LookupFile(
567
28
        SourceLocation(), PPOpts->PCHThroughHeader,
568
28
        /*isAngled=*/false, /*FromDir=*/nullptr, /*FromFile=*/nullptr, CurDir,
569
28
        /*SearchPath=*/nullptr, /*RelativePath=*/nullptr,
570
28
        /*SuggestedModule=*/nullptr, /*IsMapped=*/nullptr,
571
28
        /*IsFrameworkFound=*/nullptr);
572
28
    if (!File) {
573
2
      Diag(SourceLocation(), diag::err_pp_through_header_not_found)
574
2
          << PPOpts->PCHThroughHeader;
575
2
      return;
576
2
    }
577
26
    setPCHThroughHeaderFileID(
578
26
        SourceMgr.createFileID(File, SourceLocation(), SrcMgr::C_User));
579
26
  }
580
44.0k
581
44.0k
  // Skip tokens from the Predefines and if needed the main file.
582
44.0k
  
if (44.0k
(44.0k
usingPCHWithThroughHeader()44.0k
&&
SkippingUntilPCHThroughHeader13
) ||
583
44.0k
      
(43.9k
usingPCHWithPragmaHdrStop()43.9k
&&
SkippingUntilPragmaHdrStop8
))
584
21
    SkipTokensWhileUsingPCH();
585
44.0k
}
586
587
26
/// Record \p FID as the FileID of the PCH through header. May only be called
/// while no through-header FileID has been set.
void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
  // The through header is set at most once.
  assert(PCHThroughHeaderFileID.isInvalid() &&
         "PCHThroughHeaderFileID already set!");
  PCHThroughHeaderFileID = FID;
}
592
593
41
bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
594
41
  assert(PCHThroughHeaderFileID.isValid() &&
595
41
         "Invalid PCH through header FileID");
596
41
  return FE == SourceMgr.getFileEntryForID(PCHThroughHeaderFileID);
597
41
}
598
599
601k
bool Preprocessor::creatingPCHWithThroughHeader() {
600
601k
  return TUKind == TU_Prefix && 
!PPOpts->PCHThroughHeader.empty()4.55k
&&
601
601k
         
PCHThroughHeaderFileID.isValid()40
;
602
601k
}
603
604
1.04M
bool Preprocessor::usingPCHWithThroughHeader() {
605
1.04M
  return TUKind != TU_Prefix && 
!PPOpts->PCHThroughHeader.empty()1.04M
&&
606
1.04M
         
PCHThroughHeaderFileID.isValid()39
;
607
1.04M
}
608
609
7
bool Preprocessor::creatingPCHWithPragmaHdrStop() {
610
7
  return TUKind == TU_Prefix && 
PPOpts->PCHWithHdrStop2
;
611
7
}
612
613
88.2k
bool Preprocessor::usingPCHWithPragmaHdrStop() {
614
88.2k
  return TUKind != TU_Prefix && 
PPOpts->PCHWithHdrStop83.8k
;
615
88.2k
}
616
617
/// Skip tokens until after the #include of the through header or
618
/// until after a #pragma hdrstop is seen. Tokens in the predefines file
619
/// and the main file may be skipped. If the end of the predefines file
620
/// is reached, skipping continues into the main file. If the end of the
621
/// main file is reached, it's a fatal error.
622
21
void Preprocessor::SkipTokensWhileUsingPCH() {
623
21
  bool ReachedMainFileEOF = false;
624
21
  bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
625
21
  bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
626
21
  Token Tok;
627
191
  while (true) {
628
191
    bool InPredefines =
629
191
        (CurLexer && 
CurLexer->getFileID() == getPredefinesFileID()187
);
630
191
    switch (CurLexerKind) {
631
191
    case CLK_Lexer:
632
187
      CurLexer->Lex(Tok);
633
187
     break;
634
191
    case CLK_TokenLexer:
635
4
      CurTokenLexer->Lex(Tok);
636
4
      break;
637
191
    case CLK_CachingLexer:
638
0
      CachingLex(Tok);
639
0
      break;
640
191
    case CLK_LexAfterModuleImport:
641
0
      LexAfterModuleImport(Tok);
642
0
      break;
643
191
    }
644
191
    if (Tok.is(tok::eof) && 
!InPredefines4
) {
645
4
      ReachedMainFileEOF = true;
646
4
      break;
647
4
    }
648
187
    if (UsingPCHThroughHeader && 
!SkippingUntilPCHThroughHeader55
)
649
12
      break;
650
175
    if (UsingPragmaHdrStop && 
!SkippingUntilPragmaHdrStop132
)
651
5
      break;
652
175
  }
653
21
  if (ReachedMainFileEOF) {
654
4
    if (UsingPCHThroughHeader)
655
1
      Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
656
1
          << PPOpts->PCHThroughHeader << 1;
657
3
    else if (!PPOpts->PCHWithHdrStopCreate)
658
1
      Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
659
4
  }
660
21
}
661
662
43.9k
void Preprocessor::replayPreambleConditionalStack() {
663
43.9k
  // Restore the conditional stack from the preamble, if there is one.
664
43.9k
  if (PreambleConditionalStack.isReplaying()) {
665
28
    assert(CurPPLexer &&
666
28
           "CurPPLexer is null when calling replayPreambleConditionalStack.");
667
28
    CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
668
28
    PreambleConditionalStack.doneReplaying();
669
28
    if (PreambleConditionalStack.reachedEOFWhileSkipping())
670
18
      SkipExcludedConditionalBlock(
671
18
          PreambleConditionalStack.SkipInfo->HashTokenLoc,
672
18
          PreambleConditionalStack.SkipInfo->IfTokenLoc,
673
18
          PreambleConditionalStack.SkipInfo->FoundNonSkipPortion,
674
18
          PreambleConditionalStack.SkipInfo->FoundElse,
675
18
          PreambleConditionalStack.SkipInfo->ElseLoc);
676
28
  }
677
43.9k
}
678
679
43.7k
void Preprocessor::EndSourceFile() {
680
43.7k
  // Notify the client that we reached the end of the source file.
681
43.7k
  if (Callbacks)
682
42.5k
    Callbacks->EndOfMainFile();
683
43.7k
}
684
685
//===----------------------------------------------------------------------===//
686
// Lexer Event Handling.
687
//===----------------------------------------------------------------------===//
688
689
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
690
/// identifier information for the token and install it into the token,
691
/// updating the token kind accordingly.
692
228M
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
693
228M
  assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
694
228M
695
228M
  // Look up this token, see if it is a macro, or if it is a language keyword.
696
228M
  IdentifierInfo *II;
697
228M
  if (!Identifier.needsCleaning() && 
!Identifier.hasUCN()228M
) {
698
228M
    // No cleaning needed, just use the characters from the lexed buffer.
699
228M
    II = getIdentifierInfo(Identifier.getRawIdentifier());
700
228M
  } else {
701
12.3k
    // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
702
12.3k
    SmallString<64> IdentifierBuffer;
703
12.3k
    StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
704
12.3k
705
12.3k
    if (Identifier.hasUCN()) {
706
122
      SmallString<64> UCNIdentifierBuffer;
707
122
      expandUCNs(UCNIdentifierBuffer, CleanedStr);
708
122
      II = getIdentifierInfo(UCNIdentifierBuffer);
709
12.2k
    } else {
710
12.2k
      II = getIdentifierInfo(CleanedStr);
711
12.2k
    }
712
12.3k
  }
713
228M
714
228M
  // Update the token info (identifier info and appropriate token kind).
715
228M
  Identifier.setIdentifierInfo(II);
716
228M
  if (getLangOpts().MSVCCompat && 
II->isCPlusPlusOperatorKeyword()2.14M
&&
717
228M
      
getSourceManager().isInSystemHeader(Identifier.getLocation())6
)
718
3
    Identifier.setKind(tok::identifier);
719
228M
  else
720
228M
    Identifier.setKind(II->getTokenID());
721
228M
722
228M
  return II;
723
228M
}
724
725
44.5k
/// Associate the diagnostic \p DiagID with identifier \p II in the
/// PoisonReasons map, replacing any entry already stored for \p II.
void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
  auto &ReasonSlot = PoisonReasons[II];
  ReasonSlot = DiagID;
}
728
729
0
void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
730
0
  assert(Ident__exception_code && Ident__exception_info);
731
0
  assert(Ident___exception_code && Ident___exception_info);
732
0
  Ident__exception_code->setIsPoisoned(Poison);
733
0
  Ident___exception_code->setIsPoisoned(Poison);
734
0
  Ident_GetExceptionCode->setIsPoisoned(Poison);
735
0
  Ident__exception_info->setIsPoisoned(Poison);
736
0
  Ident___exception_info->setIsPoisoned(Poison);
737
0
  Ident_GetExceptionInfo->setIsPoisoned(Poison);
738
0
  Ident__abnormal_termination->setIsPoisoned(Poison);
739
0
  Ident___abnormal_termination->setIsPoisoned(Poison);
740
0
  Ident_AbnormalTermination->setIsPoisoned(Poison);
741
0
}
742
743
38
/// Emit the diagnostic for a use of a poisoned identifier.
///
/// If a specific diagnostic was registered for the identifier in
/// PoisonReasons, that diagnostic is emitted with the identifier as its
/// argument; otherwise diag::err_pp_used_poisoned_id is emitted.
void Preprocessor::HandlePoisonedIdentifier(Token &Identifier) {
  IdentifierInfo *PoisonedII = Identifier.getIdentifierInfo();
  assert(PoisonedII && "Can't handle identifiers without identifier info!");

  auto Reason = PoisonReasons.find(PoisonedII);
  if (Reason != PoisonReasons.end()) {
    Diag(Identifier, Reason->second) << PoisonedII;
    return;
  }

  // No custom reason on record: fall back to the generic error.
  Diag(Identifier, diag::err_pp_used_poisoned_id);
}
753
754
/// Pick the warning used to report \p II as a keyword of a newer (or
/// proposed) Standard.
///
/// Only C++ is handled: the identifier's name is matched against the
/// CXX11_KEYWORD and CXX2A_KEYWORD entries of TokenKinds.def. Any other
/// language mode is treated as unreachable.
static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
                                          const LangOptions &LangOpts) {
  assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");

  if (!LangOpts.CPlusPlus)
    llvm_unreachable(
        "Keyword not known to come from a newer Standard or proposed Standard");

  // One .Case per keyword entry; the entries are generated by the include.
  return llvm::StringSwitch<diag::kind>(II.getName())
#define CXX11_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx11_keyword)
#define CXX2A_KEYWORD(NAME, FLAGS)                                             \
        .Case(#NAME, diag::warn_cxx2a_keyword)
#include "clang/Basic/TokenKinds.def"
        ;
}
772
773
5.20k
/// Ask the external preprocessor source to refresh \p II, which must
/// currently be flagged as out of date.
void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
  assert(II.isOutOfDate() && "not out of date");
  auto *Source = getExternalSource();
  Source->updateOutOfDateIdentifier(II);
}
777
778
/// HandleIdentifier - This callback is invoked when the lexer reads an
779
/// identifier.  This callback looks up the identifier in the map and/or
780
/// potentially macro expands it or turns it into a named token (like 'for').
781
///
782
/// Note that callers of this method are guarded by checking the
783
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
784
/// IdentifierInfo methods that compute these properties will need to change to
785
/// match.
786
25.8M
bool Preprocessor::HandleIdentifier(Token &Identifier) {
787
25.8M
  assert(Identifier.getIdentifierInfo() &&
788
25.8M
         "Can't handle identifiers without identifier info!");
789
25.8M
790
25.8M
  IdentifierInfo &II = *Identifier.getIdentifierInfo();
791
25.8M
792
25.8M
  // If the information about this identifier is out of date, update it from
793
25.8M
  // the external source.
794
25.8M
  // We have to treat __VA_ARGS__ in a special way, since it gets
795
25.8M
  // serialized with isPoisoned = true, but our preprocessor may have
796
25.8M
  // unpoisoned it if we're defining a C99 macro.
797
25.8M
  if (II.isOutOfDate()) {
798
4.66k
    bool CurrentIsPoisoned = false;
799
4.66k
    const bool IsSpecialVariadicMacro =
800
4.66k
        &II == Ident__VA_ARGS__ || 
&II == Ident__VA_OPT__4.66k
;
801
4.66k
    if (IsSpecialVariadicMacro)
802
1
      CurrentIsPoisoned = II.isPoisoned();
803
4.66k
804
4.66k
    updateOutOfDateIdentifier(II);
805
4.66k
    Identifier.setKind(II.getTokenID());
806
4.66k
807
4.66k
    if (IsSpecialVariadicMacro)
808
1
      II.setIsPoisoned(CurrentIsPoisoned);
809
4.66k
  }
810
25.8M
811
25.8M
  // If this identifier was poisoned, and if it was not produced from a macro
812
25.8M
  // expansion, emit an error.
813
25.8M
  if (II.isPoisoned() && 
CurPPLexer40
) {
814
36
    HandlePoisonedIdentifier(Identifier);
815
36
  }
816
25.8M
817
25.8M
  // If this is a macro to be expanded, do it.
818
25.8M
  if (MacroDefinition MD = getMacroDefinition(&II)) {
819
25.8M
    auto *MI = MD.getMacroInfo();
820
25.8M
    assert(MI && "macro definition with no macro info?");
821
25.8M
    if (!DisableMacroExpansion) {
822
19.6M
      if (!Identifier.isExpandDisabled() && 
MI->isEnabled()19.6M
) {
823
19.6M
        // C99 6.10.3p10: If the preprocessing token immediately after the
824
19.6M
        // macro name isn't a '(', this macro should not be expanded.
825
19.6M
        if (!MI->isFunctionLike() || 
isNextPPTokenLParen()4.64M
)
826
19.5M
          return HandleMacroExpandedIdentifier(Identifier, MD);
827
3.38k
      } else {
828
3.38k
        // C99 6.10.3.4p2 says that a disabled macro may never again be
829
3.38k
        // expanded, even if it's in a context where it could be expanded in the
830
3.38k
        // future.
831
3.38k
        Identifier.setFlag(Token::DisableExpand);
832
3.38k
        if (MI->isObjectLike() || 
isNextPPTokenLParen()450
)
833
2.96k
          Diag(Identifier, diag::pp_disabled_macro_expansion);
834
3.38k
      }
835
19.6M
    }
836
25.8M
  }
837
25.8M
838
25.8M
  // If this identifier is a keyword in a newer Standard or proposed Standard,
839
25.8M
  // produce a warning. Don't warn if we're not considering macro expansion,
840
25.8M
  // since this identifier might be the name of a macro.
841
25.8M
  // FIXME: This warning is disabled in cases where it shouldn't be, like
842
25.8M
  //   "#define constexpr constexpr", "int constexpr;"
843
25.8M
  
if (6.35M
II.isFutureCompatKeyword()6.35M
&&
!DisableMacroExpansion1.25k
) {
844
301
    Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
845
301
        << II.getName();
846
301
    // Don't diagnose this keyword again in this translation unit.
847
301
    II.setIsFutureCompatKeyword(false);
848
301
  }
849
6.35M
850
6.35M
  // If this is an extension token, diagnose its use.
851
6.35M
  // We avoid diagnosing tokens that originate from macro definitions.
852
6.35M
  // FIXME: This warning is disabled in cases where it shouldn't be,
853
6.35M
  // like "#define TY typeof", "TY(1) x".
854
6.35M
  if (II.isExtensionToken() && 
!DisableMacroExpansion5.16k
)
855
4.96k
    Diag(Identifier, diag::ext_token_used);
856
6.35M
857
6.35M
  // If this is the 'import' contextual keyword following an '@', note
858
6.35M
  // that the next token indicates a module name.
859
6.35M
  //
860
6.35M
  // Note that we do not treat 'import' as a contextual
861
6.35M
  // keyword when we're in a caching lexer, because caching lexers only get
862
6.35M
  // used in contexts where import declarations are disallowed.
863
6.35M
  //
864
6.35M
  // Likewise if this is the C++ Modules TS import keyword.
865
6.35M
  if (((LastTokenWasAt && 
II.isModulesImport()1.12k
) ||
866
6.35M
       
Identifier.is(tok::kw_import)6.35M
) &&
867
6.35M
      
!InMacroArgs951
&&
!DisableMacroExpansion950
&&
868
6.35M
      
(940
getLangOpts().Modules940
||
getLangOpts().DebuggerSupport4
) &&
869
6.35M
      
CurLexerKind != CLK_CachingLexer937
) {
870
937
    ModuleImportLoc = Identifier.getLocation();
871
937
    ModuleImportPath.clear();
872
937
    ModuleImportExpectsIdentifier = true;
873
937
    CurLexerKind = CLK_LexAfterModuleImport;
874
937
  }
875
6.35M
  return true;
876
25.8M
}
877
878
902M
/// Produce the next token into \p Result.
///
/// Dispatches to whichever lexer \c CurLexerKind currently selects and keeps
/// asking until one of them reports that it returned a token. The token is
/// then post-processed: identifier info attached to a code-completion token
/// is recorded and cleared, the C++20 import-seq state is advanced,
/// \c LastTokenWasAt is refreshed, and \c OnToken (if set) is invoked for
/// outermost, non-reinjected tokens.
void Preprocessor::Lex(Token &Result) {
  ++LexLevel;

  // Iterate instead of recursing: a lex function that did not hand back a
  // token simply triggers another trip through the dispatch.
  for (bool GotToken = false; !GotToken;) {
    switch (CurLexerKind) {
    case CLK_Lexer:
      GotToken = CurLexer->Lex(Result);
      break;
    case CLK_TokenLexer:
      GotToken = CurTokenLexer->Lex(Result);
      break;
    case CLK_CachingLexer:
      CachingLex(Result);
      GotToken = true;
      break;
    case CLK_LexAfterModuleImport:
      GotToken = LexAfterModuleImport(Result);
      break;
    }
  }

  if (Result.is(tok::code_completion)) {
    if (IdentifierInfo *CompletedII = Result.getIdentifierInfo()) {
      // Remember the identifier that precedes the code completion token.
      setCodeCompletionIdentifierInfo(CompletedII);
      setCodeCompletionTokenRange(Result.getLocation(), Result.getEndLoc());
      // Clear the IdentifierInfo so that code handling both identifiers and
      // completion tokens is not confused.
      Result.setIdentifierInfo(nullptr);
    }
  }

  // Track the position within a C++20 import-seq, but only for tokens
  // produced as a result of phase 4 of translation.
  const bool UpdateImportSeq = getLangOpts().CPlusPlusModules &&
                               LexLevel == 1 &&
                               !Result.getFlag(Token::IsReinjected);
  if (UpdateImportSeq) {
    switch (Result.getKind()) {
    case tok::l_paren:
    case tok::l_square:
    case tok::l_brace:
      ImportSeqState.handleOpenBracket();
      break;
    case tok::r_paren:
    case tok::r_square:
      ImportSeqState.handleCloseBracket();
      break;
    case tok::r_brace:
      ImportSeqState.handleCloseBrace();
      break;
    case tok::semi:
      ImportSeqState.handleSemi();
      break;
    case tok::header_name:
    case tok::annot_header_unit:
      ImportSeqState.handleHeaderName();
      break;
    case tok::kw_export:
      ImportSeqState.handleExport();
      break;
    case tok::identifier:
      if (!Result.getIdentifierInfo()->isModulesImport()) {
        // Any other identifier is handled like the default case.
        ImportSeqState.handleMisc();
        break;
      }
      ImportSeqState.handleImport();
      if (ImportSeqState.afterImportSeq()) {
        ModuleImportLoc = Result.getLocation();
        ModuleImportPath.clear();
        ModuleImportExpectsIdentifier = true;
        CurLexerKind = CLK_LexAfterModuleImport;
      }
      break;
    default:
      ImportSeqState.handleMisc();
      break;
    }
  }

  LastTokenWasAt = Result.is(tok::at);
  --LexLevel;

  const bool NotifyOnToken =
      OnToken && LexLevel == 0 && !Result.getFlag(Token::IsReinjected);
  if (NotifyOnToken)
    OnToken(Result);
}
957
958
/// Lex a header-name token (including one formed from header-name-tokens if
959
/// \p AllowConcatenation is \c true).
960
///
961
/// \param FilenameTok Filled in with the next token. On success, this will
962
///        be either a header_name token. On failure, it will be whatever other
963
///        token was found instead.
964
/// \param AllowMacroExpansion If \c true, allow the header name to be formed
965
///        by macro expansion (concatenating tokens as necessary if the first
966
///        token is a '<').
967
/// \return \c true if we reached EOD or EOF while looking for a > token in
968
///         a concatenated header name and diagnosed it. \c false otherwise.
969
1.04M
bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
970
1.04M
  // Lex using header-name tokenization rules if tokens are being lexed from
971
1.04M
  // a file. Just grab a token normally if we're in a macro expansion.
972
1.04M
  if (CurPPLexer)
973
1.04M
    CurPPLexer->LexIncludeFilename(FilenameTok);
974
8
  else
975
8
    Lex(FilenameTok);
976
1.04M
977
1.04M
  // This could be a <foo/bar.h> file coming from a macro expansion.  In this
978
1.04M
  // case, glue the tokens together into an angle_string_literal token.
979
1.04M
  SmallString<128> FilenameBuffer;
980
1.04M
  if (FilenameTok.is(tok::less) && 
AllowMacroExpansion21
) {
981
21
    bool StartOfLine = FilenameTok.isAtStartOfLine();
982
21
    bool LeadingSpace = FilenameTok.hasLeadingSpace();
983
21
    bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
984
21
985
21
    SourceLocation Start = FilenameTok.getLocation();
986
21
    SourceLocation End;
987
21
    FilenameBuffer.push_back('<');
988
21
989
21
    // Consume tokens until we find a '>'.
990
21
    // FIXME: A header-name could be formed starting or ending with an
991
21
    // alternative token. It's not clear whether that's ill-formed in all
992
21
    // cases.
993
95
    while (FilenameTok.isNot(tok::greater)) {
994
79
      Lex(FilenameTok);
995
79
      if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
996
5
        Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
997
5
        Diag(Start, diag::note_matching) << tok::less;
998
5
        return true;
999
5
      }
1000
74
1001
74
      End = FilenameTok.getLocation();
1002
74
1003
74
      // FIXME: Provide code completion for #includes.
1004
74
      if (FilenameTok.is(tok::code_completion)) {
1005
0
        setCodeCompletionReached();
1006
0
        Lex(FilenameTok);
1007
0
        continue;
1008
0
      }
1009
74
1010
74
      // Append the spelling of this token to the buffer. If there was a space
1011
74
      // before it, add it now.
1012
74
      if (FilenameTok.hasLeadingSpace())
1013
4
        FilenameBuffer.push_back(' ');
1014
74
1015
74
      // Get the spelling of the token, directly into FilenameBuffer if
1016
74
      // possible.
1017
74
      size_t PreAppendSize = FilenameBuffer.size();
1018
74
      FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());
1019
74
1020
74
      const char *BufPtr = &FilenameBuffer[PreAppendSize];
1021
74
      unsigned ActualLen = getSpelling(FilenameTok, BufPtr);
1022
74
1023
74
      // If the token was spelled somewhere else, copy it into FilenameBuffer.
1024
74
      if (BufPtr != &FilenameBuffer[PreAppendSize])
1025
74
        memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);
1026
74
1027
74
      // Resize FilenameBuffer to the correct size.
1028
74
      if (FilenameTok.getLength() != ActualLen)
1029
0
        FilenameBuffer.resize(PreAppendSize + ActualLen);
1030
74
    }
1031
21
1032
21
    FilenameTok.startToken();
1033
16
    FilenameTok.setKind(tok::header_name);
1034
16
    FilenameTok.setFlagValue(Token::StartOfLine, StartOfLine);
1035
16
    FilenameTok.setFlagValue(Token::LeadingSpace, LeadingSpace);
1036
16
    FilenameTok.setFlagValue(Token::LeadingEmptyMacro, LeadingEmptyMacro);
1037
16
    CreateString(FilenameBuffer, FilenameTok, Start, End);
1038
1.04M
  } else if (FilenameTok.is(tok::string_literal) && 
AllowMacroExpansion64
) {
1039
63
    // Convert a string-literal token of the form " h-char-sequence "
1040
63
    // (produced by macro expansion) into a header-name token.
1041
63
    //
1042
63
    // The rules for header-names don't quite match the rules for
1043
63
    // string-literals, but all the places where they differ result in
1044
63
    // undefined behavior, so we can and do treat them the same.
1045
63
    //
1046
63
    // A string-literal with a prefix or suffix is not translated into a
1047
63
    // header-name. This could theoretically be observable via the C++20
1048
63
    // context-sensitive header-name formation rules.
1049
63
    StringRef Str = getSpelling(FilenameTok, FilenameBuffer);
1050
63
    if (Str.size() >= 2 && Str.front() == '"' && Str.back() == '"')
1051
63
      FilenameTok.setKind(tok::header_name);
1052
63
  }
1053
1.04M
1054
1.04M
  
return false1.04M
;
1055
1.04M
}
1056
1057
/// Collect the tokens of a C++20 pp-import-suffix.
1058
39
void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
1059
39
  // FIXME: For error recovery, consider recognizing attribute syntax here
1060
39
  // and terminating / diagnosing a missing semicolon if we find anything
1061
39
  // else? (Can we leave that to the parser?)
1062
39
  unsigned BracketDepth = 0;
1063
104
  while (true) {
1064
104
    Toks.emplace_back();
1065
104
    Lex(Toks.back());
1066
104
1067
104
    switch (Toks.back().getKind()) {
1068
104
    
case tok::l_paren: 16
case tok::l_square: 16
case tok::l_brace:
1069
16
      ++BracketDepth;
1070
16
      break;
1071
16
1072
28
    case tok::r_paren: case tok::r_square: case tok::r_brace:
1073
28
      if (BracketDepth == 0)
1074
12
        return;
1075
16
      --BracketDepth;
1076
16
      break;
1077
16
1078
26
    case tok::semi:
1079
26
      if (BracketDepth == 0)
1080
26
        return;
1081
0
    break;
1082
0
1083
1
    case tok::eof:
1084
1
      return;
1085
0
1086
33
    default:
1087
33
      break;
1088
104
    }
1089
104
  }
1090
39
}
1091
1092
1093
/// Lex a token following the 'import' contextual keyword.
1094
///
1095
///     pp-import: [C++20]
1096
///           import header-name pp-import-suffix[opt] ;
1097
///           import header-name-tokens pp-import-suffix[opt] ;
1098
/// [ObjC]    @ import module-name ;
1099
/// [Clang]   import module-name ;
1100
///
1101
///     header-name-tokens:
1102
///           string-literal
1103
///           < [any sequence of preprocessing-tokens other than >] >
1104
///
1105
///     module-name:
1106
///           module-name-qualifier[opt] identifier
1107
///
1108
///     module-name-qualifier
1109
///           module-name-qualifier[opt] identifier .
1110
///
1111
/// We respond to a pp-import by importing macros from the named module.
1112
2.43k
bool Preprocessor::LexAfterModuleImport(Token &Result) {
1113
2.43k
  // Figure out what kind of lexer we actually have.
1114
2.43k
  recomputeCurLexerKind();
1115
2.43k
1116
2.43k
  // Lex the next token. The header-name lexing rules are used at the start of
1117
2.43k
  // a pp-import.
1118
2.43k
  //
1119
2.43k
  // For now, we only support header-name imports in C++20 mode.
1120
2.43k
  // FIXME: Should we allow this in all language modes that support an import
1121
2.43k
  // declaration as an extension?
1122
2.43k
  if (ModuleImportPath.empty() && 
getLangOpts().CPlusPlusModules969
) {
1123
32
    if (LexHeaderName(Result))
1124
0
      return true;
1125
2.40k
  } else {
1126
2.40k
    Lex(Result);
1127
2.40k
  }
1128
2.43k
1129
2.43k
  // Allocate a holding buffer for a sequence of tokens and introduce it into
1130
2.43k
  // the token stream.
1131
2.43k
  auto EnterTokens = [this](ArrayRef<Token> Toks) {
1132
39
    auto ToksCopy = llvm::make_unique<Token[]>(Toks.size());
1133
39
    std::copy(Toks.begin(), Toks.end(), ToksCopy.get());
1134
39
    EnterTokenStream(std::move(ToksCopy), Toks.size(),
1135
39
                     /*DisableMacroExpansion*/ true, /*IsReinject*/ false);
1136
39
  };
1137
2.43k
1138
2.43k
  // Check for a header-name.
1139
2.43k
  SmallVector<Token, 32> Suffix;
1140
2.43k
  if (Result.is(tok::header_name)) {
1141
24
    // Enter the header-name token into the token stream; a Lex action cannot
1142
24
    // both return a token and cache tokens (doing so would corrupt the token
1143
24
    // cache if the call to Lex comes from CachingLex / PeekAhead).
1144
24
    Suffix.push_back(Result);
1145
24
1146
24
    // Consume the pp-import-suffix and expand any macros in it now. We'll add
1147
24
    // it back into the token stream later.
1148
24
    CollectPpImportSuffix(Suffix);
1149
24
    if (Suffix.back().isNot(tok::semi)) {
1150
0
      // This is not a pp-import after all.
1151
0
      EnterTokens(Suffix);
1152
0
      return false;
1153
0
    }
1154
24
1155
24
    // C++2a [cpp.module]p1:
1156
24
    //   The ';' preprocessing-token terminating a pp-import shall not have
1157
24
    //   been produced by macro replacement.
1158
24
    SourceLocation SemiLoc = Suffix.back().getLocation();
1159
24
    if (SemiLoc.isMacroID())
1160
1
      Diag(SemiLoc, diag::err_header_import_semi_in_macro);
1161
24
1162
24
    // Reconstitute the import token.
1163
24
    Token ImportTok;
1164
24
    ImportTok.startToken();
1165
24
    ImportTok.setKind(tok::kw_import);
1166
24
    ImportTok.setLocation(ModuleImportLoc);
1167
24
    ImportTok.setIdentifierInfo(getIdentifierInfo("import"));
1168
24
    ImportTok.setLength(6);
1169
24
1170
24
    auto Action = HandleHeaderIncludeOrImport(
1171
24
        /*HashLoc*/ SourceLocation(), ImportTok, Suffix.front(), SemiLoc);
1172
24
    switch (Action.Kind) {
1173
24
    case ImportAction::None:
1174
15
      break;
1175
24
1176
24
    case ImportAction::ModuleBegin:
1177
0
      // Let the parser know we're textually entering the module.
1178
0
      Suffix.emplace_back();
1179
0
      Suffix.back().startToken();
1180
0
      Suffix.back().setKind(tok::annot_module_begin);
1181
0
      Suffix.back().setLocation(SemiLoc);
1182
0
      Suffix.back().setAnnotationEndLoc(SemiLoc);
1183
0
      Suffix.back().setAnnotationValue(Action.ModuleForHeader);
1184
0
      LLVM_FALLTHROUGH;
1185
0
1186
9
    case ImportAction::ModuleImport:
1187
9
    case ImportAction::SkippedModuleImport:
1188
9
      // We chose to import (or textually enter) the file. Convert the
1189
9
      // header-name token into a header unit annotation token.
1190
9
      Suffix[0].setKind(tok::annot_header_unit);
1191
9
      Suffix[0].setAnnotationEndLoc(Suffix[0].getLocation());
1192
9
      Suffix[0].setAnnotationValue(Action.ModuleForHeader);
1193
9
      // FIXME: Call the moduleImport callback?
1194
9
      break;
1195
24
    }
1196
24
1197
24
    EnterTokens(Suffix);
1198
24
    return false;
1199
24
  }
1200
2.41k
1201
2.41k
  // The token sequence
1202
2.41k
  //
1203
2.41k
  //   import identifier (. identifier)*
1204
2.41k
  //
1205
2.41k
  // indicates a module import directive. We already saw the 'import'
1206
2.41k
  // contextual keyword, so now we're looking for the identifiers.
1207
2.41k
  if (ModuleImportExpectsIdentifier && 
Result.getKind() == tok::identifier1.21k
) {
1208
1.20k
    // We expected to see an identifier here, and we did; continue handling
1209
1.20k
    // identifiers.
1210
1.20k
    ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
1211
1.20k
                                              Result.getLocation()));
1212
1.20k
    ModuleImportExpectsIdentifier = false;
1213
1.20k
    CurLexerKind = CLK_LexAfterModuleImport;
1214
1.20k
    return true;
1215
1.20k
  }
1216
1.21k
1217
1.21k
  // If we're expecting a '.' or a ';', and we got a '.', then wait until we
1218
1.21k
  // see the next identifier. (We can also see a '[[' that begins an
1219
1.21k
  // attribute-specifier-seq here under the C++ Modules TS.)
1220
1.21k
  if (!ModuleImportExpectsIdentifier && 
Result.getKind() == tok::period1.20k
) {
1221
268
    ModuleImportExpectsIdentifier = true;
1222
268
    CurLexerKind = CLK_LexAfterModuleImport;
1223
268
    return true;
1224
268
  }
1225
945
1226
945
  // If we didn't recognize a module name at all, this is not a (valid) import.
1227
945
  if (ModuleImportPath.empty() || 
Result.is(tok::eof)937
)
1228
8
    return true;
1229
937
1230
937
  // Consume the pp-import-suffix and expand any macros in it now, if we're not
1231
937
  // at the semicolon already.
1232
937
  SourceLocation SemiLoc = Result.getLocation();
1233
937
  if (Result.isNot(tok::semi)) {
1234
15
    Suffix.push_back(Result);
1235
15
    CollectPpImportSuffix(Suffix);
1236
15
    if (Suffix.back().isNot(tok::semi)) {
1237
13
      // This is not an import after all.
1238
13
      EnterTokens(Suffix);
1239
13
      return false;
1240
13
    }
1241
2
    SemiLoc = Suffix.back().getLocation();
1242
2
  }
1243
937
1244
937
  // Under the Modules TS, the dot is just part of the module name, and not
1245
937
  // a real hierarchy separator. Flatten such module names now.
1246
937
  //
1247
937
  // FIXME: Is this the right level to be performing this transformation?
1248
937
  std::string FlatModuleName;
1249
924
  if (getLangOpts().ModulesTS || 
getLangOpts().CPlusPlusModules864
) {
1250
72
    for (auto &Piece : ModuleImportPath) {
1251
72
      if (!FlatModuleName.empty())
1252
7
        FlatModuleName += ".";
1253
72
      FlatModuleName += Piece.first->getName();
1254
72
    }
1255
65
    SourceLocation FirstPathLoc = ModuleImportPath[0].second;
1256
65
    ModuleImportPath.clear();
1257
65
    ModuleImportPath.push_back(
1258
65
        std::make_pair(getIdentifierInfo(FlatModuleName), FirstPathLoc));
1259
65
  }
1260
924
1261
924
  Module *Imported = nullptr;
1262
924
  if (getLangOpts().Modules) {
1263
924
    Imported = TheModuleLoader.loadModule(ModuleImportLoc,
1264
924
                                          ModuleImportPath,
1265
924
                                          Module::Hidden,
1266
924
                                          /*IsInclusionDirective=*/false);
1267
924
    if (Imported)
1268
831
      makeModuleVisible(Imported, SemiLoc);
1269
924
  }
1270
924
  if (Callbacks)
1271
916
    Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
1272
924
1273
924
  if (!Suffix.empty()) {
1274
2
    EnterTokens(Suffix);
1275
2
    return false;
1276
2
  }
1277
922
  return true;
1278
922
}
1279
1280
5.59k
/// Mark module \p M as visible from \p Loc in the current submodule state,
/// warn about any module conflict reported while doing so, and record \p M as
/// an import of the submodule currently being built (if there is one and it
/// is not \p M itself).
void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
  // Nothing to do for the per-module visibility notification.
  auto OnVisible = [](Module *) {};

  auto OnConflict = [&](ArrayRef<Module *> Path, Module *Conflict,
                        StringRef Message) {
    // FIXME: Include the path in the diagnostic.
    // FIXME: Include the import location for the conflicting module.
    Diag(ModuleImportLoc, diag::warn_module_conflict)
        << Path[0]->getFullModuleName()
        << Conflict->getFullModuleName()
        << Message;
  };

  CurSubmoduleState->VisibleModules.setVisible(M, Loc, OnVisible, OnConflict);

  // Add this module to the imports list of the currently-built submodule.
  if (BuildingSubmoduleStack.empty())
    return;
  Module *Building = BuildingSubmoduleStack.back().M;
  if (M != Building)
    Building->Imports.insert(M);
}
1296
1297
bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1298
                                          const char *DiagnosticTag,
1299
529
                                          bool AllowMacroExpansion) {
1300
529
  // We need at least one string literal.
1301
529
  if (Result.isNot(tok::string_literal)) {
1302
15
    Diag(Result, diag::err_expected_string_literal)
1303
15
      << /*Source='in...'*/0 << DiagnosticTag;
1304
15
    return false;
1305
15
  }
1306
514
1307
514
  // Lex string literal tokens, optionally with macro expansion.
1308
514
  SmallVector<Token, 4> StrToks;
1309
543
  do {
1310
543
    StrToks.push_back(Result);
1311
543
1312
543
    if (Result.hasUDSuffix())
1313
4
      Diag(Result, diag::err_invalid_string_udl);
1314
543
1315
543
    if (AllowMacroExpansion)
1316
123
      Lex(Result);
1317
420
    else
1318
420
      LexUnexpandedToken(Result);
1319
543
  } while (Result.is(tok::string_literal));
1320
514
1321
514
  // Concatenate and parse the strings.
1322
514
  StringLiteralParser Literal(StrToks, *this);
1323
514
  assert(Literal.isAscii() && "Didn't allow wide strings in");
1324
514
1325
514
  if (Literal.hadError)
1326
0
    return false;
1327
514
1328
514
  if (Literal.Pascal) {
1329
0
    Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
1330
0
      << /*Source='in...'*/0 << DiagnosticTag;
1331
0
    return false;
1332
0
  }
1333
514
1334
514
  String = Literal.GetString();
1335
514
  return true;
1336
514
}
1337
1338
89
/// Parse the numeric constant in \p Tok as a simple integer literal.
///
/// On success the value (as given by APInt::getLimitedValue on a 64-bit
/// APInt) is stored in \p Value and \p Tok is advanced to the next token.
///
/// \return \c false if the spelling is invalid, the literal has an error, is
///         not an integer literal, carries a user-defined suffix, or
///         GetIntegerValue reports failure; \c true otherwise. On failure
///         neither \p Tok nor \p Value is modified.
bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
  assert(Tok.is(tok::numeric_constant));

  SmallString<8> SpellingStorage;
  bool SpellingInvalid = false;
  StringRef Text = getSpelling(Tok, SpellingStorage, &SpellingInvalid);
  if (SpellingInvalid)
    return false;

  NumericLiteralParser Parser(Text, Tok.getLocation(), *this);
  const bool IsPlainInteger = !Parser.hadError &&
                              Parser.isIntegerLiteral() &&
                              !Parser.hasUDSuffix();
  if (!IsPlainInteger)
    return false;

  // A true result from GetIntegerValue is treated as failure.
  llvm::APInt Parsed(64, 0);
  if (Parser.GetIntegerValue(Parsed))
    return false;

  Lex(Tok);
  Value = Parsed.getLimitedValue();
  return true;
}
1355
1356
53.5k
/// Append \p Handler to the list of comment handlers. The handler must be
/// non-null and must not already be in the list.
void Preprocessor::addCommentHandler(CommentHandler *Handler) {
  assert(Handler && "NULL comment handler");
  assert(llvm::find(CommentHandlers, Handler) == CommentHandlers.end() &&
         "Comment handler already registered");

  CommentHandlers.emplace_back(Handler);
}
1362
1363
53.5k
/// Remove \p Handler from the list of comment handlers. The handler must
/// currently be in the list.
void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
  auto Registered = llvm::find(CommentHandlers, Handler);
  assert(Registered != CommentHandlers.end() &&
         "Comment handler not registered");
  CommentHandlers.erase(Registered);
}
1369
1370
14.3M
/// Pass the comment \p Comment to every registered comment handler.
///
/// If at least one handler returns \c true and getCommentRetentionState() is
/// false, the next token is lexed into \p result.
///
/// \return \c true if a token was lexed into \p result, \c false otherwise.
bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
  // Every handler is called, regardless of what earlier handlers returned.
  bool HandlerRequestedLex = false;
  for (CommentHandler *Handler : CommentHandlers)
    HandlerRequestedLex |= Handler->HandleComment(*this, Comment);

  if (HandlerRequestedLex && !getCommentRetentionState()) {
    Lex(result);
    return true;
  }
  return false;
}
1383
1384
38.8k
// Defaulted ModuleLoader destructor, defined out of line in this file.
ModuleLoader::~ModuleLoader() = default;
1385
1386
53.4k
// Defaulted CommentHandler destructor, defined out of line in this file.
CommentHandler::~CommentHandler() = default;
1387
1388
41.4k
// Defaulted CodeCompletionHandler destructor, defined out of line in this
// file.
CodeCompletionHandler::~CodeCompletionHandler() = default;
1389
1390
2.04k
void Preprocessor::createPreprocessingRecord() {
1391
2.04k
  if (Record)
1392
0
    return;
1393
2.04k
1394
2.04k
  Record = new PreprocessingRecord(getSourceManager());
1395
2.04k
  addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
1396
2.04k
}