Coverage Report

Created: 2020-02-25 14:32

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- DependencyDirectivesSourceMinimizer.cpp -  -------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
///
9
/// \file
10
/// This is the implementation for minimizing header and source files to the
11
/// minimum necessary preprocessor directives for evaluating includes. It
12
/// reduces the source down to #define, #include, #import, @import, and any
13
/// conditional preprocessor logic that contains one of those.
14
///
15
//===----------------------------------------------------------------------===//
16
17
#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
18
#include "clang/Basic/CharInfo.h"
19
#include "clang/Basic/Diagnostic.h"
20
#include "clang/Lex/LexDiagnostic.h"
21
#include "llvm/ADT/StringSwitch.h"
22
#include "llvm/Support/MemoryBuffer.h"
23
24
using namespace llvm;
25
using namespace clang;
26
using namespace clang::minimize_source_to_dependency_directives;
27
28
namespace {
29
30
struct Minimizer {
31
  /// Minimized output.
32
  SmallVectorImpl<char> &Out;
33
  /// The known tokens encountered during the minimization.
34
  SmallVectorImpl<Token> &Tokens;
35
36
  Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens,
37
            StringRef Input, DiagnosticsEngine *Diags,
38
            SourceLocation InputSourceLoc)
39
      : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
40
165
        InputSourceLoc(InputSourceLoc) {}
41
42
  /// Lex the provided source and emit the minimized output.
43
  ///
44
  /// \returns True on error.
45
  bool minimize();
46
47
private:
48
  struct IdInfo {
49
    const char *Last;
50
    StringRef Name;
51
  };
52
53
  /// Lex an identifier.
54
  ///
55
  /// \pre First points at a valid identifier head.
56
  LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
57
  LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
58
                                       const char *const End);
59
  LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
60
  LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
61
  LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
62
  LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
63
  LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
64
  LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
65
  LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
66
  LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
67
                                 const char *&First, const char *const End);
68
455
  Token &makeToken(TokenKind K) {
69
455
    Tokens.emplace_back(K, Out.size());
70
455
    return Tokens.back();
71
455
  }
72
3
  void popToken() {
73
3
    Out.resize(Tokens.back().Offset);
74
3
    Tokens.pop_back();
75
3
  }
76
188
  TokenKind top() const { return Tokens.empty() ? 
pp_none0
: Tokens.back().K; }
77
78
758
  Minimizer &put(char Byte) {
79
758
    Out.push_back(Byte);
80
758
    return *this;
81
758
  }
82
378
  Minimizer &append(StringRef S) { return append(S.begin(), S.end()); }
83
612
  Minimizer &append(const char *First, const char *Last) {
84
612
    Out.append(First, Last);
85
612
    return *this;
86
612
  }
87
88
  void printToNewline(const char *&First, const char *const End);
89
  void printAdjacentModuleNameParts(const char *&First, const char *const End);
90
  LLVM_NODISCARD bool printAtImportBody(const char *&First,
91
                                        const char *const End);
92
  void printDirectiveBody(const char *&First, const char *const End);
93
  void printAdjacentMacroArgs(const char *&First, const char *const End);
94
  LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
95
96
  /// Reports a diagnostic if the diagnostic engine is provided. Always returns
97
  /// true at the end.
98
  bool reportError(const char *CurPtr, unsigned Err);
99
100
  StringMap<char> SplitIds;
101
  StringRef Input;
102
  DiagnosticsEngine *Diags;
103
  SourceLocation InputSourceLoc;
104
};
105
106
} // end anonymous namespace
107
108
8
bool Minimizer::reportError(const char *CurPtr, unsigned Err) {
109
8
  if (!Diags)
110
5
    return true;
111
3
  assert(CurPtr >= Input.data() && "invalid buffer ptr");
112
3
  Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
113
3
  return true;
114
3
}
115
116
2.43k
static void skipOverSpaces(const char *&First, const char *const End) {
117
3.18k
  while (First != End && 
isHorizontalWhitespace(*First)3.17k
)
118
745
    ++First;
119
2.43k
}
120
121
/// Checks whether the double quote at \p Current opens a raw string literal.
///
/// Walks backwards from \p Current (never before \p First) looking for an 'R'
/// that is optionally preceded by one of the prefixes "u", "U", "L" or "u8".
/// The prefix must not be the tail of a longer identifier.
///
/// \pre First <= Current.
LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
                                              const char *Current) {
  assert(First <= Current);

  // Check if we can even back up.
  if (*Current != '"' || First == Current)
    return false;

  // Check for an "R".
  --Current;
  if (*Current != 'R')
    return false;
  if (First == Current || !isIdentifierBody(*--Current))
    return true;

  // Check for a prefix of "u", "U", or "L".
  if (*Current == 'u' || *Current == 'U' || *Current == 'L')
    return First == Current || !isIdentifierBody(*--Current);

  // Check for a prefix of "u8". The 'u' is the character *before* the '8', so
  // step back first and then compare (a post-decrement here would compare the
  // '8' itself against 'u' and reject every u8R"..." literal).
  if (*Current != '8' || First == Current || *--Current != 'u')
    return false;
  return First == Current || !isIdentifierBody(*--Current);
}
145
146
3
/// Advances \p First past a raw string literal.
///
/// On entry \p First points at the opening '"' that follows the 'R'. On exit
/// it points just past the closing '"', or at \p End if the literal is not
/// terminated.
static void skipRawString(const char *&First, const char *const End) {
  assert(First[0] == '"');
  assert(First[-1] == 'R');

  // The delimiter is everything between the opening quote and the first '('.
  const char *const DelimBegin = ++First;
  const char *Cursor = DelimBegin;
  while (Cursor != End && *Cursor != '(')
    ++Cursor;
  if (Cursor == End) {
    First = End; // Hit the end... just give up.
    return;
  }
  const size_t DelimLen = Cursor - DelimBegin;

  while (true) {
    // Move First to just past the next ")".
    First = Cursor;
    while (First != End && *First != ')')
      ++First;
    if (First == End)
      return;
    ++First;

    // Match as much of the delimiter as possible right after the ")".
    Cursor = First;
    while (Cursor != End && size_t(Cursor - First) < DelimLen &&
           DelimBegin[Cursor - First] == *Cursor)
      ++Cursor;

    // Ran off the buffer while matching.
    if (Cursor == End) {
      First = End;
      return;
    }

    // A full delimiter followed by the closing quote ends the literal;
    // anything else means this ")" was part of the string body.
    if (size_t(Cursor - First) == DelimLen && *Cursor == '"') {
      First = Cursor + 1;
      return;
    }
  }
}
187
188
// Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
189
41.2k
static unsigned isEOL(const char *First, const char *const End) {
190
41.2k
  if (First == End)
191
0
    return 0;
192
41.2k
  if (End - First > 1 && 
isVerticalWhitespace(First[0])41.1k
&&
193
41.2k
      
isVerticalWhitespace(First[1])2.46k
&&
First[0] != First[1]210
)
194
20
    return 2;
195
41.2k
  return !!isVerticalWhitespace(First[0]);
196
41.2k
}
197
198
98
static void skipString(const char *&First, const char *const End) {
199
98
  assert(*First == '\'' || *First == '"' || *First == '<');
200
98
  const char Terminator = *First == '<' ? 
'>'24
:
*First74
;
201
853
  for (++First; First != End && 
*First != Terminator852
;
++First755
) {
202
758
    // String and character literals don't extend past the end of the line.
203
758
    if (isVerticalWhitespace(*First))
204
3
      return;
205
755
    if (*First != '\\')
206
752
      continue;
207
3
    // Skip past backslash to the next character. This ensures that the
208
3
    // character right after it is skipped as well, which matters if it's
209
3
    // the terminator.
210
3
    if (++First == End)
211
0
      return;
212
3
    if (!isWhitespace(*First))
213
0
      continue;
214
3
    // Whitespace after the backslash might indicate a line continuation.
215
3
    const char *FirstAfterBackslashPastSpace = First;
216
3
    skipOverSpaces(FirstAfterBackslashPastSpace, End);
217
3
    if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
218
3
      // Advance the character pointer to the next line for the next
219
3
      // iteration.
220
3
      First = FirstAfterBackslashPastSpace + NLSize - 1;
221
3
    }
222
3
  }
223
98
  
if (95
First != End95
)
224
93
    ++First; // Finish off the string.
225
95
}
226
227
// Returns the length of the skipped newline
228
1.52k
static unsigned skipNewline(const char *&First, const char *End) {
229
1.52k
  if (First == End)
230
12
    return 0;
231
1.51k
  assert(isVerticalWhitespace(*First));
232
1.51k
  unsigned Len = isEOL(First, End);
233
1.51k
  assert(Len && "expected newline");
234
1.51k
  First += Len;
235
1.51k
  return Len;
236
1.51k
}
237
238
28
/// \returns True if the character immediately before the newline of length
/// \p EOLLen that ends just before \p First is a backslash.
static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  const char *const BeforeEOL = First - (int)EOLLen - 1;
  return *BeforeEOL == '\\';
}
241
242
1.07k
static void skipToNewlineRaw(const char *&First, const char *const End) {
243
1.17k
  for (;;) {
244
1.17k
    if (First == End)
245
0
      return;
246
1.17k
247
1.17k
    unsigned Len = isEOL(First, End);
248
1.17k
    if (Len)
249
58
      return;
250
1.11k
251
39.9k
    
do 1.11k
{
252
39.9k
      if (++First == End)
253
0
        return;
254
39.9k
      Len = isEOL(First, End);
255
39.9k
    } while (!Len);
256
1.11k
257
1.11k
    if (First[-1] != '\\')
258
996
      return;
259
122
260
122
    First += Len;
261
122
    // Keep skipping lines...
262
122
  }
263
1.07k
}
264
265
212
static const char *findLastNonSpace(const char *First, const char *Last) {
266
212
  assert(First <= Last);
267
257
  while (First != Last && 
isHorizontalWhitespace(Last[-1])256
)
268
45
    --Last;
269
212
  return Last;
270
212
}
271
272
static const char *findFirstTrailingSpace(const char *First,
273
8
                                          const char *Last) {
274
8
  const char *LastNonSpace = findLastNonSpace(First, Last);
275
8
  if (Last == LastNonSpace)
276
1
    return Last;
277
7
  assert(isHorizontalWhitespace(LastNonSpace[0]));
278
7
  return LastNonSpace + 1;
279
7
}
280
281
1.05k
static void skipLineComment(const char *&First, const char *const End) {
282
1.05k
  assert(First[0] == '/' && First[1] == '/');
283
1.05k
  First += 2;
284
1.05k
  skipToNewlineRaw(First, End);
285
1.05k
}
286
287
8
/// Advances \p First from the "/*" of a block comment to just past the closing
/// "*/", or to \p End if the comment is not terminated.
static void skipBlockComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '*');

  // A complete comment needs at least the four characters "/**/".
  if (End - First < 4) {
    First = End;
    return;
  }

  // Start at the first position where "*/" could end (so "/*/" is not taken
  // for a complete comment).
  First += 3;
  while (First != End) {
    const bool ClosesComment = First[-1] == '*' && First[0] == '/';
    ++First;
    if (ClosesComment)
      return;
  }
}
299
300
/// \returns True if the current single quotation mark character is a C++ 14
301
/// digit separator.
302
static bool isQuoteCppDigitSeparator(const char *const Start,
303
                                     const char *const Cur,
304
9
                                     const char *const End) {
305
9
  assert(*Cur == '\'' && "expected quotation character");
306
9
  // skipLine called in places where we don't expect a valid number
307
9
  // body before `start` on the same line, so always return false at the start.
308
9
  if (Start == Cur)
309
0
    return false;
310
9
  // The previous character must be a valid PP number character.
311
9
  // Make sure that the L, u, U, u8 prefixes don't get marked as a
312
9
  // separator though.
313
9
  char Prev = *(Cur - 1);
314
9
  if (Prev == 'L' || 
Prev == 'U'8
||
Prev == 'u'7
)
315
3
    return false;
316
6
  if (Prev == '8' && 
(Cur - 1 != Start)2
&&
*(Cur - 2) == 'u'2
)
317
1
    return false;
318
5
  if (!isPreprocessingNumberBody(Prev))
319
1
    return false;
320
4
  // The next character should be a valid identifier body character.
321
4
  return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
322
4
}
323
324
1.30k
static void skipLine(const char *&First, const char *const End) {
325
1.30k
  for (;;) {
326
1.30k
    assert(First <= End);
327
1.30k
    if (First == End)
328
1
      return;
329
1.30k
330
1.30k
    if (isVerticalWhitespace(*First)) {
331
1.26k
      skipNewline(First, End);
332
1.26k
      return;
333
1.26k
    }
334
40
    const char *Start = First;
335
438
    while (First != End && 
!isVerticalWhitespace(*First)426
) {
336
398
      // Iterate over strings correctly to avoid comments and newlines.
337
398
      if (*First == '"' ||
338
398
          
(395
*First == '\''395
&&
!isQuoteCppDigitSeparator(Start, First, End)9
)) {
339
8
        if (isRawStringLiteral(Start, First))
340
3
          skipRawString(First, End);
341
5
        else
342
5
          skipString(First, End);
343
8
        continue;
344
8
      }
345
390
346
390
      // Iterate over comments correctly.
347
390
      if (*First != '/' || 
End - First < 20
) {
348
390
        ++First;
349
390
        continue;
350
390
      }
351
0
352
0
      if (First[1] == '/') {
353
0
        // "//...".
354
0
        skipLineComment(First, End);
355
0
        continue;
356
0
      }
357
0
358
0
      if (First[1] != '*') {
359
0
        ++First;
360
0
        continue;
361
0
      }
362
0
363
0
      // "/*...*/".
364
0
      skipBlockComment(First, End);
365
0
    }
366
40
    if (First == End)
367
12
      return;
368
28
369
28
    // Skip over the newline.
370
28
    unsigned Len = skipNewline(First, End);
371
28
    if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
372
27
      break;
373
28
  }
374
1.30k
}
375
376
/// Skips the remainder of a directive named \p Name that is not kept in the
/// minimized output.
///
/// For "warning" and "error" the rest of the line is skipped without
/// interpreting quotes or comments; every other directive goes through
/// skipLine.
static void skipDirective(StringRef Name, const char *&First,
                          const char *const End) {
  const bool HasFreeFormBody = Name == "warning" || Name == "error";
  if (HasFreeFormBody) {
    // Do not process quotes or comments.
    skipToNewlineRaw(First, End);
    return;
  }
  skipLine(First, End);
}
387
388
272
void Minimizer::printToNewline(const char *&First, const char *const End) {
389
281
  while (First != End && 
!isVerticalWhitespace(*First)279
) {
390
206
    const char *Last = First;
391
953
    do {
392
953
      // Iterate over strings correctly to avoid comments and newlines.
393
953
      if (*Last == '"' || 
*Last == '\''886
||
394
953
          
(884
*Last == '<'884
&&
top() == pp_include32
)) {
395
93
        if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
396
93
          
skipRawString(Last, End)0
;
397
93
        else
398
93
          skipString(Last, End);
399
93
        continue;
400
93
      }
401
860
      if (*Last != '/' || 
End - Last < 21
) {
402
859
        ++Last;
403
859
        continue; // Gather the rest up to print verbatim.
404
859
      }
405
1
406
1
      if (Last[1] != '/' && Last[1] != '*') {
407
0
        ++Last;
408
0
        continue;
409
0
      }
410
1
411
1
      // Deal with "//..." and "/*...*/".
412
1
      append(First, findFirstTrailingSpace(First, Last));
413
1
      First = Last;
414
1
415
1
      if (Last[1] == '/') {
416
0
        skipLineComment(First, End);
417
0
        return;
418
0
      }
419
1
420
1
      put(' ');
421
1
      skipBlockComment(First, End);
422
1
      skipOverSpaces(First, End);
423
1
      Last = First;
424
953
    } while (Last != End && 
!isVerticalWhitespace(*Last)939
);
425
206
426
206
    // Print out the string.
427
206
    const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last);
428
206
    if (Last == End || 
LastBeforeTrailingSpace == First192
||
429
206
        
LastBeforeTrailingSpace[-1] != '\\'191
) {
430
197
      append(First, LastBeforeTrailingSpace);
431
197
      First = Last;
432
197
      skipNewline(First, End);
433
197
      return;
434
197
    }
435
9
436
9
    // Print up to the backslash, backing up over spaces. Preserve at least one
437
9
    // space, as the space matters when tokens are separated by a line
438
9
    // continuation.
439
9
    append(First, findFirstTrailingSpace(
440
9
                      First, LastBeforeTrailingSpace - 1));
441
9
442
9
    First = Last;
443
9
    skipNewline(First, End);
444
9
    skipOverSpaces(First, End);
445
9
  }
446
272
}
447
448
2.40k
static void skipWhitespace(const char *&First, const char *const End) {
449
2.42k
  for (;;) {
450
2.42k
    assert(First <= End);
451
2.42k
    skipOverSpaces(First, End);
452
2.42k
453
2.42k
    if (End - First < 2)
454
107
      return;
455
2.31k
456
2.31k
    if (First[0] == '\\' && 
isVerticalWhitespace(First[1])14
) {
457
14
      skipNewline(++First, End);
458
14
      continue;
459
14
    }
460
2.30k
461
2.30k
    // Check for a non-comment character.
462
2.30k
    if (First[0] != '/')
463
1.25k
      return;
464
1.04k
465
1.04k
    // "// ...".
466
1.05k
    
if (1.04k
First[1] == '/'1.04k
) {
467
1.05k
      skipLineComment(First, End);
468
1.05k
      return;
469
1.05k
    }
470
18.4E
471
18.4E
    // Cannot be a comment.
472
18.4E
    if (First[1] != '*')
473
0
      return;
474
18.4E
475
18.4E
    // "/*...*/".
476
18.4E
    skipBlockComment(First, End);
477
18.4E
  }
478
2.40k
}
479
480
void Minimizer::printAdjacentModuleNameParts(const char *&First,
481
12
                                             const char *const End) {
482
12
  // Skip over parts of the body.
483
12
  const char *Last = First;
484
12
  do
485
18
    ++Last;
486
18
  while (Last != End && (isIdentifierBody(*Last) || 
*Last == '.'13
));
487
12
  append(First, Last);
488
12
  First = Last;
489
12
}
490
491
11
bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
492
27
  for (;;) {
493
27
    skipWhitespace(First, End);
494
27
    if (First == End)
495
2
      return true;
496
25
497
25
    if (isVerticalWhitespace(*First)) {
498
4
      skipNewline(First, End);
499
4
      continue;
500
4
    }
501
21
502
21
    // Found a semicolon.
503
21
    if (*First == ';') {
504
7
      put(*First++).put('\n');
505
7
      return false;
506
7
    }
507
14
508
14
    // Don't handle macro expansions inside @import for now.
509
14
    if (!isIdentifierBody(*First) && 
*First != '.'3
)
510
2
      return true;
511
12
512
12
    printAdjacentModuleNameParts(First, End);
513
12
  }
514
11
}
515
516
262
void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
517
262
  skipWhitespace(First, End); // Skip initial whitespace.
518
262
  printToNewline(First, End);
519
319
  while (Out.back() == ' ')
520
57
    Out.pop_back();
521
262
  put('\n');
522
262
}
523
524
/// Scans a run of identifier-body characters starting at \p First.
///
/// \pre First points at an identifier-body character.
/// \returns A pointer just past the last identifier-body character.
LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
                                                   const char *const End) {
  assert(isIdentifierBody(*First) && "invalid identifer");
  ++First;
  while (First != End && isIdentifierBody(*First))
    ++First;
  return First;
}
532
533
/// Checks whether an identifier that ended at \p First is continued on the
/// next line via a backslash-newline.
///
/// \returns A pointer to the first character of the continued part, or
/// nullptr if there is no continuation.
LLVM_NODISCARD static const char *
getIdentifierContinuation(const char *First, const char *const End) {
  const bool StartsWithSplice = End - First >= 3 && First[0] == '\\' &&
                                isVerticalWhitespace(First[1]);
  if (!StartsWithSplice)
    return nullptr;

  // Step over the backslash and the newline that follows it.
  ++First;
  skipNewline(First, End);
  if (First != End && isIdentifierBody(First[0]))
    return First;
  return nullptr;
}
544
545
/// Lexes the identifier starting at \p First, joining pieces that are split
/// across lines with backslash-newlines.
///
/// \returns The end of the identifier and its name. For a split identifier the
/// name refers to storage owned by SplitIds rather than to the input buffer.
Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
                                           const char *const End) {
  const char *Last = lexRawIdentifier(First, End);
  const char *Next = getIdentifierContinuation(Last, End);

  // Fast path: the identifier is contiguous in the input.
  if (LLVM_LIKELY(!Next))
    return IdInfo{Last, StringRef(First, Last - First)};

  // Slow path, where identifiers are split over lines.
  SmallVector<char, 64> Id(First, Last);
  do {
    Last = lexRawIdentifier(Next, End);
    Id.append(Next, Last);
    Next = getIdentifierContinuation(Last, End);
  } while (Next);

  // Intern the joined spelling so the returned StringRef outlives Id.
  const StringRef Joined(Id.begin(), Id.size());
  return IdInfo{Last, SplitIds.try_emplace(Joined, 0).first->first()};
}
563
564
void Minimizer::printAdjacentMacroArgs(const char *&First,
565
17
                                       const char *const End) {
566
17
  // Skip over parts of the body.
567
17
  const char *Last = First;
568
17
  do
569
37
    ++Last;
570
37
  while (Last != End &&
571
37
         (isIdentifierBody(*Last) || 
*Last == '.'32
||
*Last == ','25
));
572
17
  append(First, Last);
573
17
  First = Last;
574
17
}
575
576
17
bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
577
17
  assert(*First == '(');
578
17
  put(*First++);
579
34
  for (;;) {
580
34
    skipWhitespace(First, End);
581
34
    if (First == End)
582
1
      return true;
583
33
584
33
    if (*First == ')') {
585
14
      put(*First++);
586
14
      return false;
587
14
    }
588
19
589
19
    // This is intentionally fairly liberal.
590
19
    if (!(isIdentifierBody(*First) || 
*First == '.'4
||
*First == ','2
))
591
2
      return true;
592
17
593
17
    printAdjacentMacroArgs(First, End);
594
17
  }
595
17
}
596
597
/// Looks for an identifier starting from Last.
598
///
599
/// Updates "First" to just past the next identifier, if any.  Returns true iff
600
/// the identifier matches "Id".
601
bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
602
20
                                 const char *const End) {
603
20
  skipWhitespace(First, End);
604
20
  if (First == End || !isIdentifierHead(*First))
605
2
    return false;
606
18
607
18
  IdInfo FoundId = lexIdentifier(First, End);
608
18
  First = FoundId.Last;
609
18
  return FoundId.Name == Id;
610
18
}
611
612
11
bool Minimizer::lexAt(const char *&First, const char *const End) {
613
11
  // Handle "@import".
614
11
  const char *ImportLoc = First++;
615
11
  if (!isNextIdentifier("import", First, End)) {
616
0
    skipLine(First, End);
617
0
    return false;
618
0
  }
619
11
  makeToken(decl_at_import);
620
11
  append("@import ");
621
11
  if (printAtImportBody(First, End))
622
4
    return reportError(
623
4
        ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
624
7
  skipWhitespace(First, End);
625
7
  if (First == End)
626
2
    return false;
627
5
  if (!isVerticalWhitespace(*First))
628
1
    return reportError(
629
1
        ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
630
4
  skipNewline(First, End);
631
4
  return false;
632
4
}
633
634
24
bool Minimizer::lexModule(const char *&First, const char *const End) {
635
24
  IdInfo Id = lexIdentifier(First, End);
636
24
  First = Id.Last;
637
24
  bool Export = false;
638
24
  if (Id.Name == "export") {
639
4
    Export = true;
640
4
    skipWhitespace(First, End);
641
4
    if (!isIdentifierBody(*First)) {
642
0
      skipLine(First, End);
643
0
      return false;
644
0
    }
645
4
    Id = lexIdentifier(First, End);
646
4
    First = Id.Last;
647
4
  }
648
24
649
24
  if (Id.Name != "module" && 
Id.Name != "import"21
) {
650
8
    skipLine(First, End);
651
8
    return false;
652
8
  }
653
16
654
16
  skipWhitespace(First, End);
655
16
656
16
  // Ignore this as a module directive if the next character can't be part of
657
16
  // an import.
658
16
659
16
  switch (*First) {
660
2
  case ':':
661
2
  case '<':
662
2
  case '"':
663
2
    break;
664
14
  default:
665
14
    if (!isIdentifierBody(*First)) {
666
6
      skipLine(First, End);
667
6
      return false;
668
6
    }
669
10
  }
670
10
671
10
  if (Export) {
672
3
    makeToken(cxx_export_decl);
673
3
    append("export ");
674
3
  }
675
10
676
10
  if (Id.Name == "module")
677
2
    makeToken(cxx_module_decl);
678
8
  else
679
8
    makeToken(cxx_import_decl);
680
10
  append(Id.Name);
681
10
  append(" ");
682
10
  printToNewline(First, End);
683
10
  append("\n");
684
10
  return false;
685
10
}
686
687
60
bool Minimizer::lexDefine(const char *&First, const char *const End) {
688
60
  makeToken(pp_define);
689
60
  append("#define ");
690
60
  skipWhitespace(First, End);
691
60
692
60
  if (!isIdentifierHead(*First))
693
3
    return reportError(First, diag::err_pp_macro_not_identifier);
694
57
695
57
  IdInfo Id = lexIdentifier(First, End);
696
57
  const char *Last = Id.Last;
697
57
  append(Id.Name);
698
57
  if (Last == End)
699
1
    return false;
700
56
  if (*Last == '(') {
701
17
    size_t Size = Out.size();
702
17
    if (printMacroArgs(Last, End)) {
703
3
      // Be robust to bad macro arguments, since they can show up in disabled
704
3
      // code.
705
3
      Out.resize(Size);
706
3
      append("(/* invalid */\n");
707
3
      skipLine(Last, End);
708
3
      return false;
709
3
    }
710
53
  }
711
53
  skipWhitespace(Last, End);
712
53
  if (Last == End)
713
3
    return false;
714
50
  if (!isVerticalWhitespace(*Last))
715
33
    put(' ');
716
50
  printDirectiveBody(Last, End);
717
50
  First = Last;
718
50
  return false;
719
50
}
720
721
8
bool Minimizer::lexPragma(const char *&First, const char *const End) {
722
8
  // #pragma.
723
8
  skipWhitespace(First, End);
724
8
  if (First == End || !isIdentifierHead(*First))
725
0
    return false;
726
8
727
8
  IdInfo FoundId = lexIdentifier(First, End);
728
8
  First = FoundId.Last;
729
8
  if (FoundId.Name == "once") {
730
2
    // #pragma once
731
2
    skipLine(First, End);
732
2
    makeToken(pp_pragma_once);
733
2
    append("#pragma once\n");
734
2
    return false;
735
2
  }
736
6
737
6
  if (FoundId.Name != "clang") {
738
1
    skipLine(First, End);
739
1
    return false;
740
1
  }
741
5
742
5
  // #pragma clang.
743
5
  if (!isNextIdentifier("module", First, End)) {
744
1
    skipLine(First, End);
745
1
    return false;
746
1
  }
747
4
748
4
  // #pragma clang module.
749
4
  if (!isNextIdentifier("import", First, End)) {
750
2
    skipLine(First, End);
751
2
    return false;
752
2
  }
753
2
754
2
  // #pragma clang module import.
755
2
  makeToken(pp_pragma_import);
756
2
  append("#pragma clang module import ");
757
2
  printDirectiveBody(First, End);
758
2
  return false;
759
2
}
760
761
52
bool Minimizer::lexEndif(const char *&First, const char *const End) {
762
52
  // Strip out "#else" if it's empty.
763
52
  if (top() == pp_else)
764
2
    popToken();
765
52
766
52
  // If "#ifdef" is empty, strip it and skip the "#endif".
767
52
  //
768
52
  // FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
769
52
  // we can skip empty `#if` and `#elif` blocks as well after scanning for a
770
52
  // literal __has_include in the condition.  Even without that rule we could
771
52
  // drop the tokens if we scan for identifiers in the condition and find none.
772
52
  if (top() == pp_ifdef || top() == pp_ifndef) {
773
1
    popToken();
774
1
    skipLine(First, End);
775
1
    return false;
776
1
  }
777
51
778
51
  return lexDefault(pp_endif, "endif", First, End);
779
51
}
780
781
/// Records a token of kind \p Kind and prints "#<Directive> " followed by the
/// rest of the directive's line.
///
/// \returns Always false.
bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
                           const char *&First, const char *const End) {
  makeToken(Kind);
  put('#');
  append(Directive);
  put(' ');
  printDirectiveBody(First, End);
  return false;
}
788
789
1.60k
/// \returns True if a line whose first significant character is \p First
/// could be relevant to the minimizer: '#' (directive), '@' (@import), or
/// 'i', 'e', 'm' (import / export / module).
static bool isStartOfRelevantLine(char First) {
  return First == '#' || First == '@' || First == 'i' || First == 'e' ||
         First == 'm';
}
800
801
1.61k
bool Minimizer::lexPPLine(const char *&First, const char *const End) {
802
1.61k
  assert(First != End);
803
1.61k
804
1.61k
  skipWhitespace(First, End);
805
1.61k
  assert(First <= End);
806
1.61k
  if (First == End)
807
3
    return false;
808
1.60k
809
1.60k
  if (!isStartOfRelevantLine(*First)) {
810
1.27k
    skipLine(First, End);
811
1.27k
    assert(First <= End);
812
1.27k
    return false;
813
1.27k
  }
814
335
815
335
  // Handle "@import".
816
335
  if (*First == '@')
817
11
    return lexAt(First, End);
818
324
819
324
  if (*First == 'i' || 
*First == 'e'302
||
*First == 'm'298
)
820
24
    return lexModule(First, End);
821
300
822
300
  // Handle preprocessing directives.
823
300
  ++First; // Skip over '#'.
824
300
  skipWhitespace(First, End);
825
300
826
300
  if (First == End)
827
0
    return reportError(First, diag::err_pp_expected_eol);
828
300
829
300
  if (!isIdentifierHead(*First)) {
830
0
    skipLine(First, End);
831
0
    return false;
832
0
  }
833
300
834
300
  // Figure out the token.
835
300
  IdInfo Id = lexIdentifier(First, End);
836
300
  First = Id.Last;
837
300
  auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
838
300
                  .Case("include", pp_include)
839
300
                  .Case("__include_macros", pp___include_macros)
840
300
                  .Case("define", pp_define)
841
300
                  .Case("undef", pp_undef)
842
300
                  .Case("import", pp_import)
843
300
                  .Case("include_next", pp_include_next)
844
300
                  .Case("if", pp_if)
845
300
                  .Case("ifdef", pp_ifdef)
846
300
                  .Case("ifndef", pp_ifndef)
847
300
                  .Case("elif", pp_elif)
848
300
                  .Case("else", pp_else)
849
300
                  .Case("endif", pp_endif)
850
300
                  .Case("pragma", pp_pragma_import)
851
300
                  .Default(pp_none);
852
300
  if (Kind == pp_none) {
853
18
    skipDirective(Id.Name, First, End);
854
18
    return false;
855
18
  }
856
282
857
282
  if (Kind == pp_endif)
858
52
    return lexEndif(First, End);
859
230
860
230
  if (Kind == pp_define)
861
60
    return lexDefine(First, End);
862
170
863
170
  if (Kind == pp_pragma_import)
864
8
    return lexPragma(First, End);
865
162
866
162
  // Everything else.
867
162
  return lexDefault(Kind, Id.Name, First, End);
868
162
}
869
870
165
/// Advances \p First past a leading UTF-8 byte order mark (EF BB BF), if one
/// is present.
static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  if ((End - First) < 3)
    return;
  const bool HasBOM =
      First[0] == '\xef' && First[1] == '\xbb' && First[2] == '\xbf';
  if (HasBOM)
    First += 3;
}
875
876
165
bool Minimizer::minimizeImpl(const char *First, const char *const End) {
877
165
  skipUTF8ByteOrderMark(First, End);
878
1.76k
  while (First != End)
879
1.61k
    if (lexPPLine(First, End))
880
8
      return true;
881
165
  
return false157
;
882
165
}
883
884
165
bool Minimizer::minimize() {
885
165
  bool Error = minimizeImpl(Input.begin(), Input.end());
886
165
887
165
  if (!Error) {
888
157
    // Add a trailing newline and an EOF on success.
889
157
    if (!Out.empty() && 
Out.back() != '\n'138
)
890
4
      Out.push_back('\n');
891
157
    makeToken(pp_eof);
892
157
  }
893
165
894
165
  // Null-terminate the output. This way the memory buffer that's passed to
895
165
  // Clang will not have to worry about the terminating '\0'.
896
165
  Out.push_back(0);
897
165
  Out.pop_back();
898
165
  return Error;
899
165
}
900
901
bool clang::minimize_source_to_dependency_directives::computeSkippedRanges(
902
68
    ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) {
903
68
  struct Directive {
904
68
    enum DirectiveKind {
905
68
      If,  // if/ifdef/ifndef
906
68
      Else // elif,else
907
68
    };
908
68
    int Offset;
909
68
    DirectiveKind Kind;
910
68
  };
911
68
  llvm::SmallVector<Directive, 32> Offsets;
912
201
  for (const Token &T : Input) {
913
201
    switch (T.K) {
914
31
    case pp_if:
915
31
    case pp_ifdef:
916
31
    case pp_ifndef:
917
31
      Offsets.push_back({T.Offset, Directive::If});
918
31
      break;
919
31
920
31
    case pp_elif:
921
4
    case pp_else: {
922
4
      if (Offsets.empty())
923
0
        return true;
924
4
      int PreviousOffset = Offsets.back().Offset;
925
4
      Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
926
4
      Offsets.push_back({T.Offset, Directive::Else});
927
4
      break;
928
4
    }
929
4
930
31
    case pp_endif: {
931
31
      if (Offsets.empty())
932
0
        return true;
933
31
      int PreviousOffset = Offsets.back().Offset;
934
31
      Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
935
35
      do {
936
35
        Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind;
937
35
        if (Kind == Directive::If)
938
31
          break;
939
4
      } while (!Offsets.empty());
940
31
      break;
941
31
    }
942
134
    default:
943
134
      break;
944
201
    }
945
201
  }
946
68
  return false;
947
68
}
948
949
bool clang::minimizeSourceToDependencyDirectives(
950
    StringRef Input, SmallVectorImpl<char> &Output,
951
    SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,
952
165
    SourceLocation InputSourceLoc) {
953
165
  Output.clear();
954
165
  Tokens.clear();
955
165
  return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
956
165
}