Coverage Report

Created: 2020-02-15 09:57

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/lib/AST/RawCommentList.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "clang/AST/RawCommentList.h"
10
#include "clang/AST/ASTContext.h"
11
#include "clang/AST/Comment.h"
12
#include "clang/AST/CommentBriefParser.h"
13
#include "clang/AST/CommentCommandTraits.h"
14
#include "clang/AST/CommentLexer.h"
15
#include "clang/AST/CommentParser.h"
16
#include "clang/AST/CommentSema.h"
17
#include "clang/Basic/CharInfo.h"
18
#include "llvm/ADT/STLExtras.h"
19
20
using namespace clang;
21
22
namespace {
23
/// Get comment kind and bool describing if it is a trailing comment.
24
std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
25
23.9M
                                                        bool ParseAllComments) {
26
23.9M
  const size_t MinCommentLength = ParseAllComments ? 
2224
:
323.9M
;
27
23.9M
  if ((Comment.size() < MinCommentLength) || 
Comment[0] != '/'23.9M
)
28
27.3k
    return std::make_pair(RawComment::RCK_Invalid, false);
29
23.9M
30
23.9M
  RawComment::CommentKind K;
31
23.9M
  if (Comment[1] == '/') {
32
2.49M
    if (Comment.size() < 3)
33
2
      return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
34
2.49M
35
2.49M
    if (Comment[2] == '/')
36
195k
      K = RawComment::RCK_BCPLSlash;
37
2.29M
    else if (Comment[2] == '!')
38
31
      K = RawComment::RCK_BCPLExcl;
39
2.29M
    else
40
2.29M
      return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
41
21.4M
  } else {
42
21.4M
    assert(Comment.size() >= 4);
43
21.4M
44
21.4M
    // Comment lexer does not understand escapes in comment markers, so pretend
45
21.4M
    // that this is not a comment.
46
21.4M
    if (Comment[1] != '*' ||
47
21.4M
        Comment[Comment.size() - 2] != '*' ||
48
21.4M
        
Comment[Comment.size() - 1] != '/'21.4M
)
49
11
      return std::make_pair(RawComment::RCK_Invalid, false);
50
21.4M
51
21.4M
    if (Comment[2] == '*')
52
855
      K = RawComment::RCK_JavaDoc;
53
21.4M
    else if (Comment[2] == '!')
54
79
      K = RawComment::RCK_Qt;
55
21.4M
    else
56
21.4M
      return std::make_pair(RawComment::RCK_OrdinaryC, false);
57
196k
  }
58
196k
  const bool TrailingComment = (Comment.size() > 3) && 
(Comment[3] == '<')191k
;
59
196k
  return std::make_pair(K, TrailingComment);
60
196k
}
61
62
87.1k
/// Returns true if the fourth character of \p Comment exists and is '<'.
bool mergedCommentIsTrailingComment(StringRef Comment) {
  if (Comment.size() <= 3)
    return false;
  return Comment[3] == '<';
}
65
66
/// Returns true if R1 and R2 both have valid locations that start on the same
67
/// column.
68
bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
69
8
                               const RawComment &R2) {
70
8
  SourceLocation L1 = R1.getBeginLoc();
71
8
  SourceLocation L2 = R2.getBeginLoc();
72
8
  bool Invalid = false;
73
8
  unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
74
8
  if (!Invalid) {
75
8
    unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
76
8
    return !Invalid && (C1 == C2);
77
8
  }
78
0
  return false;
79
0
}
80
} // unnamed namespace
81
82
/// Determines whether there is only whitespace in `Buffer` between `P`
83
/// and the previous line.
84
/// \param Buffer The buffer to search in.
85
/// \param P The offset from the beginning of `Buffer` to start from.
86
/// \return true if all of the characters in `Buffer` ranging from the closest
87
/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
88
/// are whitespace.
89
188
static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
90
188
  // Search backwards until we see linefeed or carriage return.
91
316
  for (unsigned I = P; I != 0; 
--I128
) {
92
316
    char C = Buffer[I - 1];
93
316
    if (isVerticalWhitespace(C))
94
166
      return true;
95
150
    if (!isHorizontalWhitespace(C))
96
22
      return false;
97
150
  }
98
188
  // We hit the beginning of the buffer.
99
188
  
return true0
;
100
188
}
101
102
/// Returns whether `K` is an ordinary comment kind.
103
543
static bool isOrdinaryKind(RawComment::CommentKind K) {
104
543
  return (K == RawComment::RCK_OrdinaryBCPL) ||
105
543
         
(K == RawComment::RCK_OrdinaryC)353
;
106
543
}
107
108
/// Builds a RawComment covering \p SR and classifies it.
///
/// \param SourceMgr Used to fetch the comment text and the enclosing buffer.
/// \param SR Source range of the comment; an empty range or empty text yields
/// an RCK_Invalid comment.
/// \param CommentOpts Supplies ParseAllComments, which enables the
/// trailing-comment guess for ordinary comments.
/// \param Merged When true the kind is set to RCK_Merged instead of the
/// detected kind.
RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
                       const CommentOptions &CommentOpts, bool Merged) :
    Range(SR), RawTextValid(false), BriefTextValid(false),
    IsAttached(false), IsTrailingComment(false),
    IsAlmostTrailingComment(false) {
  // Without any text there is nothing to classify.
  if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
    Kind = RCK_Invalid;
    return;
  }

  const std::pair<CommentKind, bool> Detected =
      getCommentKind(RawText, CommentOpts.ParseAllComments);

  // An ordinary comment is treated as trailing when something other than
  // whitespace precedes it on its line.
  if (CommentOpts.ParseAllComments && isOrdinaryKind(Detected.first)) {
    const std::pair<FileID, unsigned> Begin =
        SourceMgr.getDecomposedLoc(Range.getBegin());
    if (Begin.second != 0) {
      bool Invalid = false;
      const char *Buffer =
          SourceMgr.getBufferData(Begin.first, &Invalid).data();
      if (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, Begin.second))
        IsTrailingComment = true;
    }
  }

  if (Merged) {
    Kind = RCK_Merged;
    if (!IsTrailingComment)
      IsTrailingComment = mergedCommentIsTrailingComment(RawText);
    return;
  }

  Kind = Detected.first;
  if (Detected.second)
    IsTrailingComment = true;

  IsAlmostTrailingComment =
      RawText.startswith("//<") || RawText.startswith("/*<");
}
150
151
23.9M
/// Returns the characters of the underlying buffer covered by Range.
///
/// An empty StringRef is returned when the range spans fewer than two
/// characters or when the buffer data cannot be obtained.
StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
  const std::pair<FileID, unsigned> Begin =
      SourceMgr.getDecomposedLoc(Range.getBegin());
  const std::pair<FileID, unsigned> End =
      SourceMgr.getDecomposedLoc(Range.getEnd());

  const unsigned Length = End.second - Begin.second;
  if (Length < 2)
    return StringRef();

  // The comment can't begin in one file and end in another.
  assert(Begin.first == End.first);

  bool Invalid = false;
  const char *BufferStart =
      SourceMgr.getBufferData(Begin.first, &Invalid).data();
  if (Invalid)
    return StringRef();

  return StringRef(BufferStart + Begin.second, Length);
}
176
177
890
const char *RawComment::extractBriefText(const ASTContext &Context) const {
178
890
  // Lazily initialize RawText using the accessor before using it.
179
890
  (void)getRawText(Context.getSourceManager());
180
890
181
890
  // Since we will be copying the resulting text, all allocations made during
182
890
  // parsing are garbage after resulting string is formed.  Thus we can use
183
890
  // a separate allocator for all temporary stuff.
184
890
  llvm::BumpPtrAllocator Allocator;
185
890
186
890
  comments::Lexer L(Allocator, Context.getDiagnostics(),
187
890
                    Context.getCommentCommandTraits(),
188
890
                    Range.getBegin(),
189
890
                    RawText.begin(), RawText.end());
190
890
  comments::BriefParser P(L, Context.getCommentCommandTraits());
191
890
192
890
  const std::string Result = P.Parse();
193
890
  const unsigned BriefTextLength = Result.size();
194
890
  char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
195
890
  memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
196
890
  BriefText = BriefTextPtr;
197
890
  BriefTextValid = true;
198
890
199
890
  return BriefTextPtr;
200
890
}
201
202
/// Runs the comment lexer, semantic analysis and parser over the raw text of
/// this comment and returns the resulting full comment.
///
/// \param Context Supplies the allocator, diagnostics, source manager and
/// command traits used by the lexer, Sema and parser.
/// \param PP Preprocessor handed to comments::Sema.
/// \param D Declaration passed to comments::Sema::setDecl.
comments::FullComment *RawComment::parse(const ASTContext &Context,
                                         const Preprocessor *PP,
                                         const Decl *D) const {
  auto &SM = Context.getSourceManager();

  // Lazily initialize RawText using the accessor before using it.
  (void)getRawText(SM);

  auto &Alloc = Context.getAllocator();
  auto &Diags = Context.getDiagnostics();
  auto &Traits = Context.getCommentCommandTraits();

  comments::Lexer CommentLexer(Alloc, Diags, Traits,
                               getSourceRange().getBegin(), RawText.begin(),
                               RawText.end());

  comments::Sema CommentSema(Alloc, SM, Diags, Traits, PP);
  CommentSema.setDecl(D);

  comments::Parser CommentParser(CommentLexer, CommentSema, Alloc, SM, Diags,
                                 Traits);
  return CommentParser.parseFullComment();
}
223
224
static bool onlyWhitespaceBetween(SourceManager &SM,
225
                                  SourceLocation Loc1, SourceLocation Loc2,
226
107k
                                  unsigned MaxNewlinesAllowed) {
227
107k
  std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
228
107k
  std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
229
107k
230
107k
  // Question does not make sense if locations are in different files.
231
107k
  if (Loc1Info.first != Loc2Info.first)
232
0
    return false;
233
107k
234
107k
  bool Invalid = false;
235
107k
  const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
236
107k
  if (Invalid)
237
0
    return false;
238
107k
239
107k
  unsigned NumNewlines = 0;
240
107k
  assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
241
107k
  // Look for non-whitespace characters and remember any newlines seen.
242
281k
  for (unsigned I = Loc1Info.second; I != Loc2Info.second; 
++I173k
) {
243
193k
    switch (Buffer[I]) {
244
19.4k
    default:
245
19.4k
      return false;
246
66.6k
    case ' ':
247
66.6k
    case '\t':
248
66.6k
    case '\f':
249
66.6k
    case '\v':
250
66.6k
      break;
251
107k
    case '\r':
252
107k
    case '\n':
253
107k
      ++NumNewlines;
254
107k
255
107k
      // Check if we have found more than the maximum allowed number of
256
107k
      // newlines.
257
107k
      if (NumNewlines > MaxNewlinesAllowed)
258
604
        return false;
259
107k
260
107k
      // Collapse \r\n and \n\r into a single newline.
261
107k
      if (I + 1 != Loc2Info.second &&
262
107k
          
(38.9k
Buffer[I + 1] == '\n'38.9k
||
Buffer[I + 1] == '\r'38.3k
) &&
263
107k
          
Buffer[I] != Buffer[I + 1]604
)
264
0
        ++I;
265
107k
      break;
266
193k
    }
267
193k
  }
268
107k
269
107k
  
return true87.1k
;
270
107k
}
271
272
void RawCommentList::addComment(const RawComment &RC,
273
                                const CommentOptions &CommentOpts,
274
23.9M
                                llvm::BumpPtrAllocator &Allocator) {
275
23.9M
  if (RC.isInvalid())
276
27.3k
    return;
277
23.8M
278
23.8M
  // Ordinary comments are not interesting for us.
279
23.8M
  if (RC.isOrdinary() && 
!CommentOpts.ParseAllComments23.7M
)
280
23.7M
    return;
281
109k
282
109k
  std::pair<FileID, unsigned> Loc =
283
109k
      SourceMgr.getDecomposedLoc(RC.getBeginLoc());
284
109k
285
109k
  const FileID CommentFile = Loc.first;
286
109k
  const unsigned CommentOffset = Loc.second;
287
109k
288
109k
  // If this is the first Doxygen comment, save it (because there isn't
289
109k
  // anything to merge it with).
290
109k
  if (OrderedComments[CommentFile].empty()) {
291
1.46k
    OrderedComments[CommentFile][CommentOffset] =
292
1.46k
        new (Allocator) RawComment(RC);
293
1.46k
    return;
294
1.46k
  }
295
107k
296
107k
  const RawComment &C1 = *OrderedComments[CommentFile].rbegin()->second;
297
107k
  const RawComment &C2 = RC;
298
107k
299
107k
  // Merge comments only if there is only whitespace between them.
300
107k
  // Can't merge trailing and non-trailing comments unless the second is
301
107k
  // non-trailing ordinary in the same column, as in the case:
302
107k
  //   int x; // documents x
303
107k
  //          // more text
304
107k
  // versus:
305
107k
  //   int x; // documents x
306
107k
  //   int y; // documents y
307
107k
  // or:
308
107k
  //   int x; // documents x
309
107k
  //   // documents y
310
107k
  //   int y;
311
107k
  // Merge comments if they are on same or consecutive lines.
312
107k
  if ((C1.isTrailingComment() == C2.isTrailingComment() ||
313
107k
       
(641
C1.isTrailingComment()641
&&
!C2.isTrailingComment()319
&&
314
641
        
isOrdinaryKind(C2.getKind())319
&&
315
641
        
commentsStartOnSameColumn(SourceMgr, C1, C2)8
)) &&
316
107k
      onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
317
107k
                            /*MaxNewlinesAllowed=*/1)) {
318
87.1k
    SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
319
87.1k
    *OrderedComments[CommentFile].rbegin()->second =
320
87.1k
        RawComment(SourceMgr, MergedRange, CommentOpts, true);
321
87.1k
  } else {
322
20.7k
    OrderedComments[CommentFile][CommentOffset] =
323
20.7k
        new (Allocator) RawComment(RC);
324
20.7k
  }
325
107k
}
326
327
const std::map<unsigned, RawComment *> *
328
2.45k
RawCommentList::getCommentsInFile(FileID File) const {
329
2.45k
  auto CommentsInFile = OrderedComments.find(File);
330
2.45k
  if (CommentsInFile == OrderedComments.end())
331
4
    return nullptr;
332
2.44k
333
2.44k
  return &CommentsInFile->second;
334
2.44k
}
335
336
21.8k
/// Returns true if OrderedComments holds no entries.
bool RawCommentList::empty() const {
  return OrderedComments.empty();
}
337
338
unsigned RawCommentList::getCommentBeginLine(RawComment *C, FileID File,
339
132
                                             unsigned Offset) const {
340
132
  auto Cached = CommentBeginLine.find(C);
341
132
  if (Cached != CommentBeginLine.end())
342
18
    return Cached->second;
343
114
  const unsigned Line = SourceMgr.getLineNumber(File, Offset);
344
114
  CommentBeginLine[C] = Line;
345
114
  return Line;
346
114
}
347
348
2.01k
/// Returns the decomposed offset of the end of \p C's source range, caching
/// the answer in CommentEndOffset so it is computed once per comment.
unsigned RawCommentList::getCommentEndOffset(RawComment *C) const {
  const auto Known = CommentEndOffset.find(C);
  if (Known == CommentEndOffset.end()) {
    // First query for this comment: compute and remember the offset.
    const unsigned Offset =
        SourceMgr.getDecomposedLoc(C->getSourceRange().getEnd()).second;
    CommentEndOffset[C] = Offset;
    return Offset;
  }
  return Known->second;
}
357
358
std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
359
6
                                         DiagnosticsEngine &Diags) const {
360
6
  llvm::StringRef CommentText = getRawText(SourceMgr);
361
6
  if (CommentText.empty())
362
0
    return "";
363
6
364
6
  llvm::BumpPtrAllocator Allocator;
365
6
  // We do not parse any commands, so CommentOptions are ignored by
366
6
  // comments::Lexer. Therefore, we just use default-constructed options.
367
6
  CommentOptions DefOpts;
368
6
  comments::CommandTraits EmptyTraits(Allocator, DefOpts);
369
6
  comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
370
6
                    CommentText.begin(), CommentText.end(),
371
6
                    /*ParseCommands=*/false);
372
6
373
6
  std::string Result;
374
6
  // A column number of the first non-whitespace token in the comment text.
375
6
  // We skip whitespace up to this column, but keep the whitespace after this
376
6
  // column. IndentColumn is calculated when lexing the first line and reused
377
6
  // for the rest of lines.
378
6
  unsigned IndentColumn = 0;
379
6
380
6
  // Processes one line of the comment and adds it to the result.
381
6
  // Handles skipping the indent at the start of the line.
382
6
  // Returns false when eof is reached and true otherwise.
383
36
  auto LexLine = [&](bool IsFirstLine) -> bool {
384
36
    comments::Token Tok;
385
36
    // Lex the first token on the line. We handle it separately, because we to
386
36
    // fix up its indentation.
387
36
    L.lex(Tok);
388
36
    if (Tok.is(comments::tok::eof))
389
6
      return false;
390
30
    if (Tok.is(comments::tok::newline)) {
391
2
      Result += "\n";
392
2
      return true;
393
2
    }
394
28
    llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
395
28
    bool LocInvalid = false;
396
28
    unsigned TokColumn =
397
28
        SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
398
28
    assert(!LocInvalid && "getFormattedText for invalid location");
399
28
400
28
    // Amount of leading whitespace in TokText.
401
28
    size_t WhitespaceLen = TokText.find_first_not_of(" \t");
402
28
    if (WhitespaceLen == StringRef::npos)
403
0
      WhitespaceLen = TokText.size();
404
28
    // Remember the amount of whitespace we skipped in the first line to remove
405
28
    // indent up to that column in the following lines.
406
28
    if (IsFirstLine)
407
6
      IndentColumn = TokColumn + WhitespaceLen;
408
28
409
28
    // Amount of leading whitespace we actually want to skip.
410
28
    // For the first line we skip all the whitespace.
411
28
    // For the rest of the lines, we skip whitespace up to IndentColumn.
412
28
    unsigned SkipLen =
413
28
        IsFirstLine
414
28
            ? 
WhitespaceLen6
415
28
            : std::min<size_t>(
416
22
                  WhitespaceLen,
417
22
                  std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
418
28
    llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
419
28
    Result += Trimmed;
420
28
    // Lex all tokens in the rest of the line.
421
28
    for (L.lex(Tok); Tok.isNot(comments::tok::eof); 
L.lex(Tok)0
) {
422
28
      if (Tok.is(comments::tok::newline)) {
423
28
        Result += "\n";
424
28
        return true;
425
28
      }
426
0
      Result += L.getSpelling(Tok, SourceMgr);
427
0
    }
428
28
    // We've reached the end of file token.
429
28
    
return false0
;
430
28
  };
431
6
432
6
  auto DropTrailingNewLines = [](std::string &Str) {
433
14
    while (Str.back() == '\n')
434
8
      Str.pop_back();
435
6
  };
436
6
437
6
  // Process first line separately to remember indent for the following lines.
438
6
  if (!LexLine(/*IsFirstLine=*/true)) {
439
0
    DropTrailingNewLines(Result);
440
0
    return Result;
441
0
  }
442
6
  // Process the rest of the lines.
443
30
  
while (6
LexLine(/*IsFirstLine=*/false))
444
24
    ;
445
6
  DropTrailingNewLines(Result);
446
6
  return Result;
447
6
}