Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/clang/lib/AST/RawCommentList.cpp
Line
Count
Source (jump to first uncovered line)
1
//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "clang/AST/RawCommentList.h"
10
#include "clang/AST/ASTContext.h"
11
#include "clang/AST/Comment.h"
12
#include "clang/AST/CommentBriefParser.h"
13
#include "clang/AST/CommentCommandTraits.h"
14
#include "clang/AST/CommentLexer.h"
15
#include "clang/AST/CommentParser.h"
16
#include "clang/AST/CommentSema.h"
17
#include "clang/Basic/CharInfo.h"
18
#include "llvm/ADT/STLExtras.h"
19
20
using namespace clang;
21
22
namespace {
23
/// Get comment kind and bool describing if it is a trailing comment.
24
std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
25
4.99M
                                                        bool ParseAllComments) {
26
4.99M
  const size_t MinCommentLength = ParseAllComments ? 
2224
:
34.99M
;
27
4.99M
  if ((Comment.size() < MinCommentLength) || 
Comment[0] != '/'4.64M
)
28
351k
    return std::make_pair(RawComment::RCK_Invalid, false);
29
4.63M
30
4.63M
  RawComment::CommentKind K;
31
4.63M
  if (Comment[1] == '/') {
32
4.09M
    if (Comment.size() < 3)
33
2
      return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
34
4.09M
35
4.09M
    if (Comment[2] == '/')
36
212k
      K = RawComment::RCK_BCPLSlash;
37
3.88M
    else if (Comment[2] == '!')
38
27.1k
      K = RawComment::RCK_BCPLExcl;
39
3.86M
    else
40
3.86M
      return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
41
539k
  } else {
42
539k
    assert(Comment.size() >= 4);
43
539k
44
539k
    // Comment lexer does not understand escapes in comment markers, so pretend
45
539k
    // that this is not a comment.
46
539k
    if (Comment[1] != '*' ||
47
539k
        Comment[Comment.size() - 2] != '*' ||
48
539k
        
Comment[Comment.size() - 1] != '/'539k
)
49
24
      return std::make_pair(RawComment::RCK_Invalid, false);
50
539k
51
539k
    if (Comment[2] == '*')
52
53.9k
      K = RawComment::RCK_JavaDoc;
53
485k
    else if (Comment[2] == '!')
54
7.31k
      K = RawComment::RCK_Qt;
55
478k
    else
56
478k
      return std::make_pair(RawComment::RCK_OrdinaryC, false);
57
300k
  }
58
300k
  const bool TrailingComment = (Comment.size() > 3) && 
(Comment[3] == '<')298k
;
59
300k
  return std::make_pair(K, TrailingComment);
60
300k
}
61
62
91.5k
bool mergedCommentIsTrailingComment(StringRef Comment) {
63
91.5k
  return (Comment.size() > 3) && (Comment[3] == '<');
64
91.5k
}
65
66
/// Returns true if R1 and R2 both have valid locations that start on the same
67
/// column.
68
bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
69
8
                               const RawComment &R2) {
70
8
  SourceLocation L1 = R1.getBeginLoc();
71
8
  SourceLocation L2 = R2.getBeginLoc();
72
8
  bool Invalid = false;
73
8
  unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
74
8
  if (!Invalid) {
75
8
    unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
76
8
    return !Invalid && (C1 == C2);
77
8
  }
78
0
  return false;
79
0
}
80
} // unnamed namespace
81
82
/// Determines whether there is only whitespace in `Buffer` between `P`
83
/// and the previous line.
84
/// \param Buffer The buffer to search in.
85
/// \param P The offset from the beginning of `Buffer` to start from.
86
/// \return true if all of the characters in `Buffer` ranging from the closest
87
/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
88
/// are whitespace.
89
188
static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
90
188
  // Search backwards until we see linefeed or carriage return.
91
316
  for (unsigned I = P; I != 0; 
--I128
) {
92
316
    char C = Buffer[I - 1];
93
316
    if (isVerticalWhitespace(C))
94
166
      return true;
95
150
    if (!isHorizontalWhitespace(C))
96
22
      return false;
97
150
  }
98
188
  // We hit the beginning of the buffer.
99
188
  
return true0
;
100
188
}
101
102
/// Returns whether `K` is an ordinary comment kind.
103
2.06k
static bool isOrdinaryKind(RawComment::CommentKind K) {
104
2.06k
  return (K == RawComment::RCK_OrdinaryBCPL) ||
105
2.06k
         
(K == RawComment::RCK_OrdinaryC)1.87k
;
106
2.06k
}
107
108
RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
109
                       const CommentOptions &CommentOpts, bool Merged) :
110
    Range(SR), RawTextValid(false), BriefTextValid(false),
111
    IsAttached(false), IsTrailingComment(false),
112
4.99M
    IsAlmostTrailingComment(false) {
113
4.99M
  // Extract raw comment text, if possible.
114
4.99M
  if (SR.getBegin() == SR.getEnd() || 
getRawText(SourceMgr).empty()4.99M
) {
115
1.84k
    Kind = RCK_Invalid;
116
1.84k
    return;
117
1.84k
  }
118
4.99M
119
4.99M
  // Guess comment kind.
120
4.99M
  std::pair<CommentKind, bool> K =
121
4.99M
      getCommentKind(RawText, CommentOpts.ParseAllComments);
122
4.99M
123
4.99M
  // Guess whether an ordinary comment is trailing.
124
4.99M
  if (CommentOpts.ParseAllComments && 
isOrdinaryKind(K.first)224
) {
125
190
    FileID BeginFileID;
126
190
    unsigned BeginOffset;
127
190
    std::tie(BeginFileID, BeginOffset) =
128
190
        SourceMgr.getDecomposedLoc(Range.getBegin());
129
190
    if (BeginOffset != 0) {
130
188
      bool Invalid = false;
131
188
      const char *Buffer =
132
188
          SourceMgr.getBufferData(BeginFileID, &Invalid).data();
133
188
      IsTrailingComment |=
134
188
          (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
135
188
    }
136
190
  }
137
4.99M
138
4.99M
  if (!Merged) {
139
4.89M
    Kind = K.first;
140
4.89M
    IsTrailingComment |= K.second;
141
4.89M
142
4.89M
    IsAlmostTrailingComment = RawText.startswith("//<") ||
143
4.89M
                                 
RawText.startswith("/*<")4.89M
;
144
4.89M
  } else {
145
91.5k
    Kind = RCK_Merged;
146
91.5k
    IsTrailingComment =
147
91.5k
        IsTrailingComment || 
mergedCommentIsTrailingComment(RawText)91.5k
;
148
91.5k
  }
149
4.99M
}
150
151
4.99M
StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
152
4.99M
  FileID BeginFileID;
153
4.99M
  FileID EndFileID;
154
4.99M
  unsigned BeginOffset;
155
4.99M
  unsigned EndOffset;
156
4.99M
157
4.99M
  std::tie(BeginFileID, BeginOffset) =
158
4.99M
      SourceMgr.getDecomposedLoc(Range.getBegin());
159
4.99M
  std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
160
4.99M
161
4.99M
  const unsigned Length = EndOffset - BeginOffset;
162
4.99M
  if (Length < 2)
163
2
    return StringRef();
164
4.99M
165
4.99M
  // The comment can't begin in one file and end in another.
166
4.99M
  assert(BeginFileID == EndFileID);
167
4.99M
168
4.99M
  bool Invalid = false;
169
4.99M
  const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
170
4.99M
                                                    &Invalid).data();
171
4.99M
  if (Invalid)
172
0
    return StringRef();
173
4.99M
174
4.99M
  return StringRef(BufferStart + BeginOffset, Length);
175
4.99M
}
176
177
859
const char *RawComment::extractBriefText(const ASTContext &Context) const {
178
859
  // Lazily initialize RawText using the accessor before using it.
179
859
  (void)getRawText(Context.getSourceManager());
180
859
181
859
  // Since we will be copying the resulting text, all allocations made during
182
859
  // parsing are garbage after resulting string is formed.  Thus we can use
183
859
  // a separate allocator for all temporary stuff.
184
859
  llvm::BumpPtrAllocator Allocator;
185
859
186
859
  comments::Lexer L(Allocator, Context.getDiagnostics(),
187
859
                    Context.getCommentCommandTraits(),
188
859
                    Range.getBegin(),
189
859
                    RawText.begin(), RawText.end());
190
859
  comments::BriefParser P(L, Context.getCommentCommandTraits());
191
859
192
859
  const std::string Result = P.Parse();
193
859
  const unsigned BriefTextLength = Result.size();
194
859
  char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
195
859
  memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
196
859
  BriefText = BriefTextPtr;
197
859
  BriefTextValid = true;
198
859
199
859
  return BriefTextPtr;
200
859
}
201
202
comments::FullComment *RawComment::parse(const ASTContext &Context,
203
                                         const Preprocessor *PP,
204
1.18k
                                         const Decl *D) const {
205
1.18k
  // Lazily initialize RawText using the accessor before using it.
206
1.18k
  (void)getRawText(Context.getSourceManager());
207
1.18k
208
1.18k
  comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
209
1.18k
                    Context.getCommentCommandTraits(),
210
1.18k
                    getSourceRange().getBegin(),
211
1.18k
                    RawText.begin(), RawText.end());
212
1.18k
  comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
213
1.18k
                   Context.getDiagnostics(),
214
1.18k
                   Context.getCommentCommandTraits(),
215
1.18k
                   PP);
216
1.18k
  S.setDecl(D);
217
1.18k
  comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
218
1.18k
                     Context.getDiagnostics(),
219
1.18k
                     Context.getCommentCommandTraits());
220
1.18k
221
1.18k
  return P.parseFullComment();
222
1.18k
}
223
224
static bool onlyWhitespaceBetween(SourceManager &SM,
225
                                  SourceLocation Loc1, SourceLocation Loc2,
226
199k
                                  unsigned MaxNewlinesAllowed) {
227
199k
  std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
228
199k
  std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
229
199k
230
199k
  // Question does not make sense if locations are in different files.
231
199k
  if (Loc1Info.first != Loc2Info.first)
232
7.87k
    return false;
233
191k
234
191k
  bool Invalid = false;
235
191k
  const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
236
191k
  if (Invalid)
237
0
    return false;
238
191k
239
191k
  unsigned NumNewlines = 0;
240
191k
  assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
241
191k
  // Look for non-whitespace characters and remember any newlines seen.
242
478k
  for (unsigned I = Loc1Info.second; I != Loc2Info.second; 
++I287k
) {
243
387k
    switch (Buffer[I]) {
244
387k
    default:
245
83.7k
      return false;
246
387k
    case ' ':
247
96.2k
    case '\t':
248
96.2k
    case '\f':
249
96.2k
    case '\v':
250
96.2k
      break;
251
207k
    case '\r':
252
207k
    case '\n':
253
207k
      ++NumNewlines;
254
207k
255
207k
      // Check if we have found more than the maximum allowed number of
256
207k
      // newlines.
257
207k
      if (NumNewlines > MaxNewlinesAllowed)
258
16.2k
        return false;
259
190k
260
190k
      // Collapse \r\n and \n\r into a single newline.
261
190k
      if (I + 1 != Loc2Info.second &&
262
190k
          
(104k
Buffer[I + 1] == '\n'104k
||
Buffer[I + 1] == '\r'88.3k
) &&
263
190k
          
Buffer[I] != Buffer[I + 1]16.0k
)
264
114
        ++I;
265
190k
      break;
266
387k
    }
267
387k
  }
268
191k
269
191k
  
return true91.5k
;
270
191k
}
271
272
void RawCommentList::addComment(const RawComment &RC,
273
                                const CommentOptions &CommentOpts,
274
4.89M
                                llvm::BumpPtrAllocator &Allocator) {
275
4.89M
  if (RC.isInvalid())
276
351k
    return;
277
4.54M
278
4.54M
  // Check if the comments are not in source order.
279
4.54M
  
while (4.54M
!Comments.empty() &&
280
4.54M
         !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getBeginLoc(),
281
2.27M
                                              RC.getBeginLoc())) {
282
4
    // If they are, just pop a few last comments that don't fit.
283
4
    // This happens if an \#include directive contains comments.
284
4
    Comments.pop_back();
285
4
  }
286
4.54M
287
4.54M
  // Ordinary comments are not interesting for us.
288
4.54M
  if (RC.isOrdinary() && 
!CommentOpts.ParseAllComments4.33M
)
289
4.33M
    return;
290
209k
291
209k
  // If this is the first Doxygen comment, save it (because there isn't
292
209k
  // anything to merge it with).
293
209k
  if (Comments.empty()) {
294
6.57k
    Comments.push_back(new (Allocator) RawComment(RC));
295
6.57k
    return;
296
6.57k
  }
297
203k
298
203k
  const RawComment &C1 = *Comments.back();
299
203k
  const RawComment &C2 = RC;
300
203k
301
203k
  // Merge comments only if there is only whitespace between them.
302
203k
  // Can't merge trailing and non-trailing comments unless the second is
303
203k
  // non-trailing ordinary in the same column, as in the case:
304
203k
  //   int x; // documents x
305
203k
  //          // more text
306
203k
  // versus:
307
203k
  //   int x; // documents x
308
203k
  //   int y; // documents y
309
203k
  // or:
310
203k
  //   int x; // documents x
311
203k
  //   // documents y
312
203k
  //   int y;
313
203k
  // Merge comments if they are on same or consecutive lines.
314
203k
  if ((C1.isTrailingComment() == C2.isTrailingComment() ||
315
203k
       
(3.67k
C1.isTrailingComment()3.67k
&&
!C2.isTrailingComment()1.83k
&&
316
3.67k
        
isOrdinaryKind(C2.getKind())1.83k
&&
317
3.67k
        
commentsStartOnSameColumn(SourceMgr, C1, C2)8
)) &&
318
203k
      onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
319
199k
                            /*MaxNewlinesAllowed=*/1)) {
320
91.5k
    SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
321
91.5k
    *Comments.back() = RawComment(SourceMgr, MergedRange, CommentOpts, true);
322
111k
  } else {
323
111k
    Comments.push_back(new (Allocator) RawComment(RC));
324
111k
  }
325
203k
}
326
327
155
void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) {
328
155
  std::vector<RawComment *> MergedComments;
329
155
  MergedComments.reserve(Comments.size() + DeserializedComments.size());
330
155
331
155
  std::merge(Comments.begin(), Comments.end(),
332
155
             DeserializedComments.begin(), DeserializedComments.end(),
333
155
             std::back_inserter(MergedComments),
334
155
             BeforeThanCompare<RawComment>(SourceMgr));
335
155
  std::swap(Comments, MergedComments);
336
155
}
337
338
std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
339
6
                                         DiagnosticsEngine &Diags) const {
340
6
  llvm::StringRef CommentText = getRawText(SourceMgr);
341
6
  if (CommentText.empty())
342
0
    return "";
343
6
344
6
  llvm::BumpPtrAllocator Allocator;
345
6
  // We do not parse any commands, so CommentOptions are ignored by
346
6
  // comments::Lexer. Therefore, we just use default-constructed options.
347
6
  CommentOptions DefOpts;
348
6
  comments::CommandTraits EmptyTraits(Allocator, DefOpts);
349
6
  comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
350
6
                    CommentText.begin(), CommentText.end(),
351
6
                    /*ParseCommands=*/false);
352
6
353
6
  std::string Result;
354
6
  // A column number of the first non-whitespace token in the comment text.
355
6
  // We skip whitespace up to this column, but keep the whitespace after this
356
6
  // column. IndentColumn is calculated when lexing the first line and reused
357
6
  // for the rest of lines.
358
6
  unsigned IndentColumn = 0;
359
6
360
6
  // Processes one line of the comment and adds it to the result.
361
6
  // Handles skipping the indent at the start of the line.
362
6
  // Returns false when eof is reached and true otherwise.
363
36
  auto LexLine = [&](bool IsFirstLine) -> bool {
364
36
    comments::Token Tok;
365
36
    // Lex the first token on the line. We handle it separately, because we to
366
36
    // fix up its indentation.
367
36
    L.lex(Tok);
368
36
    if (Tok.is(comments::tok::eof))
369
6
      return false;
370
30
    if (Tok.is(comments::tok::newline)) {
371
2
      Result += "\n";
372
2
      return true;
373
2
    }
374
28
    llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
375
28
    bool LocInvalid = false;
376
28
    unsigned TokColumn =
377
28
        SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
378
28
    assert(!LocInvalid && "getFormattedText for invalid location");
379
28
380
28
    // Amount of leading whitespace in TokText.
381
28
    size_t WhitespaceLen = TokText.find_first_not_of(" \t");
382
28
    if (WhitespaceLen == StringRef::npos)
383
0
      WhitespaceLen = TokText.size();
384
28
    // Remember the amount of whitespace we skipped in the first line to remove
385
28
    // indent up to that column in the following lines.
386
28
    if (IsFirstLine)
387
6
      IndentColumn = TokColumn + WhitespaceLen;
388
28
389
28
    // Amount of leading whitespace we actually want to skip.
390
28
    // For the first line we skip all the whitespace.
391
28
    // For the rest of the lines, we skip whitespace up to IndentColumn.
392
28
    unsigned SkipLen =
393
28
        IsFirstLine
394
28
            ? 
WhitespaceLen6
395
28
            : std::min<size_t>(
396
22
                  WhitespaceLen,
397
22
                  std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
398
28
    llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
399
28
    Result += Trimmed;
400
28
    // Lex all tokens in the rest of the line.
401
28
    for (L.lex(Tok); Tok.isNot(comments::tok::eof); 
L.lex(Tok)0
) {
402
28
      if (Tok.is(comments::tok::newline)) {
403
28
        Result += "\n";
404
28
        return true;
405
28
      }
406
0
      Result += L.getSpelling(Tok, SourceMgr);
407
0
    }
408
28
    // We've reached the end of file token.
409
28
    
return false0
;
410
28
  };
411
6
412
6
  auto DropTrailingNewLines = [](std::string &Str) {
413
14
    while (Str.back() == '\n')
414
8
      Str.pop_back();
415
6
  };
416
6
417
6
  // Process first line separately to remember indent for the following lines.
418
6
  if (!LexLine(/*IsFirstLine=*/true)) {
419
0
    DropTrailingNewLines(Result);
420
0
    return Result;
421
0
  }
422
6
  // Process the rest of the lines.
423
30
  
while (6
LexLine(/*IsFirstLine=*/false))
424
24
    ;
425
6
  DropTrailingNewLines(Result);
426
6
  return Result;
427
6
}