/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/tools/clang/lib/AST/RawCommentList.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | |
10 | | #include "clang/AST/RawCommentList.h" |
11 | | #include "clang/AST/ASTContext.h" |
12 | | #include "clang/AST/Comment.h" |
13 | | #include "clang/AST/CommentBriefParser.h" |
14 | | #include "clang/AST/CommentCommandTraits.h" |
15 | | #include "clang/AST/CommentLexer.h" |
16 | | #include "clang/AST/CommentParser.h" |
17 | | #include "clang/AST/CommentSema.h" |
18 | | #include "clang/Basic/CharInfo.h" |
19 | | #include "llvm/ADT/STLExtras.h" |
20 | | |
21 | | using namespace clang; |
22 | | |
23 | | namespace { |
24 | | /// Get comment kind and bool describing if it is a trailing comment. |
25 | | std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment, |
26 | 3.05M | bool ParseAllComments) { |
27 | 3.05M | const size_t MinCommentLength = ParseAllComments ? 2224 : 33.05M ; |
28 | 3.05M | if ((Comment.size() < MinCommentLength) || 3.05M Comment[0] != '/'2.96M ) |
29 | 95.4k | return std::make_pair(RawComment::RCK_Invalid, false); |
30 | 2.96M | |
31 | 2.96M | RawComment::CommentKind K; |
32 | 2.96M | if (Comment[1] == '/'2.96M ) { |
33 | 1.84M | if (Comment.size() < 3) |
34 | 2 | return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); |
35 | 1.84M | |
36 | 1.84M | if (1.84M Comment[2] == '/'1.84M ) |
37 | 188k | K = RawComment::RCK_BCPLSlash; |
38 | 1.65M | else if (1.65M Comment[2] == '!'1.65M ) |
39 | 53.3k | K = RawComment::RCK_BCPLExcl; |
40 | 1.65M | else |
41 | 1.60M | return std::make_pair(RawComment::RCK_OrdinaryBCPL, false); |
42 | 1.11M | } else { |
43 | 1.11M | assert(Comment.size() >= 4); |
44 | 1.11M | |
45 | 1.11M | // Comment lexer does not understand escapes in comment markers, so pretend |
46 | 1.11M | // that this is not a comment. |
47 | 1.11M | if (Comment[1] != '*' || |
48 | 1.11M | Comment[Comment.size() - 2] != '*' || |
49 | 1.11M | Comment[Comment.size() - 1] != '/') |
50 | 21 | return std::make_pair(RawComment::RCK_Invalid, false); |
51 | 1.11M | |
52 | 1.11M | if (1.11M Comment[2] == '*'1.11M ) |
53 | 109k | K = RawComment::RCK_JavaDoc; |
54 | 1.00M | else if (1.00M Comment[2] == '!'1.00M ) |
55 | 10.3k | K = RawComment::RCK_Qt; |
56 | 1.00M | else |
57 | 994k | return std::make_pair(RawComment::RCK_OrdinaryC, false); |
58 | 362k | } |
59 | 362k | const bool TrailingComment = (Comment.size() > 3) && 362k (Comment[3] == '<')361k ; |
60 | 3.05M | return std::make_pair(K, TrailingComment); |
61 | 3.05M | } |
62 | | |
63 | 83.5k | bool mergedCommentIsTrailingComment(StringRef Comment) { |
64 | 83.5k | return (Comment.size() > 3) && (Comment[3] == '<'); |
65 | 83.5k | } |
66 | | |
67 | | /// Returns true if R1 and R2 both have valid locations that start on the same |
68 | | /// column. |
69 | | bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1, |
70 | 8 | const RawComment &R2) { |
71 | 8 | SourceLocation L1 = R1.getLocStart(); |
72 | 8 | SourceLocation L2 = R2.getLocStart(); |
73 | 8 | bool Invalid = false; |
74 | 8 | unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid); |
75 | 8 | if (!Invalid8 ) { |
76 | 8 | unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid); |
77 | 8 | return !Invalid && (C1 == C2); |
78 | 8 | } |
79 | 0 | return false; |
80 | 0 | } |
81 | | } // unnamed namespace |
82 | | |
83 | | /// \brief Determines whether there is only whitespace in `Buffer` between `P` |
84 | | /// and the previous line. |
85 | | /// \param Buffer The buffer to search in. |
86 | | /// \param P The offset from the beginning of `Buffer` to start from. |
87 | | /// \return true if all of the characters in `Buffer` ranging from the closest |
88 | | /// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1` |
89 | | /// are whitespace. |
90 | 188 | static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) { |
91 | 188 | // Search backwards until we see linefeed or carriage return. |
92 | 316 | for (unsigned I = P; I != 0316 ; --I128 ) { |
93 | 316 | char C = Buffer[I - 1]; |
94 | 316 | if (isVerticalWhitespace(C)) |
95 | 166 | return true; |
96 | 150 | if (150 !isHorizontalWhitespace(C)150 ) |
97 | 22 | return false; |
98 | 316 | } |
99 | 188 | // We hit the beginning of the buffer. |
100 | 0 | return true; |
101 | 188 | } |
102 | | |
103 | | /// Returns whether `K` is an ordinary comment kind. |
104 | 3.48k | static bool isOrdinaryKind(RawComment::CommentKind K) { |
105 | 3.48k | return (K == RawComment::RCK_OrdinaryBCPL) || |
106 | 3.29k | (K == RawComment::RCK_OrdinaryC); |
107 | 3.48k | } |
108 | | |
109 | | RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, |
110 | | bool Merged, bool ParseAllComments) : |
111 | | Range(SR), RawTextValid(false), BriefTextValid(false), |
112 | | IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false), |
113 | 3.05M | ParseAllComments(ParseAllComments) { |
114 | 3.05M | // Extract raw comment text, if possible. |
115 | 3.05M | if (SR.getBegin() == SR.getEnd() || 3.05M getRawText(SourceMgr).empty()3.05M ) { |
116 | 1.83k | Kind = RCK_Invalid; |
117 | 1.83k | return; |
118 | 1.83k | } |
119 | 3.05M | |
120 | 3.05M | // Guess comment kind. |
121 | 3.05M | std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments); |
122 | 3.05M | |
123 | 3.05M | // Guess whether an ordinary comment is trailing. |
124 | 3.05M | if (ParseAllComments && 3.05M isOrdinaryKind(K.first)224 ) { |
125 | 190 | FileID BeginFileID; |
126 | 190 | unsigned BeginOffset; |
127 | 190 | std::tie(BeginFileID, BeginOffset) = |
128 | 190 | SourceMgr.getDecomposedLoc(Range.getBegin()); |
129 | 190 | if (BeginOffset != 0190 ) { |
130 | 188 | bool Invalid = false; |
131 | 188 | const char *Buffer = |
132 | 188 | SourceMgr.getBufferData(BeginFileID, &Invalid).data(); |
133 | 188 | IsTrailingComment |= |
134 | 188 | (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset)); |
135 | 188 | } |
136 | 190 | } |
137 | 3.05M | |
138 | 3.05M | if (!Merged3.05M ) { |
139 | 2.97M | Kind = K.first; |
140 | 2.97M | IsTrailingComment |= K.second; |
141 | 2.97M | |
142 | 2.97M | IsAlmostTrailingComment = RawText.startswith("//<") || |
143 | 2.97M | RawText.startswith("/*<"); |
144 | 3.05M | } else { |
145 | 83.5k | Kind = RCK_Merged; |
146 | 83.5k | IsTrailingComment = |
147 | 83.5k | IsTrailingComment || mergedCommentIsTrailingComment(RawText); |
148 | 83.5k | } |
149 | 3.05M | } |
150 | | |
151 | 3.05M | StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const { |
152 | 3.05M | FileID BeginFileID; |
153 | 3.05M | FileID EndFileID; |
154 | 3.05M | unsigned BeginOffset; |
155 | 3.05M | unsigned EndOffset; |
156 | 3.05M | |
157 | 3.05M | std::tie(BeginFileID, BeginOffset) = |
158 | 3.05M | SourceMgr.getDecomposedLoc(Range.getBegin()); |
159 | 3.05M | std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd()); |
160 | 3.05M | |
161 | 3.05M | const unsigned Length = EndOffset - BeginOffset; |
162 | 3.05M | if (Length < 2) |
163 | 2 | return StringRef(); |
164 | 3.05M | |
165 | 3.05M | // The comment can't begin in one file and end in another. |
166 | 3.05M | assert(BeginFileID == EndFileID); |
167 | 3.05M | |
168 | 3.05M | bool Invalid = false; |
169 | 3.05M | const char *BufferStart = SourceMgr.getBufferData(BeginFileID, |
170 | 3.05M | &Invalid).data(); |
171 | 3.05M | if (Invalid) |
172 | 0 | return StringRef(); |
173 | 3.05M | |
174 | 3.05M | return StringRef(BufferStart + BeginOffset, Length); |
175 | 3.05M | } |
176 | | |
177 | 856 | const char *RawComment::extractBriefText(const ASTContext &Context) const { |
178 | 856 | // Lazily initialize RawText using the accessor before using it. |
179 | 856 | (void)getRawText(Context.getSourceManager()); |
180 | 856 | |
181 | 856 | // Since we will be copying the resulting text, all allocations made during |
182 | 856 | // parsing are garbage after resulting string is formed. Thus we can use |
183 | 856 | // a separate allocator for all temporary stuff. |
184 | 856 | llvm::BumpPtrAllocator Allocator; |
185 | 856 | |
186 | 856 | comments::Lexer L(Allocator, Context.getDiagnostics(), |
187 | 856 | Context.getCommentCommandTraits(), |
188 | 856 | Range.getBegin(), |
189 | 856 | RawText.begin(), RawText.end()); |
190 | 856 | comments::BriefParser P(L, Context.getCommentCommandTraits()); |
191 | 856 | |
192 | 856 | const std::string Result = P.Parse(); |
193 | 856 | const unsigned BriefTextLength = Result.size(); |
194 | 856 | char *BriefTextPtr = new (Context) char[BriefTextLength + 1]; |
195 | 856 | memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1); |
196 | 856 | BriefText = BriefTextPtr; |
197 | 856 | BriefTextValid = true; |
198 | 856 | |
199 | 856 | return BriefTextPtr; |
200 | 856 | } |
201 | | |
202 | | comments::FullComment *RawComment::parse(const ASTContext &Context, |
203 | | const Preprocessor *PP, |
204 | 1.15k | const Decl *D) const { |
205 | 1.15k | // Lazily initialize RawText using the accessor before using it. |
206 | 1.15k | (void)getRawText(Context.getSourceManager()); |
207 | 1.15k | |
208 | 1.15k | comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(), |
209 | 1.15k | Context.getCommentCommandTraits(), |
210 | 1.15k | getSourceRange().getBegin(), |
211 | 1.15k | RawText.begin(), RawText.end()); |
212 | 1.15k | comments::Sema S(Context.getAllocator(), Context.getSourceManager(), |
213 | 1.15k | Context.getDiagnostics(), |
214 | 1.15k | Context.getCommentCommandTraits(), |
215 | 1.15k | PP); |
216 | 1.15k | S.setDecl(D); |
217 | 1.15k | comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(), |
218 | 1.15k | Context.getDiagnostics(), |
219 | 1.15k | Context.getCommentCommandTraits()); |
220 | 1.15k | |
221 | 1.15k | return P.parseFullComment(); |
222 | 1.15k | } |
223 | | |
224 | | static bool onlyWhitespaceBetween(SourceManager &SM, |
225 | | SourceLocation Loc1, SourceLocation Loc2, |
226 | 266k | unsigned MaxNewlinesAllowed) { |
227 | 266k | std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1); |
228 | 266k | std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2); |
229 | 266k | |
230 | 266k | // Question does not make sense if locations are in different files. |
231 | 266k | if (Loc1Info.first != Loc2Info.first) |
232 | 13.0k | return false; |
233 | 253k | |
234 | 253k | bool Invalid = false; |
235 | 253k | const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data(); |
236 | 253k | if (Invalid) |
237 | 0 | return false; |
238 | 253k | |
239 | 253k | unsigned NumNewlines = 0; |
240 | 253k | assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!"); |
241 | 253k | // Look for non-whitespace characters and remember any newlines seen. |
242 | 666k | for (unsigned I = Loc1Info.second; I != Loc2Info.second666k ; ++I413k ) { |
243 | 583k | switch (Buffer[I]) { |
244 | 138k | default: |
245 | 138k | return false; |
246 | 161k | case ' ': |
247 | 161k | case '\t': |
248 | 161k | case '\f': |
249 | 161k | case '\v': |
250 | 161k | break; |
251 | 283k | case '\r': |
252 | 283k | case '\n': |
253 | 283k | ++NumNewlines; |
254 | 283k | |
255 | 283k | // Check if we have found more than the maximum allowed number of |
256 | 283k | // newlines. |
257 | 283k | if (NumNewlines > MaxNewlinesAllowed) |
258 | 31.4k | return false; |
259 | 252k | |
260 | 252k | // Collapse \r\n and \n\r into a single newline. |
261 | 252k | if (252k I + 1 != Loc2Info.second && |
262 | 173k | (Buffer[I + 1] == '\n' || 173k Buffer[I + 1] == '\r'142k ) && |
263 | 30.9k | Buffer[I] != Buffer[I + 1]) |
264 | 226 | ++I; |
265 | 161k | break; |
266 | 583k | } |
267 | 583k | } |
268 | 253k | |
269 | 83.5k | return true; |
270 | 266k | } |
271 | | |
272 | | void RawCommentList::addComment(const RawComment &RC, |
273 | 2.97M | llvm::BumpPtrAllocator &Allocator) { |
274 | 2.97M | if (RC.isInvalid()) |
275 | 95.4k | return; |
276 | 2.87M | |
277 | 2.87M | // Check if the comments are not in source order. |
278 | 2.87M | while (2.87M !Comments.empty() && |
279 | 1.29M | !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getLocStart(), |
280 | 2.87M | RC.getLocStart())) { |
281 | 4 | // If they are, just pop a few last comments that don't fit. |
282 | 4 | // This happens if an \#include directive contains comments. |
283 | 4 | Comments.pop_back(); |
284 | 4 | } |
285 | 2.87M | |
286 | 2.87M | // Ordinary comments are not interesting for us. |
287 | 2.87M | if (RC.isOrdinary()) |
288 | 2.59M | return; |
289 | 278k | |
290 | 278k | // If this is the first Doxygen comment, save it (because there isn't |
291 | 278k | // anything to merge it with). |
292 | 278k | if (278k Comments.empty()278k ) { |
293 | 6.04k | Comments.push_back(new (Allocator) RawComment(RC)); |
294 | 6.04k | return; |
295 | 6.04k | } |
296 | 272k | |
297 | 272k | const RawComment &C1 = *Comments.back(); |
298 | 272k | const RawComment &C2 = RC; |
299 | 272k | |
300 | 272k | // Merge comments only if there is only whitespace between them. |
301 | 272k | // Can't merge trailing and non-trailing comments unless the second is |
302 | 272k | // non-trailing ordinary in the same column, as in the case: |
303 | 272k | // int x; // documents x |
304 | 272k | // // more text |
305 | 272k | // versus: |
306 | 272k | // int x; // documents x |
307 | 272k | // int y; // documents y |
308 | 272k | // or: |
309 | 272k | // int x; // documents x |
310 | 272k | // // documents y |
311 | 272k | // int y; |
312 | 272k | // Merge comments if they are on same or consecutive lines. |
313 | 272k | if ((C1.isTrailingComment() == C2.isTrailingComment() || |
314 | 6.52k | (C1.isTrailingComment() && 6.52k !C2.isTrailingComment()3.26k && |
315 | 3.26k | isOrdinaryKind(C2.getKind()) && |
316 | 6.52k | commentsStartOnSameColumn(SourceMgr, C1, C2))) && |
317 | 266k | onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(), |
318 | 272k | /*MaxNewlinesAllowed=*/1)) { |
319 | 83.5k | SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd()); |
320 | 83.5k | *Comments.back() = RawComment(SourceMgr, MergedRange, true, |
321 | 83.5k | RC.isParseAllComments()); |
322 | 272k | } else { |
323 | 189k | Comments.push_back(new (Allocator) RawComment(RC)); |
324 | 189k | } |
325 | 2.97M | } |
326 | | |
327 | 159 | void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) { |
328 | 159 | std::vector<RawComment *> MergedComments; |
329 | 159 | MergedComments.reserve(Comments.size() + DeserializedComments.size()); |
330 | 159 | |
331 | 159 | std::merge(Comments.begin(), Comments.end(), |
332 | 159 | DeserializedComments.begin(), DeserializedComments.end(), |
333 | 159 | std::back_inserter(MergedComments), |
334 | 159 | BeforeThanCompare<RawComment>(SourceMgr)); |
335 | 159 | std::swap(Comments, MergedComments); |
336 | 159 | } |