Coverage Report

Created: 2022-05-17 06:19

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/include/clang/AST/CommentLexer.h
Line
Count
Source (jump to first uncovered line)
1
//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
//  This file defines the lexer for structured comments and the supporting token class.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#ifndef LLVM_CLANG_AST_COMMENTLEXER_H
14
#define LLVM_CLANG_AST_COMMENTLEXER_H
15
16
#include "clang/Basic/Diagnostic.h"
17
#include "clang/Basic/SourceManager.h"
18
#include "llvm/ADT/SmallString.h"
19
#include "llvm/ADT/StringRef.h"
20
#include "llvm/Support/Allocator.h"
21
#include "llvm/Support/raw_ostream.h"
22
23
namespace clang {
24
namespace comments {
25
26
class Lexer;
27
class TextTokenRetokenizer;
28
struct CommandInfo;
29
class CommandTraits;
30
31
namespace tok {
32
enum TokenKind {
33
  eof,
34
  newline,
35
  text,
36
  unknown_command,   // Command that does not have an ID.
37
  backslash_command, // Command with an ID, that used backslash marker.
38
  at_command,        // Command with an ID, that used 'at' marker.
39
  verbatim_block_begin,
40
  verbatim_block_line,
41
  verbatim_block_end,
42
  verbatim_line_name,
43
  verbatim_line_text,
44
  html_start_tag,     // <tag
45
  html_ident,         // attr
46
  html_equals,        // =
47
  html_quoted_string, // "blah\"blah" or 'blah\'blah'
48
  html_greater,       // >
49
  html_slash_greater, // />
50
  html_end_tag        // </tag
51
};
52
} // end namespace tok
53
54
/// Comment token.
55
class Token {
56
  friend class Lexer;
57
  friend class TextTokenRetokenizer;
58
59
  /// The location of the token.
60
  SourceLocation Loc;
61
62
  /// The actual kind of the token.
63
  tok::TokenKind Kind;
64
65
  /// Integer value associated with a token.
66
  ///
67
  /// If the token is a known command, contains command ID and TextPtr is
68
  /// unused (command spelling can be found with CommandTraits).  Otherwise,
69
  /// contains the length of the string that starts at TextPtr.
70
  unsigned IntVal;
71
72
  /// Length of the token spelling in comment.  Can be 0 for synthesized
73
  /// tokens.
74
  unsigned Length;
75
76
  /// Contains text value associated with a token.
77
  const char *TextPtr;
78
79
public:
80
64.8k
  SourceLocation getLocation() const LLVM_READONLY { return Loc; }
81
95.4k
  void setLocation(SourceLocation SL) { Loc = SL; }
82
83
48.2k
  SourceLocation getEndLocation() const LLVM_READONLY {
84
48.2k
    if (Length == 0 || Length == 1)
85
9.69k
      return Loc;
86
38.5k
    return Loc.getLocWithOffset(Length - 1);
87
48.2k
  }
88
89
85.2k
  tok::TokenKind getKind() const LLVM_READONLY { return Kind; }
90
95.4k
  void setKind(tok::TokenKind K) { Kind = K; }
91
92
309k
  bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; }
93
36.9k
  bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; }
94
95
42
  unsigned getLength() const LLVM_READONLY { return Length; }
96
95.4k
  void setLength(unsigned L) { Length = L; }
97
98
66.0k
  StringRef getText() const LLVM_READONLY {
99
66.0k
    assert(is(tok::text));
100
0
    return StringRef(TextPtr, IntVal);
101
66.0k
  }
102
103
45.6k
  void setText(StringRef Text) {
104
45.6k
    assert(is(tok::text));
105
0
    TextPtr = Text.data();
106
45.6k
    IntVal = Text.size();
107
45.6k
  }
108
109
154
  StringRef getUnknownCommandName() const LLVM_READONLY {
110
154
    assert(is(tok::unknown_command));
111
0
    return StringRef(TextPtr, IntVal);
112
154
  }
113
114
291
  void setUnknownCommandName(StringRef Name) {
115
291
    assert(is(tok::unknown_command));
116
0
    TextPtr = Name.data();
117
291
    IntVal = Name.size();
118
291
  }
119
120
35.2k
  unsigned getCommandID() const LLVM_READONLY {
121
35.2k
    assert(is(tok::backslash_command) || is(tok::at_command));
122
0
    return IntVal;
123
35.2k
  }
124
125
9.72k
  void setCommandID(unsigned ID) {
126
9.72k
    assert(is(tok::backslash_command) || is(tok::at_command));
127
0
    IntVal = ID;
128
9.72k
  }
129
130
227
  unsigned getVerbatimBlockID() const LLVM_READONLY {
131
227
    assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
132
0
    return IntVal;
133
227
  }
134
135
239
  void setVerbatimBlockID(unsigned ID) {
136
239
    assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
137
0
    IntVal = ID;
138
239
  }
139
140
907
  StringRef getVerbatimBlockText() const LLVM_READONLY {
141
907
    assert(is(tok::verbatim_block_line));
142
0
    return StringRef(TextPtr, IntVal);
143
907
  }
144
145
918
  void setVerbatimBlockText(StringRef Text) {
146
918
    assert(is(tok::verbatim_block_line));
147
0
    TextPtr = Text.data();
148
918
    IntVal = Text.size();
149
918
  }
150
151
116
  unsigned getVerbatimLineID() const LLVM_READONLY {
152
116
    assert(is(tok::verbatim_line_name));
153
0
    return IntVal;
154
116
  }
155
156
161
  void setVerbatimLineID(unsigned ID) {
157
161
    assert(is(tok::verbatim_line_name));
158
0
    IntVal = ID;
159
161
  }
160
161
112
  StringRef getVerbatimLineText() const LLVM_READONLY {
162
112
    assert(is(tok::verbatim_line_text));
163
0
    return StringRef(TextPtr, IntVal);
164
112
  }
165
166
157
  void setVerbatimLineText(StringRef Text) {
167
157
    assert(is(tok::verbatim_line_text));
168
0
    TextPtr = Text.data();
169
157
    IntVal = Text.size();
170
157
  }
171
172
172
  StringRef getHTMLTagStartName() const LLVM_READONLY {
173
172
    assert(is(tok::html_start_tag));
174
0
    return StringRef(TextPtr, IntVal);
175
172
  }
176
177
219
  void setHTMLTagStartName(StringRef Name) {
178
219
    assert(is(tok::html_start_tag));
179
0
    TextPtr = Name.data();
180
219
    IntVal = Name.size();
181
219
  }
182
183
65
  StringRef getHTMLIdent() const LLVM_READONLY {
184
65
    assert(is(tok::html_ident));
185
0
    return StringRef(TextPtr, IntVal);
186
65
  }
187
188
86
  void setHTMLIdent(StringRef Name) {
189
86
    assert(is(tok::html_ident));
190
0
    TextPtr = Name.data();
191
86
    IntVal = Name.size();
192
86
  }
193
194
31
  StringRef getHTMLQuotedString() const LLVM_READONLY {
195
31
    assert(is(tok::html_quoted_string));
196
0
    return StringRef(TextPtr, IntVal);
197
31
  }
198
199
48
  void setHTMLQuotedString(StringRef Str) {
200
48
    assert(is(tok::html_quoted_string));
201
0
    TextPtr = Str.data();
202
48
    IntVal = Str.size();
203
48
  }
204
205
89
  StringRef getHTMLTagEndName() const LLVM_READONLY {
206
89
    assert(is(tok::html_end_tag));
207
0
    return StringRef(TextPtr, IntVal);
208
89
  }
209
210
120
  void setHTMLTagEndName(StringRef Name) {
211
120
    assert(is(tok::html_end_tag));
212
0
    TextPtr = Name.data();
213
120
    IntVal = Name.size();
214
120
  }
215
216
  void dump(const Lexer &L, const SourceManager &SM) const;
217
};
218
219
/// Comment lexer.
220
class Lexer {
221
private:
222
  Lexer(const Lexer &) = delete;
223
  void operator=(const Lexer &) = delete;
224
225
  /// Allocator for strings that are semantic values of tokens and have to be
226
  /// computed (for example, resolved decimal character references).
227
  llvm::BumpPtrAllocator &Allocator;
228
229
  DiagnosticsEngine &Diags;
230
231
  const CommandTraits &Traits;
232
233
  const char *const BufferStart;
234
  const char *const BufferEnd;
235
236
  const char *BufferPtr;
237
238
  /// One past end pointer for the current comment.  For BCPL comments points
239
  /// to newline or BufferEnd, for C comments points to star in '*/'.
240
  const char *CommentEnd;
241
242
  SourceLocation FileLoc;
243
244
  /// If true, the commands, html tags, etc will be parsed and reported as
245
  /// separate tokens inside the comment body. If false, the comment text will
246
  /// be parsed into text and newline tokens.
247
  bool ParseCommands;
248
249
  enum LexerCommentState : uint8_t {
250
    LCS_BeforeComment,
251
    LCS_InsideBCPLComment,
252
    LCS_InsideCComment,
253
    LCS_BetweenComments
254
  };
255
256
  /// Low-level lexer state; tracks whether we are inside or outside a comment.
257
  LexerCommentState CommentState;
258
259
  enum LexerState : uint8_t {
260
    /// Lexing normal comment text
261
    LS_Normal,
262
263
    /// Finished lexing verbatim block beginning command, will lex first body
264
    /// line.
265
    LS_VerbatimBlockFirstLine,
266
267
    /// Lexing verbatim block body line-by-line, skipping line-starting
268
    /// decorations.
269
    LS_VerbatimBlockBody,
270
271
    /// Finished lexing verbatim line beginning command, will lex text (one
272
    /// line).
273
    LS_VerbatimLineText,
274
275
    /// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes.
276
    LS_HTMLStartTag,
277
278
    /// Finished lexing \verbatim </TAG \endverbatim part, lexing '>'.
279
    LS_HTMLEndTag
280
  };
281
282
  /// Current lexing mode.
283
  LexerState State;
284
285
  /// If State is an LS_VerbatimBlock* state, contains the name of verbatim end
286
  /// command, including command marker.
287
  SmallString<16> VerbatimBlockEndCommandName;
288
289
  /// Given a character reference name (e.g., "lt"), return the character that
290
  /// it stands for (e.g., "<").
291
  StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
292
293
  /// Given a Unicode codepoint as base-10 integer, return the character.
294
  StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
295
296
  /// Given a Unicode codepoint as base-16 integer, return the character.
297
  StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
298
299
  void formTokenWithChars(Token &Result, const char *TokEnd,
300
                          tok::TokenKind Kind);
301
302
39.9k
  void formTextToken(Token &Result, const char *TokEnd) {
303
39.9k
    StringRef Text(BufferPtr, TokEnd - BufferPtr);
304
39.9k
    formTokenWithChars(Result, TokEnd, tok::text);
305
39.9k
    Result.setText(Text);
306
39.9k
  }
307
308
89.8k
  SourceLocation getSourceLocation(const char *Loc) const {
309
89.8k
    assert(Loc >= BufferStart && Loc <= BufferEnd &&
310
89.8k
           "Location out of range for this buffer!");
311
312
0
    const unsigned CharNo = Loc - BufferStart;
313
89.8k
    return FileLoc.getLocWithOffset(CharNo);
314
89.8k
  }
315
316
314
  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) {
317
314
    return Diags.Report(Loc, DiagID);
318
314
  }
319
320
  /// Eat string matching regexp \code \s*\* \endcode.
321
  void skipLineStartingDecorations();
322
323
  /// Skip over pure text.
324
  const char *skipTextToken();
325
326
  /// Lex comment text, including commands if ParseCommands is set to true.
327
  void lexCommentText(Token &T);
328
329
  void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker,
330
                                const CommandInfo *Info);
331
332
  void lexVerbatimBlockFirstLine(Token &T);
333
334
  void lexVerbatimBlockBody(Token &T);
335
336
  void setupAndLexVerbatimLine(Token &T, const char *TextBegin,
337
                               const CommandInfo *Info);
338
339
  void lexVerbatimLineText(Token &T);
340
341
  void lexHTMLCharacterReference(Token &T);
342
343
  void setupAndLexHTMLStartTag(Token &T);
344
345
  void lexHTMLStartTag(Token &T);
346
347
  void setupAndLexHTMLEndTag(Token &T);
348
349
  void lexHTMLEndTag(Token &T);
350
351
public:
352
  Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
353
        const CommandTraits &Traits, SourceLocation FileLoc,
354
        const char *BufferStart, const char *BufferEnd,
355
        bool ParseCommands = true);
356
357
  void lex(Token &T);
358
359
  StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr) const;
360
};
361
362
} // end namespace comments
363
} // end namespace clang
364
365
#endif
366