Coverage Report

Created: 2019-01-21 03:01

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/clang/include/clang/AST/CommentLexer.h
Line
Count
Source (jump to first uncovered line)
1
//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
//  This file defines the lexer for structured comments and its token class.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#ifndef LLVM_CLANG_AST_COMMENTLEXER_H
14
#define LLVM_CLANG_AST_COMMENTLEXER_H
15
16
#include "clang/Basic/Diagnostic.h"
17
#include "clang/Basic/SourceManager.h"
18
#include "llvm/ADT/SmallString.h"
19
#include "llvm/ADT/StringRef.h"
20
#include "llvm/Support/Allocator.h"
21
#include "llvm/Support/raw_ostream.h"
22
23
namespace clang {
24
namespace comments {
25
26
class Lexer;
27
class TextTokenRetokenizer;
28
struct CommandInfo;
29
class CommandTraits;
30
31
namespace tok {
32
enum TokenKind {
33
  eof,
34
  newline,
35
  text,
36
  unknown_command,   // Command that does not have an ID.
37
  backslash_command, // Command with an ID, that used backslash marker.
38
  at_command,        // Command with an ID, that used 'at' marker.
39
  verbatim_block_begin,
40
  verbatim_block_line,
41
  verbatim_block_end,
42
  verbatim_line_name,
43
  verbatim_line_text,
44
  html_start_tag,     // <tag
45
  html_ident,         // attr
46
  html_equals,        // =
47
  html_quoted_string, // "blah\"blah" or 'blah\'blah'
48
  html_greater,       // >
49
  html_slash_greater, // />
50
  html_end_tag        // </tag
51
};
52
} // end namespace tok
53
54
/// Comment token.
55
0
class Token {
Unexecuted instantiation: clang::comments::Token::operator=(clang::comments::Token const&)
Unexecuted instantiation: clang::comments::Token::operator=(clang::comments::Token&&)
56
  friend class Lexer;
57
  friend class TextTokenRetokenizer;
58
59
  /// The location of the token.
60
  SourceLocation Loc;
61
62
  /// The actual kind of the token.
63
  tok::TokenKind Kind;
64
65
  /// Length of the token spelling in comment.  Can be 0 for synthesized
66
  /// tokens.
67
  unsigned Length;
68
69
  /// Contains text value associated with a token.
70
  const char *TextPtr;
71
72
  /// Integer value associated with a token.
73
  ///
74
  /// If the token is a known command, contains command ID and TextPtr is
75
  /// unused (command spelling can be found with CommandTraits).  Otherwise,
76
  /// contains the length of the string that starts at TextPtr.
77
  unsigned IntVal;
78
79
public:
80
12.6k
  SourceLocation getLocation() const LLVM_READONLY { return Loc; }
81
18.0k
  void setLocation(SourceLocation SL) { Loc = SL; }
82
83
5.30k
  SourceLocation getEndLocation() const LLVM_READONLY {
84
5.30k
    if (Length == 0 || Length == 1)
85
1.45k
      return Loc;
86
3.85k
    return Loc.getLocWithOffset(Length - 1);
87
3.85k
  }
88
89
10.9k
  tok::TokenKind getKind() const LLVM_READONLY { return Kind; }
90
18.0k
  void setKind(tok::TokenKind K) { Kind = K; }
91
92
32.8k
  bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; }
93
9.59k
  bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; }
94
95
28
  unsigned getLength() const LLVM_READONLY { return Length; }
96
18.0k
  void setLength(unsigned L) { Length = L; }
97
98
7.16k
  StringRef getText() const LLVM_READONLY {
99
7.16k
    assert(is(tok::text));
100
7.16k
    return StringRef(TextPtr, IntVal);
101
7.16k
  }
102
103
6.46k
  void setText(StringRef Text) {
104
6.46k
    assert(is(tok::text));
105
6.46k
    TextPtr = Text.data();
106
6.46k
    IntVal = Text.size();
107
6.46k
  }
108
109
154
  StringRef getUnknownCommandName() const LLVM_READONLY {
110
154
    assert(is(tok::unknown_command));
111
154
    return StringRef(TextPtr, IntVal);
112
154
  }
113
114
298
  void setUnknownCommandName(StringRef Name) {
115
298
    assert(is(tok::unknown_command));
116
298
    TextPtr = Name.data();
117
298
    IntVal = Name.size();
118
298
  }
119
120
5.59k
  unsigned getCommandID() const LLVM_READONLY {
121
5.59k
    assert(is(tok::backslash_command) || is(tok::at_command));
122
5.59k
    return IntVal;
123
5.59k
  }
124
125
1.95k
  void setCommandID(unsigned ID) {
126
1.95k
    assert(is(tok::backslash_command) || is(tok::at_command));
127
1.95k
    IntVal = ID;
128
1.95k
  }
129
130
77
  unsigned getVerbatimBlockID() const LLVM_READONLY {
131
77
    assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
132
77
    return IntVal;
133
77
  }
134
135
89
  void setVerbatimBlockID(unsigned ID) {
136
89
    assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
137
89
    IntVal = ID;
138
89
  }
139
140
56
  StringRef getVerbatimBlockText() const LLVM_READONLY {
141
56
    assert(is(tok::verbatim_block_line));
142
56
    return StringRef(TextPtr, IntVal);
143
56
  }
144
145
67
  void setVerbatimBlockText(StringRef Text) {
146
67
    assert(is(tok::verbatim_block_line));
147
67
    TextPtr = Text.data();
148
67
    IntVal = Text.size();
149
67
  }
150
151
78
  unsigned getVerbatimLineID() const LLVM_READONLY {
152
78
    assert(is(tok::verbatim_line_name));
153
78
    return IntVal;
154
78
  }
155
156
120
  void setVerbatimLineID(unsigned ID) {
157
120
    assert(is(tok::verbatim_line_name));
158
120
    IntVal = ID;
159
120
  }
160
161
74
  StringRef getVerbatimLineText() const LLVM_READONLY {
162
74
    assert(is(tok::verbatim_line_text));
163
74
    return StringRef(TextPtr, IntVal);
164
74
  }
165
166
116
  void setVerbatimLineText(StringRef Text) {
167
116
    assert(is(tok::verbatim_line_text));
168
116
    TextPtr = Text.data();
169
116
    IntVal = Text.size();
170
116
  }
171
172
115
  StringRef getHTMLTagStartName() const LLVM_READONLY {
173
115
    assert(is(tok::html_start_tag));
174
115
    return StringRef(TextPtr, IntVal);
175
115
  }
176
177
162
  void setHTMLTagStartName(StringRef Name) {
178
162
    assert(is(tok::html_start_tag));
179
162
    TextPtr = Name.data();
180
162
    IntVal = Name.size();
181
162
  }
182
183
54
  StringRef getHTMLIdent() const LLVM_READONLY {
184
54
    assert(is(tok::html_ident));
185
54
    return StringRef(TextPtr, IntVal);
186
54
  }
187
188
75
  void setHTMLIdent(StringRef Name) {
189
75
    assert(is(tok::html_ident));
190
75
    TextPtr = Name.data();
191
75
    IntVal = Name.size();
192
75
  }
193
194
28
  StringRef getHTMLQuotedString() const LLVM_READONLY {
195
28
    assert(is(tok::html_quoted_string));
196
28
    return StringRef(TextPtr, IntVal);
197
28
  }
198
199
44
  void setHTMLQuotedString(StringRef Str) {
200
44
    assert(is(tok::html_quoted_string));
201
44
    TextPtr = Str.data();
202
44
    IntVal = Str.size();
203
44
  }
204
205
50
  StringRef getHTMLTagEndName() const LLVM_READONLY {
206
50
    assert(is(tok::html_end_tag));
207
50
    return StringRef(TextPtr, IntVal);
208
50
  }
209
210
81
  void setHTMLTagEndName(StringRef Name) {
211
81
    assert(is(tok::html_end_tag));
212
81
    TextPtr = Name.data();
213
81
    IntVal = Name.size();
214
81
  }
215
216
  void dump(const Lexer &L, const SourceManager &SM) const;
217
};
218
219
/// Comment lexer.
220
class Lexer {
221
private:
222
  Lexer(const Lexer &) = delete;
223
  void operator=(const Lexer &) = delete;
224
225
  /// Allocator for strings that are semantic values of tokens and have to be
226
  /// computed (for example, resolved decimal character references).
227
  llvm::BumpPtrAllocator &Allocator;
228
229
  DiagnosticsEngine &Diags;
230
231
  const CommandTraits &Traits;
232
233
  const char *const BufferStart;
234
  const char *const BufferEnd;
235
  SourceLocation FileLoc;
236
237
  const char *BufferPtr;
238
239
  /// One past end pointer for the current comment.  For BCPL comments points
240
  /// to newline or BufferEnd, for C comments points to star in '*/'.
241
  const char *CommentEnd;
242
243
  enum LexerCommentState {
244
    LCS_BeforeComment,
245
    LCS_InsideBCPLComment,
246
    LCS_InsideCComment,
247
    LCS_BetweenComments
248
  };
249
250
  /// Low-level lexer state; tracks whether we are inside or outside a comment.
251
  LexerCommentState CommentState;
252
253
  enum LexerState {
254
    /// Lexing normal comment text
255
    LS_Normal,
256
257
    /// Finished lexing verbatim block beginning command, will lex first body
258
    /// line.
259
    LS_VerbatimBlockFirstLine,
260
261
    /// Lexing verbatim block body line-by-line, skipping line-starting
262
    /// decorations.
263
    LS_VerbatimBlockBody,
264
265
    /// Finished lexing verbatim line beginning command, will lex text (one
266
    /// line).
267
    LS_VerbatimLineText,
268
269
    /// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes.
270
    LS_HTMLStartTag,
271
272
    /// Finished lexing \verbatim </TAG \endverbatim part, lexing '>'.
273
    LS_HTMLEndTag
274
  };
275
276
  /// Current lexing mode.
277
  LexerState State;
278
279
  /// If State is one of the LS_VerbatimBlock* states, contains the name of the
280
  /// verbatim end command, including the command marker.
281
  SmallString<16> VerbatimBlockEndCommandName;
282
283
  /// If true, commands, HTML tags, etc. will be parsed and reported as
284
  /// separate tokens inside the comment body. If false, the comment text will
285
  /// be parsed into text and newline tokens.
286
  bool ParseCommands;
287
288
  /// Given a character reference name (e.g., "lt"), return the character that
289
  /// it stands for (e.g., "<").
290
  StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
291
292
  /// Given a Unicode codepoint as base-10 integer, return the character.
293
  StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
294
295
  /// Given a Unicode codepoint as base-16 integer, return the character.
296
  StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
297
298
  void formTokenWithChars(Token &Result, const char *TokEnd,
299
                          tok::TokenKind Kind);
300
301
5.28k
  void formTextToken(Token &Result, const char *TokEnd) {
302
5.28k
    StringRef Text(BufferPtr, TokEnd - BufferPtr);
303
5.28k
    formTokenWithChars(Result, TokEnd, tok::text);
304
5.28k
    Result.setText(Text);
305
5.28k
  }
306
307
17.0k
  SourceLocation getSourceLocation(const char *Loc) const {
308
17.0k
    assert(Loc >= BufferStart && Loc <= BufferEnd &&
309
17.0k
           "Location out of range for this buffer!");
310
17.0k
311
17.0k
    const unsigned CharNo = Loc - BufferStart;
312
17.0k
    return FileLoc.getLocWithOffset(CharNo);
313
17.0k
  }
314
315
309
  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) {
316
309
    return Diags.Report(Loc, DiagID);
317
309
  }
318
319
  /// Eat string matching regexp \code \s*\* \endcode.
320
  void skipLineStartingDecorations();
321
322
  /// Lex comment text, including commands if ParseCommands is set to true.
323
  void lexCommentText(Token &T);
324
325
  void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker,
326
                                const CommandInfo *Info);
327
328
  void lexVerbatimBlockFirstLine(Token &T);
329
330
  void lexVerbatimBlockBody(Token &T);
331
332
  void setupAndLexVerbatimLine(Token &T, const char *TextBegin,
333
                               const CommandInfo *Info);
334
335
  void lexVerbatimLineText(Token &T);
336
337
  void lexHTMLCharacterReference(Token &T);
338
339
  void setupAndLexHTMLStartTag(Token &T);
340
341
  void lexHTMLStartTag(Token &T);
342
343
  void setupAndLexHTMLEndTag(Token &T);
344
345
  void lexHTMLEndTag(Token &T);
346
347
public:
348
  Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
349
        const CommandTraits &Traits, SourceLocation FileLoc,
350
        const char *BufferStart, const char *BufferEnd,
351
        bool ParseCommands = true);
352
353
  void lex(Token &T);
354
355
  StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr,
356
                        bool *Invalid = nullptr) const;
357
};
358
359
} // end namespace comments
360
} // end namespace clang
361
362
#endif
363