Coverage Report

Created: 2018-09-25 23:22

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/clang/include/clang/AST/CommentLexer.h
Line
Count
Source (jump to first uncovered line)
1
//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
//
10
//  This file defines the lexer for structured comments and its supporting
//  token class.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_CLANG_AST_COMMENTLEXER_H
15
#define LLVM_CLANG_AST_COMMENTLEXER_H
16
17
#include "clang/Basic/Diagnostic.h"
18
#include "clang/Basic/SourceManager.h"
19
#include "llvm/ADT/SmallString.h"
20
#include "llvm/ADT/StringRef.h"
21
#include "llvm/Support/Allocator.h"
22
#include "llvm/Support/raw_ostream.h"
23
24
namespace clang {
25
namespace comments {
26
27
class Lexer;
28
class TextTokenRetokenizer;
29
struct CommandInfo;
30
class CommandTraits;
31
32
namespace tok {
33
enum TokenKind {
34
  eof,
35
  newline,
36
  text,
37
  unknown_command,   // Command that does not have an ID.
38
  backslash_command, // Command with an ID, that used backslash marker.
39
  at_command,        // Command with an ID, that used 'at' marker.
40
  verbatim_block_begin,
41
  verbatim_block_line,
42
  verbatim_block_end,
43
  verbatim_line_name,
44
  verbatim_line_text,
45
  html_start_tag,     // <tag
46
  html_ident,         // attr
47
  html_equals,        // =
48
  html_quoted_string, // "blah\"blah" or 'blah\'blah'
49
  html_greater,       // >
50
  html_slash_greater, // />
51
  html_end_tag        // </tag
52
};
53
} // end namespace tok
54
55
/// Comment token.
56
0
class Token {
Unexecuted instantiation: clang::comments::Token::operator=(clang::comments::Token const&)
Unexecuted instantiation: clang::comments::Token::operator=(clang::comments::Token&&)
57
  friend class Lexer;
58
  friend class TextTokenRetokenizer;
59
60
  /// The location of the token.
61
  SourceLocation Loc;
62
63
  /// The actual kind of the token.
64
  tok::TokenKind Kind;
65
66
  /// Length of the token spelling in comment.  Can be 0 for synthesized
67
  /// tokens.
68
  unsigned Length;
69
70
  /// Contains text value associated with a token.
71
  const char *TextPtr;
72
73
  /// Integer value associated with a token.
74
  ///
75
  /// If the token is a known command, contains command ID and TextPtr is
76
  /// unused (command spelling can be found with CommandTraits).  Otherwise,
77
  /// contains the length of the string that starts at TextPtr.
78
  unsigned IntVal;
79
80
public:
81
12.6k
  SourceLocation getLocation() const LLVM_READONLY { return Loc; }
82
17.9k
  void setLocation(SourceLocation SL) { Loc = SL; }
83
84
5.30k
  SourceLocation getEndLocation() const LLVM_READONLY {
85
5.30k
    if (Length == 0 || Length == 1)
86
1.45k
      return Loc;
87
3.84k
    return Loc.getLocWithOffset(Length - 1);
88
3.84k
  }
89
90
10.9k
  tok::TokenKind getKind() const LLVM_READONLY { return Kind; }
91
17.9k
  void setKind(tok::TokenKind K) { Kind = K; }
92
93
32.6k
  bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; }
94
9.56k
  bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; }
95
96
28
  unsigned getLength() const LLVM_READONLY { return Length; }
97
17.9k
  void setLength(unsigned L) { Length = L; }
98
99
7.16k
  StringRef getText() const LLVM_READONLY {
100
7.16k
    assert(is(tok::text));
101
7.16k
    return StringRef(TextPtr, IntVal);
102
7.16k
  }
103
104
6.45k
  void setText(StringRef Text) {
105
6.45k
    assert(is(tok::text));
106
6.45k
    TextPtr = Text.data();
107
6.45k
    IntVal = Text.size();
108
6.45k
  }
109
110
154
  StringRef getUnknownCommandName() const LLVM_READONLY {
111
154
    assert(is(tok::unknown_command));
112
154
    return StringRef(TextPtr, IntVal);
113
154
  }
114
115
298
  void setUnknownCommandName(StringRef Name) {
116
298
    assert(is(tok::unknown_command));
117
298
    TextPtr = Name.data();
118
298
    IntVal = Name.size();
119
298
  }
120
121
5.59k
  unsigned getCommandID() const LLVM_READONLY {
122
5.59k
    assert(is(tok::backslash_command) || is(tok::at_command));
123
5.59k
    return IntVal;
124
5.59k
  }
125
126
1.95k
  void setCommandID(unsigned ID) {
127
1.95k
    assert(is(tok::backslash_command) || is(tok::at_command));
128
1.95k
    IntVal = ID;
129
1.95k
  }
130
131
65
  unsigned getVerbatimBlockID() const LLVM_READONLY {
132
65
    assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
133
65
    return IntVal;
134
65
  }
135
136
71
  void setVerbatimBlockID(unsigned ID) {
137
71
    assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
138
71
    IntVal = ID;
139
71
  }
140
141
50
  StringRef getVerbatimBlockText() const LLVM_READONLY {
142
50
    assert(is(tok::verbatim_block_line));
143
50
    return StringRef(TextPtr, IntVal);
144
50
  }
145
146
58
  void setVerbatimBlockText(StringRef Text) {
147
58
    assert(is(tok::verbatim_block_line));
148
58
    TextPtr = Text.data();
149
58
    IntVal = Text.size();
150
58
  }
151
152
78
  unsigned getVerbatimLineID() const LLVM_READONLY {
153
78
    assert(is(tok::verbatim_line_name));
154
78
    return IntVal;
155
78
  }
156
157
120
  void setVerbatimLineID(unsigned ID) {
158
120
    assert(is(tok::verbatim_line_name));
159
120
    IntVal = ID;
160
120
  }
161
162
74
  StringRef getVerbatimLineText() const LLVM_READONLY {
163
74
    assert(is(tok::verbatim_line_text));
164
74
    return StringRef(TextPtr, IntVal);
165
74
  }
166
167
116
  void setVerbatimLineText(StringRef Text) {
168
116
    assert(is(tok::verbatim_line_text));
169
116
    TextPtr = Text.data();
170
116
    IntVal = Text.size();
171
116
  }
172
173
115
  StringRef getHTMLTagStartName() const LLVM_READONLY {
174
115
    assert(is(tok::html_start_tag));
175
115
    return StringRef(TextPtr, IntVal);
176
115
  }
177
178
162
  void setHTMLTagStartName(StringRef Name) {
179
162
    assert(is(tok::html_start_tag));
180
162
    TextPtr = Name.data();
181
162
    IntVal = Name.size();
182
162
  }
183
184
54
  StringRef getHTMLIdent() const LLVM_READONLY {
185
54
    assert(is(tok::html_ident));
186
54
    return StringRef(TextPtr, IntVal);
187
54
  }
188
189
75
  void setHTMLIdent(StringRef Name) {
190
75
    assert(is(tok::html_ident));
191
75
    TextPtr = Name.data();
192
75
    IntVal = Name.size();
193
75
  }
194
195
28
  StringRef getHTMLQuotedString() const LLVM_READONLY {
196
28
    assert(is(tok::html_quoted_string));
197
28
    return StringRef(TextPtr, IntVal);
198
28
  }
199
200
44
  void setHTMLQuotedString(StringRef Str) {
201
44
    assert(is(tok::html_quoted_string));
202
44
    TextPtr = Str.data();
203
44
    IntVal = Str.size();
204
44
  }
205
206
50
  StringRef getHTMLTagEndName() const LLVM_READONLY {
207
50
    assert(is(tok::html_end_tag));
208
50
    return StringRef(TextPtr, IntVal);
209
50
  }
210
211
81
  void setHTMLTagEndName(StringRef Name) {
212
81
    assert(is(tok::html_end_tag));
213
81
    TextPtr = Name.data();
214
81
    IntVal = Name.size();
215
81
  }
216
217
  void dump(const Lexer &L, const SourceManager &SM) const;
218
};
219
220
/// Comment lexer.
221
class Lexer {
222
private:
223
  Lexer(const Lexer &) = delete;
224
  void operator=(const Lexer &) = delete;
225
226
  /// Allocator for strings that are semantic values of tokens and have to be
227
  /// computed (for example, resolved decimal character references).
228
  llvm::BumpPtrAllocator &Allocator;
229
230
  DiagnosticsEngine &Diags;
231
232
  const CommandTraits &Traits;
233
234
  const char *const BufferStart;
235
  const char *const BufferEnd;
236
  SourceLocation FileLoc;
237
238
  const char *BufferPtr;
239
240
  /// One past end pointer for the current comment.  For BCPL comments points
241
  /// to newline or BufferEnd, for C comments points to star in '*/'.
242
  const char *CommentEnd;
243
244
  enum LexerCommentState {
245
    LCS_BeforeComment,
246
    LCS_InsideBCPLComment,
247
    LCS_InsideCComment,
248
    LCS_BetweenComments
249
  };
250
251
  /// Low-level lexer state: tracks whether we are inside or outside a comment.
252
  LexerCommentState CommentState;
253
254
  enum LexerState {
255
    /// Lexing normal comment text
256
    LS_Normal,
257
258
    /// Finished lexing verbatim block beginning command, will lex first body
259
    /// line.
260
    LS_VerbatimBlockFirstLine,
261
262
    /// Lexing verbatim block body line-by-line, skipping line-starting
263
    /// decorations.
264
    LS_VerbatimBlockBody,
265
266
    /// Finished lexing verbatim line beginning command, will lex text (one
267
    /// line).
268
    LS_VerbatimLineText,
269
270
    /// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes.
271
    LS_HTMLStartTag,
272
273
    /// Finished lexing \verbatim </TAG \endverbatim part, lexing '>'.
274
    LS_HTMLEndTag
275
  };
276
277
  /// Current lexing mode.
278
  LexerState State;
279
280
  /// If State is LS_VerbatimBlockFirstLine or LS_VerbatimBlockBody, contains
281
  /// the name of the verbatim end command, including the command marker.
282
  SmallString<16> VerbatimBlockEndCommandName;
283
284
  /// If true, the commands, HTML tags, etc. will be parsed and reported as
285
  /// separate tokens inside the comment body. If false, the comment text will
286
  /// be parsed into text and newline tokens.
287
  bool ParseCommands;
288
289
  /// Given a character reference name (e.g., "lt"), return the character that
290
  /// it stands for (e.g., "<").
291
  StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
292
293
  /// Given a Unicode codepoint as base-10 integer, return the character.
294
  StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
295
296
  /// Given a Unicode codepoint as base-16 integer, return the character.
297
  StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
298
299
  void formTokenWithChars(Token &Result, const char *TokEnd,
300
                          tok::TokenKind Kind);
301
302
5.28k
  void formTextToken(Token &Result, const char *TokEnd) {
303
5.28k
    StringRef Text(BufferPtr, TokEnd - BufferPtr);
304
5.28k
    formTokenWithChars(Result, TokEnd, tok::text);
305
5.28k
    Result.setText(Text);
306
5.28k
  }
307
308
16.9k
  SourceLocation getSourceLocation(const char *Loc) const {
309
16.9k
    assert(Loc >= BufferStart && Loc <= BufferEnd &&
310
16.9k
           "Location out of range for this buffer!");
311
16.9k
312
16.9k
    const unsigned CharNo = Loc - BufferStart;
313
16.9k
    return FileLoc.getLocWithOffset(CharNo);
314
16.9k
  }
315
316
309
  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) {
317
309
    return Diags.Report(Loc, DiagID);
318
309
  }
319
320
  /// Eat string matching regexp \code \s*\* \endcode.
321
  void skipLineStartingDecorations();
322
323
  /// Lex comment text, including commands if ParseCommands is set to true.
324
  void lexCommentText(Token &T);
325
326
  void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker,
327
                                const CommandInfo *Info);
328
329
  void lexVerbatimBlockFirstLine(Token &T);
330
331
  void lexVerbatimBlockBody(Token &T);
332
333
  void setupAndLexVerbatimLine(Token &T, const char *TextBegin,
334
                               const CommandInfo *Info);
335
336
  void lexVerbatimLineText(Token &T);
337
338
  void lexHTMLCharacterReference(Token &T);
339
340
  void setupAndLexHTMLStartTag(Token &T);
341
342
  void lexHTMLStartTag(Token &T);
343
344
  void setupAndLexHTMLEndTag(Token &T);
345
346
  void lexHTMLEndTag(Token &T);
347
348
public:
349
  Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
350
        const CommandTraits &Traits, SourceLocation FileLoc,
351
        const char *BufferStart, const char *BufferEnd,
352
        bool ParseCommands = true);
353
354
  void lex(Token &T);
355
356
  StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr,
357
                        bool *Invalid = nullptr) const;
358
};
359
360
} // end namespace comments
361
} // end namespace clang
362
363
#endif
364