/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
Line | Count | Source |
1 | | //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H |
10 | | #define LLVM_MC_MCPARSER_MCASMLEXER_H |
11 | | |
12 | | #include "llvm/ADT/ArrayRef.h" |
13 | | #include "llvm/ADT/SmallVector.h" |
14 | | #include "llvm/MC/MCAsmMacro.h" |
15 | | #include <algorithm> |
16 | | #include <cassert> |
17 | | #include <cstddef> |
18 | | #include <cstdint> |
19 | | #include <string> |
20 | | |
21 | | namespace llvm { |
22 | | |
23 | | /// A callback class which is notified of each comment in an assembly file as |
24 | | /// it is lexed. |
25 | | class AsmCommentConsumer { |
26 | | public: |
27 | | virtual ~AsmCommentConsumer() = default; |
28 | |
29 | | /// Callback function for when a comment is lexed. \p Loc is the start of the |
30 | | /// comment text (excluding the comment-start marker). \p CommentText is the |
31 | | /// text of the comment, excluding the comment start and end markers, and the |
32 | | /// newline for single-line comments. |
33 | | virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0; |
34 | | }; |
35 | | |
36 | | |
37 | | /// Generic assembler lexer interface, for use by target specific assembly |
38 | | /// lexers. |
39 | | class MCAsmLexer { |
40 | | /// The current token, stored in the base class for faster access. |
41 | | SmallVector<AsmToken, 1> CurTok; |
42 | |
43 | | /// The location and description of the most recent error set via SetError(). |
44 | | SMLoc ErrLoc; |
45 | | std::string Err; |
46 | |
47 | | protected: // Can only create subclasses. |
48 | | const char *TokStart = nullptr; |
49 | | bool SkipSpace = true; |
50 | | bool AllowAtInIdentifier; |
51 | | bool IsAtStartOfStatement = true; |
52 | | bool LexMasmIntegers = false; |
53 | | AsmCommentConsumer *CommentConsumer = nullptr; |
54 | |
55 | | MCAsmLexer(); |
56 | |
57 | | virtual AsmToken LexToken() = 0; |
58 | |
59 | 2.08M | void SetError(SMLoc errLoc, const std::string &err) { |
60 | 2.08M | ErrLoc = errLoc; |
61 | 2.08M | Err = err; |
62 | 2.08M | } |
63 | |
64 | | public: |
65 | | MCAsmLexer(const MCAsmLexer &) = delete; |
66 | | MCAsmLexer &operator=(const MCAsmLexer &) = delete; |
67 | | virtual ~MCAsmLexer(); |
68 | |
69 | | /// Consume the next token from the input stream and return it. |
70 | | /// |
71 | | /// The lexer will continuously return the end-of-file token once the end of |
72 | | /// the main input file has been reached. |
73 | 13.2M | const AsmToken &Lex() { |
74 | 13.2M | assert(!CurTok.empty()); |
75 | 13.2M | // Record whether the token being consumed is an EndOfStatement. |
76 | 13.2M | IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement; |
77 | 13.2M | CurTok.erase(CurTok.begin()); |
78 | 13.2M | // LexToken may generate multiple tokens via UnLex but will always return |
79 | 13.2M | // the first one. Place returned value at head of CurTok vector. |
80 | 13.2M | if (CurTok.empty()) { |
81 | 13.2M | AsmToken T = LexToken(); |
82 | 13.2M | CurTok.insert(CurTok.begin(), T); |
83 | 13.2M | } |
84 | 13.2M | return CurTok.front(); |
85 | 13.2M | } |
86 | |
87 | 29.9k | void UnLex(AsmToken const &Token) { |
88 | 29.9k | IsAtStartOfStatement = false; |
89 | 29.9k | CurTok.insert(CurTok.begin(), Token); |
90 | 29.9k | } |
91 | |
92 | 78.8k | bool isAtStartOfStatement() { return IsAtStartOfStatement; } |
93 | |
94 | | virtual StringRef LexUntilEndOfStatement() = 0; |
95 | |
96 | | /// Get the current source location. |
97 | | SMLoc getLoc() const; |
98 | |
99 | | /// Get the current (last) lexed token. |
100 | 143M | const AsmToken &getTok() const { |
101 | 143M | return CurTok[0]; |
102 | 143M | } |
103 | |
104 | | /// Look ahead at the next token to be lexed. |
105 | 1.37M | const AsmToken peekTok(bool ShouldSkipSpace = true) { |
106 | 1.37M | AsmToken Tok; |
107 | 1.37M |
108 | 1.37M | MutableArrayRef<AsmToken> Buf(Tok); |
109 | 1.37M | size_t ReadCount = peekTokens(Buf, ShouldSkipSpace); |
110 | 1.37M |
111 | 1.37M | assert(ReadCount == 1); |
112 | 1.37M | (void)ReadCount; |
113 | 1.37M |
114 | 1.37M | return Tok; |
115 | 1.37M | } |
116 | |
117 | | /// Look ahead an arbitrary number of tokens. |
118 | | virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf, |
119 | | bool ShouldSkipSpace = true) = 0; |
120 | |
121 | | /// Get the current error location. |
122 | 2.08M | SMLoc getErrLoc() { |
123 | 2.08M | return ErrLoc; |
124 | 2.08M | } |
125 | |
126 | | /// Get the current error string. |
127 | 2.08M | const std::string &getErr() { |
128 | 2.08M | return Err; |
129 | 2.08M | } |
130 | |
131 | | /// Get the kind of current token. |
132 | 12.0M | AsmToken::TokenKind getKind() const { return getTok().getKind(); } |
133 | |
134 | | /// Check if the current token has kind \p K. |
135 | 37.2M | bool is(AsmToken::TokenKind K) const { return getTok().is(K); } |
136 | |
137 | | /// Check if the current token does not have kind \p K. |
138 | 14.5M | bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); } |
139 | |
140 | | /// Set whether spaces should be ignored by the lexer. |
141 | 592k | void setSkipSpace(bool val) { SkipSpace = val; } |
142 | |
143 | 1.56k | bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } |
144 | 306 | void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } |
145 | |
146 | | void setCommentConsumer(AsmCommentConsumer *CommentConsumer) { |
147 | | this->CommentConsumer = CommentConsumer; |
148 | | } |
149 | |
150 | | /// Set whether to lex masm-style binary and hex literals. They look like |
151 | | /// 0b1101 and 0ABCh respectively. |
152 | 9.89k | void setLexMasmIntegers(bool V) { LexMasmIntegers = V; } |
153 | | }; |
154 | | |
155 | | } // end namespace llvm |
156 | | |
157 | | #endif // LLVM_MC_MCPARSER_MCASMLEXER_H |