/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===// |
2 | | // |
3 | | // The LLVM Compiler Infrastructure |
4 | | // |
5 | | // This file is distributed under the University of Illinois Open Source |
6 | | // License. See LICENSE.TXT for details. |
7 | | // |
8 | | //===----------------------------------------------------------------------===// |
9 | | |
10 | | #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H |
11 | | #define LLVM_MC_MCPARSER_MCASMLEXER_H |
12 | | |
13 | | #include "llvm/ADT/APInt.h" |
14 | | #include "llvm/ADT/ArrayRef.h" |
15 | | #include "llvm/ADT/SmallVector.h" |
16 | | #include "llvm/ADT/StringRef.h" |
17 | | #include "llvm/Support/SMLoc.h" |
18 | | #include <algorithm> |
19 | | #include <cassert> |
20 | | #include <cstddef> |
21 | | #include <cstdint> |
22 | | #include <string> |
23 | | |
24 | | namespace llvm { |
25 | | |
26 | | /// Target independent representation for an assembler token. |
27 | | class AsmToken { |
28 | | public: |
29 | | enum TokenKind { |
30 | | // Markers |
31 | | Eof, Error, |
32 | | |
33 | | // String values. |
34 | | Identifier, |
35 | | String, |
36 | | |
37 | | // Integer values. |
38 | | Integer, |
39 | | BigNum, // larger than 64 bits |
40 | | |
41 | | // Real values. |
42 | | Real, |
43 | | |
44 | | // Comments |
45 | | Comment, |
46 | | HashDirective, |
47 | | // No-value. |
48 | | EndOfStatement, |
49 | | Colon, |
50 | | Space, |
51 | | Plus, Minus, Tilde, |
52 | | Slash, // '/' |
53 | | BackSlash, // '\' |
54 | | LParen, RParen, LBrac, RBrac, LCurly, RCurly, |
55 | | Star, Dot, Comma, Dollar, Equal, EqualEqual, |
56 | | |
57 | | Pipe, PipePipe, Caret, |
58 | | Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, |
59 | | Less, LessEqual, LessLess, LessGreater, |
60 | | Greater, GreaterEqual, GreaterGreater, At, |
61 | | |
62 | | // MIPS unary expression operators such as %neg. |
63 | | PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi, |
64 | | PercentDtprel_Lo, PercentGot, PercentGot_Disp, PercentGot_Hi, PercentGot_Lo, |
65 | | PercentGot_Ofst, PercentGot_Page, PercentGottprel, PercentGp_Rel, PercentHi, |
66 | | PercentHigher, PercentHighest, PercentLo, PercentNeg, PercentPcrel_Hi, |
67 | | PercentPcrel_Lo, PercentTlsgd, PercentTlsldm, PercentTprel_Hi, |
68 | | PercentTprel_Lo |
69 | | }; |
70 | | |
71 | | private: |
72 | | TokenKind Kind; |
73 | | |
74 | | /// A reference to the entire token contents; this is always a pointer into |
75 | | /// a memory buffer owned by the source manager. |
76 | | StringRef Str; |
77 | | |
78 | | APInt IntVal; |
79 | | |
80 | | public: |
81 | 479k | AsmToken() = default; |
82 | | AsmToken(TokenKind Kind, StringRef Str, APInt IntVal) |
83 | 402k | : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {} |
84 | | AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0) |
85 | 5.95M | : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {} |
86 | | |
87 | 11.8M | TokenKind getKind() const { return Kind; } |
88 | 43.1M | bool is(TokenKind K) const { return Kind == K; } |
89 | 6.57M | bool isNot(TokenKind K) const { return Kind != K; } |
90 | | |
91 | | SMLoc getLoc() const; |
92 | | SMLoc getEndLoc() const; |
93 | | SMRange getLocRange() const; |
94 | | |
95 | | /// Get the contents of a string token (without quotes). |
96 | 67.9k | StringRef getStringContents() const { |
97 | 67.9k | assert(Kind == String && "This token isn't a string!"); |
98 | 67.9k | return Str.slice(1, Str.size() - 1); |
99 | 67.9k | } |
100 | | |
101 | | /// Get the identifier string for the current token, which should be an |
102 | | /// identifier or a string. This gets the portion of the string which should |
103 | | /// be used as the identifier, e.g., it does not include the quotes on |
104 | | /// strings. |
105 | 1.95M | StringRef getIdentifier() const { |
106 | 1.95M | if (Kind == Identifier) |
107 | 1.95M | return getString(); |
108 | 241 | return getStringContents(); |
109 | 1.95M | } |
110 | | |
111 | | /// Get the string for the current token, this includes all characters (for |
112 | | /// example, the quotes on strings) in the token. |
113 | | /// |
114 | | /// The returned StringRef points into the source manager's memory buffer, and |
115 | | /// is safe to store across calls to Lex(). |
116 | 12.1M | StringRef getString() const { return Str; } |
117 | | |
118 | | // FIXME: Don't compute this in advance, it makes every token larger, and is |
119 | | // also not generally what we want (it is nicer for recovery etc. to lex 123br |
120 | | // as a single token, then diagnose as an invalid number). |
121 | 369k | int64_t getIntVal() const { |
122 | 369k | assert(Kind == Integer && "This token isn't an integer!"); |
123 | 369k | return IntVal.getZExtValue(); |
124 | 369k | } |
125 | | |
126 | 7 | APInt getAPIntVal() const { |
127 | 7 | assert((Kind == Integer || Kind == BigNum) && |
128 | 7 | "This token isn't an integer!"); |
129 | 7 | return IntVal; |
130 | 7 | } |
131 | | }; |
132 | | |
133 | | /// A callback class which is notified of each comment in an assembly file as |
134 | | /// it is lexed. |
135 | | class AsmCommentConsumer { |
136 | | public: |
137 | | virtual ~AsmCommentConsumer() = default; |
138 | | |
139 | | /// Callback function for when a comment is lexed. Loc is the start of the |
140 | | /// comment text (excluding the comment-start marker). CommentText is the text |
141 | | /// of the comment, excluding the comment start and end markers, and the |
142 | | /// newline for single-line comments. |
143 | | virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0; |
144 | | }; |
145 | | |
146 | | |
147 | | /// Generic assembler lexer interface, for use by target specific assembly |
148 | | /// lexers. |
149 | | class MCAsmLexer { |
150 | | /// The current token, stored in the base class for faster access. |
151 | | SmallVector<AsmToken, 1> CurTok; |
152 | | |
153 | | /// The location and description of the current error |
154 | | SMLoc ErrLoc; |
155 | | std::string Err; |
156 | | |
157 | | protected: // Can only create subclasses. |
158 | | const char *TokStart = nullptr; |
159 | | bool SkipSpace = true; |
160 | | bool AllowAtInIdentifier; |
161 | | bool IsAtStartOfStatement = true; |
162 | | AsmCommentConsumer *CommentConsumer = nullptr; |
163 | | |
164 | | bool AltMacroMode; |
165 | | MCAsmLexer(); |
166 | | |
167 | | virtual AsmToken LexToken() = 0; |
168 | | |
169 | 343k | void SetError(SMLoc errLoc, const std::string &err) { |
170 | 343k | ErrLoc = errLoc; |
171 | 343k | Err = err; |
172 | 343k | } |
173 | | |
174 | | public: |
175 | | MCAsmLexer(const MCAsmLexer &) = delete; |
176 | | MCAsmLexer &operator=(const MCAsmLexer &) = delete; |
177 | | virtual ~MCAsmLexer(); |
178 | | |
179 | 1.31M | bool IsaAltMacroMode() { |
180 | 1.31M | return AltMacroMode; |
181 | 1.31M | } |
182 | | |
183 | 8 | void SetAltMacroMode(bool AltMacroSet) { |
184 | 8 | AltMacroMode = AltMacroSet; |
185 | 8 | } |
186 | | |
187 | | /// Consume the next token from the input stream and return it. |
188 | | /// |
189 | | /// The lexer will continuosly return the end-of-file token once the end of |
190 | | /// the main input file has been reached. |
191 | 5.89M | const AsmToken &Lex() { |
192 | 5.89M | assert(!CurTok.empty()); |
193 | 5.89M | // Mark if we parsing out a EndOfStatement. |
194 | 5.89M | IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement; |
195 | 5.89M | CurTok.erase(CurTok.begin()); |
196 | 5.89M | // LexToken may generate multiple tokens via UnLex but will always return |
197 | 5.89M | // the first one. Place returned value at head of CurTok vector. |
198 | 5.89M | if (CurTok.empty()5.89M ) { |
199 | 5.86M | AsmToken T = LexToken(); |
200 | 5.86M | CurTok.insert(CurTok.begin(), T); |
201 | 5.86M | } |
202 | 5.89M | return CurTok.front(); |
203 | 5.89M | } |
204 | | |
205 | 28.3k | void UnLex(AsmToken const &Token) { |
206 | 28.3k | IsAtStartOfStatement = false; |
207 | 28.3k | CurTok.insert(CurTok.begin(), Token); |
208 | 28.3k | } |
209 | | |
210 | 29.8k | bool isAtStartOfStatement() { return IsAtStartOfStatement; } |
211 | | |
212 | | virtual StringRef LexUntilEndOfStatement() = 0; |
213 | | |
214 | | /// Get the current source location. |
215 | | SMLoc getLoc() const; |
216 | | |
217 | | /// Get the current (last) lexed token. |
218 | 61.1M | const AsmToken &getTok() const { |
219 | 61.1M | return CurTok[0]; |
220 | 61.1M | } |
221 | | |
222 | | /// Look ahead at the next token to be lexed. |
223 | 208k | const AsmToken peekTok(bool ShouldSkipSpace = true) { |
224 | 208k | AsmToken Tok; |
225 | 208k | |
226 | 208k | MutableArrayRef<AsmToken> Buf(Tok); |
227 | 208k | size_t ReadCount = peekTokens(Buf, ShouldSkipSpace); |
228 | 208k | |
229 | 208k | assert(ReadCount == 1); |
230 | 208k | (void)ReadCount; |
231 | 208k | |
232 | 208k | return Tok; |
233 | 208k | } |
234 | | |
235 | | /// Look ahead an arbitrary number of tokens. |
236 | | virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf, |
237 | | bool ShouldSkipSpace = true) = 0; |
238 | | |
239 | | /// Get the current error location |
240 | 343k | SMLoc getErrLoc() { |
241 | 343k | return ErrLoc; |
242 | 343k | } |
243 | | |
244 | | /// Get the current error string |
245 | 343k | const std::string &getErr() { |
246 | 343k | return Err; |
247 | 343k | } |
248 | | |
249 | | /// Get the kind of current token. |
250 | 5.43M | AsmToken::TokenKind getKind() const { return getTok().getKind(); } |
251 | | |
252 | | /// Check if the current token has kind \p K. |
253 | 19.0M | bool is(AsmToken::TokenKind K) const { return getTok().is(K); } |
254 | | |
255 | | /// Check if the current token has kind \p K. |
256 | 5.53M | bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); } |
257 | | |
258 | | /// Set whether spaces should be ignored by the lexer |
259 | 327k | void setSkipSpace(bool val) { SkipSpace = val; } |
260 | | |
261 | 960 | bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } |
262 | 208 | void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } |
263 | | |
264 | 0 | void setCommentConsumer(AsmCommentConsumer *CommentConsumer) { |
265 | 0 | this->CommentConsumer = CommentConsumer; |
266 | 0 | } |
267 | | }; |
268 | | |
269 | | } // end namespace llvm |
270 | | |
271 | | #endif // LLVM_MC_MCPARSER_MCASMLEXER_H |