/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/MIRParser/MILexer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===- MILexer.cpp - Machine instructions lexer implementation ------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements the lexing of machine instructions. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "MILexer.h" |
14 | | #include "llvm/ADT/APSInt.h" |
15 | | #include "llvm/ADT/None.h" |
16 | | #include "llvm/ADT/STLExtras.h" |
17 | | #include "llvm/ADT/StringExtras.h" |
18 | | #include "llvm/ADT/StringSwitch.h" |
19 | | #include "llvm/ADT/StringRef.h" |
20 | | #include "llvm/ADT/Twine.h" |
21 | | #include <algorithm> |
22 | | #include <cassert> |
23 | | #include <cctype> |
24 | | #include <string> |
25 | | |
26 | | using namespace llvm; |
27 | | |
28 | | namespace { |
29 | | |
30 | | using ErrorCallbackType = |
31 | | function_ref<void(StringRef::iterator Loc, const Twine &)>; |
32 | | |
33 | | /// This class provides a way to iterate and get characters from the source |
34 | | /// string. |
35 | | class Cursor { |
36 | | const char *Ptr = nullptr; |
37 | | const char *End = nullptr; |
38 | | |
39 | | public: |
40 | 24.2M | Cursor(NoneType) {} |
41 | | |
42 | 2.16M | explicit Cursor(StringRef Str) { |
43 | 2.16M | Ptr = Str.data(); |
44 | 2.16M | End = Ptr + Str.size(); |
45 | 2.16M | } |
46 | | |
47 | 17.1M | bool isEOF() const { return Ptr == End; } |
48 | | |
49 | 44.2M | char peek(int I = 0) const { return End - Ptr <= I ? 0127k : Ptr[I]44.1M ; } |
50 | | |
51 | 22.4M | void advance(unsigned I = 1) { Ptr += I; } |
52 | | |
53 | 19.1M | StringRef remaining() const { return StringRef(Ptr, End - Ptr); } |
54 | | |
55 | 2.59M | StringRef upto(Cursor C) const { |
56 | 2.59M | assert(C.Ptr >= Ptr && C.Ptr <= End); |
57 | 2.59M | return StringRef(Ptr, C.Ptr - Ptr); |
58 | 2.59M | } |
59 | | |
60 | 3 | StringRef::iterator location() const { return Ptr; } |
61 | | |
62 | 26.3M | operator bool() const { return Ptr != nullptr; } |
63 | | }; |
64 | | |
65 | | } // end anonymous namespace |
66 | | |
67 | 2.16M | MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { |
68 | 2.16M | this->Kind = Kind; |
69 | 2.16M | this->Range = Range; |
70 | 2.16M | return *this; |
71 | 2.16M | } |
72 | | |
73 | 698k | MIToken &MIToken::setStringValue(StringRef StrVal) { |
74 | 698k | StringValue = StrVal; |
75 | 698k | return *this; |
76 | 698k | } |
77 | | |
78 | 682 | MIToken &MIToken::setOwnedStringValue(std::string StrVal) { |
79 | 682 | StringValueStorage = std::move(StrVal); |
80 | 682 | StringValue = StringValueStorage; |
81 | 682 | return *this; |
82 | 682 | } |
83 | | |
84 | 310k | MIToken &MIToken::setIntegerValue(APSInt IntVal) { |
85 | 310k | this->IntVal = std::move(IntVal); |
86 | 310k | return *this; |
87 | 310k | } |
88 | | |
89 | | /// Skip the leading whitespace characters and return the updated cursor. |
90 | 2.16M | static Cursor skipWhitespace(Cursor C) { |
91 | 3.88M | while (isblank(C.peek())) |
92 | 1.71M | C.advance(); |
93 | 2.16M | return C; |
94 | 2.16M | } |
95 | | |
/// Returns true if \p C is a line feed or a carriage return.
static bool isNewlineChar(char C) {
  switch (C) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
97 | | |
98 | | /// Skip a line comment and return the updated cursor. |
99 | 2.16M | static Cursor skipComment(Cursor C) { |
100 | 2.16M | if (C.peek() != ';') |
101 | 1.89M | return C; |
102 | 15.2M | while (273k !isNewlineChar(C.peek()) && !C.isEOF()14.9M ) |
103 | 14.9M | C.advance(); |
104 | 273k | return C; |
105 | 273k | } |
106 | | |
/// Return true if the given character satisfies the following regular
/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
  // The <cctype> classifiers have undefined behavior for negative arguments
  // other than EOF; bytes >= 0x80 are negative when char is signed, so
  // classify the unsigned representation instead.
  const unsigned char UC = static_cast<unsigned char>(C);
  return isalpha(UC) || isdigit(UC) || C == '_' || C == '-' || C == '.' ||
         C == '$';
}
113 | | |
114 | | /// Unescapes the given string value. |
115 | | /// |
116 | | /// Expects the string value to be quoted. |
117 | 682 | static std::string unescapeQuotedString(StringRef Value) { |
118 | 682 | assert(Value.front() == '"' && Value.back() == '"'); |
119 | 682 | Cursor C = Cursor(Value.substr(1, Value.size() - 2)); |
120 | 682 | |
121 | 682 | std::string Str; |
122 | 682 | Str.reserve(C.remaining().size()); |
123 | 9.92k | while (!C.isEOF()) { |
124 | 9.24k | char Char = C.peek(); |
125 | 9.24k | if (Char == '\\') { |
126 | 52 | if (C.peek(1) == '\\') { |
127 | 2 | // Two '\' become one |
128 | 2 | Str += '\\'; |
129 | 2 | C.advance(2); |
130 | 2 | continue; |
131 | 2 | } |
132 | 50 | if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { |
133 | 50 | Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); |
134 | 50 | C.advance(3); |
135 | 50 | continue; |
136 | 50 | } |
137 | 9.19k | } |
138 | 9.19k | Str += Char; |
139 | 9.19k | C.advance(); |
140 | 9.19k | } |
141 | 682 | return Str; |
142 | 682 | } |
143 | | |
144 | | /// Lex a string constant using the following regular expression: \"[^\"]*\" |
145 | 683 | static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) { |
146 | 683 | assert(C.peek() == '"'); |
147 | 10.0k | for (C.advance(); C.peek() != '"'; C.advance()9.36k ) { |
148 | 9.36k | if (C.isEOF() || isNewlineChar(C.peek())) { |
149 | 1 | ErrorCallback( |
150 | 1 | C.location(), |
151 | 1 | "end of machine instruction reached before the closing '\"'"); |
152 | 1 | return None; |
153 | 1 | } |
154 | 9.36k | } |
155 | 683 | C.advance(); |
156 | 682 | return C; |
157 | 683 | } |
158 | | |
159 | | static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type, |
160 | 7.37k | unsigned PrefixLength, ErrorCallbackType ErrorCallback) { |
161 | 7.37k | auto Range = C; |
162 | 7.37k | C.advance(PrefixLength); |
163 | 7.37k | if (C.peek() == '"') { |
164 | 683 | if (Cursor R = lexStringConstant(C, ErrorCallback)) { |
165 | 682 | StringRef String = Range.upto(R); |
166 | 682 | Token.reset(Type, String) |
167 | 682 | .setOwnedStringValue( |
168 | 682 | unescapeQuotedString(String.drop_front(PrefixLength))); |
169 | 682 | return R; |
170 | 682 | } |
171 | 1 | Token.reset(MIToken::Error, Range.remaining()); |
172 | 1 | return Range; |
173 | 1 | } |
174 | 55.9k | while (6.68k isIdentifierChar(C.peek())) |
175 | 49.2k | C.advance(); |
176 | 6.68k | Token.reset(Type, Range.upto(C)) |
177 | 6.68k | .setStringValue(Range.upto(C).drop_front(PrefixLength)); |
178 | 6.68k | return C; |
179 | 6.68k | } |
180 | | |
/// Map the spelling of an identifier to its token kind.
///
/// Each spelling listed below is compared against \p Identifier and yields the
/// corresponding keyword token kind. Any spelling that is not listed is
/// classified as a plain MIToken::Identifier.
static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
  return StringSwitch<MIToken::TokenKind>(Identifier)
      .Case("_", MIToken::underscore)
      .Case("implicit", MIToken::kw_implicit)
      .Case("implicit-def", MIToken::kw_implicit_define)
      .Case("def", MIToken::kw_def)
      .Case("dead", MIToken::kw_dead)
      .Case("killed", MIToken::kw_killed)
      .Case("undef", MIToken::kw_undef)
      .Case("internal", MIToken::kw_internal)
      .Case("early-clobber", MIToken::kw_early_clobber)
      .Case("debug-use", MIToken::kw_debug_use)
      .Case("renamable", MIToken::kw_renamable)
      .Case("tied-def", MIToken::kw_tied_def)
      .Case("frame-setup", MIToken::kw_frame_setup)
      .Case("frame-destroy", MIToken::kw_frame_destroy)
      .Case("nnan", MIToken::kw_nnan)
      .Case("ninf", MIToken::kw_ninf)
      .Case("nsz", MIToken::kw_nsz)
      .Case("arcp", MIToken::kw_arcp)
      .Case("contract", MIToken::kw_contract)
      .Case("afn", MIToken::kw_afn)
      .Case("reassoc", MIToken::kw_reassoc)
      .Case("nuw" , MIToken::kw_nuw)
      .Case("nsw" , MIToken::kw_nsw)
      .Case("exact" , MIToken::kw_exact)
      .Case("fpexcept", MIToken::kw_fpexcept)
      .Case("debug-location", MIToken::kw_debug_location)
      // Spellings that map to the kw_cfi_* token kinds.
      .Case("same_value", MIToken::kw_cfi_same_value)
      .Case("offset", MIToken::kw_cfi_offset)
      .Case("rel_offset", MIToken::kw_cfi_rel_offset)
      .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register)
      .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
      .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset)
      .Case("escape", MIToken::kw_cfi_escape)
      .Case("def_cfa", MIToken::kw_cfi_def_cfa)
      .Case("remember_state", MIToken::kw_cfi_remember_state)
      .Case("restore", MIToken::kw_cfi_restore)
      .Case("restore_state", MIToken::kw_cfi_restore_state)
      .Case("undefined", MIToken::kw_cfi_undefined)
      .Case("register", MIToken::kw_cfi_register)
      .Case("window_save", MIToken::kw_cfi_window_save)
      .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state)
      .Case("blockaddress", MIToken::kw_blockaddress)
      .Case("intrinsic", MIToken::kw_intrinsic)
      .Case("target-index", MIToken::kw_target_index)
      .Case("half", MIToken::kw_half)
      .Case("float", MIToken::kw_float)
      .Case("double", MIToken::kw_double)
      .Case("x86_fp80", MIToken::kw_x86_fp80)
      .Case("fp128", MIToken::kw_fp128)
      .Case("ppc_fp128", MIToken::kw_ppc_fp128)
      .Case("target-flags", MIToken::kw_target_flags)
      .Case("volatile", MIToken::kw_volatile)
      .Case("non-temporal", MIToken::kw_non_temporal)
      .Case("dereferenceable", MIToken::kw_dereferenceable)
      .Case("invariant", MIToken::kw_invariant)
      .Case("align", MIToken::kw_align)
      .Case("addrspace", MIToken::kw_addrspace)
      .Case("stack", MIToken::kw_stack)
      .Case("got", MIToken::kw_got)
      .Case("jump-table", MIToken::kw_jump_table)
      .Case("constant-pool", MIToken::kw_constant_pool)
      .Case("call-entry", MIToken::kw_call_entry)
      .Case("liveout", MIToken::kw_liveout)
      .Case("address-taken", MIToken::kw_address_taken)
      .Case("landing-pad", MIToken::kw_landing_pad)
      .Case("liveins", MIToken::kw_liveins)
      .Case("successors", MIToken::kw_successors)
      .Case("floatpred", MIToken::kw_floatpred)
      .Case("intpred", MIToken::kw_intpred)
      .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
      .Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
      .Case("unknown-size", MIToken::kw_unknown_size)
      // Not a keyword: treat the spelling as an ordinary identifier.
      .Default(MIToken::Identifier);
}
257 | | |
258 | 2.07M | static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { |
259 | 2.07M | if (!isalpha(C.peek()) && C.peek() != '_'1.67M ) |
260 | 1.63M | return None; |
261 | 438k | auto Range = C; |
262 | 2.87M | while (isIdentifierChar(C.peek())) |
263 | 2.43M | C.advance(); |
264 | 438k | auto Identifier = Range.upto(C); |
265 | 438k | Token.reset(getIdentifierKind(Identifier), Identifier) |
266 | 438k | .setStringValue(Identifier); |
267 | 438k | return C; |
268 | 438k | } |
269 | | |
270 | | static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token, |
271 | 2.11M | ErrorCallbackType ErrorCallback) { |
272 | 2.11M | bool IsReference = C.remaining().startswith("%bb."); |
273 | 2.11M | if (!IsReference && !C.remaining().startswith("bb.")2.10M ) |
274 | 2.07M | return None; |
275 | 40.8k | auto Range = C; |
276 | 40.8k | unsigned PrefixLength = IsReference ? 412.0k : 328.8k ; |
277 | 40.8k | C.advance(PrefixLength); // Skip '%bb.' or 'bb.' |
278 | 40.8k | if (!isdigit(C.peek())) { |
279 | 1 | Token.reset(MIToken::Error, C.remaining()); |
280 | 1 | ErrorCallback(C.location(), "expected a number after '%bb.'"); |
281 | 1 | return C; |
282 | 1 | } |
283 | 40.8k | auto NumberRange = C; |
284 | 83.1k | while (isdigit(C.peek())) |
285 | 42.2k | C.advance(); |
286 | 40.8k | StringRef Number = NumberRange.upto(C); |
287 | 40.8k | unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' |
288 | 40.8k | // TODO: The format bb.<id>.<irname> is supported only when it's not a |
289 | 40.8k | // reference. Once we deprecate the format where the irname shows up, we |
290 | 40.8k | // should only lex forward if it is a reference. |
291 | 40.8k | if (C.peek() == '.') { |
292 | 7.19k | C.advance(); // Skip '.' |
293 | 7.19k | ++StringOffset; |
294 | 47.7k | while (isIdentifierChar(C.peek())) |
295 | 40.6k | C.advance(); |
296 | 7.19k | } |
297 | 40.8k | Token.reset(IsReference ? MIToken::MachineBasicBlock12.0k |
298 | 40.8k | : MIToken::MachineBasicBlockLabel28.8k , |
299 | 40.8k | Range.upto(C)) |
300 | 40.8k | .setIntegerValue(APSInt(Number)) |
301 | 40.8k | .setStringValue(Range.upto(C).drop_front(StringOffset)); |
302 | 40.8k | return C; |
303 | 40.8k | } |
304 | | |
305 | | static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, |
306 | 4.91M | MIToken::TokenKind Kind) { |
307 | 4.91M | if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))3.85k ) |
308 | 4.91M | return None; |
309 | 3.85k | auto Range = C; |
310 | 3.85k | C.advance(Rule.size()); |
311 | 3.85k | auto NumberRange = C; |
312 | 7.94k | while (isdigit(C.peek())) |
313 | 4.08k | C.advance(); |
314 | 3.85k | Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); |
315 | 3.85k | return C; |
316 | 3.85k | } |
317 | | |
318 | | static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, |
319 | 1.63M | MIToken::TokenKind Kind) { |
320 | 1.63M | if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))1.92k ) |
321 | 1.63M | return None; |
322 | 1.92k | auto Range = C; |
323 | 1.92k | C.advance(Rule.size()); |
324 | 1.92k | auto NumberRange = C; |
325 | 4.01k | while (isdigit(C.peek())) |
326 | 2.09k | C.advance(); |
327 | 1.92k | StringRef Number = NumberRange.upto(C); |
328 | 1.92k | unsigned StringOffset = Rule.size() + Number.size(); |
329 | 1.92k | if (C.peek() == '.') { |
330 | 510 | C.advance(); |
331 | 510 | ++StringOffset; |
332 | 2.50k | while (isIdentifierChar(C.peek())) |
333 | 1.99k | C.advance(); |
334 | 510 | } |
335 | 1.92k | Token.reset(Kind, Range.upto(C)) |
336 | 1.92k | .setIntegerValue(APSInt(Number)) |
337 | 1.92k | .setStringValue(Range.upto(C).drop_front(StringOffset)); |
338 | 1.92k | return C; |
339 | 1.92k | } |
340 | | |
341 | 1.63M | static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { |
342 | 1.63M | return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); |
343 | 1.63M | } |
344 | | |
345 | 1.63M | static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { |
346 | 1.63M | return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); |
347 | 1.63M | } |
348 | | |
349 | 1.63M | static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { |
350 | 1.63M | return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); |
351 | 1.63M | } |
352 | | |
353 | 1.63M | static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { |
354 | 1.63M | return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); |
355 | 1.63M | } |
356 | | |
357 | | static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token, |
358 | 1.63M | ErrorCallbackType ErrorCallback) { |
359 | 1.63M | const StringRef Rule = "%subreg."; |
360 | 1.63M | if (!C.remaining().startswith(Rule)) |
361 | 1.63M | return None; |
362 | 742 | return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(), |
363 | 742 | ErrorCallback); |
364 | 742 | } |
365 | | |
366 | | static Cursor maybeLexIRBlock(Cursor C, MIToken &Token, |
367 | 1.63M | ErrorCallbackType ErrorCallback) { |
368 | 1.63M | const StringRef Rule = "%ir-block."; |
369 | 1.63M | if (!C.remaining().startswith(Rule)) |
370 | 1.63M | return None; |
371 | 2.81k | if (isdigit(C.peek(Rule.size()))) |
372 | 2.77k | return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); |
373 | 36 | return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); |
374 | 36 | } |
375 | | |
376 | | static Cursor maybeLexIRValue(Cursor C, MIToken &Token, |
377 | 1.63M | ErrorCallbackType ErrorCallback) { |
378 | 1.63M | const StringRef Rule = "%ir."; |
379 | 1.63M | if (!C.remaining().startswith(Rule)) |
380 | 1.63M | return None; |
381 | 3.03k | if (isdigit(C.peek(Rule.size()))) |
382 | 344 | return maybeLexIndex(C, Token, Rule, MIToken::IRValue); |
383 | 2.68k | return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); |
384 | 2.68k | } |
385 | | |
386 | | static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, |
387 | 251 | ErrorCallbackType ErrorCallback) { |
388 | 251 | if (C.peek() != '"') |
389 | 1 | return None; |
390 | 250 | return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, |
391 | 250 | ErrorCallback); |
392 | 250 | } |
393 | | |
394 | 167k | static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { |
395 | 167k | auto Range = C; |
396 | 167k | C.advance(); // Skip '%' |
397 | 167k | auto NumberRange = C; |
398 | 353k | while (isdigit(C.peek())) |
399 | 186k | C.advance(); |
400 | 167k | Token.reset(MIToken::VirtualRegister, Range.upto(C)) |
401 | 167k | .setIntegerValue(APSInt(NumberRange.upto(C))); |
402 | 167k | return C; |
403 | 167k | } |
404 | | |
405 | | /// Returns true for a character allowed in a register name. |
406 | 1.37M | static bool isRegisterChar(char C) { |
407 | 1.37M | return isIdentifierChar(C) && C != '.'1.16M ; |
408 | 1.37M | } |
409 | | |
410 | 114 | static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) { |
411 | 114 | Cursor Range = C; |
412 | 114 | C.advance(); // Skip '%' |
413 | 710 | while (isRegisterChar(C.peek())) |
414 | 596 | C.advance(); |
415 | 114 | Token.reset(MIToken::NamedVirtualRegister, Range.upto(C)) |
416 | 114 | .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' |
417 | 114 | return C; |
418 | 114 | } |
419 | | |
420 | | static Cursor maybeLexRegister(Cursor C, MIToken &Token, |
421 | 1.63M | ErrorCallbackType ErrorCallback) { |
422 | 1.63M | if (C.peek() != '%' && C.peek() != '$'1.46M ) |
423 | 1.25M | return None; |
424 | 377k | |
425 | 377k | if (C.peek() == '%') { |
426 | 167k | if (isdigit(C.peek(1))) |
427 | 167k | return lexVirtualRegister(C, Token); |
428 | 114 | |
429 | 114 | if (isRegisterChar(C.peek(1))) |
430 | 114 | return lexNamedVirtualRegister(C, Token); |
431 | 0 | |
432 | 0 | return None; |
433 | 0 | } |
434 | 209k | |
435 | 209k | assert(C.peek() == '$'); |
436 | 209k | auto Range = C; |
437 | 209k | C.advance(); // Skip '$' |
438 | 1.37M | while (isRegisterChar(C.peek())) |
439 | 1.16M | C.advance(); |
440 | 209k | Token.reset(MIToken::NamedRegister, Range.upto(C)) |
441 | 209k | .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$' |
442 | 209k | return C; |
443 | 209k | } |
444 | | |
445 | | static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token, |
446 | 1.25M | ErrorCallbackType ErrorCallback) { |
447 | 1.25M | if (C.peek() != '@') |
448 | 1.25M | return None; |
449 | 3.15k | if (!isdigit(C.peek(1))) |
450 | 3.14k | return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, |
451 | 3.14k | ErrorCallback); |
452 | 12 | auto Range = C; |
453 | 12 | C.advance(1); // Skip the '@' |
454 | 12 | auto NumberRange = C; |
455 | 24 | while (isdigit(C.peek())) |
456 | 12 | C.advance(); |
457 | 12 | Token.reset(MIToken::GlobalValue, Range.upto(C)) |
458 | 12 | .setIntegerValue(APSInt(NumberRange.upto(C))); |
459 | 12 | return C; |
460 | 12 | } |
461 | | |
462 | | static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token, |
463 | 1.25M | ErrorCallbackType ErrorCallback) { |
464 | 1.25M | if (C.peek() != '&') |
465 | 1.24M | return None; |
466 | 514 | return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, |
467 | 514 | ErrorCallback); |
468 | 514 | } |
469 | | |
470 | | static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token, |
471 | 1.24M | ErrorCallbackType ErrorCallback) { |
472 | 1.24M | const StringRef Rule = "<mcsymbol "; |
473 | 1.24M | if (!C.remaining().startswith(Rule)) |
474 | 1.24M | return None; |
475 | 54 | auto Start = C; |
476 | 54 | C.advance(Rule.size()); |
477 | 54 | |
478 | 54 | // Try a simple unquoted name. |
479 | 54 | if (C.peek() != '"') { |
480 | 482 | while (isIdentifierChar(C.peek())) |
481 | 428 | C.advance(); |
482 | 54 | StringRef String = Start.upto(C).drop_front(Rule.size()); |
483 | 54 | if (C.peek() != '>') { |
484 | 0 | ErrorCallback(C.location(), |
485 | 0 | "expected the '<mcsymbol ...' to be closed by a '>'"); |
486 | 0 | Token.reset(MIToken::Error, Start.remaining()); |
487 | 0 | return Start; |
488 | 0 | } |
489 | 54 | C.advance(); |
490 | 54 | |
491 | 54 | Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String); |
492 | 54 | return C; |
493 | 54 | } |
494 | 0 | |
495 | 0 | // Otherwise lex out a quoted name. |
496 | 0 | Cursor R = lexStringConstant(C, ErrorCallback); |
497 | 0 | if (!R) { |
498 | 0 | ErrorCallback(C.location(), |
499 | 0 | "unable to parse quoted string from opening quote"); |
500 | 0 | Token.reset(MIToken::Error, Start.remaining()); |
501 | 0 | return Start; |
502 | 0 | } |
503 | 0 | StringRef String = Start.upto(R).drop_front(Rule.size()); |
504 | 0 | if (R.peek() != '>') { |
505 | 0 | ErrorCallback(R.location(), |
506 | 0 | "expected the '<mcsymbol ...' to be closed by a '>'"); |
507 | 0 | Token.reset(MIToken::Error, Start.remaining()); |
508 | 0 | return Start; |
509 | 0 | } |
510 | 0 | R.advance(); |
511 | 0 |
|
512 | 0 | Token.reset(MIToken::MCSymbol, Start.upto(R)) |
513 | 0 | .setOwnedStringValue(unescapeQuotedString(String)); |
514 | 0 | return R; |
515 | 0 | } |
516 | | |
/// Returns true if \p C is one of the letters 'H', 'K', 'L' or 'M', which may
/// follow '0x' in a hexadecimal floating-point literal.
static bool isValidHexFloatingPointPrefix(char C) {
  switch (C) {
  case 'H':
  case 'K':
  case 'L':
  case 'M':
    return true;
  default:
    return false;
  }
}
520 | | |
521 | 218 | static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { |
522 | 218 | C.advance(); |
523 | 218 | // Skip over [0-9]*([eE][-+]?[0-9]+)? |
524 | 812 | while (isdigit(C.peek())) |
525 | 594 | C.advance(); |
526 | 218 | if ((C.peek() == 'e' || C.peek() == 'E'138 ) && |
527 | 218 | (80 isdigit(C.peek(1))80 || |
528 | 80 | ((C.peek(1) == '-' || C.peek(1) == '+'72 ) && isdigit(C.peek(2))))) { |
529 | 80 | C.advance(2); |
530 | 232 | while (isdigit(C.peek())) |
531 | 152 | C.advance(); |
532 | 80 | } |
533 | 218 | Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); |
534 | 218 | return C; |
535 | 218 | } |
536 | | |
537 | 1.24M | static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) { |
538 | 1.24M | if (C.peek() != '0' || (37.1k C.peek(1) != 'x'37.1k && C.peek(1) != 'X'34.3k )) |
539 | 1.24M | return None; |
540 | 2.82k | Cursor Range = C; |
541 | 2.82k | C.advance(2); |
542 | 2.82k | unsigned PrefLen = 2; |
543 | 2.82k | if (isValidHexFloatingPointPrefix(C.peek())) { |
544 | 2 | C.advance(); |
545 | 2 | PrefLen++; |
546 | 2 | } |
547 | 25.4k | while (isxdigit(C.peek())) |
548 | 22.6k | C.advance(); |
549 | 2.82k | StringRef StrVal = Range.upto(C); |
550 | 2.82k | if (StrVal.size() <= PrefLen) |
551 | 0 | return None; |
552 | 2.82k | if (PrefLen == 2) |
553 | 2.81k | Token.reset(MIToken::HexLiteral, Range.upto(C)); |
554 | 2 | else // It must be 3, which means that there was a floating-point prefix. |
555 | 2 | Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); |
556 | 2.82k | return C; |
557 | 2.82k | } |
558 | | |
559 | 1.24M | static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { |
560 | 1.24M | if (!isdigit(C.peek()) && (1.15M C.peek() != '-'1.15M || !isdigit(C.peek(1))2.10k )) |
561 | 1.14M | return None; |
562 | 96.8k | auto Range = C; |
563 | 96.8k | C.advance(); |
564 | 129k | while (isdigit(C.peek())) |
565 | 32.6k | C.advance(); |
566 | 96.8k | if (C.peek() == '.') |
567 | 218 | return lexFloatingPointLiteral(Range, C, Token); |
568 | 96.6k | StringRef StrVal = Range.upto(C); |
569 | 96.6k | Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); |
570 | 96.6k | return C; |
571 | 96.6k | } |
572 | | |
573 | 998 | static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { |
574 | 998 | return StringSwitch<MIToken::TokenKind>(Identifier) |
575 | 998 | .Case("!tbaa", MIToken::md_tbaa) |
576 | 998 | .Case("!alias.scope", MIToken::md_alias_scope) |
577 | 998 | .Case("!noalias", MIToken::md_noalias) |
578 | 998 | .Case("!range", MIToken::md_range) |
579 | 998 | .Case("!DIExpression", MIToken::md_diexpr) |
580 | 998 | .Case("!DILocation", MIToken::md_dilocation) |
581 | 998 | .Default(MIToken::Error); |
582 | 998 | } |
583 | | |
584 | | static Cursor maybeLexExlaim(Cursor C, MIToken &Token, |
585 | 1.14M | ErrorCallbackType ErrorCallback) { |
586 | 1.14M | if (C.peek() != '!') |
587 | 1.14M | return None; |
588 | 5.09k | auto Range = C; |
589 | 5.09k | C.advance(1); |
590 | 5.09k | if (isdigit(C.peek()) || !isIdentifierChar(C.peek())1.00k ) { |
591 | 4.09k | Token.reset(MIToken::exclaim, Range.upto(C)); |
592 | 4.09k | return C; |
593 | 4.09k | } |
594 | 9.89k | while (998 isIdentifierChar(C.peek())) |
595 | 8.89k | C.advance(); |
596 | 998 | StringRef StrVal = Range.upto(C); |
597 | 998 | Token.reset(getMetadataKeywordKind(StrVal), StrVal); |
598 | 998 | if (Token.isError()) |
599 | 1 | ErrorCallback(Token.location(), |
600 | 1 | "use of unknown metadata keyword '" + StrVal + "'"); |
601 | 998 | return C; |
602 | 998 | } |
603 | | |
604 | 1.13M | static MIToken::TokenKind symbolToken(char C) { |
605 | 1.13M | switch (C) { |
606 | 1.13M | case ',': |
607 | 170k | return MIToken::comma; |
608 | 1.13M | case '.': |
609 | 3.04k | return MIToken::dot; |
610 | 1.13M | case '=': |
611 | 112k | return MIToken::equal; |
612 | 1.13M | case ':': |
613 | 110k | return MIToken::colon; |
614 | 1.13M | case '(': |
615 | 105k | return MIToken::lparen; |
616 | 1.13M | case ')': |
617 | 105k | return MIToken::rparen; |
618 | 1.13M | case '{': |
619 | 141 | return MIToken::lbrace; |
620 | 1.13M | case '}': |
621 | 139 | return MIToken::rbrace; |
622 | 1.13M | case '+': |
623 | 180 | return MIToken::plus; |
624 | 1.13M | case '-': |
625 | 8 | return MIToken::minus; |
626 | 1.13M | case '<': |
627 | 14.1k | return MIToken::less; |
628 | 1.13M | case '>': |
629 | 14.1k | return MIToken::greater; |
630 | 1.13M | default: |
631 | 498k | return MIToken::Error; |
632 | 1.13M | } |
633 | 1.13M | } |
634 | | |
635 | 1.14M | static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { |
636 | 1.14M | MIToken::TokenKind Kind; |
637 | 1.14M | unsigned Length = 1; |
638 | 1.14M | if (C.peek() == ':' && C.peek(1) == ':'120k ) { |
639 | 9.63k | Kind = MIToken::coloncolon; |
640 | 9.63k | Length = 2; |
641 | 9.63k | } else |
642 | 1.13M | Kind = symbolToken(C.peek()); |
643 | 1.14M | if (Kind == MIToken::Error) |
644 | 498k | return None; |
645 | 646k | auto Range = C; |
646 | 646k | C.advance(Length); |
647 | 646k | Token.reset(Kind, Range.upto(C)); |
648 | 646k | return C; |
649 | 646k | } |
650 | | |
651 | 498k | static Cursor maybeLexNewline(Cursor C, MIToken &Token) { |
652 | 498k | if (!isNewlineChar(C.peek())) |
653 | 1.40k | return None; |
654 | 496k | auto Range = C; |
655 | 496k | C.advance(); |
656 | 496k | Token.reset(MIToken::Newline, Range.upto(C)); |
657 | 496k | return C; |
658 | 496k | } |
659 | | |
660 | | static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token, |
661 | 1.40k | ErrorCallbackType ErrorCallback) { |
662 | 1.40k | if (C.peek() != '`') |
663 | 251 | return None; |
664 | 1.15k | auto Range = C; |
665 | 1.15k | C.advance(); |
666 | 1.15k | auto StrRange = C; |
667 | 33.1k | while (C.peek() != '`') { |
668 | 31.9k | if (C.isEOF() || isNewlineChar(C.peek())) { |
669 | 0 | ErrorCallback( |
670 | 0 | C.location(), |
671 | 0 | "end of machine instruction reached before the closing '`'"); |
672 | 0 | Token.reset(MIToken::Error, Range.remaining()); |
673 | 0 | return C; |
674 | 0 | } |
675 | 31.9k | C.advance(); |
676 | 31.9k | } |
677 | 1.15k | StringRef Value = StrRange.upto(C); |
678 | 1.15k | C.advance(); |
679 | 1.15k | Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); |
680 | 1.15k | return C; |
681 | 1.15k | } |
682 | | |
683 | | StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, |
684 | 2.16M | ErrorCallbackType ErrorCallback) { |
685 | 2.16M | auto C = skipComment(skipWhitespace(Cursor(Source))); |
686 | 2.16M | if (C.isEOF()) { |
687 | 49.7k | Token.reset(MIToken::Eof, C.remaining()); |
688 | 49.7k | return C.remaining(); |
689 | 49.7k | } |
690 | 2.11M | |
691 | 2.11M | if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) |
692 | 40.8k | return R.remaining(); |
693 | 2.07M | if (Cursor R = maybeLexIdentifier(C, Token)) |
694 | 438k | return R.remaining(); |
695 | 1.63M | if (Cursor R = maybeLexJumpTableIndex(C, Token)) |
696 | 53 | return R.remaining(); |
697 | 1.63M | if (Cursor R = maybeLexStackObject(C, Token)) |
698 | 1.92k | return R.remaining(); |
699 | 1.63M | if (Cursor R = maybeLexFixedStackObject(C, Token)) |
700 | 580 | return R.remaining(); |
701 | 1.63M | if (Cursor R = maybeLexConstantPoolItem(C, Token)) |
702 | 98 | return R.remaining(); |
703 | 1.63M | if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback)) |
704 | 742 | return R.remaining(); |
705 | 1.63M | if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) |
706 | 2.81k | return R.remaining(); |
707 | 1.63M | if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) |
708 | 3.03k | return R.remaining(); |
709 | 1.63M | if (Cursor R = maybeLexRegister(C, Token, ErrorCallback)) |
710 | 377k | return R.remaining(); |
711 | 1.25M | if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) |
712 | 3.15k | return R.remaining(); |
713 | 1.25M | if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) |
714 | 514 | return R.remaining(); |
715 | 1.24M | if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback)) |
716 | 54 | return R.remaining(); |
717 | 1.24M | if (Cursor R = maybeLexHexadecimalLiteral(C, Token)) |
718 | 2.82k | return R.remaining(); |
719 | 1.24M | if (Cursor R = maybeLexNumericalLiteral(C, Token)) |
720 | 96.8k | return R.remaining(); |
721 | 1.14M | if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) |
722 | 5.09k | return R.remaining(); |
723 | 1.14M | if (Cursor R = maybeLexSymbol(C, Token)) |
724 | 646k | return R.remaining(); |
725 | 498k | if (Cursor R = maybeLexNewline(C, Token)) |
726 | 496k | return R.remaining(); |
727 | 1.40k | if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) |
728 | 1.15k | return R.remaining(); |
729 | 251 | if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) |
730 | 250 | return R.remaining(); |
731 | 1 | |
732 | 1 | Token.reset(MIToken::Error, C.remaining()); |
733 | 1 | ErrorCallback(C.location(), |
734 | 1 | Twine("unexpected character '") + Twine(C.peek()) + "'"); |
735 | 1 | return C.remaining(); |
736 | 1 | } |