Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/CodeGen/MIRParser/MILexer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- MILexer.cpp - Machine instructions lexer implementation ------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the lexing of machine instructions.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "MILexer.h"
14
#include "llvm/ADT/APSInt.h"
15
#include "llvm/ADT/None.h"
16
#include "llvm/ADT/STLExtras.h"
17
#include "llvm/ADT/StringExtras.h"
18
#include "llvm/ADT/StringSwitch.h"
19
#include "llvm/ADT/StringRef.h"
20
#include "llvm/ADT/Twine.h"
21
#include <algorithm>
22
#include <cassert>
23
#include <cctype>
24
#include <string>
25
26
using namespace llvm;
27
28
namespace {
29
30
using ErrorCallbackType =
31
    function_ref<void(StringRef::iterator Loc, const Twine &)>;
32
33
/// This class provides a way to iterate and get characters from the source
34
/// string.
35
class Cursor {
36
  const char *Ptr = nullptr;
37
  const char *End = nullptr;
38
39
public:
40
24.2M
  Cursor(NoneType) {}
41
42
2.16M
  explicit Cursor(StringRef Str) {
43
2.16M
    Ptr = Str.data();
44
2.16M
    End = Ptr + Str.size();
45
2.16M
  }
46
47
17.1M
  bool isEOF() const { return Ptr == End; }
48
49
44.2M
  char peek(int I = 0) const { return End - Ptr <= I ? 
0127k
:
Ptr[I]44.1M
; }
50
51
22.4M
  void advance(unsigned I = 1) { Ptr += I; }
52
53
19.1M
  StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
54
55
2.59M
  StringRef upto(Cursor C) const {
56
2.59M
    assert(C.Ptr >= Ptr && C.Ptr <= End);
57
2.59M
    return StringRef(Ptr, C.Ptr - Ptr);
58
2.59M
  }
59
60
3
  StringRef::iterator location() const { return Ptr; }
61
62
26.3M
  operator bool() const { return Ptr != nullptr; }
63
};
64
65
} // end anonymous namespace
66
67
2.16M
/// Overwrite the token's kind and source range and return the token so that
/// further setters can be chained.
MIToken &MIToken::reset(TokenKind Kind, StringRef Range) {
  MIToken &Self = *this;
  Self.Range = Range;
  Self.Kind = Kind;
  return Self;
}
72
73
698k
/// Store \p StrVal as the token's string value (no copy of the characters is
/// made here) and return the token for chaining.
MIToken &MIToken::setStringValue(StringRef StrVal) {
  MIToken &Self = *this;
  Self.StringValue = StrVal;
  return Self;
}
77
78
682
/// Take ownership of \p StrVal by moving it into the token's storage, then
/// make the token's string value refer to that storage. Returns the token for
/// chaining.
MIToken &MIToken::setOwnedStringValue(std::string StrVal) {
  StringValueStorage = std::move(StrVal);
  // The string value must be set only after the storage holds the text.
  return setStringValue(StringValueStorage);
}
83
84
310k
/// Move \p IntVal into the token's integer value and return the token for
/// chaining.
MIToken &MIToken::setIntegerValue(APSInt IntVal) {
  MIToken &Self = *this;
  Self.IntVal = std::move(IntVal);
  return Self;
}
88
89
/// Skip the leading whitespace characters and return the updated cursor.
90
2.16M
static Cursor skipWhitespace(Cursor C) {
91
3.88M
  while (isblank(C.peek()))
92
1.71M
    C.advance();
93
2.16M
  return C;
94
2.16M
}
95
96
15.7M
/// Return true if \p C is a line feed or a carriage return.
static bool isNewlineChar(char C) {
  switch (C) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
97
98
/// Skip a line comment and return the updated cursor.
99
2.16M
static Cursor skipComment(Cursor C) {
100
2.16M
  if (C.peek() != ';')
101
1.89M
    return C;
102
15.2M
  
while (273k
!isNewlineChar(C.peek()) &&
!C.isEOF()14.9M
)
103
14.9M
    C.advance();
104
273k
  return C;
105
273k
}
106
107
/// Return true if the given character satisfies the following regular
/// expression: [-a-zA-Z$._0-9]
static bool isIdentifierChar(char C) {
  // <cctype> classifiers have undefined behaviour for negative arguments, and
  // plain char may be signed, so classify the unsigned value instead.
  const unsigned char UC = static_cast<unsigned char>(C);
  if (isalpha(UC) || isdigit(UC))
    return true;
  return C == '_' || C == '-' || C == '.' || C == '$';
}
113
114
/// Unescapes the given string value.
115
///
116
/// Expects the string value to be quoted.
117
682
static std::string unescapeQuotedString(StringRef Value) {
118
682
  assert(Value.front() == '"' && Value.back() == '"');
119
682
  Cursor C = Cursor(Value.substr(1, Value.size() - 2));
120
682
121
682
  std::string Str;
122
682
  Str.reserve(C.remaining().size());
123
9.92k
  while (!C.isEOF()) {
124
9.24k
    char Char = C.peek();
125
9.24k
    if (Char == '\\') {
126
52
      if (C.peek(1) == '\\') {
127
2
        // Two '\' become one
128
2
        Str += '\\';
129
2
        C.advance(2);
130
2
        continue;
131
2
      }
132
50
      if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) {
133
50
        Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2));
134
50
        C.advance(3);
135
50
        continue;
136
50
      }
137
9.19k
    }
138
9.19k
    Str += Char;
139
9.19k
    C.advance();
140
9.19k
  }
141
682
  return Str;
142
682
}
143
144
/// Lex a string constant using the following regular expression: \"[^\"]*\"
145
683
static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) {
146
683
  assert(C.peek() == '"');
147
10.0k
  for (C.advance(); C.peek() != '"'; 
C.advance()9.36k
) {
148
9.36k
    if (C.isEOF() || isNewlineChar(C.peek())) {
149
1
      ErrorCallback(
150
1
          C.location(),
151
1
          "end of machine instruction reached before the closing '\"'");
152
1
      return None;
153
1
    }
154
9.36k
  }
155
683
  C.advance();
156
682
  return C;
157
683
}
158
159
/// Lex a name that follows a prefix of \p PrefixLength characters.
///
/// The name is either a quoted string constant, whose unescaped text becomes
/// the token's owned string value, or a run of identifier characters. On
/// success the token gets kind \p Type and the cursor past the name is
/// returned. If the quoted form fails to lex, the token becomes an error
/// token and the starting cursor is returned.
static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
                      unsigned PrefixLength, ErrorCallbackType ErrorCallback) {
  const Cursor Start = C;
  C.advance(PrefixLength);

  if (C.peek() != '"') {
    // Unquoted name: consume identifier characters.
    while (isIdentifierChar(C.peek()))
      C.advance();
    const StringRef Spelling = Start.upto(C);
    Token.reset(Type, Spelling)
        .setStringValue(Spelling.drop_front(PrefixLength));
    return C;
  }

  Cursor AfterQuote = lexStringConstant(C, ErrorCallback);
  if (!AfterQuote) {
    Token.reset(MIToken::Error, Start.remaining());
    return Start;
  }
  const StringRef Spelling = Start.upto(AfterQuote);
  Token.reset(Type, Spelling)
      .setOwnedStringValue(
          unescapeQuotedString(Spelling.drop_front(PrefixLength)));
  return AfterQuote;
}
180
181
438k
/// Map an identifier spelling to its keyword token kind, or to
/// MIToken::Identifier when the spelling is not a keyword. The comparison is
/// exact (case-sensitive, whole string).
static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
  static const struct {
    const char *Spelling;
    MIToken::TokenKind Kind;
  } Keywords[] = {
      {"_", MIToken::underscore},
      {"implicit", MIToken::kw_implicit},
      {"implicit-def", MIToken::kw_implicit_define},
      {"def", MIToken::kw_def},
      {"dead", MIToken::kw_dead},
      {"killed", MIToken::kw_killed},
      {"undef", MIToken::kw_undef},
      {"internal", MIToken::kw_internal},
      {"early-clobber", MIToken::kw_early_clobber},
      {"debug-use", MIToken::kw_debug_use},
      {"renamable", MIToken::kw_renamable},
      {"tied-def", MIToken::kw_tied_def},
      {"frame-setup", MIToken::kw_frame_setup},
      {"frame-destroy", MIToken::kw_frame_destroy},
      {"nnan", MIToken::kw_nnan},
      {"ninf", MIToken::kw_ninf},
      {"nsz", MIToken::kw_nsz},
      {"arcp", MIToken::kw_arcp},
      {"contract", MIToken::kw_contract},
      {"afn", MIToken::kw_afn},
      {"reassoc", MIToken::kw_reassoc},
      {"nuw", MIToken::kw_nuw},
      {"nsw", MIToken::kw_nsw},
      {"exact", MIToken::kw_exact},
      {"fpexcept", MIToken::kw_fpexcept},
      {"debug-location", MIToken::kw_debug_location},
      {"same_value", MIToken::kw_cfi_same_value},
      {"offset", MIToken::kw_cfi_offset},
      {"rel_offset", MIToken::kw_cfi_rel_offset},
      {"def_cfa_register", MIToken::kw_cfi_def_cfa_register},
      {"def_cfa_offset", MIToken::kw_cfi_def_cfa_offset},
      {"adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset},
      {"escape", MIToken::kw_cfi_escape},
      {"def_cfa", MIToken::kw_cfi_def_cfa},
      {"remember_state", MIToken::kw_cfi_remember_state},
      {"restore", MIToken::kw_cfi_restore},
      {"restore_state", MIToken::kw_cfi_restore_state},
      {"undefined", MIToken::kw_cfi_undefined},
      {"register", MIToken::kw_cfi_register},
      {"window_save", MIToken::kw_cfi_window_save},
      {"negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state},
      {"blockaddress", MIToken::kw_blockaddress},
      {"intrinsic", MIToken::kw_intrinsic},
      {"target-index", MIToken::kw_target_index},
      {"half", MIToken::kw_half},
      {"float", MIToken::kw_float},
      {"double", MIToken::kw_double},
      {"x86_fp80", MIToken::kw_x86_fp80},
      {"fp128", MIToken::kw_fp128},
      {"ppc_fp128", MIToken::kw_ppc_fp128},
      {"target-flags", MIToken::kw_target_flags},
      {"volatile", MIToken::kw_volatile},
      {"non-temporal", MIToken::kw_non_temporal},
      {"dereferenceable", MIToken::kw_dereferenceable},
      {"invariant", MIToken::kw_invariant},
      {"align", MIToken::kw_align},
      {"addrspace", MIToken::kw_addrspace},
      {"stack", MIToken::kw_stack},
      {"got", MIToken::kw_got},
      {"jump-table", MIToken::kw_jump_table},
      {"constant-pool", MIToken::kw_constant_pool},
      {"call-entry", MIToken::kw_call_entry},
      {"liveout", MIToken::kw_liveout},
      {"address-taken", MIToken::kw_address_taken},
      {"landing-pad", MIToken::kw_landing_pad},
      {"liveins", MIToken::kw_liveins},
      {"successors", MIToken::kw_successors},
      {"floatpred", MIToken::kw_floatpred},
      {"intpred", MIToken::kw_intpred},
      {"pre-instr-symbol", MIToken::kw_pre_instr_symbol},
      {"post-instr-symbol", MIToken::kw_post_instr_symbol},
      {"unknown-size", MIToken::kw_unknown_size},
  };

  for (const auto &Entry : Keywords)
    if (Identifier == Entry.Spelling)
      return Entry.Kind;
  return MIToken::Identifier;
}
257
258
2.07M
/// Try to lex an identifier or keyword: a letter or '_' followed by any
/// number of identifier characters. Returns \c None if the cursor is not on
/// such a start character; otherwise sets \p Token (kind chosen by
/// getIdentifierKind, string value set to the spelling) and returns the
/// cursor past the identifier.
static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
  // isalpha has undefined behaviour for negative arguments, hence the cast.
  if (!isalpha(static_cast<unsigned char>(C.peek())) && C.peek() != '_')
    return None;
  auto Range = C;
  while (isIdentifierChar(C.peek()))
    C.advance();
  auto Identifier = Range.upto(C);
  Token.reset(getIdentifierKind(Identifier), Identifier)
      .setStringValue(Identifier);
  return C;
}
269
270
/// Try to lex a machine basic block reference ('%bb.<id>') or label
/// ('bb.<id>'), each optionally followed by '.' and a run of identifier
/// characters.
///
/// Returns \c None when neither prefix is present. When the prefix is not
/// followed by a digit, the token becomes an error token, the error callback
/// is invoked, and the cursor after the prefix is returned. On success the
/// token's integer value is the id and its string value is the text after
/// the optional '.'.
static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
                                        ErrorCallbackType ErrorCallback) {
  bool IsReference = C.remaining().startswith("%bb.");
  if (!IsReference && !C.remaining().startswith("bb."))
    return None;

  // isdigit has undefined behaviour for negative arguments, hence the cast.
  auto IsDigit = [](char Ch) {
    return isdigit(static_cast<unsigned char>(Ch)) != 0;
  };

  auto Range = C;
  unsigned PrefixLength = IsReference ? 4 : 3;
  C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
  if (!IsDigit(C.peek())) {
    Token.reset(MIToken::Error, C.remaining());
    ErrorCallback(C.location(), "expected a number after '%bb.'");
    return C;
  }
  auto NumberRange = C;
  while (IsDigit(C.peek()))
    C.advance();
  StringRef Number = NumberRange.upto(C);
  unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>'
  // TODO: The format bb.<id>.<irname> is supported only when it's not a
  // reference. Once we deprecate the format where the irname shows up, we
  // should only lex forward if it is a reference.
  if (C.peek() == '.') {
    C.advance(); // Skip '.'
    ++StringOffset;
    while (isIdentifierChar(C.peek()))
      C.advance();
  }
  Token.reset(IsReference ? MIToken::MachineBasicBlock
                          : MIToken::MachineBasicBlockLabel,
              Range.upto(C))
      .setIntegerValue(APSInt(Number))
      .setStringValue(Range.upto(C).drop_front(StringOffset));
  return C;
}
304
305
/// Try to lex the prefix \p Rule immediately followed by one or more decimal
/// digits. Returns \c None on mismatch; otherwise gives \p Token the kind
/// \p Kind, sets its integer value from the digits, and returns the cursor
/// past them.
static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
                            MIToken::TokenKind Kind) {
  // isdigit has undefined behaviour for negative arguments, hence the cast.
  auto IsDigit = [](char Ch) {
    return isdigit(static_cast<unsigned char>(Ch)) != 0;
  };

  if (!C.remaining().startswith(Rule) || !IsDigit(C.peek(Rule.size())))
    return None;
  auto Range = C;
  C.advance(Rule.size());
  auto NumberRange = C;
  while (IsDigit(C.peek()))
    C.advance();
  Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C)));
  return C;
}
317
318
/// Try to lex the prefix \p Rule followed by one or more decimal digits and
/// an optional '.' plus a run of identifier characters. Returns \c None on
/// mismatch; otherwise gives \p Token the kind \p Kind, the digits as integer
/// value, the text after the optional '.' as string value, and returns the
/// cursor past the token.
static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
                                   MIToken::TokenKind Kind) {
  // isdigit has undefined behaviour for negative arguments, hence the cast.
  auto IsDigit = [](char Ch) {
    return isdigit(static_cast<unsigned char>(Ch)) != 0;
  };

  if (!C.remaining().startswith(Rule) || !IsDigit(C.peek(Rule.size())))
    return None;
  auto Range = C;
  C.advance(Rule.size());
  auto NumberRange = C;
  while (IsDigit(C.peek()))
    C.advance();
  StringRef Number = NumberRange.upto(C);
  unsigned StringOffset = Rule.size() + Number.size();
  if (C.peek() == '.') {
    C.advance();
    ++StringOffset;
    while (isIdentifierChar(C.peek()))
      C.advance();
  }
  Token.reset(Kind, Range.upto(C))
      .setIntegerValue(APSInt(Number))
      .setStringValue(Range.upto(C).drop_front(StringOffset));
  return C;
}
340
341
1.63M
/// Try to lex a jump table index: '%jump-table.' followed by digits.
static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) {
  const StringRef Rule = "%jump-table.";
  return maybeLexIndex(C, Token, Rule, MIToken::JumpTableIndex);
}
344
345
1.63M
/// Try to lex a stack object: '%stack.' followed by digits and an optional
/// '.'-separated name.
static Cursor maybeLexStackObject(Cursor C, MIToken &Token) {
  const StringRef Rule = "%stack.";
  return maybeLexIndexAndName(C, Token, Rule, MIToken::StackObject);
}
348
349
1.63M
/// Try to lex a fixed stack object: '%fixed-stack.' followed by digits.
static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) {
  const StringRef Rule = "%fixed-stack.";
  return maybeLexIndex(C, Token, Rule, MIToken::FixedStackObject);
}
352
353
1.63M
/// Try to lex a constant pool item: '%const.' followed by digits.
static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
  const StringRef Rule = "%const.";
  return maybeLexIndex(C, Token, Rule, MIToken::ConstantPoolItem);
}
356
357
/// Try to lex a subregister index: '%subreg.' followed by a name as accepted
/// by lexName. Returns \c None when the prefix is absent.
static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
                                       ErrorCallbackType ErrorCallback) {
  const StringRef Prefix = "%subreg.";
  if (C.remaining().startswith(Prefix))
    return lexName(C, Token, MIToken::SubRegisterIndex, Prefix.size(),
                   ErrorCallback);
  return None;
}
365
366
/// Try to lex an IR block reference introduced by '%ir-block.'. A digit after
/// the prefix selects the numbered form (MIToken::IRBlock); anything else is
/// lexed as a name (MIToken::NamedIRBlock). Returns \c None when the prefix
/// is absent.
static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
                              ErrorCallbackType ErrorCallback) {
  const StringRef Rule = "%ir-block.";
  if (!C.remaining().startswith(Rule))
    return None;
  // isdigit has undefined behaviour for negative arguments, hence the cast.
  if (isdigit(static_cast<unsigned char>(C.peek(Rule.size()))))
    return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
  return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
}
375
376
/// Try to lex an IR value reference introduced by '%ir.'. A digit after the
/// prefix selects the numbered form (MIToken::IRValue); anything else is
/// lexed as a name (MIToken::NamedIRValue). Returns \c None when the prefix
/// is absent.
static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
                              ErrorCallbackType ErrorCallback) {
  const StringRef Rule = "%ir.";
  if (!C.remaining().startswith(Rule))
    return None;
  // isdigit has undefined behaviour for negative arguments, hence the cast.
  if (isdigit(static_cast<unsigned char>(C.peek(Rule.size()))))
    return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
  return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
}
385
386
/// Try to lex a quoted string constant. Returns \c None unless the cursor is
/// on a '"'; otherwise defers to lexName with an empty prefix.
static Cursor maybeLexStringConstant(Cursor C, MIToken &Token,
                                     ErrorCallbackType ErrorCallback) {
  const bool StartsWithQuote = C.peek() == '"';
  if (StartsWithQuote)
    return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0,
                   ErrorCallback);
  return None;
}
393
394
167k
/// Lex a numbered virtual register: a one-character sigil (skipped without
/// being checked here) followed by decimal digits. The digits become the
/// token's integer value.
static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
  auto Range = C;
  C.advance(); // Skip '%'
  auto NumberRange = C;
  // isdigit has undefined behaviour for negative arguments, hence the cast.
  while (isdigit(static_cast<unsigned char>(C.peek())))
    C.advance();
  Token.reset(MIToken::VirtualRegister, Range.upto(C))
      .setIntegerValue(APSInt(NumberRange.upto(C)));
  return C;
}
404
405
/// Returns true for a character allowed in a register name.
406
1.37M
static bool isRegisterChar(char C) {
407
1.37M
  return isIdentifierChar(C) && 
C != '.'1.16M
;
408
1.37M
}
409
410
114
/// Lex a named virtual register: a one-character sigil (skipped without being
/// checked here) followed by register-name characters. The token's string
/// value is the spelling without the sigil.
static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) {
  const Cursor Start = C;
  for (C.advance() /* Skip '%' */; isRegisterChar(C.peek()); C.advance()) {
  }
  const StringRef Spelling = Start.upto(C);
  Token.reset(MIToken::NamedVirtualRegister, Spelling)
      .setStringValue(Spelling.drop_front(1)); // Drop the '%'
  return C;
}
419
420
/// Try to lex a register.
///
/// '%' followed by a digit is a numbered virtual register, '%' followed by a
/// register-name character is a named virtual register, and '$' followed by
/// register-name characters is a named register. Returns \c None for any
/// other input, including a '%' followed by neither. \p ErrorCallback is not
/// used by this function.
static Cursor maybeLexRegister(Cursor C, MIToken &Token,
                               ErrorCallbackType ErrorCallback) {
  if (C.peek() != '%' && C.peek() != '$')
    return None;

  if (C.peek() == '%') {
    // isdigit has undefined behaviour for negative arguments, hence the cast.
    if (isdigit(static_cast<unsigned char>(C.peek(1))))
      return lexVirtualRegister(C, Token);

    if (isRegisterChar(C.peek(1)))
      return lexNamedVirtualRegister(C, Token);

    return None;
  }

  assert(C.peek() == '$');
  auto Range = C;
  C.advance(); // Skip '$'
  while (isRegisterChar(C.peek()))
    C.advance();
  Token.reset(MIToken::NamedRegister, Range.upto(C))
      .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$'
  return C;
}
444
445
/// Try to lex a global value introduced by '@'. A digit after the '@' selects
/// the numbered form (MIToken::GlobalValue, integer value set from the
/// digits); anything else is lexed as a name (MIToken::NamedGlobalValue).
/// Returns \c None when the cursor is not on '@'.
static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
                                  ErrorCallbackType ErrorCallback) {
  if (C.peek() != '@')
    return None;

  // isdigit has undefined behaviour for negative arguments, hence the cast.
  auto IsDigit = [](char Ch) {
    return isdigit(static_cast<unsigned char>(Ch)) != 0;
  };

  if (!IsDigit(C.peek(1)))
    return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
                   ErrorCallback);
  auto Range = C;
  C.advance(1); // Skip the '@'
  auto NumberRange = C;
  while (IsDigit(C.peek()))
    C.advance();
  Token.reset(MIToken::GlobalValue, Range.upto(C))
      .setIntegerValue(APSInt(NumberRange.upto(C)));
  return C;
}
461
462
/// Try to lex an external symbol: '&' followed by a name as accepted by
/// lexName. Returns \c None when the cursor is not on '&'.
static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
                                     ErrorCallbackType ErrorCallback) {
  const bool StartsWithAmpersand = C.peek() == '&';
  if (StartsWithAmpersand)
    return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
                   ErrorCallback);
  return None;
}
469
470
/// Try to lex an MC symbol of the form '<mcsymbol NAME>', where NAME is
/// either a run of identifier characters or a quoted string constant.
///
/// Returns \c None when the '<mcsymbol ' prefix is absent. If the quoted name
/// fails to lex or the closing '>' is missing, the error callback is invoked,
/// the token becomes an error token, and the starting cursor is returned.
static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
                               ErrorCallbackType ErrorCallback) {
  const StringRef Rule = "<mcsymbol ";
  if (!C.remaining().startswith(Rule))
    return None;
  const Cursor Start = C;
  C.advance(Rule.size());

  // Find the end of the name, which is either quoted or a simple unquoted
  // run of identifier characters.
  const bool IsQuoted = C.peek() == '"';
  Cursor NameEnd = C;
  if (IsQuoted) {
    NameEnd = lexStringConstant(C, ErrorCallback);
    if (!NameEnd) {
      ErrorCallback(C.location(),
                    "unable to parse quoted string from opening quote");
      Token.reset(MIToken::Error, Start.remaining());
      return Start;
    }
  } else {
    while (isIdentifierChar(NameEnd.peek()))
      NameEnd.advance();
  }

  const StringRef Name = Start.upto(NameEnd).drop_front(Rule.size());
  if (NameEnd.peek() != '>') {
    ErrorCallback(NameEnd.location(),
                  "expected the '<mcsymbol ...' to be closed by a '>'");
    Token.reset(MIToken::Error, Start.remaining());
    return Start;
  }
  NameEnd.advance(); // Skip the '>'.

  Token.reset(MIToken::MCSymbol, Start.upto(NameEnd));
  if (IsQuoted)
    Token.setOwnedStringValue(unescapeQuotedString(Name));
  else
    Token.setStringValue(Name);
  return NameEnd;
}
516
517
2.82k
/// Return true if \p C is one of the letters 'H', 'K', 'L' or 'M', which
/// maybeLexHexadecimalLiteral accepts after '0x' to form a floating-point
/// literal.
static bool isValidHexFloatingPointPrefix(char C) {
  switch (C) {
  case 'H':
  case 'K':
  case 'L':
  case 'M':
    return true;
  default:
    return false;
  }
}
520
521
218
/// Finish lexing a floating-point literal. \p Range marks the start of the
/// literal and \p C is on the character that ends its integer part (the '.'
/// at the only call site), which is skipped unconditionally. The token covers
/// \p Range up to the end of the fraction and optional exponent.
static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
  // isdigit has undefined behaviour for negative arguments, hence the cast.
  auto IsDigit = [](char Ch) {
    return isdigit(static_cast<unsigned char>(Ch)) != 0;
  };

  C.advance();
  // Skip over [0-9]*([eE][-+]?[0-9]+)?
  while (IsDigit(C.peek()))
    C.advance();
  if ((C.peek() == 'e' || C.peek() == 'E') &&
      (IsDigit(C.peek(1)) ||
       ((C.peek(1) == '-' || C.peek(1) == '+') && IsDigit(C.peek(2))))) {
    // Skips the exponent marker plus either the sign or the first digit.
    C.advance(2);
    while (IsDigit(C.peek()))
      C.advance();
  }
  Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
  return C;
}
536
537
1.24M
/// Try to lex a literal that starts with '0x' or '0X'.
///
/// An optional 'H', 'K', 'L' or 'M' after the prefix makes the token a
/// floating-point literal; without it the token is a hex literal. Returns
/// \c None if the prefix is absent or no hexadecimal digit follows it.
static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
  if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X'))
    return None;
  Cursor Range = C;
  C.advance(2);
  unsigned PrefLen = 2;
  if (isValidHexFloatingPointPrefix(C.peek())) {
    C.advance();
    PrefLen++;
  }
  // isxdigit has undefined behaviour for negative arguments, hence the cast.
  while (isxdigit(static_cast<unsigned char>(C.peek())))
    C.advance();
  StringRef StrVal = Range.upto(C);
  if (StrVal.size() <= PrefLen)
    return None;
  if (PrefLen == 2)
    Token.reset(MIToken::HexLiteral, Range.upto(C));
  else // It must be 3, which means that there was a floating-point prefix.
    Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
  return C;
}
558
559
1.24M
/// Try to lex a decimal literal: an optional '-' followed by digits. If a '.'
/// follows the digits, lexing continues as a floating-point literal;
/// otherwise the token is an integer literal whose integer value is parsed
/// from the spelling. Returns \c None when the input does not start with a
/// digit or with '-' and a digit.
static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
  // isdigit has undefined behaviour for negative arguments, hence the cast.
  auto IsDigit = [](char Ch) {
    return isdigit(static_cast<unsigned char>(Ch)) != 0;
  };

  if (!IsDigit(C.peek()) && (C.peek() != '-' || !IsDigit(C.peek(1))))
    return None;
  auto Range = C;
  C.advance();
  while (IsDigit(C.peek()))
    C.advance();
  if (C.peek() == '.')
    return lexFloatingPointLiteral(Range, C, Token);
  StringRef StrVal = Range.upto(C);
  Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal));
  return C;
}
572
573
998
/// Map a '!'-prefixed spelling to its metadata keyword token kind, or to
/// MIToken::Error when the spelling is not a known metadata keyword.
static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
  static const struct {
    const char *Spelling;
    MIToken::TokenKind Kind;
  } Keywords[] = {
      {"!tbaa", MIToken::md_tbaa},
      {"!alias.scope", MIToken::md_alias_scope},
      {"!noalias", MIToken::md_noalias},
      {"!range", MIToken::md_range},
      {"!DIExpression", MIToken::md_diexpr},
      {"!DILocation", MIToken::md_dilocation},
  };

  for (const auto &Entry : Keywords)
    if (Identifier == Entry.Spelling)
      return Entry.Kind;
  return MIToken::Error;
}
583
584
/// Try to lex a token that starts with '!'.
///
/// If the '!' is followed by a digit or by a non-identifier character, only
/// the '!' is consumed and the token is MIToken::exclaim. Otherwise the '!'
/// and the following identifier characters form a metadata keyword; an
/// unknown keyword produces an error token and invokes the error callback.
/// Returns \c None when the cursor is not on '!'.
static Cursor maybeLexExlaim(Cursor C, MIToken &Token,
                             ErrorCallbackType ErrorCallback) {
  if (C.peek() != '!')
    return None;
  auto Range = C;
  C.advance(1);
  // isdigit has undefined behaviour for negative arguments, hence the cast.
  if (isdigit(static_cast<unsigned char>(C.peek())) ||
      !isIdentifierChar(C.peek())) {
    Token.reset(MIToken::exclaim, Range.upto(C));
    return C;
  }
  while (isIdentifierChar(C.peek()))
    C.advance();
  StringRef StrVal = Range.upto(C);
  Token.reset(getMetadataKeywordKind(StrVal), StrVal);
  if (Token.isError())
    ErrorCallback(Token.location(),
                  "use of unknown metadata keyword '" + StrVal + "'");
  return C;
}
603
604
1.13M
/// Map a single punctuation character to its token kind, or to
/// MIToken::Error when the character is not a recognised symbol.
static MIToken::TokenKind symbolToken(char C) {
  static const struct {
    char Symbol;
    MIToken::TokenKind Kind;
  } Symbols[] = {
      {',', MIToken::comma},  {'.', MIToken::dot},
      {'=', MIToken::equal},  {':', MIToken::colon},
      {'(', MIToken::lparen}, {')', MIToken::rparen},
      {'{', MIToken::lbrace}, {'}', MIToken::rbrace},
      {'+', MIToken::plus},   {'-', MIToken::minus},
      {'<', MIToken::less},   {'>', MIToken::greater},
  };

  for (const auto &Entry : Symbols)
    if (Entry.Symbol == C)
      return Entry.Kind;
  return MIToken::Error;
}
634
635
1.14M
/// Try to lex a punctuation token: the two-character '::' or one of the
/// single characters known to symbolToken. Returns \c None when the cursor is
/// on neither.
static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
  const bool IsColonColon = C.peek() == ':' && C.peek(1) == ':';
  const MIToken::TokenKind Kind =
      IsColonColon ? MIToken::coloncolon : symbolToken(C.peek());
  if (Kind == MIToken::Error)
    return None;

  const Cursor Start = C;
  C.advance(IsColonColon ? 2 : 1);
  Token.reset(Kind, Start.upto(C));
  return C;
}
650
651
498k
/// Try to lex a single newline character ('\n' or '\r') as a
/// MIToken::Newline token. Returns \c None when the cursor is on any other
/// character.
static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
  if (isNewlineChar(C.peek())) {
    const Cursor Start = C;
    C.advance();
    Token.reset(MIToken::Newline, Start.upto(C));
    return C;
  }
  return None;
}
659
660
/// Try to lex an IR value quoted with backticks. The token's string value is
/// the text between the backticks. Returns \c None when the cursor is not on
/// a '`'. If the end of the input or a newline character is reached before
/// the closing '`', the error callback is invoked, the token becomes an error
/// token, and the cursor at the offending position is returned.
static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token,
                                     ErrorCallbackType ErrorCallback) {
  if (C.peek() != '`')
    return None;

  const Cursor Start = C;
  C.advance(); // Skip the opening '`'.
  const Cursor ValueStart = C;
  for (; C.peek() != '`'; C.advance()) {
    if (C.isEOF() || isNewlineChar(C.peek())) {
      ErrorCallback(
          C.location(),
          "end of machine instruction reached before the closing '`'");
      Token.reset(MIToken::Error, Start.remaining());
      return C;
    }
  }
  const StringRef Value = ValueStart.upto(C);
  C.advance(); // Skip the closing '`'.
  Token.reset(MIToken::QuotedIRValue, Start.upto(C)).setStringValue(Value);
  return C;
}
682
683
/// Lex one token from the start of \p Source into \p Token and return the
/// part of the source that follows it.
///
/// Leading blanks and a ';' line comment are skipped first. At the end of the
/// input the token is MIToken::Eof. Otherwise the individual lexers are tried
/// in a fixed order and the first one that returns a valid cursor wins. If
/// none does, the token becomes an error token and the error callback reports
/// the unexpected character.
StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
                           ErrorCallbackType ErrorCallback) {
  const Cursor C = skipComment(skipWhitespace(Cursor(Source)));
  if (C.isEOF()) {
    Token.reset(MIToken::Eof, C.remaining());
    return C.remaining();
  }

  // Each lexer runs only if every earlier one reported "no match", so the
  // order below is significant.
  Cursor Next = maybeLexMachineBasicBlock(C, Token, ErrorCallback);
  if (!Next)
    Next = maybeLexIdentifier(C, Token);
  if (!Next)
    Next = maybeLexJumpTableIndex(C, Token);
  if (!Next)
    Next = maybeLexStackObject(C, Token);
  if (!Next)
    Next = maybeLexFixedStackObject(C, Token);
  if (!Next)
    Next = maybeLexConstantPoolItem(C, Token);
  if (!Next)
    Next = maybeLexSubRegisterIndex(C, Token, ErrorCallback);
  if (!Next)
    Next = maybeLexIRBlock(C, Token, ErrorCallback);
  if (!Next)
    Next = maybeLexIRValue(C, Token, ErrorCallback);
  if (!Next)
    Next = maybeLexRegister(C, Token, ErrorCallback);
  if (!Next)
    Next = maybeLexGlobalValue(C, Token, ErrorCallback);
  if (!Next)
    Next = maybeLexExternalSymbol(C, Token, ErrorCallback);
  if (!Next)
    Next = maybeLexMCSymbol(C, Token, ErrorCallback);
  if (!Next)
    Next = maybeLexHexadecimalLiteral(C, Token);
  if (!Next)
    Next = maybeLexNumericalLiteral(C, Token);
  if (!Next)
    Next = maybeLexExlaim(C, Token, ErrorCallback);
  if (!Next)
    Next = maybeLexSymbol(C, Token);
  if (!Next)
    Next = maybeLexNewline(C, Token);
  if (!Next)
    Next = maybeLexEscapedIRValue(C, Token, ErrorCallback);
  if (!Next)
    Next = maybeLexStringConstant(C, Token, ErrorCallback);
  if (Next)
    return Next.remaining();

  Token.reset(MIToken::Error, C.remaining());
  ErrorCallback(C.location(),
                Twine("unexpected character '") + Twine(C.peek()) + "'");
  return C.remaining();
}