Coverage Report

Created: 2019-07-24 05:18

/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/MC/MCParser/AsmLexer.cpp
Line
Count
Source (jump to first uncovered line)
1
//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This class implements the lexer for assembly files.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include "llvm/MC/MCParser/AsmLexer.h"
14
#include "llvm/ADT/APInt.h"
15
#include "llvm/ADT/ArrayRef.h"
16
#include "llvm/ADT/StringExtras.h"
17
#include "llvm/ADT/StringRef.h"
18
#include "llvm/ADT/StringSwitch.h"
19
#include "llvm/MC/MCAsmInfo.h"
20
#include "llvm/MC/MCParser/MCAsmLexer.h"
21
#include "llvm/Support/SMLoc.h"
22
#include "llvm/Support/SaveAndRestore.h"
23
#include <cassert>
24
#include <cctype>
25
#include <cstdio>
26
#include <cstring>
27
#include <string>
28
#include <tuple>
29
#include <utility>
30
31
using namespace llvm;
32
33
18.4k
/// Build a lexer configured from the assembler description \p MAI.
AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
  // '@' is accepted inside identifiers only when the comment string of this
  // target does not begin with '@'.
  const StringRef CommentPrefix(MAI.getCommentString());
  AllowAtInIdentifier = !CommentPrefix.startswith("@");
}

AsmLexer::~AsmLexer() = default;
38
39
432k
/// Point the lexer at \p Buf. Lexing resumes at \p ptr when it is non-null and
/// at the beginning of \p Buf otherwise. The current token start is cleared.
void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
  CurBuf = Buf;
  CurPtr = ptr ? ptr : CurBuf.begin();
  TokStart = nullptr;
}
49
50
/// ReturnError - Set the error to the specified string at the specified
51
/// location.  This is defined to always return AsmToken::Error.
52
237
AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
53
237
  SetError(SMLoc::getFromPointer(Loc), Msg);
54
237
55
237
  return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc));
56
237
}
57
58
95.9M
int AsmLexer::getNextChar() {
59
95.9M
  if (CurPtr == CurBuf.end())
60
18.6k
    return EOF;
61
95.9M
  return (unsigned char)*CurPtr++;
62
95.9M
}
63
64
/// The leading integral digit sequence and dot should have already been
65
/// consumed, some or all of the fractional digit sequence *can* have been
66
/// consumed.
67
80.3k
AsmToken AsmLexer::LexFloatLiteral() {
68
80.3k
  // Skip the fractional digit sequence.
69
188k
  while (isDigit(*CurPtr))
70
107k
    ++CurPtr;
71
80.3k
72
80.3k
  if (*CurPtr == '-' || *CurPtr == '+')
73
2
    return ReturnError(CurPtr, "Invalid sign in float literal");
74
80.3k
75
80.3k
  // Check for exponent
76
80.3k
  if ((*CurPtr == 'e' || 
*CurPtr == 'E'80.1k
)) {
77
184
    ++CurPtr;
78
184
79
184
    if (*CurPtr == '-' || 
*CurPtr == '+'85
)
80
152
      ++CurPtr;
81
184
82
488
    while (isDigit(*CurPtr))
83
304
      ++CurPtr;
84
184
  }
85
80.3k
86
80.3k
  return AsmToken(AsmToken::Real,
87
80.3k
                  StringRef(TokStart, CurPtr - TokStart));
88
80.3k
}
89
90
/// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+
91
/// while making sure there are enough actual digits around for the constant to
92
/// be valid.
93
///
94
/// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed
95
/// before we get here.
96
34
AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) {
97
34
  assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') &&
98
34
         "unexpected parse state in floating hex");
99
34
  bool NoFracDigits = true;
100
34
101
34
  // Skip the fractional part if there is one
102
34
  if (*CurPtr == '.') {
103
27
    ++CurPtr;
104
27
105
27
    const char *FracStart = CurPtr;
106
114
    while (isHexDigit(*CurPtr))
107
87
      ++CurPtr;
108
27
109
27
    NoFracDigits = CurPtr == FracStart;
110
27
  }
111
34
112
34
  if (NoIntDigits && 
NoFracDigits7
)
113
5
    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
114
5
                                 "expected at least one significand digit");
115
29
116
29
  // Make sure we do have some kind of proper exponent part
117
29
  if (*CurPtr != 'p' && 
*CurPtr != 'P'2
)
118
2
    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
119
2
                                 "expected exponent part 'p'");
120
27
  ++CurPtr;
121
27
122
27
  if (*CurPtr == '+' || 
*CurPtr == '-'21
)
123
16
    ++CurPtr;
124
27
125
27
  // N.b. exponent digits are *not* hex
126
27
  const char *ExpStart = CurPtr;
127
62
  while (isDigit(*CurPtr))
128
35
    ++CurPtr;
129
27
130
27
  if (CurPtr == ExpStart)
131
7
    return ReturnError(TokStart, "invalid hexadecimal floating-point constant: "
132
7
                                 "expected at least one exponent digit");
133
20
134
20
  return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
135
20
}
136
137
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
138
48.5M
static bool IsIdentifierChar(char c, bool AllowAt) {
139
48.5M
  return isAlnum(c) || 
c == '_'7.66M
||
c == '$'5.58M
||
c == '.'5.58M
||
140
48.5M
         
(5.48M
c == '@'5.48M
&&
AllowAt3.32k
) ||
c == '?'5.47M
;
141
48.5M
}
142
143
5.47M
/// Lex an identifier whose first character has already been consumed.
///
/// A leading '.' followed by digits may instead turn out to be a floating
/// point literal, and a lone '.' is returned as AsmToken::Dot.
AsmToken AsmLexer::LexIdentifier() {
  // ".<digits>" is either a float literal or an identifier like ".1243foo".
  const bool DotThenDigit = (CurPtr[-1] == '.' && isDigit(*CurPtr));
  if (DotThenDigit) {
    for (; isDigit(*CurPtr); ++CurPtr)
      ;

    // It is a float when the digits are followed by an exponent marker or by
    // something that cannot continue an identifier.
    const bool EndsIdentifier =
        !IsIdentifierChar(*CurPtr, AllowAtInIdentifier);
    if (EndsIdentifier || *CurPtr == 'e' || *CurPtr == 'E')
      return LexFloatLiteral();
  }

  for (; IsIdentifierChar(*CurPtr, AllowAtInIdentifier); ++CurPtr)
    ;

  // A single '.' on its own is not an identifier.
  const bool IsLoneDot = (CurPtr == TokStart + 1 && TokStart[0] == '.');
  if (IsLoneDot)
    return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));

  return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
}
164
165
/// LexSlash: Slash: /
166
///           C-Style Comment: /* ... */
167
737k
AsmToken AsmLexer::LexSlash() {
168
737k
  switch (*CurPtr) {
169
737k
  case '*':
170
145
    IsAtStartOfStatement = false;
171
145
    break; // C style comment.
172
737k
  case '/':
173
723k
    ++CurPtr;
174
723k
    return LexLineComment();
175
737k
  default:
176
13.0k
    IsAtStartOfStatement = false;
177
13.0k
    return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
178
145
  }
179
145
180
145
  // C Style comment.
181
145
  ++CurPtr;  // skip the star.
182
145
  const char *CommentTextStart = CurPtr;
183
3.43k
  while (CurPtr != CurBuf.end()) {
184
3.43k
    switch (*CurPtr++) {
185
3.43k
    case '*':
186
145
      // End of the comment?
187
145
      if (*CurPtr != '/')
188
0
        break;
189
145
      // If we have a CommentConsumer, notify it about the comment.
190
145
      if (CommentConsumer) {
191
0
        CommentConsumer->HandleComment(
192
0
            SMLoc::getFromPointer(CommentTextStart),
193
0
            StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
194
0
      }
195
145
      ++CurPtr;   // End the */.
196
145
      return AsmToken(AsmToken::Comment,
197
145
                      StringRef(TokStart, CurPtr - TokStart));
198
3.43k
    }
199
3.43k
  }
200
145
  
return ReturnError(TokStart, "unterminated comment")0
;
201
145
}
202
203
/// LexLineComment: Comment: #[^\n]*
204
///                        : //[^\n]*
205
1.36M
AsmToken AsmLexer::LexLineComment() {
206
1.36M
  // Mark This as an end of statement with a body of the
207
1.36M
  // comment. While it would be nicer to leave this two tokens,
208
1.36M
  // backwards compatability with TargetParsers makes keeping this in this form
209
1.36M
  // better.
210
1.36M
  const char *CommentTextStart = CurPtr;
211
1.36M
  int CurChar = getNextChar();
212
73.8M
  while (CurChar != '\n' && 
CurChar != '\r'72.5M
&&
CurChar != EOF72.5M
)
213
72.5M
    CurChar = getNextChar();
214
1.36M
  if (CurChar == '\r' && 
CurPtr != CurBuf.end()30
&&
*CurPtr == '\n'30
)
215
30
    ++CurPtr;
216
1.36M
217
1.36M
  // If we have a CommentConsumer, notify it about the comment.
218
1.36M
  if (CommentConsumer) {
219
112k
    CommentConsumer->HandleComment(
220
112k
        SMLoc::getFromPointer(CommentTextStart),
221
112k
        StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
222
112k
  }
223
1.36M
224
1.36M
  IsAtStartOfLine = true;
225
1.36M
  // This is a whole line comment. leave newline
226
1.36M
  if (IsAtStartOfStatement)
227
1.30M
    return AsmToken(AsmToken::EndOfStatement,
228
1.30M
                    StringRef(TokStart, CurPtr - TokStart));
229
62.1k
  IsAtStartOfStatement = true;
230
62.1k
231
62.1k
  return AsmToken(AsmToken::EndOfStatement,
232
62.1k
                  StringRef(TokStart, CurPtr - 1 - TokStart));
233
62.1k
}
234
235
1.11M
/// Advance \p CurPtr past an integer type suffix that is accepted but
/// ignored: an optional 'U' followed by at most two 'L's (U, L, LL, UL, ULL).
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  if (*CurPtr == 'U')
    ++CurPtr;

  for (int SkippedLs = 0; SkippedLs < 2 && *CurPtr == 'L'; ++SkippedLs)
    ++CurPtr;
}
244
245
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
246
// integer as a hexadecimal, possibly with leading zeroes.
247
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
248
1.09M
                               bool LexHex) {
249
1.09M
  const char *FirstNonDec = nullptr;
250
1.09M
  const char *LookAhead = CurPtr;
251
1.68M
  while (true) {
252
1.68M
    if (isDigit(*LookAhead)) {
253
588k
      ++LookAhead;
254
1.09M
    } else {
255
1.09M
      if (!FirstNonDec)
256
1.09M
        FirstNonDec = LookAhead;
257
1.09M
258
1.09M
      // Keep going if we are looking for a 'h' suffix.
259
1.09M
      if (LexHex && 
isHexDigit(*LookAhead)270
)
260
0
        ++LookAhead;
261
1.09M
      else
262
1.09M
        break;
263
1.09M
    }
264
1.68M
  }
265
1.09M
  bool isHex = LexHex && 
(270
*LookAhead == 'h'270
||
*LookAhead == 'H'270
);
266
1.09M
  CurPtr = isHex || !FirstNonDec ? 
LookAhead0
: FirstNonDec;
267
1.09M
  if (isHex)
268
0
    return 16;
269
1.09M
  return DefaultRadix;
270
1.09M
}
271
272
/// Wrap \p Value in an integer token with text \p Ref: AsmToken::Integer when
/// the value fits in 64 bits, AsmToken::BigNum otherwise.
static AsmToken intToken(StringRef Ref, APInt &Value) {
  const AsmToken::TokenKind Kind =
      Value.isIntN(64) ? AsmToken::Integer : AsmToken::BigNum;
  return AsmToken(Kind, Ref, Value);
}
278
279
/// LexDigit: First character is [0-9].
280
///   Local Label: [0-9][:]
281
///   Forward/Backward Label: [0-9][fb]
282
///   Binary integer: 0b[01]+
283
///   Octal integer: 0[0-7]+
284
///   Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
285
///   Decimal integer: [1-9][0-9]*
286
1.19M
AsmToken AsmLexer::LexDigit() {
287
1.19M
  // MASM-flavor binary integer: [01]+[bB]
288
1.19M
  // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
289
1.19M
  if (LexMasmIntegers && 
isdigit(CurPtr[-1])320
) {
290
320
    const char *FirstNonBinary = (CurPtr[-1] != '0' && 
CurPtr[-1] != '1'259
) ?
291
203
                                   CurPtr - 1 : 
nullptr117
;
292
320
    const char *OldCurPtr = CurPtr;
293
496
    while (isHexDigit(*CurPtr)) {
294
176
      if (*CurPtr != '0' && 
*CurPtr != '1'143
&&
!FirstNonBinary126
)
295
43
        FirstNonBinary = CurPtr;
296
176
      ++CurPtr;
297
176
    }
298
320
299
320
    unsigned Radix = 0;
300
320
    if (*CurPtr == 'h' || 
*CurPtr == 'H'295
) {
301
28
      // hexadecimal number
302
28
      ++CurPtr;
303
28
      Radix = 16;
304
292
    } else if (FirstNonBinary && 
FirstNonBinary + 1 == CurPtr224
&&
305
292
               
(163
*FirstNonBinary == 'b'163
||
*FirstNonBinary == 'B'161
))
306
3
      Radix = 2;
307
320
308
320
    if (Radix == 2 || 
Radix == 16317
) {
309
31
      StringRef Result(TokStart, CurPtr - TokStart);
310
31
      APInt Value(128, 0, true);
311
31
312
31
      if (Result.drop_back().getAsInteger(Radix, Value))
313
0
        return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
314
0
                             "invalid hexdecimal number");
315
31
316
31
      // MSVC accepts and ignores type suffices on integer literals.
317
31
      SkipIgnoredIntegerSuffix(CurPtr);
318
31
319
31
      return intToken(Result, Value);
320
31
   }
321
289
322
289
    // octal/decimal integers, or floating point numbers, fall through
323
289
    CurPtr = OldCurPtr;
324
289
  }
325
1.19M
326
1.19M
  // Decimal integer: [1-9][0-9]*
327
1.19M
  
if (1.19M
CurPtr[-1] != '0'1.19M
||
CurPtr[0] == '.'213k
) {
328
996k
    unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
329
996k
    bool isHex = Radix == 16;
330
996k
    // Check for floating point literals.
331
996k
    if (!isHex && (*CurPtr == '.' || 
*CurPtr == 'e'916k
||
*CurPtr == 'E'916k
)) {
332
80.3k
      if (*CurPtr == '.')
333
80.3k
        ++CurPtr;
334
80.3k
      return LexFloatLiteral();
335
80.3k
    }
336
916k
337
916k
    StringRef Result(TokStart, CurPtr - TokStart);
338
916k
339
916k
    APInt Value(128, 0, true);
340
916k
    if (Result.getAsInteger(Radix, Value))
341
0
      return ReturnError(TokStart, !isHex ? "invalid decimal number" :
342
0
                           "invalid hexdecimal number");
343
916k
344
916k
    // Consume the [hH].
345
916k
    if (LexMasmIntegers && 
Radix == 16248
)
346
0
      ++CurPtr;
347
916k
348
916k
    // The darwin/x86 (and x86-64) assembler accepts and ignores type
349
916k
    // suffices on integer literals.
350
916k
    SkipIgnoredIntegerSuffix(CurPtr);
351
916k
352
916k
    return intToken(Result, Value);
353
916k
  }
354
197k
355
197k
  if (!LexMasmIntegers && 
(197k
(*CurPtr == 'b')197k
||
(*CurPtr == 'B')197k
)) {
356
39
    ++CurPtr;
357
39
    // See if we actually have "0b" as part of something like "jmp 0b\n"
358
39
    if (!isDigit(CurPtr[0])) {
359
34
      --CurPtr;
360
34
      StringRef Result(TokStart, CurPtr - TokStart);
361
34
      return AsmToken(AsmToken::Integer, Result, 0);
362
34
    }
363
5
    const char *NumStart = CurPtr;
364
143
    while (CurPtr[0] == '0' || 
CurPtr[0] == '1'75
)
365
138
      ++CurPtr;
366
5
367
5
    // Requires at least one binary digit.
368
5
    if (CurPtr == NumStart)
369
0
      return ReturnError(TokStart, "invalid binary number");
370
5
371
5
    StringRef Result(TokStart, CurPtr - TokStart);
372
5
373
5
    APInt Value(128, 0, true);
374
5
    if (Result.substr(2).getAsInteger(2, Value))
375
0
      return ReturnError(TokStart, "invalid binary number");
376
5
377
5
    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
378
5
    // suffixes on integer literals.
379
5
    SkipIgnoredIntegerSuffix(CurPtr);
380
5
381
5
    return intToken(Result, Value);
382
5
  }
383
197k
384
197k
  if ((*CurPtr == 'x') || 
(*CurPtr == 'X')95.5k
) {
385
101k
    ++CurPtr;
386
101k
    const char *NumStart = CurPtr;
387
395k
    while (isHexDigit(CurPtr[0]))
388
293k
      ++CurPtr;
389
101k
390
101k
    // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be
391
101k
    // diagnosed by LexHexFloatLiteral).
392
101k
    if (CurPtr[0] == '.' || 
CurPtr[0] == 'p'101k
||
CurPtr[0] == 'P'101k
)
393
34
      return LexHexFloatLiteral(NumStart == CurPtr);
394
101k
395
101k
    // Otherwise requires at least one hex digit.
396
101k
    if (CurPtr == NumStart)
397
3
      return ReturnError(CurPtr-2, "invalid hexadecimal number");
398
101k
399
101k
    APInt Result(128, 0);
400
101k
    if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
401
0
      return ReturnError(TokStart, "invalid hexadecimal number");
402
101k
403
101k
    // Consume the optional [hH].
404
101k
    if (LexMasmIntegers && 
(19
*CurPtr == 'h'19
||
*CurPtr == 'H'17
))
405
2
      ++CurPtr;
406
101k
407
101k
    // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
408
101k
    // suffixes on integer literals.
409
101k
    SkipIgnoredIntegerSuffix(CurPtr);
410
101k
411
101k
    return intToken(StringRef(TokStart, CurPtr - TokStart), Result);
412
101k
  }
413
95.5k
414
95.5k
  // Either octal or hexadecimal.
415
95.5k
  APInt Value(128, 0, true);
416
95.5k
  unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
417
95.5k
  bool isHex = Radix == 16;
418
95.5k
  StringRef Result(TokStart, CurPtr - TokStart);
419
95.5k
  if (Result.getAsInteger(Radix, Value))
420
25
    return ReturnError(TokStart, !isHex ? "invalid octal number" :
421
25
                       
"invalid hexdecimal number"0
);
422
95.5k
423
95.5k
  // Consume the [hH].
424
95.5k
  if (Radix == 16)
425
0
    ++CurPtr;
426
95.5k
427
95.5k
  // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
428
95.5k
  // suffixes on integer literals.
429
95.5k
  SkipIgnoredIntegerSuffix(CurPtr);
430
95.5k
431
95.5k
  return intToken(Result, Value);
432
95.5k
}
433
434
/// LexSingleQuote: Integer: 'b'
435
208
AsmToken AsmLexer::LexSingleQuote() {
436
208
  int CurChar = getNextChar();
437
208
438
208
  if (CurChar == '\\')
439
5
    CurChar = getNextChar();
440
208
441
208
  if (CurChar == EOF)
442
208
    
return ReturnError(TokStart, "unterminated single quote")0
;
443
208
444
208
  CurChar = getNextChar();
445
208
446
208
  if (CurChar != '\'')
447
184
    return ReturnError(TokStart, "single quote way too long");
448
24
449
24
  // The idea here being that 'c' is basically just an integral
450
24
  // constant.
451
24
  StringRef Res = StringRef(TokStart,CurPtr - TokStart);
452
24
  long long Value;
453
24
454
24
  if (Res.startswith("\'\\")) {
455
5
    char theChar = Res[2];
456
5
    switch (theChar) {
457
5
      
default: Value = theChar; break2
;
458
5
      
case '\'': Value = '\''; break1
;
459
5
      
case 't': Value = '\t'; break1
;
460
5
      
case 'n': Value = '\n'; break1
;
461
5
      
case 'b': Value = '\b'; break0
;
462
19
    }
463
19
  } else
464
19
    Value = TokStart[1];
465
24
466
24
  return AsmToken(AsmToken::Integer, Res, Value);
467
24
}
468
469
/// LexQuote: String: "..."
470
87.3k
AsmToken AsmLexer::LexQuote() {
471
87.3k
  int CurChar = getNextChar();
472
87.3k
  // TODO: does gas allow multiline string constants?
473
1.22M
  while (CurChar != '"') {
474
1.13M
    if (CurChar == '\\') {
475
1.45k
      // Allow \", etc.
476
1.45k
      CurChar = getNextChar();
477
1.45k
    }
478
1.13M
479
1.13M
    if (CurChar == EOF)
480
1.13M
      
return ReturnError(TokStart, "unterminated string constant")0
;
481
1.13M
482
1.13M
    CurChar = getNextChar();
483
1.13M
  }
484
87.3k
485
87.3k
  return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
486
87.3k
}
487
488
781
/// Advance from the current position up to (but not including) the next line
/// comment, statement separator, newline or end of buffer, and return the
/// text that was skipped. TokStart is set to the starting position.
StringRef AsmLexer::LexUntilEndOfStatement() {
  TokStart = CurPtr;

  // The end-of-buffer test comes first so that CurPtr is never inspected once
  // it has reached CurBuf.end().
  while (CurPtr != CurBuf.end() &&          // End of buffer.
         !isAtStartOfComment(CurPtr) &&     // Start of line comment.
         !isAtStatementSeparator(CurPtr) && // End of statement marker.
         *CurPtr != '\n' && *CurPtr != '\r') {
    ++CurPtr;
  }
  return StringRef(TokStart, CurPtr-TokStart);
}
498
499
5.53k
/// Advance from the current position up to (but not including) the next
/// '\n' or '\r', or to the end of the buffer, and return the text that was
/// skipped. TokStart is set to the starting position.
StringRef AsmLexer::LexUntilEndOfLine() {
  TokStart = CurPtr;

  // The end-of-buffer test comes first so that CurPtr is never dereferenced
  // once it has reached CurBuf.end().
  while (CurPtr != CurBuf.end() && *CurPtr != '\n' && *CurPtr != '\r') {
    ++CurPtr;
  }
  return StringRef(TokStart, CurPtr-TokStart);
}
507
508
size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
509
2.08M
                            bool ShouldSkipSpace) {
510
2.08M
  SaveAndRestore<const char *> SavedTokenStart(TokStart);
511
2.08M
  SaveAndRestore<const char *> SavedCurPtr(CurPtr);
512
2.08M
  SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
513
2.08M
  SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
514
2.08M
  SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
515
2.08M
  SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
516
2.08M
  std::string SavedErr = getErr();
517
2.08M
  SMLoc SavedErrLoc = getErrLoc();
518
2.08M
519
2.08M
  size_t ReadCount;
520
4.87M
  for (ReadCount = 0; ReadCount < Buf.size(); 
++ReadCount2.78M
) {
521
2.78M
    AsmToken Token = LexToken();
522
2.78M
523
2.78M
    Buf[ReadCount] = Token;
524
2.78M
525
2.78M
    if (Token.is(AsmToken::Eof))
526
19
      break;
527
2.78M
  }
528
2.08M
529
2.08M
  SetError(SavedErrLoc, SavedErr);
530
2.08M
  return ReadCount;
531
2.08M
}
532
533
20.4M
bool AsmLexer::isAtStartOfComment(const char *Ptr) {
534
20.4M
  StringRef CommentString = MAI.getCommentString();
535
20.4M
536
20.4M
  if (CommentString.size() == 1)
537
19.3M
    return CommentString[0] == Ptr[0];
538
1.12M
539
1.12M
  // Allow # preprocessor commments also be counted as comments for "##" cases
540
1.12M
  if (CommentString[1] == '#')
541
185k
    return CommentString[0] == Ptr[0];
542
944k
543
944k
  return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
544
944k
}
545
546
20.1M
bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
547
20.1M
  return strncmp(Ptr, MAI.getSeparatorString(),
548
20.1M
                 strlen(MAI.getSeparatorString())) == 0;
549
20.1M
}
550
551
20.8M
/// Lex and return the next token, starting at the current position.
///
/// Besides producing the token this keeps IsAtStartOfLine and
/// IsAtStartOfStatement up to date. Whitespace is skipped when SkipSpace is
/// set and returned as AsmToken::Space otherwise.
AsmToken AsmLexer::LexToken() {
  TokStart = CurPtr;
  // Fetch the first character of the token; getNextChar() yields EOF (without
  // advancing) at the end of the buffer.
  int CurChar = getNextChar();

  // Token of the given kind covering the first Len characters at TokStart.
  const auto MakeToken = [this](AsmToken::TokenKind Kind,
                                size_t Len) -> AsmToken {
    return AsmToken(Kind, StringRef(TokStart, Len));
  };
  // Two-character operator Pair if the next character is Second (which is
  // then consumed), one-character operator Single otherwise.
  const auto OneOrTwo = [&](char Second, AsmToken::TokenKind Pair,
                            AsmToken::TokenKind Single) -> AsmToken {
    if (*CurPtr != Second)
      return MakeToken(Single, 1);
    ++CurPtr;
    return MakeToken(Pair, 2);
  };

  if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
    // If this starts with a '#', this may be a cpp
    // hash directive and otherwise a line comment.
    AsmToken TokenBuf[2];
    MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
    size_t num = peekTokens(Buf, true);
    // There cannot be a space preceding this
    const bool LooksLikeHashDirective =
        IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
        TokenBuf[1].is(AsmToken::String);
    if (!LooksLikeHashDirective)
      return LexLineComment();

    CurPtr = TokStart; // Rewind to the '#'.
    StringRef s = LexUntilEndOfLine();
    UnLex(TokenBuf[1]);
    UnLex(TokenBuf[0]);
    return AsmToken(AsmToken::HashDirective, s);
  }

  if (isAtStartOfComment(TokStart))
    return LexLineComment();

  if (isAtStatementSeparator(TokStart)) {
    // One character of the separator has been consumed above; skip the rest.
    CurPtr += strlen(MAI.getSeparatorString()) - 1;
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return MakeToken(AsmToken::EndOfStatement,
                     strlen(MAI.getSeparatorString()));
  }

  // If we're missing a newline at EOF, make sure we still get an
  // EndOfStatement token before the Eof token.
  if (CurChar == EOF && !IsAtStartOfStatement) {
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return MakeToken(AsmToken::EndOfStatement, 1);
  }

  IsAtStartOfLine = false;
  // Most tokens end the "start of statement" state; whitespace and '/' put
  // the previous value back below.
  bool OldIsAtStartOfStatement = IsAtStartOfStatement;
  IsAtStartOfStatement = false;

  switch (CurChar) {
  default:
    // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
      return LexIdentifier();

    // Unknown character, emit an error.
    return ReturnError(TokStart, "invalid character in input");

  case EOF:
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return MakeToken(AsmToken::Eof, 0);

  case 0:
  case ' ':
  case '\t':
    IsAtStartOfStatement = OldIsAtStartOfStatement;
    // Swallow the whole run of blanks and tabs.
    while (*CurPtr == ' ' || *CurPtr == '\t')
      CurPtr++;
    if (SkipSpace)
      return LexToken(); // Ignore whitespace.
    else
      return MakeToken(AsmToken::Space, CurPtr - TokStart);

  case '\r': {
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    // If this is a CR followed by LF, treat that as one token.
    if (CurPtr != CurBuf.end() && *CurPtr == '\n')
      ++CurPtr;
    return MakeToken(AsmToken::EndOfStatement, CurPtr - TokStart);
  }

  case '\n':
    IsAtStartOfLine = true;
    IsAtStartOfStatement = true;
    return MakeToken(AsmToken::EndOfStatement, 1);

  // Single-character punctuation.
  case ':': return MakeToken(AsmToken::Colon, 1);
  case '+': return MakeToken(AsmToken::Plus, 1);
  case '~': return MakeToken(AsmToken::Tilde, 1);
  case '(': return MakeToken(AsmToken::LParen, 1);
  case ')': return MakeToken(AsmToken::RParen, 1);
  case '[': return MakeToken(AsmToken::LBrac, 1);
  case ']': return MakeToken(AsmToken::RBrac, 1);
  case '{': return MakeToken(AsmToken::LCurly, 1);
  case '}': return MakeToken(AsmToken::RCurly, 1);
  case '*': return MakeToken(AsmToken::Star, 1);
  case ',': return MakeToken(AsmToken::Comma, 1);
  case '$': return MakeToken(AsmToken::Dollar, 1);
  case '@': return MakeToken(AsmToken::At, 1);
  case '\\': return MakeToken(AsmToken::BackSlash, 1);
  case '^': return MakeToken(AsmToken::Caret, 1);

  // Operators that may be followed by a second character.
  case '=': return OneOrTwo('=', AsmToken::EqualEqual, AsmToken::Equal);
  case '-': return OneOrTwo('>', AsmToken::MinusGreater, AsmToken::Minus);
  case '|': return OneOrTwo('|', AsmToken::PipePipe, AsmToken::Pipe);
  case '&': return OneOrTwo('&', AsmToken::AmpAmp, AsmToken::Amp);
  case '!': return OneOrTwo('=', AsmToken::ExclaimEqual, AsmToken::Exclaim);

  case '%':
    if (MAI.hasMipsExpressions()) {
      AsmToken::TokenKind Operator;
      unsigned OperatorLength;

      // Each length counts the leading '%' plus the operator name. Longer
      // names are listed before names that are a prefix of them.
      std::tie(Operator, OperatorLength) =
          StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>(
              StringRef(CurPtr))
              .StartsWith("call16", {AsmToken::PercentCall16, 7})
              .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
              .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
              .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
              .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
              .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
              .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
              .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
              .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
              .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
              .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
              .StartsWith("got", {AsmToken::PercentGot, 4})
              .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
              .StartsWith("higher", {AsmToken::PercentHigher, 7})
              .StartsWith("highest", {AsmToken::PercentHighest, 8})
              .StartsWith("hi", {AsmToken::PercentHi, 3})
              .StartsWith("lo", {AsmToken::PercentLo, 3})
              .StartsWith("neg", {AsmToken::PercentNeg, 4})
              .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
              .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
              .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
              .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
              .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
              .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
              .Default({AsmToken::Percent, 1});

      if (Operator != AsmToken::Percent) {
        // The '%' is already consumed; skip the operator name.
        CurPtr += OperatorLength - 1;
        return AsmToken(Operator, StringRef(TokStart, OperatorLength));
      }
    }
    return MakeToken(AsmToken::Percent, 1);

  case '/':
    IsAtStartOfStatement = OldIsAtStartOfStatement;
    return LexSlash();

  case '#': return MakeToken(AsmToken::Hash, 1);
  case '\'': return LexSingleQuote();
  case '"': return LexQuote();

  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    return LexDigit();

  case '<':
    switch (*CurPtr) {
    case '<':
      ++CurPtr;
      return MakeToken(AsmToken::LessLess, 2);
    case '=':
      ++CurPtr;
      return MakeToken(AsmToken::LessEqual, 2);
    case '>':
      ++CurPtr;
      return MakeToken(AsmToken::LessGreater, 2);
    default:
      return MakeToken(AsmToken::Less, 1);
    }

  case '>':
    switch (*CurPtr) {
    case '>':
      ++CurPtr;
      return MakeToken(AsmToken::GreaterGreater, 2);
    case '=':
      ++CurPtr;
      return MakeToken(AsmToken::GreaterEqual, 2);
    default:
      return MakeToken(AsmToken::Greater, 1);
    }

  // TODO: Quoted identifiers (objc methods etc)
  // local labels: [0-9][:]
  // Forward/backward labels: [0-9][fb]
  // Integers, fp constants, character constants.
  }
}