Coverage Report

Created: 2017-10-03 07:32

/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
Line
Count
Source (jump to first uncovered line)
1
//===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===//
2
//
3
//                     The LLVM Compiler Infrastructure
4
//
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
7
//
8
//===----------------------------------------------------------------------===//
9
10
#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11
#define LLVM_MC_MCPARSER_MCASMLEXER_H
12
13
#include "llvm/ADT/APInt.h"
14
#include "llvm/ADT/ArrayRef.h"
15
#include "llvm/ADT/SmallVector.h"
16
#include "llvm/ADT/StringRef.h"
17
#include "llvm/Support/SMLoc.h"
18
#include <algorithm>
19
#include <cassert>
20
#include <cstddef>
21
#include <cstdint>
22
#include <string>
23
24
namespace llvm {
25
26
/// Target independent representation for an assembler token.
27
class AsmToken {
28
public:
29
  enum TokenKind {
30
    // Markers
31
    Eof, Error,
32
33
    // String values.
34
    Identifier,
35
    String,
36
37
    // Integer values.
38
    Integer,
39
    BigNum, // larger than 64 bits
40
41
    // Real values.
42
    Real,
43
44
    // Comments
45
    Comment,
46
    HashDirective,
47
    // No-value.
48
    EndOfStatement,
49
    Colon,
50
    Space,
51
    Plus, Minus, Tilde,
52
    Slash,     // '/'
53
    BackSlash, // '\'
54
    LParen, RParen, LBrac, RBrac, LCurly, RCurly,
55
    Star, Dot, Comma, Dollar, Equal, EqualEqual,
56
57
    Pipe, PipePipe, Caret,
58
    Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
59
    Less, LessEqual, LessLess, LessGreater,
60
    Greater, GreaterEqual, GreaterGreater, At,
61
62
    // MIPS unary expression operators such as %neg.
63
    PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi,
64
    PercentDtprel_Lo, PercentGot, PercentGot_Disp, PercentGot_Hi, PercentGot_Lo,
65
    PercentGot_Ofst, PercentGot_Page, PercentGottprel, PercentGp_Rel, PercentHi,
66
    PercentHigher, PercentHighest, PercentLo, PercentNeg, PercentPcrel_Hi,
67
    PercentPcrel_Lo, PercentTlsgd, PercentTlsldm, PercentTprel_Hi,
68
    PercentTprel_Lo
69
  };
70
71
private:
72
  TokenKind Kind;
73
74
  /// A reference to the entire token contents; this is always a pointer into
75
  /// a memory buffer owned by the source manager.
76
  StringRef Str;
77
78
  APInt IntVal;
79
80
public:
81
479k
  AsmToken() = default;
82
  AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
83
402k
      : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {}
84
  AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
85
5.95M
      : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
86
87
11.8M
  TokenKind getKind() const { return Kind; }
88
43.1M
  bool is(TokenKind K) const { return Kind == K; }
89
6.57M
  bool isNot(TokenKind K) const { return Kind != K; }
90
91
  SMLoc getLoc() const;
92
  SMLoc getEndLoc() const;
93
  SMRange getLocRange() const;
94
95
  /// Get the contents of a string token (without quotes).
96
67.9k
  StringRef getStringContents() const {
97
67.9k
    assert(Kind == String && "This token isn't a string!");
98
67.9k
    return Str.slice(1, Str.size() - 1);
99
67.9k
  }
100
101
  /// Get the identifier string for the current token, which should be an
102
  /// identifier or a string. This gets the portion of the string which should
103
  /// be used as the identifier, e.g., it does not include the quotes on
104
  /// strings.
105
1.95M
  StringRef getIdentifier() const {
106
1.95M
    if (Kind == Identifier)
107
1.95M
      return getString();
108
241
    return getStringContents();
109
1.95M
  }
110
111
  /// Get the string for the current token; this includes all characters (for
112
  /// example, the quotes on strings) in the token.
113
  ///
114
  /// The returned StringRef points into the source manager's memory buffer, and
115
  /// is safe to store across calls to Lex().
116
12.1M
  StringRef getString() const { return Str; }
117
118
  // FIXME: Don't compute this in advance, it makes every token larger, and is
119
  // also not generally what we want (it is nicer for recovery etc. to lex 123br
120
  // as a single token, then diagnose as an invalid number).
121
369k
  int64_t getIntVal() const {
122
369k
    assert(Kind == Integer && "This token isn't an integer!");
123
369k
    return IntVal.getZExtValue();
124
369k
  }
125
126
7
  APInt getAPIntVal() const {
127
7
    assert((Kind == Integer || Kind == BigNum) &&
128
7
           "This token isn't an integer!");
129
7
    return IntVal;
130
7
  }
131
};
132
133
/// A callback class which is notified of each comment in an assembly file as
134
/// it is lexed.
135
class AsmCommentConsumer {
136
public:
137
  virtual ~AsmCommentConsumer() = default;
138
139
  /// Callback function for when a comment is lexed. Loc is the start of the
140
  /// comment text (excluding the comment-start marker). CommentText is the text
141
  /// of the comment, excluding the comment start and end markers, and the
142
  /// newline for single-line comments.
143
  virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
144
};
145
146
147
/// Generic assembler lexer interface, for use by target specific assembly
148
/// lexers.
149
class MCAsmLexer {
150
  /// The current token, stored in the base class for faster access.
151
  SmallVector<AsmToken, 1> CurTok;
152
153
  /// The location and description of the current error
154
  SMLoc ErrLoc;
155
  std::string Err;
156
157
protected: // Can only create subclasses.
158
  const char *TokStart = nullptr;
159
  bool SkipSpace = true;
160
  bool AllowAtInIdentifier;
161
  bool IsAtStartOfStatement = true;
162
  AsmCommentConsumer *CommentConsumer = nullptr;
163
164
  bool AltMacroMode;
165
  MCAsmLexer();
166
167
  virtual AsmToken LexToken() = 0;
168
169
343k
  void SetError(SMLoc errLoc, const std::string &err) {
170
343k
    ErrLoc = errLoc;
171
343k
    Err = err;
172
343k
  }
173
174
public:
175
  MCAsmLexer(const MCAsmLexer &) = delete;
176
  MCAsmLexer &operator=(const MCAsmLexer &) = delete;
177
  virtual ~MCAsmLexer();
178
179
1.31M
  bool IsaAltMacroMode() {
180
1.31M
    return AltMacroMode;
181
1.31M
  }
182
183
8
  void SetAltMacroMode(bool AltMacroSet) {
184
8
    AltMacroMode = AltMacroSet;
185
8
  }
186
187
  /// Consume the next token from the input stream and return it.
188
  ///
189
  /// The lexer will continuously return the end-of-file token once the end of
190
  /// the main input file has been reached.
191
5.89M
  const AsmToken &Lex() {
192
5.89M
    assert(!CurTok.empty());
193
5.89M
    // Mark if we are parsing out an EndOfStatement.
194
5.89M
    IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
195
5.89M
    CurTok.erase(CurTok.begin());
196
5.89M
    // LexToken may generate multiple tokens via UnLex but will always return
197
5.89M
    // the first one. Place returned value at head of CurTok vector.
198
5.89M
    if (CurTok.empty()) {
199
5.86M
      AsmToken T = LexToken();
200
5.86M
      CurTok.insert(CurTok.begin(), T);
201
5.86M
    }
202
5.89M
    return CurTok.front();
203
5.89M
  }
204
205
28.3k
  void UnLex(AsmToken const &Token) {
206
28.3k
    IsAtStartOfStatement = false;
207
28.3k
    CurTok.insert(CurTok.begin(), Token);
208
28.3k
  }
209
210
29.8k
  bool isAtStartOfStatement() { return IsAtStartOfStatement; }
211
212
  virtual StringRef LexUntilEndOfStatement() = 0;
213
214
  /// Get the current source location.
215
  SMLoc getLoc() const;
216
217
  /// Get the current (last) lexed token.
218
61.1M
  const AsmToken &getTok() const {
219
61.1M
    return CurTok[0];
220
61.1M
  }
221
222
  /// Look ahead at the next token to be lexed.
223
208k
  const AsmToken peekTok(bool ShouldSkipSpace = true) {
224
208k
    AsmToken Tok;
225
208k
226
208k
    MutableArrayRef<AsmToken> Buf(Tok);
227
208k
    size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
228
208k
229
208k
    assert(ReadCount == 1);
230
208k
    (void)ReadCount;
231
208k
232
208k
    return Tok;
233
208k
  }
234
235
  /// Look ahead an arbitrary number of tokens.
236
  virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
237
                            bool ShouldSkipSpace = true) = 0;
238
239
  /// Get the current error location
240
343k
  SMLoc getErrLoc() {
241
343k
    return ErrLoc;
242
343k
  }
243
244
  /// Get the current error string
245
343k
  const std::string &getErr() {
246
343k
    return Err;
247
343k
  }
248
249
  /// Get the kind of current token.
250
5.43M
  AsmToken::TokenKind getKind() const { return getTok().getKind(); }
251
252
  /// Check if the current token has kind \p K.
253
19.0M
  bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
254
255
  /// Check if the current token does not have kind \p K.
256
5.53M
  bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
257
258
  /// Set whether spaces should be ignored by the lexer
259
327k
  void setSkipSpace(bool val) { SkipSpace = val; }
260
261
960
  bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
262
208
  void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
263
264
0
  void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
265
0
    this->CommentConsumer = CommentConsumer;
266
0
  }
267
};
268
269
} // end namespace llvm
270
271
#endif // LLVM_MC_MCPARSER_MCASMLEXER_H