Coverage Report

Created: 2020-02-15 09:57

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/include/clang/Lex/LiteralSupport.h
Line
Count
Source
1
//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the NumericLiteralParser, CharLiteralParser, and
10
// StringLiteralParser interfaces.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
15
#define LLVM_CLANG_LEX_LITERALSUPPORT_H
16
17
#include "clang/Basic/CharInfo.h"
18
#include "clang/Basic/LLVM.h"
19
#include "clang/Basic/TokenKinds.h"
20
#include "llvm/ADT/APFloat.h"
21
#include "llvm/ADT/ArrayRef.h"
22
#include "llvm/ADT/SmallString.h"
23
#include "llvm/ADT/StringRef.h"
24
#include "llvm/Support/DataTypes.h"
25
26
namespace clang {
27
28
class DiagnosticsEngine;
29
class Preprocessor;
30
class Token;
31
class SourceLocation;
32
class TargetInfo;
33
class SourceManager;
34
class LangOptions;
35
36
/// Copy characters from Input to Buf, expanding any UCNs.
37
void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
38
39
/// NumericLiteralParser - This performs strict semantic analysis of the content
40
/// of a ppnumber, classifying it as either integer, floating, or erroneous,
41
/// determines the radix of the value and can convert it to a useful value.
42
class NumericLiteralParser {
43
  Preprocessor &PP; // needed for diagnostics
44
45
  const char *const ThisTokBegin;
46
  const char *const ThisTokEnd;
47
  const char *DigitsBegin, *SuffixBegin; // markers
48
  const char *s; // cursor
49
50
  unsigned radix;
51
52
  bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix;
53
54
  SmallString<32> UDSuffixBuf;
55
56
public:
57
  NumericLiteralParser(StringRef TokSpelling,
58
                       SourceLocation TokLoc,
59
                       Preprocessor &PP);
60
  bool hadError : 1;
61
  bool isUnsigned : 1;
62
  bool isLong : 1;          // This is *not* set for long long.
63
  bool isLongLong : 1;
64
  bool isHalf : 1;          // 1.0h
65
  bool isFloat : 1;         // 1.0f
66
  bool isImaginary : 1;     // 1.0i
67
  bool isFloat16 : 1;       // 1.0f16
68
  bool isFloat128 : 1;      // 1.0q
69
  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
70
71
  bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
72
  bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
73
74
13.1M
  bool isFixedPointLiteral() const { return saw_fixed_point_suffix; }
75
76
8.56M
  bool isIntegerLiteral() const {
77
8.56M
    return !saw_period && 
!saw_exponent8.56M
&&
!isFixedPointLiteral()8.56M
;
78
8.56M
  }
79
17.2M
  bool isFloatingLiteral() const {
80
17.2M
    return (saw_period || 
saw_exponent17.1M
) &&
!isFixedPointLiteral()69.7k
;
81
17.2M
  }
82
83
8.60M
  bool hasUDSuffix() const {
84
8.60M
    return saw_ud_suffix;
85
8.60M
  }
86
146
  StringRef getUDSuffix() const {
87
146
    assert(saw_ud_suffix);
88
146
    return UDSuffixBuf;
89
146
  }
90
173
  unsigned getUDSuffixOffset() const {
91
173
    assert(saw_ud_suffix);
92
173
    return SuffixBegin - ThisTokBegin;
93
173
  }
94
95
  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
96
97
4.38M
  unsigned getRadix() const { return radix; }
98
99
  /// GetIntegerValue - Convert this numeric literal value to an APInt that
100
  /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
101
  /// value read is larger than the APInt's bits will hold), set Val to the low
102
  /// bits of the result and return true.  Otherwise, return false.
103
  bool GetIntegerValue(llvm::APInt &Val);
104
105
  /// GetFloatValue - Convert this numeric literal to a floating value, using
106
  /// the specified APFloat fltSemantics (specifying float, double, etc).
107
  /// The returned opStatus describes the outcome of the conversion (for
108
  /// example, whether the literal could be represented exactly); there is no
109
  /// separate isExact out-parameter.
110
  llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
111
112
  /// GetFixedPointValue - Convert this numeric literal value into a
113
  /// scaled integer that represents this value. Returns true if an overflow
114
  /// occurred when calculating the integral part of the scaled integer or
115
  /// calculating the digit sequence of the exponent.
116
  bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
117
118
private:
119
120
  void ParseNumberStartingWithZero(SourceLocation TokLoc);
121
  void ParseDecimalOrOctalCommon(SourceLocation TokLoc);
122
123
33.4M
  static bool isDigitSeparator(char C) { return C == '\''; }
124
125
  /// Determine whether the sequence of characters [Start, End) contains
126
  /// any real digits (not digit separators).
127
1.09M
  bool containsDigits(const char *Start, const char *End) {
128
1.09M
    return Start != End && 
(1.09M
Start + 1 != End1.09M
||
!isDigitSeparator(Start[0])42.5k
);
129
1.09M
  }
130
131
  enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
132
133
  /// Ensure that we don't have a digit separator here.
134
  void checkSeparator(SourceLocation TokLoc, const char *Pos,
135
                      CheckSeparatorKind IsAfterDigits);
136
137
  /// SkipHexDigits - Read and skip over any hex digits, up to End.
138
  /// Return a pointer to the first non-hex digit or End.
139
1.08M
  const char *SkipHexDigits(const char *ptr) {
140
6.27M
    while (ptr != ThisTokEnd && 
(5.23M
isHexDigit(*ptr)5.23M
||
isDigitSeparator(*ptr)54.1k
))
141
5.18M
      ptr++;
142
1.08M
    return ptr;
143
1.08M
  }
144
145
  /// SkipOctalDigits - Read and skip over any octal digits, up to End.
146
  /// Return a pointer to the first non-octal digit or End.
147
523k
  const char *SkipOctalDigits(const char *ptr) {
148
536k
    while (ptr != ThisTokEnd &&
149
536k
           
(36.7k
(36.7k
*ptr >= '0'36.7k
&&
*ptr <= '7'23.9k
) ||
isDigitSeparator(*ptr)23.4k
))
150
13.3k
      ptr++;
151
523k
    return ptr;
152
523k
  }
153
154
  /// SkipDigits - Read and skip over any digits, up to End.
155
  /// Return a pointer to the first non-decimal digit or End.
156
7.03M
  const char *SkipDigits(const char *ptr) {
157
26.1M
    while (ptr != ThisTokEnd && 
(19.7M
isDigit(*ptr)19.7M
||
isDigitSeparator(*ptr)592k
))
158
19.1M
      ptr++;
159
7.03M
    return ptr;
160
7.03M
  }
161
162
  /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
163
  /// Return a pointer to the first non-binary digit or End.
164
49
  const char *SkipBinaryDigits(const char *ptr) {
165
298
    while (ptr != ThisTokEnd &&
166
298
           
(256
*ptr == '0'256
||
*ptr == '1'105
||
isDigitSeparator(*ptr)11
))
167
249
      ptr++;
168
49
    return ptr;
169
49
  }
170
171
};
172
173
/// CharLiteralParser - Perform interpretation and semantic analysis of a
174
/// character literal.
175
class CharLiteralParser {
176
  uint64_t Value;
177
  tok::TokenKind Kind;
178
  bool IsMultiChar;
179
  bool HadError;
180
  SmallString<32> UDSuffixBuf;
181
  unsigned UDSuffixOffset;
182
public:
183
  CharLiteralParser(const char *begin, const char *end,
184
                    SourceLocation Loc, Preprocessor &PP,
185
                    tok::TokenKind kind);
186
187
708k
  bool hadError() const { return HadError; }
188
2.09M
  bool isAscii() const { return Kind == tok::char_constant; }
189
2.09M
  bool isWide() const { return Kind == tok::wide_char_constant; }
190
1.41M
  bool isUTF8() const { return Kind == tok::utf8_char_constant; }
191
1.41M
  bool isUTF16() const { return Kind == tok::utf16_char_constant; }
192
1.41M
  bool isUTF32() const { return Kind == tok::utf32_char_constant; }
193
805k
  bool isMultiChar() const { return IsMultiChar; }
194
708k
  uint64_t getValue() const { return Value; }
195
708k
  StringRef getUDSuffix() const { return UDSuffixBuf; }
196
42
  unsigned getUDSuffixOffset() const {
197
42
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
198
42
    return UDSuffixOffset;
199
42
  }
200
};
201
202
/// StringLiteralParser - This decodes string escape characters and performs
203
/// wide string analysis and Translation Phase #6 (concatenation of string
204
/// literals) (C99 5.1.1.2p1).
205
class StringLiteralParser {
206
  const SourceManager &SM;
207
  const LangOptions &Features;
208
  const TargetInfo &Target;
209
  DiagnosticsEngine *Diags;
210
211
  unsigned MaxTokenLength;
212
  unsigned SizeBound;
213
  unsigned CharByteWidth;
214
  tok::TokenKind Kind;
215
  SmallString<512> ResultBuf;
216
  char *ResultPtr; // cursor
217
  SmallString<32> UDSuffixBuf;
218
  unsigned UDSuffixToken;
219
  unsigned UDSuffixOffset;
220
public:
221
  StringLiteralParser(ArrayRef<Token> StringToks,
222
                      Preprocessor &PP, bool Complain = true);
223
  StringLiteralParser(ArrayRef<Token> StringToks,
224
                      const SourceManager &sm, const LangOptions &features,
225
                      const TargetInfo &target,
226
                      DiagnosticsEngine *diags = nullptr)
227
    : SM(sm), Features(features), Target(target), Diags(diags),
228
      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
229
1.41M
      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
230
1.41M
    init(StringToks);
231
1.41M
  }
232
233
234
  bool hadError;
235
  bool Pascal;
236
237
4.80M
  StringRef GetString() const {
238
4.80M
    return StringRef(ResultBuf.data(), GetStringLength());
239
4.80M
  }
240
12.8M
  unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
241
242
6.61M
  unsigned GetNumStringChars() const {
243
6.61M
    return GetStringLength() / CharByteWidth;
244
6.61M
  }
245
  /// getOffsetOfStringByte - This function returns the offset of the
246
  /// specified byte of the string data represented by Token.  This handles
247
  /// advancing over escape sequences in the string.
248
  ///
249
  /// If the Diagnostics pointer is non-null, then this will do semantic
250
  /// checking of the string literal and emit errors and warnings.
251
  unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
252
253
209k
  bool isAscii() const { return Kind == tok::string_literal; }
254
3.20M
  bool isWide() const { return Kind == tok::wide_string_literal; }
255
3.20M
  bool isUTF8() const { return Kind == tok::utf8_string_literal; }
256
3.20M
  bool isUTF16() const { return Kind == tok::utf16_string_literal; }
257
3.20M
  bool isUTF32() const { return Kind == tok::utf32_string_literal; }
258
3.20M
  bool isPascal() const { return Pascal; }
259
260
3.20M
  StringRef getUDSuffix() const { return UDSuffixBuf; }
261
262
  /// Get the index of a token containing a ud-suffix.
263
235
  unsigned getUDSuffixToken() const {
264
235
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
265
235
    return UDSuffixToken;
266
235
  }
267
  /// Get the spelling offset of the first byte of the ud-suffix.
268
235
  unsigned getUDSuffixOffset() const {
269
235
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
270
235
    return UDSuffixOffset;
271
235
  }
272
273
  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
274
275
private:
276
  void init(ArrayRef<Token> StringToks);
277
  bool CopyStringFragment(const Token &Tok, const char *TokBegin,
278
                          StringRef Fragment);
279
  void DiagnoseLexingError(SourceLocation Loc);
280
};
281
282
}  // end namespace clang
283
284
#endif