/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/include/clang/Lex/LiteralSupport.h
Line | Count | Source |
1 | | //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file defines the NumericLiteralParser, CharLiteralParser, and |
10 | | // StringLiteralParser interfaces. |
11 | | // |
12 | | //===----------------------------------------------------------------------===// |
13 | | |
14 | | #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H |
15 | | #define LLVM_CLANG_LEX_LITERALSUPPORT_H |
16 | | |
17 | | #include "clang/Basic/CharInfo.h" |
18 | | #include "clang/Basic/LLVM.h" |
19 | | #include "clang/Basic/TokenKinds.h" |
20 | | #include "llvm/ADT/APFloat.h" |
21 | | #include "llvm/ADT/ArrayRef.h" |
22 | | #include "llvm/ADT/SmallString.h" |
23 | | #include "llvm/ADT/StringRef.h" |
24 | | #include "llvm/Support/DataTypes.h" |
25 | | |
26 | | namespace clang { |
27 | | |
28 | | class DiagnosticsEngine; |
29 | | class Preprocessor; |
30 | | class Token; |
31 | | class SourceLocation; |
32 | | class TargetInfo; |
33 | | class SourceManager; |
34 | | class LangOptions; |
35 | | |
36 | | /// Copy characters from Input to Buf, expanding any UCNs (universal character names). |
37 | | void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input); |
38 | | |
39 | | /// NumericLiteralParser - This performs strict semantic analysis of the content |
40 | | /// of a ppnumber, classifying it as integer, floating, fixed-point, or erroneous, |
41 | | /// determines the radix of the value and can convert it to a useful value. |
42 | | class NumericLiteralParser { |
43 | | const SourceManager &SM; |
44 | | const LangOptions &LangOpts; |
45 | | DiagnosticsEngine &Diags; |
46 | | |
47 | | const char *const ThisTokBegin; |
48 | | const char *const ThisTokEnd; |
49 | | const char *DigitsBegin, *SuffixBegin; // markers; SuffixBegin - ThisTokBegin is the ud-suffix offset |
50 | | const char *s; // cursor |
51 | | |
52 | | unsigned radix; |
53 | | |
54 | | bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix; |
55 | | |
56 | | SmallString<32> UDSuffixBuf; |
57 | | |
58 | | public: |
59 | | NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, |
60 | | const SourceManager &SM, const LangOptions &LangOpts, |
61 | | const TargetInfo &Target, DiagnosticsEngine &Diags); |
62 | | bool hadError : 1; |
63 | | bool isUnsigned : 1; |
64 | | bool isLong : 1; // This is *not* set for long long. |
65 | | bool isLongLong : 1; |
66 | | bool isHalf : 1; // 1.0h |
67 | | bool isFloat : 1; // 1.0f |
68 | | bool isImaginary : 1; // 1.0i |
69 | | bool isFloat16 : 1; // 1.0f16 |
70 | | bool isFloat128 : 1; // 1.0q |
71 | | uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. |
72 | | |
73 | | bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr |
74 | | bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk |
75 | | |
76 | 18.6M | bool isFixedPointLiteral() const { |
77 | 18.6M | return (saw_period || saw_exponent18.4M ) && saw_fixed_point_suffix147k ; |
78 | 18.6M | } |
79 | | |
80 | 6.81M | bool isIntegerLiteral() const { |
81 | 6.81M | return !saw_period && !saw_exponent6.81M && !isFixedPointLiteral()6.81M ; |
82 | 6.81M | } |
83 | 13.7M | bool isFloatingLiteral() const { |
84 | 13.7M | return (saw_period || saw_exponent13.6M ) && !isFixedPointLiteral()73.5k ; |
85 | 13.7M | } |
86 | | |
87 | 6.85M | bool hasUDSuffix() const { |
88 | 6.85M | return saw_ud_suffix; |
89 | 6.85M | } |
90 | 209 | StringRef getUDSuffix() const { |
91 | 209 | assert(saw_ud_suffix); |
92 | 209 | return UDSuffixBuf; |
93 | 209 | } |
94 | 271 | unsigned getUDSuffixOffset() const { |
95 | 271 | assert(saw_ud_suffix); |
96 | 271 | return SuffixBegin - ThisTokBegin; |
97 | 271 | } |
98 | | |
99 | | static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); |
100 | | |
101 | 4.72M | unsigned getRadix() const { return radix; } |
102 | | |
103 | | /// GetIntegerValue - Convert this numeric literal value to an APInt that |
104 | | /// matches Val's input width. If there is an overflow (i.e., if the unsigned |
105 | | /// value read is larger than the APInt's bits will hold), set Val to the low |
106 | | /// bits of the result and return true. Otherwise, return false. |
107 | | bool GetIntegerValue(llvm::APInt &Val); |
108 | | |
109 | | /// GetFloatValue - Convert this numeric literal to a floating value, using |
110 | | /// the specified APFloat fltSemantics (specifying float, double, etc). |
111 | | /// This declaration has no isExact out-parameter; the outcome of the |
112 | | /// conversion is reported through the returned llvm::APFloat::opStatus, and |
113 | | /// Result is taken by non-const reference to receive the value. |
114 | | llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); |
115 | | |
116 | | /// GetFixedPointValue - Convert this numeric literal value into a |
117 | | /// scaled integer that represents this value. Returns true if an overflow |
118 | | /// occurred when calculating the integral part of the scaled integer or |
119 | | /// calculating the digit sequence of the exponent. |
120 | | bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale); |
121 | | |
122 | | private: |
123 | | |
124 | | void ParseNumberStartingWithZero(SourceLocation TokLoc); |
125 | | void ParseDecimalOrOctalCommon(SourceLocation TokLoc); |
126 | | |
127 | 28.9M | static bool isDigitSeparator(char C) { return C == '\''; } |
128 | | |
129 | | /// Determine whether the sequence of characters [Start, End) contains |
130 | | /// any real digits (not digit separators): false only for an empty range or a lone separator. |
131 | 1.13M | bool containsDigits(const char *Start, const char *End) { |
132 | 1.13M | return Start != End && (1.13M Start + 1 != End1.13M || !isDigitSeparator(Start[0])40.2k ); |
133 | 1.13M | } |
134 | | |
135 | | enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; |
136 | | |
137 | | /// Ensure that we don't have a digit separator here. |
138 | | void checkSeparator(SourceLocation TokLoc, const char *Pos, |
139 | | CheckSeparatorKind IsAfterDigits); |
140 | | |
141 | | /// SkipHexDigits - Read and skip over any hex digits or digit separators, up to ThisTokEnd. |
142 | | /// Return a pointer to the first character that is neither, or ThisTokEnd. |
143 | 1.12M | const char *SkipHexDigits(const char *ptr) { |
144 | 6.42M | while (ptr != ThisTokEnd && (5.35M isHexDigit(*ptr)5.35M || isDigitSeparator(*ptr)53.0k )) |
145 | 5.30M | ptr++; |
146 | 1.12M | return ptr; |
147 | 1.12M | } |
148 | | |
149 | | /// SkipOctalDigits - Read and skip over any octal digits or digit separators, up to ThisTokEnd. |
150 | | /// Return a pointer to the first character that is neither, or ThisTokEnd. |
151 | 278k | const char *SkipOctalDigits(const char *ptr) { |
152 | 287k | while (ptr != ThisTokEnd && |
153 | 33.0k | ((*ptr >= '0' && *ptr <= '7'18.4k ) || isDigitSeparator(*ptr)24.4k )) |
154 | 8.59k | ptr++; |
155 | 278k | return ptr; |
156 | 278k | } |
157 | | |
158 | | /// SkipDigits - Read and skip over any decimal digits or digit separators, up to ThisTokEnd. |
159 | | /// Return a pointer to the first character that is neither, or ThisTokEnd. |
160 | 5.49M | const char *SkipDigits(const char *ptr) { |
161 | 21.8M | while (ptr != ThisTokEnd && (16.7M isDigit(*ptr)16.7M || isDigitSeparator(*ptr)441k )) |
162 | 16.3M | ptr++; |
163 | 5.49M | return ptr; |
164 | 5.49M | } |
165 | | |
166 | | /// SkipBinaryDigits - Read and skip over any binary digits or digit separators, up to ThisTokEnd. |
167 | | /// Return a pointer to the first character that is neither, or ThisTokEnd. |
168 | 55 | const char *SkipBinaryDigits(const char *ptr) { |
169 | 328 | while (ptr != ThisTokEnd && |
170 | 280 | (*ptr == '0' || *ptr == '1'117 || isDigitSeparator(*ptr)11 )) |
171 | 273 | ptr++; |
172 | 55 | return ptr; |
173 | 55 | } |
174 | | |
175 | | }; |
176 | | |
177 | | /// CharLiteralParser - Perform interpretation and semantic analysis of a |
178 | | /// character literal; accessors expose its token kind, value, and any ud-suffix. |
179 | | class CharLiteralParser { |
180 | | uint64_t Value; |
181 | | tok::TokenKind Kind; |
182 | | bool IsMultiChar; |
183 | | bool HadError; |
184 | | SmallString<32> UDSuffixBuf; |
185 | | unsigned UDSuffixOffset; |
186 | | public: |
187 | | CharLiteralParser(const char *begin, const char *end, |
188 | | SourceLocation Loc, Preprocessor &PP, |
189 | | tok::TokenKind kind); |
190 | | |
191 | 709k | bool hadError() const { return HadError; } |
192 | 2.09M | bool isAscii() const { return Kind == tok::char_constant; } |
193 | 2.09M | bool isWide() const { return Kind == tok::wide_char_constant; } |
194 | 1.41M | bool isUTF8() const { return Kind == tok::utf8_char_constant; } |
195 | 1.41M | bool isUTF16() const { return Kind == tok::utf16_char_constant; } |
196 | 1.41M | bool isUTF32() const { return Kind == tok::utf32_char_constant; } |
197 | 807k | bool isMultiChar() const { return IsMultiChar; } |
198 | 709k | uint64_t getValue() const { return Value; } |
199 | 709k | StringRef getUDSuffix() const { return UDSuffixBuf; } |
200 | 57 | unsigned getUDSuffixOffset() const { |
201 | 57 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
202 | 57 | return UDSuffixOffset; |
203 | 57 | } |
204 | | }; |
205 | | |
206 | | /// StringLiteralParser - This decodes string escape characters and performs |
207 | | /// wide string analysis and Translation Phase #6 (concatenation of string |
208 | | /// literals) (C99 5.1.1.2p1). |
209 | | class StringLiteralParser { |
210 | | const SourceManager &SM; |
211 | | const LangOptions &Features; |
212 | | const TargetInfo &Target; |
213 | | DiagnosticsEngine *Diags; |
214 | | |
215 | | unsigned MaxTokenLength; |
216 | | unsigned SizeBound; |
217 | | unsigned CharByteWidth; |
218 | | tok::TokenKind Kind; |
219 | | SmallString<512> ResultBuf; |
220 | | char *ResultPtr; // cursor into ResultBuf; GetStringLength() is its offset from ResultBuf.data() |
221 | | SmallString<32> UDSuffixBuf; |
222 | | unsigned UDSuffixToken; |
223 | | unsigned UDSuffixOffset; |
224 | | public: |
225 | | StringLiteralParser(ArrayRef<Token> StringToks, |
226 | | Preprocessor &PP, bool Complain = true); |
227 | | StringLiteralParser(ArrayRef<Token> StringToks, |
228 | | const SourceManager &sm, const LangOptions &features, |
229 | | const TargetInfo &target, |
230 | | DiagnosticsEngine *diags = nullptr) |
231 | | : SM(sm), Features(features), Target(target), Diags(diags), |
232 | | MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), |
233 | 1.12M | ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { |
234 | 1.12M | init(StringToks); |
235 | 1.12M | } |
236 | | |
237 | | |
238 | | bool hadError; |
239 | | bool Pascal; |
240 | | |
241 | 5.41M | StringRef GetString() const { |
242 | 5.41M | return StringRef(ResultBuf.data(), GetStringLength()); |
243 | 5.41M | } |
244 | 14.9M | unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } |
245 | | |
246 | 8.39M | unsigned GetNumStringChars() const { |
247 | 8.39M | return GetStringLength() / CharByteWidth; |
248 | 8.39M | } |
249 | | /// getOffsetOfStringByte - This function returns the offset of the |
250 | | /// specified byte (ByteNo) of the string data represented by TheTok. This handles |
251 | | /// advancing over escape sequences in the string. |
252 | | /// |
253 | | /// If the Diagnostics pointer is non-null, then this will do semantic |
254 | | /// checking of the string literal and emit errors and warnings. |
255 | | unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; |
256 | | |
257 | 275k | bool isAscii() const { return Kind == tok::string_literal; } |
258 | 4.05M | bool isWide() const { return Kind == tok::wide_string_literal; } |
259 | 4.05M | bool isUTF8() const { return Kind == tok::utf8_string_literal; } |
260 | 4.05M | bool isUTF16() const { return Kind == tok::utf16_string_literal; } |
261 | 4.05M | bool isUTF32() const { return Kind == tok::utf32_string_literal; } |
262 | 4.05M | bool isPascal() const { return Pascal; } |
263 | | |
264 | 4.05M | StringRef getUDSuffix() const { return UDSuffixBuf; } |
265 | | |
266 | | /// Get the index of a token containing a ud-suffix. |
267 | 401 | unsigned getUDSuffixToken() const { |
268 | 401 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
269 | 401 | return UDSuffixToken; |
270 | 401 | } |
271 | | /// Get the spelling offset of the first byte of the ud-suffix. |
272 | 401 | unsigned getUDSuffixOffset() const { |
273 | 401 | assert(!UDSuffixBuf.empty() && "no ud-suffix"); |
274 | 401 | return UDSuffixOffset; |
275 | 401 | } |
276 | | |
277 | | static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); |
278 | | |
279 | | private: |
280 | | void init(ArrayRef<Token> StringToks); |
281 | | bool CopyStringFragment(const Token &Tok, const char *TokBegin, |
282 | | StringRef Fragment); |
283 | | void DiagnoseLexingError(SourceLocation Loc); |
284 | | }; |
285 | | |
286 | | } // end namespace clang |
287 | | |
288 | | #endif |