Coverage Report

Created: 2021-09-21 08:58

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/include/clang/Lex/LiteralSupport.h
Line
Count
Source (jump to first uncovered line)
1
//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the NumericLiteralParser, CharLiteralParser, and
10
// StringLiteralParser interfaces.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
15
#define LLVM_CLANG_LEX_LITERALSUPPORT_H
16
17
#include "clang/Basic/CharInfo.h"
18
#include "clang/Basic/LLVM.h"
19
#include "clang/Basic/TokenKinds.h"
20
#include "llvm/ADT/APFloat.h"
21
#include "llvm/ADT/ArrayRef.h"
22
#include "llvm/ADT/SmallString.h"
23
#include "llvm/ADT/StringRef.h"
24
#include "llvm/Support/DataTypes.h"
25
26
namespace clang {
27
28
class DiagnosticsEngine;
29
class Preprocessor;
30
class Token;
31
class SourceLocation;
32
class TargetInfo;
33
class SourceManager;
34
class LangOptions;
35
36
/// Copy characters from Input to Buf, expanding any UCNs.
37
void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
38
39
/// NumericLiteralParser - This performs strict semantic analysis of the content
40
/// of a ppnumber, classifying it as either integer, floating, or erroneous,
41
/// and determines the radix of the value; it can also convert it to a useful value.
42
class NumericLiteralParser {
43
  const SourceManager &SM;
44
  const LangOptions &LangOpts;
45
  DiagnosticsEngine &Diags;
46
47
  const char *const ThisTokBegin;
48
  const char *const ThisTokEnd;
49
  const char *DigitsBegin, *SuffixBegin; // markers
50
  const char *s; // cursor
51
52
  unsigned radix;
53
54
  bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix;
55
56
  SmallString<32> UDSuffixBuf;
57
58
public:
59
  NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc,
60
                       const SourceManager &SM, const LangOptions &LangOpts,
61
                       const TargetInfo &Target, DiagnosticsEngine &Diags);
62
  bool hadError : 1;
63
  bool isUnsigned : 1;
64
  bool isLong : 1;          // This is *not* set for long long.
65
  bool isLongLong : 1;
66
  bool isSizeT : 1;         // 1z, 1uz (C++2b)
67
  bool isHalf : 1;          // 1.0h
68
  bool isFloat : 1;         // 1.0f
69
  bool isImaginary : 1;     // 1.0i
70
  bool isFloat16 : 1;       // 1.0f16
71
  bool isFloat128 : 1;      // 1.0q
72
  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
73
74
  bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
75
  bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
76
77
16.9M
  bool isFixedPointLiteral() const {
78
16.9M
    return (saw_period || 
saw_exponent16.7M
) &&
saw_fixed_point_suffix152k
;
79
16.9M
  }
80
81
6.26M
  bool isIntegerLiteral() const {
82
6.26M
    return !saw_period && 
!saw_exponent6.26M
&&
!isFixedPointLiteral()6.26M
;
83
6.26M
  }
84
12.6M
  bool isFloatingLiteral() const {
85
12.6M
    return (saw_period || 
saw_exponent12.5M
) &&
!isFixedPointLiteral()76.1k
;
86
12.6M
  }
87
88
6.30M
  bool hasUDSuffix() const {
89
6.30M
    return saw_ud_suffix;
90
6.30M
  }
91
213
  StringRef getUDSuffix() const {
92
213
    assert(saw_ud_suffix);
93
0
    return UDSuffixBuf;
94
213
  }
95
275
  unsigned getUDSuffixOffset() const {
96
275
    assert(saw_ud_suffix);
97
0
    return SuffixBegin - ThisTokBegin;
98
275
  }
99
100
  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
101
102
4.16M
  unsigned getRadix() const { return radix; }
103
104
  /// GetIntegerValue - Convert this numeric literal value to an APInt that
105
  /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
106
  /// value read is larger than the APInt's bits will hold), set Val to the low
107
  /// bits of the result and return true.  Otherwise, return false.
108
  bool GetIntegerValue(llvm::APInt &Val);
109
110
  /// GetFloatValue - Convert this numeric literal to a floating value, using
111
  /// the specified APFloat fltSemantics (specifying float, double, etc).
112
  /// The returned opStatus reports the outcome of the conversion, e.g.
113
  /// whether the resulting APFloat represents the number in the literal
114
  /// exactly or had to be rounded; there is no separate isExact parameter.
115
  llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
116
117
  /// GetFixedPointValue - Convert this numeric literal value into a
118
  /// scaled integer that represents this value. Returns true if an overflow
119
  /// occurred when calculating the integral part of the scaled integer or
120
  /// calculating the digit sequence of the exponent.
121
  bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
122
123
private:
124
125
  void ParseNumberStartingWithZero(SourceLocation TokLoc);
126
  void ParseDecimalOrOctalCommon(SourceLocation TokLoc);
127
128
25.8M
  static bool isDigitSeparator(char C) { return C == '\''; }
129
130
  /// Determine whether the sequence of characters [Start, End) contains
131
  /// any real digits (not digit separators).
132
961k
  bool containsDigits(const char *Start, const char *End) {
133
961k
    return Start != End && 
(961k
Start + 1 != End961k
||
!isDigitSeparator(Start[0])35.3k
);
134
961k
  }
135
136
  enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
137
138
  /// Ensure that we don't have a digit separator here.
139
  void checkSeparator(SourceLocation TokLoc, const char *Pos,
140
                      CheckSeparatorKind IsAfterDigits);
141
142
  /// SkipHexDigits - Read and skip over any hex digits, up to End.
143
  /// Return a pointer to the first non-hex digit or End.
144
954k
  const char *SkipHexDigits(const char *ptr) {
145
5.22M
    while (ptr != ThisTokEnd && 
(4.31M
isHexDigit(*ptr)4.31M
||
isDigitSeparator(*ptr)48.0k
))
146
4.27M
      ptr++;
147
954k
    return ptr;
148
954k
  }
149
150
  /// SkipOctalDigits - Read and skip over any octal digits, up to End.
151
  /// Return a pointer to the first non-octal digit or End.
152
252k
  const char *SkipOctalDigits(const char *ptr) {
153
261k
    while (ptr != ThisTokEnd &&
154
261k
           
(32.2k
(32.2k
*ptr >= '0'32.2k
&&
*ptr <= '7'17.0k
) ||
isDigitSeparator(*ptr)24.0k
))
155
8.16k
      ptr++;
156
252k
    return ptr;
157
252k
  }
158
159
  /// SkipDigits - Read and skip over any digits, up to End.
160
  /// Return a pointer to the first non-decimal digit or End.
161
5.14M
  const char *SkipDigits(const char *ptr) {
162
19.9M
    while (ptr != ThisTokEnd && 
(15.2M
isDigit(*ptr)15.2M
||
isDigitSeparator(*ptr)394k
))
163
14.8M
      ptr++;
164
5.14M
    return ptr;
165
5.14M
  }
166
167
  /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
168
  /// Return a pointer to the first non-binary digit or End.
169
85
  const char *SkipBinaryDigits(const char *ptr) {
170
481
    while (ptr != ThisTokEnd &&
171
481
           
(415
*ptr == '0'415
||
*ptr == '1'206
||
isDigitSeparator(*ptr)26
))
172
396
      ptr++;
173
85
    return ptr;
174
85
  }
175
176
};
177
178
/// CharLiteralParser - Perform interpretation and semantic analysis of a
179
/// character literal.
180
class CharLiteralParser {
181
  uint64_t Value;
182
  tok::TokenKind Kind;
183
  bool IsMultiChar;
184
  bool HadError;
185
  SmallString<32> UDSuffixBuf;
186
  unsigned UDSuffixOffset;
187
public:
188
  CharLiteralParser(const char *begin, const char *end,
189
                    SourceLocation Loc, Preprocessor &PP,
190
                    tok::TokenKind kind);
191
192
549k
  bool hadError() const { return HadError; }
193
1.61M
  bool isAscii() const { return Kind == tok::char_constant; }
194
1.09M
  bool isWide() const { return Kind == tok::wide_char_constant; }
195
1.09M
  bool isUTF8() const { return Kind == tok::utf8_char_constant; }
196
1.09M
  bool isUTF16() const { return Kind == tok::utf16_char_constant; }
197
1.09M
  bool isUTF32() const { return Kind == tok::utf32_char_constant; }
198
639k
  bool isMultiChar() const { return IsMultiChar; }
199
549k
  uint64_t getValue() const { return Value; }
200
549k
  StringRef getUDSuffix() const { return UDSuffixBuf; }
201
57
  unsigned getUDSuffixOffset() const {
202
57
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
203
0
    return UDSuffixOffset;
204
57
  }
205
};
206
207
/// StringLiteralParser - This decodes string escape characters and performs
208
/// wide string analysis and Translation Phase #6 (concatenation of string
209
/// literals) (C99 5.1.1.2p1).
210
class StringLiteralParser {
211
  const SourceManager &SM;
212
  const LangOptions &Features;
213
  const TargetInfo &Target;
214
  DiagnosticsEngine *Diags;
215
216
  unsigned MaxTokenLength;
217
  unsigned SizeBound;
218
  unsigned CharByteWidth;
219
  tok::TokenKind Kind;
220
  SmallString<512> ResultBuf;
221
  char *ResultPtr; // cursor
222
  SmallString<32> UDSuffixBuf;
223
  unsigned UDSuffixToken;
224
  unsigned UDSuffixOffset;
225
public:
226
  StringLiteralParser(ArrayRef<Token> StringToks,
227
                      Preprocessor &PP, bool Complain = true);
228
  StringLiteralParser(ArrayRef<Token> StringToks,
229
                      const SourceManager &sm, const LangOptions &features,
230
                      const TargetInfo &target,
231
                      DiagnosticsEngine *diags = nullptr)
232
    : SM(sm), Features(features), Target(target), Diags(diags),
233
      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
234
1.43M
      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
235
1.43M
    init(StringToks);
236
1.43M
  }
237
238
239
  bool hadError;
240
  bool Pascal;
241
242
5.62M
  StringRef GetString() const {
243
5.62M
    return StringRef(ResultBuf.data(), GetStringLength());
244
5.62M
  }
245
15.1M
  unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
246
247
8.06M
  unsigned GetNumStringChars() const {
248
8.06M
    return GetStringLength() / CharByteWidth;
249
8.06M
  }
250
  /// getOffsetOfStringByte - This function returns the offset of the
251
  /// specified byte of the string data represented by Token.  This handles
252
  /// advancing over escape sequences in the string.
253
  ///
254
  /// If the Diagnostics pointer is non-null, then this will do semantic
255
  /// checking of the string literal and emit errors and warnings.
256
  unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
257
258
408k
  bool isAscii() const { return Kind == tok::string_literal; }
259
3.82M
  bool isWide() const { return Kind == tok::wide_string_literal; }
260
3.82M
  bool isUTF8() const { return Kind == tok::utf8_string_literal; }
261
3.82M
  bool isUTF16() const { return Kind == tok::utf16_string_literal; }
262
3.82M
  bool isUTF32() const { return Kind == tok::utf32_string_literal; }
263
3.82M
  bool isPascal() const { return Pascal; }
264
265
3.82M
  StringRef getUDSuffix() const { return UDSuffixBuf; }
266
267
  /// Get the index of a token containing a ud-suffix.
268
448
  unsigned getUDSuffixToken() const {
269
448
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
270
0
    return UDSuffixToken;
271
448
  }
272
  /// Get the spelling offset of the first byte of the ud-suffix.
273
448
  unsigned getUDSuffixOffset() const {
274
448
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
275
0
    return UDSuffixOffset;
276
448
  }
277
278
  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
279
280
private:
281
  void init(ArrayRef<Token> StringToks);
282
  bool CopyStringFragment(const Token &Tok, const char *TokBegin,
283
                          StringRef Fragment);
284
  void DiagnoseLexingError(SourceLocation Loc);
285
};
286
287
}  // end namespace clang
288
289
#endif