Coverage Report

Created: 2020-09-22 08:39

/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/include/clang/Lex/LiteralSupport.h
Line
Count
Source
1
//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the NumericLiteralParser, CharLiteralParser, and
10
// StringLiteralParser interfaces.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
15
#define LLVM_CLANG_LEX_LITERALSUPPORT_H
16
17
#include "clang/Basic/CharInfo.h"
18
#include "clang/Basic/LLVM.h"
19
#include "clang/Basic/TokenKinds.h"
20
#include "llvm/ADT/APFloat.h"
21
#include "llvm/ADT/ArrayRef.h"
22
#include "llvm/ADT/SmallString.h"
23
#include "llvm/ADT/StringRef.h"
24
#include "llvm/Support/DataTypes.h"
25
26
namespace clang {
27
28
class DiagnosticsEngine;
29
class Preprocessor;
30
class Token;
31
class SourceLocation;
32
class TargetInfo;
33
class SourceManager;
34
class LangOptions;
35
36
/// Copy characters from Input to Buf, expanding any UCNs.
37
void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
38
39
/// NumericLiteralParser - This performs strict semantic analysis of the content
40
/// of a ppnumber, classifying it as integer, floating, fixed-point, or
41
/// erroneous. It also determines the radix and can convert it to a value.
42
class NumericLiteralParser {
43
  const SourceManager &SM;
44
  const LangOptions &LangOpts;
45
  DiagnosticsEngine &Diags;
46
47
  const char *const ThisTokBegin;
48
  const char *const ThisTokEnd;
49
  const char *DigitsBegin, *SuffixBegin; // markers
50
  const char *s; // cursor
51
52
  unsigned radix;
53
54
  bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix;
55
56
  SmallString<32> UDSuffixBuf;
57
58
public:
59
  NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc,
60
                       const SourceManager &SM, const LangOptions &LangOpts,
61
                       const TargetInfo &Target, DiagnosticsEngine &Diags);
62
  bool hadError : 1;
63
  bool isUnsigned : 1;
64
  bool isLong : 1;          // This is *not* set for long long.
65
  bool isLongLong : 1;
66
  bool isHalf : 1;          // 1.0h
67
  bool isFloat : 1;         // 1.0f
68
  bool isImaginary : 1;     // 1.0i
69
  bool isFloat16 : 1;       // 1.0f16
70
  bool isFloat128 : 1;      // 1.0q
71
  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
72
73
  bool isFract : 1;         // 1.0hr/r/lr/uhr/ur/ulr
74
  bool isAccum : 1;         // 1.0hk/k/lk/uhk/uk/ulk
75
76
21.8M
  bool isFixedPointLiteral() const {
77
21.8M
    return (saw_period || 
saw_exponent21.7M
) &&
saw_fixed_point_suffix133k
;
78
21.8M
  }
79
80
8.47M
  bool isIntegerLiteral() const {
81
8.47M
    return !saw_period && 
!saw_exponent8.47M
&&
!isFixedPointLiteral()8.47M
;
82
8.47M
  }
83
17.0M
  bool isFloatingLiteral() const {
84
17.0M
    return (saw_period || 
saw_exponent16.9M
) &&
!isFixedPointLiteral()66.2k
;
85
17.0M
  }
86
87
8.51M
  bool hasUDSuffix() const {
88
8.51M
    return saw_ud_suffix;
89
8.51M
  }
90
202
  StringRef getUDSuffix() const {
91
202
    assert(saw_ud_suffix);
92
202
    return UDSuffixBuf;
93
202
  }
94
261
  unsigned getUDSuffixOffset() const {
95
261
    assert(saw_ud_suffix);
96
261
    return SuffixBegin - ThisTokBegin;
97
261
  }
98
99
  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
100
101
4.66M
  unsigned getRadix() const { return radix; }
102
103
  /// GetIntegerValue - Convert this numeric literal value to an APInt that
104
  /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
105
  /// value read is larger than the APInt's bits will hold), set Val to the low
106
  /// bits of the result and return true.  Otherwise, return false.
107
  bool GetIntegerValue(llvm::APInt &Val);
108
109
  /// GetFloatValue - Convert this numeric literal to a floating value, using
110
  /// the specified APFloat fltSemantics (specifying float, double, etc).
111
  /// The returned opStatus describes the outcome of the conversion;
112
  /// callers can inspect it to tell whether the literal was represented
113
  /// exactly or had to be rounded.
114
  llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
115
116
  /// GetFixedPointValue - Convert this numeric literal value into a
117
  /// scaled integer that represents this value. Returns true if an overflow
118
  /// occurred when calculating the integral part of the scaled integer or
119
  /// calculating the digit sequence of the exponent.
120
  bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
121
122
private:
123
124
  void ParseNumberStartingWithZero(SourceLocation TokLoc);
125
  void ParseDecimalOrOctalCommon(SourceLocation TokLoc);
126
127
34.3M
  static bool isDigitSeparator(char C) { return C == '\''; }
128
129
  /// Determine whether the sequence of characters [Start, End) contains
130
  /// any real digits (not digit separators).
131
1.12M
  bool containsDigits(const char *Start, const char *End) {
132
1.12M
    return Start != End && 
(1.12M
Start + 1 != End1.12M
||
!isDigitSeparator(Start[0])39.8k
);
133
1.12M
  }
134
135
  enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
136
137
  /// Ensure that we don't have a digit separator here.
138
  void checkSeparator(SourceLocation TokLoc, const char *Pos,
139
                      CheckSeparatorKind IsAfterDigits);
140
141
  /// SkipHexDigits - Read and skip over any hex digits, up to End.
142
  /// Return a pointer to the first non-hex digit or End.
143
1.11M
  const char *SkipHexDigits(const char *ptr) {
144
6.40M
    while (ptr != ThisTokEnd && 
(5.34M
isHexDigit(*ptr)5.34M
||
isDigitSeparator(*ptr)55.3k
))
145
5.29M
      ptr++;
146
1.11M
    return ptr;
147
1.11M
  }
148
149
  /// SkipOctalDigits - Read and skip over any octal digits, up to End.
150
  /// Return a pointer to the first non-octal digit or End.
151
487k
  const char *SkipOctalDigits(const char *ptr) {
152
502k
    while (ptr != ThisTokEnd &&
153
39.7k
           ((*ptr >= '0' && 
*ptr <= '7'26.5k
) ||
isDigitSeparator(*ptr)24.8k
))
154
15.0k
      ptr++;
155
487k
    return ptr;
156
487k
  }
157
158
  /// SkipDigits - Read and skip over any digits, up to End.
159
  /// Return a pointer to the first non-decimal digit or End.
160
6.94M
  const char *SkipDigits(const char *ptr) {
161
26.8M
    while (ptr != ThisTokEnd && 
(20.5M
isDigit(*ptr)20.5M
||
isDigitSeparator(*ptr)626k
))
162
19.9M
      ptr++;
163
6.94M
    return ptr;
164
6.94M
  }
165
166
  /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
167
  /// Return a pointer to the first non-binary digit or End.
168
55
  const char *SkipBinaryDigits(const char *ptr) {
169
328
    while (ptr != ThisTokEnd &&
170
280
           (*ptr == '0' || 
*ptr == '1'117
||
isDigitSeparator(*ptr)11
))
171
273
      ptr++;
172
55
    return ptr;
173
55
  }
174
175
};
176
177
/// CharLiteralParser - Perform interpretation and semantic analysis of a
178
/// character literal.
179
class CharLiteralParser {
180
  uint64_t Value;
181
  tok::TokenKind Kind;
182
  bool IsMultiChar;
183
  bool HadError;
184
  SmallString<32> UDSuffixBuf;
185
  unsigned UDSuffixOffset;
186
public:
187
  CharLiteralParser(const char *begin, const char *end,
188
                    SourceLocation Loc, Preprocessor &PP,
189
                    tok::TokenKind kind);
190
191
710k
  bool hadError() const { return HadError; }
192
2.09M
  bool isAscii() const { return Kind == tok::char_constant; }
193
2.09M
  bool isWide() const { return Kind == tok::wide_char_constant; }
194
1.41M
  bool isUTF8() const { return Kind == tok::utf8_char_constant; }
195
1.41M
  bool isUTF16() const { return Kind == tok::utf16_char_constant; }
196
1.41M
  bool isUTF32() const { return Kind == tok::utf32_char_constant; }
197
808k
  bool isMultiChar() const { return IsMultiChar; }
198
710k
  uint64_t getValue() const { return Value; }
199
710k
  StringRef getUDSuffix() const { return UDSuffixBuf; }
200
57
  unsigned getUDSuffixOffset() const {
201
57
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
202
57
    return UDSuffixOffset;
203
57
  }
204
};
205
206
/// StringLiteralParser - This decodes string escape characters and performs
207
/// wide string analysis and Translation Phase #6 (concatenation of string
208
/// literals) (C99 5.1.1.2p1).
209
class StringLiteralParser {
210
  const SourceManager &SM;
211
  const LangOptions &Features;
212
  const TargetInfo &Target;
213
  DiagnosticsEngine *Diags;
214
215
  unsigned MaxTokenLength;
216
  unsigned SizeBound;
217
  unsigned CharByteWidth;
218
  tok::TokenKind Kind;
219
  SmallString<512> ResultBuf;
220
  char *ResultPtr; // cursor
221
  SmallString<32> UDSuffixBuf;
222
  unsigned UDSuffixToken;
223
  unsigned UDSuffixOffset;
224
public:
225
  StringLiteralParser(ArrayRef<Token> StringToks,
226
                      Preprocessor &PP, bool Complain = true);
227
  StringLiteralParser(ArrayRef<Token> StringToks,
228
                      const SourceManager &sm, const LangOptions &features,
229
                      const TargetInfo &target,
230
                      DiagnosticsEngine *diags = nullptr)
231
    : SM(sm), Features(features), Target(target), Diags(diags),
232
      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
233
1.32M
      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
234
1.32M
    init(StringToks);
235
1.32M
  }
236
237
238
  bool hadError;
239
  bool Pascal;
240
241
5.54M
  StringRef GetString() const {
242
5.54M
    return StringRef(ResultBuf.data(), GetStringLength());
243
5.54M
  }
244
15.0M
  unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
245
246
8.22M
  unsigned GetNumStringChars() const {
247
8.22M
    return GetStringLength() / CharByteWidth;
248
8.22M
  }
249
  /// getOffsetOfStringByte - This function returns the offset of the
250
  /// specified byte of the string data represented by Token.  This handles
251
  /// advancing over escape sequences in the string.
252
  ///
253
  /// If the Diagnostics pointer is non-null, then this will do semantic
254
  /// checking of the string literal and emit errors and warnings.
255
  unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
256
257
284k
  bool isAscii() const { return Kind == tok::string_literal; }
258
3.96M
  bool isWide() const { return Kind == tok::wide_string_literal; }
259
3.96M
  bool isUTF8() const { return Kind == tok::utf8_string_literal; }
260
3.96M
  bool isUTF16() const { return Kind == tok::utf16_string_literal; }
261
3.96M
  bool isUTF32() const { return Kind == tok::utf32_string_literal; }
262
3.96M
  bool isPascal() const { return Pascal; }
263
264
3.97M
  StringRef getUDSuffix() const { return UDSuffixBuf; }
265
266
  /// Get the index of a token containing a ud-suffix.
267
348
  unsigned getUDSuffixToken() const {
268
348
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
269
348
    return UDSuffixToken;
270
348
  }
271
  /// Get the spelling offset of the first byte of the ud-suffix.
272
348
  unsigned getUDSuffixOffset() const {
273
348
    assert(!UDSuffixBuf.empty() && "no ud-suffix");
274
348
    return UDSuffixOffset;
275
348
  }
276
277
  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
278
279
private:
280
  void init(ArrayRef<Token> StringToks);
281
  bool CopyStringFragment(const Token &Tok, const char *TokBegin,
282
                          StringRef Fragment);
283
  void DiagnoseLexingError(SourceLocation Loc);
284
};
285
286
}  // end namespace clang
287
288
#endif