/Users/buildslave/jenkins/workspace/coverage/llvm-project/clang/include/clang/Basic/CharInfo.h
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- clang/Basic/CharInfo.h - Classifying ASCII Characters --*- C++ -*-===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #ifndef LLVM_CLANG_BASIC_CHARINFO_H |
10 | | #define LLVM_CLANG_BASIC_CHARINFO_H |
11 | | |
12 | | #include "clang/Basic/LLVM.h" |
13 | | #include "llvm/ADT/StringRef.h" |
14 | | #include "llvm/Support/Compiler.h" |
15 | | #include "llvm/Support/DataTypes.h" |
16 | | |
17 | | namespace clang { |
namespace charinfo {
  /// Per-byte classification table, indexed by the unsigned value of a
  /// character. Each entry is a bitwise OR of the CHAR_* flags below. The
  /// table itself is defined out of line.
  extern const uint16_t InfoTable[256];

  /// Primary character classes; each class occupies its own bit.
  enum {
    CHAR_HORZ_WS  = 1 << 0,   // '\t', '\f', '\v' (but not '\0')
    CHAR_VERT_WS  = 1 << 1,   // '\r', '\n'
    CHAR_SPACE    = 1 << 2,   // ' '
    CHAR_DIGIT    = 1 << 3,   // 0-9
    CHAR_XLETTER  = 1 << 4,   // a-f, A-F
    CHAR_UPPER    = 1 << 5,   // A-Z
    CHAR_LOWER    = 1 << 6,   // a-z
    CHAR_UNDER    = 1 << 7,   // _
    CHAR_PERIOD   = 1 << 8,   // .
    CHAR_RAWDEL   = 1 << 9,   // {}[]#<>%:;?*+-/^&|~!=,"'
    CHAR_PUNCT    = 1 << 10   // `$@()
  };

  /// Convenience combinations for letters that are also hex digits.
  enum {
    CHAR_XUPPER = CHAR_XLETTER | CHAR_UPPER,  // A-F
    CHAR_XLOWER = CHAR_XLETTER | CHAR_LOWER   // a-f
  };
} // end namespace charinfo
40 | | |
41 | | /// Returns true if a byte is an ASCII character. |
42 | 2.03G | LLVM_READNONE inline bool isASCII(char c) { |
43 | 2.03G | return static_cast<unsigned char>(c) <= 127; |
44 | 2.03G | } |
45 | | |
46 | 937M | LLVM_READNONE inline bool isASCII(unsigned char c) { return c <= 127; } |
47 | | |
48 | | /// Returns true if a codepoint is an ASCII character. |
49 | 121 | LLVM_READNONE inline bool isASCII(uint32_t c) { return c <= 127; } |
50 | 1.00k | LLVM_READNONE inline bool isASCII(int64_t c) { return 0 <= c && c <= 127; } |
51 | | |
52 | | /// Returns true if this is a valid first character of a C identifier, |
53 | | /// which is [a-zA-Z_]. |
54 | | LLVM_READONLY inline bool isAsciiIdentifierStart(unsigned char c, |
55 | 8.21M | bool AllowDollar = false) { |
56 | 8.21M | using namespace charinfo; |
57 | 8.21M | if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_UNDER)) |
58 | 31.8k | return true; |
59 | 8.18M | return AllowDollar && c == '$'5 ; |
60 | 8.21M | } |
61 | | |
62 | | /// Returns true if this is a body character of a C identifier, |
63 | | /// which is [a-zA-Z0-9_]. |
64 | | LLVM_READONLY inline bool isAsciiIdentifierContinue(unsigned char c, |
65 | 8.04G | bool AllowDollar = false) { |
66 | 8.04G | using namespace charinfo; |
67 | 8.04G | if (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER)) |
68 | 6.72G | return true; |
69 | 1.32G | return AllowDollar && c == '$'4.01k ; |
70 | 8.04G | } |
71 | | |
72 | | /// Returns true if this character is horizontal ASCII whitespace: |
73 | | /// ' ', '\\t', '\\f', '\\v'. |
74 | | /// |
75 | | /// Note that this returns false for '\\0'. |
76 | 3.01G | LLVM_READONLY inline bool isHorizontalWhitespace(unsigned char c) { |
77 | 3.01G | using namespace charinfo; |
78 | 3.01G | return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_SPACE)) != 0; |
79 | 3.01G | } |
80 | | |
81 | | /// Returns true if this character is vertical ASCII whitespace: '\\n', '\\r'. |
82 | | /// |
83 | | /// Note that this returns false for '\\0'. |
84 | 2.32G | LLVM_READONLY inline bool isVerticalWhitespace(unsigned char c) { |
85 | 2.32G | using namespace charinfo; |
86 | 2.32G | return (InfoTable[c] & CHAR_VERT_WS) != 0; |
87 | 2.32G | } |
88 | | |
89 | | /// Return true if this character is horizontal or vertical ASCII whitespace: |
90 | | /// ' ', '\\t', '\\f', '\\v', '\\n', '\\r'. |
91 | | /// |
92 | | /// Note that this returns false for '\\0'. |
93 | 67.4M | LLVM_READONLY inline bool isWhitespace(unsigned char c) { |
94 | 67.4M | using namespace charinfo; |
95 | 67.4M | return (InfoTable[c] & (CHAR_HORZ_WS|CHAR_VERT_WS|CHAR_SPACE)) != 0; |
96 | 67.4M | } |
97 | | |
98 | | /// Return true if this character is an ASCII digit: [0-9] |
99 | 41.0M | LLVM_READONLY inline bool isDigit(unsigned char c) { |
100 | 41.0M | using namespace charinfo; |
101 | 41.0M | return (InfoTable[c] & CHAR_DIGIT) != 0; |
102 | 41.0M | } |
103 | | |
104 | | /// Return true if this character is a lowercase ASCII letter: [a-z] |
105 | 457k | LLVM_READONLY inline bool isLowercase(unsigned char c) { |
106 | 457k | using namespace charinfo; |
107 | 457k | return (InfoTable[c] & CHAR_LOWER) != 0; |
108 | 457k | } |
109 | | |
110 | | /// Return true if this character is an uppercase ASCII letter: [A-Z] |
111 | 1.06k | LLVM_READONLY inline bool isUppercase(unsigned char c) { |
112 | 1.06k | using namespace charinfo; |
113 | 1.06k | return (InfoTable[c] & CHAR_UPPER) != 0; |
114 | 1.06k | } |
115 | | |
116 | | /// Return true if this character is an ASCII letter: [a-zA-Z] |
117 | 3.51M | LLVM_READONLY inline bool isLetter(unsigned char c) { |
118 | 3.51M | using namespace charinfo; |
119 | 3.51M | return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER)) != 0; |
120 | 3.51M | } |
121 | | |
122 | | /// Return true if this character is an ASCII letter or digit: [a-zA-Z0-9] |
123 | 5.51M | LLVM_READONLY inline bool isAlphanumeric(unsigned char c) { |
124 | 5.51M | using namespace charinfo; |
125 | 5.51M | return (InfoTable[c] & (CHAR_DIGIT|CHAR_UPPER|CHAR_LOWER)) != 0; |
126 | 5.51M | } |
127 | | |
128 | | /// Return true if this character is an ASCII hex digit: [0-9a-fA-F] |
129 | 5.59M | LLVM_READONLY inline bool isHexDigit(unsigned char c) { |
130 | 5.59M | using namespace charinfo; |
131 | 5.59M | return (InfoTable[c] & (CHAR_DIGIT|CHAR_XLETTER)) != 0; |
132 | 5.59M | } |
133 | | |
134 | | /// Return true if this character is an ASCII punctuation character. |
135 | | /// |
136 | | /// Note that '_' is both a punctuation character and an identifier character! |
137 | 665k | LLVM_READONLY inline bool isPunctuation(unsigned char c) { |
138 | 665k | using namespace charinfo; |
139 | 665k | return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0; |
140 | 665k | } |
141 | | |
142 | | /// Return true if this character is an ASCII printable character; that is, a |
143 | | /// character that should take exactly one column to print in a fixed-width |
144 | | /// terminal. |
145 | 3.41M | LLVM_READONLY inline bool isPrintable(unsigned char c) { |
146 | 3.41M | using namespace charinfo; |
147 | 3.41M | return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT| |
148 | 3.41M | CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0; |
149 | 3.41M | } |
150 | | |
151 | | /// Return true if this is the body character of a C preprocessing number, |
152 | | /// which is [a-zA-Z0-9_.]. |
153 | 219M | LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) { |
154 | 219M | using namespace charinfo; |
155 | 219M | return (InfoTable[c] & |
156 | 219M | (CHAR_UPPER|CHAR_LOWER|CHAR_DIGIT|CHAR_UNDER|CHAR_PERIOD)) != 0; |
157 | 219M | } |
158 | | |
159 | | /// Return true if this is the body character of a C++ raw string delimiter. |
160 | 1.81k | LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) { |
161 | 1.81k | using namespace charinfo; |
162 | 1.81k | return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD| |
163 | 1.81k | CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0; |
164 | 1.81k | } |
165 | | |
/// Bit flags selecting which quote characters escapeCStyle() escapes.
enum class EscapeChar {
  Single = 1 << 0, ///< Escape the single quote (').
  Double = 1 << 1, ///< Escape the double quote (").
  // Inside the enumerator list the enumerators still have the underlying
  // type (int), so they can be OR-ed without casts.
  SingleAndDouble = Single | Double, ///< Escape both kinds of quote.
};
171 | | |
172 | | /// Return C-style escaped string for special characters, or an empty string if |
173 | | /// there is no such mapping. |
174 | | template <EscapeChar Opt, class CharT> |
175 | 42.7k | LLVM_READONLY inline auto escapeCStyle(CharT Ch) -> StringRef { |
176 | 42.7k | switch (Ch) { |
177 | 59 | case '\\': |
178 | 59 | return "\\\\"; |
179 | 32 | case '\'': |
180 | 32 | if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Single)) == 0) |
181 | 6 | break; |
182 | 26 | return "\\'"; |
183 | 38 | case '"': |
184 | 38 | if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Double)) == 0) |
185 | 9 | break; |
186 | 29 | return "\\\""; |
187 | 32 | case '\a': |
188 | 32 | return "\\a"; |
189 | 32 | case '\b': |
190 | 32 | return "\\b"; |
191 | 16 | case '\f': |
192 | 16 | return "\\f"; |
193 | 24 | case '\n': |
194 | 24 | return "\\n"; |
195 | 22 | case '\r': |
196 | 22 | return "\\r"; |
197 | 32 | case '\t': |
198 | 32 | return "\\t"; |
199 | 16 | case '\v': |
200 | 16 | return "\\v"; |
201 | 42.7k | } |
202 | 42.4k | return {}; |
203 | 42.7k | } llvm::StringRef clang::escapeCStyle<(clang::EscapeChar)3, unsigned char>(unsigned char) Line | Count | Source | 175 | 998 | LLVM_READONLY inline auto escapeCStyle(CharT Ch) -> StringRef { | 176 | 998 | switch (Ch) { | 177 | 7 | case '\\': | 178 | 7 | return "\\\\"; | 179 | 5 | case '\'': | 180 | 5 | if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Single)) == 0) | 181 | 0 | break; | 182 | 5 | return "\\'"; | 183 | 5 | case '"': | 184 | 5 | if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Double)) == 0) | 185 | 0 | break; | 186 | 5 | return "\\\""; | 187 | 2 | case '\a': | 188 | 2 | return "\\a"; | 189 | 2 | case '\b': | 190 | 2 | return "\\b"; | 191 | 2 | case '\f': | 192 | 2 | return "\\f"; | 193 | 5 | case '\n': | 194 | 5 | return "\\n"; | 195 | 5 | case '\r': | 196 | 5 | return "\\r"; | 197 | 2 | case '\t': | 198 | 2 | return "\\t"; | 199 | 2 | case '\v': | 200 | 2 | return "\\v"; | 201 | 998 | } | 202 | 961 | return {}; | 203 | 998 | } |
llvm::StringRef clang::escapeCStyle<(clang::EscapeChar)1, unsigned int>(unsigned int) Line | Count | Source | 175 | 653 | LLVM_READONLY inline auto escapeCStyle(CharT Ch) -> StringRef { | 176 | 653 | switch (Ch) { | 177 | 19 | case '\\': | 178 | 19 | return "\\\\"; | 179 | 21 | case '\'': | 180 | 21 | if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Single)) == 0) | 181 | 0 | break; | 182 | 21 | return "\\'"; | 183 | 9 | case '"': | 184 | 9 | if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Double)) == 0) | 185 | 9 | break; | 186 | 0 | return "\\\""; | 187 | 10 | case '\a': | 188 | 10 | return "\\a"; | 189 | 10 | case '\b': | 190 | 10 | return "\\b"; | 191 | 10 | case '\f': | 192 | 10 | return "\\f"; | 193 | 10 | case '\n': | 194 | 10 | return "\\n"; | 195 | 10 | case '\r': | 196 | 10 | return "\\r"; | 197 | 10 | case '\t': | 198 | 10 | return "\\t"; | 199 | 10 | case '\v': | 200 | 10 | return "\\v"; | 201 | 653 | } | 202 | 543 | return {}; | 203 | 653 | } |
llvm::StringRef clang::escapeCStyle<(clang::EscapeChar)2, unsigned int>(unsigned int) Line | Count | Source | 175 | 41.0k | LLVM_READONLY inline auto escapeCStyle(CharT Ch) -> StringRef { | 176 | 41.0k | switch (Ch) { | 177 | 33 | case '\\': | 178 | 33 | return "\\\\"; | 179 | 6 | case '\'': | 180 | 6 | if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Single)) == 0) | 181 | 6 | break; | 182 | 0 | return "\\'"; | 183 | 24 | case '"': | 184 | 24 | if ((static_cast<int>(Opt) & static_cast<int>(EscapeChar::Double)) == 0) | 185 | 0 | break; | 186 | 24 | return "\\\""; | 187 | 20 | case '\a': | 188 | 20 | return "\\a"; | 189 | 20 | case '\b': | 190 | 20 | return "\\b"; | 191 | 4 | case '\f': | 192 | 4 | return "\\f"; | 193 | 9 | case '\n': | 194 | 9 | return "\\n"; | 195 | 7 | case '\r': | 196 | 7 | return "\\r"; | 197 | 20 | case '\t': | 198 | 20 | return "\\t"; | 199 | 4 | case '\v': | 200 | 4 | return "\\v"; | 201 | 41.0k | } | 202 | 40.9k | return {}; | 203 | 41.0k | } |
|
204 | | |
205 | | /// Converts the given ASCII character to its lowercase equivalent. |
206 | | /// |
207 | | /// If the character is not an uppercase character, it is returned as is. |
208 | 892 | LLVM_READONLY inline char toLowercase(char c) { |
209 | 892 | if (isUppercase(c)) |
210 | 247 | return c + 'a' - 'A'; |
211 | 645 | return c; |
212 | 892 | } |
213 | | |
214 | | /// Converts the given ASCII character to its uppercase equivalent. |
215 | | /// |
216 | | /// If the character is not a lowercase character, it is returned as is. |
217 | 360k | LLVM_READONLY inline char toUppercase(char c) { |
218 | 360k | if (isLowercase(c)) |
219 | 346k | return c + 'A' - 'a'; |
220 | 14.2k | return c; |
221 | 360k | } |
222 | | |
223 | | |
224 | | /// Return true if this is a valid ASCII identifier. |
225 | | /// |
226 | | /// Note that this is a very simple check; it does not accept UCNs as valid |
227 | | /// identifier characters. |
228 | | LLVM_READONLY inline bool isValidAsciiIdentifier(StringRef S, |
229 | 24.6k | bool AllowDollar = false) { |
230 | 24.6k | if (S.empty() || !isAsciiIdentifierStart(S[0], AllowDollar)24.6k ) |
231 | 23 | return false; |
232 | | |
233 | 400k | for (StringRef::iterator I = S.begin(), E = S.end(); 24.6k I != E; ++I376k ) |
234 | 376k | if (!isAsciiIdentifierContinue(*I, AllowDollar)) |
235 | 130 | return false; |
236 | | |
237 | 24.4k | return true; |
238 | 24.6k | } |
239 | | |
240 | | } // end namespace clang |
241 | | |
242 | | #endif |