/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/tools/clang/lib/Lex/TokenConcatenation.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===--- TokenConcatenation.cpp - Token Concatenation Avoidance -----------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | // |
9 | | // This file implements the TokenConcatenation class. |
10 | | // |
11 | | //===----------------------------------------------------------------------===// |
12 | | |
13 | | #include "clang/Lex/TokenConcatenation.h" |
14 | | #include "clang/Basic/CharInfo.h" |
15 | | #include "clang/Lex/Preprocessor.h" |
16 | | #include "llvm/Support/ErrorHandling.h" |
17 | | using namespace clang; |
18 | | |
19 | | |
20 | | /// IsStringPrefix - Return true if Str is a string prefix. |
21 | | /// 'L', 'u', 'U', or 'u8'. Including raw versions. |
22 | 2 | static bool IsStringPrefix(StringRef Str, bool CPlusPlus11) { |
23 | 2 | |
24 | 2 | if (Str[0] == 'L' || |
25 | 2 | (0 CPlusPlus110 && (0 Str[0] == 'u'0 || Str[0] == 'U'0 || Str[0] == 'R'0 ))) { |
26 | 2 | |
27 | 2 | if (Str.size() == 1) |
28 | 2 | return true; // "L", "u", "U", and "R" |
29 | 0 | |
30 | 0 | // Check for raw flavors. Need to make sure the first character wasn't |
31 | 0 | // already R. Need CPlusPlus11 check for "LR". |
32 | 0 | if (Str[1] == 'R' && Str[0] != 'R' && Str.size() == 2 && CPlusPlus11) |
33 | 0 | return true; // "LR", "uR", "UR" |
34 | 0 | |
35 | 0 | // Check for "u8" and "u8R" |
36 | 0 | if (Str[0] == 'u' && Str[1] == '8') { |
37 | 0 | if (Str.size() == 2) return true; // "u8" |
38 | 0 | if (Str.size() == 3 && Str[2] == 'R') return true; // "u8R" |
39 | 0 | } |
40 | 0 | } |
41 | 0 | |
42 | 0 | return false; |
43 | 0 | } |
44 | | |
45 | | /// IsIdentifierStringPrefix - Return true if the spelling of the token |
46 | | /// is literally 'L', 'u', 'U', or 'u8'. Including raw versions. |
47 | 2 | bool TokenConcatenation::IsIdentifierStringPrefix(const Token &Tok) const { |
48 | 2 | const LangOptions &LangOpts = PP.getLangOpts(); |
49 | 2 | |
50 | 2 | if (!Tok.needsCleaning()) { |
51 | 2 | if (Tok.getLength() < 1 || Tok.getLength() > 3) |
52 | 0 | return false; |
53 | 2 | SourceManager &SM = PP.getSourceManager(); |
54 | 2 | const char *Ptr = SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())); |
55 | 2 | return IsStringPrefix(StringRef(Ptr, Tok.getLength()), |
56 | 2 | LangOpts.CPlusPlus11); |
57 | 2 | } |
58 | 0 | |
59 | 0 | if (Tok.getLength() < 256) { |
60 | 0 | char Buffer[256]; |
61 | 0 | const char *TokPtr = Buffer; |
62 | 0 | unsigned length = PP.getSpelling(Tok, TokPtr); |
63 | 0 | return IsStringPrefix(StringRef(TokPtr, length), LangOpts.CPlusPlus11); |
64 | 0 | } |
65 | 0 | |
66 | 0 | return IsStringPrefix(StringRef(PP.getSpelling(Tok)), LangOpts.CPlusPlus11); |
67 | 0 | } |
68 | | |
69 | 1.29k | TokenConcatenation::TokenConcatenation(const Preprocessor &pp) : PP(pp) { |
70 | 1.29k | memset(TokenInfo, 0, sizeof(TokenInfo)); |
71 | 1.29k | |
72 | 1.29k | // These tokens have custom code in AvoidConcat. |
73 | 1.29k | TokenInfo[tok::identifier ] |= aci_custom; |
74 | 1.29k | TokenInfo[tok::numeric_constant] |= aci_custom_firstchar; |
75 | 1.29k | TokenInfo[tok::period ] |= aci_custom_firstchar; |
76 | 1.29k | TokenInfo[tok::amp ] |= aci_custom_firstchar; |
77 | 1.29k | TokenInfo[tok::plus ] |= aci_custom_firstchar; |
78 | 1.29k | TokenInfo[tok::minus ] |= aci_custom_firstchar; |
79 | 1.29k | TokenInfo[tok::slash ] |= aci_custom_firstchar; |
80 | 1.29k | TokenInfo[tok::less ] |= aci_custom_firstchar; |
81 | 1.29k | TokenInfo[tok::greater ] |= aci_custom_firstchar; |
82 | 1.29k | TokenInfo[tok::pipe ] |= aci_custom_firstchar; |
83 | 1.29k | TokenInfo[tok::percent ] |= aci_custom_firstchar; |
84 | 1.29k | TokenInfo[tok::colon ] |= aci_custom_firstchar; |
85 | 1.29k | TokenInfo[tok::hash ] |= aci_custom_firstchar; |
86 | 1.29k | TokenInfo[tok::arrow ] |= aci_custom_firstchar; |
87 | 1.29k | |
88 | 1.29k | // These tokens have custom code in C++11 mode. |
89 | 1.29k | if (PP.getLangOpts().CPlusPlus11) { |
90 | 206 | TokenInfo[tok::string_literal ] |= aci_custom; |
91 | 206 | TokenInfo[tok::wide_string_literal ] |= aci_custom; |
92 | 206 | TokenInfo[tok::utf8_string_literal ] |= aci_custom; |
93 | 206 | TokenInfo[tok::utf16_string_literal] |= aci_custom; |
94 | 206 | TokenInfo[tok::utf32_string_literal] |= aci_custom; |
95 | 206 | TokenInfo[tok::char_constant ] |= aci_custom; |
96 | 206 | TokenInfo[tok::wide_char_constant ] |= aci_custom; |
97 | 206 | TokenInfo[tok::utf16_char_constant ] |= aci_custom; |
98 | 206 | TokenInfo[tok::utf32_char_constant ] |= aci_custom; |
99 | 206 | } |
100 | 1.29k | |
101 | 1.29k | // These tokens have custom code in C++17 mode. |
102 | 1.29k | if (PP.getLangOpts().CPlusPlus17) |
103 | 8 | TokenInfo[tok::utf8_char_constant] |= aci_custom; |
104 | 1.29k | |
105 | 1.29k | // These tokens have custom code in C++2a mode. |
106 | 1.29k | if (PP.getLangOpts().CPlusPlus2a) |
107 | 6 | TokenInfo[tok::lessequal ] |= aci_custom_firstchar; |
108 | 1.29k | |
109 | 1.29k | // These tokens change behavior if followed by an '='. |
110 | 1.29k | TokenInfo[tok::amp ] |= aci_avoid_equal; // &= |
111 | 1.29k | TokenInfo[tok::plus ] |= aci_avoid_equal; // += |
112 | 1.29k | TokenInfo[tok::minus ] |= aci_avoid_equal; // -= |
113 | 1.29k | TokenInfo[tok::slash ] |= aci_avoid_equal; // /= |
114 | 1.29k | TokenInfo[tok::less ] |= aci_avoid_equal; // <= |
115 | 1.29k | TokenInfo[tok::greater ] |= aci_avoid_equal; // >= |
116 | 1.29k | TokenInfo[tok::pipe ] |= aci_avoid_equal; // |= |
117 | 1.29k | TokenInfo[tok::percent ] |= aci_avoid_equal; // %= |
118 | 1.29k | TokenInfo[tok::star ] |= aci_avoid_equal; // *= |
119 | 1.29k | TokenInfo[tok::exclaim ] |= aci_avoid_equal; // != |
120 | 1.29k | TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<= |
121 | 1.29k | TokenInfo[tok::greatergreater] |= aci_avoid_equal; // >>= |
122 | 1.29k | TokenInfo[tok::caret ] |= aci_avoid_equal; // ^= |
123 | 1.29k | TokenInfo[tok::equal ] |= aci_avoid_equal; // == |
124 | 1.29k | } |
125 | | |
126 | | /// GetFirstChar - Get the first character of the token \arg Tok, |
127 | | /// avoiding calls to getSpelling where possible. |
128 | 1.76k | static char GetFirstChar(const Preprocessor &PP, const Token &Tok) { |
129 | 1.76k | if (IdentifierInfo *II = Tok.getIdentifierInfo()) { |
130 | 17 | // Avoid spelling identifiers, the most common form of token. |
131 | 17 | return II->getNameStart()[0]; |
132 | 1.74k | } else if (!Tok.needsCleaning()) { |
133 | 1.74k | if (Tok.isLiteral() && Tok.getLiteralData()301 ) { |
134 | 301 | return *Tok.getLiteralData(); |
135 | 1.44k | } else { |
136 | 1.44k | SourceManager &SM = PP.getSourceManager(); |
137 | 1.44k | return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())); |
138 | 1.44k | } |
139 | 0 | } else if (Tok.getLength() < 256) { |
140 | 0 | char Buffer[256]; |
141 | 0 | const char *TokPtr = Buffer; |
142 | 0 | PP.getSpelling(Tok, TokPtr); |
143 | 0 | return TokPtr[0]; |
144 | 0 | } else { |
145 | 0 | return PP.getSpelling(Tok)[0]; |
146 | 0 | } |
147 | 1.76k | } |
148 | | |
149 | | /// AvoidConcat - If printing PrevTok immediately followed by Tok would cause |
150 | | /// the two individual tokens to be lexed as a single token, return true |
151 | | /// (which causes a space to be printed between them). This allows the output |
152 | | /// of -E mode to be lexed to the same token stream as lexing the input |
153 | | /// directly would. |
154 | | /// |
155 | | /// This code must conservatively return true if it doesn't want to be 100% |
156 | | /// accurate. This will cause the output to include extra space characters, |
157 | | /// but the resulting output won't have incorrect concatenations going on. |
158 | | /// Examples include "..", which we print with a space between, because we |
159 | | /// don't want to track enough to tell "x.." from "...". |
160 | | bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok, |
161 | | const Token &PrevTok, |
162 | 350k | const Token &Tok) const { |
163 | 350k | // Conservatively assume that every annotation token that has a printable |
164 | 350k | // form requires whitespace. |
165 | 350k | if (PrevTok.isAnnotation()) |
166 | 3 | return true; |
167 | 350k | |
168 | 350k | // First, check to see if the tokens were directly adjacent in the original |
169 | 350k | // source. If they were, it must be okay to stick them together: if there |
170 | 350k | // were an issue, the tokens would have been lexed differently. |
171 | 350k | SourceManager &SM = PP.getSourceManager(); |
172 | 350k | SourceLocation PrevSpellLoc = SM.getSpellingLoc(PrevTok.getLocation()); |
173 | 350k | SourceLocation SpellLoc = SM.getSpellingLoc(Tok.getLocation()); |
174 | 350k | if (PrevSpellLoc.getLocWithOffset(PrevTok.getLength()) == SpellLoc) |
175 | 342k | return false; |
176 | 7.90k | |
177 | 7.90k | tok::TokenKind PrevKind = PrevTok.getKind(); |
178 | 7.90k | if (!PrevTok.isAnnotation() && PrevTok.getIdentifierInfo()) |
179 | 5.29k | PrevKind = tok::identifier; // Language keyword or named operator. |
180 | 7.90k | |
181 | 7.90k | // Look up information on when we should avoid concatenation with prevtok. |
182 | 7.90k | unsigned ConcatInfo = TokenInfo[PrevKind]; |
183 | 7.90k | |
184 | 7.90k | // If prevtok never causes a problem for anything after it, return quickly. |
185 | 7.90k | if (ConcatInfo == 0) return false798 ; |
186 | 7.10k | |
187 | 7.10k | if (ConcatInfo & aci_avoid_equal) { |
188 | 380 | // If the next token is '=' or '==', avoid concatenation. |
189 | 380 | if (Tok.isOneOf(tok::equal, tok::equalequal)) |
190 | 7 | return true; |
191 | 373 | ConcatInfo &= ~aci_avoid_equal; |
192 | 373 | } |
193 | 7.10k | if (7.10k Tok.isAnnotation()7.10k ) { |
194 | 6 | // Modules annotation can show up when generated automatically for includes. |
195 | 6 | assert(Tok.isOneOf(tok::annot_module_include, tok::annot_module_begin, |
196 | 6 | tok::annot_module_end) && |
197 | 6 | "unexpected annotation in AvoidConcat"); |
198 | 6 | ConcatInfo = 0; |
199 | 6 | } |
200 | 7.10k | |
201 | 7.10k | if (ConcatInfo == 0) |
202 | 38 | return false; |
203 | 7.06k | |
204 | 7.06k | // Basic algorithm: we look at the first character of the second token, and |
205 | 7.06k | // determine whether it, if appended to the first token, would form (or |
206 | 7.06k | // would contribute) to a larger token if concatenated. |
207 | 7.06k | char FirstChar = 0; |
208 | 7.06k | if (ConcatInfo & aci_custom) { |
209 | 5.31k | // If the token does not need to know the first character, don't get it. |
210 | 5.31k | } else { |
211 | 1.74k | FirstChar = GetFirstChar(PP, Tok); |
212 | 1.74k | } |
213 | 7.06k | |
214 | 7.06k | switch (PrevKind) { |
215 | 7.06k | default: |
216 | 0 | llvm_unreachable("InitAvoidConcatTokenInfo built wrong"); |
217 | 7.06k | |
218 | 7.06k | case tok::raw_identifier: |
219 | 0 | llvm_unreachable("tok::raw_identifier in non-raw lexing mode!"); |
220 | 7.06k | |
221 | 7.06k | case tok::string_literal: |
222 | 27 | case tok::wide_string_literal: |
223 | 27 | case tok::utf8_string_literal: |
224 | 27 | case tok::utf16_string_literal: |
225 | 27 | case tok::utf32_string_literal: |
226 | 27 | case tok::char_constant: |
227 | 27 | case tok::wide_char_constant: |
228 | 27 | case tok::utf8_char_constant: |
229 | 27 | case tok::utf16_char_constant: |
230 | 27 | case tok::utf32_char_constant: |
231 | 27 | if (!PP.getLangOpts().CPlusPlus11) |
232 | 0 | return false; |
233 | 27 | |
234 | 27 | // In C++11, a string or character literal followed by an identifier is a |
235 | 27 | // single token. |
236 | 27 | if (Tok.getIdentifierInfo()) |
237 | 11 | return true; |
238 | 16 | |
239 | 16 | // A ud-suffix is an identifier. If the previous token ends with one, treat |
240 | 16 | // it as an identifier. |
241 | 16 | if (!PrevTok.hasUDSuffix()) |
242 | 15 | return false; |
243 | 1 | LLVM_FALLTHROUGH; |
244 | 5.28k | case tok::identifier: // id+id or id+number or id+L"foo". |
245 | 5.28k | // id+'.'... will not append. |
246 | 5.28k | if (Tok.is(tok::numeric_constant)) |
247 | 14 | return GetFirstChar(PP, Tok) != '.'; |
248 | 5.27k | |
249 | 5.27k | if (Tok.getIdentifierInfo() || |
250 | 5.27k | Tok.isOneOf(tok::wide_string_literal, tok::utf8_string_literal, |
251 | 5.27k | tok::utf16_string_literal, tok::utf32_string_literal, |
252 | 5.27k | tok::wide_char_constant, tok::utf8_char_constant, |
253 | 5.27k | tok::utf16_char_constant, tok::utf32_char_constant)) |
254 | 0 | return true; |
255 | 5.27k | |
256 | 5.27k | // If this isn't identifier + string, we're done. |
257 | 5.27k | if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal)) |
258 | 5.27k | return false; |
259 | 2 | |
260 | 2 | // Otherwise, this is a narrow character or string. If the *identifier* |
261 | 2 | // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo". |
262 | 2 | return IsIdentifierStringPrefix(PrevTok); |
263 | 2 | |
264 | 1.38k | case tok::numeric_constant: |
265 | 1.38k | return isPreprocessingNumberBody(FirstChar) || |
266 | 1.38k | FirstChar == '+'1.37k || FirstChar == '-'1.37k ; |
267 | 4 | case tok::period: // ..., .*, .1234 |
268 | 4 | return (FirstChar == '.' && PrevPrevTok.is(tok::period)2 ) || |
269 | 4 | isDigit(FirstChar)2 || |
270 | 4 | (2 PP.getLangOpts().CPlusPlus2 && FirstChar == '*'0 ); |
271 | 2 | case tok::amp: // && |
272 | 0 | return FirstChar == '&'; |
273 | 4 | case tok::plus: // ++ |
274 | 4 | return FirstChar == '+'; |
275 | 285 | case tok::minus: // --, ->, ->* |
276 | 285 | return FirstChar == '-' || FirstChar == '>'284 ; |
277 | 2 | case tok::slash: //, /*, // |
278 | 0 | return FirstChar == '*' || FirstChar == '/'; |
279 | 38 | case tok::less: // <<, <<=, <:, <% |
280 | 38 | return FirstChar == '<' || FirstChar == ':' || FirstChar == '%'; |
281 | 11 | case tok::greater: // >>, >>= |
282 | 11 | return FirstChar == '>'; |
283 | 3 | case tok::pipe: // || |
284 | 3 | return FirstChar == '|'; |
285 | 2 | case tok::percent: // %>, %: |
286 | 0 | return FirstChar == '>' || FirstChar == ':'; |
287 | 8 | case tok::colon: // ::, :> |
288 | 8 | return FirstChar == '>' || |
289 | 8 | (PP.getLangOpts().CPlusPlus && FirstChar == ':'0 ); |
290 | 5 | case tok::hash: // ##, #@, %:%: |
291 | 5 | return FirstChar == '#' || FirstChar == '@' || FirstChar == '%'; |
292 | 3 | case tok::arrow: // ->* |
293 | 3 | return PP.getLangOpts().CPlusPlus && FirstChar == '*'0 ; |
294 | 4 | case tok::lessequal: // <=> (C++2a) |
295 | 4 | return PP.getLangOpts().CPlusPlus2a && FirstChar == '>'; |
296 | 7.06k | } |
297 | 7.06k | } |