/Users/buildslave/jenkins/workspace/coverage/llvm-project/libcxx/src/regex.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | //===----------------------------------------------------------------------===// |
2 | | // |
3 | | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | | // See https://llvm.org/LICENSE.txt for license information. |
5 | | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | | // |
7 | | //===----------------------------------------------------------------------===// |
8 | | |
9 | | #include <algorithm> |
10 | | #include <iterator> |
11 | | #include <regex> |
12 | | |
13 | | _LIBCPP_BEGIN_NAMESPACE_STD |
14 | | |
15 | | static |
16 | | const char* |
17 | | make_error_type_string(regex_constants::error_type ecode) |
18 | 0 | { |
19 | 0 | switch (ecode) |
20 | 0 | { |
21 | 0 | case regex_constants::error_collate: |
22 | 0 | return "The expression contained an invalid collating element name."; |
23 | 0 | case regex_constants::error_ctype: |
24 | 0 | return "The expression contained an invalid character class name."; |
25 | 0 | case regex_constants::error_escape: |
26 | 0 | return "The expression contained an invalid escaped character, or a " |
27 | 0 | "trailing escape."; |
28 | 0 | case regex_constants::error_backref: |
29 | 0 | return "The expression contained an invalid back reference."; |
30 | 0 | case regex_constants::error_brack: |
31 | 0 | return "The expression contained mismatched [ and ]."; |
32 | 0 | case regex_constants::error_paren: |
33 | 0 | return "The expression contained mismatched ( and )."; |
34 | 0 | case regex_constants::error_brace: |
35 | 0 | return "The expression contained mismatched { and }."; |
36 | 0 | case regex_constants::error_badbrace: |
37 | 0 | return "The expression contained an invalid range in a {} expression."; |
38 | 0 | case regex_constants::error_range: |
39 | 0 | return "The expression contained an invalid character range, " |
40 | 0 | "such as [b-a] in most encodings."; |
41 | 0 | case regex_constants::error_space: |
42 | 0 | return "There was insufficient memory to convert the expression into " |
43 | 0 | "a finite state machine."; |
44 | 0 | case regex_constants::error_badrepeat: |
45 | 0 | return "One of *?+{ was not preceded by a valid regular expression."; |
46 | 0 | case regex_constants::error_complexity: |
47 | 0 | return "The complexity of an attempted match against a regular " |
48 | 0 | "expression exceeded a pre-set level."; |
49 | 0 | case regex_constants::error_stack: |
50 | 0 | return "There was insufficient memory to determine whether the regular " |
51 | 0 | "expression could match the specified character sequence."; |
52 | 0 | case regex_constants::__re_err_grammar: |
53 | 0 | return "An invalid regex grammar has been requested."; |
54 | 0 | case regex_constants::__re_err_empty: |
55 | 0 | return "An empty regex is not allowed in the POSIX grammar."; |
56 | 0 | case regex_constants::__re_err_parse: |
57 | 0 | return "The parser did not consume the entire regular expression."; |
58 | 0 | default: |
59 | 0 | break; |
60 | 0 | } |
61 | 0 | return "Unknown error type"; |
62 | 0 | } |
63 | | |
64 | | regex_error::regex_error(regex_constants::error_type ecode) |
65 | | : runtime_error(make_error_type_string(ecode)), |
66 | | __code_(ecode) |
67 | 0 | {} |
68 | | |
69 | 0 | regex_error::~regex_error() throw() {} |
70 | | |
71 | | namespace { |
72 | | |
// One row of the collating-element name table: a symbolic name and the single
// character that the name stands for.
struct collationnames
{
    const char* elem_;  // NUL-terminated symbolic name; this is the lookup key
    char char_;         // character value the name maps to
};

// Exactly one of the two tables below is compiled.  __get_collation_name
// binary-searches the table with strcmp ordering, so the rows must stay
// sorted by elem_ under strcmp.  Several names are aliases for the same
// character (e.g. "period" / "full-stop", "slash" / "solidus").
#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
// EBCDIC IBM-1047
// Sorted via the EBCDIC collating sequence
const collationnames collatenames[] =
{
    // Names starting with a lower-case letter come first in this ordering.
    {"a", 0x81}, {"alert", 0x2f}, {"ampersand", 0x50}, {"apostrophe", 0x7d}, {"asterisk", 0x5c},
    {"b", 0x82}, {"backslash", 0xe0}, {"backspace", 0x16},
    {"c", 0x83}, {"carriage-return", 0xd}, {"circumflex", 0x5f}, {"circumflex-accent", 0x5f},
    {"colon", 0x7a}, {"comma", 0x6b}, {"commercial-at", 0x7c},
    {"d", 0x84}, {"dollar-sign", 0x5b},
    {"e", 0x85}, {"eight", 0xf8}, {"equals-sign", 0x7e}, {"exclamation-mark", 0x5a},
    {"f", 0x86}, {"five", 0xf5}, {"form-feed", 0xc}, {"four", 0xf4}, {"full-stop", 0x4b},
    {"g", 0x87}, {"grave-accent", 0x79}, {"greater-than-sign", 0x6e},
    {"h", 0x88}, {"hyphen", 0x60}, {"hyphen-minus", 0x60},
    {"i", 0x89},
    {"j", 0x91},
    {"k", 0x92},
    {"l", 0x93}, {"left-brace", 0xc0}, {"left-curly-bracket", 0xc0}, {"left-parenthesis", 0x4d},
    {"left-square-bracket", 0xad}, {"less-than-sign", 0x4c}, {"low-line", 0x6d},
    {"m", 0x94},
    {"n", 0x95}, {"newline", 0x15}, {"nine", 0xf9}, {"number-sign", 0x7b},
    {"o", 0x96}, {"one", 0xf1},
    {"p", 0x97}, {"percent-sign", 0x6c}, {"period", 0x4b}, {"plus-sign", 0x4e},
    {"q", 0x98}, {"question-mark", 0x6f}, {"quotation-mark", 0x7f},
    {"r", 0x99}, {"reverse-solidus", 0xe0}, {"right-brace", 0xd0}, {"right-curly-bracket", 0xd0},
    {"right-parenthesis", 0x5d}, {"right-square-bracket", 0xbd},
    {"s", 0xa2}, {"semicolon", 0x5e}, {"seven", 0xf7}, {"six", 0xf6}, {"slash", 0x61},
    {"solidus", 0x61}, {"space", 0x40},
    {"t", 0xa3}, {"tab", 0x5}, {"three", 0xf3}, {"tilde", 0xa1}, {"two", 0xf2},
    {"u", 0xa4}, {"underscore", 0x6d},
    {"v", 0xa5}, {"vertical-line", 0x4f}, {"vertical-tab", 0xb},
    {"w", 0xa6},
    {"x", 0xa7},
    {"y", 0xa8},
    {"z", 0xa9}, {"zero", 0xf0},
    // Upper-case names follow, with "NUL" placed right after "N".
    {"A", 0xc1}, {"B", 0xc2}, {"C", 0xc3}, {"D", 0xc4}, {"E", 0xc5}, {"F", 0xc6}, {"G", 0xc7},
    {"H", 0xc8}, {"I", 0xc9}, {"J", 0xd1}, {"K", 0xd2}, {"L", 0xd3}, {"M", 0xd4}, {"N", 0xd5},
    {"NUL", 0},
    {"O", 0xd6}, {"P", 0xd7}, {"Q", 0xd8}, {"R", 0xd9}, {"S", 0xe2}, {"T", 0xe3}, {"U", 0xe4},
    {"V", 0xe5}, {"W", 0xe6}, {"X", 0xe7}, {"Y", 0xe8}, {"Z", 0xe9}
};
#else
// ASCII
const collationnames collatenames[] =
{
    // Upper-case names come first in this ordering, with "NUL" right after "N".
    {"A", 0x41}, {"B", 0x42}, {"C", 0x43}, {"D", 0x44}, {"E", 0x45}, {"F", 0x46}, {"G", 0x47},
    {"H", 0x48}, {"I", 0x49}, {"J", 0x4a}, {"K", 0x4b}, {"L", 0x4c}, {"M", 0x4d}, {"N", 0x4e},
    {"NUL", 0x00},
    {"O", 0x4f}, {"P", 0x50}, {"Q", 0x51}, {"R", 0x52}, {"S", 0x53}, {"T", 0x54}, {"U", 0x55},
    {"V", 0x56}, {"W", 0x57}, {"X", 0x58}, {"Y", 0x59}, {"Z", 0x5a},
    // Names starting with a lower-case letter follow.
    {"a", 0x61}, {"alert", 0x07}, {"ampersand", 0x26}, {"apostrophe", 0x27}, {"asterisk", 0x2a},
    {"b", 0x62}, {"backslash", 0x5c}, {"backspace", 0x08},
    {"c", 0x63}, {"carriage-return", 0x0d}, {"circumflex", 0x5e}, {"circumflex-accent", 0x5e},
    {"colon", 0x3a}, {"comma", 0x2c}, {"commercial-at", 0x40},
    {"d", 0x64}, {"dollar-sign", 0x24},
    {"e", 0x65}, {"eight", 0x38}, {"equals-sign", 0x3d}, {"exclamation-mark", 0x21},
    {"f", 0x66}, {"five", 0x35}, {"form-feed", 0x0c}, {"four", 0x34}, {"full-stop", 0x2e},
    {"g", 0x67}, {"grave-accent", 0x60}, {"greater-than-sign", 0x3e},
    {"h", 0x68}, {"hyphen", 0x2d}, {"hyphen-minus", 0x2d},
    {"i", 0x69},
    {"j", 0x6a},
    {"k", 0x6b},
    {"l", 0x6c}, {"left-brace", 0x7b}, {"left-curly-bracket", 0x7b}, {"left-parenthesis", 0x28},
    {"left-square-bracket", 0x5b}, {"less-than-sign", 0x3c}, {"low-line", 0x5f},
    {"m", 0x6d},
    {"n", 0x6e}, {"newline", 0x0a}, {"nine", 0x39}, {"number-sign", 0x23},
    {"o", 0x6f}, {"one", 0x31},
    {"p", 0x70}, {"percent-sign", 0x25}, {"period", 0x2e}, {"plus-sign", 0x2b},
    {"q", 0x71}, {"question-mark", 0x3f}, {"quotation-mark", 0x22},
    {"r", 0x72}, {"reverse-solidus", 0x5c}, {"right-brace", 0x7d}, {"right-curly-bracket", 0x7d},
    {"right-parenthesis", 0x29}, {"right-square-bracket", 0x5d},
    {"s", 0x73}, {"semicolon", 0x3b}, {"seven", 0x37}, {"six", 0x36}, {"slash", 0x2f},
    {"solidus", 0x2f}, {"space", 0x20},
    {"t", 0x74}, {"tab", 0x09}, {"three", 0x33}, {"tilde", 0x7e}, {"two", 0x32},
    {"u", 0x75}, {"underscore", 0x5f},
    {"v", 0x76}, {"vertical-line", 0x7c}, {"vertical-tab", 0x0b},
    {"w", 0x77},
    {"x", 0x78},
    {"y", 0x79},
    {"z", 0x7a}, {"zero", 0x30}
};
#endif
313 | | |
314 | | struct classnames |
315 | | { |
316 | | const char* elem_; |
317 | | regex_traits<char>::char_class_type mask_; |
318 | | }; |
319 | | |
320 | | const classnames ClassNames[] = |
321 | | { |
322 | | {"alnum", ctype_base::alnum}, |
323 | | {"alpha", ctype_base::alpha}, |
324 | | {"blank", ctype_base::blank}, |
325 | | {"cntrl", ctype_base::cntrl}, |
326 | | {"d", ctype_base::digit}, |
327 | | {"digit", ctype_base::digit}, |
328 | | {"graph", ctype_base::graph}, |
329 | | {"lower", ctype_base::lower}, |
330 | | {"print", ctype_base::print}, |
331 | | {"punct", ctype_base::punct}, |
332 | | {"s", ctype_base::space}, |
333 | | {"space", ctype_base::space}, |
334 | | {"upper", ctype_base::upper}, |
335 | | {"w", regex_traits<char>::__regex_word}, |
336 | | {"xdigit", ctype_base::xdigit} |
337 | | }; |
338 | | |
339 | | struct use_strcmp |
340 | | { |
341 | | bool operator()(const collationnames& x, const char* y) |
342 | 0 | {return strcmp(x.elem_, y) < 0;} |
343 | | bool operator()(const classnames& x, const char* y) |
344 | 0 | {return strcmp(x.elem_, y) < 0;} |
345 | | }; |
346 | | |
347 | | } |
348 | | |
349 | | string |
350 | | __get_collation_name(const char* s) |
351 | 0 | { |
352 | 0 | const collationnames* i = |
353 | 0 | _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); |
354 | 0 | string r; |
355 | 0 | if (i != end(collatenames) && strcmp(s, i->elem_) == 0) |
356 | 0 | r = char(i->char_); |
357 | 0 | return r; |
358 | 0 | } |
359 | | |
360 | | regex_traits<char>::char_class_type |
361 | | __get_classname(const char* s, bool __icase) |
362 | 0 | { |
363 | 0 | const classnames* i = |
364 | 0 | _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); |
365 | 0 | regex_traits<char>::char_class_type r = 0; |
366 | 0 | if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) |
367 | 0 | { |
368 | 0 | r = i->mask_; |
369 | 0 | if (r == regex_traits<char>::__regex_word) |
370 | 0 | r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; |
371 | 0 | else if (__icase) |
372 | 0 | { |
373 | 0 | if (r & (ctype_base::lower | ctype_base::upper)) |
374 | 0 | r |= ctype_base::alpha; |
375 | 0 | } |
376 | 0 | } |
377 | 0 | return r; |
378 | 0 | } |
379 | | |
380 | | template <> |
381 | | void |
382 | | __match_any_but_newline<char>::__exec(__state& __s) const |
383 | 0 | { |
384 | 0 | if (__s.__current_ != __s.__last_) |
385 | 0 | { |
386 | 0 | switch (*__s.__current_) |
387 | 0 | { |
388 | 0 | case '\r': |
389 | 0 | case '\n': |
390 | 0 | __s.__do_ = __state::__reject; |
391 | 0 | __s.__node_ = nullptr; |
392 | 0 | break; |
393 | 0 | default: |
394 | 0 | __s.__do_ = __state::__accept_and_consume; |
395 | 0 | ++__s.__current_; |
396 | 0 | __s.__node_ = this->first(); |
397 | 0 | break; |
398 | 0 | } |
399 | 0 | } |
400 | 0 | else |
401 | 0 | { |
402 | 0 | __s.__do_ = __state::__reject; |
403 | 0 | __s.__node_ = nullptr; |
404 | 0 | } |
405 | 0 | } |
406 | | |
407 | | template <> |
408 | | void |
409 | | __match_any_but_newline<wchar_t>::__exec(__state& __s) const |
410 | 0 | { |
411 | 0 | if (__s.__current_ != __s.__last_) |
412 | 0 | { |
413 | 0 | switch (*__s.__current_) |
414 | 0 | { |
415 | 0 | case '\r': |
416 | 0 | case '\n': |
417 | 0 | case 0x2028: |
418 | 0 | case 0x2029: |
419 | 0 | __s.__do_ = __state::__reject; |
420 | 0 | __s.__node_ = nullptr; |
421 | 0 | break; |
422 | 0 | default: |
423 | 0 | __s.__do_ = __state::__accept_and_consume; |
424 | 0 | ++__s.__current_; |
425 | 0 | __s.__node_ = this->first(); |
426 | 0 | break; |
427 | 0 | } |
428 | 0 | } |
429 | 0 | else |
430 | 0 | { |
431 | 0 | __s.__do_ = __state::__reject; |
432 | 0 | __s.__node_ = nullptr; |
433 | 0 | } |
434 | 0 | } |
435 | | |
436 | | _LIBCPP_END_NAMESPACE_STD |