Coverage Report

Created: 2022-07-16 07:03

/Users/buildslave/jenkins/workspace/coverage/llvm-project/libcxx/src/regex.cpp
Line
Count
Source (jump to first uncovered line)
1
//===----------------------------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include <algorithm>
10
#include <iterator>
11
#include <regex>
12
13
_LIBCPP_BEGIN_NAMESPACE_STD
14
15
static
16
const char*
17
make_error_type_string(regex_constants::error_type ecode)
18
0
{
19
0
    switch (ecode)
20
0
    {
21
0
    case regex_constants::error_collate:
22
0
        return "The expression contained an invalid collating element name.";
23
0
    case regex_constants::error_ctype:
24
0
        return "The expression contained an invalid character class name.";
25
0
    case regex_constants::error_escape:
26
0
        return "The expression contained an invalid escaped character, or a "
27
0
               "trailing escape.";
28
0
    case regex_constants::error_backref:
29
0
        return "The expression contained an invalid back reference.";
30
0
    case regex_constants::error_brack:
31
0
        return "The expression contained mismatched [ and ].";
32
0
    case regex_constants::error_paren:
33
0
        return "The expression contained mismatched ( and ).";
34
0
    case regex_constants::error_brace:
35
0
        return "The expression contained mismatched { and }.";
36
0
    case regex_constants::error_badbrace:
37
0
        return "The expression contained an invalid range in a {} expression.";
38
0
    case regex_constants::error_range:
39
0
        return "The expression contained an invalid character range, "
40
0
               "such as [b-a] in most encodings.";
41
0
    case regex_constants::error_space:
42
0
        return "There was insufficient memory to convert the expression into "
43
0
               "a finite state machine.";
44
0
    case regex_constants::error_badrepeat:
45
0
        return "One of *?+{ was not preceded by a valid regular expression.";
46
0
    case regex_constants::error_complexity:
47
0
        return "The complexity of an attempted match against a regular "
48
0
               "expression exceeded a pre-set level.";
49
0
    case regex_constants::error_stack:
50
0
        return "There was insufficient memory to determine whether the regular "
51
0
               "expression could match the specified character sequence.";
52
0
    case regex_constants::__re_err_grammar:
53
0
        return "An invalid regex grammar has been requested.";
54
0
    case regex_constants::__re_err_empty:
55
0
        return "An empty regex is not allowed in the POSIX grammar.";
56
0
    case regex_constants::__re_err_parse:
57
0
        return "The parser did not consume the entire regular expression.";
58
0
    default:
59
0
        break;
60
0
    }
61
0
    return "Unknown error type";
62
0
}
63
64
// Builds the exception from an error code: the runtime_error base receives
// the message produced by make_error_type_string(ecode), and the code itself
// is stored in __code_.
regex_error::regex_error(regex_constants::error_type ecode)
    : runtime_error(make_error_type_string(ecode)), __code_(ecode)
{
}
68
69
0
// Out-of-line destructor with an empty body. Declared `noexcept` instead of
// the dynamic exception specification `throw()`: the two are equivalent, but
// `throw()` is deprecated in C++17 and removed in C++20.
regex_error::~regex_error() noexcept {}
70
71
namespace {
72
73
// One row of the collating-element lookup table: a symbolic element name and
// the single character it stands for.
struct collationnames
{
    const char* elem_; // element name; the key compared with strcmp()
    char char_;        // character value associated with elem_
};
78
79
#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
80
// EBCDIC IBM-1047
81
// Sorted via the EBCDIC collating sequence
82
// Collating-element names paired with their EBCDIC (IBM-1047) character codes.
// The row order is significant: __get_collation_name() searches this array
// with lower_bound() and strcmp(), so entries must remain in ascending
// strcmp() order for this encoding. Rows are grouped by leading character
// purely for readability.
const collationnames collatenames[] =
{
    {"a", 0x81}, {"alert", 0x2f}, {"ampersand", 0x50}, {"apostrophe", 0x7d},
    {"asterisk", 0x5c},
    {"b", 0x82}, {"backslash", 0xe0}, {"backspace", 0x16},
    {"c", 0x83}, {"carriage-return", 0x0d}, {"circumflex", 0x5f},
    {"circumflex-accent", 0x5f}, {"colon", 0x7a}, {"comma", 0x6b},
    {"commercial-at", 0x7c},
    {"d", 0x84}, {"dollar-sign", 0x5b},
    {"e", 0x85}, {"eight", 0xf8}, {"equals-sign", 0x7e},
    {"exclamation-mark", 0x5a},
    {"f", 0x86}, {"five", 0xf5}, {"form-feed", 0x0c}, {"four", 0xf4},
    {"full-stop", 0x4b},
    {"g", 0x87}, {"grave-accent", 0x79}, {"greater-than-sign", 0x6e},
    {"h", 0x88}, {"hyphen", 0x60}, {"hyphen-minus", 0x60},
    {"i", 0x89},
    {"j", 0x91},
    {"k", 0x92},
    {"l", 0x93}, {"left-brace", 0xc0}, {"left-curly-bracket", 0xc0},
    {"left-parenthesis", 0x4d}, {"left-square-bracket", 0xad},
    {"less-than-sign", 0x4c}, {"low-line", 0x6d},
    {"m", 0x94},
    {"n", 0x95}, {"newline", 0x15}, {"nine", 0xf9}, {"number-sign", 0x7b},
    {"o", 0x96}, {"one", 0xf1},
    {"p", 0x97}, {"percent-sign", 0x6c}, {"period", 0x4b}, {"plus-sign", 0x4e},
    {"q", 0x98}, {"question-mark", 0x6f}, {"quotation-mark", 0x7f},
    {"r", 0x99}, {"reverse-solidus", 0xe0}, {"right-brace", 0xd0},
    {"right-curly-bracket", 0xd0}, {"right-parenthesis", 0x5d},
    {"right-square-bracket", 0xbd},
    {"s", 0xa2}, {"semicolon", 0x5e}, {"seven", 0xf7}, {"six", 0xf6},
    {"slash", 0x61}, {"solidus", 0x61}, {"space", 0x40},
    {"t", 0xa3}, {"tab", 0x05}, {"three", 0xf3}, {"tilde", 0xa1}, {"two", 0xf2},
    {"u", 0xa4}, {"underscore", 0x6d},
    {"v", 0xa5}, {"vertical-line", 0x4f}, {"vertical-tab", 0x0b},
    {"w", 0xa6},
    {"x", 0xa7},
    {"y", 0xa8},
    {"z", 0xa9}, {"zero", 0xf0},
    {"A", 0xc1}, {"B", 0xc2}, {"C", 0xc3}, {"D", 0xc4}, {"E", 0xc5},
    {"F", 0xc6}, {"G", 0xc7}, {"H", 0xc8}, {"I", 0xc9}, {"J", 0xd1},
    {"K", 0xd2}, {"L", 0xd3}, {"M", 0xd4}, {"N", 0xd5}, {"NUL", 0x00},
    {"O", 0xd6}, {"P", 0xd7}, {"Q", 0xd8}, {"R", 0xd9}, {"S", 0xe2},
    {"T", 0xe3}, {"U", 0xe4}, {"V", 0xe5}, {"W", 0xe6}, {"X", 0xe7},
    {"Y", 0xe8}, {"Z", 0xe9}
};
196
#else
197
// ASCII
198
// Collating-element names paired with their ASCII character codes.
// The row order is significant: __get_collation_name() searches this array
// with lower_bound() and strcmp(), so entries must remain in ascending
// strcmp() order (upper-case names first, then lower-case names). Rows are
// grouped by leading character purely for readability.
const collationnames collatenames[] =
{
    {"A", 0x41}, {"B", 0x42}, {"C", 0x43}, {"D", 0x44}, {"E", 0x45},
    {"F", 0x46}, {"G", 0x47}, {"H", 0x48}, {"I", 0x49}, {"J", 0x4a},
    {"K", 0x4b}, {"L", 0x4c}, {"M", 0x4d}, {"N", 0x4e}, {"NUL", 0x00},
    {"O", 0x4f}, {"P", 0x50}, {"Q", 0x51}, {"R", 0x52}, {"S", 0x53},
    {"T", 0x54}, {"U", 0x55}, {"V", 0x56}, {"W", 0x57}, {"X", 0x58},
    {"Y", 0x59}, {"Z", 0x5a},
    {"a", 0x61}, {"alert", 0x07}, {"ampersand", 0x26}, {"apostrophe", 0x27},
    {"asterisk", 0x2a},
    {"b", 0x62}, {"backslash", 0x5c}, {"backspace", 0x08},
    {"c", 0x63}, {"carriage-return", 0x0d}, {"circumflex", 0x5e},
    {"circumflex-accent", 0x5e}, {"colon", 0x3a}, {"comma", 0x2c},
    {"commercial-at", 0x40},
    {"d", 0x64}, {"dollar-sign", 0x24},
    {"e", 0x65}, {"eight", 0x38}, {"equals-sign", 0x3d},
    {"exclamation-mark", 0x21},
    {"f", 0x66}, {"five", 0x35}, {"form-feed", 0x0c}, {"four", 0x34},
    {"full-stop", 0x2e},
    {"g", 0x67}, {"grave-accent", 0x60}, {"greater-than-sign", 0x3e},
    {"h", 0x68}, {"hyphen", 0x2d}, {"hyphen-minus", 0x2d},
    {"i", 0x69},
    {"j", 0x6a},
    {"k", 0x6b},
    {"l", 0x6c}, {"left-brace", 0x7b}, {"left-curly-bracket", 0x7b},
    {"left-parenthesis", 0x28}, {"left-square-bracket", 0x5b},
    {"less-than-sign", 0x3c}, {"low-line", 0x5f},
    {"m", 0x6d},
    {"n", 0x6e}, {"newline", 0x0a}, {"nine", 0x39}, {"number-sign", 0x23},
    {"o", 0x6f}, {"one", 0x31},
    {"p", 0x70}, {"percent-sign", 0x25}, {"period", 0x2e}, {"plus-sign", 0x2b},
    {"q", 0x71}, {"question-mark", 0x3f}, {"quotation-mark", 0x22},
    {"r", 0x72}, {"reverse-solidus", 0x5c}, {"right-brace", 0x7d},
    {"right-curly-bracket", 0x7d}, {"right-parenthesis", 0x29},
    {"right-square-bracket", 0x5d},
    {"s", 0x73}, {"semicolon", 0x3b}, {"seven", 0x37}, {"six", 0x36},
    {"slash", 0x2f}, {"solidus", 0x2f}, {"space", 0x20},
    {"t", 0x74}, {"tab", 0x09}, {"three", 0x33}, {"tilde", 0x7e}, {"two", 0x32},
    {"u", 0x75}, {"underscore", 0x5f},
    {"v", 0x76}, {"vertical-line", 0x7c}, {"vertical-tab", 0x0b},
    {"w", 0x77},
    {"x", 0x78},
    {"y", 0x79},
    {"z", 0x7a}, {"zero", 0x30}
};
312
#endif
313
314
struct classnames
315
{
316
    const char* elem_;
317
    regex_traits<char>::char_class_type mask_;
318
};
319
320
// Character-class names and their masks. __get_classname() searches this
// array with lower_bound() and strcmp(), so the rows must remain in ascending
// strcmp() order of the name. "d", "s" and "w" are single-letter names; "d"
// and "s" share the masks of "digit" and "space", while "w" uses the
// regex_traits<char>::__regex_word mask.
const classnames ClassNames[] =
{
    {"alnum", ctype_base::alnum},
    {"alpha", ctype_base::alpha},
    {"blank", ctype_base::blank},
    {"cntrl", ctype_base::cntrl},
    {"d", ctype_base::digit},
    {"digit", ctype_base::digit},
    {"graph", ctype_base::graph},
    {"lower", ctype_base::lower},
    {"print", ctype_base::print},
    {"punct", ctype_base::punct},
    {"s", ctype_base::space},
    {"space", ctype_base::space},
    {"upper", ctype_base::upper},
    {"w", regex_traits<char>::__regex_word},
    {"xdigit", ctype_base::xdigit}
};
338
339
struct use_strcmp
340
{
341
    bool operator()(const collationnames& x, const char* y)
342
0
        {return strcmp(x.elem_, y) < 0;}
343
    bool operator()(const classnames& x, const char* y)
344
0
        {return strcmp(x.elem_, y) < 0;}
345
};
346
347
}
348
349
// Looks up the collating-element name `s` in collatenames.
// Returns a one-character string holding the mapped character when the name
// is present, and an empty string otherwise.
string
__get_collation_name(const char* s)
{
    const collationnames* const table_end = end(collatenames);

    // Binary search for the first row whose name is not ordered before `s`.
    const collationnames* const hit =
        _VSTD::lower_bound(begin(collatenames), table_end, s, use_strcmp());

    // lower_bound only gives a candidate position; require an exact match.
    if (hit == table_end || strcmp(s, hit->elem_) != 0)
        return string();

    return string(1, hit->char_);
}
359
360
// Looks up the character-class name `s` in ClassNames and returns its mask,
// or 0 when the name is not in the table.
//
// Adjustments applied to a found mask:
//  - a mask equal to __regex_word additionally gets alnum, upper and lower;
//  - otherwise, when __icase is set and the mask contains lower or upper,
//    alpha is added as well.
regex_traits<char>::char_class_type
__get_classname(const char* s, bool __icase)
{
    const classnames* const table_end = end(ClassNames);

    // Binary search for the first row whose name is not ordered before `s`.
    const classnames* const hit =
        _VSTD::lower_bound(begin(ClassNames), table_end, s, use_strcmp());

    // lower_bound only gives a candidate position; require an exact match.
    if (hit == table_end || strcmp(s, hit->elem_) != 0)
        return 0;

    regex_traits<char>::char_class_type mask = hit->mask_;
    if (mask == regex_traits<char>::__regex_word)
        mask |= ctype_base::alnum | ctype_base::upper | ctype_base::lower;
    else if (__icase && (mask & (ctype_base::lower | ctype_base::upper)))
        mask |= ctype_base::alpha;

    return mask;
}
379
380
template <>
381
void
382
__match_any_but_newline<char>::__exec(__state& __s) const
383
0
{
384
0
    if (__s.__current_ != __s.__last_)
385
0
    {
386
0
        switch (*__s.__current_)
387
0
        {
388
0
        case '\r':
389
0
        case '\n':
390
0
            __s.__do_ = __state::__reject;
391
0
            __s.__node_ = nullptr;
392
0
            break;
393
0
        default:
394
0
            __s.__do_ = __state::__accept_and_consume;
395
0
            ++__s.__current_;
396
0
            __s.__node_ = this->first();
397
0
            break;
398
0
        }
399
0
    }
400
0
    else
401
0
    {
402
0
        __s.__do_ = __state::__reject;
403
0
        __s.__node_ = nullptr;
404
0
    }
405
0
}
406
407
template <>
408
void
409
__match_any_but_newline<wchar_t>::__exec(__state& __s) const
410
0
{
411
0
    if (__s.__current_ != __s.__last_)
412
0
    {
413
0
        switch (*__s.__current_)
414
0
        {
415
0
        case '\r':
416
0
        case '\n':
417
0
        case 0x2028:
418
0
        case 0x2029:
419
0
            __s.__do_ = __state::__reject;
420
0
            __s.__node_ = nullptr;
421
0
            break;
422
0
        default:
423
0
            __s.__do_ = __state::__accept_and_consume;
424
0
            ++__s.__current_;
425
0
            __s.__node_ = this->first();
426
0
            break;
427
0
        }
428
0
    }
429
0
    else
430
0
    {
431
0
        __s.__do_ = __state::__reject;
432
0
        __s.__node_ = nullptr;
433
0
    }
434
0
}
435
436
_LIBCPP_END_NAMESPACE_STD