/Users/buildslave/jenkins/workspace/clang-stage2-coverage-R/llvm/lib/Support/regengine.inc
Line | Count | Source (jump to first uncovered line) |
1 | | /*- |
2 | | * This code is derived from OpenBSD's libc/regex, original license follows: |
3 | | * |
4 | | * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
5 | | * Copyright (c) 1992, 1993, 1994 |
6 | | * The Regents of the University of California. All rights reserved. |
7 | | * |
8 | | * This code is derived from software contributed to Berkeley by |
9 | | * Henry Spencer. |
10 | | * |
11 | | * Redistribution and use in source and binary forms, with or without |
12 | | * modification, are permitted provided that the following conditions |
13 | | * are met: |
14 | | * 1. Redistributions of source code must retain the above copyright |
15 | | * notice, this list of conditions and the following disclaimer. |
16 | | * 2. Redistributions in binary form must reproduce the above copyright |
17 | | * notice, this list of conditions and the following disclaimer in the |
18 | | * documentation and/or other materials provided with the distribution. |
19 | | * 3. Neither the name of the University nor the names of its contributors |
20 | | * may be used to endorse or promote products derived from this software |
21 | | * without specific prior written permission. |
22 | | * |
23 | | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
24 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
27 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 | | * SUCH DAMAGE. |
34 | | * |
35 | | * @(#)engine.c 8.5 (Berkeley) 3/20/94 |
36 | | */ |
37 | | |
38 | | /* |
39 | | * The matching engine and friends. This file is #included by regexec.c |
40 | | * after suitable #defines of a variety of macros used herein, so that |
41 | | * different state representations can be used without duplicating masses |
42 | | * of code. |
43 | | */ |
44 | | |
45 | | #ifdef SNAMES |
46 | | #define matcher smatcher |
47 | 1.98M | #define fast sfast |
48 | 62.1k | #define slow sslow |
49 | 10.3k | #define dissect sdissect |
50 | 90 | #define backref sbackref |
51 | 29.8M | #define step sstep |
52 | | #define print sprint |
53 | | #define at sat |
54 | | #define match smat |
55 | | #define nope snope |
56 | | #endif |
57 | | #ifdef LNAMES |
58 | | #define matcher lmatcher |
59 | 302k | #define fast lfast |
60 | 19.6k | #define slow lslow |
61 | 4.16k | #define dissect ldissect |
62 | 0 | #define backref lbackref |
63 | 4.93M | #define step lstep |
64 | | #define print lprint |
65 | | #define at lat |
66 | | #define match lmat |
67 | | #define nope lnope |
68 | | #endif |
69 | | |
70 | | /* another structure passed up and down to avoid zillions of parameters */ |
71 | | struct match { |
72 | | struct re_guts *g; |
73 | | int eflags; |
74 | | llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ |
75 | | const char *offp; /* offsets work from here */ |
76 | | const char *beginp; /* start of string -- virtual NUL precedes */ |
77 | | const char *endp; /* end of string -- virtual NUL here */ |
78 | | const char *coldp; /* can be no match starting before here */ |
79 | | const char **lastpos; /* [nplus+1] */ |
80 | | STATEVARS; |
81 | | states st; /* current states */ |
82 | | states fresh; /* states for a fresh start */ |
83 | | states tmp; /* temporary */ |
84 | | states empty; /* empty set of states */ |
85 | | }; |
86 | | |
87 | | static int matcher(struct re_guts *, const char *, size_t, |
88 | | llvm_regmatch_t[], int); |
89 | | static const char *dissect(struct match *, const char *, const char *, sopno, |
90 | | sopno); |
91 | | static const char *backref(struct match *, const char *, const char *, sopno, |
92 | | sopno, sopno, int); |
93 | | static const char *fast(struct match *, const char *, const char *, sopno, sopno); |
94 | | static const char *slow(struct match *, const char *, const char *, sopno, sopno); |
95 | | static states step(struct re_guts *, sopno, sopno, states, int, states); |
96 | 0 | #define MAX_RECURSION 100 |
97 | 273M | #define BOL (OUT213M +1) |
98 | 86.3M | #define EOL (BOL43.1M +1) |
99 | 47.3M | #define BOLEOL (BOL+2) |
100 | 2.37M | #define NOTHING (BOL+3) |
101 | 55.3M | #define BOW (BOL28.8M +4) |
102 | 26.4M | #define EOW (BOL+5) |
103 | | #define CODEMAX (BOL+5) /* highest code used */ |
104 | 29.6M | #define NONCHAR(c) ((c) > CHAR_MAX) |
105 | | #define NNONCHAR (CODEMAX-CHAR_MAX) |
106 | | #ifdef REDEBUG |
107 | | static void print(struct match *, char *, states, int, FILE *); |
108 | | #endif |
109 | | #ifdef REDEBUG |
110 | | static void at(struct match *, char *, char *, char *, sopno, sopno); |
111 | | #endif |
112 | | #ifdef REDEBUG |
113 | | static char *pchar(int); |
114 | | #endif |
115 | | |
116 | | #ifdef REDEBUG |
117 | | #define SP(t, s, c) print(m, t, s, c, stdout) |
118 | | #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) |
119 | | #define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); } |
120 | | static int nope = 0; |
121 | | #else |
122 | | #define SP(t, s, c) /* nothing */ |
123 | | #define AT(t, p1, p2, s1, s2) /* nothing */ |
124 | | #define NOTE(s) /* nothing */ |
125 | | #endif |
126 | | |
127 | | /* |
128 | | - matcher - the actual matching engine |
129 | | */ |
130 | | static int /* 0 success, REG_NOMATCH failure */ |
131 | | matcher(struct re_guts *g, const char *string, size_t nmatch, |
132 | | llvm_regmatch_t pmatch[], |
133 | | int eflags) |
134 | 35.3M | { |
135 | 35.3M | const char *endp; |
136 | 35.3M | size_t i; |
137 | 35.3M | struct match mv; |
138 | 35.3M | struct match *m = &mv; |
139 | 35.3M | const char *dp; |
140 | 35.3M | const sopno gf = g->firststate+1; /* +1 for OEND */ |
141 | 35.3M | const sopno gl = g->laststate; |
142 | 35.3M | const char *start; |
143 | 35.3M | const char *stop; |
144 | 35.3M | |
145 | 35.3M | /* simplify the situation where possible */ |
146 | 35.3M | if (g->cflags&REG_NOSUB) |
147 | 35.3M | nmatch = 00 ; |
148 | 35.3M | if (eflags&REG_STARTEND) { |
149 | 35.3M | start = string + pmatch[0].rm_so; |
150 | 35.3M | stop = string + pmatch[0].rm_eo; |
151 | 35.3M | } else { |
152 | 0 | start = string; |
153 | 0 | stop = start + strlen(start); |
154 | 0 | } |
155 | 35.3M | if (stop < start) |
156 | 0 | return(REG_INVARG); |
157 | 35.3M | |
158 | 35.3M | /* prescreening; this does wonders for this rather slow code */ |
159 | 35.3M | if (g->must != NULL) { |
160 | 381M | for (dp = start; dp < stop; dp++347M ) |
161 | 348M | if (*dp == g->must[0] && stop - dp >= g->mlen15.8M && |
162 | 348M | memcmp(dp, g->must, (size_t)g->mlen) == 011.6M ) |
163 | 1.02M | break; |
164 | 34.0M | if (dp == stop) /* we didn't find g->must */ |
165 | 33.0M | return(REG_NOMATCH); |
166 | 2.29M | } |
167 | 2.29M | |
168 | 2.29M | /* match struct setup */ |
169 | 2.29M | m->g = g; |
170 | 2.29M | m->eflags = eflags; |
171 | 2.29M | m->pmatch = NULL; |
172 | 2.29M | m->lastpos = NULL; |
173 | 2.29M | m->offp = string; |
174 | 2.29M | m->beginp = start; |
175 | 2.29M | m->endp = stop; |
176 | 2.29M | STATESETUP302k (m, 4); |
177 | 2.29M | SETUP(m->st); |
178 | 2.29M | SETUP(m->fresh); |
179 | 2.29M | SETUP(m->tmp); |
180 | 2.29M | SETUP(m->empty); |
181 | 2.29M | CLEAR(m->empty); |
182 | 302k | |
183 | 302k | /* this loop does only one repetition except for backrefs */ |
184 | 2.29M | for (;;) { |
185 | 2.29M | endp = fast(m, start, stop, gf, gl); |
186 | 2.29M | if (endp == NULL) { /* a miss */ |
187 | 2.19M | free(m->pmatch); |
188 | 2.19M | free((void*)m->lastpos); |
189 | 2.19M | STATETEARDOWN297k (m); |
190 | 2.19M | return(REG_NOMATCH); |
191 | 2.19M | } |
192 | 96.0k | if (nmatch == 0 && !g->backrefs93.2k ) |
193 | 93.2k | break; /* no further info needed */ |
194 | 2.72k | |
195 | 2.72k | /* where? */ |
196 | 2.72k | assert(m->coldp != NULL); |
197 | 2.72k | for (;;) { |
198 | 2.72k | NOTE("finding start"); |
199 | 2.72k | endp = slow(m, m->coldp, stop, gf, gl); |
200 | 2.72k | if (endp != NULL) |
201 | 2.72k | break; |
202 | 0 | assert(m->coldp < m->endp); |
203 | 0 | m->coldp++; |
204 | 0 | } |
205 | 2.72k | if (nmatch == 1 && !g->backrefs273 ) |
206 | 273 | break; /* no further info needed */ |
207 | 2.44k | |
208 | 2.44k | /* oh my, they want the subexpressions... */ |
209 | 2.44k | if (m->pmatch == NULL) |
210 | 2.44k | m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * |
211 | 2.44k | sizeof(llvm_regmatch_t)); |
212 | 2.44k | if (m->pmatch == NULL) { |
213 | 0 | STATETEARDOWN(m); |
214 | 0 | return(REG_ESPACE); |
215 | 0 | } |
216 | 8.59k | for (i = 1; 2.44k i <= m->g->nsub; i++6.14k ) |
217 | 6.14k | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; |
218 | 2.44k | if (!g->backrefs && !(m->eflags&2.43k REG_BACKR2.43k )) { |
219 | 2.43k | NOTE("dissecting"); |
220 | 2.43k | dp = dissect(m, m->coldp, endp, gf, gl); |
221 | 2.43k | } else { |
222 | 9 | if (g->nplus > 0 && m->lastpos == NULL4 ) |
223 | 9 | m->lastpos = (const char **)malloc((g->nplus+1) * |
224 | 2 | sizeof(char *)); |
225 | 9 | if (g->nplus > 0 && m->lastpos == NULL4 ) { |
226 | 0 | free(m->pmatch); |
227 | 0 | STATETEARDOWN(m); |
228 | 0 | return(REG_ESPACE); |
229 | 0 | } |
230 | 9 | NOTE("backref dissect"); |
231 | 9 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
232 | 9 | } |
233 | 2.44k | if (dp != NULL) |
234 | 2.44k | break2.44k ; |
235 | 6 | |
236 | 6 | /* uh-oh... we couldn't find a subexpression-level match */ |
237 | 6 | assert(g->backrefs); /* must be back references doing it */ |
238 | 6 | assert(g->nplus == 0 || m->lastpos != NULL); |
239 | 9 | for (;;) { |
240 | 9 | if (dp != NULL || endp <= m->coldp) |
241 | 0 | break; /* defeat */ |
242 | 9 | NOTE("backoff"); |
243 | 9 | endp = slow(m, m->coldp, endp-1, gf, gl); |
244 | 9 | if (endp == NULL) |
245 | 9 | break6 ; /* defeat */ |
246 | 3 | /* try it on a shorter possibility */ |
247 | | #ifndef NDEBUG |
248 | | for (i = 1; i <= m->g->nsub; i++) { |
249 | | assert(m->pmatch[i].rm_so == -1); |
250 | | assert(m->pmatch[i].rm_eo == -1); |
251 | | } |
252 | | #endif |
253 | 3 | NOTE("backoff dissect"); |
254 | 3 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
255 | 3 | } |
256 | 6 | assert(dp == NULL || dp == endp); |
257 | 6 | if (dp != NULL) /* found a shorter one */ |
258 | 6 | break0 ; |
259 | 6 | |
260 | 6 | /* despite initial appearances, there is no match here */ |
261 | 6 | NOTE("false alarm"); |
262 | 6 | if (m->coldp == stop) |
263 | 0 | break; |
264 | 6 | start = m->coldp + 1; /* recycle starting later */ |
265 | 6 | } |
266 | 302k | |
267 | 302k | /* fill in the details if requested */ |
268 | 302k | if (96.0k nmatch > 096.0k ) { |
269 | 2.71k | pmatch[0].rm_so = m->coldp - m->offp; |
270 | 2.71k | pmatch[0].rm_eo = endp - m->offp; |
271 | 2.71k | } |
272 | 96.0k | if (nmatch > 1) { |
273 | 2.44k | assert(m->pmatch != NULL); |
274 | 8.58k | for (i = 1; i < nmatch; i++6.14k ) |
275 | 6.14k | if (i <= m->g->nsub) |
276 | 6.14k | pmatch[i] = m->pmatch[i]; |
277 | 0 | else { |
278 | 0 | pmatch[i].rm_so = -1; |
279 | 0 | pmatch[i].rm_eo = -1; |
280 | 0 | } |
281 | 2.44k | } |
282 | 96.0k | |
283 | 96.0k | if (m->pmatch != NULL) |
284 | 96.0k | free((char *)m->pmatch)2.44k ; |
285 | 96.0k | if (m->lastpos != NULL) |
286 | 96.0k | free((char *)m->lastpos)1 ; |
287 | 96.0k | STATETEARDOWN4.83k (m); |
288 | 96.0k | return(0); |
289 | 302k | } Line | Count | Source | 134 | 34.7M | { | 135 | 34.7M | const char *endp; | 136 | 34.7M | size_t i; | 137 | 34.7M | struct match mv; | 138 | 34.7M | struct match *m = &mv; | 139 | 34.7M | const char *dp; | 140 | 34.7M | const sopno gf = g->firststate+1; /* +1 for OEND */ | 141 | 34.7M | const sopno gl = g->laststate; | 142 | 34.7M | const char *start; | 143 | 34.7M | const char *stop; | 144 | 34.7M | | 145 | 34.7M | /* simplify the situation where possible */ | 146 | 34.7M | if (g->cflags®_NOSUB) | 147 | 34.7M | nmatch = 00 ; | 148 | 34.7M | if (eflags®_STARTEND) { | 149 | 34.7M | start = string + pmatch[0].rm_so; | 150 | 34.7M | stop = string + pmatch[0].rm_eo; | 151 | 34.7M | } else { | 152 | 0 | start = string; | 153 | 0 | stop = start + strlen(start); | 154 | 0 | } | 155 | 34.7M | if (stop < start) | 156 | 0 | return(REG_INVARG); | 157 | 34.7M | | 158 | 34.7M | /* prescreening; this does wonders for this rather slow code */ | 159 | 34.7M | if (g->must != NULL) { | 160 | 378M | for (dp = start; dp < stop; dp++344M ) | 161 | 345M | if (*dp == g->must[0] && stop - dp >= g->mlen15.6M && | 162 | 345M | memcmp(dp, g->must, (size_t)g->mlen) == 011.5M ) | 163 | 1.01M | break; | 164 | 33.7M | if (dp == stop) /* we didn't find g->must */ | 165 | 32.7M | return(REG_NOMATCH); | 166 | 1.98M | } | 167 | 1.98M | | 168 | 1.98M | /* match struct setup */ | 169 | 1.98M | m->g = g; | 170 | 1.98M | m->eflags = eflags; | 171 | 1.98M | m->pmatch = NULL; | 172 | 1.98M | m->lastpos = NULL; | 173 | 1.98M | m->offp = string; | 174 | 1.98M | m->beginp = start; | 175 | 1.98M | m->endp = stop; | 176 | 1.98M | STATESETUP(m, 4); | 177 | 1.98M | SETUP(m->st); | 178 | 1.98M | SETUP(m->fresh); | 179 | 1.98M | SETUP(m->tmp); | 180 | 1.98M | SETUP(m->empty); | 181 | 1.98M | CLEAR(m->empty); | 182 | 1.98M | | 183 | 1.98M | /* this loop does only one repetition except for backrefs */ | 184 | 1.98M | for (;;) { | 185 | 1.98M | endp = fast(m, start, stop, gf, gl); | 186 | 1.98M | if 
(endp == NULL) { /* a miss */ | 187 | 1.89M | free(m->pmatch); | 188 | 1.89M | free((void*)m->lastpos); | 189 | 1.89M | STATETEARDOWN(m); | 190 | 1.89M | return(REG_NOMATCH); | 191 | 1.89M | } | 192 | 91.1k | if (nmatch == 0 && !g->backrefs88.9k ) | 193 | 88.9k | break; /* no further info needed */ | 194 | 2.23k | | 195 | 2.23k | /* where? */ | 196 | 2.23k | assert(m->coldp != NULL); | 197 | 2.23k | for (;;) { | 198 | 2.23k | NOTE("finding start"); | 199 | 2.23k | endp = slow(m, m->coldp, stop, gf, gl); | 200 | 2.23k | if (endp != NULL) | 201 | 2.23k | break; | 202 | 0 | assert(m->coldp < m->endp); | 203 | 0 | m->coldp++; | 204 | 0 | } | 205 | 2.23k | if (nmatch == 1 && !g->backrefs273 ) | 206 | 273 | break; /* no further info needed */ | 207 | 1.95k | | 208 | 1.95k | /* oh my, they want the subexpressions... */ | 209 | 1.95k | if (m->pmatch == NULL) | 210 | 1.95k | m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * | 211 | 1.95k | sizeof(llvm_regmatch_t)); | 212 | 1.95k | if (m->pmatch == NULL) { | 213 | 0 | STATETEARDOWN(m); | 214 | 0 | return(REG_ESPACE); | 215 | 0 | } | 216 | 5.68k | for (i = 1; 1.95k i <= m->g->nsub; i++3.72k ) | 217 | 3.72k | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; | 218 | 1.95k | if (!g->backrefs && !(m->eflags&1.95k REG_BACKR1.95k )) { | 219 | 1.95k | NOTE("dissecting"); | 220 | 1.95k | dp = dissect(m, m->coldp, endp, gf, gl); | 221 | 1.95k | } else { | 222 | 9 | if (g->nplus > 0 && m->lastpos == NULL4 ) | 223 | 9 | m->lastpos = (const char **)malloc((g->nplus+1) * | 224 | 2 | sizeof(char *)); | 225 | 9 | if (g->nplus > 0 && m->lastpos == NULL4 ) { | 226 | 0 | free(m->pmatch); | 227 | 0 | STATETEARDOWN(m); | 228 | 0 | return(REG_ESPACE); | 229 | 0 | } | 230 | 9 | NOTE("backref dissect"); | 231 | 9 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | 232 | 9 | } | 233 | 1.95k | if (dp != NULL) | 234 | 1.95k | break1.95k ; | 235 | 6 | | 236 | 6 | /* uh-oh... 
we couldn't find a subexpression-level match */ | 237 | 6 | assert(g->backrefs); /* must be back references doing it */ | 238 | 6 | assert(g->nplus == 0 || m->lastpos != NULL); | 239 | 9 | for (;;) { | 240 | 9 | if (dp != NULL || endp <= m->coldp) | 241 | 0 | break; /* defeat */ | 242 | 9 | NOTE("backoff"); | 243 | 9 | endp = slow(m, m->coldp, endp-1, gf, gl); | 244 | 9 | if (endp == NULL) | 245 | 9 | break6 ; /* defeat */ | 246 | 3 | /* try it on a shorter possibility */ | 247 | | #ifndef NDEBUG | 248 | | for (i = 1; i <= m->g->nsub; i++) { | 249 | | assert(m->pmatch[i].rm_so == -1); | 250 | | assert(m->pmatch[i].rm_eo == -1); | 251 | | } | 252 | | #endif | 253 | 3 | NOTE("backoff dissect"); | 254 | 3 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | 255 | 3 | } | 256 | 6 | assert(dp == NULL || dp == endp); | 257 | 6 | if (dp != NULL) /* found a shorter one */ | 258 | 6 | break0 ; | 259 | 6 | | 260 | 6 | /* despite initial appearances, there is no match here */ | 261 | 6 | NOTE("false alarm"); | 262 | 6 | if (m->coldp == stop) | 263 | 0 | break; | 264 | 6 | start = m->coldp + 1; /* recycle starting later */ | 265 | 6 | } | 266 | 1.98M | | 267 | 1.98M | /* fill in the details if requested */ | 268 | 1.98M | if (91.1k nmatch > 091.1k ) { | 269 | 2.22k | pmatch[0].rm_so = m->coldp - m->offp; | 270 | 2.22k | pmatch[0].rm_eo = endp - m->offp; | 271 | 2.22k | } | 272 | 91.1k | if (nmatch > 1) { | 273 | 1.95k | assert(m->pmatch != NULL); | 274 | 5.67k | for (i = 1; i < nmatch; i++3.71k ) | 275 | 3.71k | if (i <= m->g->nsub) | 276 | 3.71k | pmatch[i] = m->pmatch[i]; | 277 | 0 | else { | 278 | 0 | pmatch[i].rm_so = -1; | 279 | 0 | pmatch[i].rm_eo = -1; | 280 | 0 | } | 281 | 1.95k | } | 282 | 91.1k | | 283 | 91.1k | if (m->pmatch != NULL) | 284 | 91.1k | free((char *)m->pmatch)1.95k ; | 285 | 91.1k | if (m->lastpos != NULL) | 286 | 91.1k | free((char *)m->lastpos)1 ; | 287 | 91.1k | STATETEARDOWN(m); | 288 | 91.1k | return(0); | 289 | 1.98M | } |
Line | Count | Source | 134 | 606k | { | 135 | 606k | const char *endp; | 136 | 606k | size_t i; | 137 | 606k | struct match mv; | 138 | 606k | struct match *m = &mv; | 139 | 606k | const char *dp; | 140 | 606k | const sopno gf = g->firststate+1; /* +1 for OEND */ | 141 | 606k | const sopno gl = g->laststate; | 142 | 606k | const char *start; | 143 | 606k | const char *stop; | 144 | 606k | | 145 | 606k | /* simplify the situation where possible */ | 146 | 606k | if (g->cflags®_NOSUB) | 147 | 606k | nmatch = 00 ; | 148 | 606k | if (eflags®_STARTEND) { | 149 | 606k | start = string + pmatch[0].rm_so; | 150 | 606k | stop = string + pmatch[0].rm_eo; | 151 | 606k | } else { | 152 | 0 | start = string; | 153 | 0 | stop = start + strlen(start); | 154 | 0 | } | 155 | 606k | if (stop < start) | 156 | 0 | return(REG_INVARG); | 157 | 606k | | 158 | 606k | /* prescreening; this does wonders for this rather slow code */ | 159 | 606k | if (g->must != NULL) { | 160 | 3.63M | for (dp = start; dp < stop; dp++3.32M ) | 161 | 3.33M | if (*dp == g->must[0] && stop - dp >= g->mlen183k && | 162 | 3.33M | memcmp(dp, g->must, (size_t)g->mlen) == 0138k ) | 163 | 9.30k | break; | 164 | 313k | if (dp == stop) /* we didn't find g->must */ | 165 | 304k | return(REG_NOMATCH); | 166 | 302k | } | 167 | 302k | | 168 | 302k | /* match struct setup */ | 169 | 302k | m->g = g; | 170 | 302k | m->eflags = eflags; | 171 | 302k | m->pmatch = NULL; | 172 | 302k | m->lastpos = NULL; | 173 | 302k | m->offp = string; | 174 | 302k | m->beginp = start; | 175 | 302k | m->endp = stop; | 176 | 302k | STATESETUP(m, 4); | 177 | 302k | SETUP(m->st); | 178 | 302k | SETUP(m->fresh); | 179 | 302k | SETUP(m->tmp); | 180 | 302k | SETUP(m->empty); | 181 | 302k | CLEAR(m->empty); | 182 | 302k | | 183 | 302k | /* this loop does only one repetition except for backrefs */ | 184 | 302k | for (;;) { | 185 | 302k | endp = fast(m, start, stop, gf, gl); | 186 | 302k | if (endp == NULL) { /* a miss */ | 187 | 297k | 
free(m->pmatch); | 188 | 297k | free((void*)m->lastpos); | 189 | 297k | STATETEARDOWN(m); | 190 | 297k | return(REG_NOMATCH); | 191 | 297k | } | 192 | 4.83k | if (nmatch == 0 && !g->backrefs4.34k ) | 193 | 4.34k | break; /* no further info needed */ | 194 | 489 | | 195 | 489 | /* where? */ | 196 | 489 | assert(m->coldp != NULL); | 197 | 489 | for (;;) { | 198 | 489 | NOTE("finding start"); | 199 | 489 | endp = slow(m, m->coldp, stop, gf, gl); | 200 | 489 | if (endp != NULL) | 201 | 489 | break; | 202 | 0 | assert(m->coldp < m->endp); | 203 | 0 | m->coldp++; | 204 | 0 | } | 205 | 489 | if (nmatch == 1 && !g->backrefs0 ) | 206 | 0 | break; /* no further info needed */ | 207 | 489 | | 208 | 489 | /* oh my, they want the subexpressions... */ | 209 | 489 | if (m->pmatch == NULL) | 210 | 489 | m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * | 211 | 489 | sizeof(llvm_regmatch_t)); | 212 | 489 | if (m->pmatch == NULL) { | 213 | 0 | STATETEARDOWN(m); | 214 | 0 | return(REG_ESPACE); | 215 | 0 | } | 216 | 2.91k | for (i = 1; 489 i <= m->g->nsub; i++2.42k ) | 217 | 2.42k | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; | 218 | 489 | if (!g->backrefs && !(m->eflags®_BACKR)) { | 219 | 489 | NOTE("dissecting"); | 220 | 489 | dp = dissect(m, m->coldp, endp, gf, gl); | 221 | 489 | } else { | 222 | 0 | if (g->nplus > 0 && m->lastpos == NULL) | 223 | 0 | m->lastpos = (const char **)malloc((g->nplus+1) * | 224 | 0 | sizeof(char *)); | 225 | 0 | if (g->nplus > 0 && m->lastpos == NULL) { | 226 | 0 | free(m->pmatch); | 227 | 0 | STATETEARDOWN(m); | 228 | 0 | return(REG_ESPACE); | 229 | 0 | } | 230 | 0 | NOTE("backref dissect"); | 231 | 0 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | 232 | 0 | } | 233 | 489 | if (dp != NULL) | 234 | 489 | break; | 235 | 0 | | 236 | 0 | /* uh-oh... 
we couldn't find a subexpression-level match */ | 237 | 0 | assert(g->backrefs); /* must be back references doing it */ | 238 | 0 | assert(g->nplus == 0 || m->lastpos != NULL); | 239 | 0 | for (;;) { | 240 | 0 | if (dp != NULL || endp <= m->coldp) | 241 | 0 | break; /* defeat */ | 242 | 0 | NOTE("backoff"); | 243 | 0 | endp = slow(m, m->coldp, endp-1, gf, gl); | 244 | 0 | if (endp == NULL) | 245 | 0 | break; /* defeat */ | 246 | 0 | /* try it on a shorter possibility */ | 247 | | #ifndef NDEBUG | 248 | | for (i = 1; i <= m->g->nsub; i++) { | 249 | | assert(m->pmatch[i].rm_so == -1); | 250 | | assert(m->pmatch[i].rm_eo == -1); | 251 | | } | 252 | | #endif | 253 | 0 | NOTE("backoff dissect"); | 254 | 0 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | 255 | 0 | } | 256 | 0 | assert(dp == NULL || dp == endp); | 257 | 0 | if (dp != NULL) /* found a shorter one */ | 258 | 0 | break; | 259 | 0 | | 260 | 0 | /* despite initial appearances, there is no match here */ | 261 | 0 | NOTE("false alarm"); | 262 | 0 | if (m->coldp == stop) | 263 | 0 | break; | 264 | 0 | start = m->coldp + 1; /* recycle starting later */ | 265 | 0 | } | 266 | 302k | | 267 | 302k | /* fill in the details if requested */ | 268 | 302k | if (4.83k nmatch > 04.83k ) { | 269 | 489 | pmatch[0].rm_so = m->coldp - m->offp; | 270 | 489 | pmatch[0].rm_eo = endp - m->offp; | 271 | 489 | } | 272 | 4.83k | if (nmatch > 1) { | 273 | 489 | assert(m->pmatch != NULL); | 274 | 2.91k | for (i = 1; i < nmatch; i++2.42k ) | 275 | 2.42k | if (i <= m->g->nsub) | 276 | 2.42k | pmatch[i] = m->pmatch[i]; | 277 | 0 | else { | 278 | 0 | pmatch[i].rm_so = -1; | 279 | 0 | pmatch[i].rm_eo = -1; | 280 | 0 | } | 281 | 489 | } | 282 | 4.83k | | 283 | 4.83k | if (m->pmatch != NULL) | 284 | 4.83k | free((char *)m->pmatch)489 ; | 285 | 4.83k | if (m->lastpos != NULL) | 286 | 4.83k | free((char *)m->lastpos)0 ; | 287 | 4.83k | STATETEARDOWN(m); | 288 | 4.83k | return(0); | 289 | 302k | } |
|
290 | | |
291 | | /* |
292 | | - dissect - figure out what matched what, no back references |
293 | | */ |
294 | | static const char * /* == stop (success) always */ |
295 | | dissect(struct match *m, const char *start, const char *stop, sopno startst, |
296 | | sopno stopst) |
297 | 14.5k | { |
298 | 14.5k | int i; |
299 | 14.5k | sopno ss; /* start sop of current subRE */ |
300 | 14.5k | sopno es; /* end sop of current subRE */ |
301 | 14.5k | const char *sp; /* start of string matched by it */ |
302 | 14.5k | const char *stp; /* string matched by it cannot pass here */ |
303 | 14.5k | const char *rest; /* start of rest of string */ |
304 | 14.5k | const char *tail; /* string unmatched by rest of RE */ |
305 | 14.5k | sopno ssub; /* start sop of subsubRE */ |
306 | 14.5k | sopno esub; /* end sop of subsubRE */ |
307 | 14.5k | const char *ssp; /* start of string matched by subsubRE */ |
308 | 14.5k | const char *sep; /* end of string matched by subsubRE */ |
309 | 14.5k | const char *oldssp; /* previous ssp */ |
310 | 14.5k | |
311 | 14.5k | AT("diss", start, stop, startst, stopst); |
312 | 14.5k | sp = start; |
313 | 76.9k | for (ss = startst; ss < stopst; ss = es62.4k ) { |
314 | 62.4k | /* identify end of subRE */ |
315 | 62.4k | es = ss; |
316 | 62.4k | switch (OP(m->g->strip[es])) { |
317 | 62.4k | case 12.8k OPLUS_12.8k : |
318 | 12.8k | case OQUEST_: |
319 | 12.8k | es += OPND(m->g->strip[es]); |
320 | 12.8k | break; |
321 | 12.8k | case 3.77k OCH_3.77k : |
322 | 11.7k | while (OP(m->g->strip[es]) != O_CH) |
323 | 7.95k | es += OPND(m->g->strip[es]); |
324 | 3.77k | break; |
325 | 62.4k | } |
326 | 62.4k | es++; |
327 | 62.4k | |
328 | 62.4k | /* figure out what it matched */ |
329 | 62.4k | switch (OP(m->g->strip[ss])) { |
330 | 62.4k | case 0 OEND0 : |
331 | 0 | assert(nope); |
332 | 0 | break; |
333 | 62.4k | case 23.7k OCHAR23.7k : |
334 | 23.7k | sp++; |
335 | 23.7k | break; |
336 | 62.4k | case 2.52k OBOL2.52k : |
337 | 2.52k | case OEOL: |
338 | 2.52k | case OBOW: |
339 | 2.52k | case OEOW: |
340 | 2.52k | break; |
341 | 10.4k | case OANY: |
342 | 10.4k | case OANYOF: |
343 | 10.4k | sp++; |
344 | 10.4k | break; |
345 | 10.4k | case 0 OBACK_0 : |
346 | 0 | case O_BACK: |
347 | 0 | assert(nope); |
348 | 0 | break; |
349 | 0 | /* cases where length of match is hard to find */ |
350 | 8.14k | case OQUEST_: |
351 | 8.14k | stp = stop; |
352 | 8.19k | for (;;) { |
353 | 8.19k | /* how long could this one be? */ |
354 | 8.19k | rest = slow(m, sp, stp, ss, es); |
355 | 8.19k | assert(rest != NULL); /* it did match */ |
356 | 8.19k | /* could the rest match the rest? */ |
357 | 8.19k | tail = slow(m, rest, stop, es, stopst); |
358 | 8.19k | if (tail == stop) |
359 | 8.14k | break; /* yes! */ |
360 | 55 | /* no -- try a shorter match for this one */ |
361 | 55 | stp = rest - 1; |
362 | 55 | assert(stp >= sp); /* it did work */ |
363 | 55 | } |
364 | 8.14k | ssub = ss + 1; |
365 | 8.14k | esub = es - 1; |
366 | 8.14k | /* did innards match? */ |
367 | 8.14k | if (slow(m, sp, rest, ssub, esub) != NULL) { |
368 | 3.64k | const char *dp = dissect(m, sp, rest, ssub, esub); |
369 | 3.64k | (void)dp; /* avoid warning if assertions off */ |
370 | 3.64k | assert(dp == rest); |
371 | 3.64k | } else /* no */ |
372 | 8.14k | assert(sp == rest); |
373 | 8.14k | sp = rest; |
374 | 8.14k | break; |
375 | 4.66k | case OPLUS_: |
376 | 4.66k | stp = stop; |
377 | 4.66k | for (;;) { |
378 | 4.66k | /* how long could this one be? */ |
379 | 4.66k | rest = slow(m, sp, stp, ss, es); |
380 | 4.66k | assert(rest != NULL); /* it did match */ |
381 | 4.66k | /* could the rest match the rest? */ |
382 | 4.66k | tail = slow(m, rest, stop, es, stopst); |
383 | 4.66k | if (tail == stop) |
384 | 4.66k | break; /* yes! */ |
385 | 0 | /* no -- try a shorter match for this one */ |
386 | 0 | stp = rest - 1; |
387 | 0 | assert(stp >= sp); /* it did work */ |
388 | 0 | } |
389 | 4.66k | ssub = ss + 1; |
390 | 4.66k | esub = es - 1; |
391 | 4.66k | ssp = sp; |
392 | 4.66k | oldssp = ssp; |
393 | 30.5k | for (;;) { /* find last match of innards */ |
394 | 30.5k | sep = slow(m, ssp, rest, ssub, esub); |
395 | 30.5k | if (sep == NULL || sep == ssp25.8k ) |
396 | 4.66k | break; /* failed or matched null */ |
397 | 25.8k | oldssp = ssp; /* on to next try */ |
398 | 25.8k | ssp = sep; |
399 | 25.8k | } |
400 | 4.66k | if (sep == NULL) { |
401 | 4.66k | /* last successful match */ |
402 | 4.66k | sep = ssp; |
403 | 4.66k | ssp = oldssp; |
404 | 4.66k | } |
405 | 4.66k | assert(sep == rest); /* must exhaust substring */ |
406 | 4.66k | assert(slow(m, ssp, sep, ssub, esub) == rest); |
407 | 4.66k | { |
408 | 4.66k | const char *dp = dissect(m, ssp, sep, ssub, esub); |
409 | 4.66k | (void)dp; /* avoid warning if assertions off */ |
410 | 4.66k | assert(dp == sep); |
411 | 4.66k | } |
412 | 4.66k | sp = rest; |
413 | 4.66k | break; |
414 | 3.77k | case OCH_: |
415 | 3.77k | stp = stop; |
416 | 3.77k | for (;;) { |
417 | 3.77k | /* how long could this one be? */ |
418 | 3.77k | rest = slow(m, sp, stp, ss, es); |
419 | 3.77k | assert(rest != NULL); /* it did match */ |
420 | 3.77k | /* could the rest match the rest? */ |
421 | 3.77k | tail = slow(m, rest, stop, es, stopst); |
422 | 3.77k | if (tail == stop) |
423 | 3.77k | break; /* yes! */ |
424 | 0 | /* no -- try a shorter match for this one */ |
425 | 0 | stp = rest - 1; |
426 | 0 | assert(stp >= sp); /* it did work */ |
427 | 0 | } |
428 | 3.77k | ssub = ss + 1; |
429 | 3.77k | esub = ss + OPND(m->g->strip[ss]) - 1; |
430 | 3.77k | assert(OP(m->g->strip[esub]) == OOR1); |
431 | 7.05k | for (;;) { /* find first matching branch */ |
432 | 7.05k | if (slow(m, sp, rest, ssub, esub) == rest) |
433 | 3.77k | break; /* it matched all of it */ |
434 | 3.27k | /* that one missed, try next one */ |
435 | 3.27k | assert(OP(m->g->strip[esub]) == OOR1); |
436 | 3.27k | esub++; |
437 | 3.27k | assert(OP(m->g->strip[esub]) == OOR2); |
438 | 3.27k | ssub = esub + 1; |
439 | 3.27k | esub += OPND(m->g->strip[esub]); |
440 | 3.27k | if (OP(m->g->strip[esub]) == OOR2) |
441 | 3.27k | esub--159 ; |
442 | 3.27k | else |
443 | 3.27k | assert(OP(m->g->strip[esub]) == O_CH); |
444 | 3.27k | } |
445 | 3.77k | { |
446 | 3.77k | const char *dp = dissect(m, sp, rest, ssub, esub); |
447 | 3.77k | (void)dp; /* avoid warning if assertions off */ |
448 | 3.77k | assert(dp == rest); |
449 | 3.77k | } |
450 | 3.77k | sp = rest; |
451 | 3.77k | break; |
452 | 0 | case O_PLUS: |
453 | 0 | case O_QUEST: |
454 | 0 | case OOR1: |
455 | 0 | case OOR2: |
456 | 0 | case O_CH: |
457 | 0 | assert(nope); |
458 | 0 | break; |
459 | 4.57k | case OLPAREN: |
460 | 4.57k | i = OPND(m->g->strip[ss]); |
461 | 4.57k | assert(0 < i && i <= m->g->nsub); |
462 | 4.57k | m->pmatch[i].rm_so = sp - m->offp; |
463 | 4.57k | break; |
464 | 4.57k | case ORPAREN: |
465 | 4.57k | i = OPND(m->g->strip[ss]); |
466 | 4.57k | assert(0 < i && i <= m->g->nsub); |
467 | 4.57k | m->pmatch[i].rm_eo = sp - m->offp; |
468 | 4.57k | break; |
469 | 0 | default: /* uh oh */ |
470 | 0 | assert(nope); |
471 | 0 | break; |
472 | 62.4k | } |
473 | 62.4k | } |
474 | 14.5k | |
475 | 14.5k | assert(sp == stop); |
476 | 14.5k | return(sp); |
477 | 14.5k | } Line | Count | Source | 297 | 10.3k | { | 298 | 10.3k | int i; | 299 | 10.3k | sopno ss; /* start sop of current subRE */ | 300 | 10.3k | sopno es; /* end sop of current subRE */ | 301 | 10.3k | const char *sp; /* start of string matched by it */ | 302 | 10.3k | const char *stp; /* string matched by it cannot pass here */ | 303 | 10.3k | const char *rest; /* start of rest of string */ | 304 | 10.3k | const char *tail; /* string unmatched by rest of RE */ | 305 | 10.3k | sopno ssub; /* start sop of subsubRE */ | 306 | 10.3k | sopno esub; /* end sop of subsubRE */ | 307 | 10.3k | const char *ssp; /* start of string matched by subsubRE */ | 308 | 10.3k | const char *sep; /* end of string matched by subsubRE */ | 309 | 10.3k | const char *oldssp; /* previous ssp */ | 310 | 10.3k | | 311 | 10.3k | AT("diss", start, stop, startst, stopst); | 312 | 10.3k | sp = start; | 313 | 58.2k | for (ss = startst; ss < stopst; ss = es47.9k ) { | 314 | 47.9k | /* identify end of subRE */ | 315 | 47.9k | es = ss; | 316 | 47.9k | switch (OP(m->g->strip[es])) { | 317 | 47.9k | case 9.85k OPLUS_9.85k : | 318 | 9.85k | case OQUEST_: | 319 | 9.85k | es += OPND(m->g->strip[es]); | 320 | 9.85k | break; | 321 | 9.85k | case 1.78k OCH_1.78k : | 322 | 5.55k | while (OP(m->g->strip[es]) != O_CH) | 323 | 3.77k | es += OPND(m->g->strip[es]); | 324 | 1.78k | break; | 325 | 47.9k | } | 326 | 47.9k | es++; | 327 | 47.9k | | 328 | 47.9k | /* figure out what it matched */ | 329 | 47.9k | switch (OP(m->g->strip[ss])) { | 330 | 47.9k | case 0 OEND0 : | 331 | 0 | assert(nope); | 332 | 0 | break; | 333 | 47.9k | case 20.7k OCHAR20.7k : | 334 | 20.7k | sp++; | 335 | 20.7k | break; | 336 | 47.9k | case 1.69k OBOL1.69k : | 337 | 1.69k | case OEOL: | 338 | 1.69k | case OBOW: | 339 | 1.69k | case OEOW: | 340 | 1.69k | break; | 341 | 6.58k | case OANY: | 342 | 6.58k | case OANYOF: | 343 | 6.58k | sp++; | 344 | 6.58k | break; | 345 | 6.58k | case 0 OBACK_0 : | 346 | 0 | case O_BACK: | 347 | 0 | 
assert(nope); | 348 | 0 | break; | 349 | 0 | /* cases where length of match is hard to find */ | 350 | 6.34k | case OQUEST_: | 351 | 6.34k | stp = stop; | 352 | 6.39k | for (;;) { | 353 | 6.39k | /* how long could this one be? */ | 354 | 6.39k | rest = slow(m, sp, stp, ss, es); | 355 | 6.39k | assert(rest != NULL); /* it did match */ | 356 | 6.39k | /* could the rest match the rest? */ | 357 | 6.39k | tail = slow(m, rest, stop, es, stopst); | 358 | 6.39k | if (tail == stop) | 359 | 6.34k | break; /* yes! */ | 360 | 55 | /* no -- try a shorter match for this one */ | 361 | 55 | stp = rest - 1; | 362 | 55 | assert(stp >= sp); /* it did work */ | 363 | 55 | } | 364 | 6.34k | ssub = ss + 1; | 365 | 6.34k | esub = es - 1; | 366 | 6.34k | /* did innards match? */ | 367 | 6.34k | if (slow(m, sp, rest, ssub, esub) != NULL) { | 368 | 3.11k | const char *dp = dissect(m, sp, rest, ssub, esub); | 369 | 3.11k | (void)dp; /* avoid warning if assertions off */ | 370 | 3.11k | assert(dp == rest); | 371 | 3.11k | } else /* no */ | 372 | 6.34k | assert(sp == rest); | 373 | 6.34k | sp = rest; | 374 | 6.34k | break; | 375 | 3.51k | case OPLUS_: | 376 | 3.51k | stp = stop; | 377 | 3.51k | for (;;) { | 378 | 3.51k | /* how long could this one be? */ | 379 | 3.51k | rest = slow(m, sp, stp, ss, es); | 380 | 3.51k | assert(rest != NULL); /* it did match */ | 381 | 3.51k | /* could the rest match the rest? */ | 382 | 3.51k | tail = slow(m, rest, stop, es, stopst); | 383 | 3.51k | if (tail == stop) | 384 | 3.51k | break; /* yes! 
*/ | 385 | 0 | /* no -- try a shorter match for this one */ | 386 | 0 | stp = rest - 1; | 387 | 0 | assert(stp >= sp); /* it did work */ | 388 | 0 | } | 389 | 3.51k | ssub = ss + 1; | 390 | 3.51k | esub = es - 1; | 391 | 3.51k | ssp = sp; | 392 | 3.51k | oldssp = ssp; | 393 | 26.6k | for (;;) { /* find last match of innards */ | 394 | 26.6k | sep = slow(m, ssp, rest, ssub, esub); | 395 | 26.6k | if (sep == NULL || sep == ssp23.1k ) | 396 | 3.51k | break; /* failed or matched null */ | 397 | 23.1k | oldssp = ssp; /* on to next try */ | 398 | 23.1k | ssp = sep; | 399 | 23.1k | } | 400 | 3.51k | if (sep == NULL) { | 401 | 3.51k | /* last successful match */ | 402 | 3.51k | sep = ssp; | 403 | 3.51k | ssp = oldssp; | 404 | 3.51k | } | 405 | 3.51k | assert(sep == rest); /* must exhaust substring */ | 406 | 3.51k | assert(slow(m, ssp, sep, ssub, esub) == rest); | 407 | 3.51k | { | 408 | 3.51k | const char *dp = dissect(m, ssp, sep, ssub, esub); | 409 | 3.51k | (void)dp; /* avoid warning if assertions off */ | 410 | 3.51k | assert(dp == sep); | 411 | 3.51k | } | 412 | 3.51k | sp = rest; | 413 | 3.51k | break; | 414 | 1.78k | case OCH_: | 415 | 1.78k | stp = stop; | 416 | 1.78k | for (;;) { | 417 | 1.78k | /* how long could this one be? */ | 418 | 1.78k | rest = slow(m, sp, stp, ss, es); | 419 | 1.78k | assert(rest != NULL); /* it did match */ | 420 | 1.78k | /* could the rest match the rest? */ | 421 | 1.78k | tail = slow(m, rest, stop, es, stopst); | 422 | 1.78k | if (tail == stop) | 423 | 1.78k | break; /* yes! 
*/ | 424 | 0 | /* no -- try a shorter match for this one */ | 425 | 0 | stp = rest - 1; | 426 | 0 | assert(stp >= sp); /* it did work */ | 427 | 0 | } | 428 | 1.78k | ssub = ss + 1; | 429 | 1.78k | esub = ss + OPND(m->g->strip[ss]) - 1; | 430 | 1.78k | assert(OP(m->g->strip[esub]) == OOR1); | 431 | 3.49k | for (;;) { /* find first matching branch */ | 432 | 3.49k | if (slow(m, sp, rest, ssub, esub) == rest) | 433 | 1.78k | break; /* it matched all of it */ | 434 | 1.71k | /* that one missed, try next one */ | 435 | 1.71k | assert(OP(m->g->strip[esub]) == OOR1); | 436 | 1.71k | esub++; | 437 | 1.71k | assert(OP(m->g->strip[esub]) == OOR2); | 438 | 1.71k | ssub = esub + 1; | 439 | 1.71k | esub += OPND(m->g->strip[esub]); | 440 | 1.71k | if (OP(m->g->strip[esub]) == OOR2) | 441 | 1.71k | esub--88 ; | 442 | 1.71k | else | 443 | 1.71k | assert(OP(m->g->strip[esub]) == O_CH); | 444 | 1.71k | } | 445 | 1.78k | { | 446 | 1.78k | const char *dp = dissect(m, sp, rest, ssub, esub); | 447 | 1.78k | (void)dp; /* avoid warning if assertions off */ | 448 | 1.78k | assert(dp == rest); | 449 | 1.78k | } | 450 | 1.78k | sp = rest; | 451 | 1.78k | break; | 452 | 0 | case O_PLUS: | 453 | 0 | case O_QUEST: | 454 | 0 | case OOR1: | 455 | 0 | case OOR2: | 456 | 0 | case O_CH: | 457 | 0 | assert(nope); | 458 | 0 | break; | 459 | 3.65k | case OLPAREN: | 460 | 3.65k | i = OPND(m->g->strip[ss]); | 461 | 3.65k | assert(0 < i && i <= m->g->nsub); | 462 | 3.65k | m->pmatch[i].rm_so = sp - m->offp; | 463 | 3.65k | break; | 464 | 3.65k | case ORPAREN: | 465 | 3.65k | i = OPND(m->g->strip[ss]); | 466 | 3.65k | assert(0 < i && i <= m->g->nsub); | 467 | 3.65k | m->pmatch[i].rm_eo = sp - m->offp; | 468 | 3.65k | break; | 469 | 0 | default: /* uh oh */ | 470 | 0 | assert(nope); | 471 | 0 | break; | 472 | 47.9k | } | 473 | 47.9k | } | 474 | 10.3k | | 475 | 10.3k | assert(sp == stop); | 476 | 10.3k | return(sp); | 477 | 10.3k | } |
Line | Count | Source | 297 | 4.16k | { | 298 | 4.16k | int i; | 299 | 4.16k | sopno ss; /* start sop of current subRE */ | 300 | 4.16k | sopno es; /* end sop of current subRE */ | 301 | 4.16k | const char *sp; /* start of string matched by it */ | 302 | 4.16k | const char *stp; /* string matched by it cannot pass here */ | 303 | 4.16k | const char *rest; /* start of rest of string */ | 304 | 4.16k | const char *tail; /* string unmatched by rest of RE */ | 305 | 4.16k | sopno ssub; /* start sop of subsubRE */ | 306 | 4.16k | sopno esub; /* end sop of subsubRE */ | 307 | 4.16k | const char *ssp; /* start of string matched by subsubRE */ | 308 | 4.16k | const char *sep; /* end of string matched by subsubRE */ | 309 | 4.16k | const char *oldssp; /* previous ssp */ | 310 | 4.16k | | 311 | 4.16k | AT("diss", start, stop, startst, stopst); | 312 | 4.16k | sp = start; | 313 | 18.6k | for (ss = startst; ss < stopst; ss = es14.5k ) { | 314 | 14.5k | /* identify end of subRE */ | 315 | 14.5k | es = ss; | 316 | 14.5k | switch (OP(m->g->strip[es])) { | 317 | 14.5k | case 2.95k OPLUS_2.95k : | 318 | 2.95k | case OQUEST_: | 319 | 2.95k | es += OPND(m->g->strip[es]); | 320 | 2.95k | break; | 321 | 2.95k | case 1.99k OCH_1.99k : | 322 | 6.17k | while (OP(m->g->strip[es]) != O_CH) | 323 | 4.18k | es += OPND(m->g->strip[es]); | 324 | 1.99k | break; | 325 | 14.5k | } | 326 | 14.5k | es++; | 327 | 14.5k | | 328 | 14.5k | /* figure out what it matched */ | 329 | 14.5k | switch (OP(m->g->strip[ss])) { | 330 | 14.5k | case 0 OEND0 : | 331 | 0 | assert(nope); | 332 | 0 | break; | 333 | 14.5k | case 3.03k OCHAR3.03k : | 334 | 3.03k | sp++; | 335 | 3.03k | break; | 336 | 14.5k | case 828 OBOL828 : | 337 | 828 | case OEOL: | 338 | 828 | case OBOW: | 339 | 828 | case OEOW: | 340 | 828 | break; | 341 | 3.86k | case OANY: | 342 | 3.86k | case OANYOF: | 343 | 3.86k | sp++; | 344 | 3.86k | break; | 345 | 3.86k | case 0 OBACK_0 : | 346 | 0 | case O_BACK: | 347 | 0 | assert(nope); | 348 | 0 | 
break; | 349 | 0 | /* cases where length of match is hard to find */ | 350 | 1.80k | case OQUEST_: | 351 | 1.80k | stp = stop; | 352 | 1.80k | for (;;) { | 353 | 1.80k | /* how long could this one be? */ | 354 | 1.80k | rest = slow(m, sp, stp, ss, es); | 355 | 1.80k | assert(rest != NULL); /* it did match */ | 356 | 1.80k | /* could the rest match the rest? */ | 357 | 1.80k | tail = slow(m, rest, stop, es, stopst); | 358 | 1.80k | if (tail == stop) | 359 | 1.80k | break; /* yes! */ | 360 | 0 | /* no -- try a shorter match for this one */ | 361 | 0 | stp = rest - 1; | 362 | 0 | assert(stp >= sp); /* it did work */ | 363 | 0 | } | 364 | 1.80k | ssub = ss + 1; | 365 | 1.80k | esub = es - 1; | 366 | 1.80k | /* did innards match? */ | 367 | 1.80k | if (slow(m, sp, rest, ssub, esub) != NULL) { | 368 | 530 | const char *dp = dissect(m, sp, rest, ssub, esub); | 369 | 530 | (void)dp; /* avoid warning if assertions off */ | 370 | 530 | assert(dp == rest); | 371 | 530 | } else /* no */ | 372 | 1.80k | assert(sp == rest); | 373 | 1.80k | sp = rest; | 374 | 1.80k | break; | 375 | 1.15k | case OPLUS_: | 376 | 1.15k | stp = stop; | 377 | 1.15k | for (;;) { | 378 | 1.15k | /* how long could this one be? */ | 379 | 1.15k | rest = slow(m, sp, stp, ss, es); | 380 | 1.15k | assert(rest != NULL); /* it did match */ | 381 | 1.15k | /* could the rest match the rest? */ | 382 | 1.15k | tail = slow(m, rest, stop, es, stopst); | 383 | 1.15k | if (tail == stop) | 384 | 1.15k | break; /* yes! 
*/ | 385 | 0 | /* no -- try a shorter match for this one */ | 386 | 0 | stp = rest - 1; | 387 | 0 | assert(stp >= sp); /* it did work */ | 388 | 0 | } | 389 | 1.15k | ssub = ss + 1; | 390 | 1.15k | esub = es - 1; | 391 | 1.15k | ssp = sp; | 392 | 1.15k | oldssp = ssp; | 393 | 3.88k | for (;;) { /* find last match of innards */ | 394 | 3.88k | sep = slow(m, ssp, rest, ssub, esub); | 395 | 3.88k | if (sep == NULL || sep == ssp2.73k ) | 396 | 1.15k | break; /* failed or matched null */ | 397 | 2.73k | oldssp = ssp; /* on to next try */ | 398 | 2.73k | ssp = sep; | 399 | 2.73k | } | 400 | 1.15k | if (sep == NULL) { | 401 | 1.15k | /* last successful match */ | 402 | 1.15k | sep = ssp; | 403 | 1.15k | ssp = oldssp; | 404 | 1.15k | } | 405 | 1.15k | assert(sep == rest); /* must exhaust substring */ | 406 | 1.15k | assert(slow(m, ssp, sep, ssub, esub) == rest); | 407 | 1.15k | { | 408 | 1.15k | const char *dp = dissect(m, ssp, sep, ssub, esub); | 409 | 1.15k | (void)dp; /* avoid warning if assertions off */ | 410 | 1.15k | assert(dp == sep); | 411 | 1.15k | } | 412 | 1.15k | sp = rest; | 413 | 1.15k | break; | 414 | 1.99k | case OCH_: | 415 | 1.99k | stp = stop; | 416 | 1.99k | for (;;) { | 417 | 1.99k | /* how long could this one be? */ | 418 | 1.99k | rest = slow(m, sp, stp, ss, es); | 419 | 1.99k | assert(rest != NULL); /* it did match */ | 420 | 1.99k | /* could the rest match the rest? */ | 421 | 1.99k | tail = slow(m, rest, stop, es, stopst); | 422 | 1.99k | if (tail == stop) | 423 | 1.99k | break; /* yes! 
*/ | 424 | 0 | /* no -- try a shorter match for this one */ | 425 | 0 | stp = rest - 1; | 426 | 0 | assert(stp >= sp); /* it did work */ | 427 | 0 | } | 428 | 1.99k | ssub = ss + 1; | 429 | 1.99k | esub = ss + OPND(m->g->strip[ss]) - 1; | 430 | 1.99k | assert(OP(m->g->strip[esub]) == OOR1); | 431 | 3.55k | for (;;) { /* find first matching branch */ | 432 | 3.55k | if (slow(m, sp, rest, ssub, esub) == rest) | 433 | 1.99k | break; /* it matched all of it */ | 434 | 1.56k | /* that one missed, try next one */ | 435 | 1.56k | assert(OP(m->g->strip[esub]) == OOR1); | 436 | 1.56k | esub++; | 437 | 1.56k | assert(OP(m->g->strip[esub]) == OOR2); | 438 | 1.56k | ssub = esub + 1; | 439 | 1.56k | esub += OPND(m->g->strip[esub]); | 440 | 1.56k | if (OP(m->g->strip[esub]) == OOR2) | 441 | 1.56k | esub--71 ; | 442 | 1.56k | else | 443 | 1.56k | assert(OP(m->g->strip[esub]) == O_CH); | 444 | 1.56k | } | 445 | 1.99k | { | 446 | 1.99k | const char *dp = dissect(m, sp, rest, ssub, esub); | 447 | 1.99k | (void)dp; /* avoid warning if assertions off */ | 448 | 1.99k | assert(dp == rest); | 449 | 1.99k | } | 450 | 1.99k | sp = rest; | 451 | 1.99k | break; | 452 | 0 | case O_PLUS: | 453 | 0 | case O_QUEST: | 454 | 0 | case OOR1: | 455 | 0 | case OOR2: | 456 | 0 | case O_CH: | 457 | 0 | assert(nope); | 458 | 0 | break; | 459 | 921 | case OLPAREN: | 460 | 921 | i = OPND(m->g->strip[ss]); | 461 | 921 | assert(0 < i && i <= m->g->nsub); | 462 | 921 | m->pmatch[i].rm_so = sp - m->offp; | 463 | 921 | break; | 464 | 921 | case ORPAREN: | 465 | 921 | i = OPND(m->g->strip[ss]); | 466 | 921 | assert(0 < i && i <= m->g->nsub); | 467 | 921 | m->pmatch[i].rm_eo = sp - m->offp; | 468 | 921 | break; | 469 | 0 | default: /* uh oh */ | 470 | 0 | assert(nope); | 471 | 0 | break; | 472 | 14.5k | } | 473 | 14.5k | } | 474 | 4.16k | | 475 | 4.16k | assert(sp == stop); | 476 | 4.16k | return(sp); | 477 | 4.16k | } |
|
478 | | |
479 | | /* |
480 | | - backref - figure out what matched what, figuring in back references |
481 | | */ |
482 | | static const char * /* == stop (success) or NULL (failure) */ |
483 | | backref(struct match *m, const char *start, const char *stop, sopno startst, |
484 | | sopno stopst, sopno lev, int rec) /* PLUS nesting level */ |
485 | 90 | { |
486 | 90 | int i; |
487 | 90 | sopno ss; /* start sop of current subRE */ |
488 | 90 | const char *sp; /* start of string matched by it */ |
489 | 90 | sopno ssub; /* start sop of subsubRE */ |
490 | 90 | sopno esub; /* end sop of subsubRE */ |
491 | 90 | const char *ssp; /* start of string matched by subsubRE */ |
492 | 90 | const char *dp; |
493 | 90 | size_t len; |
494 | 90 | int hard; |
495 | 90 | sop s; |
496 | 90 | llvm_regoff_t offsave; |
497 | 90 | cset *cs; |
498 | 90 | |
499 | 90 | AT("back", start, stop, startst, stopst); |
500 | 90 | sp = start; |
501 | 90 | |
502 | 90 | /* get as far as we can with easy stuff */ |
503 | 90 | hard = 0; |
504 | 206 | for (ss = startst; !hard && ss < stopst132 ; ss++116 ) |
505 | 129 | switch (OP(s = m->g->strip[ss])) { |
506 | 129 | case 25 OCHAR25 : |
507 | 25 | if (sp == stop || *sp++ != (char)OPND(s)) |
508 | 25 | return(NULL)6 ; |
509 | 19 | break; |
510 | 19 | case 0 OANY0 : |
511 | 0 | if (sp == stop) |
512 | 0 | return(NULL); |
513 | 0 | sp++; |
514 | 0 | break; |
515 | 30 | case OANYOF: |
516 | 30 | cs = &m->g->sets[OPND(s)]; |
517 | 30 | if (sp == stop || !CHIN(cs, *sp++)) |
518 | 30 | return(NULL)7 ; |
519 | 23 | break; |
520 | 23 | case 0 OBOL0 : |
521 | 0 | if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || |
522 | 0 | (sp < m->endp && *(sp-1) == '\n' && |
523 | 0 | (m->g->cflags&REG_NEWLINE)) ) |
524 | 0 | { /* yes */ } |
525 | 0 | else |
526 | 0 | return(NULL); |
527 | 0 | break; |
528 | 0 | case OEOL: |
529 | 0 | if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || |
530 | 0 | (sp < m->endp && *sp == '\n' && |
531 | 0 | (m->g->cflags&REG_NEWLINE)) ) |
532 | 0 | { /* yes */ } |
533 | 0 | else |
534 | 0 | return(NULL); |
535 | 0 | break; |
536 | 0 | case OBOW: |
537 | 0 | if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) || |
538 | 0 | (sp < m->endp && *(sp-1) == '\n' && |
539 | 0 | (m->g->cflags&REG_NEWLINE)) || |
540 | 0 | (sp > m->beginp && |
541 | 0 | !ISWORD(*(sp-1))) ) && |
542 | 0 | (sp < m->endp && ISWORD(*sp)) ) |
543 | 0 | { /* yes */ } |
544 | 0 | else |
545 | 0 | return(NULL); |
546 | 0 | break; |
547 | 0 | case OEOW: |
548 | 0 | if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || |
549 | 0 | (sp < m->endp && *sp == '\n' && |
550 | 0 | (m->g->cflags&REG_NEWLINE)) || |
551 | 0 | (sp < m->endp && !ISWORD(*sp)) ) && |
552 | 0 | (sp > m->beginp && ISWORD(*(sp-1))) ) |
553 | 0 | { /* yes */ } |
554 | 0 | else |
555 | 0 | return(NULL); |
556 | 0 | break; |
557 | 0 | case O_QUEST: |
558 | 0 | break; |
559 | 0 | case OOR1: /* matches null but needs to skip */ |
560 | 0 | ss++; |
561 | 0 | s = m->g->strip[ss]; |
562 | 0 | do { |
563 | 0 | assert(OP(s) == OOR2); |
564 | 0 | ss += OPND(s); |
565 | 0 | } while (OP(s = m->g->strip[ss]) != O_CH); |
566 | 0 | /* note that the ss++ gets us past the O_CH */ |
567 | 0 | break; |
568 | 74 | default: /* have to make a choice */ |
569 | 74 | hard = 1; |
570 | 74 | break; |
571 | 129 | } |
572 | 90 | if (77 !hard77 ) { /* that was it! */ |
573 | 3 | if (sp != stop) |
574 | 0 | return(NULL); |
575 | 3 | return(sp); |
576 | 3 | } |
577 | 74 | ss--; /* adjust for the for's final increment */ |
578 | 74 | |
579 | 74 | /* the hard stuff */ |
580 | 74 | AT("hard", sp, stop, ss, stopst); |
581 | 74 | s = m->g->strip[ss]; |
582 | 74 | switch (OP(s)) { |
583 | 74 | case 16 OBACK_16 : /* the vilest depths */ |
584 | 16 | i = OPND(s); |
585 | 16 | assert(0 < i && i <= m->g->nsub); |
586 | 16 | if (m->pmatch[i].rm_eo == -1) |
587 | 0 | return(NULL); |
588 | 16 | assert(m->pmatch[i].rm_so != -1); |
589 | 16 | len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; |
590 | 16 | if (len == 0 && rec++ > 0 MAX_RECURSION0 ) |
591 | 16 | return(NULL)0 ; |
592 | 16 | assert(stop - m->beginp >= len); |
593 | 16 | if (sp > stop - len) |
594 | 3 | return(NULL); /* not enough left to match */ |
595 | 13 | ssp = m->offp + m->pmatch[i].rm_so; |
596 | 13 | if (memcmp(sp, ssp, len) != 0) |
597 | 6 | return(NULL); |
598 | 23 | while (7 m->g->strip[ss] != SOP(O_BACK, i)) |
599 | 16 | ss++; |
600 | 7 | return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); |
601 | 7 | break0 ; |
602 | 7 | case 0 OQUEST_0 : /* to null or not */ |
603 | 0 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
604 | 0 | if (dp != NULL) |
605 | 0 | return(dp); /* not */ |
606 | 0 | return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); |
607 | 0 | break; |
608 | 7 | case OPLUS_: |
609 | 7 | assert(m->lastpos != NULL); |
610 | 7 | assert(lev+1 <= m->g->nplus); |
611 | 7 | m->lastpos[lev+1] = sp; |
612 | 7 | return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); |
613 | 0 | break; |
614 | 15 | case O_PLUS: |
615 | 15 | if (sp == m->lastpos[lev]) /* last pass matched null */ |
616 | 0 | return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); |
617 | 15 | /* try another pass */ |
618 | 15 | m->lastpos[lev] = sp; |
619 | 15 | dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); |
620 | 15 | if (dp == NULL) |
621 | 15 | return(13 backref13 (m, sp, stop, ss+1, stopst, lev-1, rec)); |
622 | 2 | else |
623 | 2 | return(dp); |
624 | 0 | break; |
625 | 0 | case OCH_: /* find the right one, if any */ |
626 | 0 | ssub = ss + 1; |
627 | 0 | esub = ss + OPND(s) - 1; |
628 | 0 | assert(OP(m->g->strip[esub]) == OOR1); |
629 | 0 | for (;;) { /* find first matching branch */ |
630 | 0 | dp = backref(m, sp, stop, ssub, esub, lev, rec); |
631 | 0 | if (dp != NULL) |
632 | 0 | return(dp); |
633 | 0 | /* that one missed, try next one */ |
634 | 0 | if (OP(m->g->strip[esub]) == O_CH) |
635 | 0 | return(NULL); /* there is none */ |
636 | 0 | esub++; |
637 | 0 | assert(OP(m->g->strip[esub]) == OOR2); |
638 | 0 | ssub = esub + 1; |
639 | 0 | esub += OPND(m->g->strip[esub]); |
640 | 0 | if (OP(m->g->strip[esub]) == OOR2) |
641 | 0 | esub--; |
642 | 0 | else |
643 | 0 | assert(OP(m->g->strip[esub]) == O_CH); |
644 | 0 | } |
645 | 0 | break; |
646 | 15 | case OLPAREN: /* must undo assignment if rest fails */ |
647 | 15 | i = OPND(s); |
648 | 15 | assert(0 < i && i <= m->g->nsub); |
649 | 15 | offsave = m->pmatch[i].rm_so; |
650 | 15 | m->pmatch[i].rm_so = sp - m->offp; |
651 | 15 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
652 | 15 | if (dp != NULL) |
653 | 15 | return(dp)4 ; |
654 | 11 | m->pmatch[i].rm_so = offsave; |
655 | 11 | return(NULL); |
656 | 11 | break0 ; |
657 | 21 | case ORPAREN: /* must undo assignment if rest fails */ |
658 | 21 | i = OPND(s); |
659 | 21 | assert(0 < i && i <= m->g->nsub); |
660 | 21 | offsave = m->pmatch[i].rm_eo; |
661 | 21 | m->pmatch[i].rm_eo = sp - m->offp; |
662 | 21 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
663 | 21 | if (dp != NULL) |
664 | 21 | return(dp)4 ; |
665 | 17 | m->pmatch[i].rm_eo = offsave; |
666 | 17 | return(NULL); |
667 | 17 | break0 ; |
668 | 17 | default: /* uh oh */ |
669 | 0 | assert(nope); |
670 | 0 | break; |
671 | 0 | } |
672 | 0 | |
673 | 0 | /* "can't happen" */ |
674 | 0 | assert(nope); |
675 | 0 | /* NOTREACHED */ |
676 | 0 | return NULL; |
677 | 0 | } Line | Count | Source | 485 | 90 | { | 486 | 90 | int i; | 487 | 90 | sopno ss; /* start sop of current subRE */ | 488 | 90 | const char *sp; /* start of string matched by it */ | 489 | 90 | sopno ssub; /* start sop of subsubRE */ | 490 | 90 | sopno esub; /* end sop of subsubRE */ | 491 | 90 | const char *ssp; /* start of string matched by subsubRE */ | 492 | 90 | const char *dp; | 493 | 90 | size_t len; | 494 | 90 | int hard; | 495 | 90 | sop s; | 496 | 90 | llvm_regoff_t offsave; | 497 | 90 | cset *cs; | 498 | 90 | | 499 | 90 | AT("back", start, stop, startst, stopst); | 500 | 90 | sp = start; | 501 | 90 | | 502 | 90 | /* get as far as we can with easy stuff */ | 503 | 90 | hard = 0; | 504 | 206 | for (ss = startst; !hard && ss < stopst132 ; ss++116 ) | 505 | 129 | switch (OP(s = m->g->strip[ss])) { | 506 | 129 | case 25 OCHAR25 : | 507 | 25 | if (sp == stop || *sp++ != (char)OPND(s)) | 508 | 25 | return(NULL)6 ; | 509 | 19 | break; | 510 | 19 | case 0 OANY0 : | 511 | 0 | if (sp == stop) | 512 | 0 | return(NULL); | 513 | 0 | sp++; | 514 | 0 | break; | 515 | 30 | case OANYOF: | 516 | 30 | cs = &m->g->sets[OPND(s)]; | 517 | 30 | if (sp == stop || !CHIN(cs, *sp++)) | 518 | 30 | return(NULL)7 ; | 519 | 23 | break; | 520 | 23 | case 0 OBOL0 : | 521 | 0 | if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || | 522 | 0 | (sp < m->endp && *(sp-1) == '\n' && | 523 | 0 | (m->g->cflags®_NEWLINE)) ) | 524 | 0 | { /* yes */ } | 525 | 0 | else | 526 | 0 | return(NULL); | 527 | 0 | break; | 528 | 0 | case OEOL: | 529 | 0 | if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || | 530 | 0 | (sp < m->endp && *sp == '\n' && | 531 | 0 | (m->g->cflags®_NEWLINE)) ) | 532 | 0 | { /* yes */ } | 533 | 0 | else | 534 | 0 | return(NULL); | 535 | 0 | break; | 536 | 0 | case OBOW: | 537 | 0 | if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || | 538 | 0 | (sp < m->endp && *(sp-1) == '\n' && | 539 | 0 | (m->g->cflags®_NEWLINE)) || | 540 | 0 | (sp > m->beginp && | 541 | 0 | !ISWORD(*(sp-1))) ) 
&& | 542 | 0 | (sp < m->endp && ISWORD(*sp)) ) | 543 | 0 | { /* yes */ } | 544 | 0 | else | 545 | 0 | return(NULL); | 546 | 0 | break; | 547 | 0 | case OEOW: | 548 | 0 | if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || | 549 | 0 | (sp < m->endp && *sp == '\n' && | 550 | 0 | (m->g->cflags®_NEWLINE)) || | 551 | 0 | (sp < m->endp && !ISWORD(*sp)) ) && | 552 | 0 | (sp > m->beginp && ISWORD(*(sp-1))) ) | 553 | 0 | { /* yes */ } | 554 | 0 | else | 555 | 0 | return(NULL); | 556 | 0 | break; | 557 | 0 | case O_QUEST: | 558 | 0 | break; | 559 | 0 | case OOR1: /* matches null but needs to skip */ | 560 | 0 | ss++; | 561 | 0 | s = m->g->strip[ss]; | 562 | 0 | do { | 563 | 0 | assert(OP(s) == OOR2); | 564 | 0 | ss += OPND(s); | 565 | 0 | } while (OP(s = m->g->strip[ss]) != O_CH); | 566 | 0 | /* note that the ss++ gets us past the O_CH */ | 567 | 0 | break; | 568 | 74 | default: /* have to make a choice */ | 569 | 74 | hard = 1; | 570 | 74 | break; | 571 | 129 | } | 572 | 90 | if (77 !hard77 ) { /* that was it! 
*/ | 573 | 3 | if (sp != stop) | 574 | 0 | return(NULL); | 575 | 3 | return(sp); | 576 | 3 | } | 577 | 74 | ss--; /* adjust for the for's final increment */ | 578 | 74 | | 579 | 74 | /* the hard stuff */ | 580 | 74 | AT("hard", sp, stop, ss, stopst); | 581 | 74 | s = m->g->strip[ss]; | 582 | 74 | switch (OP(s)) { | 583 | 74 | case 16 OBACK_16 : /* the vilest depths */ | 584 | 16 | i = OPND(s); | 585 | 16 | assert(0 < i && i <= m->g->nsub); | 586 | 16 | if (m->pmatch[i].rm_eo == -1) | 587 | 0 | return(NULL); | 588 | 16 | assert(m->pmatch[i].rm_so != -1); | 589 | 16 | len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; | 590 | 16 | if (len == 0 && rec++ > 0 MAX_RECURSION0 ) | 591 | 16 | return(NULL)0 ; | 592 | 16 | assert(stop - m->beginp >= len); | 593 | 16 | if (sp > stop - len) | 594 | 3 | return(NULL); /* not enough left to match */ | 595 | 13 | ssp = m->offp + m->pmatch[i].rm_so; | 596 | 13 | if (memcmp(sp, ssp, len) != 0) | 597 | 6 | return(NULL); | 598 | 23 | while (7 m->g->strip[ss] != SOP(O_BACK, i)) | 599 | 16 | ss++; | 600 | 7 | return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); | 601 | 7 | break0 ; | 602 | 7 | case 0 OQUEST_0 : /* to null or not */ | 603 | 0 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | 604 | 0 | if (dp != NULL) | 605 | 0 | return(dp); /* not */ | 606 | 0 | return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec)); | 607 | 0 | break; | 608 | 7 | case OPLUS_: | 609 | 7 | assert(m->lastpos != NULL); | 610 | 7 | assert(lev+1 <= m->g->nplus); | 611 | 7 | m->lastpos[lev+1] = sp; | 612 | 7 | return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); | 613 | 0 | break; | 614 | 15 | case O_PLUS: | 615 | 15 | if (sp == m->lastpos[lev]) /* last pass matched null */ | 616 | 0 | return(backref(m, sp, stop, ss+1, stopst, lev-1, rec)); | 617 | 15 | /* try another pass */ | 618 | 15 | m->lastpos[lev] = sp; | 619 | 15 | dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec); | 620 | 15 | if (dp == NULL) | 621 | 15 | return(13 backref13 (m, 
sp, stop, ss+1, stopst, lev-1, rec)); | 622 | 2 | else | 623 | 2 | return(dp); | 624 | 0 | break; | 625 | 0 | case OCH_: /* find the right one, if any */ | 626 | 0 | ssub = ss + 1; | 627 | 0 | esub = ss + OPND(s) - 1; | 628 | 0 | assert(OP(m->g->strip[esub]) == OOR1); | 629 | 0 | for (;;) { /* find first matching branch */ | 630 | 0 | dp = backref(m, sp, stop, ssub, esub, lev, rec); | 631 | 0 | if (dp != NULL) | 632 | 0 | return(dp); | 633 | 0 | /* that one missed, try next one */ | 634 | 0 | if (OP(m->g->strip[esub]) == O_CH) | 635 | 0 | return(NULL); /* there is none */ | 636 | 0 | esub++; | 637 | 0 | assert(OP(m->g->strip[esub]) == OOR2); | 638 | 0 | ssub = esub + 1; | 639 | 0 | esub += OPND(m->g->strip[esub]); | 640 | 0 | if (OP(m->g->strip[esub]) == OOR2) | 641 | 0 | esub--; | 642 | 0 | else | 643 | 0 | assert(OP(m->g->strip[esub]) == O_CH); | 644 | 0 | } | 645 | 0 | break; | 646 | 15 | case OLPAREN: /* must undo assignment if rest fails */ | 647 | 15 | i = OPND(s); | 648 | 15 | assert(0 < i && i <= m->g->nsub); | 649 | 15 | offsave = m->pmatch[i].rm_so; | 650 | 15 | m->pmatch[i].rm_so = sp - m->offp; | 651 | 15 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | 652 | 15 | if (dp != NULL) | 653 | 15 | return(dp)4 ; | 654 | 11 | m->pmatch[i].rm_so = offsave; | 655 | 11 | return(NULL); | 656 | 11 | break0 ; | 657 | 21 | case ORPAREN: /* must undo assignment if rest fails */ | 658 | 21 | i = OPND(s); | 659 | 21 | assert(0 < i && i <= m->g->nsub); | 660 | 21 | offsave = m->pmatch[i].rm_eo; | 661 | 21 | m->pmatch[i].rm_eo = sp - m->offp; | 662 | 21 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | 663 | 21 | if (dp != NULL) | 664 | 21 | return(dp)4 ; | 665 | 17 | m->pmatch[i].rm_eo = offsave; | 666 | 17 | return(NULL); | 667 | 17 | break0 ; | 668 | 17 | default: /* uh oh */ | 669 | 0 | assert(nope); | 670 | 0 | break; | 671 | 0 | } | 672 | 0 | | 673 | 0 | /* "can't happen" */ | 674 | 0 | assert(nope); | 675 | 0 | /* NOTREACHED */ | 676 | 0 | return NULL; | 
677 | 0 | } |
Unexecuted instantiation: regexec.c:lbackref |
678 | | |
679 | | /* |
680 | | - fast - step through the string at top speed |
681 | | */ |
682 | | static const char * /* where tentative match ended, or NULL */ |
683 | | fast(struct match *m, const char *start, const char *stop, sopno startst, |
684 | | sopno stopst) |
685 | 2.29M | { |
686 | 2.29M | states st = m->st; |
687 | 2.29M | states fresh = m->fresh; |
688 | 2.29M | states tmp = m->tmp; |
689 | 2.29M | const char *p = start; |
690 | 2.29M | int c = (start == m->beginp) ? OUT2.29M : *(start-1)6 ; |
691 | 2.29M | int lastc; /* previous c */ |
692 | 2.29M | int flagch; |
693 | 2.29M | int i; |
694 | 2.29M | const char *coldp; /* last p after which no match was underway */ |
695 | 2.29M | |
696 | 2.29M | CLEAR(st); |
697 | 2.29M | SET1(st, startst); |
698 | 2.29M | st = step(m->g, startst, stopst, st, NOTHING, st); |
699 | 2.29M | ASSIGN(fresh, st); |
700 | 2.29M | SP("start", st, *p); |
701 | 2.29M | coldp = NULL; |
702 | 25.9M | for (;;) { |
703 | 25.9M | /* next character */ |
704 | 25.9M | lastc = c; |
705 | 25.9M | c = (p == m->endp) ? OUT2.26M : *p23.6M ; |
706 | 25.9M | if (EQ(st, fresh)) |
707 | 25.9M | coldp = p23.5M ; |
708 | 25.9M | |
709 | 25.9M | /* is there an EOL and/or BOL between lastc and c? */ |
710 | 25.9M | flagch = '\0'; |
711 | 25.9M | i = 0; |
712 | 25.9M | if ( (lastc == '\n' && m->g->cflags&738 REG_NEWLINE738 ) || |
713 | 25.9M | (25.9M lastc == 25.9M OUT25.9M && !(m->eflags&2.29M REG_NOTBOL2.29M )) ) { |
714 | 2.29M | flagch = BOL; |
715 | 2.29M | i = m->g->nbol; |
716 | 2.29M | } |
717 | 25.9M | if ( (c == '\n' && m->g->cflags&912 REG_NEWLINE912 ) || |
718 | 25.9M | (25.9M c == 25.9M OUT25.9M && !(m->eflags&2.26M REG_NOTEOL2.26M )) ) { |
719 | 2.26M | flagch = (flagch == BOL) ? BOLEOL3.81k : EOL2.26M ; |
720 | 2.26M | i += m->g->neol; |
721 | 2.26M | } |
722 | 25.9M | if (i != 0) { |
723 | 6.96M | for (; i > 0; i--3.48M ) |
724 | 3.48M | st = step(m->g, startst, stopst, st, flagch, st); |
725 | 3.47M | SP("boleol", st, c); |
726 | 3.47M | } |
727 | 25.9M | |
728 | 25.9M | /* how about a word boundary? */ |
729 | 25.9M | if ( (flagch == BOL || (23.6M lastc != 23.6M OUT23.6M && !23.6M ISWORD23.6M (lastc))) && |
730 | 25.9M | (2.41M c != 2.41M OUT2.41M && ISWORD2.41M (c)) ) { |
731 | 2.37M | flagch = BOW; |
732 | 2.37M | } |
733 | 25.9M | if ( (lastc != OUT && ISWORD23.6M (lastc)) && |
734 | 25.9M | (23.5M flagch == 23.5M EOL23.5M || (21.2M c != 21.2M OUT21.2M && !21.2M ISWORD21.2M (c))) ) { |
735 | 2.35M | flagch = EOW; |
736 | 2.35M | } |
737 | 25.9M | if (flagch == BOW || flagch == 23.5M EOW23.5M ) { |
738 | 4.73M | st = step(m->g, startst, stopst, st, flagch, st); |
739 | 4.73M | SP("boweow", st, c); |
740 | 4.73M | } |
741 | 25.9M | |
742 | 25.9M | /* are we done? */ |
743 | 25.9M | if (ISSET(st, stopst) || p == stop25.8M ) |
744 | 2.29M | break; /* NOTE BREAK OUT */ |
745 | 23.6M | |
746 | 23.6M | /* no, we must deal with this character */ |
747 | 23.6M | ASSIGN(tmp, st); |
748 | 23.6M | ASSIGN(st, fresh); |
749 | 23.6M | assert(c != OUT); |
750 | 23.6M | st = step(m->g, startst, stopst, tmp, c, st); |
751 | 23.6M | SP("aft", st, c); |
752 | 23.6M | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
753 | 23.6M | p++; |
754 | 23.6M | } |
755 | 2.29M | |
756 | 2.29M | assert(coldp != NULL); |
757 | 2.29M | m->coldp = coldp; |
758 | 2.29M | if (ISSET(st, stopst)) |
759 | 2.29M | return(p+1)96.0k ; |
760 | 2.19M | else |
761 | 2.19M | return(NULL); |
762 | 2.29M | } Line | Count | Source | 685 | 1.98M | { | 686 | 1.98M | states st = m->st; | 687 | 1.98M | states fresh = m->fresh; | 688 | 1.98M | states tmp = m->tmp; | 689 | 1.98M | const char *p = start; | 690 | 1.98M | int c = (start == m->beginp) ? OUT1.98M : *(start-1)6 ; | 691 | 1.98M | int lastc; /* previous c */ | 692 | 1.98M | int flagch; | 693 | 1.98M | int i; | 694 | 1.98M | const char *coldp; /* last p after which no match was underway */ | 695 | 1.98M | | 696 | 1.98M | CLEAR(st); | 697 | 1.98M | SET1(st, startst); | 698 | 1.98M | st = step(m->g, startst, stopst, st, NOTHING, st); | 699 | 1.98M | ASSIGN(fresh, st); | 700 | 1.98M | SP("start", st, *p); | 701 | 1.98M | coldp = NULL; | 702 | 22.2M | for (;;) { | 703 | 22.2M | /* next character */ | 704 | 22.2M | lastc = c; | 705 | 22.2M | c = (p == m->endp) ? OUT1.96M : *p20.2M ; | 706 | 22.2M | if (EQ(st, fresh)) | 707 | 22.2M | coldp = p20.1M ; | 708 | 22.2M | | 709 | 22.2M | /* is there an EOL and/or BOL between lastc and c? */ | 710 | 22.2M | flagch = '\0'; | 711 | 22.2M | i = 0; | 712 | 22.2M | if ( (lastc == '\n' && m->g->cflags&718 REG_NEWLINE718 ) || | 713 | 22.2M | (22.2M lastc == 22.2M OUT22.2M && !(m->eflags&1.98M REG_NOTBOL1.98M )) ) { | 714 | 1.98M | flagch = BOL; | 715 | 1.98M | i = m->g->nbol; | 716 | 1.98M | } | 717 | 22.2M | if ( (c == '\n' && m->g->cflags&883 REG_NEWLINE883 ) || | 718 | 22.2M | (22.2M c == 22.2M OUT22.2M && !(m->eflags&1.96M REG_NOTEOL1.96M )) ) { | 719 | 1.96M | flagch = (flagch == BOL) ? BOLEOL3.80k : EOL1.96M ; | 720 | 1.96M | i += m->g->neol; | 721 | 1.96M | } | 722 | 22.2M | if (i != 0) { | 723 | 5.98M | for (; i > 0; i--2.99M ) | 724 | 2.99M | st = step(m->g, startst, stopst, st, flagch, st); | 725 | 2.98M | SP("boleol", st, c); | 726 | 2.98M | } | 727 | 22.2M | | 728 | 22.2M | /* how about a word boundary? 
*/ | 729 | 22.2M | if ( (flagch == BOL || (20.2M lastc != 20.2M OUT20.2M && !20.2M ISWORD20.2M (lastc))) && | 730 | 22.2M | (2.09M c != 2.09M OUT2.09M && ISWORD2.09M (c)) ) { | 731 | 2.06M | flagch = BOW; | 732 | 2.06M | } | 733 | 22.2M | if ( (lastc != OUT && ISWORD20.2M (lastc)) && | 734 | 22.2M | (20.1M flagch == 20.1M EOL20.1M || (18.2M c != 18.2M OUT18.2M && !18.2M ISWORD18.2M (c))) ) { | 735 | 2.04M | flagch = EOW; | 736 | 2.04M | } | 737 | 22.2M | if (flagch == BOW || flagch == 20.1M EOW20.1M ) { | 738 | 4.10M | st = step(m->g, startst, stopst, st, flagch, st); | 739 | 4.10M | SP("boweow", st, c); | 740 | 4.10M | } | 741 | 22.2M | | 742 | 22.2M | /* are we done? */ | 743 | 22.2M | if (ISSET(st, stopst) || p == stop22.1M ) | 744 | 1.98M | break; /* NOTE BREAK OUT */ | 745 | 20.2M | | 746 | 20.2M | /* no, we must deal with this character */ | 747 | 20.2M | ASSIGN(tmp, st); | 748 | 20.2M | ASSIGN(st, fresh); | 749 | 20.2M | assert(c != OUT); | 750 | 20.2M | st = step(m->g, startst, stopst, tmp, c, st); | 751 | 20.2M | SP("aft", st, c); | 752 | 20.2M | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | 753 | 20.2M | p++; | 754 | 20.2M | } | 755 | 1.98M | | 756 | 1.98M | assert(coldp != NULL); | 757 | 1.98M | m->coldp = coldp; | 758 | 1.98M | if (ISSET(st, stopst)) | 759 | 1.98M | return(p+1)91.1k ; | 760 | 1.89M | else | 761 | 1.89M | return(NULL); | 762 | 1.98M | } |
Line | Count | Source | 685 | 302k | { | 686 | 302k | states st = m->st; | 687 | 302k | states fresh = m->fresh; | 688 | 302k | states tmp = m->tmp; | 689 | 302k | const char *p = start; | 690 | 302k | int c = (start == m->beginp) ? OUT : *(start-1)0 ; | 691 | 302k | int lastc; /* previous c */ | 692 | 302k | int flagch; | 693 | 302k | int i; | 694 | 302k | const char *coldp; /* last p after which no match was underway */ | 695 | 302k | | 696 | 302k | CLEAR(st); | 697 | 302k | SET1(st, startst); | 698 | 302k | st = step(m->g, startst, stopst, st, NOTHING, st); | 699 | 302k | ASSIGN(fresh, st); | 700 | 302k | SP("start", st, *p); | 701 | 302k | coldp = NULL; | 702 | 3.70M | for (;;) { | 703 | 3.70M | /* next character */ | 704 | 3.70M | lastc = c; | 705 | 3.70M | c = (p == m->endp) ? OUT301k : *p3.40M ; | 706 | 3.70M | if (EQ(st, fresh)) | 707 | 3.70M | coldp = p3.42M ; | 708 | 3.70M | | 709 | 3.70M | /* is there an EOL and/or BOL between lastc and c? */ | 710 | 3.70M | flagch = '\0'; | 711 | 3.70M | i = 0; | 712 | 3.70M | if ( (lastc == '\n' && m->g->cflags&20 REG_NEWLINE20 ) || | 713 | 3.70M | (3.70M lastc == 3.70M OUT3.70M && !(m->eflags&302k REG_NOTBOL302k )) ) { | 714 | 302k | flagch = BOL; | 715 | 302k | i = m->g->nbol; | 716 | 302k | } | 717 | 3.70M | if ( (c == '\n' && m->g->cflags&29 REG_NEWLINE29 ) || | 718 | 3.70M | (3.70M c == 3.70M OUT3.70M && !(m->eflags&301k REG_NOTEOL301k )) ) { | 719 | 301k | flagch = (flagch == BOL) ? BOLEOL9 : EOL301k ; | 720 | 301k | i += m->g->neol; | 721 | 301k | } | 722 | 3.70M | if (i != 0) { | 723 | 979k | for (; i > 0; i--489k ) | 724 | 489k | st = step(m->g, startst, stopst, st, flagch, st); | 725 | 489k | SP("boleol", st, c); | 726 | 489k | } | 727 | 3.70M | | 728 | 3.70M | /* how about a word boundary? 
*/ | 729 | 3.70M | if ( (flagch == BOL || (3.40M lastc != 3.40M OUT3.40M && !3.40M ISWORD3.40M (lastc))) && | 730 | 3.70M | (320k c != 320k OUT320k && ISWORD319k (c)) ) { | 731 | 313k | flagch = BOW; | 732 | 313k | } | 733 | 3.70M | if ( (lastc != OUT && ISWORD3.40M (lastc)) && | 734 | 3.70M | (3.38M flagch == 3.38M EOL3.38M || (3.08M c != 3.08M OUT3.08M && !3.08M ISWORD3.08M (c))) ) { | 735 | 312k | flagch = EOW; | 736 | 312k | } | 737 | 3.70M | if (flagch == BOW || flagch == 3.39M EOW3.39M ) { | 738 | 626k | st = step(m->g, startst, stopst, st, flagch, st); | 739 | 626k | SP("boweow", st, c); | 740 | 626k | } | 741 | 3.70M | | 742 | 3.70M | /* are we done? */ | 743 | 3.70M | if (ISSET(st, stopst) || p == stop3.70M ) | 744 | 302k | break; /* NOTE BREAK OUT */ | 745 | 3.40M | | 746 | 3.40M | /* no, we must deal with this character */ | 747 | 3.40M | ASSIGN(tmp, st); | 748 | 3.40M | ASSIGN(st, fresh); | 749 | 3.40M | assert(c != OUT); | 750 | 3.40M | st = step(m->g, startst, stopst, tmp, c, st); | 751 | 3.40M | SP("aft", st, c); | 752 | 3.40M | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | 753 | 3.40M | p++; | 754 | 3.40M | } | 755 | 302k | | 756 | 302k | assert(coldp != NULL); | 757 | 302k | m->coldp = coldp; | 758 | 302k | if (ISSET(st, stopst)) | 759 | 302k | return(p+1)4.83k ; | 760 | 297k | else | 761 | 297k | return(NULL); | 762 | 302k | } |
|
763 | | |
764 | | /* |
765 | | - slow - step through the string more deliberately |
766 | | */ |
767 | | static const char * /* where it ended */ |
768 | | slow(struct match *m, const char *start, const char *stop, sopno startst, |
769 | | sopno stopst) |
770 | 81.7k | { |
771 | 81.7k | states st = m->st; |
772 | 81.7k | states empty = m->empty; |
773 | 81.7k | states tmp = m->tmp; |
774 | 81.7k | const char *p = start; |
775 | 81.7k | int c = (start == m->beginp) ? OUT7.49k : *(start-1)74.2k ; |
776 | 81.7k | int lastc; /* previous c */ |
777 | 81.7k | int flagch; |
778 | 81.7k | int i; |
779 | 81.7k | const char *matchp; /* last p at which a match ended */ |
780 | 81.7k | |
781 | 81.7k | AT("slow", start, stop, startst, stopst); |
782 | 81.7k | CLEAR(st); |
783 | 81.7k | SET1(st, startst); |
784 | 81.7k | SP("sstart", st, *p); |
785 | 81.7k | st = step(m->g, startst, stopst, st, NOTHING, st); |
786 | 81.7k | matchp = NULL; |
787 | 461k | for (;;) { |
788 | 461k | /* next character */ |
789 | 461k | lastc = c; |
790 | 461k | c = (p == m->endp) ? OUT19.9k : *p441k ; |
791 | 461k | |
792 | 461k | /* is there an EOL and/or BOL between lastc and c? */ |
793 | 461k | flagch = '\0'; |
794 | 461k | i = 0; |
795 | 461k | if ( (lastc == '\n' && m->g->cflags&186 REG_NEWLINE186 ) || |
796 | 461k | (461k lastc == 461k OUT461k && !(m->eflags&7.49k REG_NOTBOL7.49k )) ) { |
797 | 7.67k | flagch = BOL; |
798 | 7.67k | i = m->g->nbol; |
799 | 7.67k | } |
800 | 461k | if ( (c == '\n' && m->g->cflags&365 REG_NEWLINE365 ) || |
801 | 461k | (461k c == 461k OUT461k && !(m->eflags&19.9k REG_NOTEOL19.9k )) ) { |
802 | 20.2k | flagch = (flagch == BOL) ? BOLEOL14 : EOL20.2k ; |
803 | 20.2k | i += m->g->neol; |
804 | 20.2k | } |
805 | 461k | if (i != 0) { |
806 | 32.2k | for (; i > 0; i--16.1k ) |
807 | 16.1k | st = step(m->g, startst, stopst, st, flagch, st); |
808 | 16.1k | SP("sboleol", st, c); |
809 | 16.1k | } |
810 | 461k | |
811 | 461k | /* how about a word boundary? */ |
812 | 461k | if ( (flagch == BOL || (454k lastc != 454k OUT454k && !454k ISWORD454k (lastc))) && |
813 | 461k | (125k c != 125k OUT125k && ISWORD113k (c)) ) { |
814 | 79.4k | flagch = BOW; |
815 | 79.4k | } |
816 | 461k | if ( (lastc != OUT && ISWORD454k (lastc)) && |
817 | 461k | (336k flagch == 336k EOL336k || (328k c != 328k OUT328k && !328k ISWORD328k (c))) ) { |
818 | 85.1k | flagch = EOW; |
819 | 85.1k | } |
820 | 461k | if (flagch == BOW || flagch == 382k EOW382k ) { |
821 | 164k | st = step(m->g, startst, stopst, st, flagch, st); |
822 | 164k | SP("sboweow", st, c); |
823 | 164k | } |
824 | 461k | |
825 | 461k | /* are we done? */ |
826 | 461k | if (ISSET(st, stopst)) |
827 | 461k | matchp = p139k ; |
828 | 461k | if (EQ(st, empty) || p == stop427k ) |
829 | 81.7k | break; /* NOTE BREAK OUT */ |
830 | 380k | |
831 | 380k | /* no, we must deal with this character */ |
832 | 380k | ASSIGN(tmp, st); |
833 | 380k | ASSIGN(st, empty); |
834 | 380k | assert(c != OUT); |
835 | 380k | st = step(m->g, startst, stopst, tmp, c, st); |
836 | 380k | SP("saft", st, c); |
837 | 380k | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
838 | 380k | p++; |
839 | 380k | } |
840 | 81.7k | |
841 | 81.7k | return(matchp); |
842 | 81.7k | } Line | Count | Source | 770 | 62.1k | { | 771 | 62.1k | states st = m->st; | 772 | 62.1k | states empty = m->empty; | 773 | 62.1k | states tmp = m->tmp; | 774 | 62.1k | const char *p = start; | 775 | 62.1k | int c = (start == m->beginp) ? OUT6.87k : *(start-1)55.2k ; | 776 | 62.1k | int lastc; /* previous c */ | 777 | 62.1k | int flagch; | 778 | 62.1k | int i; | 779 | 62.1k | const char *matchp; /* last p at which a match ended */ | 780 | 62.1k | | 781 | 62.1k | AT("slow", start, stop, startst, stopst); | 782 | 62.1k | CLEAR(st); | 783 | 62.1k | SET1(st, startst); | 784 | 62.1k | SP("sstart", st, *p); | 785 | 62.1k | st = step(m->g, startst, stopst, st, NOTHING, st); | 786 | 62.1k | matchp = NULL; | 787 | 388k | for (;;) { | 788 | 388k | /* next character */ | 789 | 388k | lastc = c; | 790 | 388k | c = (p == m->endp) ? OUT11.3k : *p377k ; | 791 | 388k | | 792 | 388k | /* is there an EOL and/or BOL between lastc and c? */ | 793 | 388k | flagch = '\0'; | 794 | 388k | i = 0; | 795 | 388k | if ( (lastc == '\n' && m->g->cflags&177 REG_NEWLINE177 ) || | 796 | 388k | (388k lastc == 388k OUT388k && !(m->eflags&6.87k REG_NOTBOL6.87k )) ) { | 797 | 7.03k | flagch = BOL; | 798 | 7.03k | i = m->g->nbol; | 799 | 7.03k | } | 800 | 388k | if ( (c == '\n' && m->g->cflags&346 REG_NEWLINE346 ) || | 801 | 388k | (388k c == 388k OUT388k && !(m->eflags&11.3k REG_NOTEOL11.3k )) ) { | 802 | 11.7k | flagch = (flagch == BOL) ? BOLEOL13 : EOL11.6k ; | 803 | 11.7k | i += m->g->neol; | 804 | 11.7k | } | 805 | 388k | if (i != 0) { | 806 | 13.8k | for (; i > 0; i--6.92k ) | 807 | 6.92k | st = step(m->g, startst, stopst, st, flagch, st); | 808 | 6.92k | SP("sboleol", st, c); | 809 | 6.92k | } | 810 | 388k | | 811 | 388k | /* how about a word boundary? 
*/ | 812 | 388k | if ( (flagch == BOL || (381k lastc != 381k OUT381k && !381k ISWORD381k (lastc))) && | 813 | 388k | (103k c != 103k OUT103k && ISWORD92.3k (c)) ) { | 814 | 64.1k | flagch = BOW; | 815 | 64.1k | } | 816 | 388k | if ( (lastc != OUT && ISWORD381k (lastc)) && | 817 | 388k | (285k flagch == 285k EOL285k || (284k c != 284k OUT284k && !284k ISWORD284k (c))) ) { | 818 | 69.9k | flagch = EOW; | 819 | 69.9k | } | 820 | 388k | if (flagch == BOW || flagch == 324k EOW324k ) { | 821 | 134k | st = step(m->g, startst, stopst, st, flagch, st); | 822 | 134k | SP("sboweow", st, c); | 823 | 134k | } | 824 | 388k | | 825 | 388k | /* are we done? */ | 826 | 388k | if (ISSET(st, stopst)) | 827 | 388k | matchp = p116k ; | 828 | 388k | if (EQ(st, empty) || p == stop358k ) | 829 | 62.1k | break; /* NOTE BREAK OUT */ | 830 | 326k | | 831 | 326k | /* no, we must deal with this character */ | 832 | 326k | ASSIGN(tmp, st); | 833 | 326k | ASSIGN(st, empty); | 834 | 326k | assert(c != OUT); | 835 | 326k | st = step(m->g, startst, stopst, tmp, c, st); | 836 | 326k | SP("saft", st, c); | 837 | 326k | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | 838 | 326k | p++; | 839 | 326k | } | 840 | 62.1k | | 841 | 62.1k | return(matchp); | 842 | 62.1k | } |
Line | Count | Source | 770 | 19.6k | { | 771 | 19.6k | states st = m->st; | 772 | 19.6k | states empty = m->empty; | 773 | 19.6k | states tmp = m->tmp; | 774 | 19.6k | const char *p = start; | 775 | 19.6k | int c = (start == m->beginp) ? OUT627 : *(start-1)18.9k ; | 776 | 19.6k | int lastc; /* previous c */ | 777 | 19.6k | int flagch; | 778 | 19.6k | int i; | 779 | 19.6k | const char *matchp; /* last p at which a match ended */ | 780 | 19.6k | | 781 | 19.6k | AT("slow", start, stop, startst, stopst); | 782 | 19.6k | CLEAR(st); | 783 | 19.6k | SET1(st, startst); | 784 | 19.6k | SP("sstart", st, *p); | 785 | 19.6k | st = step(m->g, startst, stopst, st, NOTHING, st); | 786 | 19.6k | matchp = NULL; | 787 | 73.4k | for (;;) { | 788 | 73.4k | /* next character */ | 789 | 73.4k | lastc = c; | 790 | 73.4k | c = (p == m->endp) ? OUT8.55k : *p64.8k ; | 791 | 73.4k | | 792 | 73.4k | /* is there an EOL and/or BOL between lastc and c? */ | 793 | 73.4k | flagch = '\0'; | 794 | 73.4k | i = 0; | 795 | 73.4k | if ( (lastc == '\n' && m->g->cflags&9 REG_NEWLINE9 ) || | 796 | 73.4k | (73.4k lastc == 73.4k OUT73.4k && !(m->eflags&627 REG_NOTBOL627 )) ) { | 797 | 636 | flagch = BOL; | 798 | 636 | i = m->g->nbol; | 799 | 636 | } | 800 | 73.4k | if ( (c == '\n' && m->g->cflags&19 REG_NEWLINE19 ) || | 801 | 73.4k | (73.3k c == 73.3k OUT73.3k && !(m->eflags&8.55k REG_NOTEOL8.55k )) ) { | 802 | 8.57k | flagch = (flagch == BOL) ? BOLEOL1 : EOL8.57k ; | 803 | 8.57k | i += m->g->neol; | 804 | 8.57k | } | 805 | 73.4k | if (i != 0) { | 806 | 18.3k | for (; i > 0; i--9.18k ) | 807 | 9.18k | st = step(m->g, startst, stopst, st, flagch, st); | 808 | 9.18k | SP("sboleol", st, c); | 809 | 9.18k | } | 810 | 73.4k | | 811 | 73.4k | /* how about a word boundary? 
*/ | 812 | 73.4k | if ( (flagch == BOL || (72.7k lastc != 72.7k OUT72.7k && !72.7k ISWORD72.7k (lastc))) && | 813 | 73.4k | (22.3k c != 22.3k OUT22.3k && ISWORD21.2k (c)) ) { | 814 | 15.2k | flagch = BOW; | 815 | 15.2k | } | 816 | 73.4k | if ( (lastc != OUT && ISWORD72.7k (lastc)) && | 817 | 73.4k | (51.0k flagch == 51.0k EOL51.0k || (43.5k c != 43.5k OUT43.5k && !43.5k ISWORD43.5k (c))) ) { | 818 | 15.2k | flagch = EOW; | 819 | 15.2k | } | 820 | 73.4k | if (flagch == BOW || flagch == 58.1k EOW58.1k ) { | 821 | 30.5k | st = step(m->g, startst, stopst, st, flagch, st); | 822 | 30.5k | SP("sboweow", st, c); | 823 | 30.5k | } | 824 | 73.4k | | 825 | 73.4k | /* are we done? */ | 826 | 73.4k | if (ISSET(st, stopst)) | 827 | 73.4k | matchp = p23.7k ; | 828 | 73.4k | if (EQ(st, empty) || p == stop69.1k ) | 829 | 19.6k | break; /* NOTE BREAK OUT */ | 830 | 53.7k | | 831 | 53.7k | /* no, we must deal with this character */ | 832 | 53.7k | ASSIGN(tmp, st); | 833 | 53.7k | ASSIGN(st, empty); | 834 | 53.7k | assert(c != OUT); | 835 | 53.7k | st = step(m->g, startst, stopst, tmp, c, st); | 836 | 53.7k | SP("saft", st, c); | 837 | 53.7k | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | 838 | 53.7k | p++; | 839 | 53.7k | } | 840 | 19.6k | | 841 | 19.6k | return(matchp); | 842 | 19.6k | } |
|
843 | | |
844 | | |
845 | | /* |
846 | | - step - map set of states reachable before char to set reachable after |
847 | | */ |
848 | | static states |
849 | | step(struct re_guts *g, |
850 | | sopno start, /* start state within strip */ |
851 | | sopno stop, /* state after stop state within strip */ |
852 | | states bef, /* states reachable before */ |
853 | | int ch, /* character or NONCHAR code */ |
854 | | states aft) /* states already known reachable after */ |
855 | 34.8M | { |
856 | 34.8M | cset *cs; |
857 | 34.8M | sop s; |
858 | 34.8M | sopno pc; |
859 | 34.8M | onestate here; /* note, macros know this name */ |
860 | 34.8M | sopno look; |
861 | 34.8M | int i; |
862 | 34.8M | |
863 | 1.43G | for (pc = start, INIT34.8M (here, pc); pc != stop; pc++, 1.40G INC1.40G (here)) { |
864 | 1.40G | s = g->strip[pc]; |
865 | 1.40G | switch (OP(s)) { |
866 | 1.40G | case 0 OEND0 : |
867 | 0 | assert(pc == stop-1); |
868 | 0 | break; |
869 | 1.40G | case 705M OCHAR705M : |
870 | 705M | /* only characters can match */ |
871 | 705M | assert(!NONCHAR(ch) || ch != (char)OPND(s)); |
872 | 705M | if (ch == (char)OPND(s)) |
873 | 705M | FWD17.5M (aft, bef, 1); |
874 | 705M | break; |
875 | 1.40G | case 33.8M OBOL33.8M : |
876 | 33.8M | if (ch == BOL || ch == 31.5M BOLEOL31.5M ) |
877 | 33.8M | FWD2.28M (aft, bef, 1); |
878 | 33.8M | break; |
879 | 1.40G | case 17.0M OEOL17.0M : |
880 | 17.0M | if (ch == EOL || ch == 15.7M BOLEOL15.7M ) |
881 | 17.0M | FWD1.20M (aft, bef, 1); |
882 | 17.0M | break; |
883 | 1.40G | case 0 OBOW0 : |
884 | 0 | if (ch == BOW) |
885 | 0 | FWD(aft, bef, 1); |
886 | 0 | break; |
887 | 1.40G | case 0 OEOW0 : |
888 | 0 | if (ch == EOW) |
889 | 0 | FWD(aft, bef, 1); |
890 | 0 | break; |
891 | 1.40G | case 4.27M OANY4.27M : |
892 | 4.27M | if (!NONCHAR(ch)) |
893 | 4.27M | FWD3.04M (aft, bef, 1); |
894 | 4.27M | break; |
895 | 1.40G | case 12.6M OANYOF12.6M : |
896 | 12.6M | cs = &g->sets[OPND(s)]; |
897 | 12.6M | if (!NONCHAR(ch) && CHIN9.00M (cs, ch)) |
898 | 12.6M | FWD1.91M (aft, bef, 1); |
899 | 12.6M | break; |
900 | 1.40G | case 706 OBACK_706 : /* ignored here */ |
901 | 706 | case O_BACK: |
902 | 706 | FWD(aft, aft, 1); |
903 | 706 | break; |
904 | 5.32M | case OPLUS_: /* forward, this is just an empty */ |
905 | 5.32M | FWD(aft, aft, 1); |
906 | 5.32M | break; |
907 | 5.32M | case O_PLUS: /* both forward and back */ |
908 | 5.32M | FWD(aft, aft, 1); |
909 | 5.32M | i = ISSETBACK(aft, OPND(s)); |
910 | 5.32M | BACK(aft, aft, OPND(s)); |
911 | 5.32M | if (!i && ISSETBACK4.46M (aft, OPND(s))) { |
912 | 362k | /* oho, must reconsider loop body */ |
913 | 362k | pc -= OPND(s) + 1; |
914 | 362k | INIT(here, pc); |
915 | 362k | } |
916 | 5.32M | break; |
917 | 3.84M | case OQUEST_: /* two branches, both forward */ |
918 | 3.84M | FWD(aft, aft, 1); |
919 | 3.84M | FWD(aft, aft, OPND(s)); |
920 | 3.84M | break; |
921 | 3.84M | case O_QUEST: /* just an empty */ |
922 | 3.84M | FWD(aft, aft, 1); |
923 | 3.84M | break; |
924 | 183M | case OLPAREN: /* not significant here */ |
925 | 183M | case ORPAREN: |
926 | 183M | FWD(aft, aft, 1); |
927 | 183M | break; |
928 | 183M | case 80.5M OCH_80.5M : /* mark the first two branches */ |
929 | 80.5M | FWD(aft, aft, 1); |
930 | 80.5M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
931 | 80.5M | FWD(aft, aft, OPND(s)); |
932 | 80.5M | break; |
933 | 183M | case 133M OOR1133M : /* done a branch, find the O_CH */ |
934 | 133M | if (ISSTATEIN(aft, here)) { |
935 | 358k | for (look = 1; |
936 | 815k | OP(s = g->strip[pc+look]) != O_CH; |
937 | 457k | look += OPND(s)) |
938 | 358k | assert(OP(s) == OOR2); |
939 | 358k | FWD(aft, aft, look); |
940 | 358k | } |
941 | 133M | break; |
942 | 183M | case 133M OOR2133M : /* propagate OCH_'s marking */ |
943 | 133M | FWD(aft, aft, 1); |
944 | 133M | if (OP(g->strip[pc+OPND(s)]) != O_CH) { |
945 | 53.3M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
946 | 53.3M | FWD(aft, aft, OPND(s)); |
947 | 53.3M | } |
948 | 133M | break; |
949 | 183M | case 80.5M O_CH80.5M : /* just empty */ |
950 | 80.5M | FWD(aft, aft, 1); |
951 | 80.5M | break; |
952 | 183M | default: /* ooooops... */ |
953 | 0 | assert(nope); |
954 | 0 | break; |
955 | 1.40G | } |
956 | 1.40G | } |
957 | 34.8M | |
958 | 34.8M | return(aft); |
959 | 34.8M | } Line | Count | Source | 855 | 29.8M | { | 856 | 29.8M | cset *cs; | 857 | 29.8M | sop s; | 858 | 29.8M | sopno pc; | 859 | 29.8M | onestate here; /* note, macros know this name */ | 860 | 29.8M | sopno look; | 861 | 29.8M | int i; | 862 | 29.8M | | 863 | 1.03G | for (pc = start, INIT29.8M (here, pc); pc != stop; pc++, 1.00G INC1.00G (here)) { | 864 | 1.00G | s = g->strip[pc]; | 865 | 1.00G | switch (OP(s)) { | 866 | 1.00G | case 0 OEND0 : | 867 | 0 | assert(pc == stop-1); | 868 | 0 | break; | 869 | 1.00G | case 491M OCHAR491M : | 870 | 491M | /* only characters can match */ | 871 | 491M | assert(!NONCHAR(ch) || ch != (char)OPND(s)); | 872 | 491M | if (ch == (char)OPND(s)) | 873 | 491M | FWD12.9M (aft, bef, 1); | 874 | 491M | break; | 875 | 1.00G | case 29.0M OBOL29.0M : | 876 | 29.0M | if (ch == BOL || ch == 27.1M BOLEOL27.1M ) | 877 | 29.0M | FWD1.98M (aft, bef, 1); | 878 | 29.0M | break; | 879 | 1.00G | case 13.9M OEOL13.9M : | 880 | 13.9M | if (ch == EOL || ch == 12.9M BOLEOL12.9M ) | 881 | 13.9M | FWD1.01M (aft, bef, 1); | 882 | 13.9M | break; | 883 | 1.00G | case 0 OBOW0 : | 884 | 0 | if (ch == BOW) | 885 | 0 | FWD(aft, bef, 1); | 886 | 0 | break; | 887 | 1.00G | case 0 OEOW0 : | 888 | 0 | if (ch == EOW) | 889 | 0 | FWD(aft, bef, 1); | 890 | 0 | break; | 891 | 1.00G | case 3.49M OANY3.49M : | 892 | 3.49M | if (!NONCHAR(ch)) | 893 | 3.49M | FWD2.49M (aft, bef, 1); | 894 | 3.49M | break; | 895 | 1.00G | case 10.3M OANYOF10.3M : | 896 | 10.3M | cs = &g->sets[OPND(s)]; | 897 | 10.3M | if (!NONCHAR(ch) && CHIN7.36M (cs, ch)) | 898 | 10.3M | FWD1.74M (aft, bef, 1); | 899 | 10.3M | break; | 900 | 1.00G | case 706 OBACK_706 : /* ignored here */ | 901 | 706 | case O_BACK: | 902 | 706 | FWD(aft, aft, 1); | 903 | 706 | break; | 904 | 4.19M | case OPLUS_: /* forward, this is just an empty */ | 905 | 4.19M | FWD(aft, aft, 1); | 906 | 4.19M | break; | 907 | 4.19M | case O_PLUS: /* both forward and back */ | 908 | 4.19M | FWD(aft, aft, 1); | 909 | 4.19M | i = 
ISSETBACK(aft, OPND(s)); | 910 | 4.19M | BACK(aft, aft, OPND(s)); | 911 | 4.19M | if (!i && ISSETBACK3.45M (aft, OPND(s))) { | 912 | 321k | /* oho, must reconsider loop body */ | 913 | 321k | pc -= OPND(s) + 1; | 914 | 321k | INIT(here, pc); | 915 | 321k | } | 916 | 4.19M | break; | 917 | 2.89M | case OQUEST_: /* two branches, both forward */ | 918 | 2.89M | FWD(aft, aft, 1); | 919 | 2.89M | FWD(aft, aft, OPND(s)); | 920 | 2.89M | break; | 921 | 2.89M | case O_QUEST: /* just an empty */ | 922 | 2.89M | FWD(aft, aft, 1); | 923 | 2.89M | break; | 924 | 143M | case OLPAREN: /* not significant here */ | 925 | 143M | case ORPAREN: | 926 | 143M | FWD(aft, aft, 1); | 927 | 143M | break; | 928 | 143M | case 58.3M OCH_58.3M : /* mark the first two branches */ | 929 | 58.3M | FWD(aft, aft, 1); | 930 | 58.3M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 931 | 58.3M | FWD(aft, aft, OPND(s)); | 932 | 58.3M | break; | 933 | 143M | case 91.0M OOR191.0M : /* done a branch, find the O_CH */ | 934 | 91.0M | if (ISSTATEIN(aft, here)) { | 935 | 277k | for (look = 1; | 936 | 633k | OP(s = g->strip[pc+look]) != O_CH; | 937 | 355k | look += OPND(s)) | 938 | 277k | assert(OP(s) == OOR2); | 939 | 277k | FWD(aft, aft, look); | 940 | 277k | } | 941 | 91.0M | break; | 942 | 143M | case 91.0M OOR291.0M : /* propagate OCH_'s marking */ | 943 | 91.0M | FWD(aft, aft, 1); | 944 | 91.0M | if (OP(g->strip[pc+OPND(s)]) != O_CH) { | 945 | 32.7M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 946 | 32.7M | FWD(aft, aft, OPND(s)); | 947 | 32.7M | } | 948 | 91.0M | break; | 949 | 143M | case 58.3M O_CH58.3M : /* just empty */ | 950 | 58.3M | FWD(aft, aft, 1); | 951 | 58.3M | break; | 952 | 143M | default: /* ooooops... */ | 953 | 0 | assert(nope); | 954 | 0 | break; | 955 | 1.00G | } | 956 | 1.00G | } | 957 | 29.8M | | 958 | 29.8M | return(aft); | 959 | 29.8M | } |
Line | Count | Source | 855 | 4.93M | { | 856 | 4.93M | cset *cs; | 857 | 4.93M | sop s; | 858 | 4.93M | sopno pc; | 859 | 4.93M | onestate here; /* note, macros know this name */ | 860 | 4.93M | sopno look; | 861 | 4.93M | int i; | 862 | 4.93M | | 863 | 403M | for (pc = start, INIT4.93M (here, pc); pc != stop; pc++, 398M INC398M (here)) { | 864 | 398M | s = g->strip[pc]; | 865 | 398M | switch (OP(s)) { | 866 | 398M | case 0 OEND0 : | 867 | 0 | assert(pc == stop-1); | 868 | 0 | break; | 869 | 398M | case 213M OCHAR213M : | 870 | 213M | /* only characters can match */ | 871 | 213M | assert(!NONCHAR(ch) || ch != (char)OPND(s)); | 872 | 213M | if (ch == (char)OPND(s)) | 873 | 213M | FWD4.61M (aft, bef, 1); | 874 | 213M | break; | 875 | 398M | case 4.73M OBOL4.73M : | 876 | 4.73M | if (ch == BOL || ch == 4.43M BOLEOL4.43M ) | 877 | 4.73M | FWD301k (aft, bef, 1); | 878 | 4.73M | break; | 879 | 398M | case 3.08M OEOL3.08M : | 880 | 3.08M | if (ch == EOL || ch == 2.89M BOLEOL2.89M ) | 881 | 3.08M | FWD192k (aft, bef, 1); | 882 | 3.08M | break; | 883 | 398M | case 0 OBOW0 : | 884 | 0 | if (ch == BOW) | 885 | 0 | FWD(aft, bef, 1); | 886 | 0 | break; | 887 | 398M | case 0 OEOW0 : | 888 | 0 | if (ch == EOW) | 889 | 0 | FWD(aft, bef, 1); | 890 | 0 | break; | 891 | 398M | case 780k OANY780k : | 892 | 780k | if (!NONCHAR(ch)) | 893 | 780k | FWD558k (aft, bef, 1); | 894 | 780k | break; | 895 | 398M | case 2.32M OANYOF2.32M : | 896 | 2.32M | cs = &g->sets[OPND(s)]; | 897 | 2.32M | if (!NONCHAR(ch) && CHIN1.63M (cs, ch)) | 898 | 2.32M | FWD166k (aft, bef, 1); | 899 | 2.32M | break; | 900 | 398M | case 0 OBACK_0 : /* ignored here */ | 901 | 0 | case O_BACK: | 902 | 0 | FWD(aft, aft, 1); | 903 | 0 | break; | 904 | 1.12M | case OPLUS_: /* forward, this is just an empty */ | 905 | 1.12M | FWD(aft, aft, 1); | 906 | 1.12M | break; | 907 | 1.12M | case O_PLUS: /* both forward and back */ | 908 | 1.12M | FWD(aft, aft, 1); | 909 | 1.12M | i = ISSETBACK(aft, OPND(s)); | 910 | 1.12M | 
BACK(aft, aft, OPND(s)); | 911 | 1.12M | if (!i && ISSETBACK1.01M (aft, OPND(s))) { | 912 | 41.4k | /* oho, must reconsider loop body */ | 913 | 41.4k | pc -= OPND(s) + 1; | 914 | 41.4k | INIT(here, pc); | 915 | 41.4k | } | 916 | 1.12M | break; | 917 | 948k | case OQUEST_: /* two branches, both forward */ | 918 | 948k | FWD(aft, aft, 1); | 919 | 948k | FWD(aft, aft, OPND(s)); | 920 | 948k | break; | 921 | 948k | case O_QUEST: /* just an empty */ | 922 | 948k | FWD(aft, aft, 1); | 923 | 948k | break; | 924 | 40.0M | case OLPAREN: /* not significant here */ | 925 | 40.0M | case ORPAREN: | 926 | 40.0M | FWD(aft, aft, 1); | 927 | 40.0M | break; | 928 | 40.0M | case 22.2M OCH_22.2M : /* mark the first two branches */ | 929 | 22.2M | FWD(aft, aft, 1); | 930 | 22.2M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 931 | 22.2M | FWD(aft, aft, OPND(s)); | 932 | 22.2M | break; | 933 | 42.8M | case OOR1: /* done a branch, find the O_CH */ | 934 | 42.8M | if (ISSTATEIN(aft, here)) { | 935 | 80.9k | for (look = 1; | 936 | 182k | OP(s = g->strip[pc+look]) != O_CH; | 937 | 101k | look += OPND(s)) | 938 | 80.9k | assert(OP(s) == OOR2); | 939 | 80.9k | FWD(aft, aft, look); | 940 | 80.9k | } | 941 | 42.8M | break; | 942 | 42.8M | case OOR2: /* propagate OCH_'s marking */ | 943 | 42.8M | FWD(aft, aft, 1); | 944 | 42.8M | if (OP(g->strip[pc+OPND(s)]) != O_CH) { | 945 | 20.6M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 946 | 20.6M | FWD(aft, aft, OPND(s)); | 947 | 20.6M | } | 948 | 42.8M | break; | 949 | 40.0M | case 22.2M O_CH22.2M : /* just empty */ | 950 | 22.2M | FWD(aft, aft, 1); | 951 | 22.2M | break; | 952 | 40.0M | default: /* ooooops... */ | 953 | 0 | assert(nope); | 954 | 0 | break; | 955 | 398M | } | 956 | 398M | } | 957 | 4.93M | | 958 | 4.93M | return(aft); | 959 | 4.93M | } |
|
960 | | |
961 | | #ifdef REDEBUG |
962 | | /* |
963 | | - print - print a set of states |
964 | | */ |
965 | | static void |
966 | | print(struct match *m, char *caption, states st, int ch, FILE *d) |
967 | | { |
968 | | struct re_guts *g = m->g; |
969 | | int i; |
970 | | int first = 1; |
971 | | |
972 | | if (!(m->eflags&REG_TRACE)) |
973 | | return; |
974 | | |
975 | | (void)fprintf(d, "%s", caption); |
976 | | if (ch != '\0') |
977 | | (void)fprintf(d, " %s", pchar(ch)); |
978 | | for (i = 0; i < g->nstates; i++) |
979 | | if (ISSET(st, i)) { |
980 | | (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i); |
981 | | first = 0; |
982 | | } |
983 | | (void)fprintf(d, "\n"); |
984 | | } |
985 | | |
986 | | /* |
987 | | - at - print current situation |
988 | | */ |
989 | | static void |
990 | | at(struct match *m, char *title, char *start, char *stop, sopno startst, |
991 | | sopno stopst) |
992 | | { |
993 | | if (!(m->eflags&REG_TRACE)) |
994 | | return; |
995 | | |
996 | | (void)printf("%s %s-", title, pchar(*start)); |
997 | | (void)printf("%s ", pchar(*stop)); |
998 | | (void)printf("%ld-%ld\n", (long)startst, (long)stopst); |
999 | | } |
1000 | | |
1001 | | #ifndef PCHARDONE |
1002 | | #define PCHARDONE /* never again */ |
1003 | | /* |
1004 | | - pchar - make a character printable |
1005 | | * |
1006 | | * Is this identical to regchar() over in debug.c? Well, yes. But a |
1007 | | * duplicate here avoids having a debugging-capable regexec.o tied to |
1008 | | * a matching debug.o, and this is convenient. It all disappears in |
1009 | | * the non-debug compilation anyway, so it doesn't matter much. |
1010 | | */ |
1011 | | static char * /* -> representation */ |
1012 | | pchar(int ch) |
1013 | | { |
1014 | | static char pbuf[10]; |
1015 | | |
1016 | | if (isPrint(ch) || ch == ' ') |
1017 | | (void)snprintf(pbuf, sizeof pbuf, "%c", ch); |
1018 | | else |
1019 | | (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); |
1020 | | return(pbuf); |
1021 | | } |
1022 | | #endif |
1023 | | #endif |
1024 | | |
1025 | | #undef matcher |
1026 | | #undef fast |
1027 | | #undef slow |
1028 | | #undef dissect |
1029 | | #undef backref |
1030 | | #undef step |
1031 | | #undef print |
1032 | | #undef at |
1033 | | #undef match |
1034 | | #undef nope |