/Users/buildslave/jenkins/sharedspace/clang-stage2-coverage-R@2/llvm/lib/Support/regengine.inc
Line | Count | Source (jump to first uncovered line) |
1 | | /*- |
2 | | * This code is derived from OpenBSD's libc/regex, original license follows: |
3 | | * |
4 | | * Copyright (c) 1992, 1993, 1994 Henry Spencer. |
5 | | * Copyright (c) 1992, 1993, 1994 |
6 | | * The Regents of the University of California. All rights reserved. |
7 | | * |
8 | | * This code is derived from software contributed to Berkeley by |
9 | | * Henry Spencer. |
10 | | * |
11 | | * Redistribution and use in source and binary forms, with or without |
12 | | * modification, are permitted provided that the following conditions |
13 | | * are met: |
14 | | * 1. Redistributions of source code must retain the above copyright |
15 | | * notice, this list of conditions and the following disclaimer. |
16 | | * 2. Redistributions in binary form must reproduce the above copyright |
17 | | * notice, this list of conditions and the following disclaimer in the |
18 | | * documentation and/or other materials provided with the distribution. |
19 | | * 3. Neither the name of the University nor the names of its contributors |
20 | | * may be used to endorse or promote products derived from this software |
21 | | * without specific prior written permission. |
22 | | * |
23 | | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
24 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
25 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
26 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
27 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
28 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
29 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
30 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
31 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
32 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
33 | | * SUCH DAMAGE. |
34 | | * |
35 | | * @(#)engine.c 8.5 (Berkeley) 3/20/94 |
36 | | */ |
37 | | |
38 | | /* |
39 | | * The matching engine and friends. This file is #included by regexec.c |
40 | | * after suitable #defines of a variety of macros used herein, so that |
41 | | * different state representations can be used without duplicating masses |
42 | | * of code. |
43 | | */ |
44 | | |
45 | | #ifdef SNAMES |
46 | | #define matcher smatcher |
47 | 1.85M | #define fast sfast |
48 | 57.9k | #define slow sslow |
49 | 8.89k | #define dissect sdissect |
50 | 90 | #define backref sbackref |
51 | 27.8M | #define step sstep |
52 | | #define print sprint |
53 | | #define at sat |
54 | | #define match smat |
55 | | #define nope snope |
56 | | #endif |
57 | | #ifdef LNAMES |
58 | | #define matcher lmatcher |
59 | 179k | #define fast lfast |
60 | 10.2k | #define slow lslow |
61 | 2.28k | #define dissect ldissect |
62 | 0 | #define backref lbackref |
63 | 2.85M | #define step lstep |
64 | | #define print lprint |
65 | | #define at lat |
66 | | #define match lmat |
67 | | #define nope lnope |
68 | | #endif |
69 | | |
70 | | /* another structure passed up and down to avoid zillions of parameters */ |
71 | | struct match { |
72 | | struct re_guts *g; |
73 | | int eflags; |
74 | | llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ |
75 | | const char *offp; /* offsets work from here */ |
76 | | const char *beginp; /* start of string -- virtual NUL precedes */ |
77 | | const char *endp; /* end of string -- virtual NUL here */ |
78 | | const char *coldp; /* can be no match starting before here */ |
79 | | const char **lastpos; /* [nplus+1] */ |
80 | | STATEVARS; |
81 | | states st; /* current states */ |
82 | | states fresh; /* states for a fresh start */ |
83 | | states tmp; /* temporary */ |
84 | | states empty; /* empty set of states */ |
85 | | }; |
86 | | |
87 | | static int matcher(struct re_guts *, const char *, size_t, |
88 | | llvm_regmatch_t[], int); |
89 | | static const char *dissect(struct match *, const char *, const char *, sopno, |
90 | | sopno); |
91 | | static const char *backref(struct match *, const char *, const char *, sopno, |
92 | | sopno, sopno, int); |
93 | | static const char *fast(struct match *, const char *, const char *, sopno, sopno); |
94 | | static const char *slow(struct match *, const char *, const char *, sopno, sopno); |
95 | | static states step(struct re_guts *, sopno, sopno, states, int, states); |
96 | 0 | #define MAX_RECURSION 100 |
97 | 187M | #define BOL (187M OUT187M +1) |
98 | 39.7M | #define EOL (39.7M BOL37.7M +1) |
99 | 41.4M | #define BOLEOL (41.4M BOL41.4M +2) |
100 | 2.10M | #define NOTHING (2.10M BOL2.10M +3) |
101 | 25.5M | #define BOW (25.5M BOL25.5M +4) |
102 | 23.3M | #define EOW (23.3M BOL23.3M +5) |
103 | | #define CODEMAX (BOL+5) /* highest code used */ |
104 | 11.7M | #define NONCHAR(c) ((c) > CHAR_MAX) |
105 | | #define NNONCHAR (CODEMAX-CHAR_MAX) |
106 | | #ifdef REDEBUG |
107 | | static void print(struct match *, char *, states, int, FILE *); |
108 | | #endif |
109 | | #ifdef REDEBUG |
110 | | static void at(struct match *, char *, char *, char *, sopno, sopno); |
111 | | #endif |
112 | | #ifdef REDEBUG |
113 | | static char *pchar(int); |
114 | | #endif |
115 | | |
116 | | #ifdef REDEBUG |
117 | | #define SP(t, s, c) print(m, t, s, c, stdout) |
118 | | #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) |
119 | | #define NOTE(str) { if (m->eflags®_TRACE) (void)printf("=%s\n", (str)); } |
120 | | static int nope = 0; |
121 | | #else |
122 | | #define SP(t, s, c) /* nothing */ |
123 | | #define AT(t, p1, p2, s1, s2) /* nothing */ |
124 | | #define NOTE(s) /* nothing */ |
125 | | #endif |
126 | | |
127 | | /* |
128 | | - matcher - the actual matching engine |
129 | | */ |
130 | | static int /* 0 success, REG_NOMATCH failure */ |
131 | | matcher(struct re_guts *g, const char *string, size_t nmatch, |
132 | | llvm_regmatch_t pmatch[], |
133 | | int eflags) |
134 | 251M | { |
135 | 251M | const char *endp; |
136 | 251M | size_t i; |
137 | 251M | struct match mv; |
138 | 251M | struct match *m = &mv; |
139 | 251M | const char *dp; |
140 | 251M | const sopno gf = g->firststate+1; /* +1 for OEND */ |
141 | 251M | const sopno gl = g->laststate; |
142 | 251M | const char *start; |
143 | 251M | const char *stop; |
144 | 251M | |
145 | 251M | /* simplify the situation where possible */ |
146 | 251M | if (g->cflags&251M REG_NOSUB251M ) |
147 | 0 | nmatch = 0; |
148 | 251M | if (eflags&251M REG_STARTEND251M ) { |
149 | 251M | start = string + pmatch[0].rm_so; |
150 | 251M | stop = string + pmatch[0].rm_eo; |
151 | 251M | } else { |
152 | 0 | start = string; |
153 | 0 | stop = start + strlen(start); |
154 | 0 | } |
155 | 251M | if (stop < start) |
156 | 0 | return(0 REG_INVARG0 ); |
157 | 251M | |
158 | 251M | /* prescreening; this does wonders for this rather slow code */ |
159 | 251M | if (251M g->must != NULL251M ) { |
160 | 3.35G | for (dp = start; dp < stop3.35G ; dp++3.10G ) |
161 | 3.10G | if (3.10G *dp == g->must[0] && 3.10G stop - dp >= g->mlen173M && |
162 | 128M | memcmp(dp, g->must, (size_t)g->mlen) == 0) |
163 | 1.27M | break; |
164 | 250M | if (dp == stop) /* we didn't find g->must */ |
165 | 249M | return(249M REG_NOMATCH249M ); |
166 | 250M | } |
167 | 251M | |
168 | 251M | /* match struct setup */ |
169 | 2.03M | m->g = g; |
170 | 2.03M | m->eflags = eflags; |
171 | 2.03M | m->pmatch = NULL; |
172 | 2.03M | m->lastpos = NULL; |
173 | 2.03M | m->offp = string; |
174 | 2.03M | m->beginp = start; |
175 | 2.03M | m->endp = stop; |
176 | 179k | STATESETUP179k (m, 4); |
177 | 2.03M | SETUP(m->st); |
178 | 2.03M | SETUP(m->fresh); |
179 | 2.03M | SETUP(m->tmp); |
180 | 2.03M | SETUP(m->empty); |
181 | 2.03M | CLEAR(m->empty); |
182 | 179k | |
183 | 179k | /* this loop does only one repetition except for backrefs */ |
184 | 2.03M | for (;;) { |
185 | 2.03M | endp = fast(m, start, stop, gf, gl); |
186 | 2.03M | if (endp == NULL2.03M ) { /* a miss */ |
187 | 1.94M | free(m->pmatch); |
188 | 1.94M | free((void*)m->lastpos); |
189 | 175k | STATETEARDOWN(m); |
190 | 1.94M | return(REG_NOMATCH); |
191 | 1.94M | } |
192 | 90.5k | if (90.5k nmatch == 0 && 90.5k !g->backrefs88.7k ) |
193 | 88.7k | break; /* no further info needed */ |
194 | 90.5k | |
195 | 90.5k | /* where? */ |
196 | 1.77k | assert(m->coldp != NULL); |
197 | 1.77k | for (;;) { |
198 | 1.77k | NOTE("finding start"); |
199 | 1.77k | endp = slow(m, m->coldp, stop, gf, gl); |
200 | 1.77k | if (endp != NULL) |
201 | 1.77k | break; |
202 | 0 | assert(m->coldp < m->endp); |
203 | 0 | m->coldp++; |
204 | 0 | } |
205 | 1.77k | if (nmatch == 1 && 1.77k !g->backrefs10 ) |
206 | 10 | break; /* no further info needed */ |
207 | 1.77k | |
208 | 1.77k | /* oh my, they want the subexpressions... */ |
209 | 1.76k | if (1.76k m->pmatch == NULL1.76k ) |
210 | 1.76k | m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * |
211 | 1.76k | sizeof(llvm_regmatch_t)); |
212 | 1.76k | if (m->pmatch == NULL1.76k ) { |
213 | 0 | STATETEARDOWN(m); |
214 | 0 | return(REG_ESPACE); |
215 | 0 | } |
216 | 6.22k | for (i = 1; 1.76k i <= m->g->nsub6.22k ; i++4.45k ) |
217 | 4.45k | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; |
218 | 1.76k | if (!g->backrefs && 1.76k !(m->eflags&1.76k REG_BACKR1.76k )) { |
219 | 1.76k | NOTE("dissecting"); |
220 | 1.76k | dp = dissect(m, m->coldp, endp, gf, gl); |
221 | 1.76k | } else { |
222 | 9 | if (g->nplus > 0 && 9 m->lastpos == NULL4 ) |
223 | 2 | m->lastpos = (const char **)malloc((g->nplus+1) * |
224 | 2 | sizeof(char *)); |
225 | 9 | if (g->nplus > 0 && 9 m->lastpos == NULL4 ) { |
226 | 0 | free(m->pmatch); |
227 | 0 | STATETEARDOWN(m); |
228 | 0 | return(REG_ESPACE); |
229 | 0 | } |
230 | 9 | NOTE("backref dissect"); |
231 | 9 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
232 | 9 | } |
233 | 1.76k | if (1.76k dp != NULL1.76k ) |
234 | 1.76k | break; |
235 | 1.76k | |
236 | 1.76k | /* uh-oh... we couldn't find a subexpression-level match */ |
237 | 6 | assert(g->backrefs); /* must be back references doing it */ |
238 | 6 | assert(g->nplus == 0 || m->lastpos != NULL); |
239 | 9 | for (;;) { |
240 | 9 | if (dp != NULL || 9 endp <= m->coldp9 ) |
241 | 0 | break; /* defeat */ |
242 | 9 | NOTE("backoff"); |
243 | 9 | endp = slow(m, m->coldp, endp-1, gf, gl); |
244 | 9 | if (endp == NULL) |
245 | 6 | break; /* defeat */ |
246 | 9 | /* try it on a shorter possibility */ |
247 | | #ifndef NDEBUG |
248 | | for (i = 1; i <= m->g->nsub; i++) { |
249 | | assert(m->pmatch[i].rm_so == -1); |
250 | | assert(m->pmatch[i].rm_eo == -1); |
251 | | } |
252 | | #endif |
253 | 9 | NOTE("backoff dissect"); |
254 | 3 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); |
255 | 9 | } |
256 | 6 | assert(dp == NULL || dp == endp); |
257 | 6 | if (dp != NULL) /* found a shorter one */ |
258 | 0 | break; |
259 | 6 | |
260 | 6 | /* despite initial appearances, there is no match here */ |
261 | 6 | NOTE("false alarm"); |
262 | 6 | if (m->coldp == stop) |
263 | 0 | break; |
264 | 6 | start = m->coldp + 1; /* recycle starting later */ |
265 | 6 | } |
266 | 179k | |
267 | 179k | /* fill in the details if requested */ |
268 | 90.5k | if (90.5k nmatch > 090.5k ) { |
269 | 1.77k | pmatch[0].rm_so = m->coldp - m->offp; |
270 | 1.77k | pmatch[0].rm_eo = endp - m->offp; |
271 | 1.77k | } |
272 | 90.5k | if (nmatch > 190.5k ) { |
273 | 1.76k | assert(m->pmatch != NULL); |
274 | 6.20k | for (i = 1; i < nmatch6.20k ; i++4.44k ) |
275 | 4.44k | if (4.44k i <= m->g->nsub4.44k ) |
276 | 4.44k | pmatch[i] = m->pmatch[i]; |
277 | 0 | else { |
278 | 0 | pmatch[i].rm_so = -1; |
279 | 0 | pmatch[i].rm_eo = -1; |
280 | 0 | } |
281 | 1.76k | } |
282 | 90.5k | |
283 | 90.5k | if (m->pmatch != NULL) |
284 | 1.76k | free((char *)m->pmatch); |
285 | 90.5k | if (m->lastpos != NULL) |
286 | 1 | free((char *)m->lastpos); |
287 | 4.27k | STATETEARDOWN(m); |
288 | 90.5k | return(0); |
289 | 251M | } Line | Count | Source | 134 | 614k | { | 135 | 614k | const char *endp; | 136 | 614k | size_t i; | 137 | 614k | struct match mv; | 138 | 614k | struct match *m = &mv; | 139 | 614k | const char *dp; | 140 | 614k | const sopno gf = g->firststate+1; /* +1 for OEND */ | 141 | 614k | const sopno gl = g->laststate; | 142 | 614k | const char *start; | 143 | 614k | const char *stop; | 144 | 614k | | 145 | 614k | /* simplify the situation where possible */ | 146 | 614k | if (g->cflags&614k REG_NOSUB614k ) | 147 | 0 | nmatch = 0; | 148 | 614k | if (eflags&614k REG_STARTEND614k ) { | 149 | 614k | start = string + pmatch[0].rm_so; | 150 | 614k | stop = string + pmatch[0].rm_eo; | 151 | 614k | } else { | 152 | 0 | start = string; | 153 | 0 | stop = start + strlen(start); | 154 | 0 | } | 155 | 614k | if (stop < start) | 156 | 0 | return(0 REG_INVARG0 ); | 157 | 614k | | 158 | 614k | /* prescreening; this does wonders for this rather slow code */ | 159 | 614k | if (614k g->must != NULL614k ) { | 160 | 5.37M | for (dp = start; dp < stop5.37M ; dp++4.91M ) | 161 | 4.93M | if (4.93M *dp == g->must[0] && 4.93M stop - dp >= g->mlen237k && | 162 | 224k | memcmp(dp, g->must, (size_t)g->mlen) == 0) | 163 | 18.9k | break; | 164 | 454k | if (dp == stop) /* we didn't find g->must */ | 165 | 435k | return(435k REG_NOMATCH435k ); | 166 | 454k | } | 167 | 614k | | 168 | 614k | /* match struct setup */ | 169 | 179k | m->g = g; | 170 | 179k | m->eflags = eflags; | 171 | 179k | m->pmatch = NULL; | 172 | 179k | m->lastpos = NULL; | 173 | 179k | m->offp = string; | 174 | 179k | m->beginp = start; | 175 | 179k | m->endp = stop; | 176 | 179k | STATESETUP179k (m, 4); | 177 | 179k | SETUP(m->st); | 178 | 179k | SETUP(m->fresh); | 179 | 179k | SETUP(m->tmp); | 180 | 179k | SETUP(m->empty); | 181 | 179k | CLEAR(m->empty); | 182 | 179k | | 183 | 179k | /* this loop does only one repetition except for backrefs */ | 184 | 179k | for (;;) { | 185 | 179k | endp = fast(m, start, stop, gf, gl); | 186 | 179k | if (endp == NULL179k ) { /* a miss */ | 187 | 175k | free(m->pmatch); | 188 | 175k | free((void*)m->lastpos); | 189 | 175k | STATETEARDOWN(m); | 190 | 175k | return(REG_NOMATCH); | 191 | 175k | } | 192 | 4.27k | if (4.27k nmatch == 0 && 4.27k !g->backrefs4.01k ) | 193 | 4.01k | break; /* no further info needed */ | 194 | 4.27k | | 195 | 4.27k | /* where? */ | 196 | 261 | assert(m->coldp != NULL); | 197 | 261 | for (;;) { | 198 | 261 | NOTE("finding start"); | 199 | 261 | endp = slow(m, m->coldp, stop, gf, gl); | 200 | 261 | if (endp != NULL) | 201 | 261 | break; | 202 | 0 | assert(m->coldp < m->endp); | 203 | 0 | m->coldp++; | 204 | 0 | } | 205 | 261 | if (nmatch == 1 && 261 !g->backrefs0 ) | 206 | 0 | break; /* no further info needed */ | 207 | 261 | | 208 | 261 | /* oh my, they want the subexpressions... */ | 209 | 261 | if (261 m->pmatch == NULL261 ) | 210 | 261 | m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * | 211 | 261 | sizeof(llvm_regmatch_t)); | 212 | 261 | if (m->pmatch == NULL261 ) { | 213 | 0 | STATETEARDOWN(m); | 214 | 0 | return(REG_ESPACE); | 215 | 0 | } | 216 | 1.67k | for (i = 1; 261 i <= m->g->nsub1.67k ; i++1.41k ) | 217 | 1.41k | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; | 218 | 261 | if (!g->backrefs && 261 !(m->eflags&261 REG_BACKR261 )) { | 219 | 261 | NOTE("dissecting"); | 220 | 261 | dp = dissect(m, m->coldp, endp, gf, gl); | 221 | 0 | } else { | 222 | 0 | if (g->nplus > 0 && 0 m->lastpos == NULL0 ) | 223 | 0 | m->lastpos = (const char **)malloc((g->nplus+1) * | 224 | 0 | sizeof(char *)); | 225 | 0 | if (g->nplus > 0 && 0 m->lastpos == NULL0 ) { | 226 | 0 | free(m->pmatch); | 227 | 0 | STATETEARDOWN(m); | 228 | 0 | return(REG_ESPACE); | 229 | 0 | } | 230 | 0 | NOTE("backref dissect"); | 231 | 0 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | 232 | 0 | } | 233 | 261 | if (261 dp != NULL261 ) | 234 | 261 | break; | 235 | 261 | | 236 | 261 | /* uh-oh... we couldn't find a subexpression-level match */ | 237 | 0 | assert(g->backrefs); /* must be back references doing it */ | 238 | 0 | assert(g->nplus == 0 || m->lastpos != NULL); | 239 | 0 | for (;;) { | 240 | 0 | if (dp != NULL || 0 endp <= m->coldp0 ) | 241 | 0 | break; /* defeat */ | 242 | 0 | NOTE("backoff"); | 243 | 0 | endp = slow(m, m->coldp, endp-1, gf, gl); | 244 | 0 | if (endp == NULL) | 245 | 0 | break; /* defeat */ | 246 | 0 | /* try it on a shorter possibility */ | 247 | | #ifndef NDEBUG | 248 | | for (i = 1; i <= m->g->nsub; i++) { | 249 | | assert(m->pmatch[i].rm_so == -1); | 250 | | assert(m->pmatch[i].rm_eo == -1); | 251 | | } | 252 | | #endif | 253 | 0 | NOTE("backoff dissect"); | 254 | 0 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | 255 | 0 | } | 256 | 0 | assert(dp == NULL || dp == endp); | 257 | 0 | if (dp != NULL) /* found a shorter one */ | 258 | 0 | break; | 259 | 0 |
| 260 | 0 | /* despite initial appearances, there is no match here */ | 261 | 0 | NOTE("false alarm"); | 262 | 0 | if (m->coldp == stop) | 263 | 0 | break; | 264 | 0 | start = m->coldp + 1; /* recycle starting later */ | 265 | 0 | } | 266 | 179k | | 267 | 179k | /* fill in the details if requested */ | 268 | 4.27k | if (4.27k nmatch > 04.27k ) { | 269 | 261 | pmatch[0].rm_so = m->coldp - m->offp; | 270 | 261 | pmatch[0].rm_eo = endp - m->offp; | 271 | 261 | } | 272 | 4.27k | if (nmatch > 14.27k ) { | 273 | 261 | assert(m->pmatch != NULL); | 274 | 1.67k | for (i = 1; i < nmatch1.67k ; i++1.41k ) | 275 | 1.41k | if (1.41k i <= m->g->nsub1.41k ) | 276 | 1.41k | pmatch[i] = m->pmatch[i]; | 277 | 0 | else { | 278 | 0 | pmatch[i].rm_so = -1; | 279 | 0 | pmatch[i].rm_eo = -1; | 280 | 0 | } | 281 | 261 | } | 282 | 4.27k | | 283 | 4.27k | if (m->pmatch != NULL) | 284 | 261 | free((char *)m->pmatch); | 285 | 4.27k | if (m->lastpos != NULL) | 286 | 0 | free((char *)m->lastpos); | 287 | 4.27k | STATETEARDOWN(m); | 288 | 4.27k | return(0); | 289 | 614k | } |
Line | Count | Source | 134 | 250M | { | 135 | 250M | const char *endp; | 136 | 250M | size_t i; | 137 | 250M | struct match mv; | 138 | 250M | struct match *m = &mv; | 139 | 250M | const char *dp; | 140 | 250M | const sopno gf = g->firststate+1; /* +1 for OEND */ | 141 | 250M | const sopno gl = g->laststate; | 142 | 250M | const char *start; | 143 | 250M | const char *stop; | 144 | 250M | | 145 | 250M | /* simplify the situation where possible */ | 146 | 250M | if (g->cflags&250M REG_NOSUB250M ) | 147 | 0 | nmatch = 0; | 148 | 250M | if (eflags&250M REG_STARTEND250M ) { | 149 | 250M | start = string + pmatch[0].rm_so; | 150 | 250M | stop = string + pmatch[0].rm_eo; | 151 | 250M | } else { | 152 | 0 | start = string; | 153 | 0 | stop = start + strlen(start); | 154 | 0 | } | 155 | 250M | if (stop < start) | 156 | 0 | return(0 REG_INVARG0 ); | 157 | 250M | | 158 | 250M | /* prescreening; this does wonders for this rather slow code */ | 159 | 250M | if (250M g->must != NULL250M ) { | 160 | 3.35G | for (dp = start; dp < stop3.35G ; dp++3.10G ) | 161 | 3.10G | if (3.10G *dp == g->must[0] && 3.10G stop - dp >= g->mlen173M && | 162 | 128M | memcmp(dp, g->must, (size_t)g->mlen) == 0) | 163 | 1.25M | break; | 164 | 249M | if (dp == stop) /* we didn't find g->must */ | 165 | 248M | return(248M REG_NOMATCH248M ); | 166 | 249M | } | 167 | 250M | | 168 | 250M | /* match struct setup */ | 169 | 1.85M | m->g = g; | 170 | 1.85M | m->eflags = eflags; | 171 | 1.85M | m->pmatch = NULL; | 172 | 1.85M | m->lastpos = NULL; | 173 | 1.85M | m->offp = string; | 174 | 1.85M | m->beginp = start; | 175 | 1.85M | m->endp = stop; | 176 | 1.85M | STATESETUP(m, 4); | 177 | 1.85M | SETUP(m->st); | 178 | 1.85M | SETUP(m->fresh); | 179 | 1.85M | SETUP(m->tmp); | 180 | 1.85M | SETUP(m->empty); | 181 | 1.85M | CLEAR(m->empty); | 182 | 1.85M | | 183 | 1.85M | /* this loop does only one repetition except for backrefs */ | 184 | 1.85M | for (;;) { | 185 | 1.85M | endp = fast(m, start, stop, gf, gl); | 186 | 1.85M | if (endp == NULL1.85M ) { /* a miss */ | 187 | 1.76M | free(m->pmatch); | 188 | 1.76M | free((void*)m->lastpos); | 189 | 1.76M | STATETEARDOWN(m); | 190 | 1.76M | return(REG_NOMATCH); | 191 | 1.76M | } | 192 | 86.2k | if (86.2k nmatch == 0 && 86.2k !g->backrefs84.7k ) | 193 | 84.7k | break; /* no further info needed */ | 194 | 86.2k | | 195 | 86.2k | /* where? */ | 196 | 1.51k | assert(m->coldp != NULL); | 197 | 1.51k | for (;;) { | 198 | 1.51k | NOTE("finding start"); | 199 | 1.51k | endp = slow(m, m->coldp, stop, gf, gl); | 200 | 1.51k | if (endp != NULL) | 201 | 1.51k | break; | 202 | 0 | assert(m->coldp < m->endp); | 203 | 0 | m->coldp++; | 204 | 0 | } | 205 | 1.51k | if (nmatch == 1 && 1.51k !g->backrefs10 ) | 206 | 10 | break; /* no further info needed */ | 207 | 1.51k | | 208 | 1.51k | /* oh my, they want the subexpressions... */ | 209 | 1.50k | if (1.50k m->pmatch == NULL1.50k ) | 210 | 1.50k | m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * | 211 | 1.50k | sizeof(llvm_regmatch_t)); | 212 | 1.50k | if (m->pmatch == NULL1.50k ) { | 213 | 0 | STATETEARDOWN(m); | 214 | 0 | return(REG_ESPACE); | 215 | 0 | } | 216 | 4.54k | for (i = 1; 1.50k i <= m->g->nsub4.54k ; i++3.04k ) | 217 | 3.04k | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; | 218 | 1.50k | if (!g->backrefs && 1.50k !(m->eflags&1.49k REG_BACKR1.49k )) { | 219 | 1.49k | NOTE("dissecting"); | 220 | 1.49k | dp = dissect(m, m->coldp, endp, gf, gl); | 221 | 1.50k | } else { | 222 | 9 | if (g->nplus > 0 && 9 m->lastpos == NULL4 ) | 223 | 2 | m->lastpos = (const char **)malloc((g->nplus+1) * | 224 | 2 | sizeof(char *)); | 225 | 9 | if (g->nplus > 0 && 9 m->lastpos == NULL4 ) { | 226 | 0 | free(m->pmatch); | 227 | 0 | STATETEARDOWN(m); | 228 | 0 | return(REG_ESPACE); | 229 | 0 | } | 230 | 9 | NOTE("backref dissect"); | 231 | 9 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | 232 | 9 | } | 233 | 1.50k | if (1.50k dp != NULL1.50k ) | 234 | 1.50k | break; | 235 | 1.50k | | 236 | 1.50k | /* uh-oh... we couldn't find a subexpression-level match */ | 237 | 6 | assert(g->backrefs); /* must be back references doing it */ | 238 | 6 | assert(g->nplus == 0 || m->lastpos != NULL); | 239 | 9 | for (;;) { | 240 | 9 | if (dp != NULL || 9 endp <= m->coldp9 ) | 241 | 0 | break; /* defeat */ | 242 | 9 | NOTE("backoff"); | 243 | 9 | endp = slow(m, m->coldp, endp-1, gf, gl); | 244 | 9 | if (endp == NULL) | 245 | 6 | break; /* defeat */ | 246 | 9 | /* try it on a shorter possibility */ | 247 | | #ifndef NDEBUG | 248 | | for (i = 1; i <= m->g->nsub; i++) { | 249 | | assert(m->pmatch[i].rm_so == -1); | 250 | | assert(m->pmatch[i].rm_eo == -1); | 251 | | } | 252 | | #endif | 253 | 9 | NOTE("backoff dissect"); | 254 | 3 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); | 255 | 9 | } | 256 | 6 | assert(dp == NULL || dp == endp); | 257 | 6 | if (dp != NULL) /* found a shorter one */ | 258 | 0 | break; | 259 | 6 | | 260 | 6 | /* despite initial appearances, there is no match here */ | 261 | 6 | NOTE("false alarm"); | 262 | 6 | if (m->coldp == stop) | 263 | 0 | break; | 264 | 6 | start = m->coldp + 1; /* recycle starting later */ | 265 | 6 | } | 266 | 1.85M | | 267 | 1.85M | /* fill in the details if requested */ | 268 | 86.2k | if (86.2k nmatch > 086.2k ) { | 269 | 1.51k | pmatch[0].rm_so = m->coldp - m->offp; | 270 | 1.51k | pmatch[0].rm_eo = endp - m->offp; | 271 | 1.51k | } | 272 | 86.2k | if (nmatch > 186.2k ) { | 273 | 1.50k | assert(m->pmatch != NULL); | 274 | 4.53k | for (i = 1; i < nmatch4.53k ; i++3.03k ) | 275 | 3.03k | if (3.03k i <= m->g->nsub3.03k ) | 276 | 3.03k | pmatch[i] = m->pmatch[i]; | 277 | 0 | else { | 278 | 0 | pmatch[i].rm_so = -1; | 279 | 0 | pmatch[i].rm_eo = -1; | 280 | 0 | } | 281 | 1.50k | } | 282 | 86.2k | | 283 | 86.2k | if (m->pmatch != NULL) | 284 | 1.50k | free((char *)m->pmatch); | 285 | 86.2k | if (m->lastpos != NULL) | 286 | 1 | free((char *)m->lastpos); | 287 | 86.2k | STATETEARDOWN(m); | 288 | 86.2k | return(0); | 289 | 250M | } |
|
290 | | |
291 | | /* |
292 | | - dissect - figure out what matched what, no back references |
293 | | */ |
294 | | static const char * /* == stop (success) always */ |
295 | | dissect(struct match *m, const char *start, const char *stop, sopno startst, |
296 | | sopno stopst) |
297 | 11.1k | { |
298 | 11.1k | int i; |
299 | 11.1k | sopno ss; /* start sop of current subRE */ |
300 | 11.1k | sopno es; /* end sop of current subRE */ |
301 | 11.1k | const char *sp; /* start of string matched by it */ |
302 | 11.1k | const char *stp; /* string matched by it cannot pass here */ |
303 | 11.1k | const char *rest; /* start of rest of string */ |
304 | 11.1k | const char *tail; /* string unmatched by rest of RE */ |
305 | 11.1k | sopno ssub; /* start sop of subsubRE */ |
306 | 11.1k | sopno esub; /* end sop of subsubRE */ |
307 | 11.1k | const char *ssp; /* start of string matched by subsubRE */ |
308 | 11.1k | const char *sep; /* end of string matched by subsubRE */ |
309 | 11.1k | const char *oldssp; /* previous ssp */ |
310 | 11.1k | |
311 | 11.1k | AT("diss", start, stop, startst, stopst); |
312 | 11.1k | sp = start; |
313 | 54.8k | for (ss = startst; ss < stopst54.8k ; ss = es43.6k ) { |
314 | 43.6k | /* identify end of subRE */ |
315 | 43.6k | es = ss; |
316 | 43.6k | switch (OP(m->g->strip[es])) { |
317 | 10.3k | case 10.3k OPLUS_10.3k : |
318 | 10.3k | case 10.3k OQUEST_10.3k : |
319 | 10.3k | es += OPND(m->g->strip[es]); |
320 | 10.3k | break; |
321 | 2.68k | case 2.68k OCH_2.68k : |
322 | 8.17k | while (OP8.17k (m->g->strip[es]) != 8.17k O_CH8.17k ) |
323 | 5.49k | es += 5.49k OPND5.49k (m->g->strip[es]); |
324 | 10.3k | break; |
325 | 43.6k | } |
326 | 43.6k | es++; |
327 | 43.6k | |
328 | 43.6k | /* figure out what it matched */ |
329 | 43.6k | switch (OP(m->g->strip[ss])) { |
330 | 0 | case 0 OEND0 : |
331 | 0 | assert(nope); |
332 | 0 | break; |
333 | 13.1k | case 13.1k OCHAR13.1k : |
334 | 13.1k | sp++; |
335 | 13.1k | break; |
336 | 2.01k | case 2.01k OBOL2.01k : |
337 | 2.01k | case 2.01k OEOL2.01k : |
338 | 2.01k | case 2.01k OBOW2.01k : |
339 | 2.01k | case 2.01k OEOW2.01k : |
340 | 2.01k | break; |
341 | 8.54k | case 8.54k OANY8.54k : |
342 | 8.54k | case 8.54k OANYOF8.54k : |
343 | 8.54k | sp++; |
344 | 8.54k | break; |
345 | 0 | case 0 OBACK_0 : |
346 | 0 | case 0 O_BACK0 : |
347 | 0 | assert(nope); |
348 | 0 | break; |
349 | 0 | /* cases where length of match is hard to find */ |
350 | 6.75k | case 6.75k OQUEST_6.75k : |
351 | 6.75k | stp = stop; |
352 | 6.75k | for (;;) { |
353 | 6.75k | /* how long could this one be? */ |
354 | 6.75k | rest = slow(m, sp, stp, ss, es); |
355 | 6.75k | assert(rest != NULL); /* it did match */ |
356 | 6.75k | /* could the rest match the rest? */ |
357 | 6.75k | tail = slow(m, rest, stop, es, stopst); |
358 | 6.75k | if (tail == stop) |
359 | 6.75k | break; /* yes! */ |
360 | 6.75k | /* no -- try a shorter match for this one */ |
361 | 0 | stp = rest - 1; |
362 | 0 | assert(stp >= sp); /* it did work */ |
363 | 0 | } |
364 | 6.75k | ssub = ss + 1; |
365 | 6.75k | esub = es - 1; |
366 | 6.75k | /* did innards match? */ |
367 | 6.75k | if (slow6.75k (m, sp, rest, ssub, esub) != NULL6.75k ) { |
368 | 3.17k | const char *dp = dissect(m, sp, rest, ssub, esub); |
369 | 3.17k | (void)dp; /* avoid warning if assertions off */ |
370 | 3.17k | assert(dp == rest); |
371 | 3.17k | } else /* no */ |
372 | 6.75k | assert(sp == rest); |
373 | 6.75k | sp = rest; |
374 | 6.75k | break; |
375 | 3.55k | case 3.55k OPLUS_3.55k : |
376 | 3.55k | stp = stop; |
377 | 3.55k | for (;;) { |
378 | 3.55k | /* how long could this one be? */ |
379 | 3.55k | rest = slow(m, sp, stp, ss, es); |
380 | 3.55k | assert(rest != NULL); /* it did match */ |
381 | 3.55k | /* could the rest match the rest? */ |
382 | 3.55k | tail = slow(m, rest, stop, es, stopst); |
383 | 3.55k | if (tail == stop) |
384 | 3.55k | break; /* yes! */ |
385 | 3.55k | /* no -- try a shorter match for this one */ |
386 | 0 | stp = rest - 1; |
387 | 0 | assert(stp >= sp); /* it did work */ |
388 | 0 | } |
389 | 3.55k | ssub = ss + 1; |
390 | 3.55k | esub = es - 1; |
391 | 3.55k | ssp = sp; |
392 | 3.55k | oldssp = ssp; |
393 | 28.5k | for (;;) { /* find last match of innards */ |
394 | 28.5k | sep = slow(m, ssp, rest, ssub, esub); |
395 | 28.5k | if (sep == NULL || 28.5k sep == ssp25.0k ) |
396 | 3.55k | break; /* failed or matched null */ |
397 | 25.0k | oldssp = ssp; /* on to next try */ |
398 | 25.0k | ssp = sep; |
399 | 25.0k | } |
400 | 3.55k | if (sep == NULL3.55k ) { |
401 | 3.55k | /* last successful match */ |
402 | 3.55k | sep = ssp; |
403 | 3.55k | ssp = oldssp; |
404 | 3.55k | } |
405 | 3.55k | assert(sep == rest); /* must exhaust substring */ |
406 | 3.55k | assert(slow(m, ssp, sep, ssub, esub) == rest); |
407 | 3.55k | { |
408 | 3.55k | const char *dp = dissect(m, ssp, sep, ssub, esub); |
409 | 3.55k | (void)dp; /* avoid warning if assertions off */ |
410 | 3.55k | assert(dp == sep); |
411 | 3.55k | } |
412 | 3.55k | sp = rest; |
413 | 3.55k | break; |
414 | 2.68k | case 2.68k OCH_2.68k : |
415 | 2.68k | stp = stop; |
416 | 2.68k | for (;;) { |
417 | 2.68k | /* how long could this one be? */ |
418 | 2.68k | rest = slow(m, sp, stp, ss, es); |
419 | 2.68k | assert(rest != NULL); /* it did match */ |
420 | 2.68k | /* could the rest match the rest? */ |
421 | 2.68k | tail = slow(m, rest, stop, es, stopst); |
422 | 2.68k | if (tail == stop) |
423 | 2.68k | break; /* yes! */ |
424 | 2.68k | /* no -- try a shorter match for this one */ |
425 | 0 | stp = rest - 1; |
426 | 0 | assert(stp >= sp); /* it did work */ |
427 | 0 | } |
428 | 2.68k | ssub = ss + 1; |
429 | 2.68k | esub = ss + OPND(m->g->strip[ss]) - 1; |
430 | 2.68k | assert(OP(m->g->strip[esub]) == OOR1); |
431 | 5.11k | for (;;) { /* find first matching branch */ |
432 | 5.11k | if (slow5.11k (m, sp, rest, ssub, esub) == rest5.11k ) |
433 | 2.68k | break; /* it matched all of it */ |
434 | 5.11k | /* that one missed, try next one */ |
435 | 2.43k | assert(OP(m->g->strip[esub]) == OOR1); |
436 | 2.43k | esub++; |
437 | 2.43k | assert(OP(m->g->strip[esub]) == OOR2); |
438 | 2.43k | ssub = esub + 1; |
439 | 2.43k | esub += OPND(m->g->strip[esub]); |
440 | 2.43k | if (OP2.43k (m->g->strip[esub]) == 2.43k OOR22.43k ) |
441 | 42 | esub--; |
442 | 5.11k | else |
443 | 5.11k | assert(OP(m->g->strip[esub]) == O_CH); |
444 | 5.11k | } |
445 | 2.68k | { |
446 | 2.68k | const char *dp = dissect(m, sp, rest, ssub, esub); |
447 | 2.68k | (void)dp; /* avoid warning if assertions off */ |
448 | 2.68k | assert(dp == rest); |
449 | 2.68k | } |
450 | 2.68k | sp = rest; |
451 | 2.68k | break; |
452 | 0 | case 0 O_PLUS0 : |
453 | 0 | case 0 O_QUEST0 : |
454 | 0 | case 0 OOR10 : |
455 | 0 | case 0 OOR20 : |
456 | 0 | case 0 O_CH0 : |
457 | 0 | assert(nope); |
458 | 0 | break; |
459 | 3.49k | case 3.49k OLPAREN3.49k : |
460 | 3.49k | i = OPND(m->g->strip[ss]); |
461 | 3.49k | assert(0 < i && i <= m->g->nsub); |
462 | 3.49k | m->pmatch[i].rm_so = sp - m->offp; |
463 | 3.49k | break; |
464 | 3.49k | case 3.49k ORPAREN3.49k : |
465 | 3.49k | i = OPND(m->g->strip[ss]); |
466 | 3.49k | assert(0 < i && i <= m->g->nsub); |
467 | 3.49k | m->pmatch[i].rm_eo = sp - m->offp; |
468 | 3.49k | break; |
469 | 0 | default: /* uh oh */ |
470 | 0 | assert(nope); |
471 | 0 | break; |
472 | 43.6k | } |
473 | 43.6k | } |
474 | 11.1k | |
475 | 11.1k | assert(sp == stop); |
476 | 11.1k | return(sp); |
477 | 11.1k | } Line | Count | Source | 297 | 8.89k | { | 298 | 8.89k | int i; | 299 | 8.89k | sopno ss; /* start sop of current subRE */ | 300 | 8.89k | sopno es; /* end sop of current subRE */ | 301 | 8.89k | const char *sp; /* start of string matched by it */ | 302 | 8.89k | const char *stp; /* string matched by it cannot pass here */ | 303 | 8.89k | const char *rest; /* start of rest of string */ | 304 | 8.89k | const char *tail; /* string unmatched by rest of RE */ | 305 | 8.89k | sopno ssub; /* start sop of subsubRE */ | 306 | 8.89k | sopno esub; /* end sop of subsubRE */ | 307 | 8.89k | const char *ssp; /* start of string matched by subsubRE */ | 308 | 8.89k | const char *sep; /* end of string matched by subsubRE */ | 309 | 8.89k | const char *oldssp; /* previous ssp */ | 310 | 8.89k | | 311 | 8.89k | AT("diss", start, stop, startst, stopst); | 312 | 8.89k | sp = start; | 313 | 44.7k | for (ss = startst; ss < stopst44.7k ; ss = es35.8k ) { | 314 | 35.8k | /* identify end of subRE */ | 315 | 35.8k | es = ss; | 316 | 35.8k | switch (OP(m->g->strip[es])) { | 317 | 8.82k | case 8.82k OPLUS_8.82k : | 318 | 8.82k | case 8.82k OQUEST_8.82k : | 319 | 8.82k | es += OPND(m->g->strip[es]); | 320 | 8.82k | break; | 321 | 1.50k | case 1.50k OCH_1.50k : | 322 | 4.52k | while (OP4.52k (m->g->strip[es]) != 4.52k O_CH4.52k ) | 323 | 3.01k | es += 3.01k OPND3.01k (m->g->strip[es]); | 324 | 8.82k | break; | 325 | 35.8k | } | 326 | 35.8k | es++; | 327 | 35.8k | | 328 | 35.8k | /* figure out what it matched */ | 329 | 35.8k | switch (OP(m->g->strip[ss])) { | 330 | 0 | case 0 OEND0 : | 331 | 0 | assert(nope); | 332 | 0 | break; | 333 | 12.0k | case 12.0k OCHAR12.0k : | 334 | 12.0k | sp++; | 335 | 12.0k | break; | 336 | 1.49k | case 1.49k OBOL1.49k : | 337 | 1.49k | case 1.49k OEOL1.49k : | 338 | 1.49k | case 1.49k OBOW1.49k : | 339 | 1.49k | case 1.49k OEOW1.49k : | 340 | 1.49k | break; | 341 | 5.92k | case 5.92k OANY5.92k : | 342 | 5.92k | case 5.92k OANYOF5.92k : | 343 | 5.92k | sp++; | 344 | 5.92k | break; | 345 | 0 | case 0 OBACK_0 : | 346 | 0 | case 0 O_BACK0 : | 347 | 0 | assert(nope); | 348 | 0 | break; | 349 | 0 | /* cases where length of match is hard to find */ | 350 | 5.86k | case 5.86k OQUEST_5.86k : | 351 | 5.86k | stp = stop; | 352 | 5.86k | for (;;) { | 353 | 5.86k | /* how long could this one be? */ | 354 | 5.86k | rest = slow(m, sp, stp, ss, es); | 355 | 5.86k | assert(rest != NULL); /* it did match */ | 356 | 5.86k | /* could the rest match the rest? */ | 357 | 5.86k | tail = slow(m, rest, stop, es, stopst); | 358 | 5.86k | if (tail == stop) | 359 | 5.86k | break; /* yes! */ | 360 | 5.86k | /* no -- try a shorter match for this one */ | 361 | 0 | stp = rest - 1; | 362 | 0 | assert(stp >= sp); /* it did work */ | 363 | 0 | } | 364 | 5.86k | ssub = ss + 1; | 365 | 5.86k | esub = es - 1; | 366 | 5.86k | /* did innards match? */ | 367 | 5.86k | if (slow5.86k (m, sp, rest, ssub, esub) != NULL5.86k ) { | 368 | 2.93k | const char *dp = dissect(m, sp, rest, ssub, esub); | 369 | 2.93k | (void)dp; /* avoid warning if assertions off */ | 370 | 2.93k | assert(dp == rest); | 371 | 2.93k | } else /* no */ | 372 | 5.86k | assert(sp == rest); | 373 | 5.86k | sp = rest; | 374 | 5.86k | break; | 375 | 2.95k | case 2.95k OPLUS_2.95k : | 376 | 2.95k | stp = stop; | 377 | 2.95k | for (;;) { | 378 | 2.95k | /* how long could this one be? */ | 379 | 2.95k | rest = slow(m, sp, stp, ss, es); | 380 | 2.95k | assert(rest != NULL); /* it did match */ | 381 | 2.95k | /* could the rest match the rest? */ | 382 | 2.95k | tail = slow(m, rest, stop, es, stopst); | 383 | 2.95k | if (tail == stop) | 384 | 2.95k | break; /* yes! */ | 385 | 2.95k | /* no -- try a shorter match for this one */ | 386 | 0 | stp = rest - 1; | 387 | 0 | assert(stp >= sp); /* it did work */ | 388 | 0 | } | 389 | 2.95k | ssub = ss + 1; | 390 | 2.95k | esub = es - 1; | 391 | 2.95k | ssp = sp; | 392 | 2.95k | oldssp = ssp; | 393 | 26.8k | for (;;) { /* find last match of innards */ | 394 | 26.8k | sep = slow(m, ssp, rest, ssub, esub); | 395 | 26.8k | if (sep == NULL || 26.8k sep == ssp23.9k ) | 396 | 2.95k | break; /* failed or matched null */ | 397 | 23.9k | oldssp = ssp; /* on to next try */ | 398 | 23.9k | ssp = sep; | 399 | 23.9k | } | 400 | 2.95k | if (sep == NULL2.95k ) { | 401 | 2.95k | /* last successful match */ | 402 | 2.95k | sep = ssp; | 403 | 2.95k | ssp = oldssp; | 404 | 2.95k | } | 405 | 2.95k | assert(sep == rest); /* must exhaust substring */ | 406 | 2.95k | assert(slow(m, ssp, sep, ssub, esub) == rest); | 407 | 2.95k | { | 408 | 2.95k | const char *dp = dissect(m, ssp, sep, ssub, esub); | 409 | 2.95k | (void)dp; /* avoid warning if assertions off */ | 410 | 2.95k | assert(dp == sep); | 411 | 2.95k | } | 412 | 2.95k | sp = rest; | 413 | 2.95k | break; | 414 | 1.50k | case 1.50k OCH_1.50k : | 415 | 1.50k | stp = stop; | 416 | 1.50k | for (;;) { | 417 | 1.50k | /* how long could this one be? */ | 418 | 1.50k | rest = slow(m, sp, stp, ss, es); | 419 | 1.50k | assert(rest != NULL); /* it did match */ | 420 | 1.50k | /* could the rest match the rest? */ | 421 | 1.50k | tail = slow(m, rest, stop, es, stopst); | 422 | 1.50k | if (tail == stop) | 423 | 1.50k | break; /* yes! */ | 424 | 1.50k | /* no -- try a shorter match for this one */ | 425 | 0 | stp = rest - 1; | 426 | 0 | assert(stp >= sp); /* it did work */ | 427 | 0 | } | 428 | 1.50k | ssub = ss + 1; | 429 | 1.50k | esub = ss + OPND(m->g->strip[ss]) - 1; | 430 | 1.50k | assert(OP(m->g->strip[esub]) == OOR1); | 431 | 2.98k | for (;;) { /* find first matching branch */ | 432 | 2.98k | if (slow2.98k (m, sp, rest, ssub, esub) == rest2.98k ) | 433 | 1.50k | break; /* it matched all of it */ | 434 | 2.98k | /* that one missed, try next one */ | 435 | 1.47k | assert(OP(m->g->strip[esub]) == OOR1); | 436 | 1.47k | esub++; | 437 | 1.47k | assert(OP(m->g->strip[esub]) == OOR2); | 438 | 1.47k | ssub = esub + 1; | 439 | 1.47k | esub += OPND(m->g->strip[esub]); | 440 | 1.47k | if (OP1.47k (m->g->strip[esub]) == 1.47k OOR21.47k ) | 441 | 0 | esub--; | 442 | 2.98k | else | 443 | 2.98k | assert(OP(m->g->strip[esub]) == O_CH); | 444 | 2.98k | } | 445 | 1.50k | { | 446 | 1.50k | const char *dp = dissect(m, sp, rest, ssub, esub); | 447 | 1.50k | (void)dp; /* avoid warning if assertions off */ | 448 | 1.50k | assert(dp == rest); | 449 | 1.50k | } | 450 | 1.50k | sp = rest; | 451 | 1.50k | break; | 452 | 0 | case 0 O_PLUS0 : | 453 | 0 | case 0 O_QUEST0 : | 454 | 0 | case 0 OOR10 : | 455 | 0 | case 0 OOR20 : | 456 | 0 | case 0 O_CH0 : | 457 | 0 | assert(nope); | 458 | 0 | break; | 459 | 3.01k | case 3.01k OLPAREN3.01k : | 460 | 3.01k | i = OPND(m->g->strip[ss]); | 461 | 3.01k | assert(0 < i && i <= m->g->nsub); | 462 | 3.01k | m->pmatch[i].rm_so = sp - m->offp; | 463 | 3.01k | break; | 464 | 3.01k | case 3.01k ORPAREN3.01k : | 465 | 3.01k | i = OPND(m->g->strip[ss]); | 466 | 3.01k | assert(0 < i && i <= m->g->nsub); | 467 | 3.01k | m->pmatch[i].rm_eo = sp - m->offp; | 468 | 3.01k | break; | 469 | 0 | default: /* uh oh */ | 470 | 0 | assert(nope); | 471 | 0 | break; | 472 | 35.8k | } | 473 | 35.8k | } | 474 | 8.89k | | 475 | 8.89k | assert(sp == stop); | 476 | 8.89k | return(sp); | 477 | 8.89k | } |
Line | Count | Source | 297 | 2.28k | { | 298 | 2.28k | int i; | 299 | 2.28k | sopno ss; /* start sop of current subRE */ | 300 | 2.28k | sopno es; /* end sop of current subRE */ | 301 | 2.28k | const char *sp; /* start of string matched by it */ | 302 | 2.28k | const char *stp; /* string matched by it cannot pass here */ | 303 | 2.28k | const char *rest; /* start of rest of string */ | 304 | 2.28k | const char *tail; /* string unmatched by rest of RE */ | 305 | 2.28k | sopno ssub; /* start sop of subsubRE */ | 306 | 2.28k | sopno esub; /* end sop of subsubRE */ | 307 | 2.28k | const char *ssp; /* start of string matched by subsubRE */ | 308 | 2.28k | const char *sep; /* end of string matched by subsubRE */ | 309 | 2.28k | const char *oldssp; /* previous ssp */ | 310 | 2.28k | | 311 | 2.28k | AT("diss", start, stop, startst, stopst); | 312 | 2.28k | sp = start; | 313 | 10.1k | for (ss = startst; ss < stopst10.1k ; ss = es7.86k ) { | 314 | 7.86k | /* identify end of subRE */ | 315 | 7.86k | es = ss; | 316 | 7.86k | switch (OP(m->g->strip[es])) { | 317 | 1.48k | case 1.48k OPLUS_1.48k : | 318 | 1.48k | case 1.48k OQUEST_1.48k : | 319 | 1.48k | es += OPND(m->g->strip[es]); | 320 | 1.48k | break; | 321 | 1.18k | case 1.18k OCH_1.18k : | 322 | 3.65k | while (OP3.65k (m->g->strip[es]) != 3.65k O_CH3.65k ) | 323 | 2.47k | es += 2.47k OPND2.47k (m->g->strip[es]); | 324 | 1.48k | break; | 325 | 7.86k | } | 326 | 7.86k | es++; | 327 | 7.86k | | 328 | 7.86k | /* figure out what it matched */ | 329 | 7.86k | switch (OP(m->g->strip[ss])) { | 330 | 0 | case 0 OEND0 : | 331 | 0 | assert(nope); | 332 | 0 | break; | 333 | 1.10k | case 1.10k OCHAR1.10k : | 334 | 1.10k | sp++; | 335 | 1.10k | break; | 336 | 522 | case 522 OBOL522 : | 337 | 522 | case 522 OEOL522 : | 338 | 522 | case 522 OBOW522 : | 339 | 522 | case 522 OEOW522 : | 340 | 522 | break; | 341 | 2.62k | case 2.62k OANY2.62k : | 342 | 2.62k | case 2.62k OANYOF2.62k : | 343 | 2.62k | sp++; | 344 | 2.62k | break; | 345 | 0 | case 0 OBACK_0 : | 346 | 0 | case 0 O_BACK0 : | 347 | 0 | assert(nope); | 348 | 0 | break; | 349 | 0 | /* cases where length of match is hard to find */ | 350 | 888 | case 888 OQUEST_888 : | 351 | 888 | stp = stop; | 352 | 888 | for (;;) { | 353 | 888 | /* how long could this one be? */ | 354 | 888 | rest = slow(m, sp, stp, ss, es); | 355 | 888 | assert(rest != NULL); /* it did match */ | 356 | 888 | /* could the rest match the rest? */ | 357 | 888 | tail = slow(m, rest, stop, es, stopst); | 358 | 888 | if (tail == stop) | 359 | 888 | break; /* yes! */ | 360 | 888 | /* no -- try a shorter match for this one */ | 361 | 0 | stp = rest - 1; | 362 | 0 | assert(stp >= sp); /* it did work */ | 363 | 0 | } | 364 | 888 | ssub = ss + 1; | 365 | 888 | esub = es - 1; | 366 | 888 | /* did innards match? */ | 367 | 888 | if (slow888 (m, sp, rest, ssub, esub) != NULL888 ) { | 368 | 242 | const char *dp = dissect(m, sp, rest, ssub, esub); | 369 | 242 | (void)dp; /* avoid warning if assertions off */ | 370 | 242 | assert(dp == rest); | 371 | 242 | } else /* no */ | 372 | 888 | assert(sp == rest); | 373 | 888 | sp = rest; | 374 | 888 | break; | 375 | 600 | case 600 OPLUS_600 : | 376 | 600 | stp = stop; | 377 | 600 | for (;;) { | 378 | 600 | /* how long could this one be? */ | 379 | 600 | rest = slow(m, sp, stp, ss, es); | 380 | 600 | assert(rest != NULL); /* it did match */ | 381 | 600 | /* could the rest match the rest? */ | 382 | 600 | tail = slow(m, rest, stop, es, stopst); | 383 | 600 | if (tail == stop) | 384 | 600 | break; /* yes! */ | 385 | 600 | /* no -- try a shorter match for this one */ | 386 | 0 | stp = rest - 1; | 387 | 0 | assert(stp >= sp); /* it did work */ | 388 | 0 | } | 389 | 600 | ssub = ss + 1; | 390 | 600 | esub = es - 1; | 391 | 600 | ssp = sp; | 392 | 600 | oldssp = ssp; | 393 | 1.67k | for (;;) { /* find last match of innards */ | 394 | 1.67k | sep = slow(m, ssp, rest, ssub, esub); | 395 | 1.67k | if (sep == NULL || 1.67k sep == ssp1.07k ) | 396 | 600 | break; /* failed or matched null */ | 397 | 1.07k | oldssp = ssp; /* on to next try */ | 398 | 1.07k | ssp = sep; | 399 | 1.07k | } | 400 | 600 | if (sep == NULL600 ) { | 401 | 600 | /* last successful match */ | 402 | 600 | sep = ssp; | 403 | 600 | ssp = oldssp; | 404 | 600 | } | 405 | 600 | assert(sep == rest); /* must exhaust substring */ | 406 | 600 | assert(slow(m, ssp, sep, ssub, esub) == rest); | 407 | 600 | { | 408 | 600 | const char *dp = dissect(m, ssp, sep, ssub, esub); | 409 | 600 | (void)dp; /* avoid warning if assertions off */ | 410 | 600 | assert(dp == sep); | 411 | 600 | } | 412 | 600 | sp = rest; | 413 | 600 | break; | 414 | 1.18k | case 1.18k OCH_1.18k : | 415 | 1.18k | stp = stop; | 416 | 1.18k | for (;;) { | 417 | 1.18k | /* how long could this one be? */ | 418 | 1.18k | rest = slow(m, sp, stp, ss, es); | 419 | 1.18k | assert(rest != NULL); /* it did match */ | 420 | 1.18k | /* could the rest match the rest? */ | 421 | 1.18k | tail = slow(m, rest, stop, es, stopst); | 422 | 1.18k | if (tail == stop) | 423 | 1.18k | break; /* yes! */ | 424 | 1.18k | /* no -- try a shorter match for this one */ | 425 | 0 | stp = rest - 1; | 426 | 0 | assert(stp >= sp); /* it did work */ | 427 | 0 | } | 428 | 1.18k | ssub = ss + 1; | 429 | 1.18k | esub = ss + OPND(m->g->strip[ss]) - 1; | 430 | 1.18k | assert(OP(m->g->strip[esub]) == OOR1); | 431 | 2.13k | for (;;) { /* find first matching branch */ | 432 | 2.13k | if (slow2.13k (m, sp, rest, ssub, esub) == rest2.13k ) | 433 | 1.18k | break; /* it matched all of it */ | 434 | 2.13k | /* that one missed, try next one */ | 435 | 958 | assert(OP(m->g->strip[esub]) == OOR1); | 436 | 958 | esub++; | 437 | 958 | assert(OP(m->g->strip[esub]) == OOR2); | 438 | 958 | ssub = esub + 1; | 439 | 958 | esub += OPND(m->g->strip[esub]); | 440 | 958 | if (OP958 (m->g->strip[esub]) == 958 OOR2958 ) | 441 | 42 | esub--; | 442 | 2.13k | else | 443 | 2.13k | assert(OP(m->g->strip[esub]) == O_CH); | 444 | 2.13k | } | 445 | 1.18k | { | 446 | 1.18k | const char *dp = dissect(m, sp, rest, ssub, esub); | 447 | 1.18k | (void)dp; /* avoid warning if assertions off */ | 448 | 1.18k | assert(dp == rest); | 449 | 1.18k | } | 450 | 1.18k | sp = rest; | 451 | 1.18k | break; | 452 | 0 | case 0 O_PLUS0 : | 453 | 0 | case 0 O_QUEST0 : | 454 | 0 | case 0 OOR10 : | 455 | 0 | case 0 OOR20 : | 456 | 0 | case 0 O_CH0 : | 457 | 0 | assert(nope); | 458 | 0 | break; | 459 | 474 | case 474 OLPAREN474 : | 460 | 474 | i = OPND(m->g->strip[ss]); | 461 | 474 | assert(0 < i && i <= m->g->nsub); | 462 | 474 | m->pmatch[i].rm_so = sp - m->offp; | 463 | 474 | break; | 464 | 474 | case 474 ORPAREN474 : | 465 | 474 | i = OPND(m->g->strip[ss]); | 466 | 474 | assert(0 < i && i <= m->g->nsub); | 467 | 474 | m->pmatch[i].rm_eo = sp - m->offp; | 468 | 474 | break; | 469 | 0 | default: /* uh oh */ | 470 | 0 | assert(nope); | 471 | 0 | break; | 472 | 7.86k | } | 473 | 7.86k | } | 474 | 2.28k | | 475 | 2.28k | assert(sp == stop); | 476 | 2.28k | return(sp); | 477 | 2.28k | } |
|
478 | | |
479 | | /* |
480 | | - backref - figure out what matched what, figuring in back references |
481 | | */ |
482 | | static const char * /* == stop (success) or NULL (failure) */ |
483 | | backref(struct match *m, const char *start, const char *stop, sopno startst, |
484 | | sopno stopst, sopno lev, int rec) /* PLUS nesting level */ |
485 | 90 | { |
486 | 90 | int i; |
487 | 90 | sopno ss; /* start sop of current subRE */ |
488 | 90 | const char *sp; /* start of string matched by it */ |
489 | 90 | sopno ssub; /* start sop of subsubRE */ |
490 | 90 | sopno esub; /* end sop of subsubRE */ |
491 | 90 | const char *ssp; /* start of string matched by subsubRE */ |
492 | 90 | const char *dp; |
493 | 90 | size_t len; |
494 | 90 | int hard; |
495 | 90 | sop s; |
496 | 90 | llvm_regoff_t offsave; |
497 | 90 | cset *cs; |
498 | 90 | |
499 | 90 | AT("back", start, stop, startst, stopst); |
500 | 90 | sp = start; |
501 | 90 | |
502 | 90 | /* get as far as we can with easy stuff */ |
503 | 90 | hard = 0; |
504 | 206 | for (ss = startst; !hard && 206 ss < stopst132 ; ss++116 ) |
505 | 129 | switch (129 OP129 (s = m->g->strip[ss])) { |
506 | 25 | case 25 OCHAR25 : |
507 | 25 | if (sp == stop || 25 *sp++ != (char)25 OPND25 (s)) |
508 | 6 | return(NULL); |
509 | 19 | break; |
510 | 0 | case 0 OANY0 : |
511 | 0 | if (sp == stop) |
512 | 0 | return(NULL); |
513 | 0 | sp++; |
514 | 0 | break; |
515 | 30 | case 30 OANYOF30 : |
516 | 30 | cs = &m->g->sets[OPND(s)]; |
517 | 30 | if (sp == stop || 30 !30 CHIN30 (cs, *sp++)) |
518 | 7 | return(NULL); |
519 | 23 | break; |
520 | 0 | case 0 OBOL0 : |
521 | 0 | if ( (sp == m->beginp && 0 !(m->eflags&0 REG_NOTBOL0 )) || |
522 | 0 | (sp < m->endp && 0 *(sp-1) == '\n'0 && |
523 | 0 | (m->g->cflags&0 REG_NEWLINE0 )) ) |
524 | 0 | { /* yes */ } |
525 | 0 | else |
526 | 0 | return(NULL); |
527 | 0 | break; |
528 | 0 | case 0 OEOL0 : |
529 | 0 | if ( (sp == m->endp && 0 !(m->eflags&0 REG_NOTEOL0 )) || |
530 | 0 | (sp < m->endp && 0 *sp == '\n'0 && |
531 | 0 | (m->g->cflags&0 REG_NEWLINE0 )) ) |
532 | 0 | { /* yes */ } |
533 | 0 | else |
534 | 0 | return(NULL); |
535 | 0 | break; |
536 | 0 | case 0 OBOW0 : |
537 | 0 | if (( (sp == m->beginp && 0 !(m->eflags&0 REG_NOTBOL0 )) || |
538 | 0 | (sp < m->endp && 0 *(sp-1) == '\n'0 && |
539 | 0 | (m->g->cflags&0 REG_NEWLINE0 )) || |
540 | 0 | (sp > m->beginp && |
541 | 0 | !0 ISWORD0 (*(sp-1))) ) && |
542 | 0 | (sp < m->endp && 0 ISWORD0 (*sp)) ) |
543 | 0 | { /* yes */ } |
544 | 0 | else |
545 | 0 | return(NULL); |
546 | 0 | break; |
547 | 0 | case 0 OEOW0 : |
548 | 0 | if (( (sp == m->endp && 0 !(m->eflags&0 REG_NOTEOL0 )) || |
549 | 0 | (sp < m->endp && 0 *sp == '\n'0 && |
550 | 0 | (m->g->cflags&0 REG_NEWLINE0 )) || |
551 | 0 | (sp < m->endp && 0 !0 ISWORD0 (*sp)) ) && |
552 | 0 | (sp > m->beginp && 0 ISWORD0 (*(sp-1))) ) |
553 | 0 | { /* yes */ } |
554 | 0 | else |
555 | 0 | return(NULL); |
556 | 0 | break; |
557 | 0 | case 0 O_QUEST0 : |
558 | 0 | break; |
559 | 0 | case 0 OOR10 : /* matches null but needs to skip */ |
560 | 0 | ss++; |
561 | 0 | s = m->g->strip[ss]; |
562 | 0 | do { |
563 | 0 | assert(OP(s) == OOR2); |
564 | 0 | ss += OPND(s); |
565 | 0 | } while (OP0 (s = m->g->strip[ss]) != 0 O_CH0 ); |
566 | 0 | /* note that the ss++ gets us past the O_CH */ |
567 | 0 | break; |
568 | 74 | default: /* have to make a choice */ |
569 | 74 | hard = 1; |
570 | 74 | break; |
571 | 129 | } |
572 | 77 | if (77 !hard77 ) { /* that was it! */ |
573 | 3 | if (sp != stop) |
574 | 0 | return(NULL); |
575 | 3 | return(sp); |
576 | 3 | } |
577 | 74 | ss--; /* adjust for the for's final increment */ |
578 | 74 | |
579 | 74 | /* the hard stuff */ |
580 | 74 | AT("hard", sp, stop, ss, stopst); |
581 | 74 | s = m->g->strip[ss]; |
582 | 74 | switch (OP(s)) { |
583 | 16 | case 16 OBACK_16 : /* the vilest depths */ |
584 | 16 | i = OPND(s); |
585 | 16 | assert(0 < i && i <= m->g->nsub); |
586 | 16 | if (m->pmatch[i].rm_eo == -1) |
587 | 0 | return(NULL); |
588 | 16 | assert(m->pmatch[i].rm_so != -1); |
589 | 16 | len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; |
590 | 16 | if (len == 0 && 16 rec++ > 0 MAX_RECURSION0 ) |
591 | 0 | return(NULL); |
592 | 16 | assert(stop - m->beginp >= len); |
593 | 16 | if (sp > stop - len) |
594 | 3 | return(NULL); /* not enough left to match */ |
595 | 13 | ssp = m->offp + m->pmatch[i].rm_so; |
596 | 13 | if (memcmp(sp, ssp, len) != 0) |
597 | 6 | return(NULL); |
598 | 23 | while (7 m->g->strip[ss] != 23 SOP23 (O_BACK, i)) |
599 | 16 | ss++; |
600 | 7 | return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); |
601 | 0 | break; |
602 | 0 | case 0 OQUEST_0 : /* to null or not */ |
603 | 0 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
604 | 0 | if (dp != NULL) |
605 | 0 | return(dp); /* not */ |
606 | 0 | return(0 backref0 (m, sp, stop, ss+OPND0 (s)+1, stopst, lev, rec)); |
607 | 0 | break; |
608 | 7 | case 7 OPLUS_7 : |
609 | 7 | assert(m->lastpos != NULL); |
610 | 7 | assert(lev+1 <= m->g->nplus); |
611 | 7 | m->lastpos[lev+1] = sp; |
612 | 7 | return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); |
613 | 0 | break; |
614 | 15 | case 15 O_PLUS15 : |
615 | 15 | if (sp == m->lastpos[lev]) /* last pass matched null */ |
616 | 0 | return(0 backref0 (m, sp, stop, ss+1, stopst, lev-1, rec)); |
617 | 15 | /* try another pass */ |
618 | 15 | m->lastpos[lev] = sp; |
619 | 15 | dp = backref15 (m, sp, stop, ss-OPND15 (s)+1, stopst, lev, rec); |
620 | 15 | if (dp == NULL) |
621 | 13 | return(13 backref13 (m, sp, stop, ss+1, stopst, lev-1, rec)); |
622 | 15 | else |
623 | 2 | return(dp); |
624 | 0 | break; |
625 | 0 | case 0 OCH_0 : /* find the right one, if any */ |
626 | 0 | ssub = ss + 1; |
627 | 0 | esub = ss + OPND(s) - 1; |
628 | 0 | assert(OP(m->g->strip[esub]) == OOR1); |
629 | 0 | for (;;) { /* find first matching branch */ |
630 | 0 | dp = backref(m, sp, stop, ssub, esub, lev, rec); |
631 | 0 | if (dp != NULL) |
632 | 0 | return(dp); |
633 | 0 | /* that one missed, try next one */ |
634 | 0 | if (0 OP0 (m->g->strip[esub]) == 0 O_CH0 ) |
635 | 0 | return(NULL); /* there is none */ |
636 | 0 | esub++; |
637 | 0 | assert(OP(m->g->strip[esub]) == OOR2); |
638 | 0 | ssub = esub + 1; |
639 | 0 | esub += OPND(m->g->strip[esub]); |
640 | 0 | if (OP0 (m->g->strip[esub]) == 0 OOR20 ) |
641 | 0 | esub--; |
642 | 0 | else |
643 | 0 | assert(OP(m->g->strip[esub]) == O_CH); |
644 | 0 | } |
645 | 0 | break; |
646 | 15 | case 15 OLPAREN15 : /* must undo assignment if rest fails */ |
647 | 15 | i = OPND(s); |
648 | 15 | assert(0 < i && i <= m->g->nsub); |
649 | 15 | offsave = m->pmatch[i].rm_so; |
650 | 15 | m->pmatch[i].rm_so = sp - m->offp; |
651 | 15 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
652 | 15 | if (dp != NULL) |
653 | 4 | return(dp); |
654 | 11 | m->pmatch[i].rm_so = offsave; |
655 | 11 | return(NULL); |
656 | 0 | break; |
657 | 21 | case 21 ORPAREN21 : /* must undo assignment if rest fails */ |
658 | 21 | i = OPND(s); |
659 | 21 | assert(0 < i && i <= m->g->nsub); |
660 | 21 | offsave = m->pmatch[i].rm_eo; |
661 | 21 | m->pmatch[i].rm_eo = sp - m->offp; |
662 | 21 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); |
663 | 21 | if (dp != NULL) |
664 | 4 | return(dp); |
665 | 17 | m->pmatch[i].rm_eo = offsave; |
666 | 17 | return(NULL); |
667 | 0 | break; |
668 | 0 | default: /* uh oh */ |
669 | 0 | assert(nope); |
670 | 0 | break; |
671 | 74 | } |
672 | 74 | |
673 | 74 | /* "can't happen" */ |
674 | 0 | assert(nope); |
675 | 0 | /* NOTREACHED */ |
676 | 0 | return NULL; |
677 | 90 | } Line | Count | Source | 485 | 90 | { | 486 | 90 | int i; | 487 | 90 | sopno ss; /* start sop of current subRE */ | 488 | 90 | const char *sp; /* start of string matched by it */ | 489 | 90 | sopno ssub; /* start sop of subsubRE */ | 490 | 90 | sopno esub; /* end sop of subsubRE */ | 491 | 90 | const char *ssp; /* start of string matched by subsubRE */ | 492 | 90 | const char *dp; | 493 | 90 | size_t len; | 494 | 90 | int hard; | 495 | 90 | sop s; | 496 | 90 | llvm_regoff_t offsave; | 497 | 90 | cset *cs; | 498 | 90 | | 499 | 90 | AT("back", start, stop, startst, stopst); | 500 | 90 | sp = start; | 501 | 90 | | 502 | 90 | /* get as far as we can with easy stuff */ | 503 | 90 | hard = 0; | 504 | 206 | for (ss = startst; !hard && 206 ss < stopst132 ; ss++116 ) | 505 | 129 | switch (129 OP129 (s = m->g->strip[ss])) { | 506 | 25 | case 25 OCHAR25 : | 507 | 25 | if (sp == stop || 25 *sp++ != (char)25 OPND25 (s)) | 508 | 6 | return(NULL); | 509 | 19 | break; | 510 | 0 | case 0 OANY0 : | 511 | 0 | if (sp == stop) | 512 | 0 | return(NULL); | 513 | 0 | sp++; | 514 | 0 | break; | 515 | 30 | case 30 OANYOF30 : | 516 | 30 | cs = &m->g->sets[OPND(s)]; | 517 | 30 | if (sp == stop || 30 !30 CHIN30 (cs, *sp++)) | 518 | 7 | return(NULL); | 519 | 23 | break; | 520 | 0 | case 0 OBOL0 : | 521 | 0 | if ( (sp == m->beginp && 0 !(m->eflags&0 REG_NOTBOL0 )) || | 522 | 0 | (sp < m->endp && 0 *(sp-1) == '\n'0 && | 523 | 0 | (m->g->cflags&0 REG_NEWLINE0 )) ) | 524 | 0 | { /* yes */ } | 525 | 0 | else | 526 | 0 | return(NULL); | 527 | 0 | break; | 528 | 0 | case 0 OEOL0 : | 529 | 0 | if ( (sp == m->endp && 0 !(m->eflags&0 REG_NOTEOL0 )) || | 530 | 0 | (sp < m->endp && 0 *sp == '\n'0 && | 531 | 0 | (m->g->cflags&0 REG_NEWLINE0 )) ) | 532 | 0 | { /* yes */ } | 533 | 0 | else | 534 | 0 | return(NULL); | 535 | 0 | break; | 536 | 0 | case 0 OBOW0 : | 537 | 0 | if (( (sp == m->beginp && 0 !(m->eflags&0 REG_NOTBOL0 )) || | 538 | 0 | (sp < m->endp && 0 *(sp-1) == '\n'0 && | 539 | 0 | (m->g->cflags&0 REG_NEWLINE0 )) || | 540 | 0 | (sp > m->beginp && | 541 | 0 | !0 ISWORD0 (*(sp-1))) ) && | 542 | 0 | (sp < m->endp && 0 ISWORD0 (*sp)) ) | 543 | 0 | { /* yes */ } | 544 | 0 | else | 545 | 0 | return(NULL); | 546 | 0 | break; | 547 | 0 | case 0 OEOW0 : | 548 | 0 | if (( (sp == m->endp && 0 !(m->eflags&0 REG_NOTEOL0 )) || | 549 | 0 | (sp < m->endp && 0 *sp == '\n'0 && | 550 | 0 | (m->g->cflags&0 REG_NEWLINE0 )) || | 551 | 0 | (sp < m->endp && 0 !0 ISWORD0 (*sp)) ) && | 552 | 0 | (sp > m->beginp && 0 ISWORD0 (*(sp-1))) ) | 553 | 0 | { /* yes */ } | 554 | 0 | else | 555 | 0 | return(NULL); | 556 | 0 | break; | 557 | 0 | case 0 O_QUEST0 : | 558 | 0 | break; | 559 | 0 | case 0 OOR10 : /* matches null but needs to skip */ | 560 | 0 | ss++; | 561 | 0 | s = m->g->strip[ss]; | 562 | 0 | do { | 563 | 0 | assert(OP(s) == OOR2); | 564 | 0 | ss += OPND(s); | 565 | 0 | } while (OP0 (s = m->g->strip[ss]) != 0 O_CH0 ); | 566 | 0 | /* note that the ss++ gets us past the O_CH */ | 567 | 0 | break; | 568 | 74 | default: /* have to make a choice */ | 569 | 74 | hard = 1; | 570 | 74 | break; | 571 | 129 | } | 572 | 77 | if (77 !hard77 ) { /* that was it! */ | 573 | 3 | if (sp != stop) | 574 | 0 | return(NULL); | 575 | 3 | return(sp); | 576 | 3 | } | 577 | 74 | ss--; /* adjust for the for's final increment */ | 578 | 74 | | 579 | 74 | /* the hard stuff */ | 580 | 74 | AT("hard", sp, stop, ss, stopst); | 581 | 74 | s = m->g->strip[ss]; | 582 | 74 | switch (OP(s)) { | 583 | 16 | case 16 OBACK_16 : /* the vilest depths */ | 584 | 16 | i = OPND(s); | 585 | 16 | assert(0 < i && i <= m->g->nsub); | 586 | 16 | if (m->pmatch[i].rm_eo == -1) | 587 | 0 | return(NULL); | 588 | 16 | assert(m->pmatch[i].rm_so != -1); | 589 | 16 | len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; | 590 | 16 | if (len == 0 && 16 rec++ > 0 MAX_RECURSION0 ) | 591 | 0 | return(NULL); | 592 | 16 | assert(stop - m->beginp >= len); | 593 | 16 | if (sp > stop - len) | 594 | 3 | return(NULL); /* not enough left to match */ | 595 | 13 | ssp = m->offp + m->pmatch[i].rm_so; | 596 | 13 | if (memcmp(sp, ssp, len) != 0) | 597 | 6 | return(NULL); | 598 | 23 | while (7 m->g->strip[ss] != 23 SOP23 (O_BACK, i)) | 599 | 16 | ss++; | 600 | 7 | return(backref(m, sp+len, stop, ss+1, stopst, lev, rec)); | 601 | 0 | break; | 602 | 0 | case 0 OQUEST_0 : /* to null or not */ | 603 | 0 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | 604 | 0 | if (dp != NULL) | 605 | 0 | return(dp); /* not */ | 606 | 0 | return(0 backref0 (m, sp, stop, ss+OPND0 (s)+1, stopst, lev, rec)); | 607 | 0 | break; | 608 | 7 | case 7 OPLUS_7 : | 609 | 7 | assert(m->lastpos != NULL); | 610 | 7 | assert(lev+1 <= m->g->nplus); | 611 | 7 | m->lastpos[lev+1] = sp; | 612 | 7 | return(backref(m, sp, stop, ss+1, stopst, lev+1, rec)); | 613 | 0 | break; | 614 | 15 | case 15 O_PLUS15 : | 615 | 15 | if (sp == m->lastpos[lev]) /* last pass matched null */ | 616 | 0 | return(0 backref0 (m, sp, stop, ss+1, stopst, lev-1, rec)); | 617 | 15 | /* try another pass */ | 618 | 15 | m->lastpos[lev] = sp; | 619 | 15 | dp = backref15 (m, sp, stop, ss-OPND15 (s)+1, stopst, lev, rec); | 620 | 15 | if (dp == NULL) | 621 | 13 | return(13 backref13 (m, sp, stop, ss+1, stopst, lev-1, rec)); | 622 | 15 | else | 623 | 2 | return(dp); | 624 | 0 | break; | 625 | 0 | case 0 OCH_0 : /* find the right one, if any */ | 626 | 0 | ssub = ss + 1; | 627 | 0 | esub = ss + OPND(s) - 1; | 628 | 0 | assert(OP(m->g->strip[esub]) == OOR1); | 629 | 0 | for (;;) { /* find first matching branch */ | 630 | 0 | dp = backref(m, sp, stop, ssub, esub, lev, rec); | 631 | 0 | if (dp != NULL) | 632 | 0 | return(dp); | 633 | 0 | /* that one missed, try next one */ | 634 | 0 | if (0 OP0 (m->g->strip[esub]) == 0 O_CH0 ) | 635 | 0 | return(NULL); /* there is none */ | 636 | 0 | esub++; | 637 | 0 | assert(OP(m->g->strip[esub]) == OOR2); | 638 | 0 | ssub = esub + 1; | 639 | 0 | esub += OPND(m->g->strip[esub]); | 640 | 0 | if (OP0 (m->g->strip[esub]) == 0 OOR20 ) | 641 | 0 | esub--; | 642 | 0 | else | 643 | 0 | assert(OP(m->g->strip[esub]) == O_CH); | 644 | 0 | } | 645 | 0 | break; | 646 | 15 | case 15 OLPAREN15 : /* must undo assignment if rest fails */ | 647 | 15 | i = OPND(s); | 648 | 15 | assert(0 < i && i <= m->g->nsub); | 649 | 15 | offsave = m->pmatch[i].rm_so; | 650 | 15 | m->pmatch[i].rm_so = sp - m->offp; | 651 | 15 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | 652 | 15 | if (dp != NULL) | 653 | 4 | return(dp); | 654 | 11 | m->pmatch[i].rm_so = offsave; | 655 | 11 | return(NULL); | 656 | 0 | break; | 657 | 21 | case 21 ORPAREN21 : /* must undo assignment if rest fails */ | 658 | 21 | i = OPND(s); | 659 | 21 | assert(0 < i && i <= m->g->nsub); | 660 | 21 | offsave = m->pmatch[i].rm_eo; | 661 | 21 | m->pmatch[i].rm_eo = sp - m->offp; | 662 | 21 | dp = backref(m, sp, stop, ss+1, stopst, lev, rec); | 663 | 21 | if (dp != NULL) | 664 | 4 | return(dp); | 665 | 17 | m->pmatch[i].rm_eo = offsave; | 666 | 17 | return(NULL); | 667 | 0 | break; | 668 | 0 | default: /* uh oh */ | 669 | 0 | assert(nope); | 670 | 0 | break; | 671 | 74 | } | 672 | 74 | | 673 | 74 | /* "can't happen" */ | 674 | 0 | assert(nope); | 675 | 0 | /* NOTREACHED */ | 676 | 0 | return NULL; | 677 | 90 | } |
Unexecuted instantiation: regexec.c:lbackref |
678 | | |
679 | | /* |
680 | | - fast - step through the string at top speed |
681 | | */ |
682 | | static const char * /* where tentative match ended, or NULL */ |
683 | | fast(struct match *m, const char *start, const char *stop, sopno startst, |
684 | | sopno stopst) |
685 | 2.03M | { |
686 | 2.03M | states st = m->st; |
687 | 2.03M | states fresh = m->fresh; |
688 | 2.03M | states tmp = m->tmp; |
689 | 2.03M | const char *p = start; |
690 | 2.03M | int c = (start == m->beginp) ? OUT2.03M : *(start-1)6 ; |
691 | 2.03M | int lastc; /* previous c */ |
692 | 2.03M | int flagch; |
693 | 2.03M | int i; |
694 | 2.03M | const char *coldp; /* last p after which no match was underway */ |
695 | 2.03M | |
696 | 2.03M | CLEAR(st); |
697 | 2.03M | SET1(st, startst); |
698 | 2.03M | st = step2.03M (m->g, startst, stopst, st, NOTHING2.03M , st); |
699 | 2.03M | ASSIGN(fresh, st); |
700 | 2.03M | SP("start", st, *p); |
701 | 2.03M | coldp = NULL; |
702 | 22.9M | for (;;) { |
703 | 22.9M | /* next character */ |
704 | 22.9M | lastc = c; |
705 | 22.9M | c = (p == m->endp) ? OUT2.01M : *p20.9M ; |
706 | 22.9M | if (EQ(st, fresh)) |
707 | 20.6M | coldp = p; |
708 | 22.9M | |
709 | 22.9M | /* is there an EOL and/or BOL between lastc and c? */ |
710 | 22.9M | flagch = '\0'; |
711 | 22.9M | i = 0; |
712 | 22.9M | if ( (lastc == '\n' && 22.9M m->g->cflags&49 REG_NEWLINE49 ) || |
713 | 22.9M | (lastc == 22.9M OUT22.9M && !(m->eflags&2.03M REG_NOTBOL2.03M )) ) { |
714 | 2.03M | flagch = BOL; |
715 | 2.03M | i = m->g->nbol; |
716 | 2.03M | } |
717 | 22.9M | if ( (c == '\n' && 22.9M m->g->cflags&49 REG_NEWLINE49 ) || |
718 | 22.9M | (c == 22.9M OUT22.9M && !(m->eflags&2.01M REG_NOTEOL2.01M )) ) { |
719 | 2.01M | flagch = (flagch == BOL2.01M ) ? BOLEOL188 : EOL2.01M ; |
720 | 2.01M | i += m->g->neol; |
721 | 2.01M | } |
722 | 22.9M | if (i != 022.9M ) { |
723 | 6.15M | for (; i > 06.15M ; i--3.07M ) |
724 | 3.07M | st = 3.07M step3.07M (m->g, startst, stopst, st, flagch, st); |
725 | 3.07M | SP("boleol", st, c); |
726 | 3.07M | } |
727 | 22.9M | |
728 | 22.9M | /* how about a word boundary? */ |
729 | 22.9M | if ( (flagch == 22.9M BOL22.9M || (lastc != 20.9M OUT20.9M && !20.9M ISWORD20.9M (lastc))) && |
730 | 22.9M | (c != 2.10M OUT2.10M && ISWORD2.09M (c)) ) { |
731 | 2.07M | flagch = BOW; |
732 | 2.07M | } |
733 | 22.9M | if ( (lastc != 22.9M OUT22.9M && ISWORD20.9M (lastc)) && |
734 | 22.9M | (flagch == 20.8M EOL20.8M || (c != 18.8M OUT18.8M && !18.8M ISWORD18.8M (c))) ) { |
735 | 2.05M | flagch = EOW; |
736 | 2.05M | } |
737 | 22.9M | if (flagch == 22.9M BOW22.9M || flagch == 20.8M EOW20.8M ) { |
738 | 4.13M | st = step(m->g, startst, stopst, st, flagch, st); |
739 | 4.13M | SP("boweow", st, c); |
740 | 4.13M | } |
741 | 22.9M | |
742 | 22.9M | /* are we done? */ |
743 | 22.9M | if (ISSET22.9M (st, stopst) || 22.9M p == stop22.8M ) |
744 | 2.03M | break; /* NOTE BREAK OUT */ |
745 | 22.9M | |
746 | 22.9M | /* no, we must deal with this character */ |
747 | 20.9M | ASSIGN20.9M (tmp, st); |
748 | 20.9M | ASSIGN(st, fresh); |
749 | 20.9M | assert(c != OUT); |
750 | 20.9M | st = step(m->g, startst, stopst, tmp, c, st); |
751 | 22.9M | SP("aft", st, c); |
752 | 22.9M | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
753 | 22.9M | p++; |
754 | 22.9M | } |
755 | 2.03M | |
756 | 2.03M | assert(coldp != NULL); |
757 | 2.03M | m->coldp = coldp; |
758 | 2.03M | if (ISSET(st, stopst)) |
759 | 90.5k | return(p+1); |
760 | 2.03M | else |
761 | 1.94M | return(NULL); |
762 | 2.03M | } Line | Count | Source | 685 | 1.85M | { | 686 | 1.85M | states st = m->st; | 687 | 1.85M | states fresh = m->fresh; | 688 | 1.85M | states tmp = m->tmp; | 689 | 1.85M | const char *p = start; | 690 | 1.85M | int c = (start == m->beginp) ? OUT1.85M : *(start-1)6 ; | 691 | 1.85M | int lastc; /* previous c */ | 692 | 1.85M | int flagch; | 693 | 1.85M | int i; | 694 | 1.85M | const char *coldp; /* last p after which no match was underway */ | 695 | 1.85M | | 696 | 1.85M | CLEAR(st); | 697 | 1.85M | SET1(st, startst); | 698 | 1.85M | st = step1.85M (m->g, startst, stopst, st, NOTHING1.85M , st); | 699 | 1.85M | ASSIGN(fresh, st); | 700 | 1.85M | SP("start", st, *p); | 701 | 1.85M | coldp = NULL; | 702 | 20.8M | for (;;) { | 703 | 20.8M | /* next character */ | 704 | 20.8M | lastc = c; | 705 | 20.8M | c = (p == m->endp) ? OUT1.83M : *p18.9M ; | 706 | 20.8M | if (EQ(st, fresh)) | 707 | 18.8M | coldp = p; | 708 | 20.8M | | 709 | 20.8M | /* is there an EOL and/or BOL between lastc and c? */ | 710 | 20.8M | flagch = '\0'; | 711 | 20.8M | i = 0; | 712 | 20.8M | if ( (lastc == '\n' && 20.8M m->g->cflags&43 REG_NEWLINE43 ) || | 713 | 20.8M | (lastc == 20.8M OUT20.8M && !(m->eflags&1.85M REG_NOTBOL1.85M )) ) { | 714 | 1.85M | flagch = BOL; | 715 | 1.85M | i = m->g->nbol; | 716 | 1.85M | } | 717 | 20.8M | if ( (c == '\n' && 20.8M m->g->cflags&43 REG_NEWLINE43 ) || | 718 | 20.8M | (c == 20.8M OUT20.8M && !(m->eflags&1.83M REG_NOTEOL1.83M )) ) { | 719 | 1.83M | flagch = (flagch == BOL1.83M ) ? BOLEOL188 : EOL1.83M ; | 720 | 1.83M | i += m->g->neol; | 721 | 1.83M | } | 722 | 20.8M | if (i != 020.8M ) { | 723 | 5.58M | for (; i > 05.58M ; i--2.79M ) | 724 | 2.79M | st = 2.79M step2.79M (m->g, startst, stopst, st, flagch, st); | 725 | 2.79M | SP("boleol", st, c); | 726 | 2.79M | } | 727 | 20.8M | | 728 | 20.8M | /* how about a word boundary? */ | 729 | 20.8M | if ( (flagch == 20.8M BOL20.8M || (lastc != 18.9M OUT18.9M && !18.9M ISWORD18.9M (lastc))) && | 730 | 20.8M | (c != 1.91M OUT1.91M && ISWORD1.90M (c)) ) { | 731 | 1.88M | flagch = BOW; | 732 | 1.88M | } | 733 | 20.8M | if ( (lastc != 20.8M OUT20.8M && ISWORD18.9M (lastc)) && | 734 | 20.8M | (flagch == 18.9M EOL18.9M || (c != 17.0M OUT17.0M && !17.0M ISWORD17.0M (c))) ) { | 735 | 1.86M | flagch = EOW; | 736 | 1.86M | } | 737 | 20.8M | if (flagch == 20.8M BOW20.8M || flagch == 18.9M EOW18.9M ) { | 738 | 3.75M | st = step(m->g, startst, stopst, st, flagch, st); | 739 | 3.75M | SP("boweow", st, c); | 740 | 3.75M | } | 741 | 20.8M | | 742 | 20.8M | /* are we done? */ | 743 | 20.8M | if (ISSET20.8M (st, stopst) || 20.8M p == stop20.7M ) | 744 | 1.85M | break; /* NOTE BREAK OUT */ | 745 | 20.8M | | 746 | 20.8M | /* no, we must deal with this character */ | 747 | 18.9M | ASSIGN18.9M (tmp, st); | 748 | 18.9M | ASSIGN(st, fresh); | 749 | 18.9M | assert(c != OUT); | 750 | 18.9M | st = step(m->g, startst, stopst, tmp, c, st); | 751 | 20.8M | SP("aft", st, c); | 752 | 20.8M | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | 753 | 20.8M | p++; | 754 | 20.8M | } | 755 | 1.85M | | 756 | 1.85M | assert(coldp != NULL); | 757 | 1.85M | m->coldp = coldp; | 758 | 1.85M | if (ISSET(st, stopst)) | 759 | 86.2k | return(p+1); | 760 | 1.85M | else | 761 | 1.76M | return(NULL); | 762 | 1.85M | } |
Line | Count | Source | 685 | 179k | { | 686 | 179k | states st = m->st; | 687 | 179k | states fresh = m->fresh; | 688 | 179k | states tmp = m->tmp; | 689 | 179k | const char *p = start; | 690 | 179k | int c = (start == m->beginp) ? OUT179k : *(start-1)0 ; | 691 | 179k | int lastc; /* previous c */ | 692 | 179k | int flagch; | 693 | 179k | int i; | 694 | 179k | const char *coldp; /* last p after which no match was underway */ | 695 | 179k | | 696 | 179k | CLEAR(st); | 697 | 179k | SET1(st, startst); | 698 | 179k | st = step179k (m->g, startst, stopst, st, NOTHING179k , st); | 699 | 179k | ASSIGN(fresh, st); | 700 | 179k | SP("start", st, *p); | 701 | 179k | coldp = NULL; | 702 | 2.13M | for (;;) { | 703 | 2.13M | /* next character */ | 704 | 2.13M | lastc = c; | 705 | 2.13M | c = (p == m->endp) ? OUT179k : *p1.95M ; | 706 | 2.13M | if (EQ(st, fresh)) | 707 | 1.88M | coldp = p; | 708 | 2.13M | | 709 | 2.13M | /* is there an EOL and/or BOL between lastc and c? */ | 710 | 2.13M | flagch = '\0'; | 711 | 2.13M | i = 0; | 712 | 2.13M | if ( (lastc == '\n' && 2.13M m->g->cflags&6 REG_NEWLINE6 ) || | 713 | 2.13M | (lastc == 2.13M OUT2.13M && !(m->eflags&179k REG_NOTBOL179k )) ) { | 714 | 179k | flagch = BOL; | 715 | 179k | i = m->g->nbol; | 716 | 179k | } | 717 | 2.13M | if ( (c == '\n' && 2.13M m->g->cflags&6 REG_NEWLINE6 ) || | 718 | 2.13M | (c == 2.13M OUT2.13M && !(m->eflags&179k REG_NOTEOL179k )) ) { | 719 | 179k | flagch = (flagch == BOL179k ) ? BOLEOL0 : EOL179k ; | 720 | 179k | i += m->g->neol; | 721 | 179k | } | 722 | 2.13M | if (i != 02.13M ) { | 723 | 574k | for (; i > 0574k ; i--287k ) | 724 | 287k | st = 287k step287k (m->g, startst, stopst, st, flagch, st); | 725 | 287k | SP("boleol", st, c); | 726 | 287k | } | 727 | 2.13M | | 728 | 2.13M | /* how about a word boundary? */ | 729 | 2.13M | if ( (flagch == 2.13M BOL2.13M || (lastc != 1.95M OUT1.95M && !1.95M ISWORD1.95M (lastc))) && | 730 | 2.13M | (c != 190k OUT190k && ISWORD190k (c)) ) { | 731 | 186k | flagch = BOW; | 732 | 186k | } | 733 | 2.13M | if ( (lastc != 2.13M OUT2.13M && ISWORD1.95M (lastc)) && | 734 | 2.13M | (flagch == 1.94M EOL1.94M || (c != 1.76M OUT1.76M && !1.76M ISWORD1.76M (c))) ) { | 735 | 186k | flagch = EOW; | 736 | 186k | } | 737 | 2.13M | if (flagch == 2.13M BOW2.13M || flagch == 1.94M EOW1.94M ) { | 738 | 372k | st = step(m->g, startst, stopst, st, flagch, st); | 739 | 372k | SP("boweow", st, c); | 740 | 372k | } | 741 | 2.13M | | 742 | 2.13M | /* are we done? */ | 743 | 2.13M | if (ISSET2.13M (st, stopst) || 2.13M p == stop2.12M ) | 744 | 179k | break; /* NOTE BREAK OUT */ | 745 | 2.13M | | 746 | 2.13M | /* no, we must deal with this character */ | 747 | 1.95M | ASSIGN1.95M (tmp, st); | 748 | 1.95M | ASSIGN(st, fresh); | 749 | 1.95M | assert(c != OUT); | 750 | 1.95M | st = step(m->g, startst, stopst, tmp, c, st); | 751 | 2.13M | SP("aft", st, c); | 752 | 2.13M | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | 753 | 2.13M | p++; | 754 | 2.13M | } | 755 | 179k | | 756 | 179k | assert(coldp != NULL); | 757 | 179k | m->coldp = coldp; | 758 | 179k | if (ISSET(st, stopst)) | 759 | 4.27k | return(p+1); | 760 | 179k | else | 761 | 175k | return(NULL); | 762 | 179k | } |
|
763 | | |
764 | | /* |
765 | | - slow - step through the string more deliberately |
766 | | */ |
767 | | static const char * /* where it ended */ |
768 | | slow(struct match *m, const char *start, const char *stop, sopno startst, |
769 | | sopno stopst) |
770 | 68.2k | { |
771 | 68.2k | states st = m->st; |
772 | 68.2k | states empty = m->empty; |
773 | 68.2k | states tmp = m->tmp; |
774 | 68.2k | const char *p = start; |
775 | 68.2k | int c = (start == m->beginp) ? OUT6.28k : *(start-1)61.9k ; |
776 | 68.2k | int lastc; /* previous c */ |
777 | 68.2k | int flagch; |
778 | 68.2k | int i; |
779 | 68.2k | const char *matchp; /* last p at which a match ended */ |
780 | 68.2k | |
781 | 68.2k | AT("slow", start, stop, startst, stopst); |
782 | 68.2k | CLEAR(st); |
783 | 68.2k | SET1(st, startst); |
784 | 68.2k | SP("sstart", st, *p); |
785 | 68.2k | st = step68.2k (m->g, startst, stopst, st, NOTHING68.2k , st); |
786 | 68.2k | matchp = NULL; |
787 | 410k | for (;;) { |
788 | 410k | /* next character */ |
789 | 410k | lastc = c; |
790 | 410k | c = (p == m->endp) ? OUT15.9k : *p394k ; |
791 | 410k | |
792 | 410k | /* is there an EOL and/or BOL between lastc and c? */ |
793 | 410k | flagch = '\0'; |
794 | 410k | i = 0; |
795 | 410k | if ( (lastc == '\n' && 410k m->g->cflags&12 REG_NEWLINE12 ) || |
796 | 410k | (lastc == 410k OUT410k && !(m->eflags&6.28k REG_NOTBOL6.28k )) ) { |
797 | 6.28k | flagch = BOL; |
798 | 6.28k | i = m->g->nbol; |
799 | 6.28k | } |
800 | 410k | if ( (c == '\n' && 410k m->g->cflags&10 REG_NEWLINE10 ) || |
801 | 410k | (c == 410k OUT410k && !(m->eflags&15.9k REG_NOTEOL15.9k )) ) { |
802 | 15.9k | flagch = (flagch == BOL15.9k ) ? BOLEOL0 : EOL15.9k ; |
803 | 15.9k | i += m->g->neol; |
804 | 15.9k | } |
805 | 410k | if (i != 0410k ) { |
806 | 23.8k | for (; i > 023.8k ; i--11.9k ) |
807 | 11.9k | st = 11.9k step11.9k (m->g, startst, stopst, st, flagch, st); |
808 | 11.9k | SP("sboleol", st, c); |
809 | 11.9k | } |
810 | 410k | |
811 | 410k | /* how about a word boundary? */ |
812 | 410k | if ( (flagch == 410k BOL410k || (lastc != 403k OUT403k && !403k ISWORD403k (lastc))) && |
813 | 410k | (c != 101k OUT101k && ISWORD90.9k (c)) ) { |
814 | 65.5k | flagch = BOW; |
815 | 65.5k | } |
816 | 410k | if ( (lastc != 410k OUT410k && ISWORD403k (lastc)) && |
817 | 410k | (flagch == 308k EOL308k || (c != 303k OUT303k && !303k ISWORD303k (c))) ) { |
818 | 71.2k | flagch = EOW; |
819 | 71.2k | } |
820 | 410k | if (flagch == 410k BOW410k || flagch == 344k EOW344k ) { |
821 | 136k | st = step(m->g, startst, stopst, st, flagch, st); |
822 | 136k | SP("sboweow", st, c); |
823 | 136k | } |
824 | 410k | |
825 | 410k | /* are we done? */ |
826 | 410k | if (ISSET(st, stopst)) |
827 | 127k | matchp = p; |
828 | 410k | if (EQ410k (st, empty) || 410k p == stop378k ) |
829 | 68.2k | break; /* NOTE BREAK OUT */ |
830 | 410k | |
831 | 410k | /* no, we must deal with this character */ |
832 | 342k | ASSIGN342k (tmp, st); |
833 | 342k | ASSIGN(st, empty); |
834 | 342k | assert(c != OUT); |
835 | 342k | st = step(m->g, startst, stopst, tmp, c, st); |
836 | 410k | SP("saft", st, c); |
837 | 410k | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); |
838 | 410k | p++; |
839 | 410k | } |
840 | 68.2k | |
841 | 68.2k | return(matchp); |
842 | 68.2k | } Line | Count | Source | 770 | 10.2k | { | 771 | 10.2k | states st = m->st; | 772 | 10.2k | states empty = m->empty; | 773 | 10.2k | states tmp = m->tmp; | 774 | 10.2k | const char *p = start; | 775 | 10.2k | int c = (start == m->beginp) ? OUT387 : *(start-1)9.90k ; | 776 | 10.2k | int lastc; /* previous c */ | 777 | 10.2k | int flagch; | 778 | 10.2k | int i; | 779 | 10.2k | const char *matchp; /* last p at which a match ended */ | 780 | 10.2k | | 781 | 10.2k | AT("slow", start, stop, startst, stopst); | 782 | 10.2k | CLEAR(st); | 783 | 10.2k | SET1(st, startst); | 784 | 10.2k | SP("sstart", st, *p); | 785 | 10.2k | st = step10.2k (m->g, startst, stopst, st, NOTHING10.2k , st); | 786 | 10.2k | matchp = NULL; | 787 | 37.6k | for (;;) { | 788 | 37.6k | /* next character */ | 789 | 37.6k | lastc = c; | 790 | 37.6k | c = (p == m->endp) ? OUT5.57k : *p32.0k ; | 791 | 37.6k | | 792 | 37.6k | /* is there an EOL and/or BOL between lastc and c? */ | 793 | 37.6k | flagch = '\0'; | 794 | 37.6k | i = 0; | 795 | 37.6k | if ( (lastc == '\n' && 37.6k m->g->cflags&0 REG_NEWLINE0 ) || | 796 | 37.6k | (lastc == 37.6k OUT37.6k && !(m->eflags&387 REG_NOTBOL387 )) ) { | 797 | 387 | flagch = BOL; | 798 | 387 | i = m->g->nbol; | 799 | 387 | } | 800 | 37.6k | if ( (c == '\n' && 37.6k m->g->cflags&0 REG_NEWLINE0 ) || | 801 | 37.6k | (c == 37.6k OUT37.6k && !(m->eflags&5.57k REG_NOTEOL5.57k )) ) { | 802 | 5.57k | flagch = (flagch == BOL5.57k ) ? BOLEOL0 : EOL5.57k ; | 803 | 5.57k | i += m->g->neol; | 804 | 5.57k | } | 805 | 37.6k | if (i != 037.6k ) { | 806 | 11.9k | for (; i > 011.9k ; i--5.96k ) | 807 | 5.96k | st = 5.96k step5.96k (m->g, startst, stopst, st, flagch, st); | 808 | 5.96k | SP("sboleol", st, c); | 809 | 5.96k | } | 810 | 37.6k | | 811 | 37.6k | /* how about a word boundary? */ | 812 | 37.6k | if ( (flagch == 37.6k BOL37.6k || (lastc != 37.2k OUT37.2k && !37.2k ISWORD37.2k (lastc))) && | 813 | 37.6k | (c != 9.95k OUT9.95k && ISWORD9.79k (c)) ) { | 814 | 8.03k | flagch = BOW; | 815 | 8.03k | } | 816 | 37.6k | if ( (lastc != 37.6k OUT37.6k && ISWORD37.2k (lastc)) && | 817 | 37.6k | (flagch == 27.6k EOL27.6k || (c != 22.2k OUT22.2k && !22.2k ISWORD22.2k (c))) ) { | 818 | 7.94k | flagch = EOW; | 819 | 7.94k | } | 820 | 37.6k | if (flagch == 37.6k BOW37.6k || flagch == 29.5k EOW29.5k ) { | 821 | 15.9k | st = step(m->g, startst, stopst, st, flagch, st); | 822 | 15.9k | SP("sboweow", st, c); | 823 | 15.9k | } | 824 | 37.6k | | 825 | 37.6k | /* are we done? */ | 826 | 37.6k | if (ISSET(st, stopst)) | 827 | 10.4k | matchp = p; | 828 | 37.6k | if (EQ37.6k (st, empty) || 37.6k p == stop35.7k ) | 829 | 10.2k | break; /* NOTE BREAK OUT */ | 830 | 37.6k | | 831 | 37.6k | /* no, we must deal with this character */ | 832 | 27.3k | ASSIGN27.3k (tmp, st); | 833 | 27.3k | ASSIGN(st, empty); | 834 | 27.3k | assert(c != OUT); | 835 | 27.3k | st = step(m->g, startst, stopst, tmp, c, st); | 836 | 37.6k | SP("saft", st, c); | 837 | 37.6k | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | 838 | 37.6k | p++; | 839 | 37.6k | } | 840 | 10.2k | | 841 | 10.2k | return(matchp); | 842 | 10.2k | } |
Line | Count | Source | 770 | 57.9k | { | 771 | 57.9k | states st = m->st; | 772 | 57.9k | states empty = m->empty; | 773 | 57.9k | states tmp = m->tmp; | 774 | 57.9k | const char *p = start; | 775 | 57.9k | int c = (start == m->beginp) ? OUT5.89k : *(start-1)52.0k ; | 776 | 57.9k | int lastc; /* previous c */ | 777 | 57.9k | int flagch; | 778 | 57.9k | int i; | 779 | 57.9k | const char *matchp; /* last p at which a match ended */ | 780 | 57.9k | | 781 | 57.9k | AT("slow", start, stop, startst, stopst); | 782 | 57.9k | CLEAR(st); | 783 | 57.9k | SET1(st, startst); | 784 | 57.9k | SP("sstart", st, *p); | 785 | 57.9k | st = step57.9k (m->g, startst, stopst, st, NOTHING57.9k , st); | 786 | 57.9k | matchp = NULL; | 787 | 372k | for (;;) { | 788 | 372k | /* next character */ | 789 | 372k | lastc = c; | 790 | 372k | c = (p == m->endp) ? OUT10.3k : *p362k ; | 791 | 372k | | 792 | 372k | /* is there an EOL and/or BOL between lastc and c? */ | 793 | 372k | flagch = '\0'; | 794 | 372k | i = 0; | 795 | 372k | if ( (lastc == '\n' && 372k m->g->cflags&12 REG_NEWLINE12 ) || | 796 | 372k | (lastc == 372k OUT372k && !(m->eflags&5.89k REG_NOTBOL5.89k )) ) { | 797 | 5.89k | flagch = BOL; | 798 | 5.89k | i = m->g->nbol; | 799 | 5.89k | } | 800 | 372k | if ( (c == '\n' && 372k m->g->cflags&10 REG_NEWLINE10 ) || | 801 | 372k | (c == 372k OUT372k && !(m->eflags&10.3k REG_NOTEOL10.3k )) ) { | 802 | 10.3k | flagch = (flagch == BOL10.3k ) ? BOLEOL0 : EOL10.3k ; | 803 | 10.3k | i += m->g->neol; | 804 | 10.3k | } | 805 | 372k | if (i != 0372k ) { | 806 | 11.9k | for (; i > 011.9k ; i--5.97k ) | 807 | 5.97k | st = 5.97k step5.97k (m->g, startst, stopst, st, flagch, st); | 808 | 5.97k | SP("sboleol", st, c); | 809 | 5.97k | } | 810 | 372k | | 811 | 372k | /* how about a word boundary? */ | 812 | 372k | if ( (flagch == 372k BOL372k || (lastc != 366k OUT366k && !366k ISWORD366k (lastc))) && | 813 | 372k | (c != 91.3k OUT91.3k && ISWORD81.1k (c)) ) { | 814 | 57.5k | flagch = BOW; | 815 | 57.5k | } | 816 | 372k | if ( (lastc != 372k OUT372k && ISWORD366k (lastc)) && | 817 | 372k | (flagch == 281k EOL281k || (c != 281k OUT281k && !281k ISWORD281k (c))) ) { | 818 | 63.3k | flagch = EOW; | 819 | 63.3k | } | 820 | 372k | if (flagch == 372k BOW372k || flagch == 315k EOW315k ) { | 821 | 120k | st = step(m->g, startst, stopst, st, flagch, st); | 822 | 120k | SP("sboweow", st, c); | 823 | 120k | } | 824 | 372k | | 825 | 372k | /* are we done? */ | 826 | 372k | if (ISSET(st, stopst)) | 827 | 116k | matchp = p; | 828 | 372k | if (EQ372k (st, empty) || 372k p == stop342k ) | 829 | 57.9k | break; /* NOTE BREAK OUT */ | 830 | 372k | | 831 | 372k | /* no, we must deal with this character */ | 832 | 314k | ASSIGN314k (tmp, st); | 833 | 314k | ASSIGN(st, empty); | 834 | 314k | assert(c != OUT); | 835 | 314k | st = step(m->g, startst, stopst, tmp, c, st); | 836 | 372k | SP("saft", st, c); | 837 | 372k | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | 838 | 372k | p++; | 839 | 372k | } | 840 | 57.9k | | 841 | 57.9k | return(matchp); | 842 | 57.9k | } |
|
843 | | |
844 | | |
845 | | /* |
846 | | - step - map set of states reachable before char to set reachable after |
847 | | */ |
848 | | static states |
849 | | step(struct re_guts *g, |
850 | | sopno start, /* start state within strip */ |
851 | | sopno stop, /* state after stop state within strip */ |
852 | | states bef, /* states reachable before */ |
853 | | int ch, /* character or NONCHAR code */ |
854 | | states aft) /* states already known reachable after */ |
855 | 30.7M | { |
856 | 30.7M | cset *cs; |
857 | 30.7M | sop s; |
858 | 30.7M | sopno pc; |
859 | 30.7M | onestate here; /* note, macros know this name */ |
860 | 30.7M | sopno look; |
861 | 30.7M | int i; |
862 | 30.7M | |
863 | 1.12G | for (pc = start, INIT30.7M (here, pc); pc != stop1.12G ; pc++, 1.09G INC1.09G (here)) { |
864 | 1.09G | s = g->strip[pc]; |
865 | 1.09G | switch (OP(s)) { |
866 | 0 | case 0 OEND0 : |
867 | 0 | assert(pc == stop-1); |
868 | 0 | break; |
869 | 525M | case 525M OCHAR525M : |
870 | 525M | /* only characters can match */ |
871 | 525M | assert(!NONCHAR(ch) || ch != (char)OPND(s)); |
872 | 525M | if (ch == (char)525M OPND525M (s)) |
873 | 16.1M | FWD(aft, bef, 1); |
874 | 525M | break; |
875 | 29.9M | case 29.9M OBOL29.9M : |
876 | 29.9M | if (ch == 29.9M BOL29.9M || ch == 27.8M BOLEOL27.8M ) |
877 | 2.02M | FWD(aft, bef, 1); |
878 | 29.9M | break; |
879 | 14.5M | case 14.5M OEOL14.5M : |
880 | 14.5M | if (ch == 14.5M EOL14.5M || ch == 13.5M BOLEOL13.5M ) |
881 | 1.05M | FWD(aft, bef, 1); |
882 | 14.5M | break; |
883 | 0 | case 0 OBOW0 : |
884 | 0 | if (ch == 0 BOW0 ) |
885 | 0 | FWD(aft, bef, 1); |
886 | 0 | break; |
887 | 0 | case 0 OEOW0 : |
888 | 0 | if (ch == 0 EOW0 ) |
889 | 0 | FWD(aft, bef, 1); |
890 | 0 | break; |
891 | 1.97M | case 1.97M OANY1.97M : |
892 | 1.97M | if (!1.97M NONCHAR1.97M (ch)) |
893 | 1.35M | FWD(aft, bef, 1); |
894 | 1.97M | break; |
895 | 9.79M | case 9.79M OANYOF9.79M : |
896 | 9.79M | cs = &g->sets[OPND(s)]; |
897 | 9.79M | if (!9.79M NONCHAR9.79M (ch) && CHIN6.67M (cs, ch)) |
898 | 1.36M | FWD(aft, bef, 1); |
899 | 9.79M | break; |
900 | 706 | case 706 OBACK_706 : /* ignored here */ |
901 | 706 | case 706 O_BACK706 : |
902 | 706 | FWD(aft, aft, 1); |
903 | 706 | break; |
904 | 4.61M | case 4.61M OPLUS_4.61M : /* forward, this is just an empty */ |
905 | 4.61M | FWD(aft, aft, 1); |
906 | 4.61M | break; |
907 | 4.61M | case 4.61M O_PLUS4.61M : /* both forward and back */ |
908 | 4.61M | FWD(aft, aft, 1); |
909 | 4.61M | i = ISSETBACK(aft, OPND(s)); |
910 | 4.61M | BACK(aft, aft, OPND(s)); |
911 | 4.61M | if (!i && 4.61M ISSETBACK3.86M (aft, OPND(s))) { |
912 | 300k | /* oho, must reconsider loop body */ |
913 | 300k | pc -= OPND(s) + 1; |
914 | 300k | INIT(here, pc); |
915 | 300k | } |
916 | 4.61M | break; |
917 | 3.15M | case 3.15M OQUEST_3.15M : /* two branches, both forward */ |
918 | 3.15M | FWD(aft, aft, 1); |
919 | 3.15M | FWD(aft, aft, OPND(s)); |
920 | 3.15M | break; |
921 | 3.15M | case 3.15M O_QUEST3.15M : /* just an empty */ |
922 | 3.15M | FWD(aft, aft, 1); |
923 | 3.15M | break; |
924 | 161M | case 161M OLPAREN161M : /* not significant here */ |
925 | 161M | case 161M ORPAREN161M : |
926 | 161M | FWD(aft, aft, 1); |
927 | 161M | break; |
928 | 64.2M | case 64.2M OCH_64.2M : /* mark the first two branches */ |
929 | 64.2M | FWD(aft, aft, 1); |
930 | 64.2M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
931 | 64.2M | FWD(aft, aft, OPND(s)); |
932 | 64.2M | break; |
933 | 104M | case 104M OOR1104M : /* done a branch, find the O_CH */ |
934 | 104M | if (ISSTATEIN104M (aft, here)) { |
935 | 300k | for (look = 1; |
936 | 703k | OP703k (s = g->strip[pc+look]) != 703k O_CH703k ; |
937 | 402k | look += 402k OPND402k (s)) |
938 | 300k | assert(OP(s) == OOR2); |
939 | 300k | FWD(aft, aft, look); |
940 | 300k | } |
941 | 104M | break; |
942 | 104M | case 104M OOR2104M : /* propagate OCH_'s marking */ |
943 | 104M | FWD(aft, aft, 1); |
944 | 104M | if (OP104M (g->strip[pc+OPND(s)]) != 104M O_CH104M ) { |
945 | 40.1M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); |
946 | 40.1M | FWD(aft, aft, OPND(s)); |
947 | 40.1M | } |
948 | 104M | break; |
949 | 64.2M | case 64.2M O_CH64.2M : /* just empty */ |
950 | 64.2M | FWD(aft, aft, 1); |
951 | 64.2M | break; |
952 | 0 | default: /* ooooops... */ |
953 | 0 | assert(nope); |
954 | 0 | break; |
955 | 1.09G | } |
956 | 1.09G | } |
957 | 30.7M | |
958 | 30.7M | return(aft); |
959 | 30.7M | } Line | Count | Source | 855 | 27.8M | { | 856 | 27.8M | cset *cs; | 857 | 27.8M | sop s; | 858 | 27.8M | sopno pc; | 859 | 27.8M | onestate here; /* note, macros know this name */ | 860 | 27.8M | sopno look; | 861 | 27.8M | int i; | 862 | 27.8M | | 863 | 893M | for (pc = start, INIT27.8M (here, pc); pc != stop893M ; pc++, 865M INC865M (here)) { | 864 | 865M | s = g->strip[pc]; | 865 | 865M | switch (OP(s)) { | 866 | 0 | case 0 OEND0 : | 867 | 0 | assert(pc == stop-1); | 868 | 0 | break; | 869 | 402M | case 402M OCHAR402M : | 870 | 402M | /* only characters can match */ | 871 | 402M | assert(!NONCHAR(ch) || ch != (char)OPND(s)); | 872 | 402M | if (ch == (char)402M OPND402M (s)) | 873 | 12.6M | FWD(aft, bef, 1); | 874 | 402M | break; | 875 | 27.1M | case 27.1M OBOL27.1M : | 876 | 27.1M | if (ch == 27.1M BOL27.1M || ch == 25.3M BOLEOL25.3M ) | 877 | 1.84M | FWD(aft, bef, 1); | 878 | 27.1M | break; | 879 | 12.8M | case 12.8M OEOL12.8M : | 880 | 12.8M | if (ch == 12.8M EOL12.8M || ch == 11.9M BOLEOL11.9M ) | 881 | 946k | FWD(aft, bef, 1); | 882 | 12.8M | break; | 883 | 0 | case 0 OBOW0 : | 884 | 0 | if (ch == 0 BOW0 ) | 885 | 0 | FWD(aft, bef, 1); | 886 | 0 | break; | 887 | 0 | case 0 OEOW0 : | 888 | 0 | if (ch == 0 EOW0 ) | 889 | 0 | FWD(aft, bef, 1); | 890 | 0 | break; | 891 | 1.58M | case 1.58M OANY1.58M : | 892 | 1.58M | if (!1.58M NONCHAR1.58M (ch)) | 893 | 1.08M | FWD(aft, bef, 1); | 894 | 1.58M | break; | 895 | 8.43M | case 8.43M OANYOF8.43M : | 896 | 8.43M | cs = &g->sets[OPND(s)]; | 897 | 8.43M | if (!8.43M NONCHAR8.43M (ch) && CHIN5.71M (cs, ch)) | 898 | 1.26M | FWD(aft, bef, 1); | 899 | 8.43M | break; | 900 | 706 | case 706 OBACK_706 : /* ignored here */ | 901 | 706 | case 706 O_BACK706 : | 902 | 706 | FWD(aft, aft, 1); | 903 | 706 | break; | 904 | 4.05M | case 4.05M OPLUS_4.05M : /* forward, this is just an empty */ | 905 | 4.05M | FWD(aft, aft, 1); | 906 | 4.05M | break; | 907 | 4.05M | case 4.05M O_PLUS4.05M : /* both forward and back */ | 908 | 4.05M | FWD(aft, aft, 1); | 909 | 4.05M | i = ISSETBACK(aft, OPND(s)); | 910 | 4.05M | BACK(aft, aft, OPND(s)); | 911 | 4.05M | if (!i && 4.05M ISSETBACK3.35M (aft, OPND(s))) { | 912 | 281k | /* oho, must reconsider loop body */ | 913 | 281k | pc -= OPND(s) + 1; | 914 | 281k | INIT(here, pc); | 915 | 281k | } | 916 | 4.05M | break; | 917 | 2.69M | case 2.69M OQUEST_2.69M : /* two branches, both forward */ | 918 | 2.69M | FWD(aft, aft, 1); | 919 | 2.69M | FWD(aft, aft, OPND(s)); | 920 | 2.69M | break; | 921 | 2.69M | case 2.69M O_QUEST2.69M : /* just an empty */ | 922 | 2.69M | FWD(aft, aft, 1); | 923 | 2.69M | break; | 924 | 137M | case 137M OLPAREN137M : /* not significant here */ | 925 | 137M | case 137M ORPAREN137M : | 926 | 137M | FWD(aft, aft, 1); | 927 | 137M | break; | 928 | 51.4M | case 51.4M OCH_51.4M : /* mark the first two branches */ | 929 | 51.4M | FWD(aft, aft, 1); | 930 | 51.4M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 931 | 51.4M | FWD(aft, aft, OPND(s)); | 932 | 51.4M | break; | 933 | 79.7M | case 79.7M OOR179.7M : /* done a branch, find the O_CH */ | 934 | 79.7M | if (ISSTATEIN79.7M (aft, here)) { | 935 | 239k | for (look = 1; | 936 | 549k | OP549k (s = g->strip[pc+look]) != 549k O_CH549k ; | 937 | 310k | look += 310k OPND310k (s)) | 938 | 239k | assert(OP(s) == OOR2); | 939 | 239k | FWD(aft, aft, look); | 940 | 239k | } | 941 | 79.7M | break; | 942 | 79.7M | case 79.7M OOR279.7M : /* propagate OCH_'s marking */ | 943 | 79.7M | FWD(aft, aft, 1); | 944 | 79.7M | if (OP79.7M (g->strip[pc+OPND(s)]) != 79.7M O_CH79.7M ) { | 945 | 28.3M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 946 | 28.3M | FWD(aft, aft, OPND(s)); | 947 | 28.3M | } | 948 | 79.7M | break; | 949 | 51.4M | case 51.4M O_CH51.4M : /* just empty */ | 950 | 51.4M | FWD(aft, aft, 1); | 951 | 51.4M | break; | 952 | 0 | default: /* ooooops... */ | 953 | 0 | assert(nope); | 954 | 0 | break; | 955 | 865M | } | 956 | 865M | } | 957 | 27.8M | | 958 | 27.8M | return(aft); | 959 | 27.8M | } |
Line | Count | Source | 855 | 2.85M | { | 856 | 2.85M | cset *cs; | 857 | 2.85M | sop s; | 858 | 2.85M | sopno pc; | 859 | 2.85M | onestate here; /* note, macros know this name */ | 860 | 2.85M | sopno look; | 861 | 2.85M | int i; | 862 | 2.85M | | 863 | 233M | for (pc = start, INIT2.85M (here, pc); pc != stop233M ; pc++, 230M INC230M (here)) { | 864 | 230M | s = g->strip[pc]; | 865 | 230M | switch (OP(s)) { | 866 | 0 | case 0 OEND0 : | 867 | 0 | assert(pc == stop-1); | 868 | 0 | break; | 869 | 123M | case 123M OCHAR123M : | 870 | 123M | /* only characters can match */ | 871 | 123M | assert(!NONCHAR(ch) || ch != (char)OPND(s)); | 872 | 123M | if (ch == (char)123M OPND123M (s)) | 873 | 3.47M | FWD(aft, bef, 1); | 874 | 123M | break; | 875 | 2.74M | case 2.74M OBOL2.74M : | 876 | 2.74M | if (ch == 2.74M BOL2.74M || ch == 2.56M BOLEOL2.56M ) | 877 | 179k | FWD(aft, bef, 1); | 878 | 2.74M | break; | 879 | 1.70M | case 1.70M OEOL1.70M : | 880 | 1.70M | if (ch == 1.70M EOL1.70M || ch == 1.59M BOLEOL1.59M ) | 881 | 110k | FWD(aft, bef, 1); | 882 | 1.70M | break; | 883 | 0 | case 0 OBOW0 : | 884 | 0 | if (ch == 0 BOW0 ) | 885 | 0 | FWD(aft, bef, 1); | 886 | 0 | break; | 887 | 0 | case 0 OEOW0 : | 888 | 0 | if (ch == 0 EOW0 ) | 889 | 0 | FWD(aft, bef, 1); | 890 | 0 | break; | 891 | 389k | case 389k OANY389k : | 892 | 389k | if (!389k NONCHAR389k (ch)) | 893 | 270k | FWD(aft, bef, 1); | 894 | 389k | break; | 895 | 1.36M | case 1.36M OANYOF1.36M : | 896 | 1.36M | cs = &g->sets[OPND(s)]; | 897 | 1.36M | if (!1.36M NONCHAR1.36M (ch) && CHIN957k (cs, ch)) | 898 | 105k | FWD(aft, bef, 1); | 899 | 1.36M | break; | 900 | 0 | case 0 OBACK_0 : /* ignored here */ | 901 | 0 | case 0 O_BACK0 : | 902 | 0 | FWD(aft, aft, 1); | 903 | 0 | break; | 904 | 564k | case 564k OPLUS_564k : /* forward, this is just an empty */ | 905 | 564k | FWD(aft, aft, 1); | 906 | 564k | break; | 907 | 564k | case 564k O_PLUS564k : /* both forward and back */ | 908 | 564k | FWD(aft, aft, 1); | 909 | 564k | i = ISSETBACK(aft, OPND(s)); | 910 | 564k | BACK(aft, aft, OPND(s)); | 911 | 564k | if (!i && 564k ISSETBACK503k (aft, OPND(s))) { | 912 | 19.5k | /* oho, must reconsider loop body */ | 913 | 19.5k | pc -= OPND(s) + 1; | 914 | 19.5k | INIT(here, pc); | 915 | 19.5k | } | 916 | 564k | break; | 917 | 464k | case 464k OQUEST_464k : /* two branches, both forward */ | 918 | 464k | FWD(aft, aft, 1); | 919 | 464k | FWD(aft, aft, OPND(s)); | 920 | 464k | break; | 921 | 464k | case 464k O_QUEST464k : /* just an empty */ | 922 | 464k | FWD(aft, aft, 1); | 923 | 464k | break; | 924 | 23.7M | case 23.7M OLPAREN23.7M : /* not significant here */ | 925 | 23.7M | case 23.7M ORPAREN23.7M : | 926 | 23.7M | FWD(aft, aft, 1); | 927 | 23.7M | break; | 928 | 12.8M | case 12.8M OCH_12.8M : /* mark the first two branches */ | 929 | 12.8M | FWD(aft, aft, 1); | 930 | 12.8M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 931 | 12.8M | FWD(aft, aft, OPND(s)); | 932 | 12.8M | break; | 933 | 24.6M | case 24.6M OOR124.6M : /* done a branch, find the O_CH */ | 934 | 24.6M | if (ISSTATEIN24.6M (aft, here)) { | 935 | 61.2k | for (look = 1; | 936 | 153k | OP153k (s = g->strip[pc+look]) != 153k O_CH153k ; | 937 | 92.1k | look += 92.1k OPND92.1k (s)) | 938 | 61.2k | assert(OP(s) == OOR2); | 939 | 61.2k | FWD(aft, aft, look); | 940 | 61.2k | } | 941 | 24.6M | break; | 942 | 24.6M | case 24.6M OOR224.6M : /* propagate OCH_'s marking */ | 943 | 24.6M | FWD(aft, aft, 1); | 944 | 24.6M | if (OP24.6M (g->strip[pc+OPND(s)]) != 24.6M O_CH24.6M ) { | 945 | 11.7M | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | 946 | 11.7M | FWD(aft, aft, OPND(s)); | 947 | 11.7M | } | 948 | 24.6M | break; | 949 | 12.8M | case 12.8M O_CH12.8M : /* just empty */ | 950 | 12.8M | FWD(aft, aft, 1); | 951 | 12.8M | break; | 952 | 0 | default: /* ooooops... */ | 953 | 0 | assert(nope); | 954 | 0 | break; | 955 | 230M | } | 956 | 230M | } | 957 | 2.85M | | 958 | 2.85M | return(aft); | 959 | 2.85M | } |
|
960 | | |
961 | | #ifdef REDEBUG |
962 | | /* |
963 | | - print - print a set of states |
964 | | */ |
965 | | static void |
966 | | print(struct match *m, char *caption, states st, int ch, FILE *d) |
967 | | { |
968 | | struct re_guts *g = m->g; |
969 | | int i; |
970 | | int first = 1; |
971 | | |
972 | | if (!(m->eflags®_TRACE)) |
973 | | return; |
974 | | |
975 | | (void)fprintf(d, "%s", caption); |
976 | | if (ch != '\0') |
977 | | (void)fprintf(d, " %s", pchar(ch)); |
978 | | for (i = 0; i < g->nstates; i++) |
979 | | if (ISSET(st, i)) { |
980 | | (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i); |
981 | | first = 0; |
982 | | } |
983 | | (void)fprintf(d, "\n"); |
984 | | } |
985 | | |
986 | | /* |
987 | | - at - print current situation |
988 | | */ |
989 | | static void |
990 | | at(struct match *m, char *title, char *start, char *stop, sopno startst, |
991 | | sopno stopst) |
992 | | { |
993 | | if (!(m->eflags®_TRACE)) |
994 | | return; |
995 | | |
996 | | (void)printf("%s %s-", title, pchar(*start)); |
997 | | (void)printf("%s ", pchar(*stop)); |
998 | | (void)printf("%ld-%ld\n", (long)startst, (long)stopst); |
999 | | } |
1000 | | |
1001 | | #ifndef PCHARDONE |
1002 | | #define PCHARDONE /* never again */ |
1003 | | /* |
1004 | | - pchar - make a character printable |
1005 | | * |
1006 | | * Is this identical to regchar() over in debug.c? Well, yes. But a |
1007 | | * duplicate here avoids having a debugging-capable regexec.o tied to |
1008 | | * a matching debug.o, and this is convenient. It all disappears in |
1009 | | * the non-debug compilation anyway, so it doesn't matter much. |
1010 | | */ |
1011 | | static char * /* -> representation */ |
1012 | | pchar(int ch) |
1013 | | { |
1014 | | static char pbuf[10]; |
1015 | | |
1016 | | if (isprint(ch) || ch == ' ') |
1017 | | (void)snprintf(pbuf, sizeof pbuf, "%c", ch); |
1018 | | else |
1019 | | (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch); |
1020 | | return(pbuf); |
1021 | | } |
1022 | | #endif |
1023 | | #endif |
1024 | | |
1025 | | #undef matcher |
1026 | | #undef fast |
1027 | | #undef slow |
1028 | | #undef dissect |
1029 | | #undef backref |
1030 | | #undef step |
1031 | | #undef print |
1032 | | #undef at |
1033 | | #undef match |
1034 | | #undef nope |