1 /*
2  * Secret Labs' Regular Expression Engine
3  *
4  * regular expression matching engine
5  *
6  * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7  *
8  * See the _sre.c file for information on usage and redistribution.
9  */
10 
11 /* String matching engine */
12 
13 /* This file is included three times, with different character settings */
14 
15 LOCAL(int)
SRE(at)16 SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
17 {
18     /* check if pointer is at given position */
19 
20     Py_ssize_t thisp, thatp;
21 
22     switch (at) {
23 
24     case SRE_AT_BEGINNING:
25     case SRE_AT_BEGINNING_STRING:
26         return ((void*) ptr == state->beginning);
27 
28     case SRE_AT_BEGINNING_LINE:
29         return ((void*) ptr == state->beginning ||
30                 SRE_IS_LINEBREAK((int) ptr[-1]));
31 
32     case SRE_AT_END:
33         return (((SRE_CHAR *)state->end - ptr == 1 &&
34                  SRE_IS_LINEBREAK((int) ptr[0])) ||
35                 ((void*) ptr == state->end));
36 
37     case SRE_AT_END_LINE:
38         return ((void*) ptr == state->end ||
39                 SRE_IS_LINEBREAK((int) ptr[0]));
40 
41     case SRE_AT_END_STRING:
42         return ((void*) ptr == state->end);
43 
44     case SRE_AT_BOUNDARY:
45         if (state->beginning == state->end)
46             return 0;
47         thatp = ((void*) ptr > state->beginning) ?
48             SRE_IS_WORD((int) ptr[-1]) : 0;
49         thisp = ((void*) ptr < state->end) ?
50             SRE_IS_WORD((int) ptr[0]) : 0;
51         return thisp != thatp;
52 
53     case SRE_AT_NON_BOUNDARY:
54         if (state->beginning == state->end)
55             return 0;
56         thatp = ((void*) ptr > state->beginning) ?
57             SRE_IS_WORD((int) ptr[-1]) : 0;
58         thisp = ((void*) ptr < state->end) ?
59             SRE_IS_WORD((int) ptr[0]) : 0;
60         return thisp == thatp;
61 
62     case SRE_AT_LOC_BOUNDARY:
63         if (state->beginning == state->end)
64             return 0;
65         thatp = ((void*) ptr > state->beginning) ?
66             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67         thisp = ((void*) ptr < state->end) ?
68             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69         return thisp != thatp;
70 
71     case SRE_AT_LOC_NON_BOUNDARY:
72         if (state->beginning == state->end)
73             return 0;
74         thatp = ((void*) ptr > state->beginning) ?
75             SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76         thisp = ((void*) ptr < state->end) ?
77             SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78         return thisp == thatp;
79 
80     case SRE_AT_UNI_BOUNDARY:
81         if (state->beginning == state->end)
82             return 0;
83         thatp = ((void*) ptr > state->beginning) ?
84             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85         thisp = ((void*) ptr < state->end) ?
86             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87         return thisp != thatp;
88 
89     case SRE_AT_UNI_NON_BOUNDARY:
90         if (state->beginning == state->end)
91             return 0;
92         thatp = ((void*) ptr > state->beginning) ?
93             SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94         thisp = ((void*) ptr < state->end) ?
95             SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96         return thisp == thatp;
97 
98     }
99 
100     return 0;
101 }
102 
103 LOCAL(int)
SRE(charset)104 SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
105 {
106     /* check if character is a member of the given set */
107 
108     int ok = 1;
109 
110     for (;;) {
111         switch (*set++) {
112 
113         case SRE_OP_FAILURE:
114             return !ok;
115 
116         case SRE_OP_LITERAL:
117             /* <LITERAL> <code> */
118             if (ch == set[0])
119                 return ok;
120             set++;
121             break;
122 
123         case SRE_OP_CATEGORY:
124             /* <CATEGORY> <code> */
125             if (sre_category(set[0], (int) ch))
126                 return ok;
127             set++;
128             break;
129 
130         case SRE_OP_CHARSET:
131             /* <CHARSET> <bitmap> */
132             if (ch < 256 &&
133                 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134                 return ok;
135             set += 256/SRE_CODE_BITS;
136             break;
137 
138         case SRE_OP_RANGE:
139             /* <RANGE> <lower> <upper> */
140             if (set[0] <= ch && ch <= set[1])
141                 return ok;
142             set += 2;
143             break;
144 
145         case SRE_OP_RANGE_UNI_IGNORE:
146             /* <RANGE_UNI_IGNORE> <lower> <upper> */
147         {
148             SRE_CODE uch;
149             /* ch is already lower cased */
150             if (set[0] <= ch && ch <= set[1])
151                 return ok;
152             uch = sre_upper_unicode(ch);
153             if (set[0] <= uch && uch <= set[1])
154                 return ok;
155             set += 2;
156             break;
157         }
158 
159         case SRE_OP_NEGATE:
160             ok = !ok;
161             break;
162 
163         case SRE_OP_BIGCHARSET:
164             /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165         {
166             Py_ssize_t count, block;
167             count = *(set++);
168 
169             if (ch < 0x10000u)
170                 block = ((unsigned char*)set)[ch >> 8];
171             else
172                 block = -1;
173             set += 256/sizeof(SRE_CODE);
174             if (block >=0 &&
175                 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176                     (1u << (ch & (SRE_CODE_BITS-1)))))
177                 return ok;
178             set += count * (256/SRE_CODE_BITS);
179             break;
180         }
181 
182         default:
183             /* internal error -- there's not much we can do about it
184                here, so let's just pretend it didn't match... */
185             return 0;
186         }
187     }
188 }
189 
190 LOCAL(int)
SRE(charset_loc_ignore)191 SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
192 {
193     SRE_CODE lo, up;
194     lo = sre_lower_locale(ch);
195     if (SRE(charset)(state, set, lo))
196        return 1;
197 
198     up = sre_upper_locale(ch);
199     return up != lo && SRE(charset)(state, set, up);
200 }
201 
202 LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel);
203 
204 LOCAL(Py_ssize_t)
SRE(count)205 SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
206 {
207     SRE_CODE chr;
208     SRE_CHAR c;
209     SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
210     SRE_CHAR* end = (SRE_CHAR *)state->end;
211     Py_ssize_t i;
212 
213     /* adjust end */
214     if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
215         end = ptr + maxcount;
216 
217     switch (pattern[0]) {
218 
219     case SRE_OP_IN:
220         /* repeated set */
221         TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
222         while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
223             ptr++;
224         break;
225 
226     case SRE_OP_ANY:
227         /* repeated dot wildcard. */
228         TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
229         while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
230             ptr++;
231         break;
232 
233     case SRE_OP_ANY_ALL:
234         /* repeated dot wildcard.  skip to the end of the target
235            string, and backtrack from there */
236         TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
237         ptr = end;
238         break;
239 
240     case SRE_OP_LITERAL:
241         /* repeated literal */
242         chr = pattern[1];
243         TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
244         c = (SRE_CHAR) chr;
245 #if SIZEOF_SRE_CHAR < 4
246         if ((SRE_CODE) c != chr)
247             ; /* literal can't match: doesn't fit in char width */
248         else
249 #endif
250         while (ptr < end && *ptr == c)
251             ptr++;
252         break;
253 
254     case SRE_OP_LITERAL_IGNORE:
255         /* repeated literal */
256         chr = pattern[1];
257         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
258         while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
259             ptr++;
260         break;
261 
262     case SRE_OP_LITERAL_UNI_IGNORE:
263         /* repeated literal */
264         chr = pattern[1];
265         TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
266         while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
267             ptr++;
268         break;
269 
270     case SRE_OP_LITERAL_LOC_IGNORE:
271         /* repeated literal */
272         chr = pattern[1];
273         TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
274         while (ptr < end && char_loc_ignore(chr, *ptr))
275             ptr++;
276         break;
277 
278     case SRE_OP_NOT_LITERAL:
279         /* repeated non-literal */
280         chr = pattern[1];
281         TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
282         c = (SRE_CHAR) chr;
283 #if SIZEOF_SRE_CHAR < 4
284         if ((SRE_CODE) c != chr)
285             ptr = end; /* literal can't match: doesn't fit in char width */
286         else
287 #endif
288         while (ptr < end && *ptr != c)
289             ptr++;
290         break;
291 
292     case SRE_OP_NOT_LITERAL_IGNORE:
293         /* repeated non-literal */
294         chr = pattern[1];
295         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
296         while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
297             ptr++;
298         break;
299 
300     case SRE_OP_NOT_LITERAL_UNI_IGNORE:
301         /* repeated non-literal */
302         chr = pattern[1];
303         TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
304         while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
305             ptr++;
306         break;
307 
308     case SRE_OP_NOT_LITERAL_LOC_IGNORE:
309         /* repeated non-literal */
310         chr = pattern[1];
311         TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
312         while (ptr < end && !char_loc_ignore(chr, *ptr))
313             ptr++;
314         break;
315 
316     default:
317         /* repeated single character pattern */
318         TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
319         while ((SRE_CHAR*) state->ptr < end) {
320             i = SRE(match)(state, pattern, 0);
321             if (i < 0)
322                 return i;
323             if (!i)
324                 break;
325         }
326         TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
327                (SRE_CHAR*) state->ptr - ptr));
328         return (SRE_CHAR*) state->ptr - ptr;
329     }
330 
331     TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
332            ptr - (SRE_CHAR*) state->ptr));
333     return ptr - (SRE_CHAR*) state->ptr;
334 }
335 
336 #if 0 /* not used in this release */
337 LOCAL(int)
338 SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
339 {
340     /* check if an SRE_OP_INFO block matches at the current position.
341        returns the number of SRE_CODE objects to skip if successful, 0
342        if no match */
343 
344     SRE_CHAR* end = (SRE_CHAR*) state->end;
345     SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
346     Py_ssize_t i;
347 
348     /* check minimal length */
349     if (pattern[3] && end - ptr < pattern[3])
350         return 0;
351 
352     /* check known prefix */
353     if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
354         /* <length> <skip> <prefix data> <overlap data> */
355         for (i = 0; i < pattern[5]; i++)
356             if ((SRE_CODE) ptr[i] != pattern[7 + i])
357                 return 0;
358         return pattern[0] + 2 * pattern[6];
359     }
360     return pattern[0];
361 }
362 #endif
363 
364 /* The macros below should be used to protect recursive SRE(match)()
365  * calls that *failed* and do *not* return immediately (IOW, those
366  * that will backtrack). Explaining:
367  *
368  * - Recursive SRE(match)() returned true: that's usually a success
369  *   (besides atypical cases like ASSERT_NOT), therefore there's no
370  *   reason to restore lastmark;
371  *
372  * - Recursive SRE(match)() returned false but the current SRE(match)()
373  *   is returning to the caller: If the current SRE(match)() is the
374  *   top function of the recursion, returning false will be a matching
375  *   failure, and it doesn't matter where lastmark is pointing to.
376  *   If it's *not* the top function, it will be a recursive SRE(match)()
377  *   failure by itself, and the calling SRE(match)() will have to deal
378  *   with the failure by the same rules explained here (it will restore
379  *   lastmark by itself if necessary);
380  *
381  * - Recursive SRE(match)() returned false, and will continue the
382  *   outside 'for' loop: must be protected when breaking, since the next
383  *   OP could potentially depend on lastmark;
384  *
385  * - Recursive SRE(match)() returned false, and will be called again
386  *   inside a local for/while loop: must be protected between each
387  *   loop iteration, since the recursive SRE(match)() could do anything,
388  *   and could potentially depend on lastmark.
389  *
390  * For more information, check the discussion at SF patch #712900.
391  */
392 #define LASTMARK_SAVE()     \
393     do { \
394         ctx->lastmark = state->lastmark; \
395         ctx->lastindex = state->lastindex; \
396     } while (0)
397 #define LASTMARK_RESTORE()  \
398     do { \
399         state->lastmark = ctx->lastmark; \
400         state->lastindex = ctx->lastindex; \
401     } while (0)
402 
403 #define RETURN_ERROR(i) do { return i; } while(0)
404 #define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
405 #define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
406 
407 #define RETURN_ON_ERROR(i) \
408     do { if (i < 0) RETURN_ERROR(i); } while (0)
409 #define RETURN_ON_SUCCESS(i) \
410     do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
411 #define RETURN_ON_FAILURE(i) \
412     do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
413 
414 #define DATA_STACK_ALLOC(state, type, ptr) \
415 do { \
416     alloc_pos = state->data_stack_base; \
417     TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
418            "(%" PY_FORMAT_SIZE_T "d)\n", \
419            Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
420     if (sizeof(type) > state->data_stack_size - alloc_pos) { \
421         int j = data_stack_grow(state, sizeof(type)); \
422         if (j < 0) return j; \
423         if (ctx_pos != -1) \
424             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
425     } \
426     ptr = (type*)(state->data_stack+alloc_pos); \
427     state->data_stack_base += sizeof(type); \
428 } while (0)
429 
430 #define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
431 do { \
432     TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), pos)); \
433     ptr = (type*)(state->data_stack+pos); \
434 } while (0)
435 
436 #define DATA_STACK_PUSH(state, data, size) \
437 do { \
438     TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
439            "(%" PY_FORMAT_SIZE_T "d)\n", \
440            data, state->data_stack_base, size)); \
441     if (size > state->data_stack_size - state->data_stack_base) { \
442         int j = data_stack_grow(state, size); \
443         if (j < 0) return j; \
444         if (ctx_pos != -1) \
445             DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
446     } \
447     memcpy(state->data_stack+state->data_stack_base, data, size); \
448     state->data_stack_base += size; \
449 } while (0)
450 
451 #define DATA_STACK_POP(state, data, size, discard) \
452 do { \
453     TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
454            "(%" PY_FORMAT_SIZE_T "d)\n", \
455            data, state->data_stack_base-size, size)); \
456     memcpy(data, state->data_stack+state->data_stack_base-size, size); \
457     if (discard) \
458         state->data_stack_base -= size; \
459 } while (0)
460 
461 #define DATA_STACK_POP_DISCARD(state, size) \
462 do { \
463     TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
464            "(%" PY_FORMAT_SIZE_T "d)\n", \
465            state->data_stack_base-size, size)); \
466     state->data_stack_base -= size; \
467 } while(0)
468 
469 #define DATA_PUSH(x) \
470     DATA_STACK_PUSH(state, (x), sizeof(*(x)))
471 #define DATA_POP(x) \
472     DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
473 #define DATA_POP_DISCARD(x) \
474     DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
475 #define DATA_ALLOC(t,p) \
476     DATA_STACK_ALLOC(state, t, p)
477 #define DATA_LOOKUP_AT(t,p,pos) \
478     DATA_STACK_LOOKUP_AT(state,t,p,pos)
479 
480 #define MARK_PUSH(lastmark) \
481     do if (lastmark > 0) { \
482         i = lastmark; /* ctx->lastmark may change if reallocated */ \
483         DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
484     } while (0)
485 #define MARK_POP(lastmark) \
486     do if (lastmark > 0) { \
487         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
488     } while (0)
489 #define MARK_POP_KEEP(lastmark) \
490     do if (lastmark > 0) { \
491         DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
492     } while (0)
493 #define MARK_POP_DISCARD(lastmark) \
494     do if (lastmark > 0) { \
495         DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
496     } while (0)
497 
498 #define JUMP_NONE            0
499 #define JUMP_MAX_UNTIL_1     1
500 #define JUMP_MAX_UNTIL_2     2
501 #define JUMP_MAX_UNTIL_3     3
502 #define JUMP_MIN_UNTIL_1     4
503 #define JUMP_MIN_UNTIL_2     5
504 #define JUMP_MIN_UNTIL_3     6
505 #define JUMP_REPEAT          7
506 #define JUMP_REPEAT_ONE_1    8
507 #define JUMP_REPEAT_ONE_2    9
508 #define JUMP_MIN_REPEAT_ONE  10
509 #define JUMP_BRANCH          11
510 #define JUMP_ASSERT          12
511 #define JUMP_ASSERT_NOT      13
512 
513 #define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \
514     DATA_ALLOC(SRE(match_context), nextctx); \
515     nextctx->last_ctx_pos = ctx_pos; \
516     nextctx->jump = jumpvalue; \
517     nextctx->pattern = nextpattern; \
518     nextctx->toplevel = toplevel_; \
519     ctx_pos = alloc_pos; \
520     ctx = nextctx; \
521     goto entrance; \
522     jumplabel: \
523     while (0) /* gcc doesn't like labels at end of scopes */ \
524 
525 #define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
526     DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel)
527 
528 #define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
529     DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
530 
531 typedef struct {
532     Py_ssize_t last_ctx_pos;
533     Py_ssize_t jump;
534     SRE_CHAR* ptr;
535     SRE_CODE* pattern;
536     Py_ssize_t count;
537     Py_ssize_t lastmark;
538     Py_ssize_t lastindex;
539     union {
540         SRE_CODE chr;
541         SRE_REPEAT* rep;
542     } u;
543     int toplevel;
544 } SRE(match_context);
545 
546 /* check if string matches the given pattern.  returns <0 for
547    error, 0 for failure, and 1 for success */
548 LOCAL(Py_ssize_t)
SRE(match)549 SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel)
550 {
551     SRE_CHAR* end = (SRE_CHAR *)state->end;
552     Py_ssize_t alloc_pos, ctx_pos = -1;
553     Py_ssize_t i, ret = 0;
554     Py_ssize_t jump;
555     unsigned int sigcount=0;
556 
557     SRE(match_context)* ctx;
558     SRE(match_context)* nextctx;
559 
560     TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
561 
562     DATA_ALLOC(SRE(match_context), ctx);
563     ctx->last_ctx_pos = -1;
564     ctx->jump = JUMP_NONE;
565     ctx->pattern = pattern;
566     ctx->toplevel = toplevel;
567     ctx_pos = alloc_pos;
568 
569 entrance:
570 
571     ctx->ptr = (SRE_CHAR *)state->ptr;
572 
573     if (ctx->pattern[0] == SRE_OP_INFO) {
574         /* optimization info block */
575         /* <INFO> <1=skip> <2=flags> <3=min> ... */
576         if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
577             TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
578                    "need %" PY_FORMAT_SIZE_T "d)\n",
579                    end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
580             RETURN_FAILURE;
581         }
582         ctx->pattern += ctx->pattern[1] + 1;
583     }
584 
585     for (;;) {
586         ++sigcount;
587         if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
588             RETURN_ERROR(SRE_ERROR_INTERRUPTED);
589 
590         switch (*ctx->pattern++) {
591 
592         case SRE_OP_MARK:
593             /* set mark */
594             /* <MARK> <gid> */
595             TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
596                    ctx->ptr, ctx->pattern[0]));
597             i = ctx->pattern[0];
598             if (i & 1)
599                 state->lastindex = i/2 + 1;
600             if (i > state->lastmark) {
601                 /* state->lastmark is the highest valid index in the
602                    state->mark array.  If it is increased by more than 1,
603                    the intervening marks must be set to NULL to signal
604                    that these marks have not been encountered. */
605                 Py_ssize_t j = state->lastmark + 1;
606                 while (j < i)
607                     state->mark[j++] = NULL;
608                 state->lastmark = i;
609             }
610             state->mark[i] = ctx->ptr;
611             ctx->pattern++;
612             break;
613 
614         case SRE_OP_LITERAL:
615             /* match literal string */
616             /* <LITERAL> <code> */
617             TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
618                    ctx->ptr, *ctx->pattern));
619             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
620                 RETURN_FAILURE;
621             ctx->pattern++;
622             ctx->ptr++;
623             break;
624 
625         case SRE_OP_NOT_LITERAL:
626             /* match anything that is not literal character */
627             /* <NOT_LITERAL> <code> */
628             TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
629                    ctx->ptr, *ctx->pattern));
630             if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
631                 RETURN_FAILURE;
632             ctx->pattern++;
633             ctx->ptr++;
634             break;
635 
636         case SRE_OP_SUCCESS:
637             /* end of pattern */
638             TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
639             if (ctx->toplevel &&
640                 ((state->match_all && ctx->ptr != state->end) ||
641                  (state->must_advance && ctx->ptr == state->start)))
642             {
643                 RETURN_FAILURE;
644             }
645             state->ptr = ctx->ptr;
646             RETURN_SUCCESS;
647 
648         case SRE_OP_AT:
649             /* match at given position */
650             /* <AT> <code> */
651             TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
652             if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
653                 RETURN_FAILURE;
654             ctx->pattern++;
655             break;
656 
657         case SRE_OP_CATEGORY:
658             /* match at given category */
659             /* <CATEGORY> <code> */
660             TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
661                    ctx->ptr, *ctx->pattern));
662             if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
663                 RETURN_FAILURE;
664             ctx->pattern++;
665             ctx->ptr++;
666             break;
667 
668         case SRE_OP_ANY:
669             /* match anything (except a newline) */
670             /* <ANY> */
671             TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
672             if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
673                 RETURN_FAILURE;
674             ctx->ptr++;
675             break;
676 
677         case SRE_OP_ANY_ALL:
678             /* match anything */
679             /* <ANY_ALL> */
680             TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
681             if (ctx->ptr >= end)
682                 RETURN_FAILURE;
683             ctx->ptr++;
684             break;
685 
686         case SRE_OP_IN:
687             /* match set member (or non_member) */
688             /* <IN> <skip> <set> */
689             TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
690             if (ctx->ptr >= end ||
691                 !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
692                 RETURN_FAILURE;
693             ctx->pattern += ctx->pattern[0];
694             ctx->ptr++;
695             break;
696 
697         case SRE_OP_LITERAL_IGNORE:
698             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
699                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
700             if (ctx->ptr >= end ||
701                 sre_lower_ascii(*ctx->ptr) != *ctx->pattern)
702                 RETURN_FAILURE;
703             ctx->pattern++;
704             ctx->ptr++;
705             break;
706 
707         case SRE_OP_LITERAL_UNI_IGNORE:
708             TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
709                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
710             if (ctx->ptr >= end ||
711                 sre_lower_unicode(*ctx->ptr) != *ctx->pattern)
712                 RETURN_FAILURE;
713             ctx->pattern++;
714             ctx->ptr++;
715             break;
716 
717         case SRE_OP_LITERAL_LOC_IGNORE:
718             TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
719                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
720             if (ctx->ptr >= end
721                 || !char_loc_ignore(*ctx->pattern, *ctx->ptr))
722                 RETURN_FAILURE;
723             ctx->pattern++;
724             ctx->ptr++;
725             break;
726 
727         case SRE_OP_NOT_LITERAL_IGNORE:
728             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
729                    ctx->pattern, ctx->ptr, *ctx->pattern));
730             if (ctx->ptr >= end ||
731                 sre_lower_ascii(*ctx->ptr) == *ctx->pattern)
732                 RETURN_FAILURE;
733             ctx->pattern++;
734             ctx->ptr++;
735             break;
736 
737         case SRE_OP_NOT_LITERAL_UNI_IGNORE:
738             TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
739                    ctx->pattern, ctx->ptr, *ctx->pattern));
740             if (ctx->ptr >= end ||
741                 sre_lower_unicode(*ctx->ptr) == *ctx->pattern)
742                 RETURN_FAILURE;
743             ctx->pattern++;
744             ctx->ptr++;
745             break;
746 
747         case SRE_OP_NOT_LITERAL_LOC_IGNORE:
748             TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
749                    ctx->pattern, ctx->ptr, *ctx->pattern));
750             if (ctx->ptr >= end
751                 || char_loc_ignore(*ctx->pattern, *ctx->ptr))
752                 RETURN_FAILURE;
753             ctx->pattern++;
754             ctx->ptr++;
755             break;
756 
757         case SRE_OP_IN_IGNORE:
758             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
759             if (ctx->ptr >= end
760                 || !SRE(charset)(state, ctx->pattern+1,
761                                  (SRE_CODE)sre_lower_ascii(*ctx->ptr)))
762                 RETURN_FAILURE;
763             ctx->pattern += ctx->pattern[0];
764             ctx->ptr++;
765             break;
766 
767         case SRE_OP_IN_UNI_IGNORE:
768             TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr));
769             if (ctx->ptr >= end
770                 || !SRE(charset)(state, ctx->pattern+1,
771                                  (SRE_CODE)sre_lower_unicode(*ctx->ptr)))
772                 RETURN_FAILURE;
773             ctx->pattern += ctx->pattern[0];
774             ctx->ptr++;
775             break;
776 
777         case SRE_OP_IN_LOC_IGNORE:
778             TRACE(("|%p|%p|IN_LOC_IGNORE\n", ctx->pattern, ctx->ptr));
779             if (ctx->ptr >= end
780                 || !SRE(charset_loc_ignore)(state, ctx->pattern+1, *ctx->ptr))
781                 RETURN_FAILURE;
782             ctx->pattern += ctx->pattern[0];
783             ctx->ptr++;
784             break;
785 
786         case SRE_OP_JUMP:
787         case SRE_OP_INFO:
788             /* jump forward */
789             /* <JUMP> <offset> */
790             TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
791                    ctx->ptr, ctx->pattern[0]));
792             ctx->pattern += ctx->pattern[0];
793             break;
794 
795         case SRE_OP_BRANCH:
796             /* alternation */
797             /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
798             TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
799             LASTMARK_SAVE();
800             ctx->u.rep = state->repeat;
801             if (ctx->u.rep)
802                 MARK_PUSH(ctx->lastmark);
803             for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
804                 if (ctx->pattern[1] == SRE_OP_LITERAL &&
805                     (ctx->ptr >= end ||
806                      (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
807                     continue;
808                 if (ctx->pattern[1] == SRE_OP_IN &&
809                     (ctx->ptr >= end ||
810                      !SRE(charset)(state, ctx->pattern + 3,
811                                    (SRE_CODE) *ctx->ptr)))
812                     continue;
813                 state->ptr = ctx->ptr;
814                 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
815                 if (ret) {
816                     if (ctx->u.rep)
817                         MARK_POP_DISCARD(ctx->lastmark);
818                     RETURN_ON_ERROR(ret);
819                     RETURN_SUCCESS;
820                 }
821                 if (ctx->u.rep)
822                     MARK_POP_KEEP(ctx->lastmark);
823                 LASTMARK_RESTORE();
824             }
825             if (ctx->u.rep)
826                 MARK_POP_DISCARD(ctx->lastmark);
827             RETURN_FAILURE;
828 
829         case SRE_OP_REPEAT_ONE:
830             /* match repeated sequence (maximizing regexp) */
831 
832             /* this operator only works if the repeated item is
833                exactly one character wide, and we're not already
834                collecting backtracking points.  for other cases,
835                use the MAX_REPEAT operator */
836 
837             /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
838 
839             TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
840                    ctx->pattern[1], ctx->pattern[2]));
841 
842             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
843                 RETURN_FAILURE; /* cannot match */
844 
845             state->ptr = ctx->ptr;
846 
847             ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
848             RETURN_ON_ERROR(ret);
849             DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
850             ctx->count = ret;
851             ctx->ptr += ctx->count;
852 
853             /* when we arrive here, count contains the number of
854                matches, and ctx->ptr points to the tail of the target
855                string.  check if the rest of the pattern matches,
856                and backtrack if not. */
857 
858             if (ctx->count < (Py_ssize_t) ctx->pattern[1])
859                 RETURN_FAILURE;
860 
861             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
862                 ctx->ptr == state->end &&
863                 !(ctx->toplevel && state->must_advance && ctx->ptr == state->start))
864             {
865                 /* tail is empty.  we're finished */
866                 state->ptr = ctx->ptr;
867                 RETURN_SUCCESS;
868             }
869 
870             LASTMARK_SAVE();
871 
872             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
873                 /* tail starts with a literal. skip positions where
874                    the rest of the pattern cannot possibly match */
875                 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
876                 for (;;) {
877                     while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
878                            (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
879                         ctx->ptr--;
880                         ctx->count--;
881                     }
882                     if (ctx->count < (Py_ssize_t) ctx->pattern[1])
883                         break;
884                     state->ptr = ctx->ptr;
885                     DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
886                             ctx->pattern+ctx->pattern[0]);
887                     if (ret) {
888                         RETURN_ON_ERROR(ret);
889                         RETURN_SUCCESS;
890                     }
891 
892                     LASTMARK_RESTORE();
893 
894                     ctx->ptr--;
895                     ctx->count--;
896                 }
897 
898             } else {
899                 /* general case */
900                 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
901                     state->ptr = ctx->ptr;
902                     DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
903                             ctx->pattern+ctx->pattern[0]);
904                     if (ret) {
905                         RETURN_ON_ERROR(ret);
906                         RETURN_SUCCESS;
907                     }
908                     ctx->ptr--;
909                     ctx->count--;
910                     LASTMARK_RESTORE();
911                 }
912             }
913             RETURN_FAILURE;
914 
915         case SRE_OP_MIN_REPEAT_ONE:
916             /* match repeated sequence (minimizing regexp) */
917 
918             /* this operator only works if the repeated item is
919                exactly one character wide, and we're not already
920                collecting backtracking points.  for other cases,
921                use the MIN_REPEAT operator */
922 
923             /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
924 
925             TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
926                    ctx->pattern[1], ctx->pattern[2]));
927 
928             if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
929                 RETURN_FAILURE; /* cannot match */
930 
931             state->ptr = ctx->ptr;
932 
933             if (ctx->pattern[1] == 0)
934                 ctx->count = 0;
935             else {
936                 /* count using pattern min as the maximum */
937                 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
938                 RETURN_ON_ERROR(ret);
939                 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
940                 if (ret < (Py_ssize_t) ctx->pattern[1])
941                     /* didn't match minimum number of times */
942                     RETURN_FAILURE;
943                 /* advance past minimum matches of repeat */
944                 ctx->count = ret;
945                 ctx->ptr += ctx->count;
946             }
947 
948             if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
949                 !(ctx->toplevel &&
950                   ((state->match_all && ctx->ptr != state->end) ||
951                    (state->must_advance && ctx->ptr == state->start))))
952             {
953                 /* tail is empty.  we're finished */
954                 state->ptr = ctx->ptr;
955                 RETURN_SUCCESS;
956 
957             } else {
958                 /* general case */
959                 LASTMARK_SAVE();
960                 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
961                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
962                     state->ptr = ctx->ptr;
963                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
964                             ctx->pattern+ctx->pattern[0]);
965                     if (ret) {
966                         RETURN_ON_ERROR(ret);
967                         RETURN_SUCCESS;
968                     }
969                     state->ptr = ctx->ptr;
970                     ret = SRE(count)(state, ctx->pattern+3, 1);
971                     RETURN_ON_ERROR(ret);
972                     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
973                     if (ret == 0)
974                         break;
975                     assert(ret == 1);
976                     ctx->ptr++;
977                     ctx->count++;
978                     LASTMARK_RESTORE();
979                 }
980             }
981             RETURN_FAILURE;
982 
983         case SRE_OP_REPEAT:
984             /* create repeat context.  all the hard work is done
985                by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
986             /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
987             TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
988                    ctx->pattern[1], ctx->pattern[2]));
989 
990             /* install new repeat context */
991             ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
992             if (!ctx->u.rep) {
993                 PyErr_NoMemory();
994                 RETURN_FAILURE;
995             }
996             ctx->u.rep->count = -1;
997             ctx->u.rep->pattern = ctx->pattern;
998             ctx->u.rep->prev = state->repeat;
999             ctx->u.rep->last_ptr = NULL;
1000             state->repeat = ctx->u.rep;
1001 
1002             state->ptr = ctx->ptr;
1003             DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1004             state->repeat = ctx->u.rep->prev;
1005             PyObject_FREE(ctx->u.rep);
1006 
1007             if (ret) {
1008                 RETURN_ON_ERROR(ret);
1009                 RETURN_SUCCESS;
1010             }
1011             RETURN_FAILURE;
1012 
1013         case SRE_OP_MAX_UNTIL:
1014             /* maximizing repeat */
1015             /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1016 
1017             /* FIXME: we probably need to deal with zero-width
1018                matches in here... */
1019 
1020             ctx->u.rep = state->repeat;
1021             if (!ctx->u.rep)
1022                 RETURN_ERROR(SRE_ERROR_STATE);
1023 
1024             state->ptr = ctx->ptr;
1025 
1026             ctx->count = ctx->u.rep->count+1;
1027 
1028             TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1029                    ctx->ptr, ctx->count));
1030 
1031             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1032                 /* not enough matches */
1033                 ctx->u.rep->count = ctx->count;
1034                 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1035                         ctx->u.rep->pattern+3);
1036                 if (ret) {
1037                     RETURN_ON_ERROR(ret);
1038                     RETURN_SUCCESS;
1039                 }
1040                 ctx->u.rep->count = ctx->count-1;
1041                 state->ptr = ctx->ptr;
1042                 RETURN_FAILURE;
1043             }
1044 
1045             if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1046                 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1047                 state->ptr != ctx->u.rep->last_ptr) {
1048                 /* we may have enough matches, but if we can
1049                    match another item, do so */
1050                 ctx->u.rep->count = ctx->count;
1051                 LASTMARK_SAVE();
1052                 MARK_PUSH(ctx->lastmark);
1053                 /* zero-width match protection */
1054                 DATA_PUSH(&ctx->u.rep->last_ptr);
1055                 ctx->u.rep->last_ptr = state->ptr;
1056                 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1057                         ctx->u.rep->pattern+3);
1058                 DATA_POP(&ctx->u.rep->last_ptr);
1059                 if (ret) {
1060                     MARK_POP_DISCARD(ctx->lastmark);
1061                     RETURN_ON_ERROR(ret);
1062                     RETURN_SUCCESS;
1063                 }
1064                 MARK_POP(ctx->lastmark);
1065                 LASTMARK_RESTORE();
1066                 ctx->u.rep->count = ctx->count-1;
1067                 state->ptr = ctx->ptr;
1068             }
1069 
1070             /* cannot match more repeated items here.  make sure the
1071                tail matches */
1072             state->repeat = ctx->u.rep->prev;
1073             DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1074             RETURN_ON_SUCCESS(ret);
1075             state->repeat = ctx->u.rep;
1076             state->ptr = ctx->ptr;
1077             RETURN_FAILURE;
1078 
1079         case SRE_OP_MIN_UNTIL:
1080             /* minimizing repeat */
1081             /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1082 
1083             ctx->u.rep = state->repeat;
1084             if (!ctx->u.rep)
1085                 RETURN_ERROR(SRE_ERROR_STATE);
1086 
1087             state->ptr = ctx->ptr;
1088 
1089             ctx->count = ctx->u.rep->count+1;
1090 
1091             TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
1092                    ctx->ptr, ctx->count, ctx->u.rep->pattern));
1093 
1094             if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1095                 /* not enough matches */
1096                 ctx->u.rep->count = ctx->count;
1097                 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1098                         ctx->u.rep->pattern+3);
1099                 if (ret) {
1100                     RETURN_ON_ERROR(ret);
1101                     RETURN_SUCCESS;
1102                 }
1103                 ctx->u.rep->count = ctx->count-1;
1104                 state->ptr = ctx->ptr;
1105                 RETURN_FAILURE;
1106             }
1107 
1108             LASTMARK_SAVE();
1109 
1110             /* see if the tail matches */
1111             state->repeat = ctx->u.rep->prev;
1112             DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1113             if (ret) {
1114                 RETURN_ON_ERROR(ret);
1115                 RETURN_SUCCESS;
1116             }
1117 
1118             state->repeat = ctx->u.rep;
1119             state->ptr = ctx->ptr;
1120 
1121             LASTMARK_RESTORE();
1122 
1123             if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1124                 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1125                 state->ptr == ctx->u.rep->last_ptr)
1126                 RETURN_FAILURE;
1127 
1128             ctx->u.rep->count = ctx->count;
1129             /* zero-width match protection */
1130             DATA_PUSH(&ctx->u.rep->last_ptr);
1131             ctx->u.rep->last_ptr = state->ptr;
1132             DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1133                     ctx->u.rep->pattern+3);
1134             DATA_POP(&ctx->u.rep->last_ptr);
1135             if (ret) {
1136                 RETURN_ON_ERROR(ret);
1137                 RETURN_SUCCESS;
1138             }
1139             ctx->u.rep->count = ctx->count-1;
1140             state->ptr = ctx->ptr;
1141             RETURN_FAILURE;
1142 
1143         case SRE_OP_GROUPREF:
1144             /* match backreference */
1145             TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1146                    ctx->ptr, ctx->pattern[0]));
1147             i = ctx->pattern[0];
1148             {
1149                 Py_ssize_t groupref = i+i;
1150                 if (groupref >= state->lastmark) {
1151                     RETURN_FAILURE;
1152                 } else {
1153                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1154                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1155                     if (!p || !e || e < p)
1156                         RETURN_FAILURE;
1157                     while (p < e) {
1158                         if (ctx->ptr >= end || *ctx->ptr != *p)
1159                             RETURN_FAILURE;
1160                         p++;
1161                         ctx->ptr++;
1162                     }
1163                 }
1164             }
1165             ctx->pattern++;
1166             break;
1167 
1168         case SRE_OP_GROUPREF_IGNORE:
1169             /* match backreference */
1170             TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1171                    ctx->ptr, ctx->pattern[0]));
1172             i = ctx->pattern[0];
1173             {
1174                 Py_ssize_t groupref = i+i;
1175                 if (groupref >= state->lastmark) {
1176                     RETURN_FAILURE;
1177                 } else {
1178                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1179                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1180                     if (!p || !e || e < p)
1181                         RETURN_FAILURE;
1182                     while (p < e) {
1183                         if (ctx->ptr >= end ||
1184                             sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p))
1185                             RETURN_FAILURE;
1186                         p++;
1187                         ctx->ptr++;
1188                     }
1189                 }
1190             }
1191             ctx->pattern++;
1192             break;
1193 
1194         case SRE_OP_GROUPREF_UNI_IGNORE:
1195             /* match backreference */
1196             TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern,
1197                    ctx->ptr, ctx->pattern[0]));
1198             i = ctx->pattern[0];
1199             {
1200                 Py_ssize_t groupref = i+i;
1201                 if (groupref >= state->lastmark) {
1202                     RETURN_FAILURE;
1203                 } else {
1204                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1205                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1206                     if (!p || !e || e < p)
1207                         RETURN_FAILURE;
1208                     while (p < e) {
1209                         if (ctx->ptr >= end ||
1210                             sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p))
1211                             RETURN_FAILURE;
1212                         p++;
1213                         ctx->ptr++;
1214                     }
1215                 }
1216             }
1217             ctx->pattern++;
1218             break;
1219 
1220         case SRE_OP_GROUPREF_LOC_IGNORE:
1221             /* match backreference */
1222             TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern,
1223                    ctx->ptr, ctx->pattern[0]));
1224             i = ctx->pattern[0];
1225             {
1226                 Py_ssize_t groupref = i+i;
1227                 if (groupref >= state->lastmark) {
1228                     RETURN_FAILURE;
1229                 } else {
1230                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1231                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1232                     if (!p || !e || e < p)
1233                         RETURN_FAILURE;
1234                     while (p < e) {
1235                         if (ctx->ptr >= end ||
1236                             sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p))
1237                             RETURN_FAILURE;
1238                         p++;
1239                         ctx->ptr++;
1240                     }
1241                 }
1242             }
1243             ctx->pattern++;
1244             break;
1245 
1246         case SRE_OP_GROUPREF_EXISTS:
1247             TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1248                    ctx->ptr, ctx->pattern[0]));
1249             /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1250             i = ctx->pattern[0];
1251             {
1252                 Py_ssize_t groupref = i+i;
1253                 if (groupref >= state->lastmark) {
1254                     ctx->pattern += ctx->pattern[1];
1255                     break;
1256                 } else {
1257                     SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1258                     SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1259                     if (!p || !e || e < p) {
1260                         ctx->pattern += ctx->pattern[1];
1261                         break;
1262                     }
1263                 }
1264             }
1265             ctx->pattern += 2;
1266             break;
1267 
1268         case SRE_OP_ASSERT:
1269             /* assert subpattern */
1270             /* <ASSERT> <skip> <back> <pattern> */
1271             TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1272                    ctx->ptr, ctx->pattern[1]));
1273             if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
1274                 RETURN_FAILURE;
1275             state->ptr = ctx->ptr - ctx->pattern[1];
1276             DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1277             RETURN_ON_FAILURE(ret);
1278             ctx->pattern += ctx->pattern[0];
1279             break;
1280 
1281         case SRE_OP_ASSERT_NOT:
1282             /* assert not subpattern */
1283             /* <ASSERT_NOT> <skip> <back> <pattern> */
1284             TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1285                    ctx->ptr, ctx->pattern[1]));
1286             if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
1287                 state->ptr = ctx->ptr - ctx->pattern[1];
1288                 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1289                 if (ret) {
1290                     RETURN_ON_ERROR(ret);
1291                     RETURN_FAILURE;
1292                 }
1293             }
1294             ctx->pattern += ctx->pattern[0];
1295             break;
1296 
1297         case SRE_OP_FAILURE:
1298             /* immediate failure */
1299             TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1300             RETURN_FAILURE;
1301 
1302         default:
1303             TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1304                    ctx->pattern[-1]));
1305             RETURN_ERROR(SRE_ERROR_ILLEGAL);
1306         }
1307     }
1308 
1309 exit:
1310     ctx_pos = ctx->last_ctx_pos;
1311     jump = ctx->jump;
1312     DATA_POP_DISCARD(ctx);
1313     if (ctx_pos == -1)
1314         return ret;
1315     DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1316 
1317     switch (jump) {
1318         case JUMP_MAX_UNTIL_2:
1319             TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1320             goto jump_max_until_2;
1321         case JUMP_MAX_UNTIL_3:
1322             TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1323             goto jump_max_until_3;
1324         case JUMP_MIN_UNTIL_2:
1325             TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1326             goto jump_min_until_2;
1327         case JUMP_MIN_UNTIL_3:
1328             TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1329             goto jump_min_until_3;
1330         case JUMP_BRANCH:
1331             TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1332             goto jump_branch;
1333         case JUMP_MAX_UNTIL_1:
1334             TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1335             goto jump_max_until_1;
1336         case JUMP_MIN_UNTIL_1:
1337             TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1338             goto jump_min_until_1;
1339         case JUMP_REPEAT:
1340             TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1341             goto jump_repeat;
1342         case JUMP_REPEAT_ONE_1:
1343             TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1344             goto jump_repeat_one_1;
1345         case JUMP_REPEAT_ONE_2:
1346             TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1347             goto jump_repeat_one_2;
1348         case JUMP_MIN_REPEAT_ONE:
1349             TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1350             goto jump_min_repeat_one;
1351         case JUMP_ASSERT:
1352             TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1353             goto jump_assert;
1354         case JUMP_ASSERT_NOT:
1355             TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1356             goto jump_assert_not;
1357         case JUMP_NONE:
1358             TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1359                    ctx->ptr, ret));
1360             break;
1361     }
1362 
1363     return ret; /* should never get here */
1364 }
1365 
1366 /* need to reset capturing groups between two SRE(match) callings in loops */
1367 #define RESET_CAPTURE_GROUP() \
1368     do { state->lastmark = state->lastindex = -1; } while (0)
1369 
1370 LOCAL(Py_ssize_t)
SRE(search)1371 SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1372 {
1373     SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1374     SRE_CHAR* end = (SRE_CHAR *)state->end;
1375     Py_ssize_t status = 0;
1376     Py_ssize_t prefix_len = 0;
1377     Py_ssize_t prefix_skip = 0;
1378     SRE_CODE* prefix = NULL;
1379     SRE_CODE* charset = NULL;
1380     SRE_CODE* overlap = NULL;
1381     int flags = 0;
1382 
1383     if (ptr > end)
1384         return 0;
1385 
1386     if (pattern[0] == SRE_OP_INFO) {
1387         /* optimization info block */
1388         /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1389 
1390         flags = pattern[2];
1391 
1392         if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1393             TRACE(("reject (got %u chars, need %u)\n",
1394                    (unsigned int)(end - ptr), pattern[3]));
1395             return 0;
1396         }
1397         if (pattern[3] > 1) {
1398             /* adjust end point (but make sure we leave at least one
1399                character in there, so literal search will work) */
1400             end -= pattern[3] - 1;
1401             if (end <= ptr)
1402                 end = ptr;
1403         }
1404 
1405         if (flags & SRE_INFO_PREFIX) {
1406             /* pattern starts with a known prefix */
1407             /* <length> <skip> <prefix data> <overlap data> */
1408             prefix_len = pattern[5];
1409             prefix_skip = pattern[6];
1410             prefix = pattern + 7;
1411             overlap = prefix + prefix_len - 1;
1412         } else if (flags & SRE_INFO_CHARSET)
1413             /* pattern starts with a character from a known set */
1414             /* <charset> */
1415             charset = pattern + 5;
1416 
1417         pattern += 1 + pattern[1];
1418     }
1419 
1420     TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1421            prefix, prefix_len, prefix_skip));
1422     TRACE(("charset = %p\n", charset));
1423 
1424     if (prefix_len == 1) {
1425         /* pattern starts with a literal character */
1426         SRE_CHAR c = (SRE_CHAR) prefix[0];
1427 #if SIZEOF_SRE_CHAR < 4
1428         if ((SRE_CODE) c != prefix[0])
1429             return 0; /* literal can't match: doesn't fit in char width */
1430 #endif
1431         end = (SRE_CHAR *)state->end;
1432         state->must_advance = 0;
1433         while (ptr < end) {
1434             while (*ptr != c) {
1435                 if (++ptr >= end)
1436                     return 0;
1437             }
1438             TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1439             state->start = ptr;
1440             state->ptr = ptr + prefix_skip;
1441             if (flags & SRE_INFO_LITERAL)
1442                 return 1; /* we got all of it */
1443             status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1444             if (status != 0)
1445                 return status;
1446             ++ptr;
1447             RESET_CAPTURE_GROUP();
1448         }
1449         return 0;
1450     }
1451 
1452     if (prefix_len > 1) {
1453         /* pattern starts with a known prefix.  use the overlap
1454            table to skip forward as fast as we possibly can */
1455         Py_ssize_t i = 0;
1456 
1457         end = (SRE_CHAR *)state->end;
1458         if (prefix_len > end - ptr)
1459             return 0;
1460 #if SIZEOF_SRE_CHAR < 4
1461         for (i = 0; i < prefix_len; i++)
1462             if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1463                 return 0; /* literal can't match: doesn't fit in char width */
1464 #endif
1465         while (ptr < end) {
1466             SRE_CHAR c = (SRE_CHAR) prefix[0];
1467             while (*ptr++ != c) {
1468                 if (ptr >= end)
1469                     return 0;
1470             }
1471             if (ptr >= end)
1472                 return 0;
1473 
1474             i = 1;
1475             state->must_advance = 0;
1476             do {
1477                 if (*ptr == (SRE_CHAR) prefix[i]) {
1478                     if (++i != prefix_len) {
1479                         if (++ptr >= end)
1480                             return 0;
1481                         continue;
1482                     }
1483                     /* found a potential match */
1484                     TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1485                     state->start = ptr - (prefix_len - 1);
1486                     state->ptr = ptr - (prefix_len - prefix_skip - 1);
1487                     if (flags & SRE_INFO_LITERAL)
1488                         return 1; /* we got all of it */
1489                     status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1490                     if (status != 0)
1491                         return status;
1492                     /* close but no cigar -- try again */
1493                     if (++ptr >= end)
1494                         return 0;
1495                     RESET_CAPTURE_GROUP();
1496                 }
1497                 i = overlap[i];
1498             } while (i != 0);
1499         }
1500         return 0;
1501     }
1502 
1503     if (charset) {
1504         /* pattern starts with a character from a known set */
1505         end = (SRE_CHAR *)state->end;
1506         state->must_advance = 0;
1507         for (;;) {
1508             while (ptr < end && !SRE(charset)(state, charset, *ptr))
1509                 ptr++;
1510             if (ptr >= end)
1511                 return 0;
1512             TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1513             state->start = ptr;
1514             state->ptr = ptr;
1515             status = SRE(match)(state, pattern, 0);
1516             if (status != 0)
1517                 break;
1518             ptr++;
1519             RESET_CAPTURE_GROUP();
1520         }
1521     } else {
1522         /* general case */
1523         assert(ptr <= end);
1524         TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1525         state->start = state->ptr = ptr;
1526         status = SRE(match)(state, pattern, 1);
1527         state->must_advance = 0;
1528         while (status == 0 && ptr < end) {
1529             ptr++;
1530             RESET_CAPTURE_GROUP();
1531             TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1532             state->start = state->ptr = ptr;
1533             status = SRE(match)(state, pattern, 0);
1534         }
1535     }
1536 
1537     return status;
1538 }
1539 
1540 #undef SRE_CHAR
1541 #undef SIZEOF_SRE_CHAR
1542 #undef SRE
1543 
1544 /* vim:ts=4:sw=4:et
1545 */
1546