1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2021 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15     * Redistributions of source code must retain the above copyright notice,
16       this list of conditions and the following disclaimer.
17 
18     * Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in the
20       documentation and/or other materials provided with the distribution.
21 
22     * Neither the name of the University of Cambridge nor the names of its
23       contributors may be used to endorse or promote products derived from
24       this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 /* This module contains pcre_exec(), the externally visible function that does
41 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42 possible. There are also some static supporting functions. */
43 
44 #ifdef HAVE_CONFIG_H
45 #include "config.h"
46 #endif
47 
/* These three names are defined before pcre_internal.h is included.
NOTE(review): presumably macros declared in that header expand them to reach
the match data block and its subject-bounds fields - confirm there. */

#define NLBLOCK md             /* Block containing newline information */
#define PSSTART start_subject  /* Field containing processed string start */
#define PSEND   end_subject    /* Field containing processed string end */
51 
52 #include "pcre_internal.h"
53 
54 /* Undefine some potentially clashing cpp symbols */
55 
/* match() below uses "min" and "max" as the names of its repeat-count
variables (and, under NO_RECURSE, as macros mapping to heap-frame fields), so
any pre-existing macros with those names have to be removed first. */

#undef min
#undef max
58 
59 /* The md->capture_last field uses the lower 16 bits for the last captured
60 substring (which can never be greater than 65535) and a bit in the top half
61 to mean "capture vector overflowed". This odd way of doing things was
62 implemented when it was realized that preserving and restoring the overflow bit
63 whenever the last capture number was saved/restored made for a neater
64 interface, and doing it this way saved on (a) another variable, which would
65 have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66 separate set of save/restore instructions. The following defines are used in
67 implementing this. */
68 
#define CAPLMASK    0x0000ffff    /* The bits of capture_last holding the capture number */
#define OVFLMASK    0xffff0000    /* The bits reserved for the overflow flag (top half) */
#define OVFLBIT     0x00010000    /* The bit that is set for overflow (lowest bit of OVFLMASK) */
72 
73 /* Values for setting in md->match_function_type to indicate two special types
74 of call to match(). We do it this way to save on using another stack variable,
75 as stack usage is to be discouraged. */
76 
#define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
#define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group; match()
                                   notes the subject position on entry and then
                                   resets md->match_function_type to 0 */
79 
80 /* Non-error returns from the match() function. Error returns are externally
81 defined PCRE_ERROR_xxx codes, which are all negative. */
82 
#define MATCH_MATCH        1    /* match() succeeded */
#define MATCH_NOMATCH      0    /* match() failed; the caller may try an alternative */
85 
86 /* Special internal returns from the match() function. Make them sufficiently
87 negative to avoid the external error codes. */
88 
#define MATCH_ACCEPT       (-999)
#define MATCH_KETRPOS      (-998)
#define MATCH_ONCE         (-997)
/* The next 5 must be kept together and in sequence so that a test that checks
for any one of them can use a range (MATCH_BACKTRACK_MIN to
MATCH_BACKTRACK_MAX inclusive). Each is handed back by the corresponding
backtracking verb when the remainder of the pattern returns MATCH_NOMATCH. */
#define MATCH_COMMIT       (-996)  /* From OP_COMMIT */
#define MATCH_PRUNE        (-995)  /* From OP_PRUNE and OP_PRUNE_ARG */
#define MATCH_SKIP         (-994)  /* From OP_SKIP, or from OP_MARK when a
                                      SKIP_ARG name matches; the subject
                                      position is in md->start_match_ptr */
#define MATCH_SKIP_ARG     (-993)  /* From OP_SKIP_ARG; the skip name is passed
                                      back in md->start_match_ptr */
#define MATCH_THEN         (-992)  /* From OP_THEN and OP_THEN_ARG; the opcode
                                      address is in md->start_match_ptr */
#define MATCH_BACKTRACK_MAX MATCH_THEN     /* Upper end of the range */
#define MATCH_BACKTRACK_MIN MATCH_COMMIT   /* Lower end of the range */
101 
102 /* Maximum number of ints of offset to save on the stack for recursive calls.
103 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
104 because the offset vector is always a multiple of 3 long. */
105 
/* This value is the element count of the stacksave[] array that match() keeps
in every frame (on the stack, or in each heapframe under NO_RECURSE). */

#define REC_STACK_SAVE_MAX 30
107 
108 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
109 
/* Two parallel 11-entry tables: entry i holds the minimum (rep_min) and the
maximum (rep_max) repeat count of the i-th common repeat. A maximum of zero
stands for "no upper limit". NOTE(review): the index is presumably an opcode
offset computed where the tables are read - confirm against that code. */

static const char rep_min[] = {
  0, 0,      /* entries 0-1  */
  1, 1,      /* entries 2-3  */
  0, 0,      /* entries 4-5  */
  0, 0,      /* entries 6-7  */
  0, 1, 0    /* entries 8-10 */
};

static const char rep_max[] = {
  0, 0,      /* entries 0-1  */
  0, 0,      /* entries 2-3  */
  1, 1,      /* entries 4-5  */
  0, 0,      /* entries 6-7  */
  0, 0, 1    /* entries 8-10 */
};
112 
113 #ifdef PCRE_DEBUG
114 /*************************************************
115 *        Debugging function to print chars       *
116 *************************************************/
117 
/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested.
120 
121 Arguments:
122   p           points to characters
123   length      number to print
124   is_subject  TRUE if printing from within md->start_subject
125   md          pointer to matching data block, if is_subject is TRUE
126 
127 Returns:     nothing
128 */
129 
130 static void
pchars(const pcre_uchar * p,int length,BOOL is_subject,match_data * md)131 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
132 {
133 pcre_uint32 c;
134 BOOL utf = md->utf;
135 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
136 while (length-- > 0)
137   if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
138 }
139 #endif
140 
141 
142 
143 /*************************************************
144 *          Match a back-reference                *
145 *************************************************/
146 
147 /* Normally, if a back reference hasn't been set, the length that is passed is
148 negative, so the match always fails. However, in JavaScript compatibility mode,
149 the length passed is zero. Note that in caseless UTF-8 mode, the number of
150 subject bytes matched may be different to the number of reference bytes.
151 
152 Arguments:
153   offset      index into the offset vector
154   eptr        pointer into the subject
155   length      length of reference to be matched (number of bytes)
156   md          points to match data block
157   caseless    TRUE if caseless
158 
159 Returns:      >= 0 the number of subject bytes matched
160               -1 no match
161               -2 partial match; always given if at end subject
162 */
163 
164 static int
match_ref(int offset,register PCRE_PUCHAR eptr,int length,match_data * md,BOOL caseless)165 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
166   BOOL caseless)
167 {
168 PCRE_PUCHAR eptr_start = eptr;
169 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
170 #if defined SUPPORT_UTF && defined SUPPORT_UCP
171 BOOL utf = md->utf;
172 #endif
173 
174 #ifdef PCRE_DEBUG
175 if (eptr >= md->end_subject)
176   printf("matching subject <null>");
177 else
178   {
179   printf("matching subject ");
180   pchars(eptr, length, TRUE, md);
181   }
182 printf(" against backref ");
183 pchars(p, length, FALSE, md);
184 printf("\n");
185 #endif
186 
187 /* Always fail if reference not set (and not JavaScript compatible - in that
188 case the length is passed as zero). */
189 
190 if (length < 0) return -1;
191 
192 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
193 properly if Unicode properties are supported. Otherwise, we can check only
194 ASCII characters. */
195 
196 if (caseless)
197   {
198 #if defined SUPPORT_UTF && defined SUPPORT_UCP
199   if (utf)
200     {
201     /* Match characters up to the end of the reference. NOTE: the number of
202     data units matched may differ, because in UTF-8 there are some characters
203     whose upper and lower case versions code have different numbers of bytes.
204     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
205     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
206     sequence of two of the latter. It is important, therefore, to check the
207     length along the reference, not along the subject (earlier code did this
208     wrong). */
209 
210     PCRE_PUCHAR endptr = p + length;
211     while (p < endptr)
212       {
213       pcre_uint32 c, d;
214       const ucd_record *ur;
215       if (eptr >= md->end_subject) return -2;   /* Partial match */
216       GETCHARINC(c, eptr);
217       GETCHARINC(d, p);
218       ur = GET_UCD(d);
219       if (c != d && c != d + ur->other_case)
220         {
221         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
222         for (;;)
223           {
224           if (c < *pp) return -1;
225           if (c == *pp++) break;
226           }
227         }
228       }
229     }
230   else
231 #endif
232 
233   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
234   is no UCP support. */
235     {
236     while (length-- > 0)
237       {
238       pcre_uint32 cc, cp;
239       if (eptr >= md->end_subject) return -2;   /* Partial match */
240       cc = UCHAR21TEST(eptr);
241       cp = UCHAR21TEST(p);
242       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
243       p++;
244       eptr++;
245       }
246     }
247   }
248 
249 /* In the caseful case, we can just compare the bytes, whether or not we
250 are in UTF-8 mode. */
251 
252 else
253   {
254   while (length-- > 0)
255     {
256     if (eptr >= md->end_subject) return -2;   /* Partial match */
257     if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
258     }
259   }
260 
261 return (int)(eptr - eptr_start);
262 }
263 
264 
265 
266 /***************************************************************************
267 ****************************************************************************
268                    RECURSION IN THE match() FUNCTION
269 
270 The match() function is highly recursive, though not every recursive call
271 increases the recursive depth. Nevertheless, some regular expressions can cause
272 it to recurse to a great depth. I was writing for Unix, so I just let it call
273 itself recursively. This uses the stack for saving everything that has to be
274 saved for a recursive call. On Unix, the stack can be large, and this works
275 fine.
276 
277 It turns out that on some non-Unix-like systems there are problems with
278 programs that use a lot of stack. (This despite the fact that every last chip
279 has oodles of memory these days, and techniques for extending the stack have
280 been known for decades.) So....
281 
282 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
283 calls by keeping local variables that need to be preserved in blocks of memory
obtained from malloc() instead of on the stack. Macros are used to
285 achieve this so that the actual code doesn't look very different to what it
286 always used to.
287 
288 The original heap-recursive code used longjmp(). However, it seems that this
289 can be very slow on some operating systems. Following a suggestion from Stan
290 Switzer, the use of longjmp() has been abolished, at the cost of having to
291 provide a unique number for each call to RMATCH. There is no way of generating
292 a sequence of numbers at compile time in C. I have given them names, to make
293 them stand out more clearly.
294 
295 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
296 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
297 tests. Furthermore, not using longjmp() means that local dynamic variables
298 don't have indeterminate values; this has meant that the frame size can be
299 reduced because the result can be "passed back" by straight setting of the
300 variable instead of being passed in the frame.
301 ****************************************************************************
302 ***************************************************************************/
303 
304 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
305 below must be updated in sync.  */
306 
/* One identifier per RMATCH call site, numbered consecutively from 1 to 67. */

enum {
  RM1 = 1,
        RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,
  RM9,  RM10, RM11, RM12, RM13, RM14, RM15, RM16,
  RM17, RM18, RM19, RM20, RM21, RM22, RM23, RM24,
  RM25, RM26, RM27, RM28, RM29, RM30, RM31, RM32,
  RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48,
  RM49, RM50, RM51, RM52, RM53, RM54, RM55, RM56,
  RM57, RM58, RM59, RM60, RM61, RM62, RM63, RM64,
  RM65, RM66, RM67
};
314 
315 /* These versions of the macros use the stack, as normal. There are debugging
316 versions and production versions. Note that the "rw" argument of RMATCH isn't
317 actually used in this definition. */
318 
319 #ifndef NO_RECURSE
#define REGISTER register   /* match() declares its first two arguments with this */

/* Both variants below turn RMATCH into a direct recursive call of match() and
RRETURN into a plain return. They rely on the names rrc, mstart and rdepth
being in scope where the macro is used: the result is stored in rrc, and
mstart and rdepth+1 are passed on without appearing in the argument list. */

#ifdef PCRE_DEBUG
#define RMATCH(ra,rb,rc,rd,re,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
/* This debugging form expands its argument twice (once for printing, once for
the return itself). */
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d\n", ra, __LINE__); \
  return ra; \
  }
#else
#define RMATCH(ra,rb,rc,rd,re,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
#define RRETURN(ra) return ra
#endif
339 
340 #else
341 
342 
343 /* These versions of the macros manage a private stack on the heap. Note that
344 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
345 argument of match(), which never changes. */
346 
#define REGISTER            /* Empty here: the arguments live in heap frames */

/* Simulated call of match(). The steps are:
  - take the frame already chained on frame->Xnextframe, or, if there is none,
    get one from the stack_malloc function and chain it on; an allocation
    failure leaves through RRETURN with PCRE_ERROR_NOMEMORY;
  - record the resume point (rw) in the *current* frame's Xwhere;
  - copy the "arguments" into the new frame (mstart is taken from the current
    context, and the depth is the current depth plus one);
  - link the new frame back to the current one, make it current, and jump to
    HEAP_RECURSE.
The label L_<rw> generated at the end is the point where execution continues
once the simulated call has returned. */

#define RMATCH(ra,rb,rc,rd,re,rw)\
  {\
  heapframe *newframe = frame->Xnextframe;\
  if (newframe == NULL)\
    {\
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    newframe->Xnextframe = NULL;\
    frame->Xnextframe = newframe;\
    }\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xeptrb = re;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* Simulated return from match(). Step back to the previous frame; if there is
one, pass the result in rrc and jump to HEAP_RETURN, otherwise this was the
outermost frame and a real return is performed. The frame being left is not
freed by this macro. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
385 
386 
387 /* Structure for remembering the local variables in a private frame */
388 
/* One simulated stack frame of match() for the NO_RECURSE build. Inside
match() each field Xname is reached through a macro of the form
"#define name frame->Xname", so the code reads as if it used ordinary
arguments and local variables. */

typedef struct heapframe {
  struct heapframe *Xprevframe;   /* Frame to go back to; RRETURN follows it */
  struct heapframe *Xnextframe;   /* Next frame, obtained by RMATCH on first
                                     need and then reused; NULL if none yet */

  /* Function arguments that may change */

  PCRE_PUCHAR Xeptr;
  const pcre_uchar *Xecode;
  PCRE_PUCHAR Xmstart;
  int Xoffset_top;
  eptrblock *Xeptrb;
  unsigned int Xrdepth;           /* RMATCH sets this to the caller's value + 1 */

  /* Function local variables */

  PCRE_PUCHAR Xcallpat;
#ifdef SUPPORT_UTF
  PCRE_PUCHAR Xcharptr;
#endif
  PCRE_PUCHAR Xdata;
  PCRE_PUCHAR Xnext;
  PCRE_PUCHAR Xpp;
  PCRE_PUCHAR Xprev;
  PCRE_PUCHAR Xsaved_eptr;

  recursion_info Xnew_recursive;

  BOOL Xcur_is_word;
  BOOL Xcondition;
  BOOL Xprev_is_word;

#ifdef SUPPORT_UCP
  int Xprop_type;
  unsigned int Xprop_value;
  int Xprop_fail_result;
  int Xoclength;
  pcre_uchar Xocchars[6];
#endif

  int Xcodelink;
  int Xctype;
  unsigned int Xfc;               /* fc and fi have fields of their own here; */
  int Xfi;                        /* with real recursion they alias c and i   */
  int Xlength;
  int Xmax;
  int Xmin;
  unsigned int Xnumber;
  int Xoffset;
  unsigned int Xop;
  pcre_int32 Xsave_capture_last;
  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  int Xstacksave[REC_STACK_SAVE_MAX];

  eptrblock Xnewptrb;

  /* Where to jump back to: the RMn number that RMATCH stored in this frame
  before starting the simulated call */

  int Xwhere;

} heapframe;
449 
450 #endif
451 
452 
453 /***************************************************************************
454 ***************************************************************************/
455 
456 
457 
458 /*************************************************
459 *         Match from current position            *
460 *************************************************/
461 
462 /* This function is called recursively in many circumstances. Whenever it
463 returns a negative (error) response, the outer incarnation must also return the
464 same response. */
465 
466 /* These macros pack up tests that are used for partial matching, and which
467 appear several times in the code. We set the "hit end" flag if the pointer is
468 at the end of the subject and also past the start of the subject (i.e.
469 something has been matched). For hard partial matching, we then return
470 immediately. The second one is used when we already know we are past the end of
471 the subject. */
472 
/* CHECK_PARTIAL: if partial matching is enabled (md->partial != 0) and eptr
is at or beyond the end of the subject and beyond md->start_used_ptr, set
md->hitend; when md->partial is greater than 1, also leave match() at once
with PCRE_ERROR_PARTIAL. The expansion is a bare "if" statement that uses
eptr, md and RRETURN from the surrounding code, so it must be used as a
complete statement and not as the unbraced body of an if that has an else. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && \
      eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }

/* SCHECK_PARTIAL: the same, except that the comparison of eptr against the
end of the subject is omitted. */

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }
487 
488 
489 /* Performance note: It might be tempting to extract commonly used fields from
490 the md structure (e.g. utf, end_subject) into individual variables to improve
491 performance. Tests using gcc on a SPARC disproved this; in the first case, it
492 made performance worse.
493 
494 Arguments:
495    eptr        pointer to current character in subject
496    ecode       pointer to current position in compiled code
497    mstart      pointer to the current match start position (can be modified
498                  by encountering \K)
499    offset_top  current top pointer
500    md          pointer to "static" info for the match
501    eptrb       pointer to chain of blocks containing eptr at start of
502                  brackets - for testing for empty matches
503    rdepth      the recursion depth
504 
505 Returns:       MATCH_MATCH if matched            )  these values are >= 0
506                MATCH_NOMATCH if failed to match  )
507                a negative MATCH_xxx value for PRUNE, SKIP, etc
508                a negative PCRE_ERROR_xxx value if aborted by an error condition
509                  (e.g. stopped by repeated call or recursion limit)
510 */
511 
/* For GCC-compatible compilers only: a forward declaration whose sole purpose
is to attach the noinline and noclone attributes to match().
NOTE(review): presumably this keeps every recursive call a genuine call with
its own stack frame, which the frame-size measurement inside match() would
depend on - confirm. */

#ifdef __GNUC__
static int
match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
  PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
  unsigned int rdepth) __attribute__((noinline,noclone));
#endif
518 static int
match(REGISTER PCRE_PUCHAR eptr,REGISTER const pcre_uchar * ecode,PCRE_PUCHAR mstart,int offset_top,match_data * md,eptrblock * eptrb,unsigned int rdepth)519 match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
520   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
521   unsigned int rdepth)
522 {
523 /* These variables do not need to be preserved over recursion in this function,
524 so they can be ordinary variables in all cases. Mark some of them with
525 "register" because they are used a lot in loops. */
526 
527 register int  rrc;         /* Returns from recursive calls */
528 register int  i;           /* Used for loops not involving calls to RMATCH() */
529 register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
530 register BOOL utf;         /* Local copy of UTF flag for speed */
531 
532 BOOL minimize, possessive; /* Quantifier options */
533 BOOL caseless;
534 int condcode;
535 
536 /* When recursion is not being used, all "local" variables that have to be
537 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
538 frame on the stack here; subsequent instantiations are obtained from the heap
539 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
540 the top-level on the stack rather than malloc-ing them all gives a performance
541 boost in many cases where there is not much "recursion". */
542 
543 #ifdef NO_RECURSE
544 heapframe *frame = (heapframe *)md->match_frames_base;
545 
546 /* Copy in the original argument variables */
547 
548 frame->Xeptr = eptr;
549 frame->Xecode = ecode;
550 frame->Xmstart = mstart;
551 frame->Xoffset_top = offset_top;
552 frame->Xeptrb = eptrb;
553 frame->Xrdepth = rdepth;
554 
555 /* This is where control jumps back to to effect "recursion" */
556 
557 HEAP_RECURSE:
558 
559 /* Macros make the argument variables come from the current frame */
560 
561 #define eptr               frame->Xeptr
562 #define ecode              frame->Xecode
563 #define mstart             frame->Xmstart
564 #define offset_top         frame->Xoffset_top
565 #define eptrb              frame->Xeptrb
566 #define rdepth             frame->Xrdepth
567 
568 /* Ditto for the local variables */
569 
570 #ifdef SUPPORT_UTF
571 #define charptr            frame->Xcharptr
572 #endif
573 #define callpat            frame->Xcallpat
574 #define codelink           frame->Xcodelink
575 #define data               frame->Xdata
576 #define next               frame->Xnext
577 #define pp                 frame->Xpp
578 #define prev               frame->Xprev
579 #define saved_eptr         frame->Xsaved_eptr
580 
581 #define new_recursive      frame->Xnew_recursive
582 
583 #define cur_is_word        frame->Xcur_is_word
584 #define condition          frame->Xcondition
585 #define prev_is_word       frame->Xprev_is_word
586 
587 #ifdef SUPPORT_UCP
588 #define prop_type          frame->Xprop_type
589 #define prop_value         frame->Xprop_value
590 #define prop_fail_result   frame->Xprop_fail_result
591 #define oclength           frame->Xoclength
592 #define occhars            frame->Xocchars
593 #endif
594 
595 #define ctype              frame->Xctype
596 #define fc                 frame->Xfc
597 #define fi                 frame->Xfi
598 #define length             frame->Xlength
599 #define max                frame->Xmax
600 #define min                frame->Xmin
601 #define number             frame->Xnumber
602 #define offset             frame->Xoffset
603 #define op                 frame->Xop
604 #define save_capture_last  frame->Xsave_capture_last
605 #define save_offset1       frame->Xsave_offset1
606 #define save_offset2       frame->Xsave_offset2
607 #define save_offset3       frame->Xsave_offset3
608 #define stacksave          frame->Xstacksave
609 
610 #define newptrb            frame->Xnewptrb
611 
612 /* When recursion is being used, local variables are allocated on the stack and
613 get preserved during recursion in the normal way. In this environment, fi and
614 i, and fc and c, can be the same variables. */
615 
616 #else         /* NO_RECURSE not defined */
617 #define fi i
618 #define fc c
619 
620 /* Many of the following variables are used only in small blocks of the code.
621 My normal style of coding would have declared them within each of those blocks.
622 However, in order to accommodate the version of this code that uses an external
623 "stack" implemented on the heap, it is easier to declare them all here, so the
624 declarations can be cut out in a block. The only declarations within blocks
625 below are for variables that do not have to be preserved over a recursive call
626 to RMATCH(). */
627 
628 #ifdef SUPPORT_UTF
629 const pcre_uchar *charptr;
630 #endif
631 const pcre_uchar *callpat;
632 const pcre_uchar *data;
633 const pcre_uchar *next;
634 PCRE_PUCHAR       pp;
635 const pcre_uchar *prev;
636 PCRE_PUCHAR       saved_eptr;
637 
638 recursion_info new_recursive;
639 
640 BOOL cur_is_word;
641 BOOL condition;
642 BOOL prev_is_word;
643 
644 #ifdef SUPPORT_UCP
645 int prop_type;
646 unsigned int prop_value;
647 int prop_fail_result;
648 int oclength;
649 pcre_uchar occhars[6];
650 #endif
651 
652 int codelink;
653 int ctype;
654 int length;
655 int max;
656 int min;
657 unsigned int number;
658 int offset;
659 unsigned int op;
660 pcre_int32 save_capture_last;
661 int save_offset1, save_offset2, save_offset3;
662 int stacksave[REC_STACK_SAVE_MAX];
663 
664 eptrblock newptrb;
665 
666 /* There is a special fudge for calling match() in a way that causes it to
667 measure the size of its basic stack frame when the stack is being used for
668 recursion. The second argument (ecode) being NULL triggers this behaviour. It
669 cannot normally ever be NULL. The return is the negated value of the frame
670 size. */
671 
672 if (ecode == NULL)
673   {
674   if (rdepth == 0)
675     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
676   else
677     {
678     int len = (int)((char *)&rdepth - (char *)eptr);
679     return (len > 0)? -len : len;
680     }
681   }
682 #endif     /* NO_RECURSE */
683 
684 /* To save space on the stack and in the heap frame, I have doubled up on some
685 of the local variables that are used only in localised parts of the code, but
686 still need to be preserved over recursive calls of match(). These macros define
687 the alternative names that are used. */
688 
689 #define allow_zero    cur_is_word
690 #define cbegroup      condition
691 #define code_offset   codelink
692 #define condassert    condition
693 #define matched_once  prev_is_word
694 #define foc           number
695 #define save_mark     data
696 
/* These statements are here to stop the compiler complaining about
uninitialized variables. */
699 
700 #ifdef SUPPORT_UCP
701 prop_value = 0;
702 prop_fail_result = 0;
703 #endif
704 
705 
706 /* This label is used for tail recursion, which is used in a few cases even
707 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
708 used. Thanks to Ian Taylor for noticing this possibility and sending the
709 original patch. */
710 
711 TAIL_RECURSE:
712 
713 /* OK, now we can get on with the real code of the function. Recursive calls
714 are specified by the macro RMATCH and RRETURN is used to return. When
715 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
716 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
717 defined). However, RMATCH isn't like a function call because it's quite a
718 complicated macro. It has to be used in one particular way. This shouldn't,
719 however, impact performance when true recursion is being used. */
720 
721 #ifdef SUPPORT_UTF
722 utf = md->utf;       /* Local copy of the flag */
723 #else
724 utf = FALSE;
725 #endif
726 
727 /* First check that we haven't called match() too many times, or that we
728 haven't exceeded the recursive call limit. */
729 
730 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
731 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
732 
733 /* At the start of a group with an unlimited repeat that may match an empty
734 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
735 done this way to save having to use another function argument, which would take
736 up space on the stack. See also MATCH_CONDASSERT below.
737 
738 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
739 such remembered pointers, to be checked when we hit the closing ket, in order
740 to break infinite loops that match no characters. When match() is called in
741 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
742 NOT be used with tail recursion, because the memory block that is used is on
743 the stack, so a new one may be required for each match(). */
744 
745 if (md->match_function_type == MATCH_CBEGROUP)
746   {
747   newptrb.epb_saved_eptr = eptr;
748   newptrb.epb_prev = eptrb;
749   eptrb = &newptrb;
750   md->match_function_type = 0;
751   }
752 
753 /* Now start processing the opcodes. */
754 
755 for (;;)
756   {
757   minimize = possessive = FALSE;
758   op = *ecode;
759 
760   switch(op)
761     {
762     case OP_MARK:
763     md->nomatch_mark = ecode + 2;
764     md->mark = NULL;    /* In case previously set by assertion */
765     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
766       eptrb, RM55);
767     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT || rrc == MATCH_KETRPOS) &&
768          md->mark == NULL) md->mark = ecode + 2;
769 
770     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
771     argument, and we must check whether that argument matches this MARK's
772     argument. It is passed back in md->start_match_ptr (an overloading of that
773     variable). If it does match, we reset that variable to the current subject
774     position and return MATCH_SKIP. Otherwise, pass back the return code
775     unaltered. */
776 
777     else if (rrc == MATCH_SKIP_ARG &&
778         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
779       {
780       md->start_match_ptr = eptr;
781       RRETURN(MATCH_SKIP);
782       }
783     RRETURN(rrc);
784 
785     case OP_FAIL:
786     RRETURN(MATCH_NOMATCH);
787 
788     case OP_COMMIT:
789     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
790       eptrb, RM52);
791     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
792     RRETURN(MATCH_COMMIT);
793 
794     case OP_PRUNE:
795     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
796       eptrb, RM51);
797     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
798     RRETURN(MATCH_PRUNE);
799 
800     case OP_PRUNE_ARG:
801     md->nomatch_mark = ecode + 2;
802     md->mark = NULL;    /* In case previously set by assertion */
803     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
804       eptrb, RM56);
805     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
806          md->mark == NULL) md->mark = ecode + 2;
807     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
808     RRETURN(MATCH_PRUNE);
809 
810     case OP_SKIP:
811     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
812       eptrb, RM53);
813     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
814     md->start_match_ptr = eptr;   /* Pass back current position */
815     RRETURN(MATCH_SKIP);
816 
817     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
818     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
819     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
820     that failed and any that precede it (either they also failed, or were not
821     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
822     SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
823     set to the count of the one that failed. */
824 
825     case OP_SKIP_ARG:
826     md->skip_arg_count++;
827     if (md->skip_arg_count <= md->ignore_skip_arg)
828       {
829       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
830       break;
831       }
832     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
833       eptrb, RM57);
834     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
835 
836     /* Pass back the current skip name by overloading md->start_match_ptr and
837     returning the special MATCH_SKIP_ARG return code. This will either be
838     caught by a matching MARK, or get to the top, where it causes a rematch
839     with md->ignore_skip_arg set to the value of md->skip_arg_count. */
840 
841     md->start_match_ptr = ecode + 2;
842     RRETURN(MATCH_SKIP_ARG);
843 
844     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
845     the branch in which it occurs can be determined. Overload the start of
846     match pointer to do this. */
847 
848     case OP_THEN:
849     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
850       eptrb, RM54);
851     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
852     md->start_match_ptr = ecode;
853     RRETURN(MATCH_THEN);
854 
855     case OP_THEN_ARG:
856     md->nomatch_mark = ecode + 2;
857     md->mark = NULL;    /* In case previously set by assertion */
858     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
859       md, eptrb, RM58);
860     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
861          md->mark == NULL) md->mark = ecode + 2;
862     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
863     md->start_match_ptr = ecode;
864     RRETURN(MATCH_THEN);
865 
866     /* Handle an atomic group that does not contain any capturing parentheses.
867     This can be handled like an assertion. Prior to 8.13, all atomic groups
868     were handled this way. In 8.13, the code was changed as below for ONCE, so
869     that backups pass through the group and thereby reset captured values.
870     However, this uses a lot more stack, so in 8.20, atomic groups that do not
871     contain any captures generate OP_ONCE_NC, which can be handled in the old,
872     less stack intensive way.
873 
874     Check the alternative branches in turn - the matching won't pass the KET
875     for this kind of subpattern. If any one branch matches, we carry on as at
876     the end of a normal bracket, leaving the subject pointer, but resetting
877     the start-of-match value in case it was changed by \K. */
878 
879     case OP_ONCE_NC:
880     prev = ecode;
881     saved_eptr = eptr;
882     save_mark = md->mark;
883     do
884       {
885       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
886       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
887         {
888         mstart = md->start_match_ptr;
889         break;
890         }
891       if (rrc == MATCH_THEN)
892         {
893         next = ecode + GET(ecode,1);
894         if (md->start_match_ptr < next &&
895             (*ecode == OP_ALT || *next == OP_ALT))
896           rrc = MATCH_NOMATCH;
897         }
898 
899       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
900       ecode += GET(ecode,1);
901       md->mark = save_mark;
902       }
903     while (*ecode == OP_ALT);
904 
905     /* If we hit the end of the group (which could be repeated), fail */
906 
907     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
908 
909     /* Continue as from after the group, updating the offsets high water
910     mark, since extracts may have been taken. */
911 
912     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
913 
914     offset_top = md->end_offset_top;
915     eptr = md->end_match_ptr;
916 
917     /* For a non-repeating ket, just continue at this level. This also
918     happens for a repeating ket if no characters were matched in the group.
919     This is the forcible breaking of infinite loops as implemented in Perl
920     5.005. */
921 
922     if (*ecode == OP_KET || eptr == saved_eptr)
923       {
924       ecode += 1+LINK_SIZE;
925       break;
926       }
927 
928     /* The repeating kets try the rest of the pattern or restart from the
929     preceding bracket, in the appropriate order. The second "call" of match()
930     uses tail recursion, to avoid using another stack frame. */
931 
932     if (*ecode == OP_KETRMIN)
933       {
934       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
935       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
936       ecode = prev;
937       goto TAIL_RECURSE;
938       }
939     else  /* OP_KETRMAX */
940       {
941       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
942       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
943       ecode += 1 + LINK_SIZE;
944       goto TAIL_RECURSE;
945       }
946     /* Control never gets here */
947 
948     /* Handle a capturing bracket, other than those that are possessive with an
949     unlimited repeat. If there is space in the offset vector, save the current
950     subject position in the working slot at the top of the vector. We mustn't
951     change the current values of the data slot, because they may be set from a
952     previous iteration of this group, and be referred to by a reference inside
953     the group. A failure to match might occur after the group has succeeded,
954     if something later on doesn't match. For this reason, we need to restore
955     the working value and also the values of the final offsets, in case they
956     were set by a previous iteration of the same bracket.
957 
958     If there isn't enough space in the offset vector, treat this as if it were
959     a non-capturing bracket. Don't worry about setting the flag for the error
960     case here; that is handled in the code for KET. */
961 
962     case OP_CBRA:
963     case OP_SCBRA:
964     number = GET2(ecode, 1+LINK_SIZE);
965     offset = number << 1;
966 
967 #ifdef PCRE_DEBUG
968     printf("start bracket %d\n", number);
969     printf("subject=");
970     pchars(eptr, 16, TRUE, md);
971     printf("\n");
972 #endif
973 
974     if (offset < md->offset_max)
975       {
976       save_offset1 = md->offset_vector[offset];
977       save_offset2 = md->offset_vector[offset+1];
978       save_offset3 = md->offset_vector[md->offset_end - number];
979       save_capture_last = md->capture_last;
980       save_mark = md->mark;
981 
982       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
983       md->offset_vector[md->offset_end - number] =
984         (int)(eptr - md->start_subject);
985 
986       for (;;)
987         {
988         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
989         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
990           eptrb, RM1);
991         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
992 
993         /* If we backed up to a THEN, check whether it is within the current
994         branch by comparing the address of the THEN that is passed back with
995         the end of the branch. If it is within the current branch, and the
996         branch is one of two or more alternatives (it either starts or ends
997         with OP_ALT), we have reached the limit of THEN's action, so convert
998         the return code to NOMATCH, which will cause normal backtracking to
999         happen from now on. Otherwise, THEN is passed back to an outer
1000         alternative. This implements Perl's treatment of parenthesized groups,
1001         where a group not containing | does not affect the current alternative,
1002         that is, (X) is NOT the same as (X|(*F)). */
1003 
1004         if (rrc == MATCH_THEN)
1005           {
1006           next = ecode + GET(ecode,1);
1007           if (md->start_match_ptr < next &&
1008               (*ecode == OP_ALT || *next == OP_ALT))
1009             rrc = MATCH_NOMATCH;
1010           }
1011 
1012         /* Anything other than NOMATCH is passed back. */
1013 
1014         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1015         md->capture_last = save_capture_last;
1016         ecode += GET(ecode, 1);
1017         md->mark = save_mark;
1018         if (*ecode != OP_ALT) break;
1019         }
1020 
1021       DPRINTF(("bracket %d failed\n", number));
1022       md->offset_vector[offset] = save_offset1;
1023       md->offset_vector[offset+1] = save_offset2;
1024       md->offset_vector[md->offset_end - number] = save_offset3;
1025 
1026       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1027 
1028       RRETURN(rrc);
1029       }
1030 
1031     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1032     as a non-capturing bracket. */
1033 
1034     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1035     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1036 
1037     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1038 
1039     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1040     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1041 
1042     /* Non-capturing or atomic group, except for possessive with unlimited
1043     repeat and ONCE group with no captures. Loop for all the alternatives.
1044 
1045     When we get to the final alternative within the brackets, we used to return
1046     the result of a recursive call to match() whatever happened so it was
1047     possible to reduce stack usage by turning this into a tail recursion,
1048     except in the case of a possibly empty group. However, now that there is
1049     the possibility of (*THEN) occurring in the final alternative, this
1050     optimization is no longer always possible.
1051 
1052     We can optimize if we know there are no (*THEN)s in the pattern; at present
1053     this is the best that can be done.
1054 
1055     MATCH_ONCE is returned when the end of an atomic group is successfully
1056     reached, but subsequent matching fails. It passes back up the tree (causing
1057     captured values to be reset) until the original atomic group level is
1058     reached. This is tested by comparing md->once_target with the start of the
1059     group. At this point, the return is converted into MATCH_NOMATCH so that
1060     previous backup points can be taken. */
1061 
1062     /* fall through */
1063 
1064     case OP_ONCE:
1065     case OP_BRA:
1066     case OP_SBRA:
1067     DPRINTF(("start non-capturing bracket\n"));
1068 
1069     for (;;)
1070       {
1071       if (op >= OP_SBRA || op == OP_ONCE)
1072         md->match_function_type = MATCH_CBEGROUP;
1073 
1074       /* If this is not a possibly empty group, and there are no (*THEN)s in
1075       the pattern, and this is the final alternative, optimize as described
1076       above. */
1077 
1078       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1079         {
1080         ecode += PRIV(OP_lengths)[*ecode];
1081         goto TAIL_RECURSE;
1082         }
1083 
1084       /* In all other cases, we have to make another call to match(). */
1085 
1086       save_mark = md->mark;
1087       save_capture_last = md->capture_last;
1088       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1089         RM2);
1090 
1091       /* See comment in the code for capturing groups above about handling
1092       THEN. */
1093 
1094       if (rrc == MATCH_THEN)
1095         {
1096         next = ecode + GET(ecode,1);
1097         if (md->start_match_ptr < next &&
1098             (*ecode == OP_ALT || *next == OP_ALT))
1099           rrc = MATCH_NOMATCH;
1100         }
1101 
1102       if (rrc != MATCH_NOMATCH)
1103         {
1104         if (rrc == MATCH_ONCE)
1105           {
1106           const pcre_uchar *scode = ecode;
1107           if (*scode != OP_ONCE)           /* If not at start, find it */
1108             {
1109             while (*scode == OP_ALT) scode += GET(scode, 1);
1110             scode -= GET(scode, 1);
1111             }
1112           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1113           }
1114         RRETURN(rrc);
1115         }
1116       ecode += GET(ecode, 1);
1117       md->mark = save_mark;
1118       if (*ecode != OP_ALT) break;
1119       md->capture_last = save_capture_last;
1120       }
1121 
1122     RRETURN(MATCH_NOMATCH);
1123 
1124     /* Handle possessive capturing brackets with an unlimited repeat. We come
1125     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1126     handled similarly to the normal case above. However, the matching is
1127     different. The end of these brackets will always be OP_KETRPOS, which
1128     returns MATCH_KETRPOS without going further in the pattern. By this means
1129     we can handle the group by iteration rather than recursion, thereby
1130     reducing the amount of stack needed. */
1131 
1132     case OP_CBRAPOS:
1133     case OP_SCBRAPOS:
1134     allow_zero = FALSE;
1135 
1136     POSSESSIVE_CAPTURE:
1137     number = GET2(ecode, 1+LINK_SIZE);
1138     offset = number << 1;
1139 
1140 #ifdef PCRE_DEBUG
1141     printf("start possessive bracket %d\n", number);
1142     printf("subject=");
1143     pchars(eptr, 16, TRUE, md);
1144     printf("\n");
1145 #endif
1146 
1147     if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
1148 
1149     matched_once = FALSE;
1150     code_offset = (int)(ecode - md->start_code);
1151 
1152     save_offset1 = md->offset_vector[offset];
1153     save_offset2 = md->offset_vector[offset+1];
1154     save_offset3 = md->offset_vector[md->offset_end - number];
1155     save_capture_last = md->capture_last;
1156 
1157     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1158 
1159     /* Each time round the loop, save the current subject position for use
1160     when the group matches. For MATCH_MATCH, the group has matched, so we
1161     restart it with a new subject starting position, remembering that we had
1162     at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1163     usual. If we haven't matched any alternatives in any iteration, check to
1164     see if a previous iteration matched. If so, the group has matched;
1165     continue from afterwards. Otherwise it has failed; restore the previous
1166     capture values before returning NOMATCH. */
1167 
1168     for (;;)
1169       {
1170       md->offset_vector[md->offset_end - number] =
1171         (int)(eptr - md->start_subject);
1172       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1173       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1174         eptrb, RM63);
1175       if (rrc == MATCH_KETRPOS)
1176         {
1177         offset_top = md->end_offset_top;
1178         ecode = md->start_code + code_offset;
1179         save_capture_last = md->capture_last;
1180         matched_once = TRUE;
1181         mstart = md->start_match_ptr;    /* In case \K changed it */
1182         if (eptr == md->end_match_ptr)   /* Matched an empty string */
1183           {
1184           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1185           break;
1186           }
1187         eptr = md->end_match_ptr;
1188         continue;
1189         }
1190 
1191       /* See comment in the code for capturing groups above about handling
1192       THEN. */
1193 
1194       if (rrc == MATCH_THEN)
1195         {
1196         next = ecode + GET(ecode,1);
1197         if (md->start_match_ptr < next &&
1198             (*ecode == OP_ALT || *next == OP_ALT))
1199           rrc = MATCH_NOMATCH;
1200         }
1201 
1202       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1203       md->capture_last = save_capture_last;
1204       ecode += GET(ecode, 1);
1205       if (*ecode != OP_ALT) break;
1206       }
1207 
1208     if (!matched_once)
1209       {
1210       md->offset_vector[offset] = save_offset1;
1211       md->offset_vector[offset+1] = save_offset2;
1212       md->offset_vector[md->offset_end - number] = save_offset3;
1213       }
1214 
1215     if (allow_zero || matched_once)
1216       {
1217       ecode += 1 + LINK_SIZE;
1218       break;
1219       }
1220 
1221     RRETURN(MATCH_NOMATCH);
1222 
1223     /* Non-capturing possessive bracket with unlimited repeat. We come here
1224     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1225     without the capturing complication. It is written out separately for speed
1226     and cleanliness. */
1227 
1228     case OP_BRAPOS:
1229     case OP_SBRAPOS:
1230     allow_zero = FALSE;
1231 
1232     POSSESSIVE_NON_CAPTURE:
1233     matched_once = FALSE;
1234     code_offset = (int)(ecode - md->start_code);
1235     save_capture_last = md->capture_last;
1236 
1237     for (;;)
1238       {
1239       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1240       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1241         eptrb, RM48);
1242       if (rrc == MATCH_KETRPOS)
1243         {
1244         offset_top = md->end_offset_top;
1245         ecode = md->start_code + code_offset;
1246         matched_once = TRUE;
1247         mstart = md->start_match_ptr;   /* In case \K reset it */
1248         if (eptr == md->end_match_ptr)  /* Matched an empty string */
1249           {
1250           do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1251           break;
1252           }
1253         eptr = md->end_match_ptr;
1254         continue;
1255         }
1256 
1257       /* See comment in the code for capturing groups above about handling
1258       THEN. */
1259 
1260       if (rrc == MATCH_THEN)
1261         {
1262         next = ecode + GET(ecode,1);
1263         if (md->start_match_ptr < next &&
1264             (*ecode == OP_ALT || *next == OP_ALT))
1265           rrc = MATCH_NOMATCH;
1266         }
1267 
1268       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1269       ecode += GET(ecode, 1);
1270       if (*ecode != OP_ALT) break;
1271       md->capture_last = save_capture_last;
1272       }
1273 
1274     if (matched_once || allow_zero)
1275       {
1276       ecode += 1 + LINK_SIZE;
1277       break;
1278       }
1279     RRETURN(MATCH_NOMATCH);
1280 
1281     /* Control never reaches here. */
1282 
1283     /* Conditional group: compilation checked that there are no more than two
1284     branches. If the condition is false, skipping the first branch takes us
1285     past the end of the item if there is only one branch, but that's exactly
1286     what we want. */
1287 
1288     case OP_COND:
1289     case OP_SCOND:
1290 
1291     /* The variable codelink will be added to ecode when the condition is
1292     false, to get to the second branch. Setting it to the offset to the ALT
1293     or KET, then incrementing ecode achieves this effect. We now have ecode
1294     pointing to the condition or callout. */
1295 
1296     codelink = GET(ecode, 1);   /* Offset to the second branch */
1297     ecode += 1 + LINK_SIZE;     /* From this opcode */
1298 
1299     /* Because of the way auto-callout works during compile, a callout item is
1300     inserted between OP_COND and an assertion condition. */
1301 
1302     if (*ecode == OP_CALLOUT)
1303       {
1304       if (PUBL(callout) != NULL)
1305         {
1306         PUBL(callout_block) cb;
1307         cb.version          = 2;   /* Version 2 of the callout block */
1308         cb.callout_number   = ecode[1];
1309         cb.offset_vector    = md->offset_vector;
1310 #if defined COMPILE_PCRE8
1311         cb.subject          = (PCRE_SPTR)md->start_subject;
1312 #elif defined COMPILE_PCRE16
1313         cb.subject          = (PCRE_SPTR16)md->start_subject;
1314 #elif defined COMPILE_PCRE32
1315         cb.subject          = (PCRE_SPTR32)md->start_subject;
1316 #endif
1317         cb.subject_length   = (int)(md->end_subject - md->start_subject);
1318         cb.start_match      = (int)(mstart - md->start_subject);
1319         cb.current_position = (int)(eptr - md->start_subject);
1320         cb.pattern_position = GET(ecode, 2);
1321         cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1322         cb.capture_top      = offset_top/2;
1323         cb.capture_last     = md->capture_last & CAPLMASK;
1324         /* Internal change requires this for API compatibility. */
1325         if (cb.capture_last == 0) cb.capture_last = -1;
1326         cb.callout_data     = md->callout_data;
1327         cb.mark             = md->nomatch_mark;
1328         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1329         if (rrc < 0) RRETURN(rrc);
1330         }
1331 
1332       /* Advance ecode past the callout, so it now points to the condition. We
1333       must adjust codelink so that the value of ecode+codelink is unchanged. */
1334 
1335       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1336       codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1337       }
1338 
1339     /* Test the various possible conditions */
1340 
1341     condition = FALSE;
1342     switch(condcode = *ecode)
1343       {
1344       case OP_RREF:         /* Numbered group recursion test */
1345       if (md->recursive != NULL)     /* Not recursing => FALSE */
1346         {
1347         unsigned int recno = GET2(ecode, 1);   /* Recursion group number*/
1348         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1349         }
1350       break;
1351 
1352       case OP_DNRREF:       /* Duplicate named group recursion test */
1353       if (md->recursive != NULL)
1354         {
1355         int count = GET2(ecode, 1 + IMM2_SIZE);
1356         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1357         while (count-- > 0)
1358           {
1359           unsigned int recno = GET2(slot, 0);
1360           condition = recno == md->recursive->group_num;
1361           if (condition) break;
1362           slot += md->name_entry_size;
1363           }
1364         }
1365       break;
1366 
1367       case OP_CREF:         /* Numbered group used test */
1368       offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
1369       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1370       break;
1371 
1372       case OP_DNCREF:      /* Duplicate named group used test */
1373         {
1374         int count = GET2(ecode, 1 + IMM2_SIZE);
1375         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1376         while (count-- > 0)
1377           {
1378           offset = GET2(slot, 0) << 1;
1379           condition = offset < offset_top && md->offset_vector[offset] >= 0;
1380           if (condition) break;
1381           slot += md->name_entry_size;
1382           }
1383         }
1384       break;
1385 
1386       case OP_DEF:     /* DEFINE - always false */
1387       case OP_FAIL:    /* From optimized (?!) condition */
1388       break;
1389 
1390       /* The condition is an assertion. Call match() to evaluate it - setting
1391       md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
1392       of an assertion. */
1393 
1394       default:
1395       md->match_function_type = MATCH_CONDASSERT;
1396       RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
1397       if (rrc == MATCH_MATCH)
1398         {
1399         if (md->end_offset_top > offset_top)
1400           offset_top = md->end_offset_top;  /* Captures may have happened */
1401         condition = TRUE;
1402 
1403         /* Advance ecode past the assertion to the start of the first branch,
1404         but adjust it so that the general choosing code below works. If the
1405         assertion has a quantifier that allows zero repeats we must skip over
1406         the BRAZERO. This is a lunatic thing to do, but somebody did! */
1407 
1408         if (*ecode == OP_BRAZERO) ecode++;
1409         ecode += GET(ecode, 1);
1410         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1411         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
1412         }
1413 
1414       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1415       assertion; it is therefore treated as NOMATCH. Any other return is an
1416       error. */
1417 
1418       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1419         {
1420         RRETURN(rrc);         /* Need braces because of following else */
1421         }
1422       break;
1423       }
1424 
1425     /* Choose branch according to the condition */
1426 
1427     ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
1428 
1429     /* We are now at the branch that is to be obeyed. As there is only one, we
1430     can use tail recursion to avoid using another stack frame, except when
1431     there is unlimited repeat of a possibly empty group. In the latter case, a
1432     recursive call to match() is always required, unless the second alternative
1433     doesn't exist, in which case we can just plough on. Note that, for
1434     compatibility with Perl, the | in a conditional group is NOT treated as
1435     creating two alternatives. If a THEN is encountered in the branch, it
1436     propagates out to the enclosing alternative (unless nested in a deeper set
1437     of alternatives, of course). */
1438 
1439     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
1440       {
1441       if (op != OP_SCOND)
1442         {
1443         goto TAIL_RECURSE;
1444         }
1445 
1446       md->match_function_type = MATCH_CBEGROUP;
1447       RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
1448       RRETURN(rrc);
1449       }
1450 
1451      /* Condition false & no alternative; continue after the group. */
1452 
1453     else
1454       {
1455       }
1456     break;
1457 
1458 
1459     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1460     to close any currently open capturing brackets. */
1461 
1462     case OP_CLOSE:
1463     number = GET2(ecode, 1);   /* Must be less than 65536 */
1464     offset = number << 1;
1465 
1466 #ifdef PCRE_DEBUG
1467       printf("end bracket %d at *ACCEPT", number);
1468       printf("\n");
1469 #endif
1470 
1471     md->capture_last = (md->capture_last & OVFLMASK) | number;
1472     if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1473       {
1474       md->offset_vector[offset] =
1475         md->offset_vector[md->offset_end - number];
1476       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1477 
1478       /* If this group is at or above the current highwater mark, ensure that
1479       any groups between the current high water mark and this group are marked
1480       unset and then update the high water mark. */
1481 
1482       if (offset >= offset_top)
1483         {
1484         register int *iptr = md->offset_vector + offset_top;
1485         register int *iend = md->offset_vector + offset;
1486         while (iptr < iend) *iptr++ = -1;
1487         offset_top = offset + 2;
1488         }
1489       }
1490     ecode += 1 + IMM2_SIZE;
1491     break;
1492 
1493 
1494     /* End of the pattern, either real or forced. */
1495 
1496     case OP_END:
1497     case OP_ACCEPT:
1498     case OP_ASSERT_ACCEPT:
1499 
1500     /* If we have matched an empty string, fail if not in an assertion and not
1501     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1502     is set and we have matched at the start of the subject. In both cases,
1503     backtracking will then try other alternatives, if any. */
1504 
1505     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1506          md->recursive == NULL &&
1507          (md->notempty ||
1508            (md->notempty_atstart &&
1509              mstart == md->start_subject + md->start_offset)))
1510       RRETURN(MATCH_NOMATCH);
1511 
1512     /* Otherwise, we have a match. */
1513 
1514     md->end_match_ptr = eptr;           /* Record where we ended */
1515     md->end_offset_top = offset_top;    /* and how many extracts were taken */
1516     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1517 
1518     /* For some reason, the macros don't work properly if an expression is
1519     given as the argument to RRETURN when the heap is in use. */
1520 
1521     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1522     RRETURN(rrc);
1523 
1524     /* Assertion brackets. Check the alternative branches in turn - the
1525     matching won't pass the KET for an assertion. If any one branch matches,
1526     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1527     start of each branch to move the current point backwards, so the code at
1528     this level is identical to the lookahead case. When the assertion is part
1529     of a condition, we want to return immediately afterwards. The caller of
1530     this incarnation of the match() function will have set MATCH_CONDASSERT in
1531     md->match_function_type, and one of these opcodes will be the first opcode
1532     that is processed. We use a local variable that is preserved over calls to
1533     match() to remember this case. */
1534 
1535     case OP_ASSERT:
1536     case OP_ASSERTBACK:
1537     save_mark = md->mark;
1538     if (md->match_function_type == MATCH_CONDASSERT)
1539       {
1540       condassert = TRUE;
1541       md->match_function_type = 0;
1542       }
1543     else condassert = FALSE;
1544 
1545     /* Loop for each branch */
1546 
1547     do
1548       {
1549       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1550 
1551       /* A match means that the assertion is true; break out of the loop
1552       that matches its alternatives. */
1553 
1554       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1555         {
1556         mstart = md->start_match_ptr;   /* In case \K reset it */
1557         break;
1558         }
1559 
1560       /* If not matched, restore the previous mark setting. */
1561 
1562       md->mark = save_mark;
1563 
1564       /* See comment in the code for capturing groups above about handling
1565       THEN. */
1566 
1567       if (rrc == MATCH_THEN)
1568         {
1569         next = ecode + GET(ecode,1);
1570         if (md->start_match_ptr < next &&
1571             (*ecode == OP_ALT || *next == OP_ALT))
1572           rrc = MATCH_NOMATCH;
1573         }
1574 
1575       /* Anything other than NOMATCH causes the entire assertion to fail,
1576       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1577       uncaptured THEN, which means they take their normal effect. This
1578       consistent approach does not always have exactly the same effect as in
1579       Perl. */
1580 
1581       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1582       ecode += GET(ecode, 1);
1583       }
1584     while (*ecode == OP_ALT);   /* Continue for next alternative */
1585 
1586     /* If we have tried all the alternative branches, the assertion has
1587     failed. If not, we broke out after a match. */
1588 
1589     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1590 
1591     /* If checking an assertion for a condition, return MATCH_MATCH. */
1592 
1593     if (condassert) RRETURN(MATCH_MATCH);
1594 
1595     /* Continue from after a successful assertion, updating the offsets high
1596     water mark, since extracts may have been taken during the assertion. */
1597 
1598     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1599     ecode += 1 + LINK_SIZE;
1600     offset_top = md->end_offset_top;
1601     continue;
1602 
1603     /* Negative assertion: all branches must fail to match for the assertion to
1604     succeed. */
1605 
1606     case OP_ASSERT_NOT:
1607     case OP_ASSERTBACK_NOT:
1608     save_mark = md->mark;
1609     if (md->match_function_type == MATCH_CONDASSERT)
1610       {
1611       condassert = TRUE;
1612       md->match_function_type = 0;
1613       }
1614     else condassert = FALSE;
1615 
1616     /* Loop for each alternative branch. */
1617 
1618     do
1619       {
1620       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1621       md->mark = save_mark;   /* Always restore the mark setting */
1622 
1623       switch(rrc)
1624         {
1625         case MATCH_MATCH:            /* A successful match means */
1626         case MATCH_ACCEPT:           /* the assertion has failed. */
1627         RRETURN(MATCH_NOMATCH);
1628 
1629         case MATCH_NOMATCH:          /* Carry on with next branch */
1630         break;
1631 
1632         /* See comment in the code for capturing groups above about handling
1633         THEN. */
1634 
1635         case MATCH_THEN:
1636         next = ecode + GET(ecode,1);
1637         if (md->start_match_ptr < next &&
1638             (*ecode == OP_ALT || *next == OP_ALT))
1639           {
1640           rrc = MATCH_NOMATCH;
1641           break;
1642           }
1643         /* Otherwise fall through. */
1644 
1645         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1646         assertion to fail to match, without considering any more alternatives.
1647         Failing to match means the assertion is true. This is a consistent
1648         approach, but does not always have the same effect as in Perl. */
1649 
1650         case MATCH_COMMIT:
1651         case MATCH_SKIP:
1652         case MATCH_SKIP_ARG:
1653         case MATCH_PRUNE:
1654         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1655         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1656 
1657         /* Anything else is an error */
1658 
1659         default:
1660         RRETURN(rrc);
1661         }
1662 
1663       /* Continue with next branch */
1664 
1665       ecode += GET(ecode,1);
1666       }
1667     while (*ecode == OP_ALT);
1668 
1669     /* All branches in the assertion failed to match. */
1670 
1671     NEG_ASSERT_TRUE:
1672     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1673     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1674     continue;
1675 
1676     /* Move the subject pointer back. This occurs only at the start of
1677     each branch of a lookbehind assertion. If we are too close to the start to
1678     move back, this match function fails. When working with UTF-8 we move
1679     back a number of characters, not bytes. */
1680 
1681     case OP_REVERSE:
1682 #ifdef SUPPORT_UTF
1683     if (utf)
1684       {
1685       i = GET(ecode, 1);
1686       while (i-- > 0)
1687         {
1688         eptr--;
1689         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1690         BACKCHAR(eptr);
1691         }
1692       }
1693     else
1694 #endif
1695 
1696     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1697 
1698       {
1699       eptr -= GET(ecode, 1);
1700       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1701       }
1702 
1703     /* Save the earliest consulted character, then skip to next op code */
1704 
1705     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1706     ecode += 1 + LINK_SIZE;
1707     break;
1708 
1709     /* The callout item calls an external function, if one is provided, passing
1710     details of the match so far. This is mainly for debugging, though the
1711     function is able to force a failure. */
1712 
1713     case OP_CALLOUT:
1714     if (PUBL(callout) != NULL)
1715       {
1716       PUBL(callout_block) cb;
      cb.version          = 2;   /* Version 2 of the callout block */
1718       cb.callout_number   = ecode[1];
1719       cb.offset_vector    = md->offset_vector;
1720 #if defined COMPILE_PCRE8
1721       cb.subject          = (PCRE_SPTR)md->start_subject;
1722 #elif defined COMPILE_PCRE16
1723       cb.subject          = (PCRE_SPTR16)md->start_subject;
1724 #elif defined COMPILE_PCRE32
1725       cb.subject          = (PCRE_SPTR32)md->start_subject;
1726 #endif
1727       cb.subject_length   = (int)(md->end_subject - md->start_subject);
1728       cb.start_match      = (int)(mstart - md->start_subject);
1729       cb.current_position = (int)(eptr - md->start_subject);
1730       cb.pattern_position = GET(ecode, 2);
1731       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1732       cb.capture_top      = offset_top/2;
1733       cb.capture_last     = md->capture_last & CAPLMASK;
1734       /* Internal change requires this for API compatibility. */
1735       if (cb.capture_last == 0) cb.capture_last = -1;
1736       cb.callout_data     = md->callout_data;
1737       cb.mark             = md->nomatch_mark;
1738       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1739       if (rrc < 0) RRETURN(rrc);
1740       }
1741     ecode += 2 + 2*LINK_SIZE;
1742     break;
1743 
1744     /* Recursion either matches the current regex, or some subexpression. The
1745     offset data is the offset to the starting bracket from the start of the
1746     whole pattern. (This is so that it works from duplicated subpatterns.)
1747 
1748     The state of the capturing groups is preserved over recursion, and
1749     re-instated afterwards. We don't know how many are started and not yet
1750     finished (offset_top records the completed total) so we just have to save
1751     all the potential data. There may be up to 65535 such values, which is too
1752     large to put on the stack, but using malloc for small numbers seems
1753     expensive. As a compromise, the stack is used when there are no more than
1754     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1755 
1756     There are also other values that have to be saved. We use a chained
1757     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1758     for the original version of this logic. It has, however, been hacked around
1759     a lot, so he is not to blame for the current way it works. */
1760 
1761     case OP_RECURSE:
1762       {
1763       recursion_info *ri;
1764       unsigned int recno;
1765 
1766       callpat = md->start_code + GET(ecode, 1);
1767       recno = (callpat == md->start_code)? 0 :
1768         GET2(callpat, 1 + LINK_SIZE);
1769 
1770       /* Check for repeating a recursion without advancing the subject pointer.
1771       This should catch convoluted mutual recursions. (Some simple cases are
1772       caught at compile time.) */
1773 
1774       for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1775         if (recno == ri->group_num && eptr == ri->subject_position)
1776           RRETURN(PCRE_ERROR_RECURSELOOP);
1777 
1778       /* Add to "recursing stack" */
1779 
1780       new_recursive.group_num = recno;
1781       new_recursive.saved_capture_last = md->capture_last;
1782       new_recursive.subject_position = eptr;
1783       new_recursive.prevrec = md->recursive;
1784       md->recursive = &new_recursive;
1785 
1786       /* Where to continue from afterwards */
1787 
1788       ecode += 1 + LINK_SIZE;
1789 
1790       /* Now save the offset data */
1791 
1792       new_recursive.saved_max = md->offset_end;
1793       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1794         new_recursive.offset_save = stacksave;
1795       else
1796         {
1797         new_recursive.offset_save =
1798           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1799         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1800         }
1801       memcpy(new_recursive.offset_save, md->offset_vector,
1802             new_recursive.saved_max * sizeof(int));
1803 
1804       /* OK, now we can do the recursion. After processing each alternative,
1805       restore the offset data and the last captured value. If there were nested
1806       recursions, md->recursive might be changed, so reset it before looping.
1807       */
1808 
1809       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1810       cbegroup = (*callpat >= OP_SBRA);
1811       do
1812         {
1813         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1814         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1815           md, eptrb, RM6);
1816         memcpy(md->offset_vector, new_recursive.offset_save,
1817             new_recursive.saved_max * sizeof(int));
1818         md->capture_last = new_recursive.saved_capture_last;
1819         md->recursive = new_recursive.prevrec;
1820         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1821           {
1822           DPRINTF(("Recursion matched\n"));
1823           if (new_recursive.offset_save != stacksave)
1824             (PUBL(free))(new_recursive.offset_save);
1825 
1826           /* Set where we got to in the subject, and reset the start in case
1827           it was changed by \K. This *is* propagated back out of a recursion,
1828           for Perl compatibility. */
1829 
1830           eptr = md->end_match_ptr;
1831           mstart = md->start_match_ptr;
1832           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1833           }
1834 
1835         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1836         recursion; they cause a NOMATCH for the entire recursion. These codes
1837         are defined in a range that can be tested for. */
1838 
1839         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1840           {
1841           if (new_recursive.offset_save != stacksave)
1842             (PUBL(free))(new_recursive.offset_save);
1843           RRETURN(MATCH_NOMATCH);
1844           }
1845 
1846         /* Any return code other than NOMATCH is an error. */
1847 
1848         if (rrc != MATCH_NOMATCH)
1849           {
1850           DPRINTF(("Recursion gave error %d\n", rrc));
1851           if (new_recursive.offset_save != stacksave)
1852             (PUBL(free))(new_recursive.offset_save);
1853           RRETURN(rrc);
1854           }
1855 
1856         md->recursive = &new_recursive;
1857         callpat += GET(callpat, 1);
1858         }
1859       while (*callpat == OP_ALT);
1860 
1861       DPRINTF(("Recursion didn't match\n"));
1862       md->recursive = new_recursive.prevrec;
1863       if (new_recursive.offset_save != stacksave)
1864         (PUBL(free))(new_recursive.offset_save);
1865       RRETURN(MATCH_NOMATCH);
1866       }
1867 
1868     RECURSION_MATCHED:
1869     break;
1870 
1871     /* An alternation is the end of a branch; scan along to find the end of the
1872     bracketed group and go to there. */
1873 
1874     case OP_ALT:
1875     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1876     break;
1877 
1878     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1879     indicating that it may occur zero times. It may repeat infinitely, or not
1880     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1881     with fixed upper repeat limits are compiled as a number of copies, with the
1882     optional ones preceded by BRAZERO or BRAMINZERO. */
1883 
1884     case OP_BRAZERO:
1885     next = ecode + 1;
1886     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1887     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1888     do next += GET(next, 1); while (*next == OP_ALT);
1889     ecode = next + 1 + LINK_SIZE;
1890     break;
1891 
1892     case OP_BRAMINZERO:
1893     next = ecode + 1;
1894     do next += GET(next, 1); while (*next == OP_ALT);
1895     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1896     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1897     ecode++;
1898     break;
1899 
1900     case OP_SKIPZERO:
1901     next = ecode+1;
1902     do next += GET(next,1); while (*next == OP_ALT);
1903     ecode = next + 1 + LINK_SIZE;
1904     break;
1905 
1906     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1907     here; just jump to the group, with allow_zero set TRUE. */
1908 
1909     case OP_BRAPOSZERO:
1910     op = *(++ecode);
1911     allow_zero = TRUE;
1912     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1913       goto POSSESSIVE_NON_CAPTURE;
1914 
1915     /* End of a group, repeated or non-repeating. */
1916 
1917     case OP_KET:
1918     case OP_KETRMIN:
1919     case OP_KETRMAX:
1920     case OP_KETRPOS:
1921     prev = ecode - GET(ecode, 1);
1922 
1923     /* If this was a group that remembered the subject start, in order to break
1924     infinite repeats of empty string matches, retrieve the subject start from
1925     the chain. Otherwise, set it NULL. */
1926 
1927     if (*prev >= OP_SBRA || *prev == OP_ONCE)
1928       {
1929       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1930       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1931       }
1932     else saved_eptr = NULL;
1933 
1934     /* If we are at the end of an assertion group or a non-capturing atomic
1935     group, stop matching and return MATCH_MATCH, but record the current high
1936     water mark for use by positive assertions. We also need to record the match
1937     start in case it was changed by \K. */
1938 
1939     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
1940          *prev == OP_ONCE_NC)
1941       {
1942       md->end_match_ptr = eptr;      /* For ONCE_NC */
1943       md->end_offset_top = offset_top;
1944       md->start_match_ptr = mstart;
1945       RRETURN(MATCH_MATCH);         /* Sets md->mark */
1946       }
1947 
1948     /* For capturing groups we have to check the group number back at the start
1949     and if necessary complete handling an extraction by setting the offsets and
1950     bumping the high water mark. Whole-pattern recursion is coded as a recurse
1951     into group 0, so it won't be picked up here. Instead, we catch it when the
1952     OP_END is reached. Other recursion is handled here. We just have to record
1953     the current subject position and start match pointer and give a MATCH
1954     return. */
1955 
1956     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
1957         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1958       {
1959       number = GET2(prev, 1+LINK_SIZE);
1960       offset = number << 1;
1961 
1962 #ifdef PCRE_DEBUG
1963       printf("end bracket %d", number);
1964       printf("\n");
1965 #endif
1966 
1967       /* Handle a recursively called group. */
1968 
1969       if (md->recursive != NULL && md->recursive->group_num == number)
1970         {
1971         md->end_match_ptr = eptr;
1972         md->start_match_ptr = mstart;
1973         RRETURN(MATCH_MATCH);
1974         }
1975 
1976       /* Deal with capturing */
1977 
1978       md->capture_last = (md->capture_last & OVFLMASK) | number;
1979       if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1980         {
1981         /* If offset is greater than offset_top, it means that we are
1982         "skipping" a capturing group, and that group's offsets must be marked
1983         unset. In earlier versions of PCRE, all the offsets were unset at the
1984         start of matching, but this doesn't work because atomic groups and
1985         assertions can cause a value to be set that should later be unset.
1986         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1987         part of the atomic group, but this is not on the final matching path,
1988         so must be unset when 2 is set. (If there is no group 2, there is no
1989         problem, because offset_top will then be 2, indicating no capture.) */
1990 
1991         if (offset > offset_top)
1992           {
1993           register int *iptr = md->offset_vector + offset_top;
1994           register int *iend = md->offset_vector + offset;
1995           while (iptr < iend) *iptr++ = -1;
1996           }
1997 
1998         /* Now make the extraction */
1999 
2000         md->offset_vector[offset] =
2001           md->offset_vector[md->offset_end - number];
2002         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
2003         if (offset_top <= offset) offset_top = offset + 2;
2004         }
2005       }
2006 
    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
    and return MATCH_KETRPOS. This makes it possible to do the repeats one
    at a time from the outer level, thus saving stack. This must precede the
    empty string test - in this case that test is done at the outer level. */
2011 
2012     if (*ecode == OP_KETRPOS)
2013       {
2014       md->start_match_ptr = mstart;    /* In case \K reset it */
2015       md->end_match_ptr = eptr;
2016       md->end_offset_top = offset_top;
2017       RRETURN(MATCH_KETRPOS);
2018       }
2019 
2020     /* For an ordinary non-repeating ket, just continue at this level. This
2021     also happens for a repeating ket if no characters were matched in the
2022     group. This is the forcible breaking of infinite loops as implemented in
2023     Perl 5.005. For a non-repeating atomic group that includes captures,
2024     establish a backup point by processing the rest of the pattern at a lower
2025     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
2026     original OP_ONCE level, thereby bypassing intermediate backup points, but
2027     resetting any captures that happened along the way. */
2028 
2029     if (*ecode == OP_KET || eptr == saved_eptr)
2030       {
2031       if (*prev == OP_ONCE)
2032         {
2033         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
2034         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2035         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2036         RRETURN(MATCH_ONCE);
2037         }
2038       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
2039       break;
2040       }
2041 
    /* The normal repeating kets try the rest of the pattern or restart from
    the preceding bracket, in the appropriate order. In the second case, we can
    use tail recursion to avoid using another stack frame, unless we have an
    atomic group or an unlimited repeat of a group that can match an empty
    string. */
2047 
2048     if (*ecode == OP_KETRMIN)
2049       {
2050       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2051       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2052       if (*prev == OP_ONCE)
2053         {
2054         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2055         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2056         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2057         RRETURN(MATCH_ONCE);
2058         }
2059       if (*prev >= OP_SBRA)    /* Could match an empty string */
2060         {
2061         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2062         RRETURN(rrc);
2063         }
2064       ecode = prev;
2065       goto TAIL_RECURSE;
2066       }
2067     else  /* OP_KETRMAX */
2068       {
2069       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2070       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2071       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2072       if (*prev == OP_ONCE)
2073         {
2074         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2075         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2076         md->once_target = prev;
2077         RRETURN(MATCH_ONCE);
2078         }
2079       ecode += 1 + LINK_SIZE;
2080       goto TAIL_RECURSE;
2081       }
2082     /* Control never gets here */
2083 
2084     /* Not multiline mode: start of subject assertion, unless notbol. */
2085 
2086     case OP_CIRC:
2087     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2088 
2089     /* Fall through. Start of subject assertion */
2090 
2091     case OP_SOD:
2092     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2093     ecode++;
2094     break;
2095 
2096     /* Multiline mode: start of subject unless notbol, or after any newline. */
2097 
2098     case OP_CIRCM:
2099     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2100     if (eptr != md->start_subject &&
2101         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2102       RRETURN(MATCH_NOMATCH);
2103     ecode++;
2104     break;
2105 
2106     /* Start of match assertion */
2107 
2108     case OP_SOM:
2109     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2110     ecode++;
2111     break;
2112 
2113     /* Reset the start of match point */
2114 
2115     case OP_SET_SOM:
2116     mstart = eptr;
2117     ecode++;
2118     break;
2119 
2120     /* Multiline mode: assert before any newline, or before end of subject
2121     unless noteol is set. */
2122 
2123     case OP_DOLLM:
2124     if (eptr < md->end_subject)
2125       {
2126       if (!IS_NEWLINE(eptr))
2127         {
2128         if (md->partial != 0 &&
2129             eptr + 1 >= md->end_subject &&
2130             NLBLOCK->nltype == NLTYPE_FIXED &&
2131             NLBLOCK->nllen == 2 &&
2132             UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2133           {
2134           md->hitend = TRUE;
2135           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2136           }
2137         RRETURN(MATCH_NOMATCH);
2138         }
2139       }
2140     else
2141       {
2142       if (md->noteol) RRETURN(MATCH_NOMATCH);
2143       SCHECK_PARTIAL();
2144       }
2145     ecode++;
2146     break;
2147 
2148     /* Not multiline mode: assert before a terminating newline or before end of
2149     subject unless noteol is set. */
2150 
2151     case OP_DOLL:
2152     if (md->noteol) RRETURN(MATCH_NOMATCH);
2153     if (!md->endonly) goto ASSERT_NL_OR_EOS;
2154 
2155     /* ... else fall through for endonly */
2156 
2157     /* End of subject assertion (\z) */
2158 
2159     case OP_EOD:
2160     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2161     SCHECK_PARTIAL();
2162     ecode++;
2163     break;
2164 
2165     /* End of subject or ending \n assertion (\Z) */
2166 
2167     case OP_EODN:
2168     ASSERT_NL_OR_EOS:
2169     if (eptr < md->end_subject &&
2170         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2171       {
2172       if (md->partial != 0 &&
2173           eptr + 1 >= md->end_subject &&
2174           NLBLOCK->nltype == NLTYPE_FIXED &&
2175           NLBLOCK->nllen == 2 &&
2176           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2177         {
2178         md->hitend = TRUE;
2179         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2180         }
2181       RRETURN(MATCH_NOMATCH);
2182       }
2183 
2184     /* Either at end of string or \n before end. */
2185 
2186     SCHECK_PARTIAL();
2187     ecode++;
2188     break;
2189 
2190     /* Word boundary assertions */
2191 
2192     case OP_NOT_WORD_BOUNDARY:
2193     case OP_WORD_BOUNDARY:
2194       {
2195 
2196       /* Find out if the previous and current characters are "word" characters.
2197       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2198       be "non-word" characters. Remember the earliest consulted character for
2199       partial matching. */
2200 
2201 #ifdef SUPPORT_UTF
2202       if (utf)
2203         {
2204         /* Get status of previous character */
2205 
2206         if (eptr == md->start_subject) prev_is_word = FALSE; else
2207           {
2208           PCRE_PUCHAR lastptr = eptr - 1;
2209           BACKCHAR(lastptr);
2210           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2211           GETCHAR(c, lastptr);
2212 #ifdef SUPPORT_UCP
2213           if (md->use_ucp)
2214             {
2215             if (c == '_') prev_is_word = TRUE; else
2216               {
2217               int cat = UCD_CATEGORY(c);
2218               prev_is_word = (cat == ucp_L || cat == ucp_N);
2219               }
2220             }
2221           else
2222 #endif
2223           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2224           }
2225 
2226         /* Get status of next character */
2227 
2228         if (eptr >= md->end_subject)
2229           {
2230           SCHECK_PARTIAL();
2231           cur_is_word = FALSE;
2232           }
2233         else
2234           {
2235           GETCHAR(c, eptr);
2236 #ifdef SUPPORT_UCP
2237           if (md->use_ucp)
2238             {
2239             if (c == '_') cur_is_word = TRUE; else
2240               {
2241               int cat = UCD_CATEGORY(c);
2242               cur_is_word = (cat == ucp_L || cat == ucp_N);
2243               }
2244             }
2245           else
2246 #endif
2247           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2248           }
2249         }
2250       else
2251 #endif
2252 
2253       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2254       consistency with the behaviour of \w we do use it in this case. */
2255 
2256         {
2257         /* Get status of previous character */
2258 
2259         if (eptr == md->start_subject) prev_is_word = FALSE; else
2260           {
2261           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2262 #ifdef SUPPORT_UCP
2263           if (md->use_ucp)
2264             {
2265             c = eptr[-1];
2266             if (c == '_') prev_is_word = TRUE; else
2267               {
2268               int cat = UCD_CATEGORY(c);
2269               prev_is_word = (cat == ucp_L || cat == ucp_N);
2270               }
2271             }
2272           else
2273 #endif
2274           prev_is_word = MAX_255(eptr[-1])
2275             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2276           }
2277 
2278         /* Get status of next character */
2279 
2280         if (eptr >= md->end_subject)
2281           {
2282           SCHECK_PARTIAL();
2283           cur_is_word = FALSE;
2284           }
2285         else
2286 #ifdef SUPPORT_UCP
2287         if (md->use_ucp)
2288           {
2289           c = *eptr;
2290           if (c == '_') cur_is_word = TRUE; else
2291             {
2292             int cat = UCD_CATEGORY(c);
2293             cur_is_word = (cat == ucp_L || cat == ucp_N);
2294             }
2295           }
2296         else
2297 #endif
2298         cur_is_word = MAX_255(*eptr)
2299           && ((md->ctypes[*eptr] & ctype_word) != 0);
2300         }
2301 
2302       /* Now see if the situation is what we want */
2303 
2304       if ((*ecode++ == OP_WORD_BOUNDARY)?
2305            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2306         RRETURN(MATCH_NOMATCH);
2307       }
2308     break;
2309 
2310     /* Match any single character type except newline; have to take care with
2311     CRLF newlines and partial matching. */
2312 
2313     case OP_ANY:
2314     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2315     if (md->partial != 0 &&
2316         eptr == md->end_subject - 1 &&
2317         NLBLOCK->nltype == NLTYPE_FIXED &&
2318         NLBLOCK->nllen == 2 &&
2319         UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2320       {
2321       md->hitend = TRUE;
2322       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2323       }
2324 
2325     /* Fall through */
2326 
2327     /* Match any single character whatsoever. */
2328 
2329     case OP_ALLANY:
2330     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2331       {                            /* not be updated before SCHECK_PARTIAL. */
2332       SCHECK_PARTIAL();
2333       RRETURN(MATCH_NOMATCH);
2334       }
2335     eptr++;
2336 #ifdef SUPPORT_UTF
2337     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2338 #endif
2339     ecode++;
2340     break;
2341 
2342     /* Match a single byte, even in UTF-8 mode. This opcode really does match
2343     any byte, even newline, independent of the setting of PCRE_DOTALL. */
2344 
2345     case OP_ANYBYTE:
2346     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2347       {                            /* not be updated before SCHECK_PARTIAL. */
2348       SCHECK_PARTIAL();
2349       RRETURN(MATCH_NOMATCH);
2350       }
2351     eptr++;
2352     ecode++;
2353     break;
2354 
2355     case OP_NOT_DIGIT:
2356     if (eptr >= md->end_subject)
2357       {
2358       SCHECK_PARTIAL();
2359       RRETURN(MATCH_NOMATCH);
2360       }
2361     GETCHARINCTEST(c, eptr);
2362     if (
2363 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2364        c < 256 &&
2365 #endif
2366        (md->ctypes[c] & ctype_digit) != 0
2367        )
2368       RRETURN(MATCH_NOMATCH);
2369     ecode++;
2370     break;
2371 
2372     case OP_DIGIT:
2373     if (eptr >= md->end_subject)
2374       {
2375       SCHECK_PARTIAL();
2376       RRETURN(MATCH_NOMATCH);
2377       }
2378     GETCHARINCTEST(c, eptr);
2379     if (
2380 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2381        c > 255 ||
2382 #endif
2383        (md->ctypes[c] & ctype_digit) == 0
2384        )
2385       RRETURN(MATCH_NOMATCH);
2386     ecode++;
2387     break;
2388 
2389     case OP_NOT_WHITESPACE:
2390     if (eptr >= md->end_subject)
2391       {
2392       SCHECK_PARTIAL();
2393       RRETURN(MATCH_NOMATCH);
2394       }
2395     GETCHARINCTEST(c, eptr);
2396     if (
2397 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2398        c < 256 &&
2399 #endif
2400        (md->ctypes[c] & ctype_space) != 0
2401        )
2402       RRETURN(MATCH_NOMATCH);
2403     ecode++;
2404     break;
2405 
2406     case OP_WHITESPACE:
2407     if (eptr >= md->end_subject)
2408       {
2409       SCHECK_PARTIAL();
2410       RRETURN(MATCH_NOMATCH);
2411       }
2412     GETCHARINCTEST(c, eptr);
2413     if (
2414 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2415        c > 255 ||
2416 #endif
2417        (md->ctypes[c] & ctype_space) == 0
2418        )
2419       RRETURN(MATCH_NOMATCH);
2420     ecode++;
2421     break;
2422 
2423     case OP_NOT_WORDCHAR:
2424     if (eptr >= md->end_subject)
2425       {
2426       SCHECK_PARTIAL();
2427       RRETURN(MATCH_NOMATCH);
2428       }
2429     GETCHARINCTEST(c, eptr);
2430     if (
2431 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2432        c < 256 &&
2433 #endif
2434        (md->ctypes[c] & ctype_word) != 0
2435        )
2436       RRETURN(MATCH_NOMATCH);
2437     ecode++;
2438     break;
2439 
2440     case OP_WORDCHAR:
2441     if (eptr >= md->end_subject)
2442       {
2443       SCHECK_PARTIAL();
2444       RRETURN(MATCH_NOMATCH);
2445       }
2446     GETCHARINCTEST(c, eptr);
2447     if (
2448 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2449        c > 255 ||
2450 #endif
2451        (md->ctypes[c] & ctype_word) == 0
2452        )
2453       RRETURN(MATCH_NOMATCH);
2454     ecode++;
2455     break;
2456 
2457     case OP_ANYNL:
2458     if (eptr >= md->end_subject)
2459       {
2460       SCHECK_PARTIAL();
2461       RRETURN(MATCH_NOMATCH);
2462       }
2463     GETCHARINCTEST(c, eptr);
2464     switch(c)
2465       {
2466       default: RRETURN(MATCH_NOMATCH);
2467 
2468       case CHAR_CR:
2469       if (eptr >= md->end_subject)
2470         {
2471         SCHECK_PARTIAL();
2472         }
2473       else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
2474       break;
2475 
2476       case CHAR_LF:
2477       break;
2478 
2479       case CHAR_VT:
2480       case CHAR_FF:
2481       case CHAR_NEL:
2482 #ifndef EBCDIC
2483       case 0x2028:
2484       case 0x2029:
2485 #endif  /* Not EBCDIC */
2486       if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2487       break;
2488       }
2489     ecode++;
2490     break;
2491 
2492     case OP_NOT_HSPACE:
2493     if (eptr >= md->end_subject)
2494       {
2495       SCHECK_PARTIAL();
2496       RRETURN(MATCH_NOMATCH);
2497       }
2498     GETCHARINCTEST(c, eptr);
2499     switch(c)
2500       {
2501       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2502       default: break;
2503       }
2504     ecode++;
2505     break;
2506 
2507     case OP_HSPACE:
2508     if (eptr >= md->end_subject)
2509       {
2510       SCHECK_PARTIAL();
2511       RRETURN(MATCH_NOMATCH);
2512       }
2513     GETCHARINCTEST(c, eptr);
2514     switch(c)
2515       {
2516       HSPACE_CASES: break;  /* Byte and multibyte cases */
2517       default: RRETURN(MATCH_NOMATCH);
2518       }
2519     ecode++;
2520     break;
2521 
2522     case OP_NOT_VSPACE:
2523     if (eptr >= md->end_subject)
2524       {
2525       SCHECK_PARTIAL();
2526       RRETURN(MATCH_NOMATCH);
2527       }
2528     GETCHARINCTEST(c, eptr);
2529     switch(c)
2530       {
2531       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2532       default: break;
2533       }
2534     ecode++;
2535     break;
2536 
2537     case OP_VSPACE:
2538     if (eptr >= md->end_subject)
2539       {
2540       SCHECK_PARTIAL();
2541       RRETURN(MATCH_NOMATCH);
2542       }
2543     GETCHARINCTEST(c, eptr);
2544     switch(c)
2545       {
2546       VSPACE_CASES: break;
2547       default: RRETURN(MATCH_NOMATCH);
2548       }
2549     ecode++;
2550     break;
2551 
2552 #ifdef SUPPORT_UCP
2553     /* Check the next character by Unicode property. We will get here only
2554     if the support is in the binary; otherwise a compile-time error occurs. */
2555 
2556     case OP_PROP:
2557     case OP_NOTPROP:
2558     if (eptr >= md->end_subject)
2559       {
2560       SCHECK_PARTIAL();
2561       RRETURN(MATCH_NOMATCH);
2562       }
2563     GETCHARINCTEST(c, eptr);
2564       {
2565       const pcre_uint32 *cp;
2566       const ucd_record *prop = GET_UCD(c);
2567 
2568       switch(ecode[1])
2569         {
2570         case PT_ANY:
2571         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2572         break;
2573 
2574         case PT_LAMP:
2575         if ((prop->chartype == ucp_Lu ||
2576              prop->chartype == ucp_Ll ||
2577              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2578           RRETURN(MATCH_NOMATCH);
2579         break;
2580 
2581         case PT_GC:
2582         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2583           RRETURN(MATCH_NOMATCH);
2584         break;
2585 
2586         case PT_PC:
2587         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2588           RRETURN(MATCH_NOMATCH);
2589         break;
2590 
2591         case PT_SC:
2592         if ((ecode[2] != prop->script) == (op == OP_PROP))
2593           RRETURN(MATCH_NOMATCH);
2594         break;
2595 
2596         /* These are specials */
2597 
2598         case PT_ALNUM:
2599         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2600              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2601           RRETURN(MATCH_NOMATCH);
2602         break;
2603 
2604         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2605         which means that Perl space and POSIX space are now identical. PCRE
2606         was changed at release 8.34. */
2607 
2608         case PT_SPACE:    /* Perl space */
2609         case PT_PXSPACE:  /* POSIX space */
2610         switch(c)
2611           {
2612           HSPACE_CASES:
2613           VSPACE_CASES:
2614           if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2615           break;
2616 
2617           default:
2618           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2619             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2620           break;
2621           }
2622         break;
2623 
2624         case PT_WORD:
2625         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2626              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2627              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2628           RRETURN(MATCH_NOMATCH);
2629         break;
2630 
2631         case PT_CLIST:
2632         cp = PRIV(ucd_caseless_sets) + ecode[2];
2633         for (;;)
2634           {
2635           if (c < *cp)
2636             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2637           if (c == *cp++)
2638             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2639           }
2640         break;
2641 
2642         case PT_UCNC:
2643         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2644              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2645              c >= 0xe000) == (op == OP_NOTPROP))
2646           RRETURN(MATCH_NOMATCH);
2647         break;
2648 
2649         /* This should never occur */
2650 
2651         default:
2652         RRETURN(PCRE_ERROR_INTERNAL);
2653         }
2654 
2655       ecode += 3;
2656       }
2657     break;
2658 
2659     /* Match an extended Unicode sequence. We will get here only if the support
2660     is in the binary; otherwise a compile-time error occurs. */
2661 
2662     case OP_EXTUNI:
2663     if (eptr >= md->end_subject)
2664       {
2665       SCHECK_PARTIAL();
2666       RRETURN(MATCH_NOMATCH);
2667       }
2668     else
2669       {
2670       int lgb, rgb;
2671       GETCHARINCTEST(c, eptr);
2672       lgb = UCD_GRAPHBREAK(c);
2673       while (eptr < md->end_subject)
2674         {
2675         int len = 1;
2676         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2677         rgb = UCD_GRAPHBREAK(c);
2678         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2679         lgb = rgb;
2680         eptr += len;
2681         }
2682       }
2683     CHECK_PARTIAL();
2684     ecode++;
2685     break;
2686 #endif  /* SUPPORT_UCP */
2687 
2688 
2689     /* Match a back reference, possibly repeatedly. Look past the end of the
2690     item to see if there is repeat information following. The code is similar
2691     to that for character classes, but repeated for efficiency. Then obey
2692     similar code to character type repeats - written out again for speed.
2693     However, if the referenced string is the empty string, always treat
2694     it as matched, any number of times (otherwise there could be infinite
2695     loops). If the reference is unset, there are two possibilities:
2696 
2697     (a) In the default, Perl-compatible state, set the length negative;
2698     this ensures that every attempt at a match fails. We can't just fail
2699     here, because of the possibility of quantifiers with zero minima.
2700 
2701     (b) If the JavaScript compatibility flag is set, set the length to zero
2702     so that the back reference matches an empty string.
2703 
2704     Otherwise, set the length to the length of what was matched by the
2705     referenced subpattern.
2706 
2707     The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2708     or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2709     and OP_DNREFI are used. In this case we must scan the list of groups to
2710     which the name refers, and use the first one that is set. */
2711 
2712     case OP_DNREF:
2713     case OP_DNREFI:
2714     caseless = op == OP_DNREFI;
2715       {
2716       int count = GET2(ecode, 1+IMM2_SIZE);
2717       pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
2718       ecode += 1 + 2*IMM2_SIZE;
2719 
2720       /* Setting the default length first and initializing 'offset' avoids
2721       compiler warnings in the REF_REPEAT code. */
2722 
2723       length = (md->jscript_compat)? 0 : -1;
2724       offset = 0;
2725 
2726       while (count-- > 0)
2727         {
2728         offset = GET2(slot, 0) << 1;
2729         if (offset < offset_top && md->offset_vector[offset] >= 0)
2730           {
2731           length = md->offset_vector[offset+1] - md->offset_vector[offset];
2732           break;
2733           }
2734         slot += md->name_entry_size;
2735         }
2736       }
2737     goto REF_REPEAT;
2738 
2739     case OP_REF:
2740     case OP_REFI:
2741     caseless = op == OP_REFI;
2742     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2743     ecode += 1 + IMM2_SIZE;
2744     if (offset >= offset_top || md->offset_vector[offset] < 0)
2745       length = (md->jscript_compat)? 0 : -1;
2746     else
2747       length = md->offset_vector[offset+1] - md->offset_vector[offset];
2748 
2749     /* Set up for repetition, or handle the non-repeated case */
2750 
2751     REF_REPEAT:
2752     switch (*ecode)
2753       {
2754       case OP_CRSTAR:
2755       case OP_CRMINSTAR:
2756       case OP_CRPLUS:
2757       case OP_CRMINPLUS:
2758       case OP_CRQUERY:
2759       case OP_CRMINQUERY:
2760       c = *ecode++ - OP_CRSTAR;
2761       minimize = (c & 1) != 0;
2762       min = rep_min[c];                 /* Pick up values from tables; */
2763       max = rep_max[c];                 /* zero for max => infinity */
2764       if (max == 0) max = INT_MAX;
2765       break;
2766 
2767       case OP_CRRANGE:
2768       case OP_CRMINRANGE:
2769       minimize = (*ecode == OP_CRMINRANGE);
2770       min = GET2(ecode, 1);
2771       max = GET2(ecode, 1 + IMM2_SIZE);
2772       if (max == 0) max = INT_MAX;
2773       ecode += 1 + 2 * IMM2_SIZE;
2774       break;
2775 
2776       default:               /* No repeat follows */
2777       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2778         {
2779         if (length == -2) eptr = md->end_subject;   /* Partial match */
2780         CHECK_PARTIAL();
2781         RRETURN(MATCH_NOMATCH);
2782         }
2783       eptr += length;
2784       continue;              /* With the main loop */
2785       }
2786 
2787     /* Handle repeated back references. If the length of the reference is
2788     zero, just continue with the main loop. If the length is negative, it
2789     means the reference is unset in non-JavaScript-compatible mode. If the minimum is
2790     zero, we can continue at the same level without recursion. For any other
2791     minimum, carrying on will result in NOMATCH. */
2792 
2793     if (length == 0) continue;
2794     if (length < 0 && min == 0) continue;
2795 
2796     /* First, ensure the minimum number of matches are present. We get back
2797     the length of the reference string explicitly rather than passing the
2798     address of eptr, so that eptr can be a register variable. */
2799 
2800     for (i = 1; i <= min; i++)
2801       {
2802       int slength;
2803       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2804         {
2805         if (slength == -2) eptr = md->end_subject;   /* Partial match */
2806         CHECK_PARTIAL();
2807         RRETURN(MATCH_NOMATCH);
2808         }
2809       eptr += slength;
2810       }
2811 
2812     /* If min = max, continue at the same level without recursion.
2813     They are not both allowed to be zero. */
2814 
2815     if (min == max) continue;
2816 
2817     /* If minimizing, keep trying and advancing the pointer */
2818 
2819     if (minimize)
2820       {
2821       for (fi = min;; fi++)
2822         {
2823         int slength;
2824         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2825         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2826         if (fi >= max) RRETURN(MATCH_NOMATCH);
2827         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2828           {
2829           if (slength == -2) eptr = md->end_subject;   /* Partial match */
2830           CHECK_PARTIAL();
2831           RRETURN(MATCH_NOMATCH);
2832           }
2833         eptr += slength;
2834         }
2835       /* Control never gets here */
2836       }
2837 
2838     /* If maximizing, find the longest string and work backwards */
2839 
2840     else
2841       {
2842       pp = eptr;
2843       for (i = min; i < max; i++)
2844         {
2845         int slength;
2846         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2847           {
2848           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2849           the soft partial matching case. */
2850 
2851           if (slength == -2 && md->partial != 0 &&
2852               md->end_subject > md->start_used_ptr)
2853             {
2854             md->hitend = TRUE;
2855             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2856             }
2857           break;
2858           }
2859         eptr += slength;
2860         }
2861 
2862       while (eptr >= pp)
2863         {
2864         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2865         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2866         eptr -= length;
2867         }
2868       RRETURN(MATCH_NOMATCH);
2869       }
2870     /* Control never gets here */
2871 
2872     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2873     used when all the characters in the class have values in the range 0-255,
2874     and either the matching is caseful, or the characters are in the range
2875     0-127 when UTF-8 processing is enabled. The only difference between
2876     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2877     encountered.
2878 
2879     First, look past the end of the item to see if there is repeat information
2880     following. Then obey similar code to character type repeats - written out
2881     again for speed. */
2882 
2883     case OP_NCLASS:
2884     case OP_CLASS:
2885       {
2886       /* The data variable is saved across frames, so the byte map needs to
2887       be stored there. */
2888 #define BYTE_MAP ((pcre_uint8 *)data)
2889       data = ecode + 1;                /* Save for matching */
2890       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2891 
2892       switch (*ecode)
2893         {
2894         case OP_CRSTAR:
2895         case OP_CRMINSTAR:
2896         case OP_CRPLUS:
2897         case OP_CRMINPLUS:
2898         case OP_CRQUERY:
2899         case OP_CRMINQUERY:
2900         case OP_CRPOSSTAR:
2901         case OP_CRPOSPLUS:
2902         case OP_CRPOSQUERY:
2903         c = *ecode++ - OP_CRSTAR;
2904         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
2905         else possessive = TRUE;
2906         min = rep_min[c];                 /* Pick up values from tables; */
2907         max = rep_max[c];                 /* zero for max => infinity */
2908         if (max == 0) max = INT_MAX;
2909         break;
2910 
2911         case OP_CRRANGE:
2912         case OP_CRMINRANGE:
2913         case OP_CRPOSRANGE:
2914         minimize = (*ecode == OP_CRMINRANGE);
2915         possessive = (*ecode == OP_CRPOSRANGE);
2916         min = GET2(ecode, 1);
2917         max = GET2(ecode, 1 + IMM2_SIZE);
2918         if (max == 0) max = INT_MAX;
2919         ecode += 1 + 2 * IMM2_SIZE;
2920         break;
2921 
2922         default:               /* No repeat follows */
2923         min = max = 1;
2924         break;
2925         }
2926 
2927       /* First, ensure the minimum number of matches are present. */
2928 
2929 #ifdef SUPPORT_UTF
2930       if (utf)
2931         {
2932         for (i = 1; i <= min; i++)
2933           {
2934           if (eptr >= md->end_subject)
2935             {
2936             SCHECK_PARTIAL();
2937             RRETURN(MATCH_NOMATCH);
2938             }
2939           GETCHARINC(c, eptr);
2940           if (c > 255)
2941             {
2942             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2943             }
2944           else
2945             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2946           }
2947         }
2948       else
2949 #endif
2950       /* Not UTF mode */
2951         {
2952         for (i = 1; i <= min; i++)
2953           {
2954           if (eptr >= md->end_subject)
2955             {
2956             SCHECK_PARTIAL();
2957             RRETURN(MATCH_NOMATCH);
2958             }
2959           c = *eptr++;
2960 #ifndef COMPILE_PCRE8
2961           if (c > 255)
2962             {
2963             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2964             }
2965           else
2966 #endif
2967             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2968           }
2969         }
2970 
2971       /* If max == min we can continue with the main loop without the
2972       need to recurse. */
2973 
2974       if (min == max) continue;
2975 
2976       /* If minimizing, keep testing the rest of the expression and advancing
2977       the pointer while it matches the class. */
2978 
2979       if (minimize)
2980         {
2981 #ifdef SUPPORT_UTF
2982         if (utf)
2983           {
2984           for (fi = min;; fi++)
2985             {
2986             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2987             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2988             if (fi >= max) RRETURN(MATCH_NOMATCH);
2989             if (eptr >= md->end_subject)
2990               {
2991               SCHECK_PARTIAL();
2992               RRETURN(MATCH_NOMATCH);
2993               }
2994             GETCHARINC(c, eptr);
2995             if (c > 255)
2996               {
2997               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2998               }
2999             else
3000               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3001             }
3002           }
3003         else
3004 #endif
3005         /* Not UTF mode */
3006           {
3007           for (fi = min;; fi++)
3008             {
3009             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
3010             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3011             if (fi >= max) RRETURN(MATCH_NOMATCH);
3012             if (eptr >= md->end_subject)
3013               {
3014               SCHECK_PARTIAL();
3015               RRETURN(MATCH_NOMATCH);
3016               }
3017             c = *eptr++;
3018 #ifndef COMPILE_PCRE8
3019             if (c > 255)
3020               {
3021               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3022               }
3023             else
3024 #endif
3025               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3026             }
3027           }
3028         /* Control never gets here */
3029         }
3030 
3031       /* If maximizing, find the longest possible run, then work backwards. */
3032 
3033       else
3034         {
3035         pp = eptr;
3036 
3037 #ifdef SUPPORT_UTF
3038         if (utf)
3039           {
3040           for (i = min; i < max; i++)
3041             {
3042             int len = 1;
3043             if (eptr >= md->end_subject)
3044               {
3045               SCHECK_PARTIAL();
3046               break;
3047               }
3048             GETCHARLEN(c, eptr, len);
3049             if (c > 255)
3050               {
3051               if (op == OP_CLASS) break;
3052               }
3053             else
3054               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3055             eptr += len;
3056             }
3057 
3058           if (possessive) continue;    /* No backtracking */
3059 
3060           for (;;)
3061             {
3062             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
3063             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3064             if (eptr-- <= pp) break;        /* Stop if tried at original pos */
3065             BACKCHAR(eptr);
3066             }
3067           }
3068         else
3069 #endif
3070           /* Not UTF mode */
3071           {
3072           for (i = min; i < max; i++)
3073             {
3074             if (eptr >= md->end_subject)
3075               {
3076               SCHECK_PARTIAL();
3077               break;
3078               }
3079             c = *eptr;
3080 #ifndef COMPILE_PCRE8
3081             if (c > 255)
3082               {
3083               if (op == OP_CLASS) break;
3084               }
3085             else
3086 #endif
3087               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3088             eptr++;
3089             }
3090 
3091           if (possessive) continue;    /* No backtracking */
3092 
3093           while (eptr >= pp)
3094             {
3095             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3096             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3097             eptr--;
3098             }
3099           }
3100 
3101         RRETURN(MATCH_NOMATCH);
3102         }
3103 #undef BYTE_MAP
3104       }
3105     /* Control never gets here */
3106 
3107 
3108     /* Match an extended character class. In the 8-bit library, this opcode is
3109     encountered only when UTF-8 mode is supported. In the 16-bit and
3110     32-bit libraries, codepoints greater than 255 may be encountered even when
3111     UTF is not supported. */
3112 
3113 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3114     case OP_XCLASS:
3115       {
3116       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3117       ecode += GET(ecode, 1);                      /* Advance past the item */
3118 
3119       switch (*ecode)
3120         {
3121         case OP_CRSTAR:
3122         case OP_CRMINSTAR:
3123         case OP_CRPLUS:
3124         case OP_CRMINPLUS:
3125         case OP_CRQUERY:
3126         case OP_CRMINQUERY:
3127         case OP_CRPOSSTAR:
3128         case OP_CRPOSPLUS:
3129         case OP_CRPOSQUERY:
3130         c = *ecode++ - OP_CRSTAR;
3131         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3132         else possessive = TRUE;
3133         min = rep_min[c];                 /* Pick up values from tables; */
3134         max = rep_max[c];                 /* zero for max => infinity */
3135         if (max == 0) max = INT_MAX;
3136         break;
3137 
3138         case OP_CRRANGE:
3139         case OP_CRMINRANGE:
3140         case OP_CRPOSRANGE:
3141         minimize = (*ecode == OP_CRMINRANGE);
3142         possessive = (*ecode == OP_CRPOSRANGE);
3143         min = GET2(ecode, 1);
3144         max = GET2(ecode, 1 + IMM2_SIZE);
3145         if (max == 0) max = INT_MAX;
3146         ecode += 1 + 2 * IMM2_SIZE;
3147         break;
3148 
3149         default:               /* No repeat follows */
3150         min = max = 1;
3151         break;
3152         }
3153 
3154       /* First, ensure the minimum number of matches are present. */
3155 
3156       for (i = 1; i <= min; i++)
3157         {
3158         if (eptr >= md->end_subject)
3159           {
3160           SCHECK_PARTIAL();
3161           RRETURN(MATCH_NOMATCH);
3162           }
3163         GETCHARINCTEST(c, eptr);
3164         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3165         }
3166 
3167       /* If max == min we can continue with the main loop without the
3168       need to recurse. */
3169 
3170       if (min == max) continue;
3171 
3172       /* If minimizing, keep testing the rest of the expression and advancing
3173       the pointer while it matches the class. */
3174 
3175       if (minimize)
3176         {
3177         for (fi = min;; fi++)
3178           {
3179           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3180           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3181           if (fi >= max) RRETURN(MATCH_NOMATCH);
3182           if (eptr >= md->end_subject)
3183             {
3184             SCHECK_PARTIAL();
3185             RRETURN(MATCH_NOMATCH);
3186             }
3187           GETCHARINCTEST(c, eptr);
3188           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3189           }
3190         /* Control never gets here */
3191         }
3192 
3193       /* If maximizing, find the longest possible run, then work backwards. */
3194 
3195       else
3196         {
3197         pp = eptr;
3198         for (i = min; i < max; i++)
3199           {
3200           int len = 1;
3201           if (eptr >= md->end_subject)
3202             {
3203             SCHECK_PARTIAL();
3204             break;
3205             }
3206 #ifdef SUPPORT_UTF
3207           GETCHARLENTEST(c, eptr, len);
3208 #else
3209           c = *eptr;
3210 #endif
3211           if (!PRIV(xclass)(c, data, utf)) break;
3212           eptr += len;
3213           }
3214 
3215         if (possessive) continue;    /* No backtracking */
3216 
3217         for(;;)
3218           {
3219           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3220           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3221           if (eptr-- <= pp) break;        /* Stop if tried at original pos */
3222 #ifdef SUPPORT_UTF
3223           if (utf) BACKCHAR(eptr);
3224 #endif
3225           }
3226         RRETURN(MATCH_NOMATCH);
3227         }
3228 
3229       /* Control never gets here */
3230       }
3231 #endif    /* End of XCLASS */
3232 
3233     /* Match a single character, casefully */
3234 
3235     case OP_CHAR:
3236 #ifdef SUPPORT_UTF
3237     if (utf)
3238       {
3239       length = 1;
3240       ecode++;
3241       GETCHARLEN(fc, ecode, length);
3242       if (length > md->end_subject - eptr)
3243         {
3244         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3245         RRETURN(MATCH_NOMATCH);
3246         }
3247       while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
3248       }
3249     else
3250 #endif
3251     /* Not UTF mode */
3252       {
3253       if (md->end_subject - eptr < 1)
3254         {
3255         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3256         RRETURN(MATCH_NOMATCH);
3257         }
3258       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3259       ecode += 2;
3260       }
3261     break;
3262 
3263     /* Match a single character, caselessly. If we are at the end of the
3264     subject, give up immediately. */
3265 
3266     case OP_CHARI:
3267     if (eptr >= md->end_subject)
3268       {
3269       SCHECK_PARTIAL();
3270       RRETURN(MATCH_NOMATCH);
3271       }
3272 
3273 #ifdef SUPPORT_UTF
3274     if (utf)
3275       {
3276       length = 1;
3277       ecode++;
3278       GETCHARLEN(fc, ecode, length);
3279 
3280       /* If the pattern character's value is < 128, we have only one byte, and
3281       we know that its other case must also be one byte long, so we can use the
3282       fast lookup table. We know that there is at least one byte left in the
3283       subject. */
3284 
3285       if (fc < 128)
3286         {
3287         pcre_uint32 cc = UCHAR21(eptr);
3288         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3289         ecode++;
3290         eptr++;
3291         }
3292 
3293       /* Otherwise we must pick up the subject character. Note that we cannot
3294       use the value of "length" to check for sufficient bytes left, because the
3295       other case of the character may have more or fewer bytes.  */
3296 
3297       else
3298         {
3299         pcre_uint32 dc;
3300         GETCHARINC(dc, eptr);
3301         ecode += length;
3302 
3303         /* If we have Unicode property support, we can use it to test the other
3304         case of the character, if there is one. */
3305 
3306         if (fc != dc)
3307           {
3308 #ifdef SUPPORT_UCP
3309           if (dc != UCD_OTHERCASE(fc))
3310 #endif
3311             RRETURN(MATCH_NOMATCH);
3312           }
3313         }
3314       }
3315     else
3316 #endif   /* SUPPORT_UTF */
3317 
3318     /* Not UTF mode */
3319       {
3320       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3321           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3322       eptr++;
3323       ecode += 2;
3324       }
3325     break;
3326 
3327     /* Match a single character repeatedly. */
3328 
3329     case OP_EXACT:
3330     case OP_EXACTI:
3331     min = max = GET2(ecode, 1);
3332     ecode += 1 + IMM2_SIZE;
3333     goto REPEATCHAR;
3334 
3335     case OP_POSUPTO:
3336     case OP_POSUPTOI:
3337     possessive = TRUE;
3338     /* Fall through */
3339 
3340     case OP_UPTO:
3341     case OP_UPTOI:
3342     case OP_MINUPTO:
3343     case OP_MINUPTOI:
3344     min = 0;
3345     max = GET2(ecode, 1);
3346     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3347     ecode += 1 + IMM2_SIZE;
3348     goto REPEATCHAR;
3349 
3350     case OP_POSSTAR:
3351     case OP_POSSTARI:
3352     possessive = TRUE;
3353     min = 0;
3354     max = INT_MAX;
3355     ecode++;
3356     goto REPEATCHAR;
3357 
3358     case OP_POSPLUS:
3359     case OP_POSPLUSI:
3360     possessive = TRUE;
3361     min = 1;
3362     max = INT_MAX;
3363     ecode++;
3364     goto REPEATCHAR;
3365 
3366     case OP_POSQUERY:
3367     case OP_POSQUERYI:
3368     possessive = TRUE;
3369     min = 0;
3370     max = 1;
3371     ecode++;
3372     goto REPEATCHAR;
3373 
3374     case OP_STAR:
3375     case OP_STARI:
3376     case OP_MINSTAR:
3377     case OP_MINSTARI:
3378     case OP_PLUS:
3379     case OP_PLUSI:
3380     case OP_MINPLUS:
3381     case OP_MINPLUSI:
3382     case OP_QUERY:
3383     case OP_QUERYI:
3384     case OP_MINQUERY:
3385     case OP_MINQUERYI:
3386     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3387     minimize = (c & 1) != 0;
3388     min = rep_min[c];                 /* Pick up values from tables; */
3389     max = rep_max[c];                 /* zero for max => infinity */
3390     if (max == 0) max = INT_MAX;
3391 
3392     /* Common code for all repeated single-character matches. We first check
3393     for the minimum number of characters. If the minimum equals the maximum, we
3394     are done. Otherwise, if minimizing, check the rest of the pattern for a
3395     match; if there isn't one, advance up to the maximum, one character at a
3396     time.
3397 
3398     If maximizing, advance up to the maximum number of matching characters,
3399     until eptr is past the end of the maximum run. If possessive, we are
3400     then done (no backing up). Otherwise, match at this position; anything
3401     other than no match is immediately returned. For nomatch, back up one
3402     character, unless we are matching \R and the last thing matched was
3403     \r\n, in which case, back up two bytes. When we reach the first optional
3404     character position, we can save stack by doing a tail recurse.
3405 
3406     The various UTF/non-UTF and caseful/caseless cases are handled separately,
3407     for speed. */
3408 
3409     REPEATCHAR:
3410 #ifdef SUPPORT_UTF
3411     if (utf)
3412       {
3413       length = 1;
3414       charptr = ecode;
3415       GETCHARLEN(fc, ecode, length);
3416       ecode += length;
3417 
3418       /* Handle multibyte character matching specially here. There is
3419       support for caseless matching if UCP support is present. */
3420 
3421       if (length > 1)
3422         {
3423 #ifdef SUPPORT_UCP
3424         pcre_uint32 othercase;
3425         if (op >= OP_STARI &&     /* Caseless */
3426             (othercase = UCD_OTHERCASE(fc)) != fc)
3427           oclength = PRIV(ord2utf)(othercase, occhars);
3428         else oclength = 0;
3429 #endif  /* SUPPORT_UCP */
3430 
3431         for (i = 1; i <= min; i++)
3432           {
3433           if (eptr <= md->end_subject - length &&
3434             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3435 #ifdef SUPPORT_UCP
3436           else if (oclength > 0 &&
3437                    eptr <= md->end_subject - oclength &&
3438                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3439 #endif  /* SUPPORT_UCP */
3440           else
3441             {
3442             CHECK_PARTIAL();
3443             RRETURN(MATCH_NOMATCH);
3444             }
3445           }
3446 
3447         if (min == max) continue;
3448 
3449         if (minimize)
3450           {
3451           for (fi = min;; fi++)
3452             {
3453             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3454             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3455             if (fi >= max) RRETURN(MATCH_NOMATCH);
3456             if (eptr <= md->end_subject - length &&
3457               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3458 #ifdef SUPPORT_UCP
3459             else if (oclength > 0 &&
3460                      eptr <= md->end_subject - oclength &&
3461                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3462 #endif  /* SUPPORT_UCP */
3463             else
3464               {
3465               CHECK_PARTIAL();
3466               RRETURN(MATCH_NOMATCH);
3467               }
3468             }
3469           /* Control never gets here */
3470           }
3471 
3472         else  /* Maximize */
3473           {
3474           pp = eptr;
3475           for (i = min; i < max; i++)
3476             {
3477             if (eptr <= md->end_subject - length &&
3478                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3479 #ifdef SUPPORT_UCP
3480             else if (oclength > 0 &&
3481                      eptr <= md->end_subject - oclength &&
3482                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3483 #endif  /* SUPPORT_UCP */
3484             else
3485               {
3486               CHECK_PARTIAL();
3487               break;
3488               }
3489             }
3490 
3491           if (possessive) continue;    /* No backtracking */
3492           for(;;)
3493             {
3494             if (eptr <= pp) goto TAIL_RECURSE;
3495             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3496             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3497 #ifdef SUPPORT_UCP
3498             eptr--;
3499             BACKCHAR(eptr);
3500 #else   /* without SUPPORT_UCP */
3501             eptr -= length;
3502 #endif  /* SUPPORT_UCP */
3503             }
3504           }
3505         /* Control never gets here */
3506         }
3507 
3508       /* If the length of a UTF-8 character is 1, we fall through here, and
3509       obey the code as for non-UTF-8 characters below, though in this case the
3510       value of fc will always be < 128. */
3511       }
3512     else
3513 #endif  /* SUPPORT_UTF */
3514       /* When not in UTF-8 mode, load a single-byte character. */
3515       fc = *ecode++;
3516 
3517     /* The value of fc at this point is always one character, though we may
3518     or may not be in UTF mode. The code is duplicated for the caseless and
3519     caseful cases, for speed, since matching characters is likely to be quite
3520     common. First, ensure the minimum number of matches are present. If min =
3521     max, continue at the same level without recursing. Otherwise, if
3522     minimizing, keep trying the rest of the expression and advancing one
3523     matching character if failing, up to the maximum. Alternatively, if
3524     maximizing, find the maximum number of characters and work backwards. */
3525 
3526     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3527       max, (char *)eptr));
3528 
3529     if (op >= OP_STARI)  /* Caseless */
3530       {
3531 #ifdef COMPILE_PCRE8
3532       /* fc must be < 128 if UTF is enabled. */
3533       foc = md->fcc[fc];
3534 #else
3535 #ifdef SUPPORT_UTF
3536 #ifdef SUPPORT_UCP
3537       if (utf && fc > 127)
3538         foc = UCD_OTHERCASE(fc);
3539 #else
3540       if (utf && fc > 127)
3541         foc = fc;
3542 #endif /* SUPPORT_UCP */
3543       else
3544 #endif /* SUPPORT_UTF */
3545         foc = TABLE_GET(fc, md->fcc, fc);
3546 #endif /* COMPILE_PCRE8 */
3547 
3548       for (i = 1; i <= min; i++)
3549         {
3550         pcre_uint32 cc;                 /* Faster than pcre_uchar */
3551         if (eptr >= md->end_subject)
3552           {
3553           SCHECK_PARTIAL();
3554           RRETURN(MATCH_NOMATCH);
3555           }
3556         cc = UCHAR21TEST(eptr);
3557         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3558         eptr++;
3559         }
3560       if (min == max) continue;
3561       if (minimize)
3562         {
3563         for (fi = min;; fi++)
3564           {
3565           pcre_uint32 cc;               /* Faster than pcre_uchar */
3566           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3567           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3568           if (fi >= max) RRETURN(MATCH_NOMATCH);
3569           if (eptr >= md->end_subject)
3570             {
3571             SCHECK_PARTIAL();
3572             RRETURN(MATCH_NOMATCH);
3573             }
3574           cc = UCHAR21TEST(eptr);
3575           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3576           eptr++;
3577           }
3578         /* Control never gets here */
3579         }
3580       else  /* Maximize */
3581         {
3582         pp = eptr;
3583         for (i = min; i < max; i++)
3584           {
3585           pcre_uint32 cc;               /* Faster than pcre_uchar */
3586           if (eptr >= md->end_subject)
3587             {
3588             SCHECK_PARTIAL();
3589             break;
3590             }
3591           cc = UCHAR21TEST(eptr);
3592           if (fc != cc && foc != cc) break;
3593           eptr++;
3594           }
3595         if (possessive) continue;       /* No backtracking */
3596         for (;;)
3597           {
3598           if (eptr == pp) goto TAIL_RECURSE;
3599           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3600           eptr--;
3601           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3602           }
3603         /* Control never gets here */
3604         }
3605       }
3606 
3607     /* Caseful comparisons (includes all multi-byte characters) */
3608 
3609     else
3610       {
3611       for (i = 1; i <= min; i++)
3612         {
3613         if (eptr >= md->end_subject)
3614           {
3615           SCHECK_PARTIAL();
3616           RRETURN(MATCH_NOMATCH);
3617           }
3618         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3619         }
3620 
3621       if (min == max) continue;
3622 
3623       if (minimize)
3624         {
3625         for (fi = min;; fi++)
3626           {
3627           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3628           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3629           if (fi >= max) RRETURN(MATCH_NOMATCH);
3630           if (eptr >= md->end_subject)
3631             {
3632             SCHECK_PARTIAL();
3633             RRETURN(MATCH_NOMATCH);
3634             }
3635           if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3636           }
3637         /* Control never gets here */
3638         }
3639       else  /* Maximize */
3640         {
3641         pp = eptr;
3642         for (i = min; i < max; i++)
3643           {
3644           if (eptr >= md->end_subject)
3645             {
3646             SCHECK_PARTIAL();
3647             break;
3648             }
3649           if (fc != UCHAR21TEST(eptr)) break;
3650           eptr++;
3651           }
3652         if (possessive) continue;    /* No backtracking */
3653         for (;;)
3654           {
3655           if (eptr == pp) goto TAIL_RECURSE;
3656           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3657           eptr--;
3658           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3659           }
3660         /* Control never gets here */
3661         }
3662       }
3663     /* Control never gets here */
3664 
    /* Match a negated single character. In UTF mode, both the pattern
    character and the subject character may be multibyte. */
3667 
3668     case OP_NOT:
3669     case OP_NOTI:
3670     if (eptr >= md->end_subject)
3671       {
3672       SCHECK_PARTIAL();
3673       RRETURN(MATCH_NOMATCH);
3674       }
3675 #ifdef SUPPORT_UTF
3676     if (utf)
3677       {
3678       register pcre_uint32 ch, och;
3679 
3680       ecode++;
3681       GETCHARINC(ch, ecode);
3682       GETCHARINC(c, eptr);
3683 
3684       if (op == OP_NOT)
3685         {
3686         if (ch == c) RRETURN(MATCH_NOMATCH);
3687         }
3688       else
3689         {
3690 #ifdef SUPPORT_UCP
3691         if (ch > 127)
3692           och = UCD_OTHERCASE(ch);
3693 #else
3694         if (ch > 127)
3695           och = ch;
3696 #endif /* SUPPORT_UCP */
3697         else
3698           och = TABLE_GET(ch, md->fcc, ch);
3699         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3700         }
3701       }
3702     else
3703 #endif
3704       {
3705       register pcre_uint32 ch = ecode[1];
3706       c = *eptr++;
3707       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3708         RRETURN(MATCH_NOMATCH);
3709       ecode += 2;
3710       }
3711     break;
3712 
    /* Match a negated single character repeatedly. This is almost a repeat of
    the code for a repeated single character, but I haven't found a nice way of
    commoning these up that doesn't require a test of the positive/negative
    option for each character match. Maybe that wouldn't add very much to the
    time taken, but character matching *is* what this is all about... */
3719 
3720     case OP_NOTEXACT:
3721     case OP_NOTEXACTI:
3722     min = max = GET2(ecode, 1);
3723     ecode += 1 + IMM2_SIZE;
3724     goto REPEATNOTCHAR;
3725 
3726     case OP_NOTUPTO:
3727     case OP_NOTUPTOI:
3728     case OP_NOTMINUPTO:
3729     case OP_NOTMINUPTOI:
3730     min = 0;
3731     max = GET2(ecode, 1);
3732     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3733     ecode += 1 + IMM2_SIZE;
3734     goto REPEATNOTCHAR;
3735 
3736     case OP_NOTPOSSTAR:
3737     case OP_NOTPOSSTARI:
3738     possessive = TRUE;
3739     min = 0;
3740     max = INT_MAX;
3741     ecode++;
3742     goto REPEATNOTCHAR;
3743 
3744     case OP_NOTPOSPLUS:
3745     case OP_NOTPOSPLUSI:
3746     possessive = TRUE;
3747     min = 1;
3748     max = INT_MAX;
3749     ecode++;
3750     goto REPEATNOTCHAR;
3751 
3752     case OP_NOTPOSQUERY:
3753     case OP_NOTPOSQUERYI:
3754     possessive = TRUE;
3755     min = 0;
3756     max = 1;
3757     ecode++;
3758     goto REPEATNOTCHAR;
3759 
3760     case OP_NOTPOSUPTO:
3761     case OP_NOTPOSUPTOI:
3762     possessive = TRUE;
3763     min = 0;
3764     max = GET2(ecode, 1);
3765     ecode += 1 + IMM2_SIZE;
3766     goto REPEATNOTCHAR;
3767 
3768     case OP_NOTSTAR:
3769     case OP_NOTSTARI:
3770     case OP_NOTMINSTAR:
3771     case OP_NOTMINSTARI:
3772     case OP_NOTPLUS:
3773     case OP_NOTPLUSI:
3774     case OP_NOTMINPLUS:
3775     case OP_NOTMINPLUSI:
3776     case OP_NOTQUERY:
3777     case OP_NOTQUERYI:
3778     case OP_NOTMINQUERY:
3779     case OP_NOTMINQUERYI:
3780     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3781     minimize = (c & 1) != 0;
3782     min = rep_min[c];                 /* Pick up values from tables; */
3783     max = rep_max[c];                 /* zero for max => infinity */
3784     if (max == 0) max = INT_MAX;
3785 
    /* Common code for all repeated negated single-character matches. */
3787 
3788     REPEATNOTCHAR:
3789     GETCHARINCTEST(fc, ecode);
3790 
3791     /* The code is duplicated for the caseless and caseful cases, for speed,
3792     since matching characters is likely to be quite common. First, ensure the
3793     minimum number of matches are present. If min = max, continue at the same
3794     level without recursing. Otherwise, if minimizing, keep trying the rest of
3795     the expression and advancing one matching character if failing, up to the
3796     maximum. Alternatively, if maximizing, find the maximum number of
3797     characters and work backwards. */
3798 
3799     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3800       max, (char *)eptr));
3801 
3802     if (op >= OP_NOTSTARI)     /* Caseless */
3803       {
3804 #ifdef SUPPORT_UTF
3805 #ifdef SUPPORT_UCP
3806       if (utf && fc > 127)
3807         foc = UCD_OTHERCASE(fc);
3808 #else
3809       if (utf && fc > 127)
3810         foc = fc;
3811 #endif /* SUPPORT_UCP */
3812       else
3813 #endif /* SUPPORT_UTF */
3814         foc = TABLE_GET(fc, md->fcc, fc);
3815 
3816 #ifdef SUPPORT_UTF
3817       if (utf)
3818         {
3819         register pcre_uint32 d;
3820         for (i = 1; i <= min; i++)
3821           {
3822           if (eptr >= md->end_subject)
3823             {
3824             SCHECK_PARTIAL();
3825             RRETURN(MATCH_NOMATCH);
3826             }
3827           GETCHARINC(d, eptr);
3828           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3829           }
3830         }
3831       else
3832 #endif  /* SUPPORT_UTF */
3833       /* Not UTF mode */
3834         {
3835         for (i = 1; i <= min; i++)
3836           {
3837           if (eptr >= md->end_subject)
3838             {
3839             SCHECK_PARTIAL();
3840             RRETURN(MATCH_NOMATCH);
3841             }
3842           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3843           eptr++;
3844           }
3845         }
3846 
3847       if (min == max) continue;
3848 
3849       if (minimize)
3850         {
3851 #ifdef SUPPORT_UTF
3852         if (utf)
3853           {
3854           register pcre_uint32 d;
3855           for (fi = min;; fi++)
3856             {
3857             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3858             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3859             if (fi >= max) RRETURN(MATCH_NOMATCH);
3860             if (eptr >= md->end_subject)
3861               {
3862               SCHECK_PARTIAL();
3863               RRETURN(MATCH_NOMATCH);
3864               }
3865             GETCHARINC(d, eptr);
3866             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3867             }
3868           }
3869         else
3870 #endif  /*SUPPORT_UTF */
3871         /* Not UTF mode */
3872           {
3873           for (fi = min;; fi++)
3874             {
3875             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3876             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3877             if (fi >= max) RRETURN(MATCH_NOMATCH);
3878             if (eptr >= md->end_subject)
3879               {
3880               SCHECK_PARTIAL();
3881               RRETURN(MATCH_NOMATCH);
3882               }
3883             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3884             eptr++;
3885             }
3886           }
3887         /* Control never gets here */
3888         }
3889 
3890       /* Maximize case */
3891 
3892       else
3893         {
3894         pp = eptr;
3895 
3896 #ifdef SUPPORT_UTF
3897         if (utf)
3898           {
3899           register pcre_uint32 d;
3900           for (i = min; i < max; i++)
3901             {
3902             int len = 1;
3903             if (eptr >= md->end_subject)
3904               {
3905               SCHECK_PARTIAL();
3906               break;
3907               }
3908             GETCHARLEN(d, eptr, len);
3909             if (fc == d || (unsigned int)foc == d) break;
3910             eptr += len;
3911             }
3912           if (possessive) continue;    /* No backtracking */
3913           for(;;)
3914             {
3915             if (eptr <= pp) goto TAIL_RECURSE;
3916             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3917             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3918             eptr--;
3919             BACKCHAR(eptr);
3920             }
3921           }
3922         else
3923 #endif  /* SUPPORT_UTF */
3924         /* Not UTF mode */
3925           {
3926           for (i = min; i < max; i++)
3927             {
3928             if (eptr >= md->end_subject)
3929               {
3930               SCHECK_PARTIAL();
3931               break;
3932               }
3933             if (fc == *eptr || foc == *eptr) break;
3934             eptr++;
3935             }
3936           if (possessive) continue;    /* No backtracking */
3937           for (;;)
3938             {
3939             if (eptr == pp) goto TAIL_RECURSE;
3940             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3941             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3942             eptr--;
3943             }
3944           }
3945         /* Control never gets here */
3946         }
3947       }
3948 
3949     /* Caseful comparisons */
3950 
3951     else
3952       {
3953 #ifdef SUPPORT_UTF
3954       if (utf)
3955         {
3956         register pcre_uint32 d;
3957         for (i = 1; i <= min; i++)
3958           {
3959           if (eptr >= md->end_subject)
3960             {
3961             SCHECK_PARTIAL();
3962             RRETURN(MATCH_NOMATCH);
3963             }
3964           GETCHARINC(d, eptr);
3965           if (fc == d) RRETURN(MATCH_NOMATCH);
3966           }
3967         }
3968       else
3969 #endif
3970       /* Not UTF mode */
3971         {
3972         for (i = 1; i <= min; i++)
3973           {
3974           if (eptr >= md->end_subject)
3975             {
3976             SCHECK_PARTIAL();
3977             RRETURN(MATCH_NOMATCH);
3978             }
3979           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3980           }
3981         }
3982 
3983       if (min == max) continue;
3984 
3985       if (minimize)
3986         {
3987 #ifdef SUPPORT_UTF
3988         if (utf)
3989           {
3990           register pcre_uint32 d;
3991           for (fi = min;; fi++)
3992             {
3993             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3994             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3995             if (fi >= max) RRETURN(MATCH_NOMATCH);
3996             if (eptr >= md->end_subject)
3997               {
3998               SCHECK_PARTIAL();
3999               RRETURN(MATCH_NOMATCH);
4000               }
4001             GETCHARINC(d, eptr);
4002             if (fc == d) RRETURN(MATCH_NOMATCH);
4003             }
4004           }
4005         else
4006 #endif
4007         /* Not UTF mode */
4008           {
4009           for (fi = min;; fi++)
4010             {
4011             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
4012             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4013             if (fi >= max) RRETURN(MATCH_NOMATCH);
4014             if (eptr >= md->end_subject)
4015               {
4016               SCHECK_PARTIAL();
4017               RRETURN(MATCH_NOMATCH);
4018               }
4019             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4020             }
4021           }
4022         /* Control never gets here */
4023         }
4024 
4025       /* Maximize case */
4026 
4027       else
4028         {
4029         pp = eptr;
4030 
4031 #ifdef SUPPORT_UTF
4032         if (utf)
4033           {
4034           register pcre_uint32 d;
4035           for (i = min; i < max; i++)
4036             {
4037             int len = 1;
4038             if (eptr >= md->end_subject)
4039               {
4040               SCHECK_PARTIAL();
4041               break;
4042               }
4043             GETCHARLEN(d, eptr, len);
4044             if (fc == d) break;
4045             eptr += len;
4046             }
4047           if (possessive) continue;    /* No backtracking */
4048           for(;;)
4049             {
4050             if (eptr <= pp) goto TAIL_RECURSE;
4051             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4052             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4053             eptr--;
4054             BACKCHAR(eptr);
4055             }
4056           }
4057         else
4058 #endif
4059         /* Not UTF mode */
4060           {
4061           for (i = min; i < max; i++)
4062             {
4063             if (eptr >= md->end_subject)
4064               {
4065               SCHECK_PARTIAL();
4066               break;
4067               }
4068             if (fc == *eptr) break;
4069             eptr++;
4070             }
4071           if (possessive) continue;    /* No backtracking */
4072           for (;;)
4073             {
4074             if (eptr == pp) goto TAIL_RECURSE;
4075             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4076             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4077             eptr--;
4078             }
4079           }
4080         /* Control never gets here */
4081         }
4082       }
4083     /* Control never gets here */
4084 
4085     /* Match a single character type repeatedly; several different opcodes
4086     share code. This is very similar to the code for single characters, but we
4087     repeat it in the interests of efficiency. */
4088 
4089     case OP_TYPEEXACT:
4090     min = max = GET2(ecode, 1);
4091     minimize = TRUE;
4092     ecode += 1 + IMM2_SIZE;
4093     goto REPEATTYPE;
4094 
4095     case OP_TYPEUPTO:
4096     case OP_TYPEMINUPTO:
4097     min = 0;
4098     max = GET2(ecode, 1);
4099     minimize = *ecode == OP_TYPEMINUPTO;
4100     ecode += 1 + IMM2_SIZE;
4101     goto REPEATTYPE;
4102 
4103     case OP_TYPEPOSSTAR:
4104     possessive = TRUE;
4105     min = 0;
4106     max = INT_MAX;
4107     ecode++;
4108     goto REPEATTYPE;
4109 
4110     case OP_TYPEPOSPLUS:
4111     possessive = TRUE;
4112     min = 1;
4113     max = INT_MAX;
4114     ecode++;
4115     goto REPEATTYPE;
4116 
4117     case OP_TYPEPOSQUERY:
4118     possessive = TRUE;
4119     min = 0;
4120     max = 1;
4121     ecode++;
4122     goto REPEATTYPE;
4123 
4124     case OP_TYPEPOSUPTO:
4125     possessive = TRUE;
4126     min = 0;
4127     max = GET2(ecode, 1);
4128     ecode += 1 + IMM2_SIZE;
4129     goto REPEATTYPE;
4130 
4131     case OP_TYPESTAR:
4132     case OP_TYPEMINSTAR:
4133     case OP_TYPEPLUS:
4134     case OP_TYPEMINPLUS:
4135     case OP_TYPEQUERY:
4136     case OP_TYPEMINQUERY:
4137     c = *ecode++ - OP_TYPESTAR;
4138     minimize = (c & 1) != 0;
4139     min = rep_min[c];                 /* Pick up values from tables; */
4140     max = rep_max[c];                 /* zero for max => infinity */
4141     if (max == 0) max = INT_MAX;
4142 
4143     /* Common code for all repeated single character type matches. Note that
4144     in UTF-8 mode, '.' matches a character of any length, but for the other
4145     character types, the valid characters are all one-byte long. */
4146 
4147     REPEATTYPE:
4148     ctype = *ecode++;      /* Code for the character type */
4149 
4150 #ifdef SUPPORT_UCP
4151     if (ctype == OP_PROP || ctype == OP_NOTPROP)
4152       {
4153       prop_fail_result = ctype == OP_NOTPROP;
4154       prop_type = *ecode++;
4155       prop_value = *ecode++;
4156       }
4157     else prop_type = -1;
4158 #endif
4159 
4160     /* First, ensure the minimum number of matches are present. Use inline
4161     code for maximizing the speed, and do the type test once at the start
4162     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4163     is tidier. Also separate the UCP code, which can be the same for both UTF-8
4164     and single-bytes. */
4165 
4166     if (min > 0)
4167       {
4168 #ifdef SUPPORT_UCP
4169       if (prop_type >= 0)
4170         {
4171         switch(prop_type)
4172           {
4173           case PT_ANY:
4174           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4175           for (i = 1; i <= min; i++)
4176             {
4177             if (eptr >= md->end_subject)
4178               {
4179               SCHECK_PARTIAL();
4180               RRETURN(MATCH_NOMATCH);
4181               }
4182             GETCHARINCTEST(c, eptr);
4183             }
4184           break;
4185 
4186           case PT_LAMP:
4187           for (i = 1; i <= min; i++)
4188             {
4189             int chartype;
4190             if (eptr >= md->end_subject)
4191               {
4192               SCHECK_PARTIAL();
4193               RRETURN(MATCH_NOMATCH);
4194               }
4195             GETCHARINCTEST(c, eptr);
4196             chartype = UCD_CHARTYPE(c);
4197             if ((chartype == ucp_Lu ||
4198                  chartype == ucp_Ll ||
4199                  chartype == ucp_Lt) == prop_fail_result)
4200               RRETURN(MATCH_NOMATCH);
4201             }
4202           break;
4203 
4204           case PT_GC:
4205           for (i = 1; i <= min; i++)
4206             {
4207             if (eptr >= md->end_subject)
4208               {
4209               SCHECK_PARTIAL();
4210               RRETURN(MATCH_NOMATCH);
4211               }
4212             GETCHARINCTEST(c, eptr);
4213             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4214               RRETURN(MATCH_NOMATCH);
4215             }
4216           break;
4217 
4218           case PT_PC:
4219           for (i = 1; i <= min; i++)
4220             {
4221             if (eptr >= md->end_subject)
4222               {
4223               SCHECK_PARTIAL();
4224               RRETURN(MATCH_NOMATCH);
4225               }
4226             GETCHARINCTEST(c, eptr);
4227             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4228               RRETURN(MATCH_NOMATCH);
4229             }
4230           break;
4231 
4232           case PT_SC:
4233           for (i = 1; i <= min; i++)
4234             {
4235             if (eptr >= md->end_subject)
4236               {
4237               SCHECK_PARTIAL();
4238               RRETURN(MATCH_NOMATCH);
4239               }
4240             GETCHARINCTEST(c, eptr);
4241             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4242               RRETURN(MATCH_NOMATCH);
4243             }
4244           break;
4245 
4246           case PT_ALNUM:
4247           for (i = 1; i <= min; i++)
4248             {
4249             int category;
4250             if (eptr >= md->end_subject)
4251               {
4252               SCHECK_PARTIAL();
4253               RRETURN(MATCH_NOMATCH);
4254               }
4255             GETCHARINCTEST(c, eptr);
4256             category = UCD_CATEGORY(c);
4257             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4258               RRETURN(MATCH_NOMATCH);
4259             }
4260           break;
4261 
4262           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4263           which means that Perl space and POSIX space are now identical. PCRE
4264           was changed at release 8.34. */
4265 
4266           case PT_SPACE:    /* Perl space */
4267           case PT_PXSPACE:  /* POSIX space */
4268           for (i = 1; i <= min; i++)
4269             {
4270             if (eptr >= md->end_subject)
4271               {
4272               SCHECK_PARTIAL();
4273               RRETURN(MATCH_NOMATCH);
4274               }
4275             GETCHARINCTEST(c, eptr);
4276             switch(c)
4277               {
4278               HSPACE_CASES:
4279               VSPACE_CASES:
4280               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4281               break;
4282 
4283               default:
4284               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
4285                 RRETURN(MATCH_NOMATCH);
4286               break;
4287               }
4288             }
4289           break;
4290 
4291           case PT_WORD:
4292           for (i = 1; i <= min; i++)
4293             {
4294             int category;
4295             if (eptr >= md->end_subject)
4296               {
4297               SCHECK_PARTIAL();
4298               RRETURN(MATCH_NOMATCH);
4299               }
4300             GETCHARINCTEST(c, eptr);
4301             category = UCD_CATEGORY(c);
4302             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4303                    == prop_fail_result)
4304               RRETURN(MATCH_NOMATCH);
4305             }
4306           break;
4307 
4308           case PT_CLIST:
4309           for (i = 1; i <= min; i++)
4310             {
4311             const pcre_uint32 *cp;
4312             if (eptr >= md->end_subject)
4313               {
4314               SCHECK_PARTIAL();
4315               RRETURN(MATCH_NOMATCH);
4316               }
4317             GETCHARINCTEST(c, eptr);
4318             cp = PRIV(ucd_caseless_sets) + prop_value;
4319             for (;;)
4320               {
4321               if (c < *cp)
4322                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4323               if (c == *cp++)
4324                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4325               }
4326             }
4327           break;
4328 
4329           case PT_UCNC:
4330           for (i = 1; i <= min; i++)
4331             {
4332             if (eptr >= md->end_subject)
4333               {
4334               SCHECK_PARTIAL();
4335               RRETURN(MATCH_NOMATCH);
4336               }
4337             GETCHARINCTEST(c, eptr);
4338             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4339                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4340                  c >= 0xe000) == prop_fail_result)
4341               RRETURN(MATCH_NOMATCH);
4342             }
4343           break;
4344 
4345           /* This should not occur */
4346 
4347           default:
4348           RRETURN(PCRE_ERROR_INTERNAL);
4349           }
4350         }
4351 
4352       /* Match extended Unicode sequences. We will get here only if the
4353       support is in the binary; otherwise a compile-time error occurs. */
4354 
4355       else if (ctype == OP_EXTUNI)
4356         {
4357         for (i = 1; i <= min; i++)
4358           {
4359           if (eptr >= md->end_subject)
4360             {
4361             SCHECK_PARTIAL();
4362             RRETURN(MATCH_NOMATCH);
4363             }
4364           else
4365             {
4366             int lgb, rgb;
4367             GETCHARINCTEST(c, eptr);
4368             lgb = UCD_GRAPHBREAK(c);
4369            while (eptr < md->end_subject)
4370               {
4371               int len = 1;
4372               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4373               rgb = UCD_GRAPHBREAK(c);
4374               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4375               lgb = rgb;
4376               eptr += len;
4377               }
4378             }
4379           CHECK_PARTIAL();
4380           }
4381         }
4382 
4383       else
4384 #endif     /* SUPPORT_UCP */
4385 
4386 /* Handle all other cases when the coding is UTF-8 */
4387 
4388 #ifdef SUPPORT_UTF
4389       if (utf) switch(ctype)
4390         {
4391         case OP_ANY:
4392         for (i = 1; i <= min; i++)
4393           {
4394           if (eptr >= md->end_subject)
4395             {
4396             SCHECK_PARTIAL();
4397             RRETURN(MATCH_NOMATCH);
4398             }
4399           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4400           if (md->partial != 0 &&
4401               eptr + 1 >= md->end_subject &&
4402               NLBLOCK->nltype == NLTYPE_FIXED &&
4403               NLBLOCK->nllen == 2 &&
4404               UCHAR21(eptr) == NLBLOCK->nl[0])
4405             {
4406             md->hitend = TRUE;
4407             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4408             }
4409           eptr++;
4410           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4411           }
4412         break;
4413 
4414         case OP_ALLANY:
4415         for (i = 1; i <= min; i++)
4416           {
4417           if (eptr >= md->end_subject)
4418             {
4419             SCHECK_PARTIAL();
4420             RRETURN(MATCH_NOMATCH);
4421             }
4422           eptr++;
4423           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4424           }
4425         break;
4426 
4427         case OP_ANYBYTE:
4428         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4429         eptr += min;
4430         break;
4431 
4432         case OP_ANYNL:
4433         for (i = 1; i <= min; i++)
4434           {
4435           if (eptr >= md->end_subject)
4436             {
4437             SCHECK_PARTIAL();
4438             RRETURN(MATCH_NOMATCH);
4439             }
4440           GETCHARINC(c, eptr);
4441           switch(c)
4442             {
4443             default: RRETURN(MATCH_NOMATCH);
4444 
4445             case CHAR_CR:
4446             if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
4447             break;
4448 
4449             case CHAR_LF:
4450             break;
4451 
4452             case CHAR_VT:
4453             case CHAR_FF:
4454             case CHAR_NEL:
4455 #ifndef EBCDIC
4456             case 0x2028:
4457             case 0x2029:
4458 #endif  /* Not EBCDIC */
4459             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4460             break;
4461             }
4462           }
4463         break;
4464 
4465         case OP_NOT_HSPACE:
4466         for (i = 1; i <= min; i++)
4467           {
4468           if (eptr >= md->end_subject)
4469             {
4470             SCHECK_PARTIAL();
4471             RRETURN(MATCH_NOMATCH);
4472             }
4473           GETCHARINC(c, eptr);
4474           switch(c)
4475             {
4476             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4477             default: break;
4478             }
4479           }
4480         break;
4481 
4482         case OP_HSPACE:
4483         for (i = 1; i <= min; i++)
4484           {
4485           if (eptr >= md->end_subject)
4486             {
4487             SCHECK_PARTIAL();
4488             RRETURN(MATCH_NOMATCH);
4489             }
4490           GETCHARINC(c, eptr);
4491           switch(c)
4492             {
4493             HSPACE_CASES: break;  /* Byte and multibyte cases */
4494             default: RRETURN(MATCH_NOMATCH);
4495             }
4496           }
4497         break;
4498 
4499         case OP_NOT_VSPACE:
4500         for (i = 1; i <= min; i++)
4501           {
4502           if (eptr >= md->end_subject)
4503             {
4504             SCHECK_PARTIAL();
4505             RRETURN(MATCH_NOMATCH);
4506             }
4507           GETCHARINC(c, eptr);
4508           switch(c)
4509             {
4510             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4511             default: break;
4512             }
4513           }
4514         break;
4515 
4516         case OP_VSPACE:
4517         for (i = 1; i <= min; i++)
4518           {
4519           if (eptr >= md->end_subject)
4520             {
4521             SCHECK_PARTIAL();
4522             RRETURN(MATCH_NOMATCH);
4523             }
4524           GETCHARINC(c, eptr);
4525           switch(c)
4526             {
4527             VSPACE_CASES: break;
4528             default: RRETURN(MATCH_NOMATCH);
4529             }
4530           }
4531         break;
4532 
4533         case OP_NOT_DIGIT:
4534         for (i = 1; i <= min; i++)
4535           {
4536           if (eptr >= md->end_subject)
4537             {
4538             SCHECK_PARTIAL();
4539             RRETURN(MATCH_NOMATCH);
4540             }
4541           GETCHARINC(c, eptr);
4542           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4543             RRETURN(MATCH_NOMATCH);
4544           }
4545         break;
4546 
4547         case OP_DIGIT:
4548         for (i = 1; i <= min; i++)
4549           {
4550           pcre_uint32 cc;
4551           if (eptr >= md->end_subject)
4552             {
4553             SCHECK_PARTIAL();
4554             RRETURN(MATCH_NOMATCH);
4555             }
4556           cc = UCHAR21(eptr);
4557           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4558             RRETURN(MATCH_NOMATCH);
4559           eptr++;
4560           /* No need to skip more bytes - we know it's a 1-byte character */
4561           }
4562         break;
4563 
4564         case OP_NOT_WHITESPACE:
4565         for (i = 1; i <= min; i++)
4566           {
4567           pcre_uint32 cc;
4568           if (eptr >= md->end_subject)
4569             {
4570             SCHECK_PARTIAL();
4571             RRETURN(MATCH_NOMATCH);
4572             }
4573           cc = UCHAR21(eptr);
4574           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4575             RRETURN(MATCH_NOMATCH);
4576           eptr++;
4577           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4578           }
4579         break;
4580 
4581         case OP_WHITESPACE:
4582         for (i = 1; i <= min; i++)
4583           {
4584           pcre_uint32 cc;
4585           if (eptr >= md->end_subject)
4586             {
4587             SCHECK_PARTIAL();
4588             RRETURN(MATCH_NOMATCH);
4589             }
4590           cc = UCHAR21(eptr);
4591           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4592             RRETURN(MATCH_NOMATCH);
4593           eptr++;
4594           /* No need to skip more bytes - we know it's a 1-byte character */
4595           }
4596         break;
4597 
4598         case OP_NOT_WORDCHAR:
4599         for (i = 1; i <= min; i++)
4600           {
4601           pcre_uint32 cc;
4602           if (eptr >= md->end_subject)
4603             {
4604             SCHECK_PARTIAL();
4605             RRETURN(MATCH_NOMATCH);
4606             }
4607           cc = UCHAR21(eptr);
4608           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4609             RRETURN(MATCH_NOMATCH);
4610           eptr++;
4611           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4612           }
4613         break;
4614 
4615         case OP_WORDCHAR:
4616         for (i = 1; i <= min; i++)
4617           {
4618           pcre_uint32 cc;
4619           if (eptr >= md->end_subject)
4620             {
4621             SCHECK_PARTIAL();
4622             RRETURN(MATCH_NOMATCH);
4623             }
4624           cc = UCHAR21(eptr);
4625           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4626             RRETURN(MATCH_NOMATCH);
4627           eptr++;
4628           /* No need to skip more bytes - we know it's a 1-byte character */
4629           }
4630         break;
4631 
4632         default:
4633         RRETURN(PCRE_ERROR_INTERNAL);
4634         }  /* End switch(ctype) */
4635 
4636       else
4637 #endif     /* SUPPORT_UTF */
4638 
      /* Code for the non-UTF case (8-bit, 16-bit, or 32-bit code units): match
      the mandatory minimum number of repeats for operators other than OP_PROP
      and OP_NOTPROP. */
4641 
4642       switch(ctype)
4643         {
4644         case OP_ANY:
4645         for (i = 1; i <= min; i++)
4646           {
4647           if (eptr >= md->end_subject)
4648             {
4649             SCHECK_PARTIAL();
4650             RRETURN(MATCH_NOMATCH);
4651             }
4652           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4653           if (md->partial != 0 &&
4654               eptr + 1 >= md->end_subject &&
4655               NLBLOCK->nltype == NLTYPE_FIXED &&
4656               NLBLOCK->nllen == 2 &&
4657               *eptr == NLBLOCK->nl[0])
4658             {
4659             md->hitend = TRUE;
4660             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4661             }
4662           eptr++;
4663           }
4664         break;
4665 
4666         case OP_ALLANY:
4667         if (eptr > md->end_subject - min)
4668           {
4669           SCHECK_PARTIAL();
4670           RRETURN(MATCH_NOMATCH);
4671           }
4672         eptr += min;
4673         break;
4674 
4675         case OP_ANYBYTE:
4676         if (eptr > md->end_subject - min)
4677           {
4678           SCHECK_PARTIAL();
4679           RRETURN(MATCH_NOMATCH);
4680           }
4681         eptr += min;
4682         break;
4683 
4684         case OP_ANYNL:
4685         for (i = 1; i <= min; i++)
4686           {
4687           if (eptr >= md->end_subject)
4688             {
4689             SCHECK_PARTIAL();
4690             RRETURN(MATCH_NOMATCH);
4691             }
4692           switch(*eptr++)
4693             {
4694             default: RRETURN(MATCH_NOMATCH);
4695 
4696             case CHAR_CR:
4697             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4698             break;
4699 
4700             case CHAR_LF:
4701             break;
4702 
4703             case CHAR_VT:
4704             case CHAR_FF:
4705             case CHAR_NEL:
4706 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4707             case 0x2028:
4708             case 0x2029:
4709 #endif
4710             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4711             break;
4712             }
4713           }
4714         break;
4715 
4716         case OP_NOT_HSPACE:
4717         for (i = 1; i <= min; i++)
4718           {
4719           if (eptr >= md->end_subject)
4720             {
4721             SCHECK_PARTIAL();
4722             RRETURN(MATCH_NOMATCH);
4723             }
4724           switch(*eptr++)
4725             {
4726             default: break;
4727             HSPACE_BYTE_CASES:
4728 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4729             HSPACE_MULTIBYTE_CASES:
4730 #endif
4731             RRETURN(MATCH_NOMATCH);
4732             }
4733           }
4734         break;
4735 
4736         case OP_HSPACE:
4737         for (i = 1; i <= min; i++)
4738           {
4739           if (eptr >= md->end_subject)
4740             {
4741             SCHECK_PARTIAL();
4742             RRETURN(MATCH_NOMATCH);
4743             }
4744           switch(*eptr++)
4745             {
4746             default: RRETURN(MATCH_NOMATCH);
4747             HSPACE_BYTE_CASES:
4748 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4749             HSPACE_MULTIBYTE_CASES:
4750 #endif
4751             break;
4752             }
4753           }
4754         break;
4755 
4756         case OP_NOT_VSPACE:
4757         for (i = 1; i <= min; i++)
4758           {
4759           if (eptr >= md->end_subject)
4760             {
4761             SCHECK_PARTIAL();
4762             RRETURN(MATCH_NOMATCH);
4763             }
4764           switch(*eptr++)
4765             {
4766             VSPACE_BYTE_CASES:
4767 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4768             VSPACE_MULTIBYTE_CASES:
4769 #endif
4770             RRETURN(MATCH_NOMATCH);
4771             default: break;
4772             }
4773           }
4774         break;
4775 
4776         case OP_VSPACE:
4777         for (i = 1; i <= min; i++)
4778           {
4779           if (eptr >= md->end_subject)
4780             {
4781             SCHECK_PARTIAL();
4782             RRETURN(MATCH_NOMATCH);
4783             }
4784           switch(*eptr++)
4785             {
4786             default: RRETURN(MATCH_NOMATCH);
4787             VSPACE_BYTE_CASES:
4788 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4789             VSPACE_MULTIBYTE_CASES:
4790 #endif
4791             break;
4792             }
4793           }
4794         break;
4795 
4796         case OP_NOT_DIGIT:
4797         for (i = 1; i <= min; i++)
4798           {
4799           if (eptr >= md->end_subject)
4800             {
4801             SCHECK_PARTIAL();
4802             RRETURN(MATCH_NOMATCH);
4803             }
4804           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4805             RRETURN(MATCH_NOMATCH);
4806           eptr++;
4807           }
4808         break;
4809 
4810         case OP_DIGIT:
4811         for (i = 1; i <= min; i++)
4812           {
4813           if (eptr >= md->end_subject)
4814             {
4815             SCHECK_PARTIAL();
4816             RRETURN(MATCH_NOMATCH);
4817             }
4818           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4819             RRETURN(MATCH_NOMATCH);
4820           eptr++;
4821           }
4822         break;
4823 
4824         case OP_NOT_WHITESPACE:
4825         for (i = 1; i <= min; i++)
4826           {
4827           if (eptr >= md->end_subject)
4828             {
4829             SCHECK_PARTIAL();
4830             RRETURN(MATCH_NOMATCH);
4831             }
4832           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4833             RRETURN(MATCH_NOMATCH);
4834           eptr++;
4835           }
4836         break;
4837 
4838         case OP_WHITESPACE:
4839         for (i = 1; i <= min; i++)
4840           {
4841           if (eptr >= md->end_subject)
4842             {
4843             SCHECK_PARTIAL();
4844             RRETURN(MATCH_NOMATCH);
4845             }
4846           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4847             RRETURN(MATCH_NOMATCH);
4848           eptr++;
4849           }
4850         break;
4851 
4852         case OP_NOT_WORDCHAR:
4853         for (i = 1; i <= min; i++)
4854           {
4855           if (eptr >= md->end_subject)
4856             {
4857             SCHECK_PARTIAL();
4858             RRETURN(MATCH_NOMATCH);
4859             }
4860           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4861             RRETURN(MATCH_NOMATCH);
4862           eptr++;
4863           }
4864         break;
4865 
4866         case OP_WORDCHAR:
4867         for (i = 1; i <= min; i++)
4868           {
4869           if (eptr >= md->end_subject)
4870             {
4871             SCHECK_PARTIAL();
4872             RRETURN(MATCH_NOMATCH);
4873             }
4874           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4875             RRETURN(MATCH_NOMATCH);
4876           eptr++;
4877           }
4878         break;
4879 
4880         default:
4881         RRETURN(PCRE_ERROR_INTERNAL);
4882         }
4883       }
4884 
4885     /* If min = max, continue at the same level without recursing */
4886 
4887     if (min == max) continue;
4888 
    /* If minimizing, we have to test the rest of the pattern before each
    subsequent match. Again, separate the UTF case for speed, and also
    separate the UCP cases. */
4892 
4893     if (minimize)
4894       {
4895 #ifdef SUPPORT_UCP
4896       if (prop_type >= 0)
4897         {
4898         switch(prop_type)
4899           {
4900           case PT_ANY:
4901           for (fi = min;; fi++)
4902             {
4903             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4904             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4905             if (fi >= max) RRETURN(MATCH_NOMATCH);
4906             if (eptr >= md->end_subject)
4907               {
4908               SCHECK_PARTIAL();
4909               RRETURN(MATCH_NOMATCH);
4910               }
4911             GETCHARINCTEST(c, eptr);
4912             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4913             }
4914           /* Control never gets here */
4915 
4916           case PT_LAMP:
4917           for (fi = min;; fi++)
4918             {
4919             int chartype;
4920             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4921             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4922             if (fi >= max) RRETURN(MATCH_NOMATCH);
4923             if (eptr >= md->end_subject)
4924               {
4925               SCHECK_PARTIAL();
4926               RRETURN(MATCH_NOMATCH);
4927               }
4928             GETCHARINCTEST(c, eptr);
4929             chartype = UCD_CHARTYPE(c);
4930             if ((chartype == ucp_Lu ||
4931                  chartype == ucp_Ll ||
4932                  chartype == ucp_Lt) == prop_fail_result)
4933               RRETURN(MATCH_NOMATCH);
4934             }
4935           /* Control never gets here */
4936 
4937           case PT_GC:
4938           for (fi = min;; fi++)
4939             {
4940             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4941             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4942             if (fi >= max) RRETURN(MATCH_NOMATCH);
4943             if (eptr >= md->end_subject)
4944               {
4945               SCHECK_PARTIAL();
4946               RRETURN(MATCH_NOMATCH);
4947               }
4948             GETCHARINCTEST(c, eptr);
4949             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4950               RRETURN(MATCH_NOMATCH);
4951             }
4952           /* Control never gets here */
4953 
4954           case PT_PC:
4955           for (fi = min;; fi++)
4956             {
4957             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4958             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4959             if (fi >= max) RRETURN(MATCH_NOMATCH);
4960             if (eptr >= md->end_subject)
4961               {
4962               SCHECK_PARTIAL();
4963               RRETURN(MATCH_NOMATCH);
4964               }
4965             GETCHARINCTEST(c, eptr);
4966             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4967               RRETURN(MATCH_NOMATCH);
4968             }
4969           /* Control never gets here */
4970 
4971           case PT_SC:
4972           for (fi = min;; fi++)
4973             {
4974             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4975             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4976             if (fi >= max) RRETURN(MATCH_NOMATCH);
4977             if (eptr >= md->end_subject)
4978               {
4979               SCHECK_PARTIAL();
4980               RRETURN(MATCH_NOMATCH);
4981               }
4982             GETCHARINCTEST(c, eptr);
4983             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4984               RRETURN(MATCH_NOMATCH);
4985             }
4986           /* Control never gets here */
4987 
4988           case PT_ALNUM:
4989           for (fi = min;; fi++)
4990             {
4991             int category;
4992             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4993             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4994             if (fi >= max) RRETURN(MATCH_NOMATCH);
4995             if (eptr >= md->end_subject)
4996               {
4997               SCHECK_PARTIAL();
4998               RRETURN(MATCH_NOMATCH);
4999               }
5000             GETCHARINCTEST(c, eptr);
5001             category = UCD_CATEGORY(c);
5002             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5003               RRETURN(MATCH_NOMATCH);
5004             }
5005           /* Control never gets here */
5006 
5007           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5008           which means that Perl space and POSIX space are now identical. PCRE
5009           was changed at release 8.34. */
5010 
5011           case PT_SPACE:    /* Perl space */
5012           case PT_PXSPACE:  /* POSIX space */
5013           for (fi = min;; fi++)
5014             {
5015             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
5016             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5017             if (fi >= max) RRETURN(MATCH_NOMATCH);
5018             if (eptr >= md->end_subject)
5019               {
5020               SCHECK_PARTIAL();
5021               RRETURN(MATCH_NOMATCH);
5022               }
5023             GETCHARINCTEST(c, eptr);
5024             switch(c)
5025               {
5026               HSPACE_CASES:
5027               VSPACE_CASES:
5028               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5029               break;
5030 
5031               default:
5032               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5033                 RRETURN(MATCH_NOMATCH);
5034               break;
5035               }
5036             }
5037           /* Control never gets here */
5038 
5039           case PT_WORD:
5040           for (fi = min;; fi++)
5041             {
5042             int category;
5043             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
5044             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5045             if (fi >= max) RRETURN(MATCH_NOMATCH);
5046             if (eptr >= md->end_subject)
5047               {
5048               SCHECK_PARTIAL();
5049               RRETURN(MATCH_NOMATCH);
5050               }
5051             GETCHARINCTEST(c, eptr);
5052             category = UCD_CATEGORY(c);
5053             if ((category == ucp_L ||
5054                  category == ucp_N ||
5055                  c == CHAR_UNDERSCORE)
5056                    == prop_fail_result)
5057               RRETURN(MATCH_NOMATCH);
5058             }
5059           /* Control never gets here */
5060 
5061           case PT_CLIST:
5062           for (fi = min;; fi++)
5063             {
5064             const pcre_uint32 *cp;
5065             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5066             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5067             if (fi >= max) RRETURN(MATCH_NOMATCH);
5068             if (eptr >= md->end_subject)
5069               {
5070               SCHECK_PARTIAL();
5071               RRETURN(MATCH_NOMATCH);
5072               }
5073             GETCHARINCTEST(c, eptr);
5074             cp = PRIV(ucd_caseless_sets) + prop_value;
5075             for (;;)
5076               {
5077               if (c < *cp)
5078                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5079               if (c == *cp++)
5080                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5081               }
5082             }
5083           /* Control never gets here */
5084 
5085           case PT_UCNC:
5086           for (fi = min;; fi++)
5087             {
5088             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
5089             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5090             if (fi >= max) RRETURN(MATCH_NOMATCH);
5091             if (eptr >= md->end_subject)
5092               {
5093               SCHECK_PARTIAL();
5094               RRETURN(MATCH_NOMATCH);
5095               }
5096             GETCHARINCTEST(c, eptr);
5097             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5098                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5099                  c >= 0xe000) == prop_fail_result)
5100               RRETURN(MATCH_NOMATCH);
5101             }
5102           /* Control never gets here */
5103 
5104           /* This should never occur */
5105           default:
5106           RRETURN(PCRE_ERROR_INTERNAL);
5107           }
5108         }
5109 
5110       /* Match extended Unicode sequences. We will get here only if the
5111       support is in the binary; otherwise a compile-time error occurs. */
5112 
5113       else if (ctype == OP_EXTUNI)
5114         {
5115         for (fi = min;; fi++)
5116           {
5117           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5118           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5119           if (fi >= max) RRETURN(MATCH_NOMATCH);
5120           if (eptr >= md->end_subject)
5121             {
5122             SCHECK_PARTIAL();
5123             RRETURN(MATCH_NOMATCH);
5124             }
5125           else
5126             {
5127             int lgb, rgb;
5128             GETCHARINCTEST(c, eptr);
5129             lgb = UCD_GRAPHBREAK(c);
5130             while (eptr < md->end_subject)
5131               {
5132               int len = 1;
5133               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5134               rgb = UCD_GRAPHBREAK(c);
5135               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5136               lgb = rgb;
5137               eptr += len;
5138               }
5139             }
5140           CHECK_PARTIAL();
5141           }
5142         }
5143       else
5144 #endif     /* SUPPORT_UCP */
5145 
5146 #ifdef SUPPORT_UTF
5147       if (utf)
5148         {
5149         for (fi = min;; fi++)
5150           {
5151           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5152           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5153           if (fi >= max) RRETURN(MATCH_NOMATCH);
5154           if (eptr >= md->end_subject)
5155             {
5156             SCHECK_PARTIAL();
5157             RRETURN(MATCH_NOMATCH);
5158             }
5159           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5160             RRETURN(MATCH_NOMATCH);
5161           GETCHARINC(c, eptr);
5162           switch(ctype)
5163             {
5164             case OP_ANY:               /* This is the non-NL case */
5165             if (md->partial != 0 &&    /* Take care with CRLF partial */
5166                 eptr >= md->end_subject &&
5167                 NLBLOCK->nltype == NLTYPE_FIXED &&
5168                 NLBLOCK->nllen == 2 &&
5169                 c == NLBLOCK->nl[0])
5170               {
5171               md->hitend = TRUE;
5172               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5173               }
5174             break;
5175 
5176             case OP_ALLANY:
5177             case OP_ANYBYTE:
5178             break;
5179 
5180             case OP_ANYNL:
5181             switch(c)
5182               {
5183               default: RRETURN(MATCH_NOMATCH);
5184               case CHAR_CR:
5185               if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
5186               break;
5187 
5188               case CHAR_LF:
5189               break;
5190 
5191               case CHAR_VT:
5192               case CHAR_FF:
5193               case CHAR_NEL:
5194 #ifndef EBCDIC
5195               case 0x2028:
5196               case 0x2029:
5197 #endif  /* Not EBCDIC */
5198               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5199               break;
5200               }
5201             break;
5202 
5203             case OP_NOT_HSPACE:
5204             switch(c)
5205               {
5206               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5207               default: break;
5208               }
5209             break;
5210 
5211             case OP_HSPACE:
5212             switch(c)
5213               {
5214               HSPACE_CASES: break;
5215               default: RRETURN(MATCH_NOMATCH);
5216               }
5217             break;
5218 
5219             case OP_NOT_VSPACE:
5220             switch(c)
5221               {
5222               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5223               default: break;
5224               }
5225             break;
5226 
5227             case OP_VSPACE:
5228             switch(c)
5229               {
5230               VSPACE_CASES: break;
5231               default: RRETURN(MATCH_NOMATCH);
5232               }
5233             break;
5234 
5235             case OP_NOT_DIGIT:
5236             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5237               RRETURN(MATCH_NOMATCH);
5238             break;
5239 
5240             case OP_DIGIT:
5241             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5242               RRETURN(MATCH_NOMATCH);
5243             break;
5244 
5245             case OP_NOT_WHITESPACE:
5246             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5247               RRETURN(MATCH_NOMATCH);
5248             break;
5249 
5250             case OP_WHITESPACE:
5251             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5252               RRETURN(MATCH_NOMATCH);
5253             break;
5254 
5255             case OP_NOT_WORDCHAR:
5256             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5257               RRETURN(MATCH_NOMATCH);
5258             break;
5259 
5260             case OP_WORDCHAR:
5261             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5262               RRETURN(MATCH_NOMATCH);
5263             break;
5264 
5265             default:
5266             RRETURN(PCRE_ERROR_INTERNAL);
5267             }
5268           }
5269         }
5270       else
5271 #endif
5272       /* Not UTF mode */
5273         {
5274         for (fi = min;; fi++)
5275           {
5276           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5277           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5278           if (fi >= max) RRETURN(MATCH_NOMATCH);
5279           if (eptr >= md->end_subject)
5280             {
5281             SCHECK_PARTIAL();
5282             RRETURN(MATCH_NOMATCH);
5283             }
5284           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5285             RRETURN(MATCH_NOMATCH);
5286           c = *eptr++;
5287           switch(ctype)
5288             {
5289             case OP_ANY:               /* This is the non-NL case */
5290             if (md->partial != 0 &&    /* Take care with CRLF partial */
5291                 eptr >= md->end_subject &&
5292                 NLBLOCK->nltype == NLTYPE_FIXED &&
5293                 NLBLOCK->nllen == 2 &&
5294                 c == NLBLOCK->nl[0])
5295               {
5296               md->hitend = TRUE;
5297               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5298               }
5299             break;
5300 
5301             case OP_ALLANY:
5302             case OP_ANYBYTE:
5303             break;
5304 
5305             case OP_ANYNL:
5306             switch(c)
5307               {
5308               default: RRETURN(MATCH_NOMATCH);
5309               case CHAR_CR:
5310               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5311               break;
5312 
5313               case CHAR_LF:
5314               break;
5315 
5316               case CHAR_VT:
5317               case CHAR_FF:
5318               case CHAR_NEL:
5319 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5320               case 0x2028:
5321               case 0x2029:
5322 #endif
5323               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5324               break;
5325               }
5326             break;
5327 
5328             case OP_NOT_HSPACE:
5329             switch(c)
5330               {
5331               default: break;
5332               HSPACE_BYTE_CASES:
5333 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5334               HSPACE_MULTIBYTE_CASES:
5335 #endif
5336               RRETURN(MATCH_NOMATCH);
5337               }
5338             break;
5339 
5340             case OP_HSPACE:
5341             switch(c)
5342               {
5343               default: RRETURN(MATCH_NOMATCH);
5344               HSPACE_BYTE_CASES:
5345 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5346               HSPACE_MULTIBYTE_CASES:
5347 #endif
5348               break;
5349               }
5350             break;
5351 
5352             case OP_NOT_VSPACE:
5353             switch(c)
5354               {
5355               default: break;
5356               VSPACE_BYTE_CASES:
5357 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5358               VSPACE_MULTIBYTE_CASES:
5359 #endif
5360               RRETURN(MATCH_NOMATCH);
5361               }
5362             break;
5363 
5364             case OP_VSPACE:
5365             switch(c)
5366               {
5367               default: RRETURN(MATCH_NOMATCH);
5368               VSPACE_BYTE_CASES:
5369 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5370               VSPACE_MULTIBYTE_CASES:
5371 #endif
5372               break;
5373               }
5374             break;
5375 
5376             case OP_NOT_DIGIT:
5377             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5378             break;
5379 
5380             case OP_DIGIT:
5381             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5382             break;
5383 
5384             case OP_NOT_WHITESPACE:
5385             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5386             break;
5387 
5388             case OP_WHITESPACE:
5389             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5390             break;
5391 
5392             case OP_NOT_WORDCHAR:
5393             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5394             break;
5395 
5396             case OP_WORDCHAR:
5397             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5398             break;
5399 
5400             default:
5401             RRETURN(PCRE_ERROR_INTERNAL);
5402             }
5403           }
5404         }
5405       /* Control never gets here */
5406       }
5407 
    /* If maximizing, it is worth using inline code for speed, doing the type
    test once at the start (i.e. keep it out of the loop). Again, keep the
    UTF and UCP handling separate. */
5411 
5412     else
5413       {
5414       pp = eptr;  /* Remember where we started */
5415 
5416 #ifdef SUPPORT_UCP
5417       if (prop_type >= 0)
5418         {
5419         switch(prop_type)
5420           {
5421           case PT_ANY:
5422           for (i = min; i < max; i++)
5423             {
5424             int len = 1;
5425             if (eptr >= md->end_subject)
5426               {
5427               SCHECK_PARTIAL();
5428               break;
5429               }
5430             GETCHARLENTEST(c, eptr, len);
5431             if (prop_fail_result) break;
5432             eptr+= len;
5433             }
5434           break;
5435 
5436           case PT_LAMP:
5437           for (i = min; i < max; i++)
5438             {
5439             int chartype;
5440             int len = 1;
5441             if (eptr >= md->end_subject)
5442               {
5443               SCHECK_PARTIAL();
5444               break;
5445               }
5446             GETCHARLENTEST(c, eptr, len);
5447             chartype = UCD_CHARTYPE(c);
5448             if ((chartype == ucp_Lu ||
5449                  chartype == ucp_Ll ||
5450                  chartype == ucp_Lt) == prop_fail_result)
5451               break;
5452             eptr+= len;
5453             }
5454           break;
5455 
5456           case PT_GC:
5457           for (i = min; i < max; i++)
5458             {
5459             int len = 1;
5460             if (eptr >= md->end_subject)
5461               {
5462               SCHECK_PARTIAL();
5463               break;
5464               }
5465             GETCHARLENTEST(c, eptr, len);
5466             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5467             eptr+= len;
5468             }
5469           break;
5470 
5471           case PT_PC:
5472           for (i = min; i < max; i++)
5473             {
5474             int len = 1;
5475             if (eptr >= md->end_subject)
5476               {
5477               SCHECK_PARTIAL();
5478               break;
5479               }
5480             GETCHARLENTEST(c, eptr, len);
5481             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5482             eptr+= len;
5483             }
5484           break;
5485 
5486           case PT_SC:
5487           for (i = min; i < max; i++)
5488             {
5489             int len = 1;
5490             if (eptr >= md->end_subject)
5491               {
5492               SCHECK_PARTIAL();
5493               break;
5494               }
5495             GETCHARLENTEST(c, eptr, len);
5496             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5497             eptr+= len;
5498             }
5499           break;
5500 
5501           case PT_ALNUM:
5502           for (i = min; i < max; i++)
5503             {
5504             int category;
5505             int len = 1;
5506             if (eptr >= md->end_subject)
5507               {
5508               SCHECK_PARTIAL();
5509               break;
5510               }
5511             GETCHARLENTEST(c, eptr, len);
5512             category = UCD_CATEGORY(c);
5513             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5514               break;
5515             eptr+= len;
5516             }
5517           break;
5518 
5519           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5520           which means that Perl space and POSIX space are now identical. PCRE
5521           was changed at release 8.34. */
5522 
5523           case PT_SPACE:    /* Perl space */
5524           case PT_PXSPACE:  /* POSIX space */
5525           for (i = min; i < max; i++)
5526             {
5527             int len = 1;
5528             if (eptr >= md->end_subject)
5529               {
5530               SCHECK_PARTIAL();
5531               break;
5532               }
5533             GETCHARLENTEST(c, eptr, len);
5534             switch(c)
5535               {
5536               HSPACE_CASES:
5537               VSPACE_CASES:
5538               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
5539               break;
5540 
5541               default:
5542               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5543                 goto ENDLOOP99;   /* Break the loop */
5544               break;
5545               }
5546             eptr+= len;
5547             }
5548           ENDLOOP99:
5549           break;
5550 
5551           case PT_WORD:
5552           for (i = min; i < max; i++)
5553             {
5554             int category;
5555             int len = 1;
5556             if (eptr >= md->end_subject)
5557               {
5558               SCHECK_PARTIAL();
5559               break;
5560               }
5561             GETCHARLENTEST(c, eptr, len);
5562             category = UCD_CATEGORY(c);
5563             if ((category == ucp_L || category == ucp_N ||
5564                  c == CHAR_UNDERSCORE) == prop_fail_result)
5565               break;
5566             eptr+= len;
5567             }
5568           break;
5569 
5570           case PT_CLIST:
5571           for (i = min; i < max; i++)
5572             {
5573             const pcre_uint32 *cp;
5574             int len = 1;
5575             if (eptr >= md->end_subject)
5576               {
5577               SCHECK_PARTIAL();
5578               break;
5579               }
5580             GETCHARLENTEST(c, eptr, len);
5581             cp = PRIV(ucd_caseless_sets) + prop_value;
5582             for (;;)
5583               {
5584               if (c < *cp)
5585                 { if (prop_fail_result) break; else goto GOT_MAX; }
5586               if (c == *cp++)
5587                 { if (prop_fail_result) goto GOT_MAX; else break; }
5588               }
5589             eptr += len;
5590             }
5591           GOT_MAX:
5592           break;
5593 
5594           case PT_UCNC:
5595           for (i = min; i < max; i++)
5596             {
5597             int len = 1;
5598             if (eptr >= md->end_subject)
5599               {
5600               SCHECK_PARTIAL();
5601               break;
5602               }
5603             GETCHARLENTEST(c, eptr, len);
5604             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5605                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5606                  c >= 0xe000) == prop_fail_result)
5607               break;
5608             eptr += len;
5609             }
5610           break;
5611 
5612           default:
5613           RRETURN(PCRE_ERROR_INTERNAL);
5614           }
5615 
5616         /* eptr is now past the end of the maximum run */
5617 
5618         if (possessive) continue;    /* No backtracking */
5619         for(;;)
5620           {
5621           if (eptr <= pp) goto TAIL_RECURSE;
5622           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5623           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5624           eptr--;
5625           if (utf) BACKCHAR(eptr);
5626           }
5627         }
5628 
5629       /* Match extended Unicode grapheme clusters. We will get here only if the
5630       support is in the binary; otherwise a compile-time error occurs. */
5631 
5632       else if (ctype == OP_EXTUNI)
5633         {
5634         for (i = min; i < max; i++)
5635           {
5636           if (eptr >= md->end_subject)
5637             {
5638             SCHECK_PARTIAL();
5639             break;
5640             }
5641           else
5642             {
5643             int lgb, rgb;
5644             GETCHARINCTEST(c, eptr);
5645             lgb = UCD_GRAPHBREAK(c);
5646             while (eptr < md->end_subject)
5647               {
5648               int len = 1;
5649               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5650               rgb = UCD_GRAPHBREAK(c);
5651               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5652               lgb = rgb;
5653               eptr += len;
5654               }
5655             }
5656           CHECK_PARTIAL();
5657           }
5658 
5659         /* eptr is now past the end of the maximum run */
5660 
5661         if (possessive) continue;    /* No backtracking */
5662 
5663         /* We use <= pp rather than == pp to detect the start of the run while
5664         backtracking because the use of \C in UTF mode can cause BACKCHAR to
5665         move back past pp. This is just palliative; the use of \C in UTF mode
5666         is fraught with danger. */
5667 
5668         for(;;)
5669           {
5670           int lgb, rgb;
5671           PCRE_PUCHAR fptr;
5672 
5673           if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5674           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5675           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5676 
5677           /* Backtracking over an extended grapheme cluster involves inspecting
5678           the previous two characters (if present) to see if a break is
5679           permitted between them. */
5680 
5681           eptr--;
5682           if (!utf) c = *eptr; else
5683             {
5684             BACKCHAR(eptr);
5685             GETCHAR(c, eptr);
5686             }
5687           rgb = UCD_GRAPHBREAK(c);
5688 
5689           for (;;)
5690             {
5691             if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5692             fptr = eptr - 1;
5693             if (!utf) c = *fptr; else
5694               {
5695               BACKCHAR(fptr);
5696               GETCHAR(c, fptr);
5697               }
5698             lgb = UCD_GRAPHBREAK(c);
5699             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5700             eptr = fptr;
5701             rgb = lgb;
5702             }
5703           }
5704         }
5705 
5706       else
5707 #endif   /* SUPPORT_UCP */
5708 
5709 #ifdef SUPPORT_UTF
5710       if (utf)
5711         {
5712         switch(ctype)
5713           {
5714           case OP_ANY:
5715           for (i = min; i < max; i++)
5716             {
5717             if (eptr >= md->end_subject)
5718               {
5719               SCHECK_PARTIAL();
5720               break;
5721               }
5722             if (IS_NEWLINE(eptr)) break;
5723             if (md->partial != 0 &&    /* Take care with CRLF partial */
5724                 eptr + 1 >= md->end_subject &&
5725                 NLBLOCK->nltype == NLTYPE_FIXED &&
5726                 NLBLOCK->nllen == 2 &&
5727                 UCHAR21(eptr) == NLBLOCK->nl[0])
5728               {
5729               md->hitend = TRUE;
5730               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5731               }
5732             eptr++;
5733             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5734             }
5735           break;
5736 
5737           case OP_ALLANY:
5738           if (max < INT_MAX)
5739             {
5740             for (i = min; i < max; i++)
5741               {
5742               if (eptr >= md->end_subject)
5743                 {
5744                 SCHECK_PARTIAL();
5745                 break;
5746                 }
5747               eptr++;
5748               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5749               }
5750             }
5751           else
5752             {
5753             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5754             SCHECK_PARTIAL();
5755             }
5756           break;
5757 
5758           /* The byte case is the same as non-UTF8 */
5759 
5760           case OP_ANYBYTE:
5761           c = max - min;
5762           if (c > (unsigned int)(md->end_subject - eptr))
5763             {
5764             eptr = md->end_subject;
5765             SCHECK_PARTIAL();
5766             }
5767           else eptr += c;
5768           break;
5769 
5770           case OP_ANYNL:
5771           for (i = min; i < max; i++)
5772             {
5773             int len = 1;
5774             if (eptr >= md->end_subject)
5775               {
5776               SCHECK_PARTIAL();
5777               break;
5778               }
5779             GETCHARLEN(c, eptr, len);
5780             if (c == CHAR_CR)
5781               {
5782               if (++eptr >= md->end_subject) break;
5783               if (UCHAR21(eptr) == CHAR_LF) eptr++;
5784               }
5785             else
5786               {
5787               if (c != CHAR_LF &&
5788                   (md->bsr_anycrlf ||
5789                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5790 #ifndef EBCDIC
5791                     && c != 0x2028 && c != 0x2029
5792 #endif  /* Not EBCDIC */
5793                     )))
5794                 break;
5795               eptr += len;
5796               }
5797             }
5798           break;
5799 
5800           case OP_NOT_HSPACE:
5801           case OP_HSPACE:
5802           for (i = min; i < max; i++)
5803             {
5804             BOOL gotspace;
5805             int len = 1;
5806             if (eptr >= md->end_subject)
5807               {
5808               SCHECK_PARTIAL();
5809               break;
5810               }
5811             GETCHARLEN(c, eptr, len);
5812             switch(c)
5813               {
5814               HSPACE_CASES: gotspace = TRUE; break;
5815               default: gotspace = FALSE; break;
5816               }
5817             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5818             eptr += len;
5819             }
5820           break;
5821 
5822           case OP_NOT_VSPACE:
5823           case OP_VSPACE:
5824           for (i = min; i < max; i++)
5825             {
5826             BOOL gotspace;
5827             int len = 1;
5828             if (eptr >= md->end_subject)
5829               {
5830               SCHECK_PARTIAL();
5831               break;
5832               }
5833             GETCHARLEN(c, eptr, len);
5834             switch(c)
5835               {
5836               VSPACE_CASES: gotspace = TRUE; break;
5837               default: gotspace = FALSE; break;
5838               }
5839             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5840             eptr += len;
5841             }
5842           break;
5843 
5844           case OP_NOT_DIGIT:
5845           for (i = min; i < max; i++)
5846             {
5847             int len = 1;
5848             if (eptr >= md->end_subject)
5849               {
5850               SCHECK_PARTIAL();
5851               break;
5852               }
5853             GETCHARLEN(c, eptr, len);
5854             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5855             eptr+= len;
5856             }
5857           break;
5858 
5859           case OP_DIGIT:
5860           for (i = min; i < max; i++)
5861             {
5862             int len = 1;
5863             if (eptr >= md->end_subject)
5864               {
5865               SCHECK_PARTIAL();
5866               break;
5867               }
5868             GETCHARLEN(c, eptr, len);
5869             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5870             eptr+= len;
5871             }
5872           break;
5873 
5874           case OP_NOT_WHITESPACE:
5875           for (i = min; i < max; i++)
5876             {
5877             int len = 1;
5878             if (eptr >= md->end_subject)
5879               {
5880               SCHECK_PARTIAL();
5881               break;
5882               }
5883             GETCHARLEN(c, eptr, len);
5884             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5885             eptr+= len;
5886             }
5887           break;
5888 
5889           case OP_WHITESPACE:
5890           for (i = min; i < max; i++)
5891             {
5892             int len = 1;
5893             if (eptr >= md->end_subject)
5894               {
5895               SCHECK_PARTIAL();
5896               break;
5897               }
5898             GETCHARLEN(c, eptr, len);
5899             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5900             eptr+= len;
5901             }
5902           break;
5903 
5904           case OP_NOT_WORDCHAR:
5905           for (i = min; i < max; i++)
5906             {
5907             int len = 1;
5908             if (eptr >= md->end_subject)
5909               {
5910               SCHECK_PARTIAL();
5911               break;
5912               }
5913             GETCHARLEN(c, eptr, len);
5914             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5915             eptr+= len;
5916             }
5917           break;
5918 
5919           case OP_WORDCHAR:
5920           for (i = min; i < max; i++)
5921             {
5922             int len = 1;
5923             if (eptr >= md->end_subject)
5924               {
5925               SCHECK_PARTIAL();
5926               break;
5927               }
5928             GETCHARLEN(c, eptr, len);
5929             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5930             eptr+= len;
5931             }
5932           break;
5933 
5934           default:
5935           RRETURN(PCRE_ERROR_INTERNAL);
5936           }
5937 
5938         if (possessive) continue;    /* No backtracking */
5939         for(;;)
5940           {
5941           if (eptr <= pp) goto TAIL_RECURSE;
5942           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5943           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5944           eptr--;
5945           BACKCHAR(eptr);
5946           if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
5947               UCHAR21(eptr - 1) == CHAR_CR) eptr--;
5948           }
5949         }
5950       else
5951 #endif  /* SUPPORT_UTF */
5952       /* Not UTF mode */
5953         {
5954         switch(ctype)
5955           {
5956           case OP_ANY:
5957           for (i = min; i < max; i++)
5958             {
5959             if (eptr >= md->end_subject)
5960               {
5961               SCHECK_PARTIAL();
5962               break;
5963               }
5964             if (IS_NEWLINE(eptr)) break;
5965             if (md->partial != 0 &&    /* Take care with CRLF partial */
5966                 eptr + 1 >= md->end_subject &&
5967                 NLBLOCK->nltype == NLTYPE_FIXED &&
5968                 NLBLOCK->nllen == 2 &&
5969                 *eptr == NLBLOCK->nl[0])
5970               {
5971               md->hitend = TRUE;
5972               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5973               }
5974             eptr++;
5975             }
5976           break;
5977 
5978           case OP_ALLANY:
5979           case OP_ANYBYTE:
5980           c = max - min;
5981           if (c > (unsigned int)(md->end_subject - eptr))
5982             {
5983             eptr = md->end_subject;
5984             SCHECK_PARTIAL();
5985             }
5986           else eptr += c;
5987           break;
5988 
5989           case OP_ANYNL:
5990           for (i = min; i < max; i++)
5991             {
5992             if (eptr >= md->end_subject)
5993               {
5994               SCHECK_PARTIAL();
5995               break;
5996               }
5997             c = *eptr;
5998             if (c == CHAR_CR)
5999               {
6000               if (++eptr >= md->end_subject) break;
6001               if (*eptr == CHAR_LF) eptr++;
6002               }
6003             else
6004               {
6005               if (c != CHAR_LF && (md->bsr_anycrlf ||
6006                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
6007 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6008                  && c != 0x2028 && c != 0x2029
6009 #endif
6010                  ))) break;
6011               eptr++;
6012               }
6013             }
6014           break;
6015 
6016           case OP_NOT_HSPACE:
6017           for (i = min; i < max; i++)
6018             {
6019             if (eptr >= md->end_subject)
6020               {
6021               SCHECK_PARTIAL();
6022               break;
6023               }
6024             switch(*eptr)
6025               {
6026               default: eptr++; break;
6027               HSPACE_BYTE_CASES:
6028 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6029               HSPACE_MULTIBYTE_CASES:
6030 #endif
6031               goto ENDLOOP00;
6032               }
6033             }
6034           ENDLOOP00:
6035           break;
6036 
6037           case OP_HSPACE:
6038           for (i = min; i < max; i++)
6039             {
6040             if (eptr >= md->end_subject)
6041               {
6042               SCHECK_PARTIAL();
6043               break;
6044               }
6045             switch(*eptr)
6046               {
6047               default: goto ENDLOOP01;
6048               HSPACE_BYTE_CASES:
6049 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6050               HSPACE_MULTIBYTE_CASES:
6051 #endif
6052               eptr++; break;
6053               }
6054             }
6055           ENDLOOP01:
6056           break;
6057 
6058           case OP_NOT_VSPACE:
6059           for (i = min; i < max; i++)
6060             {
6061             if (eptr >= md->end_subject)
6062               {
6063               SCHECK_PARTIAL();
6064               break;
6065               }
6066             switch(*eptr)
6067               {
6068               default: eptr++; break;
6069               VSPACE_BYTE_CASES:
6070 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6071               VSPACE_MULTIBYTE_CASES:
6072 #endif
6073               goto ENDLOOP02;
6074               }
6075             }
6076           ENDLOOP02:
6077           break;
6078 
6079           case OP_VSPACE:
6080           for (i = min; i < max; i++)
6081             {
6082             if (eptr >= md->end_subject)
6083               {
6084               SCHECK_PARTIAL();
6085               break;
6086               }
6087             switch(*eptr)
6088               {
6089               default: goto ENDLOOP03;
6090               VSPACE_BYTE_CASES:
6091 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6092               VSPACE_MULTIBYTE_CASES:
6093 #endif
6094               eptr++; break;
6095               }
6096             }
6097           ENDLOOP03:
6098           break;
6099 
6100           case OP_NOT_DIGIT:
6101           for (i = min; i < max; i++)
6102             {
6103             if (eptr >= md->end_subject)
6104               {
6105               SCHECK_PARTIAL();
6106               break;
6107               }
6108             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6109             eptr++;
6110             }
6111           break;
6112 
6113           case OP_DIGIT:
6114           for (i = min; i < max; i++)
6115             {
6116             if (eptr >= md->end_subject)
6117               {
6118               SCHECK_PARTIAL();
6119               break;
6120               }
6121             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6122             eptr++;
6123             }
6124           break;
6125 
6126           case OP_NOT_WHITESPACE:
6127           for (i = min; i < max; i++)
6128             {
6129             if (eptr >= md->end_subject)
6130               {
6131               SCHECK_PARTIAL();
6132               break;
6133               }
6134             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6135             eptr++;
6136             }
6137           break;
6138 
6139           case OP_WHITESPACE:
6140           for (i = min; i < max; i++)
6141             {
6142             if (eptr >= md->end_subject)
6143               {
6144               SCHECK_PARTIAL();
6145               break;
6146               }
6147             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6148             eptr++;
6149             }
6150           break;
6151 
6152           case OP_NOT_WORDCHAR:
6153           for (i = min; i < max; i++)
6154             {
6155             if (eptr >= md->end_subject)
6156               {
6157               SCHECK_PARTIAL();
6158               break;
6159               }
6160             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6161             eptr++;
6162             }
6163           break;
6164 
6165           case OP_WORDCHAR:
6166           for (i = min; i < max; i++)
6167             {
6168             if (eptr >= md->end_subject)
6169               {
6170               SCHECK_PARTIAL();
6171               break;
6172               }
6173             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6174             eptr++;
6175             }
6176           break;
6177 
6178           default:
6179           RRETURN(PCRE_ERROR_INTERNAL);
6180           }
6181 
6182         if (possessive) continue;    /* No backtracking */
6183         for (;;)
6184           {
6185           if (eptr == pp) goto TAIL_RECURSE;
6186           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6187           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6188           eptr--;
6189           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6190               eptr[-1] == CHAR_CR) eptr--;
6191           }
6192         }
6193 
6194       /* Control never gets here */
6195       }
6196 
6197     /* There's been some horrible disaster. Arrival here can only mean there is
6198     something seriously wrong in the code above or the OP_xxx definitions. */
6199 
6200     default:
6201     DPRINTF(("Unknown opcode %d\n", *ecode));
6202     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6203     }
6204 
6205   /* Do not stick any code in here without much thought; it is assumed
6206   that "continue" in the code above comes out to here to repeat the main
6207   loop. */
6208 
6209   }             /* End of main loop */
6210 /* Control never reaches here */
6211 
6212 
6213 /* When compiling to use the heap rather than the stack for recursive calls to
6214 match(), the RRETURN() macro jumps here. The number that is saved in
6215 frame->Xwhere indicates which label we actually want to return to. */
6216 
6217 #ifdef NO_RECURSE
6218 #define LBL(val) case val: goto L_RM##val;
6219 HEAP_RETURN:
6220 switch (frame->Xwhere)
6221   {
6222   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6223   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6224   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6225   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6226   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6227   LBL(65) LBL(66)
6228 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6229   LBL(20) LBL(21)
6230 #endif
6231 #ifdef SUPPORT_UTF
6232   LBL(16) LBL(18)
6233   LBL(22) LBL(23) LBL(28) LBL(30)
6234   LBL(32) LBL(34) LBL(42) LBL(46)
6235 #ifdef SUPPORT_UCP
6236   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6237   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6238 #endif  /* SUPPORT_UCP */
6239 #endif  /* SUPPORT_UTF */
6240   default:
6241   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6242   return PCRE_ERROR_INTERNAL;
6243   }
6244 #undef LBL
6245 #endif  /* NO_RECURSE */
6246 }
6247 
6248 
6249 /***************************************************************************
6250 ****************************************************************************
6251                    RECURSION IN THE match() FUNCTION
6252 
6253 Undefine all the macros that were defined above to handle this. */
6254 
6255 #ifdef NO_RECURSE
6256 #undef eptr
6257 #undef ecode
6258 #undef mstart
6259 #undef offset_top
6260 #undef eptrb
6261 #undef flags
6262 
6263 #undef callpat
6264 #undef charptr
6265 #undef data
6266 #undef next
6267 #undef pp
6268 #undef prev
6269 #undef saved_eptr
6270 
6271 #undef new_recursive
6272 
6273 #undef cur_is_word
6274 #undef condition
6275 #undef prev_is_word
6276 
6277 #undef ctype
6278 #undef length
6279 #undef max
6280 #undef min
6281 #undef number
6282 #undef offset
6283 #undef op
6284 #undef save_capture_last
6285 #undef save_offset1
6286 #undef save_offset2
6287 #undef save_offset3
6288 #undef stacksave
6289 
6290 #undef newptrb
6291 
6292 #endif
6293 
6294 /* These two are defined as macros in both cases */
6295 
6296 #undef fc
6297 #undef fi
6298 
6299 /***************************************************************************
6300 ***************************************************************************/
6301 
6302 
6303 #ifdef NO_RECURSE
6304 /*************************************************
6305 *          Release allocated heap frames         *
6306 *************************************************/
6307 
6308 /* This function releases all the allocated frames. The base frame is on the
6309 machine stack, and so must not be freed.
6310 
6311 Argument: the address of the base frame
6312 Returns:  nothing
6313 */
6314 
6315 static void
release_match_heapframes(heapframe * frame_base)6316 release_match_heapframes (heapframe *frame_base)
6317 {
6318 heapframe *nextframe = frame_base->Xnextframe;
6319 while (nextframe != NULL)
6320   {
6321   heapframe *oldframe = nextframe;
6322   nextframe = nextframe->Xnextframe;
6323   (PUBL(stack_free))(oldframe);
6324   }
6325 }
6326 #endif
6327 
6328 
6329 /*************************************************
6330 *         Execute a Regular Expression           *
6331 *************************************************/
6332 
6333 /* This function applies a compiled re to a subject string and picks out
6334 portions of the string if it matches. Two elements in the vector are set for
6335 each substring: the offsets to the start and end of the substring.
6336 
6337 Arguments:
6338   argument_re     points to the compiled expression
6339   extra_data      points to extra data or is NULL
6340   subject         points to the subject string
6341   length          length of subject string (may contain binary zeros)
6342   start_offset    where to start in the subject string
6343   options         option bits
6344   offsets         points to a vector of ints to be filled in with offsets
6345   offsetcount     the number of elements in the vector
6346 
6347 Returns:          > 0 => success; value is the number of elements filled in
6348                   = 0 => success, but offsets is not big enough
6349                    -1 => failed to match
6350                  < -1 => some kind of unexpected problem
6351 */
6352 
6353 #if defined COMPILE_PCRE8
6354 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offsetcount)6355 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6356   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6357   int offsetcount)
6358 #elif defined COMPILE_PCRE16
6359 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6360 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6361   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6362   int offsetcount)
6363 #elif defined COMPILE_PCRE32
6364 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6365 pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
6366   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6367   int offsetcount)
6368 #endif
6369 {
6370 int rc, ocount, arg_offset_max;
6371 int newline;
6372 BOOL using_temporary_offsets = FALSE;
6373 BOOL anchored;
6374 BOOL startline;
6375 BOOL firstline;
6376 BOOL utf;
6377 BOOL has_first_char = FALSE;
6378 BOOL has_req_char = FALSE;
6379 pcre_uchar first_char = 0;
6380 pcre_uchar first_char2 = 0;
6381 pcre_uchar req_char = 0;
6382 pcre_uchar req_char2 = 0;
6383 match_data match_block;
6384 match_data *md = &match_block;
6385 const pcre_uint8 *tables;
6386 const pcre_uint8 *start_bits = NULL;
6387 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6388 PCRE_PUCHAR end_subject;
6389 PCRE_PUCHAR start_partial = NULL;
6390 PCRE_PUCHAR match_partial = NULL;
6391 PCRE_PUCHAR req_char_ptr = start_match - 1;
6392 
6393 const pcre_study_data *study;
6394 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6395 
6396 #ifdef NO_RECURSE
6397 heapframe frame_zero;
6398 frame_zero.Xprevframe = NULL;            /* Marks the top level */
6399 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6400 md->match_frames_base = &frame_zero;
6401 #endif
6402 
6403 /* Check for the special magic call that measures the size of the stack used
6404 per recursive call of match(). Without the funny casting for sizeof, a Windows
6405 compiler gave this error: "unary minus operator applied to unsigned type,
6406 result still unsigned". Hopefully the cast fixes that. */
6407 
6408 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6409     start_offset == -999)
6410 #ifdef NO_RECURSE
6411   return -((int)sizeof(heapframe));
6412 #else
6413   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6414 #endif
6415 
6416 /* Plausibility checks */
6417 
6418 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6419 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6420   return PCRE_ERROR_NULL;
6421 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6422 if (length < 0) return PCRE_ERROR_BADLENGTH;
6423 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6424 
6425 /* Check that the first field in the block is the magic number. If it is not,
6426 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6427 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6428 means that the pattern is likely compiled with different endianness. */
6429 
6430 if (re->magic_number != MAGIC_NUMBER)
6431   return re->magic_number == REVERSED_MAGIC_NUMBER?
6432     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6433 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6434 
6435 /* These two settings are used in the code for checking a UTF-8 string that
6436 follows immediately afterwards. Other values in the md block are used only
6437 during "normal" pcre_exec() processing, not when the JIT support is in use,
6438 so they are set up later. */
6439 
6440 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6441 utf = md->utf = (re->options & PCRE_UTF8) != 0;
6442 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6443               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6444 
6445 /* Check a UTF-8 string if required. Pass back the character offset and error
6446 code for an invalid string if a results vector is available. */
6447 
6448 #ifdef SUPPORT_UTF
6449 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6450   {
6451   int erroroffset;
6452   int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6453   if (errorcode != 0)
6454     {
6455     if (offsetcount >= 2)
6456       {
6457       offsets[0] = erroroffset;
6458       offsets[1] = errorcode;
6459       }
6460 #if defined COMPILE_PCRE8
6461     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6462       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6463 #elif defined COMPILE_PCRE16
6464     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6465       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6466 #elif defined COMPILE_PCRE32
6467     return PCRE_ERROR_BADUTF32;
6468 #endif
6469     }
6470 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6471   /* Check that a start_offset points to the start of a UTF character. */
6472   if (start_offset > 0 && start_offset < length &&
6473       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6474     return PCRE_ERROR_BADUTF8_OFFSET;
6475 #endif
6476   }
6477 #endif
6478 
6479 /* If the pattern was successfully studied with JIT support, run the JIT
6480 executable instead of the rest of this function. Most options must be set at
6481 compile time for the JIT code to be usable. Fall back to the normal code path if
6482 an unsupported flag is set. */
6483 
6484 #ifdef SUPPORT_JIT
6485 if (extra_data != NULL
6486     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6487                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6488     && extra_data->executable_jit != NULL
6489     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
6490   {
6491   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6492        start_offset, options, offsets, offsetcount);
6493 
6494   /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial matching
6495   mode is not compiled. In this case we simply fall back to the interpreter. */
6496 
6497   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6498   }
6499 #endif
6500 
6501 /* Carry on with non-JIT matching. This information is for finding all the
6502 numbers associated with a given name, for condition testing. */
6503 
6504 md->name_table = (pcre_uchar *)re + re->name_table_offset;
6505 md->name_count = re->name_count;
6506 md->name_entry_size = re->name_entry_size;
6507 
6508 /* Fish out the optional data from the extra_data structure, first setting
6509 the default values. */
6510 
6511 study = NULL;
6512 md->match_limit = MATCH_LIMIT;
6513 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6514 md->callout_data = NULL;
6515 
6516 /* The table pointer is always in native byte order. */
6517 
6518 tables = re->tables;
6519 
6520 /* The two limit values override the defaults, whatever their value. */
6521 
6522 if (extra_data != NULL)
6523   {
6524   unsigned long int flags = extra_data->flags;
6525   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6526     study = (const pcre_study_data *)extra_data->study_data;
6527   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6528     md->match_limit = extra_data->match_limit;
6529   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6530     md->match_limit_recursion = extra_data->match_limit_recursion;
6531   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6532     md->callout_data = extra_data->callout_data;
6533   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6534   }
6535 
6536 /* Limits in the regex override only if they are smaller. */
6537 
6538 if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
6539   md->match_limit = re->limit_match;
6540 
6541 if ((re->flags & PCRE_RLSET) != 0 &&
6542     re->limit_recursion < md->match_limit_recursion)
6543   md->match_limit_recursion = re->limit_recursion;
6544 
6545 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6546 is a feature that makes it possible to save compiled regex and re-use them
6547 in other programs later. */
6548 
6549 if (tables == NULL) tables = PRIV(default_tables);
6550 
6551 /* Set up other data */
6552 
6553 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6554 startline = (re->flags & PCRE_STARTLINE) != 0;
6555 firstline = (re->options & PCRE_FIRSTLINE) != 0;
6556 
6557 /* The code starts after the real_pcre block and the capture name table. */
6558 
6559 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6560   re->name_count * re->name_entry_size;
6561 
6562 md->start_subject = (PCRE_PUCHAR)subject;
6563 md->start_offset = start_offset;
6564 md->end_subject = md->start_subject + length;
6565 end_subject = md->end_subject;
6566 
6567 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6568 md->use_ucp = (re->options & PCRE_UCP) != 0;
6569 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6570 md->ignore_skip_arg = 0;
6571 
6572 /* Some options are unpacked into BOOL variables in the hope that testing
6573 them will be faster than individual option bits. */
6574 
6575 md->notbol = (options & PCRE_NOTBOL) != 0;
6576 md->noteol = (options & PCRE_NOTEOL) != 0;
6577 md->notempty = (options & PCRE_NOTEMPTY) != 0;
6578 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6579 
6580 md->hitend = FALSE;
6581 md->mark = md->nomatch_mark = NULL;     /* In case never set */
6582 
6583 md->recursive = NULL;                   /* No recursion at top level */
6584 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6585 
6586 md->lcc = tables + lcc_offset;
6587 md->fcc = tables + fcc_offset;
6588 md->ctypes = tables + ctypes_offset;
6589 
6590 /* Handle different \R options. */
6591 
6592 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6593   {
6594   case 0:
6595   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6596     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6597   else
6598 #ifdef BSR_ANYCRLF
6599   md->bsr_anycrlf = TRUE;
6600 #else
6601   md->bsr_anycrlf = FALSE;
6602 #endif
6603   break;
6604 
6605   case PCRE_BSR_ANYCRLF:
6606   md->bsr_anycrlf = TRUE;
6607   break;
6608 
6609   case PCRE_BSR_UNICODE:
6610   md->bsr_anycrlf = FALSE;
6611   break;
6612 
6613   default: return PCRE_ERROR_BADNEWLINE;
6614   }
6615 
6616 /* Handle different types of newline. The three bits give eight cases. If
6617 nothing is set at run time, whatever was used at compile time applies. */
6618 
6619 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6620         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6621   {
6622   case 0: newline = NEWLINE; break;   /* Compile-time default */
6623   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6624   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6625   case PCRE_NEWLINE_CR+
6626        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6627   case PCRE_NEWLINE_ANY: newline = -1; break;
6628   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6629   default: return PCRE_ERROR_BADNEWLINE;
6630   }
6631 
6632 if (newline == -2)
6633   {
6634   md->nltype = NLTYPE_ANYCRLF;
6635   }
6636 else if (newline < 0)
6637   {
6638   md->nltype = NLTYPE_ANY;
6639   }
6640 else
6641   {
6642   md->nltype = NLTYPE_FIXED;
6643   if (newline > 255)
6644     {
6645     md->nllen = 2;
6646     md->nl[0] = (newline >> 8) & 255;
6647     md->nl[1] = newline & 255;
6648     }
6649   else
6650     {
6651     md->nllen = 1;
6652     md->nl[0] = newline;
6653     }
6654   }
6655 
6656 /* Partial matching was originally supported only for a restricted set of
6657 regexes; from release 8.00 there are no restrictions, but the bits are still
6658 defined (though never set). So there's no harm in leaving this code. */
6659 
6660 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
6661   return PCRE_ERROR_BADPARTIAL;
6662 
6663 /* If the expression has got more back references than the offsets supplied can
6664 hold, we get a temporary chunk of working store to use during the matching.
6665 Otherwise, we can use the vector supplied, rounding down its size to a multiple
6666 of 3. */
6667 
6668 ocount = offsetcount - (offsetcount % 3);
6669 arg_offset_max = (2*ocount)/3;
6670 
6671 if (re->top_backref > 0 && re->top_backref >= ocount/3)
6672   {
6673   ocount = re->top_backref * 3 + 3;
6674   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6675   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6676   using_temporary_offsets = TRUE;
6677   DPRINTF(("Got memory to hold back references\n"));
6678   }
6679 else md->offset_vector = offsets;
6680 md->offset_end = ocount;
6681 md->offset_max = (2*ocount)/3;
6682 md->capture_last = 0;
6683 
6684 /* Reset the working variable associated with each extraction. These should
6685 never be used unless previously set, but they get saved and restored, and so we
6686 initialize them to avoid reading uninitialized locations. Also, unset the
6687 offsets for the matched string. This is really just for tidiness with callouts,
6688 in case they inspect these fields. */
6689 
6690 if (md->offset_vector != NULL)
6691   {
6692   register int *iptr = md->offset_vector + ocount;
6693   register int *iend = iptr - re->top_bracket;
6694   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6695   while (--iptr >= iend) *iptr = -1;
6696   if (offsetcount > 0) md->offset_vector[0] = -1;
6697   if (offsetcount > 1) md->offset_vector[1] = -1;
6698   }
6699 
6700 /* Set up the first character to match, if available. The first_char value is
6701 never set for an anchored regular expression, but the anchoring may be forced
6702 at run time, so we have to test for anchoring. The first char may be unset for
6703 an unanchored pattern, of course. If there's no first char and the pattern was
6704 studied, there may be a bitmap of possible first characters. */
6705 
6706 if (!anchored)
6707   {
6708   if ((re->flags & PCRE_FIRSTSET) != 0)
6709     {
6710     has_first_char = TRUE;
6711     first_char = first_char2 = (pcre_uchar)(re->first_char);
6712     if ((re->flags & PCRE_FCH_CASELESS) != 0)
6713       {
6714       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6715 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6716       if (utf && first_char > 127)
6717         first_char2 = UCD_OTHERCASE(first_char);
6718 #endif
6719       }
6720     }
6721   else
6722     if (!startline && study != NULL &&
6723       (study->flags & PCRE_STUDY_MAPPED) != 0)
6724         start_bits = study->start_bits;
6725   }
6726 
6727 /* For anchored or unanchored matches, there may be a "last known required
6728 character" set. */
6729 
6730 if ((re->flags & PCRE_REQCHSET) != 0)
6731   {
6732   has_req_char = TRUE;
6733   req_char = req_char2 = (pcre_uchar)(re->req_char);
6734   if ((re->flags & PCRE_RCH_CASELESS) != 0)
6735     {
6736     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6737 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6738     if (utf && req_char > 127)
6739       req_char2 = UCD_OTHERCASE(req_char);
6740 #endif
6741     }
6742   }
6743 
6744 
6745 /* ==========================================================================*/
6746 
6747 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
6748 the loop runs just once. */
6749 
6750 for(;;)
6751   {
6752   PCRE_PUCHAR save_end_subject = end_subject;
6753   PCRE_PUCHAR new_start_match;
6754 
6755   /* If firstline is TRUE, the start of the match is constrained to the first
6756   line of a multiline string. That is, the match must be before or at the first
6757   newline. Implement this by temporarily adjusting end_subject so that we stop
6758   scanning at a newline. If the match fails at the newline, later code breaks
6759   this loop. */
6760 
6761   if (firstline)
6762     {
6763     PCRE_PUCHAR t = start_match;
6764 #ifdef SUPPORT_UTF
6765     if (utf)
6766       {
6767       while (t < md->end_subject && !IS_NEWLINE(t))
6768         {
6769         t++;
6770         ACROSSCHAR(t < end_subject, *t, t++);
6771         }
6772       }
6773     else
6774 #endif
6775     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6776     end_subject = t;
6777     }
6778 
6779   /* There are some optimizations that avoid running the match if a known
6780   starting point is not found, or if a known later character is not present.
6781   However, there is an option that disables these, for testing and for ensuring
6782   that all callouts do actually occur. The option can be set in the regex by
6783   (*NO_START_OPT) or passed in match-time options. */
6784 
6785   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6786     {
6787     /* Advance to a unique first char if there is one. */
6788 
6789     if (has_first_char)
6790       {
6791       pcre_uchar smc;
6792 
6793       if (first_char != first_char2)
6794         while (start_match < end_subject &&
6795           (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
6796           start_match++;
6797       else
6798         while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
6799           start_match++;
6800       }
6801 
6802     /* Or to just after a linebreak for a multiline match */
6803 
6804     else if (startline)
6805       {
6806       if (start_match > md->start_subject + start_offset)
6807         {
6808 #ifdef SUPPORT_UTF
6809         if (utf)
6810           {
6811           while (start_match < end_subject && !WAS_NEWLINE(start_match))
6812             {
6813             start_match++;
6814             ACROSSCHAR(start_match < end_subject, *start_match,
6815               start_match++);
6816             }
6817           }
6818         else
6819 #endif
6820         while (start_match < end_subject && !WAS_NEWLINE(start_match))
6821           start_match++;
6822 
6823         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6824         and we are now at a LF, advance the match position by one more character.
6825         */
6826 
6827         if (start_match[-1] == CHAR_CR &&
6828              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6829              start_match < end_subject &&
6830              UCHAR21TEST(start_match) == CHAR_NL)
6831           start_match++;
6832         }
6833       }
6834 
6835     /* Or to a non-unique first byte after study */
6836 
6837     else if (start_bits != NULL)
6838       {
6839       while (start_match < end_subject)
6840         {
6841         register pcre_uint32 c = UCHAR21TEST(start_match);
6842 #ifndef COMPILE_PCRE8
6843         if (c > 255) c = 255;
6844 #endif
6845         if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
6846         start_match++;
6847         }
6848       }
6849     }   /* Starting optimizations */
6850 
6851   /* Restore fudged end_subject */
6852 
6853   end_subject = save_end_subject;
6854 
6855   /* The following two optimizations are disabled for partial matching or if
6856   disabling is explicitly requested. */
6857 
6858   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6859     {
6860     /* If the pattern was studied, a minimum subject length may be set. This is
6861     a lower bound; no actual string of that length may actually match the
6862     pattern. Although the value is, strictly, in characters, we treat it as
6863     bytes to avoid spending too much time in this optimization. */
6864 
6865     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6866         (pcre_uint32)(end_subject - start_match) < study->minlength)
6867       {
6868       rc = MATCH_NOMATCH;
6869       break;
6870       }
6871 
6872     /* If req_char is set, we know that that character must appear in the
6873     subject for the match to succeed. If the first character is set, req_char
6874     must be later in the subject; otherwise the test starts at the match point.
6875     This optimization can save a huge amount of backtracking in patterns with
6876     nested unlimited repeats that aren't going to match. Writing separate code
6877     for cased/caseless versions makes it go faster, as does using an
6878     autoincrement and backing off on a match.
6879 
6880     HOWEVER: when the subject string is very, very long, searching to its end
6881     can take a long time, and give bad performance on quite ordinary patterns.
6882     This showed up when somebody was matching something like /^\d+C/ on a
6883     32-megabyte string... so we don't do this when the string is sufficiently
6884     long. */
6885 
6886     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6887       {
6888       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6889 
6890       /* We don't need to repeat the search if we haven't yet reached the
6891       place we found it at last time. */
6892 
6893       if (p > req_char_ptr)
6894         {
6895         if (req_char != req_char2)
6896           {
6897           while (p < end_subject)
6898             {
6899             register pcre_uint32 pp = UCHAR21INCTEST(p);
6900             if (pp == req_char || pp == req_char2) { p--; break; }
6901             }
6902           }
6903         else
6904           {
6905           while (p < end_subject)
6906             {
6907             if (UCHAR21INCTEST(p) == req_char) { p--; break; }
6908             }
6909           }
6910 
6911         /* If we can't find the required character, break the matching loop,
6912         forcing a match failure. */
6913 
6914         if (p >= end_subject)
6915           {
6916           rc = MATCH_NOMATCH;
6917           break;
6918           }
6919 
6920         /* If we have found the required character, save the point where we
6921         found it, so that we don't search again next time round the loop if
6922         the start hasn't passed this character yet. */
6923 
6924         req_char_ptr = p;
6925         }
6926       }
6927     }
6928 
6929 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
6930   printf(">>>> Match against: ");
6931   pchars(start_match, end_subject - start_match, TRUE, md);
6932   printf("\n");
6933 #endif
6934 
6935   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6936   first starting point for which a partial match was found. */
6937 
6938   md->start_match_ptr = start_match;
6939   md->start_used_ptr = start_match;
6940   md->match_call_count = 0;
6941   md->match_function_type = 0;
6942   md->end_offset_top = 0;
6943   md->skip_arg_count = 0;
6944   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6945   if (md->hitend && start_partial == NULL)
6946     {
6947     start_partial = md->start_used_ptr;
6948     match_partial = start_match;
6949     }
6950 
6951   switch(rc)
6952     {
6953     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6954     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6955     entirely. The only way we can do that is to re-do the match at the same
6956     point, with a flag to force SKIP with an argument to be ignored. Just
6957     treating this case as NOMATCH does not work because it does not check other
6958     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6959 
6960     case MATCH_SKIP_ARG:
6961     new_start_match = start_match;
6962     md->ignore_skip_arg = md->skip_arg_count;
6963     break;
6964 
6965     /* SKIP passes back the next starting point explicitly, but if it is no
6966     greater than the match we have just done, treat it as NOMATCH. */
6967 
6968     case MATCH_SKIP:
6969     if (md->start_match_ptr > start_match)
6970       {
6971       new_start_match = md->start_match_ptr;
6972       break;
6973       }
6974     /* Fall through */
6975 
6976     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6977     exactly like PRUNE. Unset ignore SKIP-with-argument. */
6978 
6979     case MATCH_NOMATCH:
6980     case MATCH_PRUNE:
6981     case MATCH_THEN:
6982     md->ignore_skip_arg = 0;
6983     new_start_match = start_match + 1;
6984 #ifdef SUPPORT_UTF
6985     if (utf)
6986       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6987         new_start_match++);
6988 #endif
6989     break;
6990 
6991     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6992 
6993     case MATCH_COMMIT:
6994     rc = MATCH_NOMATCH;
6995     goto ENDLOOP;
6996 
6997     /* Any other return is either a match, or some kind of error. */
6998 
6999     default:
7000     goto ENDLOOP;
7001     }
7002 
7003   /* Control reaches here for the various types of "no match at this point"
7004   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7005 
7006   rc = MATCH_NOMATCH;
7007 
7008   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
7009   newline in the subject (though it may continue over the newline). Therefore,
7010   if we have just failed to match, starting at a newline, do not continue. */
7011 
7012   if (firstline && IS_NEWLINE(start_match)) break;
7013 
7014   /* Advance to new matching position */
7015 
7016   start_match = new_start_match;
7017 
7018   /* Break the loop if the pattern is anchored or if we have passed the end of
7019   the subject. */
7020 
7021   if (anchored || start_match > end_subject) break;
7022 
7023   /* If we have just passed a CR and we are now at a LF, and the pattern does
7024   not contain any explicit matches for \r or \n, and the newline option is CRLF
7025   or ANY or ANYCRLF, advance the match position by one more character. In
7026   normal matching start_match will always be greater than the first position at
7027   this stage, but a failed *SKIP can cause a return at the same point, which is
7028   why the first test exists. */
7029 
7030   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
7031       start_match[-1] == CHAR_CR &&
7032       start_match < end_subject &&
7033       *start_match == CHAR_NL &&
7034       (re->flags & PCRE_HASCRORLF) == 0 &&
7035         (md->nltype == NLTYPE_ANY ||
7036          md->nltype == NLTYPE_ANYCRLF ||
7037          md->nllen == 2))
7038     start_match++;
7039 
7040   md->mark = NULL;   /* Reset for start of next match attempt */
7041   }                  /* End of for(;;) "bumpalong" loop */
7042 
7043 /* ==========================================================================*/
7044 
7045 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7046 conditions is true:
7047 
7048 (1) The pattern is anchored or the match was failed by (*COMMIT);
7049 
7050 (2) We are past the end of the subject;
7051 
7052 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7053     this option requests that a match occur at or before the first newline in
7054     the subject.
7055 
7056 When we have a match and the offset vector is big enough to deal with any
7057 backreferences, captured substring offsets will already be set up. In the case
7058 where we had to get some local store to hold offsets for backreference
7059 processing, copy those that we can. In this case there need not be overflow if
7060 certain parts of the pattern were not used, even though there are more
7061 capturing parentheses than vector slots. */
7062 
7063 ENDLOOP:
7064 
7065 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7066   {
7067   if (using_temporary_offsets)
7068     {
7069     if (arg_offset_max >= 4)
7070       {
7071       memcpy(offsets + 2, md->offset_vector + 2,
7072         (arg_offset_max - 2) * sizeof(int));
7073       DPRINTF(("Copied offsets from temporary memory\n"));
7074       }
7075     if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7076     DPRINTF(("Freeing temporary memory\n"));
7077     (PUBL(free))(md->offset_vector);
7078     }
7079 
7080   /* Set the return code to the number of captured strings, or 0 if there were
7081   too many to fit into the vector. */
7082 
7083   rc = ((md->capture_last & OVFLBIT) != 0 &&
7084          md->end_offset_top >= arg_offset_max)?
7085     0 : md->end_offset_top/2;
7086 
7087   /* If there is space in the offset vector, set any unused pairs at the end of
7088   the pattern to -1 for backwards compatibility. It is documented that this
7089   happens. In earlier versions, the whole set of potential capturing offsets
7090   was set to -1 each time round the loop, but this is handled differently now.
7091   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7092   those at the end that need unsetting here. We can't just unset them all at
7093   the start of the whole thing because they may get set in one branch that is
7094   not the final matching branch. */
7095 
7096   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
7097     {
7098     register int *iptr, *iend;
7099     int resetcount = 2 + re->top_bracket * 2;
7100     if (resetcount > offsetcount) resetcount = offsetcount;
7101     iptr = offsets + md->end_offset_top;
7102     iend = offsets + resetcount;
7103     while (iptr < iend) *iptr++ = -1;
7104     }
7105 
7106   /* If there is space, set up the whole thing as substring 0. The value of
7107   md->start_match_ptr might be modified if \K was encountered on the successful
7108   matching path. */
7109 
7110   if (offsetcount < 2) rc = 0; else
7111     {
7112     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7113     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7114     }
7115 
7116   /* Return MARK data if requested */
7117 
7118   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7119     *(extra_data->mark) = (pcre_uchar *)md->mark;
7120   DPRINTF((">>>> returning %d\n", rc));
7121 #ifdef NO_RECURSE
7122   release_match_heapframes(&frame_zero);
7123 #endif
7124   return rc;
7125   }
7126 
7127 /* Control gets here if there has been an error, or if the overall match
7128 attempt has failed at all permitted starting positions. */
7129 
7130 if (using_temporary_offsets)
7131   {
7132   DPRINTF(("Freeing temporary memory\n"));
7133   (PUBL(free))(md->offset_vector);
7134   }
7135 
7136 /* For anything other than nomatch or partial match, just return the code. */
7137 
7138 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7139   {
7140   DPRINTF((">>>> error: returning %d\n", rc));
7141 #ifdef NO_RECURSE
7142   release_match_heapframes(&frame_zero);
7143 #endif
7144   return rc;
7145   }
7146 
7147 /* Handle partial matches - disable any mark data */
7148 
7149 if (match_partial != NULL)
7150   {
7151   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7152   md->mark = NULL;
7153   if (offsetcount > 1)
7154     {
7155     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7156     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7157     if (offsetcount > 2)
7158       offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7159     }
7160   rc = PCRE_ERROR_PARTIAL;
7161   }
7162 
7163 /* This is the classic nomatch case */
7164 
7165 else
7166   {
7167   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7168   rc = PCRE_ERROR_NOMATCH;
7169   }
7170 
7171 /* Return the MARK data if it has been requested. */
7172 
7173 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7174   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7175 #ifdef NO_RECURSE
7176   release_match_heapframes(&frame_zero);
7177 #endif
7178 return rc;
7179 }
7180 
7181 /* End of pcre_exec.c */
7182