1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2018 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15     * Redistributions of source code must retain the above copyright notice,
16       this list of conditions and the following disclaimer.
17 
18     * Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in the
20       documentation and/or other materials provided with the distribution.
21 
22     * Neither the name of the University of Cambridge nor the names of its
23       contributors may be used to endorse or promote products derived from
24       this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 /* #define ERLANG_DEBUG 1 */
41 
42 /* This module contains pcre_exec(), the externally visible function that does
43 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
44 possible. There are also some static supporting functions. */
45 
46 /* %ExternalCopyright% */
47 
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51 
52 #define NLBLOCK md             /* Block containing newline information */
53 #define PSSTART start_subject  /* Field containing processed string start */
54 #define PSEND   end_subject    /* Field containing processed string end */
55 
56 #include "pcre_internal.h"
57 
58 /* Undefine some potentially clashing cpp symbols */
59 
60 #undef min
61 #undef max
62 
63 /* The md->capture_last field uses the lower 16 bits for the last captured
64 substring (which can never be greater than 65535) and a bit in the top half
65 to mean "capture vector overflowed". This odd way of doing things was
66 implemented when it was realized that preserving and restoring the overflow bit
67 whenever the last capture number was saved/restored made for a neater
68 interface, and doing it this way saved on (a) another variable, which would
69 have increased the stack frame size (a big NO-NO in PCRE) and (b) another
70 separate set of save/restore instructions. The following defines are used in
71 implementing this. */
72 
73 #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
74 #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
75 #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
76 
77 /* Values for setting in md->match_function_type to indicate two special types
78 of call to match(). We do it this way to save on using another stack variable,
79 as stack usage is to be discouraged. */
80 
81 #define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
82 #define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
83 
/* Ordinary (non-error) results of match(). Error results are the externally
defined PCRE_ERROR_xxx codes, all of which are negative. */

#define MATCH_NOMATCH      0
#define MATCH_MATCH        1

/* Internal-only results of match(). They are chosen to be far enough below
zero that they cannot collide with the external error codes. */

#define MATCH_ACCEPT       (-999)
#define MATCH_KETRPOS      (-998)
#define MATCH_ONCE         (-997)

/* The five backtracking-verb results form one contiguous run so that a single
range test (MATCH_BACKTRACK_MIN .. MATCH_BACKTRACK_MAX) recognises any of
them. Each is written as an offset from the first to make the required
ordering explicit; the resulting values are shown alongside. */

#define MATCH_COMMIT       (-996)
#define MATCH_PRUNE        (MATCH_COMMIT + 1)     /* -995 */
#define MATCH_SKIP         (MATCH_COMMIT + 2)     /* -994 */
#define MATCH_SKIP_ARG     (MATCH_COMMIT + 3)     /* -993 */
#define MATCH_THEN         (MATCH_COMMIT + 4)     /* -992 */

#define MATCH_BACKTRACK_MIN MATCH_COMMIT
#define MATCH_BACKTRACK_MAX MATCH_THEN
105 
106 /* Maximum number of ints of offset to save on the stack for recursive calls.
107 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
108 because the offset vector is always a multiple of 3 long. */
109 
110 #define REC_STACK_SAVE_MAX 30
111 
/* Lower and upper repeat counts for the common repeat forms, one entry per
form. In rep_max, an entry of 0 means "no upper limit".
NOTE(review): the index is presumably the repeat opcode's offset from the
first common-repeat opcode - confirm against the code that reads these. */

static const char rep_min[] =
  { 0, 0, 1, 1, 0, 0,       /* entries 0-5  */
    0, 0, 0, 1, 0 };        /* entries 6-10 */

static const char rep_max[] =
  { 0, 0, 0, 0, 1, 1,       /* entries 0-5  */
    0, 0, 0, 0, 1 };        /* entries 6-10 */
116 
117 #ifdef PCRE_DEBUG
118 /*************************************************
119 *        Debugging function to print chars       *
120 *************************************************/
121 
122 /* Print a sequence of chars in printable format, stopping at the end of the
123 subject if the requested.
124 
125 Arguments:
126   p           points to characters
127   length      number to print
128   is_subject  TRUE if printing from within md->start_subject
129   md          pointer to matching data block, if is_subject is TRUE
130 
131 Returns:     nothing
132 */
133 
134 static void
pchars(const pcre_uchar * p,int length,BOOL is_subject,match_data * md)135 pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
136 {
137 pcre_uint32 c;
138 BOOL utf = md->utf;
139 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
140 while (length-- > 0)
141   if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
142 }
143 #endif
144 
145 #ifdef ERLANG_INTEGRATION
146 #ifdef ERLANG_DEBUG
147 #include <stdarg.h>
/* Debug trace helper: writes "PCRE: ", then the printf-style formatted
message, then CR LF, all to stderr. */

static void
edebug_printf(const char *format, ...)
{
  va_list ap;

  fputs("PCRE: ", stderr);

  va_start(ap, format);
  vfprintf(stderr, format, ap);
  va_end(ap);

  fputs("\r\n", stderr);
}
159 #endif
160 #endif
161 
162 
163 /*************************************************
164 *          Match a back-reference                *
165 *************************************************/
166 
167 /* Normally, if a back reference hasn't been set, the length that is passed is
168 negative, so the match always fails. However, in JavaScript compatibility mode,
169 the length passed is zero. Note that in caseless UTF-8 mode, the number of
170 subject bytes matched may be different to the number of reference bytes.
171 
172 Arguments:
173   offset      index into the offset vector
174   eptr        pointer into the subject
175   length      length of reference to be matched (number of bytes)
176   md          points to match data block
177   caseless    TRUE if caseless
178 
179 Returns:      >= 0 the number of subject bytes matched
180               -1 no match
181               -2 partial match; always given if at end subject
182 */
183 
184 static int
match_ref(int offset,register PCRE_PUCHAR eptr,int length,match_data * md,BOOL caseless)185 match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
186   BOOL caseless)
187 {
188 PCRE_PUCHAR eptr_start = eptr;
189 register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
190 #if defined SUPPORT_UTF && defined SUPPORT_UCP
191 BOOL utf = md->utf;
192 #endif
193 
194 #ifdef PCRE_DEBUG
195 if (eptr >= md->end_subject)
196   printf("matching subject <null>");
197 else
198   {
199   printf("matching subject ");
200   pchars(eptr, length, TRUE, md);
201   }
202 printf(" against backref ");
203 pchars(p, length, FALSE, md);
204 printf("\n");
205 #endif
206 
207 /* Always fail if reference not set (and not JavaScript compatible - in that
208 case the length is passed as zero). */
209 
210 if (length < 0) return -1;
211 
212 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
213 properly if Unicode properties are supported. Otherwise, we can check only
214 ASCII characters. */
215 
216 if (caseless)
217   {
218 #if defined SUPPORT_UTF && defined SUPPORT_UCP
219   if (utf)
220     {
221     /* Match characters up to the end of the reference. NOTE: the number of
222     data units matched may differ, because in UTF-8 there are some characters
223     whose upper and lower case versions code have different numbers of bytes.
224     For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
225     (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
226     sequence of two of the latter. It is important, therefore, to check the
227     length along the reference, not along the subject (earlier code did this
228     wrong). */
229 
230     PCRE_PUCHAR endptr = p + length;
231     while (p < endptr)
232       {
233       pcre_uint32 c, d;
234       const ucd_record *ur;
235       if (eptr >= md->end_subject) return -2;   /* Partial match */
236       GETCHARINC(c, eptr);
237       GETCHARINC(d, p);
238       ur = GET_UCD(d);
239       if (c != d && c != d + ur->other_case)
240         {
241         const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
242         for (;;)
243           {
244           if (c < *pp) return -1;
245           if (c == *pp++) break;
246           }
247         }
248       }
249     }
250   else
251 #endif
252 
253   /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
254   is no UCP support. */
255     {
256     while (length-- > 0)
257       {
258       pcre_uint32 cc, cp;
259       if (eptr >= md->end_subject) return -2;   /* Partial match */
260       cc = UCHAR21TEST(eptr);
261       cp = UCHAR21TEST(p);
262       if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
263       p++;
264       eptr++;
265       }
266     }
267   }
268 
269 /* In the caseful case, we can just compare the bytes, whether or not we
270 are in UTF-8 mode. */
271 
272 else
273   {
274   while (length-- > 0)
275     {
276     if (eptr >= md->end_subject) return -2;   /* Partial match */
277     if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
278     }
279   }
280 
281 return (int)(eptr - eptr_start);
282 }
283 
284 
285 
286 /***************************************************************************
287 ****************************************************************************
288                    RECURSION IN THE match() FUNCTION
289 
290 The match() function is highly recursive, though not every recursive call
291 increases the recursive depth. Nevertheless, some regular expressions can cause
292 it to recurse to a great depth. I was writing for Unix, so I just let it call
293 itself recursively. This uses the stack for saving everything that has to be
294 saved for a recursive call. On Unix, the stack can be large, and this works
295 fine.
296 
297 It turns out that on some non-Unix-like systems there are problems with
298 programs that use a lot of stack. (This despite the fact that every last chip
299 has oodles of memory these days, and techniques for extending the stack have
300 been known for decades.) So....
301 
302 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
303 calls by keeping local variables that need to be preserved in blocks of memory
304 obtained from malloc() instead of on the stack. Macros are used to
305 achieve this so that the actual code doesn't look very different to what it
306 always used to.
307 
308 The original heap-recursive code used longjmp(). However, it seems that this
309 can be very slow on some operating systems. Following a suggestion from Stan
310 Switzer, the use of longjmp() has been abolished, at the cost of having to
311 provide a unique number for each call to RMATCH. There is no way of generating
312 a sequence of numbers at compile time in C. I have given them names, to make
313 them stand out more clearly.
314 
315 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
316 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
317 tests. Furthermore, not using longjmp() means that local dynamic variables
318 don't have indeterminate values; this has meant that the frame size can be
319 reduced because the result can be "passed back" by straight setting of the
320 variable instead of being passed in the frame.
321 ****************************************************************************
322 ***************************************************************************/
323 
/* Identifying numbers for the RMATCH call sites, RM1 .. RM67, numbered
consecutively from 1. If this list is altered, the code at HEAP_RETURN below
must be changed to match. */

enum {
  RM1 = 1,   RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  RM11 = 11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  RM21 = 21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  RM31 = 31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41 = 41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  RM51 = 51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
  RM61 = 61, RM62, RM63, RM64, RM65, RM66, RM67
};
334 
335 /* These versions of the macros use the stack, as normal. There are debugging
336 versions and production versions. Note that the "rw" argument of RMATCH isn't
337 actually used in this definition. */
338 
339 #ifndef NO_RECURSE
340 #define REGISTER register
341 
#ifdef PCRE_DEBUG

/* Debugging form of the recursive call: traces the call and the return on
stdout around a genuine call of match(). The result is left in rrc. The last
macro argument (the RM number) is not used. */

#define RMATCH(r_eptr, r_ecode, r_top, r_md, r_eptrb, r_where) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match((r_eptr), (r_ecode), mstart, (r_top), (r_md), (r_eptrb), \
    rdepth + 1); \
  printf("to line %d\n", __LINE__); \
  }

/* Debugging form of the return: traces the value, then returns it. */

#define RRETURN(r_value) \
  { \
  printf("match() returned %d from line %d\n", (r_value), __LINE__); \
  return (r_value); \
  }

#else

/* Production forms: a plain recursive call of match(), one level deeper, with
the result left in rrc, and a plain return. The last argument of RMATCH (the
RM number) is not used. */

#define RMATCH(r_eptr, r_ecode, r_top, r_md, r_eptrb, r_where) \
  rrc = match((r_eptr), (r_ecode), mstart, (r_top), (r_md), (r_eptrb), \
    rdepth + 1)

#define RRETURN(r_value) return (r_value)

#endif
359 
360 #else
361 
362 
363 /* These versions of the macros manage a private stack on the heap. Note that
364 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
365 argument of match(), which never changes. */
366 
367 #define REGISTER
368 
/* Heap-frame form of the "recursive call". The frame that follows the current
one is reused if it already exists; otherwise one is obtained with
stack_malloc and linked in (PCRE_ERROR_NOMEMORY is returned on failure). The
RM number is recorded in the current frame, the call arguments are stored in
the new frame, the new frame becomes current, and control jumps to
HEAP_RECURSE. The label L_<RM number> marks where execution continues after
the "call". The argument values are stored in the same order as they appear
in the argument list, and before "frame" is switched, because they are
normally expressions that read the current frame. */

#define RMATCH(r_eptr, r_ecode, r_top, r_md, r_eptrb, r_where)\
  {\
  heapframe *newframe = frame->Xnextframe;\
  if (newframe == NULL)\
    {\
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    frame->Xnextframe = newframe;\
    newframe->Xnextframe = NULL;\
    }\
  frame->Xwhere = r_where;\
  newframe->Xeptr = r_eptr;\
  newframe->Xecode = r_ecode;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = r_top;\
  newframe->Xeptrb = r_eptrb;\
  newframe->Xprevframe = frame;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##r_where:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
393 
/* Heap-frame form of "return". The previous frame is made current. If there
is one, the result is placed in rrc and control jumps to HEAP_RETURN;
otherwise this was the outermost frame and the function really returns. Note
that the argument is evaluated only after the frame has been switched. */

#ifdef ERLANG_INTEGRATION

/* With ERLANG_INTEGRATION, a real return also subtracts the accumulated
LOOP_COUNT from md->loop_limit whenever LOOP_LIMIT is non-zero. */

#define RRETURN(ra)\
  {\
  frame = frame->Xprevframe;\
  if (frame == NULL)\
    {\
    if (LOOP_LIMIT != 0) md->loop_limit -= LOOP_COUNT;\
    return ra;\
    }\
  rrc = ra;\
  goto HEAP_RETURN;\
  }

#else

#define RRETURN(ra)\
  {\
  frame = frame->Xprevframe;\
  if (frame == NULL) return ra;\
  rrc = ra;\
  goto HEAP_RETURN;\
  }

#endif
423 
424 /* Structure for remembering the local variables in a private frame */
425 
426 typedef struct heapframe {
427   struct heapframe *Xprevframe;
428   struct heapframe *Xnextframe;
429 
430   /* Function arguments that may change */
431 
432   PCRE_PUCHAR Xeptr;
433   const pcre_uchar *Xecode;
434   PCRE_PUCHAR Xmstart;
435   int Xoffset_top;
436   eptrblock *Xeptrb;
437   unsigned int Xrdepth;
438 
439   /* Function local variables */
440 
441   PCRE_PUCHAR Xcallpat;
442 #ifdef SUPPORT_UTF
443   PCRE_PUCHAR Xcharptr;
444 #endif
445   PCRE_PUCHAR Xdata;
446   PCRE_PUCHAR Xnext;
447   PCRE_PUCHAR Xpp;
448   PCRE_PUCHAR Xprev;
449   PCRE_PUCHAR Xsaved_eptr;
450 
451   recursion_info Xnew_recursive;
452 
453   BOOL Xcur_is_word;
454   BOOL Xcondition;
455   BOOL Xprev_is_word;
456 
457 #ifdef SUPPORT_UCP
458   int Xprop_type;
459   unsigned int Xprop_value;
460   int Xprop_fail_result;
461   int Xoclength;
462   pcre_uchar Xocchars[6];
463 #endif
464 
465   int Xcodelink;
466   int Xctype;
467   unsigned int Xfc;
468   int Xfi;
469   int Xlength;
470   int Xmax;
471   int Xmin;
472   unsigned int Xnumber;
473   int Xoffset;
474   unsigned int Xop;
475   pcre_int32 Xsave_capture_last;
476   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
477   int Xstacksave[REC_STACK_SAVE_MAX];
478 
479   eptrblock Xnewptrb;
480 
481   /* Where to jump back to */
482 
483   int Xwhere;
484 #if defined(ERLANG_INTEGRATION)
485     int Xlgb;
486     int Xrgb;
487 #endif
488 } heapframe;
489 
490 #endif
491 
492 
493 /***************************************************************************
494 ***************************************************************************/
495 
496 
497 
498 /*************************************************
499 *         Match from current position            *
500 *************************************************/
501 
502 /* This function is called recursively in many circumstances. Whenever it
503 returns a negative (error) response, the outer incarnation must also return the
504 same response. */
505 
506 /* These macros pack up tests that are used for partial matching, and which
507 appear several times in the code. We set the "hit end" flag if the pointer is
508 at the end of the subject and also past the start of the subject (i.e.
509 something has been matched). For hard partial matching, we then return
510 immediately. The second one is used when we already know we are past the end of
511 the subject. */
512 
/* The action shared by both partial-match checks: note that the end of the
subject was hit and, when md->partial is greater than 1, give up at once with
PCRE_ERROR_PARTIAL. */

#define PARTIAL_HIT_ACTION \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }

/* Full check: partial matching requested, pointer at or beyond the end of the
subject, and pointer past md->start_used_ptr. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && \
      eptr > md->start_used_ptr) \
    PARTIAL_HIT_ACTION

/* Short check, for places that already know the end of the subject has been
reached, so that test is omitted. */

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > md->start_used_ptr) \
    PARTIAL_HIT_ACTION
527 
528 
529 /* Performance note: It might be tempting to extract commonly used fields from
530 the md structure (e.g. utf, end_subject) into individual variables to improve
531 performance. Tests using gcc on a SPARC disproved this; in the first case, it
532 made performance worse.
533 
534 Arguments:
535    eptr        pointer to current character in subject
536    ecode       pointer to current position in compiled code
537    mstart      pointer to the current match start position (can be modified
538                  by encountering \K)
539    offset_top  current top pointer
540    md          pointer to "static" info for the match
541    eptrb       pointer to chain of blocks containing eptr at start of
542                  brackets - for testing for empty matches
543    rdepth      the recursion depth
544 
545 Returns:       MATCH_MATCH if matched            )  these values are >= 0
546                MATCH_NOMATCH if failed to match  )
547                a negative MATCH_xxx value for PRUNE, SKIP, etc
548                a negative PCRE_ERROR_xxx value if aborted by an error condition
549                  (e.g. stopped by repeated call or recursion limit)
550 */
551 
552 static int
match(REGISTER PCRE_PUCHAR eptr,REGISTER const pcre_uchar * ecode,PCRE_PUCHAR mstart,int offset_top,match_data * md,eptrblock * eptrb,unsigned int rdepth)553 match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
554   PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
555   unsigned int rdepth)
556 {
557 /* These variables do not need to be preserved over recursion in this function,
558 so they can be ordinary variables in all cases. Mark some of them with
559 "register" because they are used a lot in loops. */
560 
561 register int  rrc;         /* Returns from recursive calls */
562 register int  i;           /* Used for loops not involving calls to RMATCH() */
563 register pcre_uint32 c;    /* Character values not kept over RMATCH() calls */
564 register BOOL utf;         /* Local copy of UTF flag for speed */
565 
566 BOOL minimize, possessive; /* Quantifier options */
567 BOOL caseless;
568 int condcode;
569 
570 /* When recursion is not being used, all "local" variables that have to be
571 preserved over calls to RMATCH() are part of a "frame". We set up the top-level
572 frame on the stack here; subsequent instantiations are obtained from the heap
573 whenever RMATCH() does a "recursion". See the macro definitions above. Putting
574 the top-level on the stack rather than malloc-ing them all gives a performance
575 boost in many cases where there is not much "recursion". */
576 
577 #ifdef NO_RECURSE
578 
579 #ifdef ERLANG_INTEGRATION
580 #define LOOP_COUNT loop_count
581 #define LOOP_LIMIT loop_limit
582 #ifdef ERLANG_DEBUG
583 #define EDEBUGF(X) edebug_printf X
584 #else
585 #define EDEBUGF(X)
586 #endif
587 #define COST(N) (LOOP_COUNT += (N))
588 #define LABEL_XCAT(A,B) A##B
589 #define LABEL_CAT(A,B) LABEL_XCAT(A,B)
590 
591 #define COST_CHK(N) 				\
592 do {						\
593   LOOP_COUNT += (N);				\
594   if (LOOP_LIMIT != 0) {			\
595     if (LOOP_COUNT > LOOP_LIMIT) {              \
596       frame->Xwhere = __LINE__ + 100;	        \
597       goto LOOP_COUNT_BREAK;			\
598       LABEL_CAT(L_LOOP_COUNT_,__LINE__):	\
599       ;                                         \
600     }						\
601   }						\
602 } while (0)
603 
604 register int loop_count = 0;
605 register int loop_limit = md->loop_limit;
606 heapframe *frame;
607 if (md->state_save) {
608   frame = md->state_save;
609   EDEBUGF(("Break restore!"));
610   goto LOOP_COUNT_RETURN;
611 }
612 frame = (heapframe *)md->match_frames_base;
613 #else
614 #define COST(N)
615 #define COST_CHK(N)
616 heapframe *frame = (heapframe *)md->match_frames_base;
617 #endif
618 
619 
620 /* Copy in the original argument variables */
621 
622 frame->Xeptr = eptr;
623 frame->Xecode = ecode;
624 frame->Xmstart = mstart;
625 frame->Xoffset_top = offset_top;
626 frame->Xeptrb = eptrb;
627 frame->Xrdepth = rdepth;
628 
629 /* This is where control jumps back to in order to effect "recursion" */
630 
631 HEAP_RECURSE:
632 
633 /* Macros make the argument variables come from the current frame */
634 
635 #define eptr               frame->Xeptr
636 #define ecode              frame->Xecode
637 #define mstart             frame->Xmstart
638 #define offset_top         frame->Xoffset_top
639 #define eptrb              frame->Xeptrb
640 #define rdepth             frame->Xrdepth
641 
642 /* Ditto for the local variables */
643 
644 #ifdef SUPPORT_UTF
645 #define charptr            frame->Xcharptr
646 #endif
647 #define callpat            frame->Xcallpat
648 #define codelink           frame->Xcodelink
649 #define data               frame->Xdata
650 #define next               frame->Xnext
651 #define pp                 frame->Xpp
652 #define prev               frame->Xprev
653 #define saved_eptr         frame->Xsaved_eptr
654 
655 #define new_recursive      frame->Xnew_recursive
656 
657 #define cur_is_word        frame->Xcur_is_word
658 #define condition          frame->Xcondition
659 #define prev_is_word       frame->Xprev_is_word
660 
661 #ifdef SUPPORT_UCP
662 #define prop_type          frame->Xprop_type
663 #define prop_value         frame->Xprop_value
664 #define prop_fail_result   frame->Xprop_fail_result
665 #define oclength           frame->Xoclength
666 #define occhars            frame->Xocchars
667 #endif
668 
669 #define ctype              frame->Xctype
670 #define fc                 frame->Xfc
671 #define fi                 frame->Xfi
672 #define length             frame->Xlength
673 #define max                frame->Xmax
674 #define min                frame->Xmin
675 #define number             frame->Xnumber
676 #define offset             frame->Xoffset
677 #define op                 frame->Xop
678 #define save_capture_last  frame->Xsave_capture_last
679 #define save_offset1       frame->Xsave_offset1
680 #define save_offset2       frame->Xsave_offset2
681 #define save_offset3       frame->Xsave_offset3
682 #define stacksave          frame->Xstacksave
683 #if defined(ERLANG_INTEGRATION)
684 #define lgb                frame->Xlgb
685 #define rgb                frame->Xrgb
686 #endif
687 
688 #define newptrb            frame->Xnewptrb
689 
690 /* When recursion is being used, local variables are allocated on the stack and
691 get preserved during recursion in the normal way. In this environment, fi and
692 i, and fc and c, can be the same variables. */
693 
694 #else         /* NO_RECURSE not defined */
695 #define COST(N)
696 #define COST_CHK(N)
697 #define fi i
698 #define fc c
699 
700 /* Many of the following variables are used only in small blocks of the code.
701 My normal style of coding would have declared them within each of those blocks.
702 However, in order to accommodate the version of this code that uses an external
703 "stack" implemented on the heap, it is easier to declare them all here, so the
704 declarations can be cut out in a block. The only declarations within blocks
705 below are for variables that do not have to be preserved over a recursive call
706 to RMATCH(). */
707 
708 #ifdef SUPPORT_UTF
709 const pcre_uchar *charptr;
710 #endif
711 const pcre_uchar *callpat;
712 const pcre_uchar *data;
713 const pcre_uchar *next;
714 PCRE_PUCHAR       pp;
715 const pcre_uchar *prev;
716 PCRE_PUCHAR       saved_eptr;
717 
718 recursion_info new_recursive;
719 
720 BOOL cur_is_word;
721 BOOL condition;
722 BOOL prev_is_word;
723 
724 #ifdef SUPPORT_UCP
725 int prop_type;
726 unsigned int prop_value;
727 int prop_fail_result;
728 int oclength;
729 pcre_uchar occhars[6];
730 #endif
731 
732 int codelink;
733 int ctype;
734 int length;
735 int max;
736 int min;
737 unsigned int number;
738 int offset;
739 unsigned int op;
740 pcre_int32 save_capture_last;
741 int save_offset1, save_offset2, save_offset3;
742 int stacksave[REC_STACK_SAVE_MAX];
743 
744 eptrblock newptrb;
745 
746 /* There is a special fudge for calling match() in a way that causes it to
747 measure the size of its basic stack frame when the stack is being used for
748 recursion. The second argument (ecode) being NULL triggers this behaviour. It
749 cannot normally ever be NULL. The return is the negated value of the frame
750 size. */
751 
752 if (ecode == NULL)
753   {
754   if (rdepth == 0)
755     return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
756   else
757     {
758     int len = (int)((char *)&rdepth - (char *)eptr);
759     return (len > 0)? -len : len;
760     }
761   }
762 #endif     /* NO_RECURSE */
763 
764 /* To save space on the stack and in the heap frame, I have doubled up on some
765 of the local variables that are used only in localised parts of the code, but
766 still need to be preserved over recursive calls of match(). These macros define
767 the alternative names that are used. */
768 
769 #define allow_zero    cur_is_word
770 #define cbegroup      condition
771 #define code_offset   codelink
772 #define condassert    condition
773 #define matched_once  prev_is_word
774 #define foc           number
775 #define save_mark     data
776 
777 /* These statements are here to stop the compiler complaining about uninitialized
778 variables. */
779 
780 #ifdef SUPPORT_UCP
781 prop_value = 0;
782 prop_fail_result = 0;
783 #endif
784 
785 
786 /* This label is used for tail recursion, which is used in a few cases even
787 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
788 used. Thanks to Ian Taylor for noticing this possibility and sending the
789 original patch. */
790 
791 TAIL_RECURSE:
792 
793 /* OK, now we can get on with the real code of the function. Recursive calls
794 are specified by the macro RMATCH and RRETURN is used to return. When
795 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
796 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
797 defined). However, RMATCH isn't like a function call because it's quite a
798 complicated macro. It has to be used in one particular way. This shouldn't,
799 however, impact performance when true recursion is being used. */
800 
801 #ifdef SUPPORT_UTF
802 utf = md->utf;       /* Local copy of the flag */
803 #else
804 utf = FALSE;
805 #endif
806 
807 /* First check that we haven't called match() too many times, or that we
808 haven't exceeded the recursive call limit. */
809 
810 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
811 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
812 
813 /* At the start of a group with an unlimited repeat that may match an empty
814 string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
815 done this way to save having to use another function argument, which would take
816 up space on the stack. See also MATCH_CONDASSERT below.
817 
818 When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
819 such remembered pointers, to be checked when we hit the closing ket, in order
820 to break infinite loops that match no characters. When match() is called in
821 other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
822 NOT be used with tail recursion, because the memory block that is used is on
823 the stack, so a new one may be required for each match(). */
824 
825 if (md->match_function_type == MATCH_CBEGROUP)
826   {
827   newptrb.epb_saved_eptr = eptr;
828   newptrb.epb_prev = eptrb;
829   eptrb = &newptrb;
830   md->match_function_type = 0;
831   }
832 
833 /* Now start processing the opcodes. */
834 
835 for (;;)
836   {
837   COST_CHK(1);
838   minimize = possessive = FALSE;
839   op = *ecode;
840   EDEBUGF(("Op = %d",op));
841 
842   switch(op)
843     {
844     case OP_MARK:
845     md->nomatch_mark = ecode + 2;
846     md->mark = NULL;    /* In case previously set by assertion */
847     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
848       eptrb, RM55);
849     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
850          md->mark == NULL) md->mark = ecode + 2;
851 
852     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
853     argument, and we must check whether that argument matches this MARK's
854     argument. It is passed back in md->start_match_ptr (an overloading of that
855     variable). If it does match, we reset that variable to the current subject
856     position and return MATCH_SKIP. Otherwise, pass back the return code
857     unaltered. */
858 
859     else if (rrc == MATCH_SKIP_ARG &&
860         STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
861       {
862       md->start_match_ptr = eptr;
863       RRETURN(MATCH_SKIP);
864       }
865     RRETURN(rrc);
866 
867     case OP_FAIL:
868     RRETURN(MATCH_NOMATCH);
869 
870     case OP_COMMIT:
871     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
872       eptrb, RM52);
873     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
874     RRETURN(MATCH_COMMIT);
875 
876     case OP_PRUNE:
877     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
878       eptrb, RM51);
879     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
880     RRETURN(MATCH_PRUNE);
881 
882     case OP_PRUNE_ARG:
883     md->nomatch_mark = ecode + 2;
884     md->mark = NULL;    /* In case previously set by assertion */
885     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
886       eptrb, RM56);
887     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
888          md->mark == NULL) md->mark = ecode + 2;
889     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
890     RRETURN(MATCH_PRUNE);
891 
892     case OP_SKIP:
893     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
894       eptrb, RM53);
895     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
896     md->start_match_ptr = eptr;   /* Pass back current position */
897     RRETURN(MATCH_SKIP);
898 
899     /* Note that, for Perl compatibility, SKIP with an argument does NOT set
900     nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
901     not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
902     that failed and any that precede it (either they also failed, or were not
903     triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
904     SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
905     set to the count of the one that failed. */
906 
907     case OP_SKIP_ARG:
908     md->skip_arg_count++;
909     if (md->skip_arg_count <= md->ignore_skip_arg)
910       {
911       ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
912       break;
913       }
914     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
915       eptrb, RM57);
916     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
917 
918     /* Pass back the current skip name by overloading md->start_match_ptr and
919     returning the special MATCH_SKIP_ARG return code. This will either be
920     caught by a matching MARK, or get to the top, where it causes a rematch
921     with md->ignore_skip_arg set to the value of md->skip_arg_count. */
922 
923     md->start_match_ptr = ecode + 2;
924     RRETURN(MATCH_SKIP_ARG);
925 
926     /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
927     the branch in which it occurs can be determined. Overload the start of
928     match pointer to do this. */
929 
930     case OP_THEN:
931     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
932       eptrb, RM54);
933     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
934     md->start_match_ptr = ecode;
935     RRETURN(MATCH_THEN);
936 
937     case OP_THEN_ARG:
938     md->nomatch_mark = ecode + 2;
939     md->mark = NULL;    /* In case previously set by assertion */
940     RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
941       md, eptrb, RM58);
942     if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
943          md->mark == NULL) md->mark = ecode + 2;
944     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
945     md->start_match_ptr = ecode;
946     RRETURN(MATCH_THEN);
947 
948     /* Handle an atomic group that does not contain any capturing parentheses.
949     This can be handled like an assertion. Prior to 8.13, all atomic groups
950     were handled this way. In 8.13, the code was changed as below for ONCE, so
951     that backups pass through the group and thereby reset captured values.
952     However, this uses a lot more stack, so in 8.20, atomic groups that do not
953     contain any captures generate OP_ONCE_NC, which can be handled in the old,
954     less stack intensive way.
955 
956     Check the alternative branches in turn - the matching won't pass the KET
957     for this kind of subpattern. If any one branch matches, we carry on as at
958     the end of a normal bracket, leaving the subject pointer, but resetting
959     the start-of-match value in case it was changed by \K. */
960 
961     case OP_ONCE_NC:
962     prev = ecode;
963     saved_eptr = eptr;
964     save_mark = md->mark;
965     do /* LOOP_COUNT: Ok */
966       {
967       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
968       if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
969         {
970         mstart = md->start_match_ptr;
971         break;
972         }
973       if (rrc == MATCH_THEN)
974         {
975         next = ecode + GET(ecode,1);
976         if (md->start_match_ptr < next &&
977             (*ecode == OP_ALT || *next == OP_ALT))
978           rrc = MATCH_NOMATCH;
979         }
980 
981       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
982       ecode += GET(ecode,1);
983       md->mark = save_mark;
984       }
985     while (*ecode == OP_ALT);
986 
    /* If we hit the end of the group (which could be repeated), fail */
988 
989     if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
990 
991     /* Continue as from after the group, updating the offsets high water
992     mark, since extracts may have been taken. */
993 
994     do ecode += GET(ecode, 1); while (*ecode == OP_ALT); /* LOOP_COUNT: Ok */
995 
996     offset_top = md->end_offset_top;
997     eptr = md->end_match_ptr;
998 
999     /* For a non-repeating ket, just continue at this level. This also
1000     happens for a repeating ket if no characters were matched in the group.
1001     This is the forcible breaking of infinite loops as implemented in Perl
1002     5.005. */
1003 
1004     if (*ecode == OP_KET || eptr == saved_eptr)
1005       {
1006       ecode += 1+LINK_SIZE;
1007       break;
1008       }
1009 
1010     /* The repeating kets try the rest of the pattern or restart from the
1011     preceding bracket, in the appropriate order. The second "call" of match()
1012     uses tail recursion, to avoid using another stack frame. */
1013 
1014     if (*ecode == OP_KETRMIN)
1015       {
1016       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
1017       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1018       ecode = prev;
1019       goto TAIL_RECURSE;
1020       }
1021     else  /* OP_KETRMAX */
1022       {
1023       RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
1024       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1025       ecode += 1 + LINK_SIZE;
1026       goto TAIL_RECURSE;
1027       }
1028     /* Control never gets here */
1029 
1030     /* Handle a capturing bracket, other than those that are possessive with an
1031     unlimited repeat. If there is space in the offset vector, save the current
1032     subject position in the working slot at the top of the vector. We mustn't
1033     change the current values of the data slot, because they may be set from a
1034     previous iteration of this group, and be referred to by a reference inside
1035     the group. A failure to match might occur after the group has succeeded,
1036     if something later on doesn't match. For this reason, we need to restore
1037     the working value and also the values of the final offsets, in case they
1038     were set by a previous iteration of the same bracket.
1039 
1040     If there isn't enough space in the offset vector, treat this as if it were
1041     a non-capturing bracket. Don't worry about setting the flag for the error
1042     case here; that is handled in the code for KET. */
1043 
1044     case OP_CBRA:
1045     case OP_SCBRA:
1046     number = GET2(ecode, 1+LINK_SIZE);
1047     offset = number << 1;
1048 
1049 #ifdef PCRE_DEBUG
1050     printf("start bracket %d\n", number);
1051     printf("subject=");
1052     pchars(eptr, 16, TRUE, md);
1053     printf("\n");
1054 #endif
1055 
1056     if (offset < md->offset_max)
1057       {
1058       save_offset1 = md->offset_vector[offset];
1059       save_offset2 = md->offset_vector[offset+1];
1060       save_offset3 = md->offset_vector[md->offset_end - number];
1061       save_capture_last = md->capture_last;
1062       save_mark = md->mark;
1063 
1064       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1065       md->offset_vector[md->offset_end - number] =
1066         (int)(eptr - md->start_subject);
1067 
1068       for (;;) /* LOOP_COUNT: Ok */
1069         {
1070         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1071         RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1072           eptrb, RM1);
1073         if (rrc == MATCH_ONCE) break;  /* Backing up through an atomic group */
1074 
1075         /* If we backed up to a THEN, check whether it is within the current
1076         branch by comparing the address of the THEN that is passed back with
1077         the end of the branch. If it is within the current branch, and the
1078         branch is one of two or more alternatives (it either starts or ends
1079         with OP_ALT), we have reached the limit of THEN's action, so convert
1080         the return code to NOMATCH, which will cause normal backtracking to
1081         happen from now on. Otherwise, THEN is passed back to an outer
1082         alternative. This implements Perl's treatment of parenthesized groups,
1083         where a group not containing | does not affect the current alternative,
1084         that is, (X) is NOT the same as (X|(*F)). */
1085 
1086         if (rrc == MATCH_THEN)
1087           {
1088           next = ecode + GET(ecode,1);
1089           if (md->start_match_ptr < next &&
1090               (*ecode == OP_ALT || *next == OP_ALT))
1091             rrc = MATCH_NOMATCH;
1092           }
1093 
1094         /* Anything other than NOMATCH is passed back. */
1095 
1096         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1097         md->capture_last = save_capture_last;
1098         ecode += GET(ecode, 1);
1099         md->mark = save_mark;
1100         if (*ecode != OP_ALT) break;
1101         }
1102 
1103       DPRINTF(("bracket %d failed\n", number));
1104       md->offset_vector[offset] = save_offset1;
1105       md->offset_vector[offset+1] = save_offset2;
1106       md->offset_vector[md->offset_end - number] = save_offset3;
1107 
1108       /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1109 
1110       RRETURN(rrc);
1111       }
1112 
1113     /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1114     as a non-capturing bracket. */
1115 
1116     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1117     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1118 
1119     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1120 
1121     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1122     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1123 
1124     /* Non-capturing or atomic group, except for possessive with unlimited
1125     repeat and ONCE group with no captures. Loop for all the alternatives.
1126 
1127     When we get to the final alternative within the brackets, we used to return
1128     the result of a recursive call to match() whatever happened so it was
1129     possible to reduce stack usage by turning this into a tail recursion,
1130     except in the case of a possibly empty group. However, now that there is
1131     the possibility of (*THEN) occurring in the final alternative, this
1132     optimization is no longer always possible.
1133 
1134     We can optimize if we know there are no (*THEN)s in the pattern; at present
1135     this is the best that can be done.
1136 
1137     MATCH_ONCE is returned when the end of an atomic group is successfully
1138     reached, but subsequent matching fails. It passes back up the tree (causing
1139     captured values to be reset) until the original atomic group level is
1140     reached. This is tested by comparing md->once_target with the start of the
1141     group. At this point, the return is converted into MATCH_NOMATCH so that
1142     previous backup points can be taken. */
1143 
1144     case OP_ONCE:
1145     case OP_BRA:
1146     case OP_SBRA:
1147     DPRINTF(("start non-capturing bracket\n"));
1148 
1149     for (;;) /* LOOP_COUNT: Ok */
1150       {
1151       if (op >= OP_SBRA || op == OP_ONCE)
1152         md->match_function_type = MATCH_CBEGROUP;
1153 
1154       /* If this is not a possibly empty group, and there are no (*THEN)s in
1155       the pattern, and this is the final alternative, optimize as described
1156       above. */
1157 
1158       else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
1159         {
1160         ecode += PRIV(OP_lengths)[*ecode];
1161         goto TAIL_RECURSE;
1162         }
1163 
1164       /* In all other cases, we have to make another call to match(). */
1165 
1166       save_mark = md->mark;
1167       save_capture_last = md->capture_last;
1168       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1169         RM2);
1170 
1171       /* See comment in the code for capturing groups above about handling
1172       THEN. */
1173 
1174       if (rrc == MATCH_THEN)
1175         {
1176         next = ecode + GET(ecode,1);
1177         if (md->start_match_ptr < next &&
1178             (*ecode == OP_ALT || *next == OP_ALT))
1179           rrc = MATCH_NOMATCH;
1180         }
1181 
1182       if (rrc != MATCH_NOMATCH)
1183         {
1184         if (rrc == MATCH_ONCE)
1185           {
1186           const pcre_uchar *scode = ecode;
1187           if (*scode != OP_ONCE)           /* If not at start, find it */
1188             {
1189             while (*scode == OP_ALT) scode += GET(scode, 1); /* LOOP_COUNT: Ok */
1190             scode -= GET(scode, 1);
1191             }
1192           if (md->once_target == scode) rrc = MATCH_NOMATCH;
1193           }
1194         RRETURN(rrc);
1195         }
1196       ecode += GET(ecode, 1);
1197       md->mark = save_mark;
1198       if (*ecode != OP_ALT) break;
1199       md->capture_last = save_capture_last;
1200       }
1201 
1202     RRETURN(MATCH_NOMATCH);
1203 
1204     /* Handle possessive capturing brackets with an unlimited repeat. We come
1205     here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1206     handled similarly to the normal case above. However, the matching is
1207     different. The end of these brackets will always be OP_KETRPOS, which
1208     returns MATCH_KETRPOS without going further in the pattern. By this means
1209     we can handle the group by iteration rather than recursion, thereby
1210     reducing the amount of stack needed. */
1211 
1212     case OP_CBRAPOS:
1213     case OP_SCBRAPOS:
1214     allow_zero = FALSE;
1215 
1216     POSSESSIVE_CAPTURE:
1217     number = GET2(ecode, 1+LINK_SIZE);
1218     offset = number << 1;
1219 
1220 #ifdef PCRE_DEBUG
1221     printf("start possessive bracket %d\n", number);
1222     printf("subject=");
1223     pchars(eptr, 16, TRUE, md);
1224     printf("\n");
1225 #endif
1226 
1227     if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
1228 
1229     matched_once = FALSE;
1230     code_offset = (int)(ecode - md->start_code);
1231 
1232     save_offset1 = md->offset_vector[offset];
1233     save_offset2 = md->offset_vector[offset+1];
1234     save_offset3 = md->offset_vector[md->offset_end - number];
1235     save_capture_last = md->capture_last;
1236 
1237     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1238 
1239     /* Each time round the loop, save the current subject position for use
1240     when the group matches. For MATCH_MATCH, the group has matched, so we
1241     restart it with a new subject starting position, remembering that we had
1242     at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1243     usual. If we haven't matched any alternatives in any iteration, check to
1244     see if a previous iteration matched. If so, the group has matched;
1245     continue from afterwards. Otherwise it has failed; restore the previous
1246     capture values before returning NOMATCH. */
1247 
1248     for (;;) /* LOOP_COUNT: Ok */
1249       {
1250       md->offset_vector[md->offset_end - number] =
1251         (int)(eptr - md->start_subject);
1252       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1253       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1254         eptrb, RM63);
1255       if (rrc == MATCH_KETRPOS)
1256         {
1257         offset_top = md->end_offset_top;
1258         ecode = md->start_code + code_offset;
1259         save_capture_last = md->capture_last;
1260         matched_once = TRUE;
1261         mstart = md->start_match_ptr;    /* In case \K changed it */
1262         if (eptr == md->end_match_ptr)   /* Matched an empty string */
1263           {
1264           do ecode += GET(ecode, 1); while (*ecode == OP_ALT); /* LOOP_COUNT: Ok */
1265           break;
1266           }
1267         eptr = md->end_match_ptr;
1268         continue;
1269         }
1270 
1271       /* See comment in the code for capturing groups above about handling
1272       THEN. */
1273 
1274       if (rrc == MATCH_THEN)
1275         {
1276         next = ecode + GET(ecode,1);
1277         if (md->start_match_ptr < next &&
1278             (*ecode == OP_ALT || *next == OP_ALT))
1279           rrc = MATCH_NOMATCH;
1280         }
1281 
1282       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1283       md->capture_last = save_capture_last;
1284       ecode += GET(ecode, 1);
1285       if (*ecode != OP_ALT) break;
1286       }
1287 
1288     if (!matched_once)
1289       {
1290       md->offset_vector[offset] = save_offset1;
1291       md->offset_vector[offset+1] = save_offset2;
1292       md->offset_vector[md->offset_end - number] = save_offset3;
1293       }
1294 
1295     if (allow_zero || matched_once)
1296       {
1297       ecode += 1 + LINK_SIZE;
1298       break;
1299       }
1300 
1301     RRETURN(MATCH_NOMATCH);
1302 
1303     /* Non-capturing possessive bracket with unlimited repeat. We come here
1304     from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1305     without the capturing complication. It is written out separately for speed
1306     and cleanliness. */
1307 
1308     case OP_BRAPOS:
1309     case OP_SBRAPOS:
1310     allow_zero = FALSE;
1311 
1312     POSSESSIVE_NON_CAPTURE:
1313     matched_once = FALSE;
1314     code_offset = (int)(ecode - md->start_code);
1315     save_capture_last = md->capture_last;
1316 
1317     for (;;) /* LOOP_COUNT: Ok */
1318       {
1319       if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1320       RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1321         eptrb, RM48);
1322       if (rrc == MATCH_KETRPOS)
1323         {
1324         offset_top = md->end_offset_top;
1325         ecode = md->start_code + code_offset;
1326         matched_once = TRUE;
1327         mstart = md->start_match_ptr;   /* In case \K reset it */
1328         if (eptr == md->end_match_ptr)  /* Matched an empty string */
1329           {
1330           do ecode += GET(ecode, 1); while (*ecode == OP_ALT); /* LOOP_COUNT: Ok */
1331           break;
1332           }
1333         eptr = md->end_match_ptr;
1334         continue;
1335         }
1336 
1337       /* See comment in the code for capturing groups above about handling
1338       THEN. */
1339 
1340       if (rrc == MATCH_THEN)
1341         {
1342         next = ecode + GET(ecode,1);
1343         if (md->start_match_ptr < next &&
1344             (*ecode == OP_ALT || *next == OP_ALT))
1345           rrc = MATCH_NOMATCH;
1346         }
1347 
1348       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1349       ecode += GET(ecode, 1);
1350       if (*ecode != OP_ALT) break;
1351       md->capture_last = save_capture_last;
1352       }
1353 
1354     if (matched_once || allow_zero)
1355       {
1356       ecode += 1 + LINK_SIZE;
1357       break;
1358       }
1359     RRETURN(MATCH_NOMATCH);
1360 
1361     /* Control never reaches here. */
1362 
1363     /* Conditional group: compilation checked that there are no more than two
1364     branches. If the condition is false, skipping the first branch takes us
1365     past the end of the item if there is only one branch, but that's exactly
1366     what we want. */
1367 
1368     case OP_COND:
1369     case OP_SCOND:
1370 
1371     /* The variable codelink will be added to ecode when the condition is
1372     false, to get to the second branch. Setting it to the offset to the ALT
1373     or KET, then incrementing ecode achieves this effect. We now have ecode
1374     pointing to the condition or callout. */
1375 
1376     codelink = GET(ecode, 1);   /* Offset to the second branch */
1377     ecode += 1 + LINK_SIZE;     /* From this opcode */
1378 
1379     /* Because of the way auto-callout works during compile, a callout item is
1380     inserted between OP_COND and an assertion condition. */
1381 
1382     if (*ecode == OP_CALLOUT)
1383       {
1384       if (PUBL(callout) != NULL)
1385         {
1386         PUBL(callout_block) cb;
        cb.version          = 2;   /* Version 2 of the callout block */
1388         cb.callout_number   = ecode[1];
1389         cb.offset_vector    = md->offset_vector;
1390 #if defined COMPILE_PCRE8
1391         cb.subject          = (PCRE_SPTR)md->start_subject;
1392 #elif defined COMPILE_PCRE16
1393         cb.subject          = (PCRE_SPTR16)md->start_subject;
1394 #elif defined COMPILE_PCRE32
1395         cb.subject          = (PCRE_SPTR32)md->start_subject;
1396 #endif
1397         cb.subject_length   = (int)(md->end_subject - md->start_subject);
1398         cb.start_match      = (int)(mstart - md->start_subject);
1399         cb.current_position = (int)(eptr - md->start_subject);
1400         cb.pattern_position = GET(ecode, 2);
1401         cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1402         cb.capture_top      = offset_top/2;
1403         cb.capture_last     = md->capture_last & CAPLMASK;
1404         /* Internal change requires this for API compatibility. */
1405         if (cb.capture_last == 0) cb.capture_last = -1;
1406         cb.callout_data     = md->callout_data;
1407         cb.mark             = md->nomatch_mark;
1408         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1409         if (rrc < 0) RRETURN(rrc);
1410         }
1411 
1412       /* Advance ecode past the callout, so it now points to the condition. We
1413       must adjust codelink so that the value of ecode+codelink is unchanged. */
1414 
1415       ecode += PRIV(OP_lengths)[OP_CALLOUT];
1416       codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1417       }
1418 
1419     /* Test the various possible conditions */
1420 
1421     condition = FALSE;
1422     switch(condcode = *ecode)
1423       {
1424       case OP_RREF:         /* Numbered group recursion test */
1425       if (md->recursive != NULL)     /* Not recursing => FALSE */
1426         {
1427         unsigned int recno = GET2(ecode, 1);   /* Recursion group number*/
1428         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1429         }
1430       break;
1431 
1432       case OP_DNRREF:       /* Duplicate named group recursion test */
1433       if (md->recursive != NULL)
1434         {
1435         int count = GET2(ecode, 1 + IMM2_SIZE);
1436         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1437         while (count-- > 0) /* LOOP_COUNT: COST */
1438           {
1439           unsigned int recno = GET2(slot, 0);
1440           condition = recno == md->recursive->group_num;
1441           if (condition) break;
1442           slot += md->name_entry_size;
1443 	  COST(1);
1444           }
1445         }
1446       break;
1447 
1448       case OP_CREF:         /* Numbered group used test */
1449       offset = GET2(ecode, 1) << 1;  /* Doubled ref number */
1450       condition = offset < offset_top && md->offset_vector[offset] >= 0;
1451       break;
1452 
1453       case OP_DNCREF:      /* Duplicate named group used test */
1454         {
1455         int count = GET2(ecode, 1 + IMM2_SIZE);
1456         pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1457         while (count-- > 0) /* LOOP_COUNT: COST */
1458           {
1459           offset = GET2(slot, 0) << 1;
1460           condition = offset < offset_top && md->offset_vector[offset] >= 0;
1461           if (condition) break;
1462           slot += md->name_entry_size;
1463 	  COST(1);
1464           }
1465         }
1466       break;
1467 
1468       case OP_DEF:     /* DEFINE - always false */
1469       case OP_FAIL:    /* From optimized (?!) condition */
1470       break;
1471 
1472       /* The condition is an assertion. Call match() to evaluate it - setting
1473       md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
1474       of an assertion. */
1475 
1476       default:
1477       md->match_function_type = MATCH_CONDASSERT;
1478       RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
1479       if (rrc == MATCH_MATCH)
1480         {
1481         if (md->end_offset_top > offset_top)
1482           offset_top = md->end_offset_top;  /* Captures may have happened */
1483         condition = TRUE;
1484 
1485         /* Advance ecode past the assertion to the start of the first branch,
1486         but adjust it so that the general choosing code below works. If the
1487         assertion has a quantifier that allows zero repeats we must skip over
1488         the BRAZERO. This is a lunatic thing to do, but somebody did! */
1489 
1490         if (*ecode == OP_BRAZERO) ecode++;
1491         ecode += GET(ecode, 1);
1492         while (*ecode == OP_ALT) ecode += GET(ecode, 1); /* LOOP_COUNT: Ok */
1493         ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
1494         }
1495 
1496       /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1497       assertion; it is therefore treated as NOMATCH. Any other return is an
1498       error. */
1499 
1500       else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1501         {
1502         RRETURN(rrc);         /* Need braces because of following else */
1503         }
1504       break;
1505       }
1506 
1507     /* Choose branch according to the condition */
1508 
1509     ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
1510 
1511     /* We are now at the branch that is to be obeyed. As there is only one, we
1512     can use tail recursion to avoid using another stack frame, except when
1513     there is unlimited repeat of a possibly empty group. In the latter case, a
1514     recursive call to match() is always required, unless the second alternative
1515     doesn't exist, in which case we can just plough on. Note that, for
1516     compatibility with Perl, the | in a conditional group is NOT treated as
1517     creating two alternatives. If a THEN is encountered in the branch, it
1518     propagates out to the enclosing alternative (unless nested in a deeper set
1519     of alternatives, of course). */
1520 
1521     if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
1522       {
1523       if (op != OP_SCOND)
1524         {
1525         goto TAIL_RECURSE;
1526         }
1527 
1528       md->match_function_type = MATCH_CBEGROUP;
1529       RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
1530       RRETURN(rrc);
1531       }
1532 
1533      /* Condition false & no alternative; continue after the group. */
1534 
1535     else
1536       {
1537       }
1538     break;
1539 
1540 
1541     /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1542     to close any currently open capturing brackets. */
1543 
1544     case OP_CLOSE:
1545     number = GET2(ecode, 1);   /* Must be less than 65536 */
1546     offset = number << 1;
1547 
1548 #ifdef PCRE_DEBUG
1549       printf("end bracket %d at *ACCEPT", number);
1550       printf("\n");
1551 #endif
1552 
1553     md->capture_last = (md->capture_last & OVFLMASK) | number;
1554     if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1555       {
1556       md->offset_vector[offset] =
1557         md->offset_vector[md->offset_end - number];
1558       md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1559 
1560       /* If this group is at or above the current highwater mark, ensure that
1561       any groups between the current high water mark and this group are marked
1562       unset and then update the high water mark. */
1563 
1564       if (offset >= offset_top)
1565         {
1566         register int *iptr = md->offset_vector + offset_top;
1567         register int *iend = md->offset_vector + offset;
1568         if (iptr < iend)
1569           {
1570           COST(iend - iptr);
1571           while (iptr < iend) *iptr++ = -1; /* LOOP_COUNT: COST */
1572           }
1573         offset_top = offset + 2;
1574         }
1575       }
1576     ecode += 1 + IMM2_SIZE;
1577     break;
1578 
1579 
1580     /* End of the pattern, either real or forced. */
1581 
1582     case OP_END:
1583     case OP_ACCEPT:
1584     case OP_ASSERT_ACCEPT:
1585 
1586     /* If we have matched an empty string, fail if not in an assertion and not
1587     in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1588     is set and we have matched at the start of the subject. In both cases,
1589     backtracking will then try other alternatives, if any. */
1590 
1591     if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1592          md->recursive == NULL &&
1593          (md->notempty ||
1594            (md->notempty_atstart &&
1595              mstart == md->start_subject + md->start_offset)))
1596       RRETURN(MATCH_NOMATCH);
1597 
1598     /* Otherwise, we have a match. */
1599 
1600     md->end_match_ptr = eptr;           /* Record where we ended */
1601     md->end_offset_top = offset_top;    /* and how many extracts were taken */
1602     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1603 
1604     /* For some reason, the macros don't work properly if an expression is
1605     given as the argument to RRETURN when the heap is in use. */
1606 
1607     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1608     RRETURN(rrc);
1609 
1610     /* Assertion brackets. Check the alternative branches in turn - the
1611     matching won't pass the KET for an assertion. If any one branch matches,
1612     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1613     start of each branch to move the current point backwards, so the code at
1614     this level is identical to the lookahead case. When the assertion is part
1615     of a condition, we want to return immediately afterwards. The caller of
1616     this incarnation of the match() function will have set MATCH_CONDASSERT in
    md->match_function_type, and one of these opcodes will be the first opcode
1618     that is processed. We use a local variable that is preserved over calls to
1619     match() to remember this case. */
1620 
1621     case OP_ASSERT:
1622     case OP_ASSERTBACK:
1623     save_mark = md->mark;
1624     if (md->match_function_type == MATCH_CONDASSERT)
1625       {
1626       condassert = TRUE;
1627       md->match_function_type = 0;
1628       }
1629     else condassert = FALSE;
1630 
1631     /* Loop for each branch */
1632 
1633     do /* LOOP_COUNT: Ok */
1634       {
1635       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1636 
1637       /* A match means that the assertion is true; break out of the loop
1638       that matches its alternatives. */
1639 
1640       if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1641         {
1642         mstart = md->start_match_ptr;   /* In case \K reset it */
1643         break;
1644         }
1645 
1646       /* If not matched, restore the previous mark setting. */
1647 
1648       md->mark = save_mark;
1649 
1650       /* See comment in the code for capturing groups above about handling
1651       THEN. */
1652 
1653       if (rrc == MATCH_THEN)
1654         {
1655         next = ecode + GET(ecode,1);
1656         if (md->start_match_ptr < next &&
1657             (*ecode == OP_ALT || *next == OP_ALT))
1658           rrc = MATCH_NOMATCH;
1659         }
1660 
1661       /* Anything other than NOMATCH causes the entire assertion to fail,
1662       passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1663       uncaptured THEN, which means they take their normal effect. This
1664       consistent approach does not always have exactly the same effect as in
1665       Perl. */
1666 
1667       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1668       ecode += GET(ecode, 1);
1669       }
1670     while (*ecode == OP_ALT);   /* Continue for next alternative */ /* LOOP_COUNT: Ok */
1671 
1672     /* If we have tried all the alternative branches, the assertion has
1673     failed. If not, we broke out after a match. */
1674 
1675     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1676 
1677     /* If checking an assertion for a condition, return MATCH_MATCH. */
1678 
1679     if (condassert) RRETURN(MATCH_MATCH);
1680 
1681     /* Continue from after a successful assertion, updating the offsets high
1682     water mark, since extracts may have been taken during the assertion. */
1683 
1684     do ecode += GET(ecode,1); while (*ecode == OP_ALT); /* LOOP_COUNT: Ok */
1685     ecode += 1 + LINK_SIZE;
1686     offset_top = md->end_offset_top;
1687     continue;
1688 
1689     /* Negative assertion: all branches must fail to match for the assertion to
1690     succeed. */
1691 
1692     case OP_ASSERT_NOT:
1693     case OP_ASSERTBACK_NOT:
1694     save_mark = md->mark;
1695     if (md->match_function_type == MATCH_CONDASSERT)
1696       {
1697       condassert = TRUE;
1698       md->match_function_type = 0;
1699       }
1700     else condassert = FALSE;
1701 
1702     /* Loop for each alternative branch. */
1703 
1704     do /* LOOP_COUNT: Ok */
1705       {
1706       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1707       md->mark = save_mark;   /* Always restore the mark setting */
1708 
1709       switch(rrc)
1710         {
1711         case MATCH_MATCH:            /* A successful match means */
1712         case MATCH_ACCEPT:           /* the assertion has failed. */
1713         RRETURN(MATCH_NOMATCH);
1714 
1715         case MATCH_NOMATCH:          /* Carry on with next branch */
1716         break;
1717 
1718         /* See comment in the code for capturing groups above about handling
1719         THEN. */
1720 
1721         case MATCH_THEN:
1722         next = ecode + GET(ecode,1);
1723         if (md->start_match_ptr < next &&
1724             (*ecode == OP_ALT || *next == OP_ALT))
1725           {
1726           rrc = MATCH_NOMATCH;
1727           break;
1728           }
1729         /* Otherwise fall through. */
1730 
1731         /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1732         assertion to fail to match, without considering any more alternatives.
1733         Failing to match means the assertion is true. This is a consistent
1734         approach, but does not always have the same effect as in Perl. */
1735 
1736         case MATCH_COMMIT:
1737         case MATCH_SKIP:
1738         case MATCH_SKIP_ARG:
1739         case MATCH_PRUNE:
1740         do ecode += GET(ecode,1); while (*ecode == OP_ALT); /* LOOP_COUNT: Ok */
1741         goto NEG_ASSERT_TRUE;   /* Break out of alternation loop */
1742 
1743         /* Anything else is an error */
1744 
1745         default:
1746         RRETURN(rrc);
1747         }
1748 
1749       /* Continue with next branch */
1750 
1751       ecode += GET(ecode,1);
1752       }
1753     while (*ecode == OP_ALT);
1754 
1755     /* All branches in the assertion failed to match. */
1756 
1757     NEG_ASSERT_TRUE:
1758     if (condassert) RRETURN(MATCH_MATCH);  /* Condition assertion */
1759     ecode += 1 + LINK_SIZE;                /* Continue with current branch */
1760     continue;
1761 
1762     /* Move the subject pointer back. This occurs only at the start of
1763     each branch of a lookbehind assertion. If we are too close to the start to
1764     move back, this match function fails. When working with UTF-8 we move
1765     back a number of characters, not bytes. */
1766 
1767     case OP_REVERSE:
1768 #ifdef SUPPORT_UTF
1769     if (utf)
1770       {
1771       i = GET(ecode, 1);
1772       COST(i);
1773       while (i-- > 0) /* LOOP_COUNT: COST */
1774         {
1775         eptr--;
1776         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1777         BACKCHAR(eptr);
1778         }
1779       }
1780     else
1781 #endif
1782 
1783     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1784 
1785       {
1786       eptr -= GET(ecode, 1);
1787       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1788       }
1789 
1790     /* Save the earliest consulted character, then skip to next op code */
1791 
1792     if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1793     ecode += 1 + LINK_SIZE;
1794     break;
1795 
1796     /* The callout item calls an external function, if one is provided, passing
1797     details of the match so far. This is mainly for debugging, though the
1798     function is able to force a failure. */
1799 
1800     case OP_CALLOUT:
1801     if (PUBL(callout) != NULL)
1802       {
1803       PUBL(callout_block) cb;
1804       cb.version          = 2;   /* Version 2 of the callout block */
1805       cb.callout_number   = ecode[1];
1806       cb.offset_vector    = md->offset_vector;
1807 #if defined COMPILE_PCRE8
1808       cb.subject          = (PCRE_SPTR)md->start_subject;
1809 #elif defined COMPILE_PCRE16
1810       cb.subject          = (PCRE_SPTR16)md->start_subject;
1811 #elif defined COMPILE_PCRE32
1812       cb.subject          = (PCRE_SPTR32)md->start_subject;
1813 #endif
1814       cb.subject_length   = (int)(md->end_subject - md->start_subject);
1815       cb.start_match      = (int)(mstart - md->start_subject);
1816       cb.current_position = (int)(eptr - md->start_subject);
1817       cb.pattern_position = GET(ecode, 2);
1818       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1819       cb.capture_top      = offset_top/2;
1820       cb.capture_last     = md->capture_last & CAPLMASK;
1821       /* Internal change requires this for API compatibility. */
1822       if (cb.capture_last == 0) cb.capture_last = -1;
1823       cb.callout_data     = md->callout_data;
1824       cb.mark             = md->nomatch_mark;
1825       if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1826       if (rrc < 0) RRETURN(rrc);
1827       }
1828     ecode += 2 + 2*LINK_SIZE;
1829     break;
1830 
1831     /* Recursion either matches the current regex, or some subexpression. The
1832     offset data is the offset to the starting bracket from the start of the
1833     whole pattern. (This is so that it works from duplicated subpatterns.)
1834 
1835     The state of the capturing groups is preserved over recursion, and
1836     re-instated afterwards. We don't know how many are started and not yet
1837     finished (offset_top records the completed total) so we just have to save
1838     all the potential data. There may be up to 65535 such values, which is too
1839     large to put on the stack, but using malloc for small numbers seems
1840     expensive. As a compromise, the stack is used when there are no more than
1841     REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1842 
1843     There are also other values that have to be saved. We use a chained
1844     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1845     for the original version of this logic. It has, however, been hacked around
1846     a lot, so he is not to blame for the current way it works. */
1847 
1848     case OP_RECURSE:
1849       {
1850       recursion_info *ri;
1851       unsigned int recno; /* LOOP_COUNT: Warning, no CHK until after Marker1 */
1852 
1853       callpat = md->start_code + GET(ecode, 1);
1854       recno = (callpat == md->start_code)? 0 :
1855         GET2(callpat, 1 + LINK_SIZE);
1856 
1857       /* Check for repeating a recursion without advancing the subject pointer.
1858       This should catch convoluted mutual recursions. (Some simple cases are
1859       caught at compile time.) */
1860 
1861       for (ri = md->recursive; ri != NULL; ri = ri->prevrec) /* LOOP_COUNT: COST */
1862 	{
1863         if (recno == ri->group_num && eptr == ri->subject_position)
1864           RRETURN(PCRE_ERROR_RECURSELOOP);
1865 	COST(1);
1866         }
1867 
1868       /* Add to "recursing stack" */
1869 
1870       new_recursive.group_num = recno; /* LOOP_COUNT: Marker1 */
1871       new_recursive.saved_capture_last = md->capture_last;
1872       new_recursive.subject_position = eptr;
1873       new_recursive.prevrec = md->recursive;
1874       md->recursive = &new_recursive;
1875 
1876       /* Where to continue from afterwards */
1877 
1878       ecode += 1 + LINK_SIZE;
1879 
1880       /* Now save the offset data */
1881 
1882       new_recursive.saved_max = md->offset_end;
1883       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1884         new_recursive.offset_save = stacksave;
1885       else
1886         {
1887         new_recursive.offset_save =
1888           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1889         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1890         }
1891       memcpy(new_recursive.offset_save, md->offset_vector,
1892             new_recursive.saved_max * sizeof(int));
1893 
1894       /* OK, now we can do the recursion. After processing each alternative,
1895       restore the offset data and the last captured value. If there were nested
1896       recursions, md->recursive might be changed, so reset it before looping.
1897       */
1898 
1899       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1900       cbegroup = (*callpat >= OP_SBRA);
1901       do /* LOOP_COUNT: Ok */
1902         {
1903         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1904         RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1905           md, eptrb, RM6);
1906         memcpy(md->offset_vector, new_recursive.offset_save,
1907             new_recursive.saved_max * sizeof(int));
1908         md->capture_last = new_recursive.saved_capture_last;
1909         md->recursive = new_recursive.prevrec;
1910         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1911           {
1912           DPRINTF(("Recursion matched\n"));
1913           if (new_recursive.offset_save != stacksave)
1914             (PUBL(free))(new_recursive.offset_save);
1915 
1916           /* Set where we got to in the subject, and reset the start in case
1917           it was changed by \K. This *is* propagated back out of a recursion,
1918           for Perl compatibility. */
1919 
1920           eptr = md->end_match_ptr;
1921           mstart = md->start_match_ptr;
1922           goto RECURSION_MATCHED;        /* Exit loop; end processing */
1923           }
1924 
1925         /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1926         recursion; they cause a NOMATCH for the entire recursion. These codes
1927         are defined in a range that can be tested for. */
1928 
1929         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1930           {
1931           if (new_recursive.offset_save != stacksave)
1932             (PUBL(free))(new_recursive.offset_save);
1933           RRETURN(MATCH_NOMATCH);
1934           }
1935 
1936         /* Any return code other than NOMATCH is an error. */
1937 
1938         if (rrc != MATCH_NOMATCH)
1939           {
1940           DPRINTF(("Recursion gave error %d\n", rrc));
1941           if (new_recursive.offset_save != stacksave)
1942             (PUBL(free))(new_recursive.offset_save);
1943           RRETURN(rrc);
1944           }
1945 
1946         md->recursive = &new_recursive;
1947         callpat += GET(callpat, 1);
1948         }
1949       while (*callpat == OP_ALT);
1950 
1951       DPRINTF(("Recursion didn't match\n"));
1952       md->recursive = new_recursive.prevrec;
1953       if (new_recursive.offset_save != stacksave)
1954         (PUBL(free))(new_recursive.offset_save);
1955       RRETURN(MATCH_NOMATCH);
1956       }
1957 
1958     RECURSION_MATCHED:
1959     break;
1960 
1961     /* An alternation is the end of a branch; scan along to find the end of the
1962     bracketed group and go to there. */
1963 
1964     case OP_ALT:
1965     do ecode += GET(ecode,1); while (*ecode == OP_ALT); /* LOOP_COUNT: Ok */
1966     break;
1967 
1968     /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1969     indicating that it may occur zero times. It may repeat infinitely, or not
1970     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1971     with fixed upper repeat limits are compiled as a number of copies, with the
1972     optional ones preceded by BRAZERO or BRAMINZERO. */
1973 
1974     case OP_BRAZERO:
1975     next = ecode + 1;
1976     RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1977     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1978     do next += GET(next, 1); while (*next == OP_ALT); /* LOOP_COUNT: Ok */
1979     ecode = next + 1 + LINK_SIZE;
1980     break;
1981 
1982     case OP_BRAMINZERO:
1983     next = ecode + 1;
1984     do next += GET(next, 1); while (*next == OP_ALT); /* LOOP_COUNT: Ok */
1985     RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1986     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1987     ecode++;
1988     break;
1989 
1990     case OP_SKIPZERO:
1991     next = ecode+1;
1992     do next += GET(next,1); while (*next == OP_ALT); /* LOOP_COUNT: Ok */
1993     ecode = next + 1 + LINK_SIZE;
1994     break;
1995 
1996     /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1997     here; just jump to the group, with allow_zero set TRUE. */
1998 
1999     case OP_BRAPOSZERO:
2000     op = *(++ecode);
2001     allow_zero = TRUE;
2002     if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
2003       goto POSSESSIVE_NON_CAPTURE;
2004 
2005     /* End of a group, repeated or non-repeating. */
2006 
2007     case OP_KET:
2008     case OP_KETRMIN:
2009     case OP_KETRMAX:
2010     case OP_KETRPOS:
2011     prev = ecode - GET(ecode, 1);
2012 
2013     /* If this was a group that remembered the subject start, in order to break
2014     infinite repeats of empty string matches, retrieve the subject start from
2015     the chain. Otherwise, set it NULL. */
2016 
2017     if (*prev >= OP_SBRA || *prev == OP_ONCE)
2018       {
2019       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
2020       eptrb = eptrb->epb_prev;              /* Backup to previous group */
2021       }
2022     else saved_eptr = NULL;
2023 
2024     /* If we are at the end of an assertion group or a non-capturing atomic
2025     group, stop matching and return MATCH_MATCH, but record the current high
2026     water mark for use by positive assertions. We also need to record the match
2027     start in case it was changed by \K. */
2028 
2029     if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
2030          *prev == OP_ONCE_NC)
2031       {
2032       md->end_match_ptr = eptr;      /* For ONCE_NC */
2033       md->end_offset_top = offset_top;
2034       md->start_match_ptr = mstart;
2035       RRETURN(MATCH_MATCH);         /* Sets md->mark */
2036       }
2037 
2038     /* For capturing groups we have to check the group number back at the start
2039     and if necessary complete handling an extraction by setting the offsets and
2040     bumping the high water mark. Whole-pattern recursion is coded as a recurse
2041     into group 0, so it won't be picked up here. Instead, we catch it when the
2042     OP_END is reached. Other recursion is handled here. We just have to record
2043     the current subject position and start match pointer and give a MATCH
2044     return. */
2045 
2046     if (*prev == OP_CBRA || *prev == OP_SCBRA ||
2047         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
2048       {
2049       number = GET2(prev, 1+LINK_SIZE);
2050       offset = number << 1;
2051 
2052 #ifdef PCRE_DEBUG
2053       printf("end bracket %d", number);
2054       printf("\n");
2055 #endif
2056 
2057       /* Handle a recursively called group. */
2058 
2059       if (md->recursive != NULL && md->recursive->group_num == number)
2060         {
2061         md->end_match_ptr = eptr;
2062         md->start_match_ptr = mstart;
2063         RRETURN(MATCH_MATCH);
2064         }
2065 
2066       /* Deal with capturing */
2067 
2068       md->capture_last = (md->capture_last & OVFLMASK) | number;
2069       if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
2070         {
2071         /* If offset is greater than offset_top, it means that we are
2072         "skipping" a capturing group, and that group's offsets must be marked
2073         unset. In earlier versions of PCRE, all the offsets were unset at the
2074         start of matching, but this doesn't work because atomic groups and
2075         assertions can cause a value to be set that should later be unset.
2076         Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
2077         part of the atomic group, but this is not on the final matching path,
2078         so must be unset when 2 is set. (If there is no group 2, there is no
2079         problem, because offset_top will then be 2, indicating no capture.) */
2080 
2081         if (offset > offset_top)
2082           {
2083           register int *iptr = md->offset_vector + offset_top;
2084           register int *iend = md->offset_vector + offset;
2085           if (iptr < iend)
2086             {
2087             COST(iend - iptr);
2088             while (iptr < iend) *iptr++ = -1; /* LOOP_COUNT: COST */
2089             }
2090           }
2091 
2092         /* Now make the extraction */
2093 
2094         md->offset_vector[offset] =
2095           md->offset_vector[md->offset_end - number];
2096         md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
2097         if (offset_top <= offset) offset_top = offset + 2;
2098         }
2099       }
2100 
2101     /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
2102     and return the MATCH_KETRPOS. This makes it possible to do the repeats one
2103     at a time from the outer level, thus saving stack. This must precede the
2104     empty string test - in this case that test is done at the outer level. */
2105 
2106     if (*ecode == OP_KETRPOS)
2107       {
2108       md->start_match_ptr = mstart;    /* In case \K reset it */
2109       md->end_match_ptr = eptr;
2110       md->end_offset_top = offset_top;
2111       RRETURN(MATCH_KETRPOS);
2112       }
2113 
2114     /* For an ordinary non-repeating ket, just continue at this level. This
2115     also happens for a repeating ket if no characters were matched in the
2116     group. This is the forcible breaking of infinite loops as implemented in
2117     Perl 5.005. For a non-repeating atomic group that includes captures,
2118     establish a backup point by processing the rest of the pattern at a lower
2119     level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
2120     original OP_ONCE level, thereby bypassing intermediate backup points, but
2121     resetting any captures that happened along the way. */
2122 
2123     if (*ecode == OP_KET || eptr == saved_eptr)
2124       {
2125       if (*prev == OP_ONCE)
2126         {
2127         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
2128         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2129         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2130         RRETURN(MATCH_ONCE);
2131         }
2132       ecode += 1 + LINK_SIZE;    /* Carry on at this level */
2133       break;
2134       }
2135 
2136     /* The normal repeating kets try the rest of the pattern or restart from
2137     the preceding bracket, in the appropriate order. In the second case, we can
2138     use tail recursion to avoid using another stack frame, unless we have an
2139     atomic group or an unlimited repeat of a group that can match an empty
2140     string. */
2141 
2142     if (*ecode == OP_KETRMIN)
2143       {
2144       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2145       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2146       if (*prev == OP_ONCE)
2147         {
2148         RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2149         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2150         md->once_target = prev;  /* Level at which to change to MATCH_NOMATCH */
2151         RRETURN(MATCH_ONCE);
2152         }
2153       if (*prev >= OP_SBRA)    /* Could match an empty string */
2154         {
2155         RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2156         RRETURN(rrc);
2157         }
2158       ecode = prev;
2159       goto TAIL_RECURSE;
2160       }
2161     else  /* OP_KETRMAX */
2162       {
2163       RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2164       if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2165       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2166       if (*prev == OP_ONCE)
2167         {
2168         RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2169         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2170         md->once_target = prev;
2171         RRETURN(MATCH_ONCE);
2172         }
2173       ecode += 1 + LINK_SIZE;
2174       goto TAIL_RECURSE;
2175       }
2176     /* Control never gets here */
2177 
2178     /* Not multiline mode: start of subject assertion, unless notbol. */
2179 
2180     case OP_CIRC:
2181     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2182 
2183     /* Start of subject assertion */
2184 
2185     case OP_SOD:
2186     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2187     ecode++;
2188     break;
2189 
2190     /* Multiline mode: start of subject unless notbol, or after any newline. */
2191 
2192     case OP_CIRCM:
2193     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2194     if (eptr != md->start_subject &&
2195         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
2196       RRETURN(MATCH_NOMATCH);
2197     ecode++;
2198     break;
2199 
2200     /* Start of match assertion */
2201 
2202     case OP_SOM:
2203     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2204     ecode++;
2205     break;
2206 
2207     /* Reset the start of match point */
2208 
2209     case OP_SET_SOM:
2210     mstart = eptr;
2211     ecode++;
2212     break;
2213 
2214     /* Multiline mode: assert before any newline, or before end of subject
2215     unless noteol is set. */
2216 
2217     case OP_DOLLM:
2218     if (eptr < md->end_subject)
2219       {
2220       if (!IS_NEWLINE(eptr))
2221         {
2222         if (md->partial != 0 &&
2223             eptr + 1 >= md->end_subject &&
2224             NLBLOCK->nltype == NLTYPE_FIXED &&
2225             NLBLOCK->nllen == 2 &&
2226             UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2227           {
2228           md->hitend = TRUE;
2229           if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2230           }
2231         RRETURN(MATCH_NOMATCH);
2232         }
2233       }
2234     else
2235       {
2236       if (md->noteol) RRETURN(MATCH_NOMATCH);
2237       SCHECK_PARTIAL();
2238       }
2239     ecode++;
2240     break;
2241 
2242     /* Not multiline mode: assert before a terminating newline or before end of
2243     subject unless noteol is set. */
2244 
2245     case OP_DOLL:
2246     if (md->noteol) RRETURN(MATCH_NOMATCH);
2247     if (!md->endonly) goto ASSERT_NL_OR_EOS;
2248 
2249     /* ... else fall through for endonly */
2250 
2251     /* End of subject assertion (\z) */
2252 
2253     case OP_EOD:
2254     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2255     SCHECK_PARTIAL();
2256     ecode++;
2257     break;
2258 
2259     /* End of subject or ending \n assertion (\Z) */
2260 
2261     case OP_EODN:
2262     ASSERT_NL_OR_EOS:
2263     if (eptr < md->end_subject &&
2264         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2265       {
2266       if (md->partial != 0 &&
2267           eptr + 1 >= md->end_subject &&
2268           NLBLOCK->nltype == NLTYPE_FIXED &&
2269           NLBLOCK->nllen == 2 &&
2270           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2271         {
2272         md->hitend = TRUE;
2273         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2274         }
2275       RRETURN(MATCH_NOMATCH);
2276       }
2277 
2278     /* Either at end of string or \n before end. */
2279 
2280     SCHECK_PARTIAL();
2281     ecode++;
2282     break;
2283 
2284     /* Word boundary assertions */
2285 
2286     case OP_NOT_WORD_BOUNDARY:
2287     case OP_WORD_BOUNDARY:
2288       {
2289 
2290       /* Find out if the previous and current characters are "word" characters.
2291       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2292       be "non-word" characters. Remember the earliest consulted character for
2293       partial matching. */
2294 
2295 #ifdef SUPPORT_UTF
2296       if (utf)
2297         {
2298         /* Get status of previous character */
2299 
2300         if (eptr == md->start_subject) prev_is_word = FALSE; else
2301           {
2302           PCRE_PUCHAR lastptr = eptr - 1;
2303           BACKCHAR(lastptr);
2304           if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2305           GETCHAR(c, lastptr);
2306 #ifdef SUPPORT_UCP
2307           if (md->use_ucp)
2308             {
2309             if (c == '_') prev_is_word = TRUE; else
2310               {
2311               int cat = UCD_CATEGORY(c);
2312               prev_is_word = (cat == ucp_L || cat == ucp_N);
2313               }
2314             }
2315           else
2316 #endif
2317           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2318           }
2319 
2320         /* Get status of next character */
2321 
2322         if (eptr >= md->end_subject)
2323           {
2324           SCHECK_PARTIAL();
2325           cur_is_word = FALSE;
2326           }
2327         else
2328           {
2329           GETCHAR(c, eptr);
2330 #ifdef SUPPORT_UCP
2331           if (md->use_ucp)
2332             {
2333             if (c == '_') cur_is_word = TRUE; else
2334               {
2335               int cat = UCD_CATEGORY(c);
2336               cur_is_word = (cat == ucp_L || cat == ucp_N);
2337               }
2338             }
2339           else
2340 #endif
2341           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2342           }
2343         }
2344       else
2345 #endif
2346 
2347       /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2348       consistency with the behaviour of \w we do use it in this case. */
2349 
2350         {
2351         /* Get status of previous character */
2352 
2353         if (eptr == md->start_subject) prev_is_word = FALSE; else
2354           {
2355           if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2356 #ifdef SUPPORT_UCP
2357           if (md->use_ucp)
2358             {
2359             c = eptr[-1];
2360             if (c == '_') prev_is_word = TRUE; else
2361               {
2362               int cat = UCD_CATEGORY(c);
2363               prev_is_word = (cat == ucp_L || cat == ucp_N);
2364               }
2365             }
2366           else
2367 #endif
2368           prev_is_word = MAX_255(eptr[-1])
2369             && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2370           }
2371 
2372         /* Get status of next character */
2373 
2374         if (eptr >= md->end_subject)
2375           {
2376           SCHECK_PARTIAL();
2377           cur_is_word = FALSE;
2378           }
2379         else
2380 #ifdef SUPPORT_UCP
2381         if (md->use_ucp)
2382           {
2383           c = *eptr;
2384           if (c == '_') cur_is_word = TRUE; else
2385             {
2386             int cat = UCD_CATEGORY(c);
2387             cur_is_word = (cat == ucp_L || cat == ucp_N);
2388             }
2389           }
2390         else
2391 #endif
2392         cur_is_word = MAX_255(*eptr)
2393           && ((md->ctypes[*eptr] & ctype_word) != 0);
2394         }
2395 
2396       /* Now see if the situation is what we want */
2397 
2398       if ((*ecode++ == OP_WORD_BOUNDARY)?
2399            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2400         RRETURN(MATCH_NOMATCH);
2401       }
2402     break;
2403 
2404     /* Match any single character type except newline; have to take care with
2405     CRLF newlines and partial matching. */
2406 
2407     case OP_ANY:
2408     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2409     if (md->partial != 0 &&
2410         eptr == md->end_subject - 1 &&
2411         NLBLOCK->nltype == NLTYPE_FIXED &&
2412         NLBLOCK->nllen == 2 &&
2413         UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2414       {
2415       md->hitend = TRUE;
2416       if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2417       }
2418 
2419     /* Fall through */
2420 
2421     /* Match any single character whatsoever. */
2422 
2423     case OP_ALLANY:
2424     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2425       {                            /* not be updated before SCHECK_PARTIAL. */
2426       SCHECK_PARTIAL();
2427       RRETURN(MATCH_NOMATCH);
2428       }
2429     eptr++;
2430 #ifdef SUPPORT_UTF
2431     if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2432 #endif
2433     ecode++;
2434     break;
2435 
2436     /* Match a single byte, even in UTF-8 mode. This opcode really does match
2437     any byte, even newline, independent of the setting of PCRE_DOTALL. */
2438 
2439     case OP_ANYBYTE:
2440     if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2441       {                            /* not be updated before SCHECK_PARTIAL. */
2442       SCHECK_PARTIAL();
2443       RRETURN(MATCH_NOMATCH);
2444       }
2445     eptr++;
2446     ecode++;
2447     break;
2448 
2449     case OP_NOT_DIGIT:
2450     if (eptr >= md->end_subject)
2451       {
2452       SCHECK_PARTIAL();
2453       RRETURN(MATCH_NOMATCH);
2454       }
2455     GETCHARINCTEST(c, eptr);
2456     if (
2457 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2458        c < 256 &&
2459 #endif
2460        (md->ctypes[c] & ctype_digit) != 0
2461        )
2462       RRETURN(MATCH_NOMATCH);
2463     ecode++;
2464     break;
2465 
2466     case OP_DIGIT:
2467     if (eptr >= md->end_subject)
2468       {
2469       SCHECK_PARTIAL();
2470       RRETURN(MATCH_NOMATCH);
2471       }
2472     GETCHARINCTEST(c, eptr);
2473     if (
2474 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2475        c > 255 ||
2476 #endif
2477        (md->ctypes[c] & ctype_digit) == 0
2478        )
2479       RRETURN(MATCH_NOMATCH);
2480     ecode++;
2481     break;
2482 
2483     case OP_NOT_WHITESPACE:
2484     if (eptr >= md->end_subject)
2485       {
2486       SCHECK_PARTIAL();
2487       RRETURN(MATCH_NOMATCH);
2488       }
2489     GETCHARINCTEST(c, eptr);
2490     if (
2491 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2492        c < 256 &&
2493 #endif
2494        (md->ctypes[c] & ctype_space) != 0
2495        )
2496       RRETURN(MATCH_NOMATCH);
2497     ecode++;
2498     break;
2499 
2500     case OP_WHITESPACE:
2501     if (eptr >= md->end_subject)
2502       {
2503       SCHECK_PARTIAL();
2504       RRETURN(MATCH_NOMATCH);
2505       }
2506     GETCHARINCTEST(c, eptr);
2507     if (
2508 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2509        c > 255 ||
2510 #endif
2511        (md->ctypes[c] & ctype_space) == 0
2512        )
2513       RRETURN(MATCH_NOMATCH);
2514     ecode++;
2515     break;
2516 
2517     case OP_NOT_WORDCHAR:
2518     if (eptr >= md->end_subject)
2519       {
2520       SCHECK_PARTIAL();
2521       RRETURN(MATCH_NOMATCH);
2522       }
2523     GETCHARINCTEST(c, eptr);
2524     if (
2525 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2526        c < 256 &&
2527 #endif
2528        (md->ctypes[c] & ctype_word) != 0
2529        )
2530       RRETURN(MATCH_NOMATCH);
2531     ecode++;
2532     break;
2533 
2534     case OP_WORDCHAR:
2535     if (eptr >= md->end_subject)
2536       {
2537       SCHECK_PARTIAL();
2538       RRETURN(MATCH_NOMATCH);
2539       }
2540     GETCHARINCTEST(c, eptr);
2541     if (
2542 #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2543        c > 255 ||
2544 #endif
2545        (md->ctypes[c] & ctype_word) == 0
2546        )
2547       RRETURN(MATCH_NOMATCH);
2548     ecode++;
2549     break;
2550 
    /* Match a single newline sequence at the current subject position. The
    character is fetched (and eptr advanced past it) before it is classified
    by the inner switch. */

    case OP_ANYNL:
    if (eptr >= md->end_subject)
      {
      SCHECK_PARTIAL();
      RRETURN(MATCH_NOMATCH);
      }
    GETCHARINCTEST(c, eptr);
    switch(c)
      {
      /* Anything that is not listed below is not a newline. */

      default: RRETURN(MATCH_NOMATCH);

      /* CR matches on its own. If an LF follows immediately it is consumed as
      well, so that CRLF is treated as one item. If CR was the last character
      of the subject, SCHECK_PARTIAL() is invoked; when that macro does not
      return, CR alone is accepted. */

      case CHAR_CR:
      if (eptr >= md->end_subject)
        {
        SCHECK_PARTIAL();
        }
      else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
      break;

      /* LF always matches. */

      case CHAR_LF:
      break;

      /* The remaining newline characters are accepted only when
      md->bsr_anycrlf is not set. The two code points above 255 are compiled
      out for EBCDIC builds. */

      case CHAR_VT:
      case CHAR_FF:
      case CHAR_NEL:
#ifndef EBCDIC
      case 0x2028:
      case 0x2029:
#endif  /* Not EBCDIC */
      if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
      break;
      }
    ecode++;
    break;
2585 
2586     case OP_NOT_HSPACE:
2587     if (eptr >= md->end_subject)
2588       {
2589       SCHECK_PARTIAL();
2590       RRETURN(MATCH_NOMATCH);
2591       }
2592     GETCHARINCTEST(c, eptr);
2593     switch(c)
2594       {
2595       HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
2596       default: break;
2597       }
2598     ecode++;
2599     break;
2600 
2601     case OP_HSPACE:
2602     if (eptr >= md->end_subject)
2603       {
2604       SCHECK_PARTIAL();
2605       RRETURN(MATCH_NOMATCH);
2606       }
2607     GETCHARINCTEST(c, eptr);
2608     switch(c)
2609       {
2610       HSPACE_CASES: break;  /* Byte and multibyte cases */
2611       default: RRETURN(MATCH_NOMATCH);
2612       }
2613     ecode++;
2614     break;
2615 
2616     case OP_NOT_VSPACE:
2617     if (eptr >= md->end_subject)
2618       {
2619       SCHECK_PARTIAL();
2620       RRETURN(MATCH_NOMATCH);
2621       }
2622     GETCHARINCTEST(c, eptr);
2623     switch(c)
2624       {
2625       VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2626       default: break;
2627       }
2628     ecode++;
2629     break;
2630 
2631     case OP_VSPACE:
2632     if (eptr >= md->end_subject)
2633       {
2634       SCHECK_PARTIAL();
2635       RRETURN(MATCH_NOMATCH);
2636       }
2637     GETCHARINCTEST(c, eptr);
2638     switch(c)
2639       {
2640       VSPACE_CASES: break;
2641       default: RRETURN(MATCH_NOMATCH);
2642       }
2643     ecode++;
2644     break;
2645 
2646 #ifdef SUPPORT_UCP
2647     /* Check the next character by Unicode property. We will get here only
2648     if the support is in the binary; otherwise a compile-time error occurs. */
2649 
2650     case OP_PROP:
2651     case OP_NOTPROP:
2652     if (eptr >= md->end_subject)
2653       {
2654       SCHECK_PARTIAL();
2655       RRETURN(MATCH_NOMATCH);
2656       }
2657     GETCHARINCTEST(c, eptr);
2658       {
2659       const pcre_uint32 *cp;
2660       const ucd_record *prop = GET_UCD(c); /* LOOP_COUNT: Warning, no CHK in this block! */
2661 
2662       switch(ecode[1])
2663         {
2664         case PT_ANY:
2665         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2666         break;
2667 
2668         case PT_LAMP:
2669         if ((prop->chartype == ucp_Lu ||
2670              prop->chartype == ucp_Ll ||
2671              prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2672           RRETURN(MATCH_NOMATCH);
2673         break;
2674 
2675         case PT_GC:
2676         if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2677           RRETURN(MATCH_NOMATCH);
2678         break;
2679 
2680         case PT_PC:
2681         if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2682           RRETURN(MATCH_NOMATCH);
2683         break;
2684 
2685         case PT_SC:
2686         if ((ecode[2] != prop->script) == (op == OP_PROP))
2687           RRETURN(MATCH_NOMATCH);
2688         break;
2689 
2690         /* These are specials */
2691 
2692         case PT_ALNUM:
2693         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2694              PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2695           RRETURN(MATCH_NOMATCH);
2696         break;
2697 
2698         /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2699         which means that Perl space and POSIX space are now identical. PCRE
2700         was changed at release 8.34. */
2701 
2702         case PT_SPACE:    /* Perl space */
2703         case PT_PXSPACE:  /* POSIX space */
2704         switch(c)
2705           {
2706           HSPACE_CASES:
2707           VSPACE_CASES:
2708           if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2709           break;
2710 
2711           default:
2712           if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2713             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2714           break;
2715           }
2716         break;
2717 
2718         case PT_WORD:
2719         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2720              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2721              c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2722           RRETURN(MATCH_NOMATCH);
2723         break;
2724 
2725         case PT_CLIST:
2726         cp = PRIV(ucd_caseless_sets) + ecode[2];
2727         for (;;) /* LOOP_COUNT: COST */
2728           {
2729           if (c < *cp)
2730             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2731           if (c == *cp++)
2732             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2733 	  COST(1);
2734           }
2735         break;
2736 
2737         case PT_UCNC:
2738         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2739              c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2740              c >= 0xe000) == (op == OP_NOTPROP))
2741           RRETURN(MATCH_NOMATCH);
2742         break;
2743 
2744         /* This should never occur */
2745 
2746         default:
2747         RRETURN(PCRE_ERROR_INTERNAL);
2748         }
2749 
2750       ecode += 3;
2751       }
2752     break;
2753 
2754     /* Match an extended Unicode sequence. We will get here only if the support
2755     is in the binary; otherwise a compile-time error occurs. */
2756 
2757     case OP_EXTUNI:
2758     if (eptr >= md->end_subject)
2759       {
2760       SCHECK_PARTIAL();
2761       RRETURN(MATCH_NOMATCH);
2762       }
2763     else
2764       {
2765 #ifndef ERLANG_INTEGRATION
2766       int lgb, rgb;
2767 #endif
2768       GETCHARINCTEST(c, eptr);
2769       lgb = UCD_GRAPHBREAK(c);
2770       while (eptr < md->end_subject) /* LOOP_COUNT: CHK */
2771         {
2772         int len = 1;
2773         if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2774         rgb = UCD_GRAPHBREAK(c);
2775         if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2776         lgb = rgb;
2777         eptr += len;
2778 	COST_CHK(1);
2779         }
2780       }
2781     CHECK_PARTIAL();
2782     ecode++;
2783     break;
2784 #endif  /* SUPPORT_UCP */
2785 
2786 
2787     /* Match a back reference, possibly repeatedly. Look past the end of the
2788     item to see if there is repeat information following. The code is similar
2789     to that for character classes, but repeated for efficiency. Then obey
2790     similar code to character type repeats - written out again for speed.
2791     However, if the referenced string is the empty string, always treat
2792     it as matched, any number of times (otherwise there could be infinite
2793     loops). If the reference is unset, there are two possibilities:
2794 
2795     (a) In the default, Perl-compatible state, set the length negative;
2796     this ensures that every attempt at a match fails. We can't just fail
2797     here, because of the possibility of quantifiers with zero minima.
2798 
2799     (b) If the JavaScript compatibility flag is set, set the length to zero
2800     so that the back reference matches an empty string.
2801 
2802     Otherwise, set the length to the length of what was matched by the
2803     referenced subpattern.
2804 
2805     The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2806     or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2807     and OP_DNREFI are used. In this case we must scan the list of groups to
2808     which the name refers, and use the first one that is set. */
2809 
2810     case OP_DNREF:
2811     case OP_DNREFI:
2812     caseless = op == OP_DNREFI;
2813       {
2814       int count = GET2(ecode, 1+IMM2_SIZE);
2815       pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
2816       ecode += 1 + 2*IMM2_SIZE;
2817 
2818       /* Setting the default length first and initializing 'offset' avoids
2819       compiler warnings in the REF_REPEAT code. */
2820 
2821       length = (md->jscript_compat)? 0 : -1;
2822       offset = 0;
2823 
2824       while (count-- > 0) /* LOOP_COUNT: COST */
2825         {
2826         offset = GET2(slot, 0) << 1;
2827         if (offset < offset_top && md->offset_vector[offset] >= 0)
2828           {
2829           length = md->offset_vector[offset+1] - md->offset_vector[offset];
2830           break;
2831           }
2832         slot += md->name_entry_size;
2833         }
2834         COST(1);
2835       }
2836     goto REF_REPEAT;
2837 
2838     case OP_REF:
2839     case OP_REFI:
2840     caseless = op == OP_REFI;
2841     offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2842     ecode += 1 + IMM2_SIZE;
2843     if (offset >= offset_top || md->offset_vector[offset] < 0)
2844       length = (md->jscript_compat)? 0 : -1;
2845     else
2846       length = md->offset_vector[offset+1] - md->offset_vector[offset];
2847 
2848     /* Set up for repetition, or handle the non-repeated case */
2849 
2850     REF_REPEAT:
2851     switch (*ecode)
2852       {
2853       case OP_CRSTAR:
2854       case OP_CRMINSTAR:
2855       case OP_CRPLUS:
2856       case OP_CRMINPLUS:
2857       case OP_CRQUERY:
2858       case OP_CRMINQUERY:
2859       c = *ecode++ - OP_CRSTAR;
2860       minimize = (c & 1) != 0;
2861       min = rep_min[c];                 /* Pick up values from tables; */
2862       max = rep_max[c];                 /* zero for max => infinity */
2863       if (max == 0) max = INT_MAX;
2864       break;
2865 
2866       case OP_CRRANGE:
2867       case OP_CRMINRANGE:
2868       minimize = (*ecode == OP_CRMINRANGE);
2869       min = GET2(ecode, 1);
2870       max = GET2(ecode, 1 + IMM2_SIZE);
2871       if (max == 0) max = INT_MAX;
2872       ecode += 1 + 2 * IMM2_SIZE;
2873       break;
2874 
2875       default:               /* No repeat follows */
2876       if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2877         {
2878         if (length == -2) eptr = md->end_subject;   /* Partial match */
2879         CHECK_PARTIAL();
2880         RRETURN(MATCH_NOMATCH);
2881         }
2882       eptr += length;
2883       continue;              /* With the main loop */
2884       }
2885 
    /* Handle repeated back references. If the length of the reference is
    zero, just continue with the main loop. If the length is negative, it
    means the reference is unset in non-JavaScript-compatible mode. If the
    minimum is zero, we can continue at the same level without recursion. For
    any other minimum, carrying on will result in NOMATCH. */
2891 
2892     if (length == 0) continue;
2893     if (length < 0 && min == 0) continue;
2894 
2895     /* First, ensure the minimum number of matches are present. We get back
2896     the length of the reference string explicitly rather than passing the
2897     address of eptr, so that eptr can be a register variable. */
2898     COST(min);
2899     for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
2900       {
2901       int slength;
2902       if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2903         {
2904         if (slength == -2) eptr = md->end_subject;   /* Partial match */
2905         CHECK_PARTIAL();
2906         RRETURN(MATCH_NOMATCH);
2907         }
2908       eptr += slength;
2909       }
2910 
2911     /* If min = max, continue at the same level without recursion.
2912     They are not both allowed to be zero. */
2913 
2914     if (min == max) continue;
2915 
2916     /* If minimizing, keep trying and advancing the pointer */
2917 
2918     if (minimize)
2919       {
2920       for (fi = min;; fi++) /* LOOP_COUNT: Ok */
2921         {
2922         int slength;
2923         RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2924         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2925         if (fi >= max) RRETURN(MATCH_NOMATCH);
2926         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2927           {
2928           if (slength == -2) eptr = md->end_subject;   /* Partial match */
2929           CHECK_PARTIAL();
2930           RRETURN(MATCH_NOMATCH);
2931           }
2932         eptr += slength;
2933         }
2934       /* Control never gets here */
2935       }
2936 
2937     /* If maximizing, find the longest string and work backwards */
2938 
2939     else
2940       {
2941       pp = eptr;
2942       for (i = min; i < max; i++)
2943         {
2944         int slength;
2945         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0) /* LOOP_COUNT: CHK */
2946           {
2947           /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2948           the soft partial matching case. */
2949 
2950           if (slength == -2 && md->partial != 0 &&
2951               md->end_subject > md->start_used_ptr)
2952             {
2953             md->hitend = TRUE;
2954             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2955             }
2956           break;
2957           }
2958         eptr += slength;
2959 	COST_CHK(1);
2960         }
2961 
2962       while (eptr >= pp) /* LOOP_COUNT: Ok */
2963         {
2964         RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2965         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2966         eptr -= length;
2967         }
2968       RRETURN(MATCH_NOMATCH);
2969       }
2970     /* Control never gets here */
2971 
2972     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2973     used when all the characters in the class have values in the range 0-255,
2974     and either the matching is caseful, or the characters are in the range
2975     0-127 when UTF-8 processing is enabled. The only difference between
2976     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2977     encountered.
2978 
2979     First, look past the end of the item to see if there is repeat information
2980     following. Then obey similar code to character type repeats - written out
2981     again for speed. */
2982 
2983     case OP_NCLASS:
2984     case OP_CLASS:
2985       {
2986       /* The data variable is saved across frames, so the byte map needs to
2987       be stored there. */
2988 #define BYTE_MAP ((pcre_uint8 *)data)
2989       data = ecode + 1;                /* Save for matching */
2990       ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2991 #ifdef ERLANG_INTEGRATION
2992       EDEBUGF(("OP_(N)CLASS (%d)...",*ecode));
2993 #endif
2994 
2995       switch (*ecode)
2996         {
2997         case OP_CRSTAR:
2998         case OP_CRMINSTAR:
2999         case OP_CRPLUS:
3000         case OP_CRMINPLUS:
3001         case OP_CRQUERY:
3002         case OP_CRMINQUERY:
3003         case OP_CRPOSSTAR:
3004         case OP_CRPOSPLUS:
3005         case OP_CRPOSQUERY:
3006         c = *ecode++ - OP_CRSTAR;
3007         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3008         else possessive = TRUE;
3009         min = rep_min[c];                 /* Pick up values from tables; */
3010         max = rep_max[c];                 /* zero for max => infinity */
3011         if (max == 0) max = INT_MAX;
3012         break;
3013 
3014         case OP_CRRANGE:
3015         case OP_CRMINRANGE:
3016         case OP_CRPOSRANGE:
3017         minimize = (*ecode == OP_CRMINRANGE);
3018         possessive = (*ecode == OP_CRPOSRANGE);
3019         min = GET2(ecode, 1);
3020         max = GET2(ecode, 1 + IMM2_SIZE);
3021         if (max == 0) max = INT_MAX;
3022         ecode += 1 + 2 * IMM2_SIZE;
3023         break;
3024 
3025         default:               /* No repeat follows */
3026         min = max = 1;
3027         break;
3028         }
3029 
3030       /* First, ensure the minimum number of matches are present. */
3031 
3032 #ifdef SUPPORT_UTF
3033       if (utf)
3034         {
3035 	COST(min);
3036         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
3037           {
3038           if (eptr >= md->end_subject)
3039             {
3040             SCHECK_PARTIAL();
3041             RRETURN(MATCH_NOMATCH);
3042             }
3043           GETCHARINC(c, eptr);
3044           if (c > 255)
3045             {
3046             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3047             }
3048           else
3049             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3050           }
3051         }
3052       else
3053 #endif
3054       /* Not UTF mode */
3055         {
3056 	COST(min);
3057         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
3058           {
3059           if (eptr >= md->end_subject)
3060             {
3061             SCHECK_PARTIAL();
3062             RRETURN(MATCH_NOMATCH);
3063             }
3064           c = *eptr++;
3065 #ifndef COMPILE_PCRE8
3066           if (c > 255)
3067             {
3068             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3069             }
3070           else
3071 #endif
3072             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3073           }
3074         }
3075 
3076       /* If max == min we can continue with the main loop without the
3077       need to recurse. */
3078 
3079       if (min == max) continue;
3080 
3081       /* If minimizing, keep testing the rest of the expression and advancing
3082       the pointer while it matches the class. */
3083 
3084       if (minimize)
3085         {
3086 #ifdef SUPPORT_UTF
3087         if (utf)
3088           {
3089           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
3090             {
3091             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
3092             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3093             if (fi >= max) RRETURN(MATCH_NOMATCH);
3094             if (eptr >= md->end_subject)
3095               {
3096               SCHECK_PARTIAL();
3097               RRETURN(MATCH_NOMATCH);
3098               }
3099             GETCHARINC(c, eptr);
3100             if (c > 255)
3101               {
3102               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3103               }
3104             else
3105               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3106             }
3107           }
3108         else
3109 #endif
3110         /* Not UTF mode */
3111           {
3112           for (fi = min;; fi++)  /* LOOP_COUNT: Ok */
3113             {
3114             RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
3115             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3116             if (fi >= max) RRETURN(MATCH_NOMATCH);
3117             if (eptr >= md->end_subject)
3118               {
3119               SCHECK_PARTIAL();
3120               RRETURN(MATCH_NOMATCH);
3121               }
3122             c = *eptr++;
3123 #ifndef COMPILE_PCRE8
3124             if (c > 255)
3125               {
3126               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3127               }
3128             else
3129 #endif
3130               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3131             }
3132           }
3133         /* Control never gets here */
3134         }
3135 
3136       /* If maximizing, find the longest possible run, then work backwards. */
3137 
3138       else
3139         {
3140         pp = eptr;
3141 
3142 #ifdef SUPPORT_UTF
3143         if (utf)
3144           {
3145           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
3146             {
3147             int len = 1;
3148             if (eptr >= md->end_subject)
3149               {
3150               SCHECK_PARTIAL();
3151               break;
3152               }
3153             GETCHARLEN(c, eptr, len);
3154             if (c > 255)
3155               {
3156               if (op == OP_CLASS) break;
3157               }
3158             else
3159               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3160             eptr += len;
3161 	    COST_CHK(1);
3162             }
3163 
3164           if (possessive) continue;    /* No backtracking */
3165 
3166           for (;;) /* LOOP_COUNT: Ok */
3167             {
3168             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
3169             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3170             if (eptr-- <= pp) break;        /* Stop if tried at original pos */
3171             BACKCHAR(eptr);
3172             }
3173           }
3174         else
3175 #endif
3176           /* Not UTF mode */
3177           {
3178           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
3179             {
3180             if (eptr >= md->end_subject)
3181               {
3182               SCHECK_PARTIAL();
3183               break;
3184               }
3185             c = *eptr;
3186 #ifndef COMPILE_PCRE8
3187             if (c > 255)
3188               {
3189               if (op == OP_CLASS) break;
3190               }
3191             else
3192 #endif
3193               if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3194 	    COST_CHK(1);
3195             eptr++;
3196             }
3197 
3198           if (possessive) continue;    /* No backtracking */
3199 
3200           while (eptr >= pp) /* LOOP_COUNT: Ok */
3201             {
3202             RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3203             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3204             eptr--;
3205             }
3206           }
3207 
3208         RRETURN(MATCH_NOMATCH);
3209         }
3210 #undef BYTE_MAP
3211       }
3212     /* Control never gets here */
3213 
3214 
3215     /* Match an extended character class. In the 8-bit library, this opcode is
    encountered only when UTF-8 mode is supported. In the 16-bit and
3217     32-bit libraries, codepoints greater than 255 may be encountered even when
3218     UTF is not supported. */
3219 
3220 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3221     case OP_XCLASS:
3222       {
3223       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
3224       ecode += GET(ecode, 1);                      /* Advance past the item */
3225 
3226       switch (*ecode)
3227         {
3228         case OP_CRSTAR:
3229         case OP_CRMINSTAR:
3230         case OP_CRPLUS:
3231         case OP_CRMINPLUS:
3232         case OP_CRQUERY:
3233         case OP_CRMINQUERY:
3234         case OP_CRPOSSTAR:
3235         case OP_CRPOSPLUS:
3236         case OP_CRPOSQUERY:
3237         c = *ecode++ - OP_CRSTAR;
3238         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3239         else possessive = TRUE;
3240         min = rep_min[c];                 /* Pick up values from tables; */
3241         max = rep_max[c];                 /* zero for max => infinity */
3242         if (max == 0) max = INT_MAX;
3243         break;
3244 
3245         case OP_CRRANGE:
3246         case OP_CRMINRANGE:
3247         case OP_CRPOSRANGE:
3248         minimize = (*ecode == OP_CRMINRANGE);
3249         possessive = (*ecode == OP_CRPOSRANGE);
3250         min = GET2(ecode, 1);
3251         max = GET2(ecode, 1 + IMM2_SIZE);
3252         if (max == 0) max = INT_MAX;
3253         ecode += 1 + 2 * IMM2_SIZE;
3254         break;
3255 
3256         default:               /* No repeat follows */
3257         min = max = 1;
3258         break;
3259         }
3260 
3261       /* First, ensure the minimum number of matches are present. */
3262       COST(min);
3263       for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
3264         {
3265         if (eptr >= md->end_subject)
3266           {
3267           SCHECK_PARTIAL();
3268           RRETURN(MATCH_NOMATCH);
3269           }
3270         GETCHARINCTEST(c, eptr);
3271         if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3272         }
3273 
3274       /* If max == min we can continue with the main loop without the
3275       need to recurse. */
3276 
3277       if (min == max) continue;
3278 
3279       /* If minimizing, keep testing the rest of the expression and advancing
3280       the pointer while it matches the class. */
3281 
3282       if (minimize)
3283         {
3284         for (fi = min;; fi++) /* LOOP_COUNT: Ok */
3285           {
3286           RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3287           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3288           if (fi >= max) RRETURN(MATCH_NOMATCH);
3289           if (eptr >= md->end_subject)
3290             {
3291             SCHECK_PARTIAL();
3292             RRETURN(MATCH_NOMATCH);
3293             }
3294           GETCHARINCTEST(c, eptr);
3295           if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3296           }
3297         /* Control never gets here */
3298         }
3299 
3300       /* If maximizing, find the longest possible run, then work backwards. */
3301 
3302       else
3303         {
3304         pp = eptr;
3305         for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
3306           {
3307           int len = 1;
3308           if (eptr >= md->end_subject)
3309             {
3310             SCHECK_PARTIAL();
3311             break;
3312             }
3313 #ifdef SUPPORT_UTF
3314           GETCHARLENTEST(c, eptr, len);
3315 #else
3316           c = *eptr;
3317 #endif
3318           if (!PRIV(xclass)(c, data, utf)) break;
3319           eptr += len;
3320 	  COST_CHK(1);
3321           }
3322 
3323         if (possessive) continue;    /* No backtracking */
3324 
3325         for(;;) /* LOOP_COUNT: Ok */
3326           {
3327           RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3328           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3329           if (eptr-- <= pp) break;        /* Stop if tried at original pos */
3330 #ifdef SUPPORT_UTF
3331           if (utf) BACKCHAR(eptr);
3332 #endif
3333           }
3334         RRETURN(MATCH_NOMATCH);
3335         }
3336 
3337       /* Control never gets here */
3338       }
3339 #endif    /* End of XCLASS */
3340 
3341     /* Match a single character, casefully */
3342 
3343     case OP_CHAR:
3344 #ifdef SUPPORT_UTF
3345     if (utf)
3346       {
3347       length = 1;
3348       ecode++;
3349       GETCHARLEN(fc, ecode, length);
3350       if (length > md->end_subject - eptr)
3351         {
3352         CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
3353         RRETURN(MATCH_NOMATCH);
3354         }
3355       while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH); /* LOOP_COUNT: Ok */
3356       }
3357     else
3358 #endif
3359     /* Not UTF mode */
3360       {
3361       if (md->end_subject - eptr < 1)
3362         {
3363         SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3364         RRETURN(MATCH_NOMATCH);
3365         }
3366       EDEBUGF(("code to match:%d, code is:%d",ecode[1],*eptr));
3367       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3368       ecode += 2;
3369       }
3370     break;
3371 
3372     /* Match a single character, caselessly. If we are at the end of the
3373     subject, give up immediately. */
3374 
3375     case OP_CHARI:
3376     if (eptr >= md->end_subject)
3377       {
3378       SCHECK_PARTIAL();
3379       RRETURN(MATCH_NOMATCH);
3380       }
3381 
3382 #ifdef SUPPORT_UTF
3383     if (utf)
3384       {
3385       length = 1;
3386       ecode++;
3387       GETCHARLEN(fc, ecode, length);
3388 
3389       /* If the pattern character's value is < 128, we have only one byte, and
3390       we know that its other case must also be one byte long, so we can use the
3391       fast lookup table. We know that there is at least one byte left in the
3392       subject. */
3393 
3394       if (fc < 128)
3395         {
3396         pcre_uint32 cc = UCHAR21(eptr);
3397         if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3398         ecode++;
3399         eptr++;
3400         }
3401 
3402       /* Otherwise we must pick up the subject character. Note that we cannot
3403       use the value of "length" to check for sufficient bytes left, because the
3404       other case of the character may have more or fewer bytes.  */
3405 
3406       else
3407         {
3408         pcre_uint32 dc;
3409         GETCHARINC(dc, eptr);
3410         ecode += length;
3411 
3412         /* If we have Unicode property support, we can use it to test the other
3413         case of the character, if there is one. */
3414 
3415         if (fc != dc)
3416           {
3417 #ifdef SUPPORT_UCP
3418           if (dc != UCD_OTHERCASE(fc))
3419 #endif
3420             RRETURN(MATCH_NOMATCH);
3421           }
3422         }
3423       }
3424     else
3425 #endif   /* SUPPORT_UTF */
3426 
3427     /* Not UTF mode */
3428       {
3429       if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3430           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3431       eptr++;
3432       ecode += 2;
3433       }
3434     break;
3435 
3436     /* Match a single character repeatedly. */
3437     /* Each group sets the repeat limits and flags, then reaches REPEATCHAR. */
3438     case OP_EXACT:
3439     case OP_EXACTI:
3440     min = max = GET2(ecode, 1);       /* Fixed count stored after the opcode */
3441     ecode += 1 + IMM2_SIZE;
3442     goto REPEATCHAR;
3443 
3444     case OP_POSUPTO:
3445     case OP_POSUPTOI:
3446     possessive = TRUE;
3447     /* Fall through */
3448 
3449     case OP_UPTO:
3450     case OP_UPTOI:
3451     case OP_MINUPTO:
3452     case OP_MINUPTOI:
3453     min = 0;
3454     max = GET2(ecode, 1);             /* Upper limit stored after the opcode */
3455     minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
3456     ecode += 1 + IMM2_SIZE;
3457     goto REPEATCHAR;
3458 
3459     case OP_POSSTAR:
3460     case OP_POSSTARI:
3461     possessive = TRUE;
3462     min = 0;
3463     max = INT_MAX;                    /* INT_MAX stands for "no upper limit" */
3464     ecode++;
3465     goto REPEATCHAR;
3466 
3467     case OP_POSPLUS:
3468     case OP_POSPLUSI:
3469     possessive = TRUE;
3470     min = 1;
3471     max = INT_MAX;
3472     ecode++;
3473     goto REPEATCHAR;
3474 
3475     case OP_POSQUERY:
3476     case OP_POSQUERYI:
3477     possessive = TRUE;
3478     min = 0;
3479     max = 1;
3480     ecode++;
3481     goto REPEATCHAR;
3482     /* Below, c is the opcode's offset from OP_STAR or OP_STARI (a table index). */
3483     case OP_STAR:
3484     case OP_STARI:
3485     case OP_MINSTAR:
3486     case OP_MINSTARI:
3487     case OP_PLUS:
3488     case OP_PLUSI:
3489     case OP_MINPLUS:
3490     case OP_MINPLUSI:
3491     case OP_QUERY:
3492     case OP_QUERYI:
3493     case OP_MINQUERY:
3494     case OP_MINQUERYI:
3495     c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3496     minimize = (c & 1) != 0;
3497     min = rep_min[c];                 /* Pick up values from tables; */
3498     max = rep_max[c];                 /* zero for max => infinity */
3499     if (max == 0) max = INT_MAX;
3500     /* Execution continues at REPEATCHAR below. */
3501     /* Common code for all repeated single-character matches. We first check
3502     for the minimum number of characters. If the minimum equals the maximum, we
3503     are done. Otherwise, if minimizing, check the rest of the pattern for a
3504     match; if there isn't one, advance up to the maximum, one character at a
3505     time.
3506 
3507     If maximizing, advance up to the maximum number of matching characters,
3508     until eptr is past the end of the maximum run. If possessive, we are
3509     then done (no backing up). Otherwise, match at this position; anything
3510     other than no match is immediately returned. For nomatch, back up one
3511     character and try again (nothing in this block matches \R, so there is
3512     no \r\n special case here). When we reach the first optional
3513     character position, we can save stack by doing a tail recurse.
3514 
3515     The various UTF/non-UTF and caseful/caseless cases are handled separately,
3516     for speed. */
3517 
3518     REPEATCHAR:
3519 #ifdef SUPPORT_UTF
3520     if (utf)
3521       {
3522       length = 1;
3523       charptr = ecode;                /* Start of the pattern character, for memcmp */
3524       GETCHARLEN(fc, ecode, length);
3525       ecode += length;
3526 
3527       /* Handle multibyte character matching specially here. There is
3528       support for caseless matching if UCP support is present. */
3529 
3530       if (length > 1)
3531         {
3532 #ifdef SUPPORT_UCP
3533         pcre_uint32 othercase;
3534         if (op >= OP_STARI &&     /* Caseless */
3535             (othercase = UCD_OTHERCASE(fc)) != fc)
3536           oclength = PRIV(ord2utf)(othercase, occhars);
3537         else oclength = 0;            /* 0 => no other case to try */
3538 #endif  /* SUPPORT_UCP */
3539 	COST(min);
3540         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
3541           {
3542           if (eptr <= md->end_subject - length &&
3543             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3544 #ifdef SUPPORT_UCP
3545           else if (oclength > 0 &&
3546                    eptr <= md->end_subject - oclength &&
3547                    memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3548 #endif  /* SUPPORT_UCP */
3549           else
3550             {
3551             CHECK_PARTIAL();
3552             RRETURN(MATCH_NOMATCH);
3553             }
3554           }
3555 
3556         if (min == max) continue;
3557 
3558         if (minimize)
3559           {
3560           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
3561             {
3562             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3563             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3564             if (fi >= max) RRETURN(MATCH_NOMATCH);
3565             if (eptr <= md->end_subject - length &&
3566               memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3567 #ifdef SUPPORT_UCP
3568             else if (oclength > 0 &&
3569                      eptr <= md->end_subject - oclength &&
3570                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3571 #endif  /* SUPPORT_UCP */
3572             else
3573               {
3574               CHECK_PARTIAL();
3575               RRETURN(MATCH_NOMATCH);
3576               }
3577             }
3578           /* Control never gets here */
3579           }
3580 
3581         else  /* Maximize */
3582           {
3583           pp = eptr;                  /* First optional position; backing up stops here */
3584           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
3585             {
3586             if (eptr <= md->end_subject - length &&
3587                 memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3588 #ifdef SUPPORT_UCP
3589             else if (oclength > 0 &&
3590                      eptr <= md->end_subject - oclength &&
3591                      memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3592 #endif  /* SUPPORT_UCP */
3593             else
3594               {
3595               CHECK_PARTIAL();
3596               break;
3597               }
3598 	    COST_CHK(1);
3599             }
3600 
3601           if (possessive) continue;    /* No backtracking */
3602           for(;;) /* LOOP_COUNT: Ok */
3603             {
3604             if (eptr <= pp) goto TAIL_RECURSE;
3605             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3606             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3607 #ifdef SUPPORT_UCP
3608             eptr--;
3609             BACKCHAR(eptr);           /* Matched char may be either case, so find its start */
3610 #else   /* without SUPPORT_UCP */
3611             eptr -= length;
3612 #endif  /* SUPPORT_UCP */
3613             }
3614           }
3615         /* Control never gets here */
3616         }
3617 
3618       /* If the length of a UTF-8 character is 1, we fall through here, and
3619       obey the code as for non-UTF-8 characters below, though in this case the
3620       value of fc will always be < 128. */
3621       }
3622     else
3623 #endif  /* SUPPORT_UTF */
3624       /* When not in UTF-8 mode, load a single-byte character. */
3625       fc = *ecode++;
3626 
3627     /* The value of fc at this point is always one character, though we may
3628     or may not be in UTF mode. The code is duplicated for the caseless and
3629     caseful cases, for speed, since matching characters is likely to be quite
3630     common. First, ensure the minimum number of matches are present. If min =
3631     max, continue at the same level without recursing. Otherwise, if
3632     minimizing, keep trying the rest of the expression and advancing one
3633     matching character if failing, up to the maximum. Alternatively, if
3634     maximizing, find the maximum number of characters and work backwards. */
3635 
3636     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3637       max, (char *)eptr));
3638 
3639     if (op >= OP_STARI)  /* Caseless */
3640       {
3641 #ifdef COMPILE_PCRE8
3642       /* fc must be < 128 if UTF is enabled. */
3643       foc = md->fcc[fc];
3644 #else
3645 #ifdef SUPPORT_UTF
3646 #ifdef SUPPORT_UCP
3647       if (utf && fc > 127)
3648         foc = UCD_OTHERCASE(fc);
3649 #else
3650       if (utf && fc > 127)
3651         foc = fc;
3652 #endif /* SUPPORT_UCP */
3653       else
3654 #endif /* SUPPORT_UTF */
3655         foc = TABLE_GET(fc, md->fcc, fc);
3656 #endif /* COMPILE_PCRE8 */
3657 
3658       for (i = 1; i <= min; i++) /* LOOP_COUNT: CHK */
3659         {
3660         pcre_uint32 cc;                 /* Faster than pcre_uchar */
3661         if (eptr >= md->end_subject)
3662           {
3663           SCHECK_PARTIAL();
3664           RRETURN(MATCH_NOMATCH);
3665           }
3666         cc = UCHAR21TEST(eptr);
3667         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3668         eptr++;
3669 	COST_CHK(1);
3670         }
3671       if (min == max) continue;
3672       if (minimize)
3673         {
3674         for (fi = min;; fi++)  /* LOOP_COUNT: Ok */
3675           {
3676           pcre_uint32 cc;               /* Faster than pcre_uchar */
3677           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3678           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3679           if (fi >= max) RRETURN(MATCH_NOMATCH);
3680           if (eptr >= md->end_subject)
3681             {
3682             SCHECK_PARTIAL();
3683             RRETURN(MATCH_NOMATCH);
3684             }
3685           cc = UCHAR21TEST(eptr);
3686           if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3687           eptr++;
3688           }
3689         /* Control never gets here */
3690         }
3691       else  /* Maximize */
3692         {
3693         pp = eptr;
3694         for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
3695           {
3696           pcre_uint32 cc;               /* Faster than pcre_uchar */
3697           if (eptr >= md->end_subject)
3698             {
3699             SCHECK_PARTIAL();
3700             break;
3701             }
3702           cc = UCHAR21TEST(eptr);
3703           if (fc != cc && foc != cc) break;
3704           eptr++;
3705 	  COST_CHK(1);
3706           }
3707         if (possessive) continue;       /* No backtracking */
3708         for (;;)  /* LOOP_COUNT: Ok */
3709           {
3710           if (eptr == pp) goto TAIL_RECURSE;
3711           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3712           eptr--;
3713           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3714           }
3715         /* Control never gets here */
3716         }
3717       }
3718 
3719     /* Caseful comparisons. Characters with length > 1 were dealt with above. */
3720 
3721     else
3722       {
3723       COST(min);
3724       for (i = 1; i <= min; i++)  /* LOOP_COUNT: COST */
3725         {
3726         if (eptr >= md->end_subject)
3727           {
3728           SCHECK_PARTIAL();
3729           RRETURN(MATCH_NOMATCH);
3730           }
3731         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3732         }
3733 
3734       if (min == max) continue;
3735 
3736       if (minimize)
3737         {
3738         for (fi = min;; fi++)  /* LOOP_COUNT: Ok */
3739           {
3740           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3741           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3742           if (fi >= max) RRETURN(MATCH_NOMATCH);
3743           if (eptr >= md->end_subject)
3744             {
3745             SCHECK_PARTIAL();
3746             RRETURN(MATCH_NOMATCH);
3747             }
3748           if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3749           }
3750         /* Control never gets here */
3751         }
3752       else  /* Maximize */
3753         {
3754         pp = eptr;
3755         for (i = min; i < max; i++)  /* LOOP_COUNT: CHK */
3756           {
3757           if (eptr >= md->end_subject)
3758             {
3759             SCHECK_PARTIAL();
3760             break;
3761             }
3762           if (fc != UCHAR21TEST(eptr)) break;
3763           eptr++;
3764 	  COST_CHK(1);
3765           }
3766         if (possessive) continue;    /* No backtracking */
3767         for (;;)  /* LOOP_COUNT: Ok */
3768           {
3769           if (eptr == pp) goto TAIL_RECURSE;
3770           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3771           eptr--;
3772           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3773           }
3774         /* Control never gets here */
3775         }
3776       }
3777     /* Control never gets here */
3778 
3779     /* Match a negated single character. In UTF mode both the pattern
3780     character and the subject character can be multibyte. */
3781 
3782     case OP_NOT:
3783     case OP_NOTI:
3784     if (eptr >= md->end_subject)
3785       {
3786       SCHECK_PARTIAL();
3787       RRETURN(MATCH_NOMATCH);
3788       }
3789 #ifdef SUPPORT_UTF
3790     if (utf)
3791       {
3792       register pcre_uint32 ch, och;
3793 
3794       ecode++;                        /* Step over the opcode */
3795       GETCHARINC(ch, ecode);          /* Pattern character */
3796       GETCHARINC(c, eptr);            /* Subject character */
3797 
3798       if (op == OP_NOT)
3799         {
3800         if (ch == c) RRETURN(MATCH_NOMATCH);
3801         }
3802       else                            /* OP_NOTI: also reject the other case of ch */
3803         {
3804 #ifdef SUPPORT_UCP
3805         if (ch > 127)
3806           och = UCD_OTHERCASE(ch);
3807 #else
3808         if (ch > 127)
3809           och = ch;                   /* Without UCP, no other case is known above 127 */
3810 #endif /* SUPPORT_UCP */
3811         else
3812           och = TABLE_GET(ch, md->fcc, ch);
3813         if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3814         }
3815       }
3816     else
3817 #endif
3818       {
3819       register pcre_uint32 ch = ecode[1];   /* Pattern character follows the opcode */
3820       c = *eptr++;
3821       if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3822         RRETURN(MATCH_NOMATCH);
3823       ecode += 2;
3824       }
3825     break;
3826 
3827     /* Match a negated single character repeatedly. This is almost a repeat
3828     of the code for a repeated single character, but I haven't found a
3829     nice way of commoning these up that doesn't require a test of the
3830     positive/negative option for each character match. Maybe that wouldn't add
3831     very much to the time taken, but character matching *is* what this is all
3832     about... */
3833     /* Each group sets the repeat limits and flags, then reaches REPEATNOTCHAR. */
3834     case OP_NOTEXACT:
3835     case OP_NOTEXACTI:
3836     min = max = GET2(ecode, 1);       /* Fixed count stored after the opcode */
3837     ecode += 1 + IMM2_SIZE;
3838     goto REPEATNOTCHAR;
3839 
3840     case OP_NOTUPTO:
3841     case OP_NOTUPTOI:
3842     case OP_NOTMINUPTO:
3843     case OP_NOTMINUPTOI:
3844     min = 0;
3845     max = GET2(ecode, 1);             /* Upper limit stored after the opcode */
3846     minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3847     ecode += 1 + IMM2_SIZE;
3848     goto REPEATNOTCHAR;
3849 
3850     case OP_NOTPOSSTAR:
3851     case OP_NOTPOSSTARI:
3852     possessive = TRUE;
3853     min = 0;
3854     max = INT_MAX;                    /* INT_MAX stands for "no upper limit" */
3855     ecode++;
3856     goto REPEATNOTCHAR;
3857 
3858     case OP_NOTPOSPLUS:
3859     case OP_NOTPOSPLUSI:
3860     possessive = TRUE;
3861     min = 1;
3862     max = INT_MAX;
3863     ecode++;
3864     goto REPEATNOTCHAR;
3865 
3866     case OP_NOTPOSQUERY:
3867     case OP_NOTPOSQUERYI:
3868     possessive = TRUE;
3869     min = 0;
3870     max = 1;
3871     ecode++;
3872     goto REPEATNOTCHAR;
3873 
3874     case OP_NOTPOSUPTO:
3875     case OP_NOTPOSUPTOI:
3876     possessive = TRUE;
3877     min = 0;
3878     max = GET2(ecode, 1);
3879     ecode += 1 + IMM2_SIZE;
3880     goto REPEATNOTCHAR;
3881     /* Below, c is the opcode's offset from OP_NOTSTAR or OP_NOTSTARI (a table index). */
3882     case OP_NOTSTAR:
3883     case OP_NOTSTARI:
3884     case OP_NOTMINSTAR:
3885     case OP_NOTMINSTARI:
3886     case OP_NOTPLUS:
3887     case OP_NOTPLUSI:
3888     case OP_NOTMINPLUS:
3889     case OP_NOTMINPLUSI:
3890     case OP_NOTQUERY:
3891     case OP_NOTQUERYI:
3892     case OP_NOTMINQUERY:
3893     case OP_NOTMINQUERYI:
3894     c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3895     minimize = (c & 1) != 0;
3896     min = rep_min[c];                 /* Pick up values from tables; */
3897     max = rep_max[c];                 /* zero for max => infinity */
3898     if (max == 0) max = INT_MAX;
3899     /* Execution continues at REPEATNOTCHAR below. */
3900     /* Common code for all repeated negated single-character matches. */
3901 
3902     REPEATNOTCHAR:
3903     GETCHARINCTEST(fc, ecode);        /* fc is the character that must not match */
3904 
3905     /* The code is duplicated for the caseless and caseful cases, for speed,
3906     since matching characters is likely to be quite common. First, ensure the
3907     minimum number of matches are present. If min = max, continue at the same
3908     level without recursing. Otherwise, if minimizing, keep trying the rest of
3909     the expression and advancing one matching character if failing, up to the
3910     maximum. Alternatively, if maximizing, find the maximum number of
3911     characters and work backwards. */
3912 
3913     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3914       max, (char *)eptr));
3915 
3916     if (op >= OP_NOTSTARI)     /* Caseless */
3917       {
3918 #ifdef SUPPORT_UTF
3919 #ifdef SUPPORT_UCP
3920       if (utf && fc > 127)
3921         foc = UCD_OTHERCASE(fc);
3922 #else
3923       if (utf && fc > 127)
3924         foc = fc;
3925 #endif /* SUPPORT_UCP */
3926       else
3927 #endif /* SUPPORT_UTF */
3928         foc = TABLE_GET(fc, md->fcc, fc);
3929 
3930 #ifdef SUPPORT_UTF
3931       if (utf)
3932         {
3933         register pcre_uint32 d;
3934 	COST(min);
3935         for (i = 1; i <= min; i++)  /* LOOP_COUNT: COST */
3936           {
3937           if (eptr >= md->end_subject)
3938             {
3939             SCHECK_PARTIAL();
3940             RRETURN(MATCH_NOMATCH);
3941             }
3942           GETCHARINC(d, eptr);
3943           if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3944           }
3945         }
3946       else
3947 #endif  /* SUPPORT_UTF */
3948       /* Not UTF mode */
3949         {
3950 	COST(min);
3951         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
3952           {
3953           if (eptr >= md->end_subject)
3954             {
3955             SCHECK_PARTIAL();
3956             RRETURN(MATCH_NOMATCH);
3957             }
3958           if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
3959           eptr++;
3960           }
3961         }
3962 
3963       if (min == max) continue;
3964 
3965       if (minimize)
3966         {
3967 #ifdef SUPPORT_UTF
3968         if (utf)
3969           {
3970           register pcre_uint32 d;
3971           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
3972             {
3973             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3974             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3975             if (fi >= max) RRETURN(MATCH_NOMATCH);
3976             if (eptr >= md->end_subject)
3977               {
3978               SCHECK_PARTIAL();
3979               RRETURN(MATCH_NOMATCH);
3980               }
3981             GETCHARINC(d, eptr);
3982             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3983             }
3984           }
3985         else
3986 #endif  /*SUPPORT_UTF */
3987         /* Not UTF mode */
3988           {
3989           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
3990             {
3991             RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3992             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3993             if (fi >= max) RRETURN(MATCH_NOMATCH);
3994             if (eptr >= md->end_subject)
3995               {
3996               SCHECK_PARTIAL();
3997               RRETURN(MATCH_NOMATCH);
3998               }
3999             if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
4000             eptr++;
4001             }
4002           }
4003         /* Control never gets here */
4004         }
4005 
4006       /* Maximize case */
4007 
4008       else
4009         {
4010         pp = eptr;                    /* First optional position; backing up stops here */
4011 
4012 #ifdef SUPPORT_UTF
4013         if (utf)
4014           {
4015 	  register pcre_uint32 d;
4016           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
4017             {
4018             int len = 1;
4019             if (eptr >= md->end_subject)
4020               {
4021               SCHECK_PARTIAL();
4022               break;
4023               }
4024             GETCHARLEN(d, eptr, len);
4025             if (fc == d || (unsigned int)foc == d) break;
4026             eptr += len;
4027 	    COST_CHK(1); /* 'd' is not alive */
4028             }
4029           if (possessive) continue;    /* No backtracking */
4030           for(;;)  /* LOOP_COUNT: Ok */
4031             {
4032             if (eptr <= pp) goto TAIL_RECURSE;
4033             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
4034             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4035             eptr--;
4036             BACKCHAR(eptr);
4037             }
4038           }
4039         else
4040 #endif  /* SUPPORT_UTF */
4041         /* Not UTF mode */
4042           {
4043           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
4044             {
4045             if (eptr >= md->end_subject)
4046               {
4047               SCHECK_PARTIAL();
4048               break;
4049               }
4050             if (fc == *eptr || foc == *eptr) break;
4051             eptr++;
4052 	    COST_CHK(1);
4053             }
4054           if (possessive) continue;    /* No backtracking */
4055           for (;;) /* LOOP_COUNT: Ok */
4056             {
4057             if (eptr == pp) goto TAIL_RECURSE;
4058             RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
4059             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4060             eptr--;
4061             }
4062           }
4063         /* Control never gets here */
4064         }
4065       }
4066 
4067     /* Caseful comparisons */
4068 
4069     else
4070       {
4071 #ifdef SUPPORT_UTF
4072       if (utf)
4073         {
4074         register pcre_uint32 d;
4075         for (i = 1; i <= min; i++)  /* LOOP_COUNT: CHK */
4076           {
4077           if (eptr >= md->end_subject)
4078             {
4079             SCHECK_PARTIAL();
4080             RRETURN(MATCH_NOMATCH);
4081             }
4082           GETCHARINC(d, eptr);
4083           if (fc == d) RRETURN(MATCH_NOMATCH);
4084 	  COST_CHK(1);
4085           }
4086         }
4087       else
4088 #endif
4089       /* Not UTF mode */
4090         {
4091 	COST(min);
4092         for (i = 1; i <= min; i++) /* LOOP_COUNT: Cost */
4093           {
4094           if (eptr >= md->end_subject)
4095             {
4096             SCHECK_PARTIAL();
4097             RRETURN(MATCH_NOMATCH);
4098             }
4099           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4100           }
4101         }
4102 
4103       if (min == max) continue;
4104 
4105       if (minimize)
4106         {
4107 #ifdef SUPPORT_UTF
4108         if (utf)
4109           {
4110           register pcre_uint32 d;
4111           for (fi = min;; fi++)  /* LOOP_COUNT: Ok */
4112             {
4113             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
4114             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4115             if (fi >= max) RRETURN(MATCH_NOMATCH);
4116             if (eptr >= md->end_subject)
4117               {
4118               SCHECK_PARTIAL();
4119               RRETURN(MATCH_NOMATCH);
4120               }
4121             GETCHARINC(d, eptr);
4122             if (fc == d) RRETURN(MATCH_NOMATCH);
4123             }
4124           }
4125         else
4126 #endif
4127         /* Not UTF mode */
4128           {
4129           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
4130             {
4131             RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
4132             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4133             if (fi >= max) RRETURN(MATCH_NOMATCH);
4134             if (eptr >= md->end_subject)
4135               {
4136               SCHECK_PARTIAL();
4137               RRETURN(MATCH_NOMATCH);
4138               }
4139             if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4140             }
4141           }
4142         /* Control never gets here */
4143         }
4144 
4145       /* Maximize case */
4146 
4147       else
4148         {
4149         pp = eptr;                    /* First optional position; backing up stops here */
4150 
4151 #ifdef SUPPORT_UTF
4152         if (utf)
4153           {
4154           register pcre_uint32 d;
4155           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
4156             {
4157             int len = 1;
4158             if (eptr >= md->end_subject)
4159               {
4160               SCHECK_PARTIAL();
4161               break;
4162               }
4163             GETCHARLEN(d, eptr, len);
4164             if (fc == d) break;
4165             eptr += len;
4166 	    COST_CHK(1);
4167             }
4168           if (possessive) continue;    /* No backtracking */
4169           for(;;)  /* LOOP_COUNT: Ok */
4170             {
4171             if (eptr <= pp) goto TAIL_RECURSE;
4172             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4173             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4174             eptr--;
4175             BACKCHAR(eptr);
4176             }
4177           }
4178         else
4179 #endif
4180         /* Not UTF mode */
4181           {
4182           for (i = min; i < max; i++)  /* LOOP_COUNT: CHK */
4183             {
4184             if (eptr >= md->end_subject)
4185               {
4186               SCHECK_PARTIAL();
4187               break;
4188               }
4189             if (fc == *eptr) break;
4190             eptr++;
4191 	    COST_CHK(1);
4192             }
4193           if (possessive) continue;    /* No backtracking */
4194           for (;;) /* LOOP_COUNT: Ok */
4195             {
4196             if (eptr == pp) goto TAIL_RECURSE;
4197             RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4198             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4199             eptr--;
4200             }
4201           }
4202         /* Control never gets here */
4203         }
4204       }
4205     /* Control never gets here */
4206 
4207     /* Match a single character type repeatedly; several different opcodes
4208     share code. This is very similar to the code for single characters, but we
4209     repeat it in the interests of efficiency. */
4210     /* Each group sets the repeat limits and flags, then reaches REPEATTYPE. */
4211     case OP_TYPEEXACT:
4212     min = max = GET2(ecode, 1);       /* Fixed count stored after the opcode */
4213     minimize = TRUE;                  /* NOTE(review): set although min == max; reason not visible here */
4214     ecode += 1 + IMM2_SIZE;
4215     goto REPEATTYPE;
4216 
4217     case OP_TYPEUPTO:
4218     case OP_TYPEMINUPTO:
4219     min = 0;
4220     max = GET2(ecode, 1);             /* Upper limit stored after the opcode */
4221     minimize = *ecode == OP_TYPEMINUPTO;
4222     ecode += 1 + IMM2_SIZE;
4223     goto REPEATTYPE;
4224 
4225     case OP_TYPEPOSSTAR:
4226     possessive = TRUE;
4227     min = 0;
4228     max = INT_MAX;                    /* INT_MAX stands for "no upper limit" */
4229     ecode++;
4230     goto REPEATTYPE;
4231 
4232     case OP_TYPEPOSPLUS:
4233     possessive = TRUE;
4234     min = 1;
4235     max = INT_MAX;
4236     ecode++;
4237     goto REPEATTYPE;
4238 
4239     case OP_TYPEPOSQUERY:
4240     possessive = TRUE;
4241     min = 0;
4242     max = 1;
4243     ecode++;
4244     goto REPEATTYPE;
4245 
4246     case OP_TYPEPOSUPTO:
4247     possessive = TRUE;
4248     min = 0;
4249     max = GET2(ecode, 1);
4250     ecode += 1 + IMM2_SIZE;
4251     goto REPEATTYPE;
4252     /* Below, c is the opcode's offset from OP_TYPESTAR (a table index). */
4253     case OP_TYPESTAR:
4254     case OP_TYPEMINSTAR:
4255     case OP_TYPEPLUS:
4256     case OP_TYPEMINPLUS:
4257     case OP_TYPEQUERY:
4258     case OP_TYPEMINQUERY:
4259     c = *ecode++ - OP_TYPESTAR;
4260     minimize = (c & 1) != 0;
4261     min = rep_min[c];                 /* Pick up values from tables; */
4262     max = rep_max[c];                 /* zero for max => infinity */
4263     if (max == 0) max = INT_MAX;
4264     /* Execution continues at REPEATTYPE below. */
4265     /* Common code for all repeated single character type matches. Note that
4266     in UTF-8 mode, '.' matches a character of any length, but for the other
4267     character types, the valid characters are all one-byte long. */
4268 
4269     REPEATTYPE:
4270     ctype = *ecode++;      /* Code for the character type */
4271 
4272 #ifdef SUPPORT_UCP
4273     if (ctype == OP_PROP || ctype == OP_NOTPROP)
4274       {
4275       prop_fail_result = ctype == OP_NOTPROP;
4276       prop_type = *ecode++;
4277       prop_value = *ecode++;
4278       }
4279     else prop_type = -1;
4280 #endif
4281 
4282     /* First, ensure the minimum number of matches are present. Use inline
4283     code for maximizing the speed, and do the type test once at the start
4284     (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4285     is tidier. Also separate the UCP code, which can be the same for both UTF-8
4286     and single-bytes. */
4287 
4288     if (min > 0)
4289       {
4290 #ifdef SUPPORT_UCP
4291       if (prop_type >= 0)
4292         {
4293 	COST(min);
4294         switch(prop_type)
4295           {
4296           case PT_ANY:
4297           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4298           for (i = 1; i <= min; i++) /* LOOP_COUNT: COST (above) */
4299             {
4300             if (eptr >= md->end_subject)
4301               {
4302               SCHECK_PARTIAL();
4303               RRETURN(MATCH_NOMATCH);
4304               }
4305             GETCHARINCTEST(c, eptr);
4306             }
4307           break;
4308 
4309           case PT_LAMP:
4310           for (i = 1; i <= min; i++) /* LOOP_COUNT: COST (above) */
4311             {
4312             int chartype;
4313             if (eptr >= md->end_subject)
4314               {
4315               SCHECK_PARTIAL();
4316               RRETURN(MATCH_NOMATCH);
4317               }
4318             GETCHARINCTEST(c, eptr);
4319             chartype = UCD_CHARTYPE(c);
4320             if ((chartype == ucp_Lu ||
4321                  chartype == ucp_Ll ||
4322                  chartype == ucp_Lt) == prop_fail_result)
4323               RRETURN(MATCH_NOMATCH);
4324             }
4325           break;
4326 
4327           case PT_GC:
4328           for (i = 1; i <= min; i++) /* LOOP_COUNT: COST (above) */
4329             {
4330             if (eptr >= md->end_subject)
4331               {
4332               SCHECK_PARTIAL();
4333               RRETURN(MATCH_NOMATCH);
4334               }
4335             GETCHARINCTEST(c, eptr);
4336             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4337               RRETURN(MATCH_NOMATCH);
4338             }
4339           break;
4340 
4341           case PT_PC:
4342           for (i = 1; i <= min; i++) /* LOOP_COUNT: COST (above) */
4343             {
4344             if (eptr >= md->end_subject)
4345               {
4346               SCHECK_PARTIAL();
4347               RRETURN(MATCH_NOMATCH);
4348               }
4349             GETCHARINCTEST(c, eptr);
4350             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4351               RRETURN(MATCH_NOMATCH);
4352             }
4353           break;
4354 
4355           case PT_SC:
4356           for (i = 1; i <= min; i++) /* LOOP_COUNT: COST (above) */
4357             {
4358             if (eptr >= md->end_subject)
4359               {
4360               SCHECK_PARTIAL();
4361               RRETURN(MATCH_NOMATCH);
4362               }
4363             GETCHARINCTEST(c, eptr);
4364             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4365               RRETURN(MATCH_NOMATCH);
4366             }
4367           break;
4368 
4369           case PT_ALNUM: /* LOOP_COUNT: COST (above) */
4370           for (i = 1; i <= min; i++)
4371             {
4372             int category;
4373             if (eptr >= md->end_subject)
4374               {
4375               SCHECK_PARTIAL();
4376               RRETURN(MATCH_NOMATCH);
4377               }
4378             GETCHARINCTEST(c, eptr);
4379             category = UCD_CATEGORY(c);
4380             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4381               RRETURN(MATCH_NOMATCH);
4382             }
4383           break;
4384 
4385           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4386           which means that Perl space and POSIX space are now identical. PCRE
4387           was changed at release 8.34. */
4388 
4389           case PT_SPACE:    /* Perl space */
4390           case PT_PXSPACE:  /* POSIX space */
4391           for (i = 1; i <= min; i++) /* LOOP_COUNT: COST (above) */
4392             {
4393             if (eptr >= md->end_subject)
4394               {
4395               SCHECK_PARTIAL();
4396               RRETURN(MATCH_NOMATCH);
4397               }
4398             GETCHARINCTEST(c, eptr);
4399             switch(c)
4400               {
4401               HSPACE_CASES:
4402               VSPACE_CASES:
4403               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4404               break;
4405 
4406               default:
4407               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
4408                 RRETURN(MATCH_NOMATCH);
4409               break;
4410               }
4411             }
4412           break;
4413 
4414           case PT_WORD:
4415           for (i = 1; i <= min; i++) /* LOOP_COUNT: COST (above) */
4416             {
4417             int category;
4418             if (eptr >= md->end_subject)
4419               {
4420               SCHECK_PARTIAL();
4421               RRETURN(MATCH_NOMATCH);
4422               }
4423             GETCHARINCTEST(c, eptr);
4424             category = UCD_CATEGORY(c);
4425             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4426                    == prop_fail_result)
4427               RRETURN(MATCH_NOMATCH);
4428             }
4429           break;
4430 
4431           case PT_CLIST:
4432           for (i = 1; i <= min; i++) /* LOOP_COUNT: COST (above) */
4433             {
4434             const pcre_uint32 *cp;
4435             if (eptr >= md->end_subject)
4436               {
4437               SCHECK_PARTIAL();
4438               RRETURN(MATCH_NOMATCH);
4439               }
4440             GETCHARINCTEST(c, eptr);
4441             cp = PRIV(ucd_caseless_sets) + prop_value;
4442             for (;;) /* LOOP_COUNT: COST */
4443               {
4444               if (c < *cp)
4445                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4446               if (c == *cp++)
4447                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4448 	      COST(1);
4449               }
4450             }
4451           break;
4452 
4453           case PT_UCNC:
4454           for (i = 1; i <= min; i++) /* LOOP_COUNT: COST (above) */
4455             {
4456             if (eptr >= md->end_subject)
4457               {
4458               SCHECK_PARTIAL();
4459               RRETURN(MATCH_NOMATCH);
4460               }
4461             GETCHARINCTEST(c, eptr);
4462             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4463                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4464                  c >= 0xe000) == prop_fail_result)
4465               RRETURN(MATCH_NOMATCH);
4466             }
4467           break;
4468 
4469           /* This should not occur */
4470 
4471           default:
4472           RRETURN(PCRE_ERROR_INTERNAL);
4473           }
4474         }
4475 
4476       /* Match extended Unicode sequences. We will get here only if the
4477       support is in the binary; otherwise a compile-time error occurs. */
4478 
4479       else if (ctype == OP_EXTUNI)
4480         {
4481 	COST(min);
4482         for (i = 1; i <= min; i++)  /* LOOP_COUNT: COST */
4483           {
4484           if (eptr >= md->end_subject)
4485             {
4486             SCHECK_PARTIAL();
4487             RRETURN(MATCH_NOMATCH);
4488             }
4489           else
4490             {
4491 #ifndef ERLANG_INTEGRATION
4492             int lgb, rgb;
4493 #endif
4494             GETCHARINCTEST(c, eptr);
4495             lgb = UCD_GRAPHBREAK(c);
4496            while (eptr < md->end_subject) /* LOOP_COUNT: CHK */
4497               {
4498               int len = 1;
4499               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4500               rgb = UCD_GRAPHBREAK(c);
4501               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4502               lgb = rgb;
4503               eptr += len;
4504 	      COST_CHK(1);
4505               }
4506             }
4507           CHECK_PARTIAL();
4508           }
4509         }
4510 
4511       else
4512 #endif     /* SUPPORT_UCP */
4513 
/* Handle all other cases when in UTF mode (UTF-8, UTF-16 or UTF-32, depending
on the code unit width the library is compiled for) */
4515 
4516 #ifdef SUPPORT_UTF
4517       if (utf) switch(ctype)
4518         {
4519         case OP_ANY:
4520 	COST(min);
4521         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4522           {
4523           if (eptr >= md->end_subject)
4524             {
4525             SCHECK_PARTIAL();
4526             RRETURN(MATCH_NOMATCH);
4527             }
4528           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4529           if (md->partial != 0 &&
4530               eptr + 1 >= md->end_subject &&
4531               NLBLOCK->nltype == NLTYPE_FIXED &&
4532               NLBLOCK->nllen == 2 &&
4533               UCHAR21(eptr) == NLBLOCK->nl[0])
4534             {
4535             md->hitend = TRUE;
4536             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4537             }
4538           eptr++;
4539           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4540           }
4541         break;
4542 
4543         case OP_ALLANY:
4544 	COST(min);
4545         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4546           {
4547           if (eptr >= md->end_subject)
4548             {
4549             SCHECK_PARTIAL();
4550             RRETURN(MATCH_NOMATCH);
4551             }
4552           eptr++;
4553           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4554           }
4555         break;
4556 
4557         case OP_ANYBYTE:
4558         if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4559         eptr += min;
4560         break;
4561 
4562         case OP_ANYNL:
4563 	COST(min);
4564         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4565           {
4566           if (eptr >= md->end_subject)
4567             {
4568             SCHECK_PARTIAL();
4569             RRETURN(MATCH_NOMATCH);
4570             }
4571           GETCHARINC(c, eptr);
4572           switch(c)
4573             {
4574             default: RRETURN(MATCH_NOMATCH);
4575 
4576             case CHAR_CR:
4577             if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
4578             break;
4579 
4580             case CHAR_LF:
4581             break;
4582 
4583             case CHAR_VT:
4584             case CHAR_FF:
4585             case CHAR_NEL:
4586 #ifndef EBCDIC
4587             case 0x2028:
4588             case 0x2029:
4589 #endif  /* Not EBCDIC */
4590             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4591             break;
4592             }
4593           }
4594         break;
4595 
4596         case OP_NOT_HSPACE:
4597 	COST(min);
4598         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4599           {
4600           if (eptr >= md->end_subject)
4601             {
4602             SCHECK_PARTIAL();
4603             RRETURN(MATCH_NOMATCH);
4604             }
4605           GETCHARINC(c, eptr);
4606           switch(c)
4607             {
4608             HSPACE_CASES: RRETURN(MATCH_NOMATCH);  /* Byte and multibyte cases */
4609             default: break;
4610             }
4611           }
4612         break;
4613 
4614         case OP_HSPACE:
4615 	COST(min);
4616         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4617           {
4618           if (eptr >= md->end_subject)
4619             {
4620             SCHECK_PARTIAL();
4621             RRETURN(MATCH_NOMATCH);
4622             }
4623           GETCHARINC(c, eptr);
4624           switch(c)
4625             {
4626             HSPACE_CASES: break;  /* Byte and multibyte cases */
4627             default: RRETURN(MATCH_NOMATCH);
4628             }
4629           }
4630         break;
4631 
4632         case OP_NOT_VSPACE:
4633 	COST(min);
4634         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4635           {
4636           if (eptr >= md->end_subject)
4637             {
4638             SCHECK_PARTIAL();
4639             RRETURN(MATCH_NOMATCH);
4640             }
4641           GETCHARINC(c, eptr);
4642           switch(c)
4643             {
4644             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4645             default: break;
4646             }
4647           }
4648         break;
4649 
4650         case OP_VSPACE:
4651 	COST(min);
4652         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4653           {
4654           if (eptr >= md->end_subject)
4655             {
4656             SCHECK_PARTIAL();
4657             RRETURN(MATCH_NOMATCH);
4658             }
4659           GETCHARINC(c, eptr);
4660           switch(c)
4661             {
4662             VSPACE_CASES: break;
4663             default: RRETURN(MATCH_NOMATCH);
4664             }
4665           }
4666         break;
4667 
4668         case OP_NOT_DIGIT:
4669 	COST(min);
4670         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4671           {
4672           if (eptr >= md->end_subject)
4673             {
4674             SCHECK_PARTIAL();
4675             RRETURN(MATCH_NOMATCH);
4676             }
4677           GETCHARINC(c, eptr);
4678           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4679             RRETURN(MATCH_NOMATCH);
4680           }
4681         break;
4682 
4683         case OP_DIGIT:
4684 	COST(min);
4685         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4686           {
4687           pcre_uint32 cc;
4688           if (eptr >= md->end_subject)
4689             {
4690             SCHECK_PARTIAL();
4691             RRETURN(MATCH_NOMATCH);
4692             }
4693           cc = UCHAR21(eptr);
4694           if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4695             RRETURN(MATCH_NOMATCH);
4696           eptr++;
4697           /* No need to skip more bytes - we know it's a 1-byte character */
4698           }
4699         break;
4700 
4701         case OP_NOT_WHITESPACE:
4702 	COST(min);
4703         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4704           {
4705           pcre_uint32 cc;
4706           if (eptr >= md->end_subject)
4707             {
4708             SCHECK_PARTIAL();
4709             RRETURN(MATCH_NOMATCH);
4710             }
4711           cc = UCHAR21(eptr);
4712           if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4713             RRETURN(MATCH_NOMATCH);
4714           eptr++;
4715           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4716           }
4717         break;
4718 
4719         case OP_WHITESPACE:
4720 	COST(min);
4721         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4722           {
4723           pcre_uint32 cc;
4724           if (eptr >= md->end_subject)
4725             {
4726             SCHECK_PARTIAL();
4727             RRETURN(MATCH_NOMATCH);
4728             }
4729           cc = UCHAR21(eptr);
4730           if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4731             RRETURN(MATCH_NOMATCH);
4732           eptr++;
4733           /* No need to skip more bytes - we know it's a 1-byte character */
4734           }
4735         break;
4736 
4737         case OP_NOT_WORDCHAR:
4738 	COST(min);
4739         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4740           {
4741           pcre_uint32 cc;
4742           if (eptr >= md->end_subject)
4743             {
4744             SCHECK_PARTIAL();
4745             RRETURN(MATCH_NOMATCH);
4746             }
4747           cc = UCHAR21(eptr);
4748           if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4749             RRETURN(MATCH_NOMATCH);
4750           eptr++;
4751           ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4752           }
4753         break;
4754 
4755         case OP_WORDCHAR:
4756 	COST(min);
4757         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4758           {
4759           pcre_uint32 cc;
4760           if (eptr >= md->end_subject)
4761             {
4762             SCHECK_PARTIAL();
4763             RRETURN(MATCH_NOMATCH);
4764             }
4765           cc = UCHAR21(eptr);
4766           if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4767             RRETURN(MATCH_NOMATCH);
4768           eptr++;
4769           /* No need to skip more bytes - we know it's a 1-byte character */
4770           }
4771         break;
4772 
4773         default:
4774         RRETURN(PCRE_ERROR_INTERNAL);
4775         }  /* End switch(ctype) */
4776 
4777       else
4778 #endif     /* SUPPORT_UTF */
4779 
      /* Code for the non-UTF case (including builds without SUPPORT_UTF) for
      minimum matching of operators other than OP_PROP and OP_NOTPROP. */
4782 
4783       switch(ctype)
4784         {
4785         case OP_ANY:
4786 	COST(min);
4787         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4788           {
4789           if (eptr >= md->end_subject)
4790             {
4791             SCHECK_PARTIAL();
4792             RRETURN(MATCH_NOMATCH);
4793             }
4794           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4795           if (md->partial != 0 &&
4796               eptr + 1 >= md->end_subject &&
4797               NLBLOCK->nltype == NLTYPE_FIXED &&
4798               NLBLOCK->nllen == 2 &&
4799               *eptr == NLBLOCK->nl[0])
4800             {
4801             md->hitend = TRUE;
4802             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4803             }
4804           eptr++;
4805           }
4806         break;
4807 
4808         case OP_ALLANY:
4809         if (eptr > md->end_subject - min)
4810           {
4811           SCHECK_PARTIAL();
4812           RRETURN(MATCH_NOMATCH);
4813           }
4814         eptr += min;
4815         break;
4816 
4817         case OP_ANYBYTE:
4818         if (eptr > md->end_subject - min)
4819           {
4820           SCHECK_PARTIAL();
4821           RRETURN(MATCH_NOMATCH);
4822           }
4823         eptr += min;
4824         break;
4825 
4826         case OP_ANYNL:
4827 	COST(min);
4828         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4829           {
4830           if (eptr >= md->end_subject)
4831             {
4832             SCHECK_PARTIAL();
4833             RRETURN(MATCH_NOMATCH);
4834             }
4835           switch(*eptr++)
4836             {
4837             default: RRETURN(MATCH_NOMATCH);
4838 
4839             case CHAR_CR:
4840             if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4841             break;
4842 
4843             case CHAR_LF:
4844             break;
4845 
4846             case CHAR_VT:
4847             case CHAR_FF:
4848             case CHAR_NEL:
4849 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4850             case 0x2028:
4851             case 0x2029:
4852 #endif
4853             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4854             break;
4855             }
4856           }
4857         break;
4858 
4859         case OP_NOT_HSPACE:
4860 	COST(min);
4861         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4862           {
4863           if (eptr >= md->end_subject)
4864             {
4865             SCHECK_PARTIAL();
4866             RRETURN(MATCH_NOMATCH);
4867             }
4868           switch(*eptr++)
4869             {
4870             default: break;
4871             HSPACE_BYTE_CASES:
4872 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4873             HSPACE_MULTIBYTE_CASES:
4874 #endif
4875             RRETURN(MATCH_NOMATCH);
4876             }
4877           }
4878         break;
4879 
4880         case OP_HSPACE:
4881 	COST(min);
4882         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4883           {
4884           if (eptr >= md->end_subject)
4885             {
4886             SCHECK_PARTIAL();
4887             RRETURN(MATCH_NOMATCH);
4888             }
4889           switch(*eptr++)
4890             {
4891             default: RRETURN(MATCH_NOMATCH);
4892             HSPACE_BYTE_CASES:
4893 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4894             HSPACE_MULTIBYTE_CASES:
4895 #endif
4896             break;
4897             }
4898           }
4899         break;
4900 
4901         case OP_NOT_VSPACE:
4902 	COST(min);
4903         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4904           {
4905           if (eptr >= md->end_subject)
4906             {
4907             SCHECK_PARTIAL();
4908             RRETURN(MATCH_NOMATCH);
4909             }
4910           switch(*eptr++)
4911             {
4912             VSPACE_BYTE_CASES:
4913 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4914             VSPACE_MULTIBYTE_CASES:
4915 #endif
4916             RRETURN(MATCH_NOMATCH);
4917             default: break;
4918             }
4919           }
4920         break;
4921 
4922         case OP_VSPACE:
4923 	COST(min);
4924         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4925           {
4926           if (eptr >= md->end_subject)
4927             {
4928             SCHECK_PARTIAL();
4929             RRETURN(MATCH_NOMATCH);
4930             }
4931           switch(*eptr++)
4932             {
4933             default: RRETURN(MATCH_NOMATCH);
4934             VSPACE_BYTE_CASES:
4935 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4936             VSPACE_MULTIBYTE_CASES:
4937 #endif
4938             break;
4939             }
4940           }
4941         break;
4942 
4943         case OP_NOT_DIGIT:
4944 	COST(min);
4945         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4946           {
4947           if (eptr >= md->end_subject)
4948             {
4949             SCHECK_PARTIAL();
4950             RRETURN(MATCH_NOMATCH);
4951             }
4952           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4953             RRETURN(MATCH_NOMATCH);
4954           eptr++;
4955           }
4956         break;
4957 
4958         case OP_DIGIT:
4959 	COST(min);
4960         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4961           {
4962           if (eptr >= md->end_subject)
4963             {
4964             SCHECK_PARTIAL();
4965             RRETURN(MATCH_NOMATCH);
4966             }
4967           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4968             RRETURN(MATCH_NOMATCH);
4969           eptr++;
4970           }
4971         break;
4972 
4973         case OP_NOT_WHITESPACE:
4974 	COST(min);
4975         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4976           {
4977           if (eptr >= md->end_subject)
4978             {
4979             SCHECK_PARTIAL();
4980             RRETURN(MATCH_NOMATCH);
4981             }
4982           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4983             RRETURN(MATCH_NOMATCH);
4984           eptr++;
4985           }
4986         break;
4987 
4988         case OP_WHITESPACE:
4989 	COST(min);
4990         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
4991           {
4992           if (eptr >= md->end_subject)
4993             {
4994             SCHECK_PARTIAL();
4995             RRETURN(MATCH_NOMATCH);
4996             }
4997           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4998             RRETURN(MATCH_NOMATCH);
4999           eptr++;
5000           }
5001         break;
5002 
5003         case OP_NOT_WORDCHAR:
5004 	COST(min);
5005         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
5006           {
5007           if (eptr >= md->end_subject)
5008             {
5009             SCHECK_PARTIAL();
5010             RRETURN(MATCH_NOMATCH);
5011             }
5012           if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
5013             RRETURN(MATCH_NOMATCH);
5014           eptr++;
5015           }
5016         break;
5017 
5018         case OP_WORDCHAR:
5019 	COST(min);
5020         for (i = 1; i <= min; i++) /* LOOP_COUNT: COST */
5021           {
5022           if (eptr >= md->end_subject)
5023             {
5024             SCHECK_PARTIAL();
5025             RRETURN(MATCH_NOMATCH);
5026             }
5027           if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
5028             RRETURN(MATCH_NOMATCH);
5029           eptr++;
5030           }
5031         break;
5032 
5033         default:
5034         RRETURN(PCRE_ERROR_INTERNAL);
5035         }
5036       }
5037 
5038     /* If min = max, continue at the same level without recursing */
5039 
5040     if (min == max) continue;
5041 
5042     /* If minimizing, we have to test the rest of the pattern before each
5043     subsequent match. Again, separate the UTF-8 case for speed, and also
5044     separate the UCP cases. */
5045 
5046     if (minimize)
5047       {
5048 #ifdef SUPPORT_UCP
5049       if (prop_type >= 0)
5050         {
5051         switch(prop_type)
5052           {
5053           case PT_ANY:
5054           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5055             {
5056             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
5057             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5058             if (fi >= max) RRETURN(MATCH_NOMATCH);
5059             if (eptr >= md->end_subject)
5060               {
5061               SCHECK_PARTIAL();
5062               RRETURN(MATCH_NOMATCH);
5063               }
5064             GETCHARINCTEST(c, eptr);
5065             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5066             }
5067           /* Control never gets here */
5068 
5069           case PT_LAMP:
5070           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5071             {
5072             int chartype;
5073             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
5074             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5075             if (fi >= max) RRETURN(MATCH_NOMATCH);
5076             if (eptr >= md->end_subject)
5077               {
5078               SCHECK_PARTIAL();
5079               RRETURN(MATCH_NOMATCH);
5080               }
5081             GETCHARINCTEST(c, eptr);
5082             chartype = UCD_CHARTYPE(c);
5083             if ((chartype == ucp_Lu ||
5084                  chartype == ucp_Ll ||
5085                  chartype == ucp_Lt) == prop_fail_result)
5086               RRETURN(MATCH_NOMATCH);
5087             }
5088           /* Control never gets here */
5089 
5090           case PT_GC:
5091           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5092             {
5093             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
5094             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5095             if (fi >= max) RRETURN(MATCH_NOMATCH);
5096             if (eptr >= md->end_subject)
5097               {
5098               SCHECK_PARTIAL();
5099               RRETURN(MATCH_NOMATCH);
5100               }
5101             GETCHARINCTEST(c, eptr);
5102             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
5103               RRETURN(MATCH_NOMATCH);
5104             }
5105           /* Control never gets here */
5106 
5107           case PT_PC:
5108           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5109             {
5110             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
5111             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5112             if (fi >= max) RRETURN(MATCH_NOMATCH);
5113             if (eptr >= md->end_subject)
5114               {
5115               SCHECK_PARTIAL();
5116               RRETURN(MATCH_NOMATCH);
5117               }
5118             GETCHARINCTEST(c, eptr);
5119             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
5120               RRETURN(MATCH_NOMATCH);
5121             }
5122           /* Control never gets here */
5123 
5124           case PT_SC:
5125           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5126             {
5127             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
5128             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5129             if (fi >= max) RRETURN(MATCH_NOMATCH);
5130             if (eptr >= md->end_subject)
5131               {
5132               SCHECK_PARTIAL();
5133               RRETURN(MATCH_NOMATCH);
5134               }
5135             GETCHARINCTEST(c, eptr);
5136             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
5137               RRETURN(MATCH_NOMATCH);
5138             }
5139           /* Control never gets here */
5140 
5141           case PT_ALNUM:
5142           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5143             {
5144             int category;
5145             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
5146             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5147             if (fi >= max) RRETURN(MATCH_NOMATCH);
5148             if (eptr >= md->end_subject)
5149               {
5150               SCHECK_PARTIAL();
5151               RRETURN(MATCH_NOMATCH);
5152               }
5153             GETCHARINCTEST(c, eptr);
5154             category = UCD_CATEGORY(c);
5155             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5156               RRETURN(MATCH_NOMATCH);
5157             }
5158           /* Control never gets here */
5159 
5160           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5161           which means that Perl space and POSIX space are now identical. PCRE
5162           was changed at release 8.34. */
5163 
5164           case PT_SPACE:    /* Perl space */
5165           case PT_PXSPACE:  /* POSIX space */
5166           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5167             {
5168             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
5169             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5170             if (fi >= max) RRETURN(MATCH_NOMATCH);
5171             if (eptr >= md->end_subject)
5172               {
5173               SCHECK_PARTIAL();
5174               RRETURN(MATCH_NOMATCH);
5175               }
5176             GETCHARINCTEST(c, eptr);
5177             switch(c)
5178               {
5179               HSPACE_CASES:
5180               VSPACE_CASES:
5181               if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5182               break;
5183 
5184               default:
5185               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5186                 RRETURN(MATCH_NOMATCH);
5187               break;
5188               }
5189             }
5190           /* Control never gets here */
5191 
5192           case PT_WORD:
5193           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5194             {
5195             int category;
5196             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
5197             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5198             if (fi >= max) RRETURN(MATCH_NOMATCH);
5199             if (eptr >= md->end_subject)
5200               {
5201               SCHECK_PARTIAL();
5202               RRETURN(MATCH_NOMATCH);
5203               }
5204             GETCHARINCTEST(c, eptr);
5205             category = UCD_CATEGORY(c);
5206             if ((category == ucp_L ||
5207                  category == ucp_N ||
5208                  c == CHAR_UNDERSCORE)
5209                    == prop_fail_result)
5210               RRETURN(MATCH_NOMATCH);
5211             }
5212           /* Control never gets here */
5213 
5214           case PT_CLIST:
5215           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5216             {
5217             const pcre_uint32 *cp;
5218             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5219             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5220             if (fi >= max) RRETURN(MATCH_NOMATCH);
5221             if (eptr >= md->end_subject)
5222               {
5223               SCHECK_PARTIAL();
5224               RRETURN(MATCH_NOMATCH);
5225               }
5226             GETCHARINCTEST(c, eptr);
5227             cp = PRIV(ucd_caseless_sets) + prop_value;
5228             for (;;) /* LOOP_COUNT: COST */
5229               {
5230               if (c < *cp)
5231                 { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5232               if (c == *cp++)
5233                 { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5234 	      COST(1);
5235               }
5236             }
5237           /* Control never gets here */
5238 
5239           case PT_UCNC:
5240           for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5241             {
5242             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
5243             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5244             if (fi >= max) RRETURN(MATCH_NOMATCH);
5245             if (eptr >= md->end_subject)
5246               {
5247               SCHECK_PARTIAL();
5248               RRETURN(MATCH_NOMATCH);
5249               }
5250             GETCHARINCTEST(c, eptr);
5251             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5252                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5253                  c >= 0xe000) == prop_fail_result)
5254               RRETURN(MATCH_NOMATCH);
5255             }
5256           /* Control never gets here */
5257 
5258           /* This should never occur */
5259           default:
5260           RRETURN(PCRE_ERROR_INTERNAL);
5261           }
5262         }
5263 
5264       /* Match extended Unicode sequences. We will get here only if the
5265       support is in the binary; otherwise a compile-time error occurs. */
5266 
5267       else if (ctype == OP_EXTUNI)
5268         {
5269         for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5270           {
5271           RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5272           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5273           if (fi >= max) RRETURN(MATCH_NOMATCH);
5274           if (eptr >= md->end_subject)
5275             {
5276             SCHECK_PARTIAL();
5277             RRETURN(MATCH_NOMATCH);
5278             }
5279           else
5280             {
5281 #ifndef ERLANG_INTEGRATION
5282             int lgb, rgb;
5283 #endif
5284             GETCHARINCTEST(c, eptr);
5285             lgb = UCD_GRAPHBREAK(c);
5286             while (eptr < md->end_subject) /* LOOP_COUNT: CHK */
5287               {
5288               int len = 1;
5289               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5290               rgb = UCD_GRAPHBREAK(c);
5291               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5292               lgb = rgb;
5293               eptr += len;
5294 	      COST_CHK(1);
5295               }
5296             }
5297           CHECK_PARTIAL();
5298           }
5299         }
5300       else
5301 #endif     /* SUPPORT_UCP */
5302 
5303 #ifdef SUPPORT_UTF
5304       if (utf)
5305         {
5306         for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5307           {
5308           RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5309           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5310           if (fi >= max) RRETURN(MATCH_NOMATCH);
5311           if (eptr >= md->end_subject)
5312             {
5313             SCHECK_PARTIAL();
5314             RRETURN(MATCH_NOMATCH);
5315             }
5316           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5317             RRETURN(MATCH_NOMATCH);
5318           GETCHARINC(c, eptr);
5319           switch(ctype)
5320             {
5321             case OP_ANY:               /* This is the non-NL case */
5322             if (md->partial != 0 &&    /* Take care with CRLF partial */
5323                 eptr >= md->end_subject &&
5324                 NLBLOCK->nltype == NLTYPE_FIXED &&
5325                 NLBLOCK->nllen == 2 &&
5326                 c == NLBLOCK->nl[0])
5327               {
5328               md->hitend = TRUE;
5329               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5330               }
5331             break;
5332 
5333             case OP_ALLANY:
5334             case OP_ANYBYTE:
5335             break;
5336 
5337             case OP_ANYNL:
5338             switch(c)
5339               {
5340               default: RRETURN(MATCH_NOMATCH);
5341               case CHAR_CR:
5342               if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
5343               break;
5344 
5345               case CHAR_LF:
5346               break;
5347 
5348               case CHAR_VT:
5349               case CHAR_FF:
5350               case CHAR_NEL:
5351 #ifndef EBCDIC
5352               case 0x2028:
5353               case 0x2029:
5354 #endif  /* Not EBCDIC */
5355               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5356               break;
5357               }
5358             break;
5359 
5360             case OP_NOT_HSPACE:
5361             switch(c)
5362               {
5363               HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5364               default: break;
5365               }
5366             break;
5367 
5368             case OP_HSPACE:
5369             switch(c)
5370               {
5371               HSPACE_CASES: break;
5372               default: RRETURN(MATCH_NOMATCH);
5373               }
5374             break;
5375 
5376             case OP_NOT_VSPACE:
5377             switch(c)
5378               {
5379               VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5380               default: break;
5381               }
5382             break;
5383 
5384             case OP_VSPACE:
5385             switch(c)
5386               {
5387               VSPACE_CASES: break;
5388               default: RRETURN(MATCH_NOMATCH);
5389               }
5390             break;
5391 
5392             case OP_NOT_DIGIT:
5393             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5394               RRETURN(MATCH_NOMATCH);
5395             break;
5396 
5397             case OP_DIGIT:
5398             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5399               RRETURN(MATCH_NOMATCH);
5400             break;
5401 
5402             case OP_NOT_WHITESPACE:
5403             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5404               RRETURN(MATCH_NOMATCH);
5405             break;
5406 
5407             case OP_WHITESPACE:
5408             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5409               RRETURN(MATCH_NOMATCH);
5410             break;
5411 
5412             case OP_NOT_WORDCHAR:
5413             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5414               RRETURN(MATCH_NOMATCH);
5415             break;
5416 
5417             case OP_WORDCHAR:
5418             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5419               RRETURN(MATCH_NOMATCH);
5420             break;
5421 
5422             default:
5423             RRETURN(PCRE_ERROR_INTERNAL);
5424             }
5425           }
5426         }
5427       else
5428 #endif
5429       /* Not UTF mode */
5430         {
5431         for (fi = min;; fi++) /* LOOP_COUNT: Ok */
5432           {
5433           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5434           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5435           if (fi >= max) RRETURN(MATCH_NOMATCH);
5436           if (eptr >= md->end_subject)
5437             {
5438             SCHECK_PARTIAL();
5439             RRETURN(MATCH_NOMATCH);
5440             }
5441           if (ctype == OP_ANY && IS_NEWLINE(eptr))
5442             RRETURN(MATCH_NOMATCH);
5443           c = *eptr++;
5444           switch(ctype)
5445             {
5446             case OP_ANY:               /* This is the non-NL case */
5447             if (md->partial != 0 &&    /* Take care with CRLF partial */
5448                 eptr >= md->end_subject &&
5449                 NLBLOCK->nltype == NLTYPE_FIXED &&
5450                 NLBLOCK->nllen == 2 &&
5451                 c == NLBLOCK->nl[0])
5452               {
5453               md->hitend = TRUE;
5454               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5455               }
5456             break;
5457 
5458             case OP_ALLANY:
5459             case OP_ANYBYTE:
5460             break;
5461 
5462             case OP_ANYNL:
5463             switch(c)
5464               {
5465               default: RRETURN(MATCH_NOMATCH);
5466               case CHAR_CR:
5467               if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5468               break;
5469 
5470               case CHAR_LF:
5471               break;
5472 
5473               case CHAR_VT:
5474               case CHAR_FF:
5475               case CHAR_NEL:
5476 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5477               case 0x2028:
5478               case 0x2029:
5479 #endif
5480               if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5481               break;
5482               }
5483             break;
5484 
5485             case OP_NOT_HSPACE:
5486             switch(c)
5487               {
5488               default: break;
5489               HSPACE_BYTE_CASES:
5490 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5491               HSPACE_MULTIBYTE_CASES:
5492 #endif
5493               RRETURN(MATCH_NOMATCH);
5494               }
5495             break;
5496 
5497             case OP_HSPACE:
5498             switch(c)
5499               {
5500               default: RRETURN(MATCH_NOMATCH);
5501               HSPACE_BYTE_CASES:
5502 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5503               HSPACE_MULTIBYTE_CASES:
5504 #endif
5505               break;
5506               }
5507             break;
5508 
5509             case OP_NOT_VSPACE:
5510             switch(c)
5511               {
5512               default: break;
5513               VSPACE_BYTE_CASES:
5514 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5515               VSPACE_MULTIBYTE_CASES:
5516 #endif
5517               RRETURN(MATCH_NOMATCH);
5518               }
5519             break;
5520 
5521             case OP_VSPACE:
5522             switch(c)
5523               {
5524               default: RRETURN(MATCH_NOMATCH);
5525               VSPACE_BYTE_CASES:
5526 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5527               VSPACE_MULTIBYTE_CASES:
5528 #endif
5529               break;
5530               }
5531             break;
5532 
5533             case OP_NOT_DIGIT:
5534             if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5535             break;
5536 
5537             case OP_DIGIT:
5538             if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5539             break;
5540 
5541             case OP_NOT_WHITESPACE:
5542             if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5543             break;
5544 
5545             case OP_WHITESPACE:
5546             if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5547             break;
5548 
5549             case OP_NOT_WORDCHAR:
5550             if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5551             break;
5552 
5553             case OP_WORDCHAR:
5554             if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5555             break;
5556 
5557             default:
5558             RRETURN(PCRE_ERROR_INTERNAL);
5559             }
5560           }
5561         }
5562       /* Control never gets here */
5563       }
5564 
    /* If maximizing, it is worth using inline code for speed, doing the type
    test once at the start (i.e. keep it out of the loop). Again, keep the
    UTF and UCP stuff separate. */
5568 
5569     else
5570       {
5571       pp = eptr;  /* Remember where we started */
5572 
5573 #ifdef SUPPORT_UCP
5574       if (prop_type >= 0)
5575         {
5576         switch(prop_type)
5577           {
5578           case PT_ANY:
5579           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5580             {
5581             int len = 1;
5582             if (eptr >= md->end_subject)
5583               {
5584               SCHECK_PARTIAL();
5585               break;
5586               }
5587             GETCHARLENTEST(c, eptr, len);
5588             if (prop_fail_result) break;
5589             eptr+= len;
5590 	    COST_CHK(1);
5591             }
5592           break;
5593 
5594           case PT_LAMP:
5595           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5596             {
5597             int chartype;
5598             int len = 1;
5599             if (eptr >= md->end_subject)
5600               {
5601               SCHECK_PARTIAL();
5602               break;
5603               }
5604             GETCHARLENTEST(c, eptr, len);
5605             chartype = UCD_CHARTYPE(c);
5606             if ((chartype == ucp_Lu ||
5607                  chartype == ucp_Ll ||
5608                  chartype == ucp_Lt) == prop_fail_result)
5609               break;
5610             eptr+= len;
5611 	    COST_CHK(1);
5612             }
5613           break;
5614 
5615           case PT_GC:
5616           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5617             {
5618             int len = 1;
5619             if (eptr >= md->end_subject)
5620               {
5621               SCHECK_PARTIAL();
5622               break;
5623               }
5624             GETCHARLENTEST(c, eptr, len);
5625             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5626             eptr+= len;
5627 	    COST_CHK(1);
5628             }
5629           break;
5630 
5631           case PT_PC:
5632           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5633             {
5634             int len = 1;
5635             if (eptr >= md->end_subject)
5636               {
5637               SCHECK_PARTIAL();
5638               break;
5639               }
5640             GETCHARLENTEST(c, eptr, len);
5641             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5642             eptr+= len;
5643 	    COST_CHK(1);
5644             }
5645           break;
5646 
5647           case PT_SC:
5648           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5649             {
5650             int len = 1;
5651             if (eptr >= md->end_subject)
5652               {
5653               SCHECK_PARTIAL();
5654               break;
5655               }
5656             GETCHARLENTEST(c, eptr, len);
5657             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5658             eptr+= len;
5659 	    COST_CHK(1);
5660             }
5661           break;
5662 
5663           case PT_ALNUM:
5664           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5665             {
5666             int category;
5667             int len = 1;
5668             if (eptr >= md->end_subject)
5669               {
5670               SCHECK_PARTIAL();
5671               break;
5672               }
5673             GETCHARLENTEST(c, eptr, len);
5674             category = UCD_CATEGORY(c);
5675             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5676               break;
5677             eptr+= len;
5678 	    COST_CHK(1);
5679             }
5680           break;
5681 
5682           /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5683           which means that Perl space and POSIX space are now identical. PCRE
5684           was changed at release 8.34. */
5685 
5686           case PT_SPACE:    /* Perl space */
5687           case PT_PXSPACE:  /* POSIX space */
5688           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5689             {
5690             int len = 1;
5691             if (eptr >= md->end_subject)
5692               {
5693               SCHECK_PARTIAL();
5694               break;
5695               }
5696             GETCHARLENTEST(c, eptr, len);
5697             switch(c)
5698               {
5699               HSPACE_CASES:
5700               VSPACE_CASES:
5701               if (prop_fail_result) goto ENDLOOP99;  /* Break the loop */
5702               break;
5703 
5704               default:
5705               if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5706                 goto ENDLOOP99;   /* Break the loop */
5707               break;
5708               }
5709             eptr+= len;
5710 	    COST_CHK(1);
5711             }
5712           ENDLOOP99:
5713           break;
5714 
5715           case PT_WORD:
5716           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5717             {
5718             int category;
5719             int len = 1;
5720             if (eptr >= md->end_subject)
5721               {
5722               SCHECK_PARTIAL();
5723               break;
5724               }
5725             GETCHARLENTEST(c, eptr, len);
5726             category = UCD_CATEGORY(c);
5727             if ((category == ucp_L || category == ucp_N ||
5728                  c == CHAR_UNDERSCORE) == prop_fail_result)
5729               break;
5730             eptr+= len;
5731 	    COST_CHK(1);
5732             }
5733           break;
5734 
5735           case PT_CLIST:
5736           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5737             {
5738             const pcre_uint32 *cp;
5739             int len = 1;
5740             if (eptr >= md->end_subject)
5741               {
5742               SCHECK_PARTIAL();
5743               break;
5744               }
5745             GETCHARLENTEST(c, eptr, len);
5746             cp = PRIV(ucd_caseless_sets) + prop_value;
5747             for (;;) /* LOOP_COUNT: COST */
5748               {
5749               if (c < *cp)
5750                 { if (prop_fail_result) break; else goto GOT_MAX; }
5751               if (c == *cp++)
5752                 { if (prop_fail_result) goto GOT_MAX; else break; }
5753 	      COST(1);
5754               }
5755             eptr += len;
5756 	    COST_CHK(1);
5757             }
5758           GOT_MAX:
5759           break;
5760 
5761           case PT_UCNC:
5762           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5763             {
5764             int len = 1;
5765             if (eptr >= md->end_subject)
5766               {
5767               SCHECK_PARTIAL();
5768               break;
5769               }
5770             GETCHARLENTEST(c, eptr, len);
5771             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5772                  c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5773                  c >= 0xe000) == prop_fail_result)
5774               break;
5775             eptr += len;
5776 	    COST_CHK(1);
5777             }
5778           break;
5779 
5780           default:
5781           RRETURN(PCRE_ERROR_INTERNAL);
5782           }
5783 
5784         /* eptr is now past the end of the maximum run */
5785 
5786         if (possessive) continue;    /* No backtracking */
5787         for(;;) /* LOOP_COUNT: Ok */
5788           {
5789           if (eptr <= pp) goto TAIL_RECURSE;
5790           RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5791           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5792           eptr--;
5793           if (utf) BACKCHAR(eptr);
5794           }
5795         }
5796 
5797       /* Match extended Unicode grapheme clusters. We will get here only if the
5798       support is in the binary; otherwise a compile-time error occurs. */
5799 
5800       else if (ctype == OP_EXTUNI)
5801         {
5802         for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5803           {
5804           if (eptr >= md->end_subject)
5805             {
5806             SCHECK_PARTIAL();
5807             break;
5808             }
5809           else
5810             {
5811 #ifndef ERLANG_INTEGRATION
5812             int lgb, rgb;
5813 #endif
5814             GETCHARINCTEST(c, eptr);
5815             lgb = UCD_GRAPHBREAK(c);
5816             while (eptr < md->end_subject)  /* LOOP_COUNT: CHK */
5817               {
5818               int len = 1;
5819               if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5820               rgb = UCD_GRAPHBREAK(c);
5821               if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5822               lgb = rgb;
5823               eptr += len;
5824 	      COST_CHK(1);
5825               }
5826 	    COST_CHK(1);
5827             }
5828           CHECK_PARTIAL();
5829           }
5830 
5831         /* eptr is now past the end of the maximum run */
5832 
5833         if (possessive) continue;    /* No backtracking */
5834 
5835         /* We use <= pp rather than == pp to detect the start of the run while
5836         backtracking because the use of \C in UTF mode can cause BACKCHAR to
5837         move back past pp. This is just palliative; the use of \C in UTF mode
5838         is fraught with danger. */
5839 
5840         for(;;) /* LOOP_COUNT: Ok */
5841           {
5842 #ifndef ERLANG_INTEGRATION
5843           int lgb, rgb;
5844 #endif
5845           PCRE_PUCHAR fptr;
5846 
5847           if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5848           RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5849           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5850 
5851           /* Backtracking over an extended grapheme cluster involves inspecting
5852           the previous two characters (if present) to see if a break is
5853           permitted between them. */
5854 
5855           eptr--;
5856           if (!utf) c = *eptr; else
5857             {
5858             BACKCHAR(eptr);
5859             GETCHAR(c, eptr);
5860             }
5861           rgb = UCD_GRAPHBREAK(c);
5862 
5863           for (;;) /* LOOP_COUNT: COST */
5864             {
5865             if (eptr <= pp) goto TAIL_RECURSE;   /* At start of char run */
5866             fptr = eptr - 1;
5867             if (!utf) c = *fptr; else
5868               {
5869               BACKCHAR(fptr);
5870               GETCHAR(c, fptr);
5871               }
5872             lgb = UCD_GRAPHBREAK(c);
5873             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5874             eptr = fptr;
5875             rgb = lgb;
5876 	    COST(1);
5877             }
5878           }
5879         }
5880 
5881       else
5882 #endif   /* SUPPORT_UCP */
5883 
5884 #ifdef SUPPORT_UTF
5885       if (utf)
5886         {
5887         switch(ctype)
5888           {
5889           case OP_ANY:
5890           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5891             {
5892             if (eptr >= md->end_subject)
5893               {
5894               SCHECK_PARTIAL();
5895               break;
5896               }
5897             if (IS_NEWLINE(eptr)) break;
5898             if (md->partial != 0 &&    /* Take care with CRLF partial */
5899                 eptr + 1 >= md->end_subject &&
5900                 NLBLOCK->nltype == NLTYPE_FIXED &&
5901                 NLBLOCK->nllen == 2 &&
5902                 UCHAR21(eptr) == NLBLOCK->nl[0])
5903               {
5904               md->hitend = TRUE;
5905               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5906               }
5907             eptr++;
5908             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5909 	    COST_CHK(1);
5910             }
5911           break;
5912 
5913           case OP_ALLANY:
5914           if (max < INT_MAX)
5915             {
5916             for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5917               {
5918               if (eptr >= md->end_subject)
5919                 {
5920                 SCHECK_PARTIAL();
5921                 break;
5922                 }
5923               eptr++;
5924               ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5925 	      COST_CHK(1);
5926               }
5927             }
5928           else
5929             {
5930             eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
5931             SCHECK_PARTIAL();
5932             }
5933           break;
5934 
          /* The byte case is the same as in non-UTF mode */
5936 
5937           case OP_ANYBYTE:
5938           c = max - min;
5939           if (c > (unsigned int)(md->end_subject - eptr))
5940             {
5941             eptr = md->end_subject;
5942             SCHECK_PARTIAL();
5943             }
5944           else eptr += c;
5945           break;
5946 
5947           case OP_ANYNL:
5948           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5949             {
5950             int len = 1;
5951             if (eptr >= md->end_subject)
5952               {
5953               SCHECK_PARTIAL();
5954               break;
5955               }
5956             GETCHARLEN(c, eptr, len);
5957             if (c == CHAR_CR)
5958               {
5959               if (++eptr >= md->end_subject) break;
5960               if (UCHAR21(eptr) == CHAR_LF) eptr++;
5961               }
5962             else
5963               {
5964               if (c != CHAR_LF &&
5965                   (md->bsr_anycrlf ||
5966                    (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5967 #ifndef EBCDIC
5968                     && c != 0x2028 && c != 0x2029
5969 #endif  /* Not EBCDIC */
5970                     )))
5971                 break;
5972               eptr += len;
5973               }
5974 	    COST_CHK(1);
5975             }
5976           break;
5977 
5978           case OP_NOT_HSPACE:
5979           case OP_HSPACE:
5980           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
5981             {
5982             BOOL gotspace;
5983             int len = 1;
5984             if (eptr >= md->end_subject)
5985               {
5986               SCHECK_PARTIAL();
5987               break;
5988               }
5989             GETCHARLEN(c, eptr, len);
5990             switch(c)
5991               {
5992               HSPACE_CASES: gotspace = TRUE; break;
5993               default: gotspace = FALSE; break;
5994               }
5995             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5996             eptr += len;
5997 	    COST_CHK(1);
5998             }
5999           break;
6000 
6001           case OP_NOT_VSPACE:
6002           case OP_VSPACE:
6003           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6004             {
6005             BOOL gotspace;
6006             int len = 1;
6007             if (eptr >= md->end_subject)
6008               {
6009               SCHECK_PARTIAL();
6010               break;
6011               }
6012             GETCHARLEN(c, eptr, len);
6013             switch(c)
6014               {
6015               VSPACE_CASES: gotspace = TRUE; break;
6016               default: gotspace = FALSE; break;
6017               }
6018             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
6019             eptr += len;
6020 	    COST_CHK(1);
6021             }
6022           break;
6023 
6024           case OP_NOT_DIGIT:
6025           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6026             {
6027             int len = 1;
6028             if (eptr >= md->end_subject)
6029               {
6030               SCHECK_PARTIAL();
6031               break;
6032               }
6033             GETCHARLEN(c, eptr, len);
6034             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
6035             eptr+= len;
6036 	    COST_CHK(1);
6037             }
6038           break;
6039 
6040           case OP_DIGIT:
6041           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6042             {
6043             int len = 1;
6044             if (eptr >= md->end_subject)
6045               {
6046               SCHECK_PARTIAL();
6047               break;
6048               }
6049             GETCHARLEN(c, eptr, len);
6050             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
6051             eptr+= len;
6052 	    COST_CHK(1);
6053             }
6054           break;
6055 
6056           case OP_NOT_WHITESPACE:
6057           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6058             {
6059             int len = 1;
6060             if (eptr >= md->end_subject)
6061               {
6062               SCHECK_PARTIAL();
6063               break;
6064               }
6065             GETCHARLEN(c, eptr, len);
6066             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
6067             eptr+= len;
6068 	    COST_CHK(1);
6069             }
6070           break;
6071 
6072           case OP_WHITESPACE:
6073           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6074             {
6075             int len = 1;
6076             if (eptr >= md->end_subject)
6077               {
6078               SCHECK_PARTIAL();
6079               break;
6080               }
6081             GETCHARLEN(c, eptr, len);
6082             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
6083             eptr+= len;
6084 	    COST_CHK(1);
6085             }
6086           break;
6087 
6088           case OP_NOT_WORDCHAR:
6089           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6090             {
6091             int len = 1;
6092             if (eptr >= md->end_subject)
6093               {
6094               SCHECK_PARTIAL();
6095               break;
6096               }
6097             GETCHARLEN(c, eptr, len);
6098             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
6099             eptr+= len;
6100 	    COST_CHK(1);
6101             }
6102           break;
6103 
6104           case OP_WORDCHAR:
6105           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6106             {
6107             int len = 1;
6108             if (eptr >= md->end_subject)
6109               {
6110               SCHECK_PARTIAL();
6111               break;
6112               }
6113             GETCHARLEN(c, eptr, len);
6114             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
6115             eptr+= len;
6116 	    COST_CHK(1);
6117             }
6118           break;
6119 
6120           default:
6121           RRETURN(PCRE_ERROR_INTERNAL);
6122           }
6123 
6124         if (possessive) continue;    /* No backtracking */
6125         for(;;) /* LOOP_COUNT: Ok */
6126           {
6127           if (eptr <= pp) goto TAIL_RECURSE;
6128           RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
6129           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6130           eptr--;
6131           BACKCHAR(eptr);
6132           if (ctype == OP_ANYNL && eptr > pp  && UCHAR21(eptr) == CHAR_NL &&
6133               UCHAR21(eptr - 1) == CHAR_CR) eptr--;
6134           }
6135         }
6136       else
6137 #endif  /* SUPPORT_UTF */
6138       /* Not UTF mode */
6139         {
6140         switch(ctype)
6141           {
6142           case OP_ANY:
6143           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6144             {
6145             if (eptr >= md->end_subject)
6146               {
6147               SCHECK_PARTIAL();
6148               break;
6149               }
6150             if (IS_NEWLINE(eptr)) break;
6151             if (md->partial != 0 &&    /* Take care with CRLF partial */
6152                 eptr + 1 >= md->end_subject &&
6153                 NLBLOCK->nltype == NLTYPE_FIXED &&
6154                 NLBLOCK->nllen == 2 &&
6155                 *eptr == NLBLOCK->nl[0])
6156               {
6157               md->hitend = TRUE;
6158               if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
6159               }
6160             eptr++;
6161 	    COST_CHK(1);
6162             }
6163           break;
6164 
6165           case OP_ALLANY:
6166           case OP_ANYBYTE:
6167           c = max - min;
6168           if (c > (unsigned int)(md->end_subject - eptr))
6169             {
6170             eptr = md->end_subject;
6171             SCHECK_PARTIAL();
6172             }
6173           else eptr += c;
6174           break;
6175 
6176           case OP_ANYNL:
6177           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6178             {
6179             if (eptr >= md->end_subject)
6180               {
6181               SCHECK_PARTIAL();
6182               break;
6183               }
6184             c = *eptr;
6185             if (c == CHAR_CR)
6186               {
6187               if (++eptr >= md->end_subject) break;
6188               if (*eptr == CHAR_LF) eptr++;
6189               }
6190             else
6191               {
6192               if (c != CHAR_LF && (md->bsr_anycrlf ||
6193                  (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
6194 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6195                  && c != 0x2028 && c != 0x2029
6196 #endif
6197                  ))) break;
6198               eptr++;
6199               }
6200 	    COST_CHK(1);
6201             }
6202           break;
6203 
6204           case OP_NOT_HSPACE:
6205           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6206             {
6207             if (eptr >= md->end_subject)
6208               {
6209               SCHECK_PARTIAL();
6210               break;
6211               }
6212             switch(*eptr)
6213               {
6214               default: eptr++; break;
6215               HSPACE_BYTE_CASES:
6216 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6217               HSPACE_MULTIBYTE_CASES:
6218 #endif
6219               goto ENDLOOP00;
6220               }
6221 	    COST_CHK(1);
6222             }
6223           ENDLOOP00:
6224           break;
6225 
6226           case OP_HSPACE:
6227           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6228             {
6229             if (eptr >= md->end_subject)
6230               {
6231               SCHECK_PARTIAL();
6232               break;
6233               }
6234             switch(*eptr)
6235               {
6236               default: goto ENDLOOP01;
6237               HSPACE_BYTE_CASES:
6238 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6239               HSPACE_MULTIBYTE_CASES:
6240 #endif
6241               eptr++; break;
6242               }
6243 	    COST_CHK(1);
6244             }
6245           ENDLOOP01:
6246           break;
6247 
6248           case OP_NOT_VSPACE:
6249           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6250             {
6251             if (eptr >= md->end_subject)
6252               {
6253               SCHECK_PARTIAL();
6254               break;
6255               }
6256             switch(*eptr)
6257               {
6258               default: eptr++; break;
6259               VSPACE_BYTE_CASES:
6260 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6261               VSPACE_MULTIBYTE_CASES:
6262 #endif
6263               goto ENDLOOP02;
6264               }
6265 	    COST_CHK(1);
6266             }
6267           ENDLOOP02:
6268           break;
6269 
6270           case OP_VSPACE:
6271           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6272             {
6273             if (eptr >= md->end_subject)
6274               {
6275               SCHECK_PARTIAL();
6276               break;
6277               }
6278             switch(*eptr)
6279               {
6280               default: goto ENDLOOP03;
6281               VSPACE_BYTE_CASES:
6282 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6283               VSPACE_MULTIBYTE_CASES:
6284 #endif
6285               eptr++; break;
6286               }
6287 	    COST_CHK(1);
6288             }
6289           ENDLOOP03:
6290           break;
6291 
6292           case OP_NOT_DIGIT:
6293           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6294             {
6295             if (eptr >= md->end_subject)
6296               {
6297               SCHECK_PARTIAL();
6298               break;
6299               }
6300             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6301             eptr++;
6302 	    COST_CHK(1);
6303             }
6304           break;
6305 
6306           case OP_DIGIT:
6307           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6308             {
6309             if (eptr >= md->end_subject)
6310               {
6311               SCHECK_PARTIAL();
6312               break;
6313               }
6314             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6315             eptr++;
6316 	    COST_CHK(1);
6317             }
6318           break;
6319 
6320           case OP_NOT_WHITESPACE:
6321           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6322             {
6323             if (eptr >= md->end_subject)
6324               {
6325               SCHECK_PARTIAL();
6326               break;
6327               }
6328             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6329             eptr++;
6330 	    COST_CHK(1);
6331             }
6332           break;
6333 
6334           case OP_WHITESPACE:
6335           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6336             {
6337             if (eptr >= md->end_subject)
6338               {
6339               SCHECK_PARTIAL();
6340               break;
6341               }
6342             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6343             eptr++;
6344 	    COST_CHK(1);
6345             }
6346           break;
6347 
6348           case OP_NOT_WORDCHAR:
6349           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6350             {
6351             if (eptr >= md->end_subject)
6352               {
6353               SCHECK_PARTIAL();
6354               break;
6355               }
6356             if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6357             eptr++;
6358 	    COST_CHK(1);
6359             }
6360           break;
6361 
6362           case OP_WORDCHAR:
6363           for (i = min; i < max; i++) /* LOOP_COUNT: CHK */
6364             {
6365             if (eptr >= md->end_subject)
6366               {
6367               SCHECK_PARTIAL();
6368               break;
6369               }
6370             if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6371             eptr++;
6372 	    COST_CHK(1);
6373             }
6374           break;
6375 
6376           default:
6377           RRETURN(PCRE_ERROR_INTERNAL);
6378           }
6379 
6380         if (possessive) continue;    /* No backtracking */
6381         for (;;) /* LOOP_COUNT: Ok */
6382           {
6383           if (eptr == pp) goto TAIL_RECURSE;
6384           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6385           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6386           eptr--;
6387           if (ctype == OP_ANYNL && eptr > pp  && *eptr == CHAR_LF &&
6388               eptr[-1] == CHAR_CR) eptr--;
6389           }
6390         }
6391 
6392       /* Control never gets here */
6393       }
6394 
6395     /* There's been some horrible disaster. Arrival here can only mean there is
6396     something seriously wrong in the code above or the OP_xxx definitions. */
6397 
6398     default:
6399     DPRINTF(("Unknown opcode %d\n", *ecode));
6400     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6401     }
6402 
6403   /* Do not stick any code in here without much thought; it is assumed
6404   that "continue" in the code above comes out to here to repeat the main
6405   loop. */
6406 
6407   }             /* End of main loop */
6408 /* Control never reaches here */
6409 
6410 
6411 /* When compiling to use the heap rather than the stack for recursive calls to
6412 match(), the RRETURN() macro jumps here. The number that is saved in
6413 frame->Xwhere indicates which label we actually want to return to. */
6414 
6415 #ifdef NO_RECURSE
6416 #define LBL(val) case val: goto L_RM##val;
6417 HEAP_RETURN:
6418 switch (frame->Xwhere)
6419   {
6420   LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6421   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6422   LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6423   LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6424   LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6425   LBL(65) LBL(66)
6426 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6427   LBL(20) LBL(21)
6428 #endif
6429 #ifdef SUPPORT_UTF
6430   LBL(16) LBL(18)
6431   LBL(22) LBL(23) LBL(28) LBL(30)
6432   LBL(32) LBL(34) LBL(42) LBL(46)
6433 #ifdef SUPPORT_UCP
6434   LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6435   LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6436 #endif  /* SUPPORT_UCP */
6437 #endif  /* SUPPORT_UTF */
6438   default:
6439   DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6440   return PCRE_ERROR_INTERNAL;
6441   }
6442 #undef LBL
6443 #ifdef ERLANG_INTEGRATION
6444 LOOP_COUNT_RETURN:
6445   /* Restore the saved register variables in the upper dummy frame, description below */
6446  {
6447    heapframe *newframe = frame;
6448    frame = newframe->Xprevframe;
6449    rrc = newframe->Xop;
6450    i = newframe->Xfi;
6451    c = (pcre_uint32) newframe->Xfc;
6452    utf = newframe->Xcur_is_word;
6453    minimize = newframe->Xcondition;
6454    possessive = newframe->Xprev_is_word;
6455    caseless = (BOOL) newframe->Xcodelink;
6456    condcode = newframe->Xctype;
6457    /* Note, the frame is not freed until the whole match is done,
6458       the function release_match_heapframes takes care of that */
6459    EDEBUGF(("LOOP_COUNT_RETURN: %d",frame->Xwhere));
6460    switch (frame->Xwhere)
6461      {
6462 #include "pcre_exec_loop_break_cases.inc"
6463      default:
6464        DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6465        return PCRE_ERROR_INTERNAL;
6466      }
6467  }
6468 
6469 LOOP_COUNT_BREAK:
  /* Save the local register variables in a dummy frame, so that the rule
   * that every frame has the same size still holds */
6472   /*
6473    * Store Local                    in
6474    * ------------------------------ --------------
6475    * rrc                            Xop
6476    * i                              Xfi
6477    * c                              Xfc (cast)
6478    * utf                            Xcur_is_word
6479    * minimize                       Xcondition
6480    * possessive                     Xprev_is_word
6481    * caseless                       Xcodelink (cast)
6482    * condcode                       Xctype
6483    */
6484   {
6485     heapframe *newframe = frame->Xnextframe;
6486     if (newframe == NULL)
6487     {
6488       newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));
6489       if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
6490       newframe->Xnextframe = NULL;
6491       frame->Xnextframe = newframe;
6492     }
6493     newframe->Xprevframe = frame;
6494     newframe->Xop = rrc;
6495     newframe->Xfi = i;
6496     newframe->Xfc = (unsigned int) c;
6497     newframe->Xcur_is_word = utf;
6498     newframe->Xcondition = minimize;
6499     newframe->Xprev_is_word = possessive;
6500     newframe->Xcodelink = (int) caseless;
6501     newframe->Xctype = condcode;
6502     md->state_save = newframe;
6503     md->loop_limit = 0;
6504     EDEBUGF(("Break loop!"));
6505     return PCRE_ERROR_LOOP_LIMIT;
6506   }
6507 #endif
6508 #endif  /* NO_RECURSE */
6509 }
6510 
6511 
6512 /***************************************************************************
6513 ****************************************************************************
6514                    RECURSION IN THE match() FUNCTION
6515 
6516 Undefine all the macros that were defined above to handle this. */
6517 
6518 #ifdef NO_RECURSE
6519 #undef eptr
6520 #undef ecode
6521 #undef mstart
6522 #undef offset_top
6523 #undef eptrb
6524 #undef flags
6525 
6526 #undef callpat
6527 #undef charptr
6528 #undef data
6529 #undef next
6530 #undef pp
6531 #undef prev
6532 #undef saved_eptr
6533 
6534 #undef new_recursive
6535 
6536 #undef cur_is_word
6537 #undef condition
6538 #undef prev_is_word
6539 
6540 #undef ctype
6541 #undef length
6542 #undef max
6543 #undef min
6544 #undef number
6545 #undef offset
6546 #undef op
6547 #undef save_capture_last
6548 #undef save_offset1
6549 #undef save_offset2
6550 #undef save_offset3
6551 #undef stacksave
6552 
6553 #undef newptrb
6554 
6555 #endif
6556 
6557 /* These two are defined as macros in both cases */
6558 
6559 #undef fc
6560 #undef fi
6561 
6562 /***************************************************************************
6563 ***************************************************************************/
6564 
6565 
6566 #ifdef NO_RECURSE
6567 /*************************************************
6568 *          Release allocated heap frames         *
6569 *************************************************/
6570 
6571 /* This function releases all the allocated frames. The base frame is on the
6572 machine stack, and so must not be freed.
6573 
6574 Argument: the address of the base frame
6575 Returns:  nothing
6576 */
6577 
6578 static void
release_match_heapframes(heapframe * frame_base)6579 release_match_heapframes (heapframe *frame_base)
6580 {
6581 heapframe *nextframe = frame_base->Xnextframe;
6582 #ifdef ERLANG_INTEGRATION
6583 frame_base->Xnextframe = NULL; /* Protect against multiple free */
6584 #endif
6585 while (nextframe != NULL)
6586   {
6587   heapframe *oldframe = nextframe;
6588   nextframe = nextframe->Xnextframe;
6589   (PUBL(stack_free))(oldframe);
6590   }
6591 }
6592 #endif
6593 
6594 
6595 /*************************************************
6596 *         Execute a Regular Expression           *
6597 *************************************************/
6598 
6599 /* This function applies a compiled re to a subject string and picks out
6600 portions of the string if it matches. Two elements in the vector are set for
6601 each substring: the offsets to the start and end of the substring.
6602 
6603 Arguments:
6604   argument_re     points to the compiled expression
6605   extra_data      points to extra data or is NULL
6606   subject         points to the subject string
6607   length          length of subject string (may contain binary zeros)
6608   start_offset    where to start in the subject string
6609   options         option bits
6610   offsets         points to a vector of ints to be filled in with offsets
6611   offsetcount     the number of elements in the vector
6612 
6613 Returns:          > 0 => success; value is the number of elements filled in
6614                   = 0 => success, but offsets is not big enough
6615                    -1 => failed to match
6616                  < -1 => some kind of unexpected problem
6617 */
6618 #ifdef ERLANG_INTEGRATION
6619 typedef struct {
6620     int Xarg_offset_max;
6621     BOOL Xusing_temporary_offsets;
6622     BOOL Xanchored;
6623     BOOL Xstartline;
6624     BOOL Xfirstline;
6625     BOOL Xutf;
6626     BOOL Xhas_first_char;
6627     BOOL Xhas_req_char;
6628     pcre_uchar Xfirst_char;
6629     pcre_uchar Xfirst_char2;
6630     pcre_uchar Xreq_char;
6631     pcre_uchar Xreq_char2;
6632     match_data Xmatch_block;
6633     match_data *Xmd;
6634     const pcre_uint8 *Xtables;
6635     const pcre_uint8 *Xstart_bits;
6636     PCRE_PUCHAR Xstart_match;
6637     PCRE_PUCHAR Xend_subject;
6638     PCRE_PUCHAR Xstart_partial;
6639     PCRE_PUCHAR Xmatch_partial;
6640     PCRE_PUCHAR Xreq_char_ptr;
6641     const pcre_study_data *Xstudy;
6642     REAL_PCRE *Xre;
6643     heapframe Xframe_zero; /* Always NO_RECURSE */
6644 
6645     /* for yield in valid_utf() */
6646 
6647     struct PRIV(valid_utf_ystate) valid_utf_ystate;
6648 
6649     /* Original function parameters that need be saved */
6650     int Xstart_offset;
6651     int Xoffsetcount;
6652     int *Xoffsets;
6653     int Xlength;
6654     PCRE_SPTR Xsubject;
6655 } PcreExecContext;
6656 #endif
6657 
6658 
6659 #if defined COMPILE_PCRE8
6660 #if defined(ERLANG_INTEGRATION)
6661 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
erts_pcre_exec(const pcre * argument_re,const erts_pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offsetcount)6662 erts_pcre_exec(const pcre *argument_re, const erts_pcre_extra *extra_data,
6663   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6664   int offsetcount)
6665 #else
6666 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6667 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6668   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6669   int offsetcount)
6670 #endif
6671 #elif defined COMPILE_PCRE16
6672 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6673 pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6674   PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6675   int offsetcount)
6676 #elif defined COMPILE_PCRE32
6677 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6678 pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
6679   PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6680   int offsetcount)
6681 #endif
6682 {
6683 #ifndef ERLANG_INTEGRATION
6684 #define ERTS_UPDATE_CONSUMED(X, MD)
6685 int rc, ocount, arg_offset_max;
6686 int newline;
6687 BOOL using_temporary_offsets = FALSE;
6688 BOOL anchored;
6689 BOOL startline;
6690 BOOL firstline;
6691 BOOL utf;
6692 BOOL has_first_char = FALSE;
6693 BOOL has_req_char = FALSE;
6694 pcre_uchar first_char = 0;
6695 pcre_uchar first_char2 = 0;
6696 pcre_uchar req_char = 0;
6697 pcre_uchar req_char2 = 0;
6698 match_data match_block;
6699 match_data *md = &match_block;
6700 const pcre_uint8 *tables;
6701 const pcre_uint8 *start_bits = NULL;
6702 PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6703 PCRE_PUCHAR end_subject;
6704 PCRE_PUCHAR start_partial = NULL;
6705 PCRE_PUCHAR match_partial = NULL;
6706 PCRE_PUCHAR req_char_ptr = start_match - 1;
6707 
6708 const pcre_study_data *study;
6709 const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6710 #ifdef NO_RECURSE
6711 heapframe frame_zero;
6712 #endif
6713 #else
6714 
6715 /* "local" variables in faked stackframe instead */
6716 #define arg_offset_max (exec_context->Xarg_offset_max)
6717 #define using_temporary_offsets (exec_context->Xusing_temporary_offsets)
6718 #define anchored (exec_context->Xanchored)
6719 #define startline (exec_context->Xstartline)
6720 #define firstline (exec_context->Xfirstline)
6721 #define has_first_char (exec_context->Xhas_first_char)
6722 #define has_req_char (exec_context->Xhas_req_char)
6723 #define first_char2 (exec_context->Xfirst_char2)
6724 #define req_char2 (exec_context->Xreq_char2)
6725 #define match_block (exec_context->Xmatch_block)
6726 #define md (exec_context->Xmd)
6727 #define start_match (exec_context->Xstart_match)
6728 #define start_partial (exec_context->Xstart_partial)
6729 #define match_partial (exec_context->Xmatch_partial)
6730 #define study (exec_context->Xstudy)
6731 #define re (exec_context->Xre)
6732 #define frame_zero (exec_context->Xframe_zero)
6733 
6734 #define SWAPIN() do {				\
6735   utf = exec_context->Xutf;			\
6736   first_char = exec_context->Xfirst_char;	\
6737   tables = exec_context->Xtables;		\
6738   start_bits = exec_context->Xstart_bits;	\
6739   end_subject = exec_context->Xend_subject;	\
6740   req_char_ptr = exec_context->Xreq_char_ptr;	\
6741   req_char = exec_context->Xreq_char;           \
6742   /* Parameters */                              \
6743   start_offset = exec_context->Xstart_offset;   \
6744   offsetcount = exec_context->Xoffsetcount;     \
6745   offsets = exec_context->Xoffsets;             \
6746   length = exec_context->Xlength;               \
6747   subject = exec_context->Xsubject;             \
6748 } while (0)
6749 
6750 #define SWAPOUT() do {				\
6751   exec_context->Xutf = utf;		        \
6752   exec_context->Xfirst_char = first_char;	\
6753   exec_context->Xtables = tables;		\
6754   exec_context->Xstart_bits = start_bits;	\
6755   exec_context->Xend_subject = end_subject;	\
6756   exec_context->Xreq_char_ptr = req_char_ptr;	\
6757   exec_context->Xreq_char = req_char;           \
6758   /* Parameters */                              \
6759   exec_context->Xstart_offset = start_offset;   \
6760   exec_context->Xoffsetcount = offsetcount;     \
6761   exec_context->Xoffsets = offsets;             \
6762   exec_context->Xlength = length;               \
6763   exec_context->Xsubject = subject;             \
6764 } while (0)
6765 
6766 #define ERTS_UPDATE_CONSUMED(X, MD)                                 \
6767 do {                                                                \
6768     if (((X)->flags & PCRE_EXTRA_LOOP_LIMIT) != 0) {                \
6769         unsigned long consumed__;                                   \
6770         if (!(X)->restart_data) {                                   \
6771             consumed__ = 0;                                         \
6772         }                                                           \
6773         else {                                                      \
6774             PcreExecContext *ctx__ = (PcreExecContext *)            \
6775                 (*(X)->restart_data);                               \
6776             consumed__ = ctx__->valid_utf_ystate.cnt;               \
6777             ctx__->valid_utf_ystate.cnt = 0;                        \
6778         }                                                           \
6779         if ((MD)) {                                                 \
6780             match_data *md__ = (MD);                                \
6781             consumed__ += (X)->loop_limit - md__->loop_limit;       \
6782         }                                                           \
6783         *((X)->loop_counter_return) = consumed__;                   \
6784     }                                                               \
6785 } while (0)
6786 PcreExecContext *exec_context;
6787 PcreExecContext internal_context;
6788 
6789 /* Locals that need never be saved */
6790 int rc, ocount;
6791 int newline;
6792 
6793 /* Variables that we swap in and out */
6794 BOOL utf;
6795 pcre_uchar first_char;
6796 const pcre_uint8 *tables = NULL;
6797 const pcre_uint8 *start_bits;
6798 PCRE_PUCHAR end_subject = NULL;
6799 PCRE_PUCHAR req_char_ptr;
6800 pcre_uchar req_char;
6801 
6802 /* End special swapped variables */
6803 
6804  if (extra_data != NULL &&
6805      (extra_data->flags & PCRE_EXTRA_LOOP_LIMIT) &&
6806      *(extra_data->restart_data) != NULL) {
6807      /* we are restarting, every initialization is skipped and we jump directly into the loop */
6808    exec_context = (PcreExecContext *) *(extra_data->restart_data);
6809    SWAPIN();
6810    if (exec_context->valid_utf_ystate.yielded)
6811        goto restart_valid_utf;
6812    goto RESTART_INTERRUPTED;
6813  } else {
6814    if (extra_data != NULL &&
6815        (extra_data->flags & PCRE_EXTRA_LOOP_LIMIT)) {
6816      exec_context = (PcreExecContext *) (erts_pcre_malloc)(sizeof(PcreExecContext));
6817      *(extra_data->restart_data) = (void *) exec_context;
6818      exec_context->valid_utf_ystate.yielded = 0;
6819      /* need freeing by special routine from client */
6820    } else {
6821 #if defined(ERLANG_INTEGRATION)
6822      fprintf(stderr, "Unexpected execution path\n");
6823      abort();
6824 #endif
6825      exec_context = &internal_context;
6826    }
6827 
6828    /* OK, no restart here, initialize variables instead */
6829    using_temporary_offsets = FALSE;
6830    has_first_char = FALSE;
6831    has_req_char = FALSE;
6832    first_char = 0;
6833    first_char2 = 0;
6834    req_char = 0;
6835    req_char2 = 0;
6836    md = &match_block;
6837    start_bits = NULL;
6838    start_match = (PCRE_PUCHAR)subject + start_offset;
6839    start_partial = NULL;
6840    match_partial = NULL;
6841    req_char_ptr = start_match - 1;
6842    re = (REAL_PCRE *)argument_re;
6843 
6844    md->state_save = NULL;
6845 }
6846 
6847 #endif /* ERLANG_INTEGRATION */
6848 
6849 
6850 #ifdef NO_RECURSE
6851 frame_zero.Xprevframe = NULL;            /* Marks the top level */
6852 frame_zero.Xnextframe = NULL;            /* None are allocated yet */
6853 md->match_frames_base = &frame_zero;
6854 #endif
6855 
6856 /* Check for the special magic call that measures the size of the stack used
6857 per recursive call of match(). Without the funny casting for sizeof, a Windows
6858 compiler gave this error: "unary minus operator applied to unsigned type,
6859 result still unsigned". Hopefully the cast fixes that. */
6860 
6861 if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6862     start_offset == -999)
6863 #ifdef NO_RECURSE
6864   return -((int)sizeof(heapframe));
6865 #else
6866   return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6867 #endif
6868 
6869 /* Plausibility checks */
6870 
6871 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6872 if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6873   return PCRE_ERROR_NULL;
6874 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6875 if (length < 0) return PCRE_ERROR_BADLENGTH;
6876 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6877 
6878 /* Check that the first field in the block is the magic number. If it is not,
6879 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6880 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6881 means that the pattern is likely compiled with different endianness. */
6882 
6883 if (re->magic_number != MAGIC_NUMBER)
6884   return re->magic_number == REVERSED_MAGIC_NUMBER?
6885     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6886 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6887 
6888 /* These two settings are used in the code for checking a UTF-8 string that
6889 follows immediately afterwards. Other values in the md block are used only
6890 during "normal" pcre_exec() processing, not when the JIT support is in use,
6891 so they are set up later. */
6892 
6893 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6894 utf = md->utf = (re->options & PCRE_UTF8) != 0;
6895 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6896               ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6897 
6898 /* Check a UTF-8 string if required. Pass back the character offset and error
6899 code for an invalid string if a results vector is available. */
6900 
6901 #ifdef SUPPORT_UTF
6902 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6903   {
6904   int erroroffset;
6905   int errorcode;
6906 
6907 #if !defined(ERLANG_INTEGRATION)
6908   errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length);
6909 #else
6910   struct PRIV(valid_utf_ystate) *ystate;
6911 
6912   if (!extra_data || !extra_data->restart_data) {
6913       ystate = NULL;
6914   }
6915   else if (!(extra_data->flags & PCRE_EXTRA_LOOP_LIMIT)) {
6916       exec_context->valid_utf_ystate.cnt = 10;
6917       ystate = NULL;
6918   }
6919   else {
6920       exec_context->valid_utf_ystate.yielded = 0;
6921   restart_valid_utf:
6922       ystate = &exec_context->valid_utf_ystate;
6923       ystate->cnt = (int) extra_data->loop_limit;
6924   }
6925   errorcode = PRIV(yielding_valid_utf)((PCRE_PUCHAR)subject, length,
6926                                        &erroroffset, ystate);
6927 #endif
6928   if (errorcode != 0)
6929     {
6930 #if defined(ERLANG_INTEGRATION)
6931     if (ystate && ystate->yielded) {
6932         ERTS_UPDATE_CONSUMED(extra_data, NULL);
6933         SWAPOUT();
6934         return PCRE_ERROR_LOOP_LIMIT;
6935     }
6936 #endif
6937     if (offsetcount >= 2)
6938       {
6939       offsets[0] = erroroffset;
6940       offsets[1] = errorcode;
6941       }
6942 #if defined COMPILE_PCRE8
6943     return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6944       PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6945 #elif defined COMPILE_PCRE16
6946     return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6947       PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6948 #elif defined COMPILE_PCRE32
6949     return PCRE_ERROR_BADUTF32;
6950 #endif
6951     }
6952 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6953   /* Check that a start_offset points to the start of a UTF character. */
6954   if (start_offset > 0 && start_offset < length &&
6955       NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6956     return PCRE_ERROR_BADUTF8_OFFSET;
6957 #endif
6958   }
6959 #if defined(ERLANG_INTEGRATION)
6960 else {
6961     exec_context->valid_utf_ystate.cnt = 0;
6962 }
6963 #endif
6964 #endif
6965 
6966 /* If the pattern was successfully studied with JIT support, run the JIT
6967 executable instead of the rest of this function. Most options must be set at
6968 compile time for the JIT code to be usable. Fallback to the normal code path if
6969 an unsupported flag is set. */
6970 
6971 #ifdef SUPPORT_JIT
6972 if (extra_data != NULL
6973     && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6974                              PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6975     && extra_data->executable_jit != NULL
6976     && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
6977   {
6978   rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6979        start_offset, options, offsets, offsetcount);
6980 
  /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial
  matching mode is not compiled. In this case we simply fall back to the
  interpreter. */
6983 
6984   if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6985   }
6986 #endif
6987 
6988 /* Carry on with non-JIT matching. This information is for finding all the
6989 numbers associated with a given name, for condition testing. */
6990 
6991 md->name_table = (pcre_uchar *)re + re->name_table_offset;
6992 md->name_count = re->name_count;
6993 md->name_entry_size = re->name_entry_size;
6994 
6995 /* Fish out the optional data from the extra_data structure, first setting
6996 the default values. */
6997 
6998 study = NULL;
6999 md->match_limit = MATCH_LIMIT;
7000 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
7001 md->callout_data = NULL;
7002 
7003 /* The table pointer is always in native byte order. */
7004 
7005 tables = re->tables;
7006 
7007 /* The two limit values override the defaults, whatever their value. */
7008 
7009 if (extra_data != NULL)
7010   {
7011   unsigned long int flags = extra_data->flags;
7012   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
7013     study = (const pcre_study_data *)extra_data->study_data;
7014   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
7015     md->match_limit = extra_data->match_limit;
7016   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
7017     md->match_limit_recursion = extra_data->match_limit_recursion;
7018   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
7019     md->callout_data = extra_data->callout_data;
7020   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
7021 #ifdef ERLANG_INTEGRATION
7022   if ((flags & PCRE_EXTRA_LOOP_LIMIT) != 0)
7023     {
7024         md->loop_limit = extra_data->loop_limit;
7025         if (extra_data->restart_data)
7026           md->loop_limit -= extra_data->loop_limit - exec_context->valid_utf_ystate.cnt;
7027         if (md->loop_limit < 10)
7028             md->loop_limit = 10; /* At least do something if we've come this far... */
7029     }
7030 #endif
7031   }
7032 
7033 /* Limits in the regex override only if they are smaller. */
7034 
7035 if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
7036   md->match_limit = re->limit_match;
7037 
7038 if ((re->flags & PCRE_RLSET) != 0 &&
7039     re->limit_recursion < md->match_limit_recursion)
7040   md->match_limit_recursion = re->limit_recursion;
7041 
7042 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
7043 is a feature that makes it possible to save compiled regex and re-use them
7044 in other programs later. */
7045 
7046 if (tables == NULL) tables = PRIV(default_tables);
7047 
7048 /* Set up other data */
7049 
7050 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
7051 startline = (re->flags & PCRE_STARTLINE) != 0;
7052 firstline = (re->options & PCRE_FIRSTLINE) != 0;
7053 
7054 /* The code starts after the real_pcre block and the capture name table. */
7055 
7056 md->start_code = (const pcre_uchar *)re + re->name_table_offset +
7057   re->name_count * re->name_entry_size;
7058 
7059 md->start_subject = (PCRE_PUCHAR)subject;
7060 md->start_offset = start_offset;
7061 md->end_subject = md->start_subject + length;
7062 end_subject = md->end_subject;
7063 
7064 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
7065 md->use_ucp = (re->options & PCRE_UCP) != 0;
7066 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
7067 md->ignore_skip_arg = 0;
7068 
7069 /* Some options are unpacked into BOOL variables in the hope that testing
7070 them will be faster than individual option bits. */
7071 
7072 md->notbol = (options & PCRE_NOTBOL) != 0;
7073 md->noteol = (options & PCRE_NOTEOL) != 0;
7074 md->notempty = (options & PCRE_NOTEMPTY) != 0;
7075 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
7076 
7077 md->hitend = FALSE;
7078 md->mark = md->nomatch_mark = NULL;     /* In case never set */
7079 
7080 md->recursive = NULL;                   /* No recursion at top level */
7081 md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
7082 
7083 md->lcc = tables + lcc_offset;
7084 md->fcc = tables + fcc_offset;
7085 md->ctypes = tables + ctypes_offset;
7086 
7087 /* Handle different \R options. */
7088 
7089 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
7090   {
7091   case 0:
7092   if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
7093     md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
7094   else
7095 #ifdef BSR_ANYCRLF
7096   md->bsr_anycrlf = TRUE;
7097 #else
7098   md->bsr_anycrlf = FALSE;
7099 #endif
7100   break;
7101 
7102   case PCRE_BSR_ANYCRLF:
7103   md->bsr_anycrlf = TRUE;
7104   break;
7105 
7106   case PCRE_BSR_UNICODE:
7107   md->bsr_anycrlf = FALSE;
7108   break;
7109 
7110   default: return PCRE_ERROR_BADNEWLINE;
7111   }
7112 
7113 /* Handle different types of newline. The three bits give eight cases. If
7114 nothing is set at run time, whatever was used at compile time applies. */
7115 
7116 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
7117         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
7118   {
7119   case 0: newline = NEWLINE; break;   /* Compile-time default */
7120   case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
7121   case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
7122   case PCRE_NEWLINE_CR+
7123        PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
7124   case PCRE_NEWLINE_ANY: newline = -1; break;
7125   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
7126   default: return PCRE_ERROR_BADNEWLINE;
7127   }
7128 
7129 if (newline == -2)
7130   {
7131   md->nltype = NLTYPE_ANYCRLF;
7132   }
7133 else if (newline < 0)
7134   {
7135   md->nltype = NLTYPE_ANY;
7136   }
7137 else
7138   {
7139   md->nltype = NLTYPE_FIXED;
7140   if (newline > 255)
7141     {
7142     md->nllen = 2;
7143     md->nl[0] = (newline >> 8) & 255;
7144     md->nl[1] = newline & 255;
7145     }
7146   else
7147     {
7148     md->nllen = 1;
7149     md->nl[0] = newline;
7150     }
7151   }
7152 
7153 /* Partial matching was originally supported only for a restricted set of
7154 regexes; from release 8.00 there are no restrictions, but the bits are still
7155 defined (though never set). So there's no harm in leaving this code. */
7156 
7157 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
7158   return PCRE_ERROR_BADPARTIAL;
7159 
7160 /* If the expression has got more back references than the offsets supplied can
7161 hold, we get a temporary chunk of working store to use during the matching.
7162 Otherwise, we can use the vector supplied, rounding down its size to a multiple
7163 of 3. */
7164 
7165 ocount = offsetcount - (offsetcount % 3);
7166 arg_offset_max = (2*ocount)/3;
7167 
7168 if (re->top_backref > 0 && re->top_backref >= ocount/3)
7169   {
7170   ocount = re->top_backref * 3 + 3;
7171   md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
7172   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
7173   using_temporary_offsets = TRUE;
7174   DPRINTF(("Got memory to hold back references\n"));
7175   }
7176 else md->offset_vector = offsets;
7177 md->offset_end = ocount;
7178 md->offset_max = (2*ocount)/3;
7179 md->capture_last = 0;
7180 
7181 /* Reset the working variable associated with each extraction. These should
7182 never be used unless previously set, but they get saved and restored, and so we
7183 initialize them to avoid reading uninitialized locations. Also, unset the
7184 offsets for the matched string. This is really just for tidiness with callouts,
7185 in case they inspect these fields. */
7186 
7187 if (md->offset_vector != NULL)
7188   {
7189   register int *iptr = md->offset_vector + ocount;
7190   register int *iend = iptr - re->top_bracket;
7191   if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
7192   while (--iptr >= iend) *iptr = -1;
7193   if (offsetcount > 0) md->offset_vector[0] = -1;
7194   if (offsetcount > 1) md->offset_vector[1] = -1;
7195   }
7196 
7197 /* Set up the first character to match, if available. The first_char value is
7198 never set for an anchored regular expression, but the anchoring may be forced
7199 at run time, so we have to test for anchoring. The first char may be unset for
7200 an unanchored pattern, of course. If there's no first char and the pattern was
7201 studied, there may be a bitmap of possible first characters. */
7202 
7203 if (!anchored)
7204   {
7205   if ((re->flags & PCRE_FIRSTSET) != 0)
7206     {
7207     has_first_char = TRUE;
7208     first_char = first_char2 = (pcre_uchar)(re->first_char);
7209     if ((re->flags & PCRE_FCH_CASELESS) != 0)
7210       {
7211       first_char2 = TABLE_GET(first_char, md->fcc, first_char);
7212 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
7213       if (utf && first_char > 127)
7214         first_char2 = UCD_OTHERCASE(first_char);
7215 #endif
7216       }
7217     }
7218   else
7219     if (!startline && study != NULL &&
7220       (study->flags & PCRE_STUDY_MAPPED) != 0)
7221         start_bits = study->start_bits;
7222   }
7223 
7224 /* For anchored or unanchored matches, there may be a "last known required
7225 character" set. */
7226 
7227 if ((re->flags & PCRE_REQCHSET) != 0)
7228   {
7229   has_req_char = TRUE;
7230   req_char = req_char2 = (pcre_uchar)(re->req_char);
7231   if ((re->flags & PCRE_RCH_CASELESS) != 0)
7232     {
7233     req_char2 = TABLE_GET(req_char, md->fcc, req_char);
7234 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
7235     if (utf && req_char > 127)
7236       req_char2 = UCD_OTHERCASE(req_char);
7237 #endif
7238     }
7239   }
7240 
7241 
7242 /* ==========================================================================*/
7243 
7244 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
7245 the loop runs just once. */
7246 
7247 for(;;)
7248   {
7249   PCRE_PUCHAR save_end_subject = end_subject;
7250   PCRE_PUCHAR new_start_match;
7251 
7252   /* If firstline is TRUE, the start of the match is constrained to the first
7253   line of a multiline string. That is, the match must be before or at the first
7254   newline. Implement this by temporarily adjusting end_subject so that we stop
7255   scanning at a newline. If the match fails at the newline, later code breaks
7256   this loop. */
7257 
7258   if (firstline)
7259     {
7260     PCRE_PUCHAR t = start_match;
7261 #ifdef SUPPORT_UTF
7262     if (utf)
7263       {
7264       while (t < md->end_subject && !IS_NEWLINE(t))
7265         {
7266         t++;
7267         ACROSSCHAR(t < end_subject, *t, t++);
7268         }
7269       }
7270     else
7271 #endif
7272     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
7273     end_subject = t;
7274     }
7275 
7276   /* There are some optimizations that avoid running the match if a known
7277   starting point is not found, or if a known later character is not present.
7278   However, there is an option that disables these, for testing and for ensuring
7279   that all callouts do actually occur. The option can be set in the regex by
7280   (*NO_START_OPT) or passed in match-time options. */
7281 
7282   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
7283     {
7284     /* Advance to a unique first char if there is one. */
7285 
7286     if (has_first_char)
7287       {
7288       pcre_uchar smc;
7289 
7290       if (first_char != first_char2)
7291         while (start_match < end_subject &&
7292           (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
7293           start_match++;
7294       else
7295         while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
7296           start_match++;
7297       }
7298 
7299     /* Or to just after a linebreak for a multiline match */
7300 
7301     else if (startline)
7302       {
7303       if (start_match > md->start_subject + start_offset)
7304         {
7305 #ifdef SUPPORT_UTF
7306         if (utf)
7307           {
7308           while (start_match < end_subject && !WAS_NEWLINE(start_match))
7309             {
7310             start_match++;
7311             ACROSSCHAR(start_match < end_subject, *start_match,
7312               start_match++);
7313             }
7314           }
7315         else
7316 #endif
7317         while (start_match < end_subject && !WAS_NEWLINE(start_match))
7318           start_match++;
7319 
7320         /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
7321         and we are now at a LF, advance the match position by one more character.
7322         */
7323 
7324         if (start_match[-1] == CHAR_CR &&
7325              (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
7326              start_match < end_subject &&
7327              UCHAR21TEST(start_match) == CHAR_NL)
7328           start_match++;
7329         }
7330       }
7331 
7332     /* Or to a non-unique first byte after study */
7333 
7334     else if (start_bits != NULL)
7335       {
7336       while (start_match < end_subject)
7337         {
7338         register pcre_uint32 c = UCHAR21TEST(start_match);
7339 #ifndef COMPILE_PCRE8
7340         if (c > 255) c = 255;
7341 #endif
7342         if ((start_bits[c/8] & (1 << (c&7))) != 0)
7343 	  {
7344               ERTS_UPDATE_CONSUMED(extra_data, md);
7345               break;
7346 	  }
7347         start_match++;
7348         }
7349       }
7350     }   /* Starting optimizations */
7351 
7352   /* Restore fudged end_subject */
7353 
7354   end_subject = save_end_subject;
7355 
7356   /* The following two optimizations are disabled for partial matching or if
7357   disabling is explicitly requested. */
7358 
7359   if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
7360     {
7361     /* If the pattern was studied, a minimum subject length may be set. This is
7362     a lower bound; no actual string of that length may actually match the
7363     pattern. Although the value is, strictly, in characters, we treat it as
7364     bytes to avoid spending too much time in this optimization. */
7365 
7366     if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
7367         (pcre_uint32)(end_subject - start_match) < study->minlength)
7368       {
7369       rc = MATCH_NOMATCH;
7370       ERTS_UPDATE_CONSUMED(extra_data, md);
7371       break;
7372       }
7373 
7374     /* If req_char is set, we know that that character must appear in the
7375     subject for the match to succeed. If the first character is set, req_char
7376     must be later in the subject; otherwise the test starts at the match point.
7377     This optimization can save a huge amount of backtracking in patterns with
7378     nested unlimited repeats that aren't going to match. Writing separate code
7379     for cased/caseless versions makes it go faster, as does using an
7380     autoincrement and backing off on a match.
7381 
7382     HOWEVER: when the subject string is very, very long, searching to its end
7383     can take a long time, and give bad performance on quite ordinary patterns.
7384     This showed up when somebody was matching something like /^\d+C/ on a
7385     32-megabyte string... so we don't do this when the string is sufficiently
7386     long. */
7387 
7388     if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
7389       {
7390       register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
7391 
7392       /* We don't need to repeat the search if we haven't yet reached the
7393       place we found it at last time. */
7394 
7395       if (p > req_char_ptr)
7396         {
7397         if (req_char != req_char2)
7398           {
7399           while (p < end_subject)
7400             {
7401             register pcre_uint32 pp = UCHAR21INCTEST(p);
7402             if (pp == req_char || pp == req_char2) { p--; break; }
7403             }
7404           }
7405         else
7406           {
7407           while (p < end_subject)
7408             {
7409             if (UCHAR21INCTEST(p) == req_char) { p--; break; }
7410             }
7411           }
7412 
7413         /* If we can't find the required character, break the matching loop,
7414         forcing a match failure. */
7415 
7416         if (p >= end_subject)
7417           {
7418           rc = MATCH_NOMATCH;
7419           ERTS_UPDATE_CONSUMED(extra_data, md);
7420           break;
7421           }
7422 
7423         /* If we have found the required character, save the point where we
7424         found it, so that we don't search again next time round the loop if
7425         the start hasn't passed this character yet. */
7426 
7427         req_char_ptr = p;
7428         }
7429       }
7430     }
7431 
7432 #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
7433   printf(">>>> Match against: ");
7434   pchars(start_match, end_subject - start_match, TRUE, md);
7435   printf("\n");
7436 #endif
7437 
7438   /* OK, we can now run the match. If "hitend" is set afterwards, remember the
7439   first starting point for which a partial match was found. */
7440 
7441   md->start_match_ptr = start_match;
7442   md->start_used_ptr = start_match;
7443   md->match_call_count = 0;
7444   md->match_function_type = 0;
7445   md->end_offset_top = 0;
7446   md->skip_arg_count = 0;
7447   EDEBUGF(("Calling match..."));
7448   rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
7449 #ifdef ERLANG_INTEGRATION
7450   ERTS_UPDATE_CONSUMED(extra_data, md);
7451   SWAPOUT();
7452   while(rc == PCRE_ERROR_LOOP_LIMIT) {
7453       EDEBUGF(("Loop limit break detected"));
7454       return PCRE_ERROR_LOOP_LIMIT;
7455   RESTART_INTERRUPTED:
7456       md->loop_limit = extra_data->loop_limit;
7457       rc = match(NULL,NULL,NULL,0,md,NULL,0);
7458       *extra_data->loop_counter_return =
7459 	  (extra_data->loop_limit - md->loop_limit);
7460   }
7461   md->state_save = NULL; /* So that next call to free_saved... does not crash */
7462 #endif
7463   if (md->hitend && start_partial == NULL)
7464     {
7465     start_partial = md->start_used_ptr;
7466     match_partial = start_match;
7467     }
7468 
7469   switch(rc)
7470     {
7471     /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
7472     the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
7473     entirely. The only way we can do that is to re-do the match at the same
7474     point, with a flag to force SKIP with an argument to be ignored. Just
7475     treating this case as NOMATCH does not work because it does not check other
7476     alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
7477 
7478     case MATCH_SKIP_ARG:
7479     new_start_match = start_match;
7480     md->ignore_skip_arg = md->skip_arg_count;
7481     break;
7482 
7483     /* SKIP passes back the next starting point explicitly, but if it is no
7484     greater than the match we have just done, treat it as NOMATCH. */
7485 
7486     case MATCH_SKIP:
7487     if (md->start_match_ptr > start_match)
7488       {
7489       new_start_match = md->start_match_ptr;
7490       break;
7491       }
7492     /* Fall through */
7493 
7494     /* NOMATCH and PRUNE advance by one character. THEN at this level acts
7495     exactly like PRUNE. Unset ignore SKIP-with-argument. */
7496 
7497     case MATCH_NOMATCH:
7498     case MATCH_PRUNE:
7499     case MATCH_THEN:
7500     md->ignore_skip_arg = 0;
7501     new_start_match = start_match + 1;
7502 #ifdef SUPPORT_UTF
7503     if (utf)
7504       ACROSSCHAR(new_start_match < end_subject, *new_start_match,
7505         new_start_match++);
7506 #endif
7507     break;
7508 
7509     /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
7510 
7511     case MATCH_COMMIT:
7512     rc = MATCH_NOMATCH;
7513     goto ENDLOOP;
7514 
7515     /* Any other return is either a match, or some kind of error. */
7516 
7517     default:
7518     goto ENDLOOP;
7519     }
7520 
7521   /* Control reaches here for the various types of "no match at this point"
7522   result. Reset the code to MATCH_NOMATCH for subsequent checking. */
7523 
7524   rc = MATCH_NOMATCH;
7525 
7526   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
7527   newline in the subject (though it may continue over the newline). Therefore,
7528   if we have just failed to match, starting at a newline, do not continue. */
7529 
7530   if (firstline && IS_NEWLINE(start_match)) break;
7531 
7532   /* Advance to new matching position */
7533 
7534   start_match = new_start_match;
7535 
7536   /* Break the loop if the pattern is anchored or if we have passed the end of
7537   the subject. */
7538 
7539   if (anchored || start_match > end_subject) break;
7540 
7541   /* If we have just passed a CR and we are now at a LF, and the pattern does
7542   not contain any explicit matches for \r or \n, and the newline option is CRLF
7543   or ANY or ANYCRLF, advance the match position by one more character. In
  normal matching start_match will always be greater than the first position at
7545   this stage, but a failed *SKIP can cause a return at the same point, which is
7546   why the first test exists. */
7547 
7548   if (start_match > (PCRE_PUCHAR)subject + start_offset &&
7549       start_match[-1] == CHAR_CR &&
7550       start_match < end_subject &&
7551       *start_match == CHAR_NL &&
7552       (re->flags & PCRE_HASCRORLF) == 0 &&
7553         (md->nltype == NLTYPE_ANY ||
7554          md->nltype == NLTYPE_ANYCRLF ||
7555          md->nllen == 2))
7556     start_match++;
7557 
7558   md->mark = NULL;   /* Reset for start of next match attempt */
7559   }                  /* End of for(;;) "bumpalong" loop */
7560 
7561 /* ==========================================================================*/
7562 
7563 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7564 conditions is true:
7565 
7566 (1) The pattern is anchored or the match was failed by (*COMMIT);
7567 
7568 (2) We are past the end of the subject;
7569 
7570 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7571     this option requests that a match occur at or before the first newline in
7572     the subject.
7573 
7574 When we have a match and the offset vector is big enough to deal with any
7575 backreferences, captured substring offsets will already be set up. In the case
7576 where we had to get some local store to hold offsets for backreference
7577 processing, copy those that we can. In this case there need not be overflow if
7578 certain parts of the pattern were not used, even though there are more
7579 capturing parentheses than vector slots. */
7580 
7581 ENDLOOP:
7582 
7583 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
7584   {
7585   if (using_temporary_offsets)
7586     {
7587     if (arg_offset_max >= 4)
7588       {
7589       memcpy(offsets + 2, md->offset_vector + 2,
7590         (arg_offset_max - 2) * sizeof(int));
7591       DPRINTF(("Copied offsets from temporary memory\n"));
7592       }
7593     if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7594     DPRINTF(("Freeing temporary memory\n"));
7595     (PUBL(free))(md->offset_vector);
7596 #ifdef ERLANG_INTEGRATION
7597     md->offset_vector = NULL;
7598 #endif
7599     }
7600 
7601   /* Set the return code to the number of captured strings, or 0 if there were
7602   too many to fit into the vector. */
7603 
7604   rc = ((md->capture_last & OVFLBIT) != 0 &&
7605          md->end_offset_top >= arg_offset_max)?
7606     0 : md->end_offset_top/2;
7607 
7608   /* If there is space in the offset vector, set any unused pairs at the end of
7609   the pattern to -1 for backwards compatibility. It is documented that this
7610   happens. In earlier versions, the whole set of potential capturing offsets
7611   was set to -1 each time round the loop, but this is handled differently now.
7612   "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7613   those at the end that need unsetting here. We can't just unset them all at
7614   the start of the whole thing because they may get set in one branch that is
7615   not the final matching branch. */
7616 
7617   if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
7618     {
7619     register int *iptr, *iend;
7620     int resetcount = 2 + re->top_bracket * 2;
7621     if (resetcount > offsetcount) resetcount = offsetcount;
7622     iptr = offsets + md->end_offset_top;
7623     iend = offsets + resetcount;
7624     while (iptr < iend) *iptr++ = -1;
7625     }
7626 
7627   /* If there is space, set up the whole thing as substring 0. The value of
7628   md->start_match_ptr might be modified if \K was encountered on the success
7629   matching path. */
7630 
7631   if (offsetcount < 2) rc = 0; else
7632     {
7633     offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7634     offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7635     }
7636 
7637   /* Return MARK data if requested */
7638 
7639   if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7640     *(extra_data->mark) = (pcre_uchar *)md->mark;
7641   DPRINTF((">>>> returning %d\n", rc));
7642 #ifdef NO_RECURSE
7643   release_match_heapframes(&frame_zero);
7644 #endif
7645   return rc;
7646   }
7647 
7648 /* Control gets here if there has been an error, or if the overall match
7649 attempt has failed at all permitted starting positions. */
7650 
7651 if (using_temporary_offsets)
7652   {
7653   DPRINTF(("Freeing temporary memory\n"));
7654 #ifdef ERLANG_INTEGRATION
7655   if (extra_data == NULL ||
7656       !(extra_data->flags & PCRE_EXTRA_LOOP_LIMIT))
7657     {
7658       (PUBL(free))(md->offset_vector);
7659       md->offset_vector = NULL;
7660     }
7661 #else
7662   (PUBL(free))(md->offset_vector);
7663 #endif
7664   }
7665 
7666 /* For anything other than nomatch or partial match, just return the code. */
7667 
7668 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7669   {
7670   DPRINTF((">>>> error: returning %d\n", rc));
7671 #ifdef NO_RECURSE
7672   release_match_heapframes(&frame_zero);
7673 #endif
7674   return rc;
7675   }
7676 
7677 /* Handle partial matches - disable any mark data */
7678 
7679 if (match_partial != NULL)
7680   {
7681   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7682   md->mark = NULL;
7683   if (offsetcount > 1)
7684     {
7685     offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7686     offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7687     if (offsetcount > 2)
7688       offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7689     }
7690   rc = PCRE_ERROR_PARTIAL;
7691   }
7692 
7693 /* This is the classic nomatch case */
7694 
7695 else
7696   {
7697   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7698   rc = PCRE_ERROR_NOMATCH;
7699   }
7700 
7701 /* Return the MARK data if it has been requested. */
7702 
7703 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
7704   *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7705 #ifdef NO_RECURSE
7706   release_match_heapframes(&frame_zero);
7707 #endif
7708 return rc;
7709 }
7710 
7711 #if defined(ERLANG_INTEGRATION)
7712 #undef arg_offset_max
7713 #undef using_temporary_offsets
7714 #undef anchored
7715 #undef startline
7716 #undef firstline
7717 #undef has_first_char
7718 #undef has_req_char
7719 #undef first_char2
7720 #undef req_char
7721 #undef req_char2
7722 #undef match_block
7723 #undef md
7724 #undef start_match
7725 #undef start_partial
7726 #undef match_partial
7727 #undef study
7728 #undef re
7729 #undef frame_zero
7730 
erts_pcre_free_restart_data(void * restart_data)7731 void erts_pcre_free_restart_data(void *restart_data) {
7732   PcreExecContext *top = (PcreExecContext *) restart_data;
7733   /* We might be done, or we might not, so there might be some saved match_states here */
7734   if (top != NULL) {
7735     match_data *md = top->Xmd;
7736     if (top->Xusing_temporary_offsets && md->offset_vector != NULL) {
7737 	(PUBL(free))(md->offset_vector);
7738     }
7739     release_match_heapframes(&(top->Xframe_zero));
7740     (PUBL(free))(top);
7741   }
7742 }
7743 #endif
7744 /* End of pcre_exec.c */
7745