1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9            Copyright (c) 1997-2006 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15     * Redistributions of source code must retain the above copyright notice,
16       this list of conditions and the following disclaimer.
17 
18     * Redistributions in binary form must reproduce the above copyright
19       notice, this list of conditions and the following disclaimer in the
20       documentation and/or other materials provided with the distribution.
21 
22     * Neither the name of the University of Cambridge nor the names of its
23       contributors may be used to endorse or promote products derived from
24       this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44 
/* These three names are defined before pcre_internal.h is included, so that
header can refer to them. Each expands to a plain identifier that has to be
meaningful at the point of use: md is the name match() and match_ref() give to
their match data block, and start_subject / end_subject are fields that this
file accesses through it. */

#define NLBLOCK md             /* Block containing newline information */
#define PSSTART start_subject  /* Field containing processed string start */
#define PSEND   end_subject    /* Field containing processed string end */
48 
49 #include "pcre_internal.h"
50 
/* The chain of eptrblocks for tail recursions uses memory in stack workspace,
obtained at top level, the size of which is defined by EPTR_WORK_SIZE. match()
gives up with PCRE_ERROR_NULLWSLIMIT once this many blocks are in use. */

#define EPTR_WORK_SIZE (1000)

/* Flag bits for the match() function. They are distinct bits, so they can be
combined with "|" and tested with "&". */

#define match_condassert     0x01  /* Called to check a condition assertion */
#define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
#define match_tail_recursed  0x04  /* Tail recursive call */

/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30

/* Min and max values for the common repeats; for the maxima, 0 => infinity.
NOTE(review): the six entries presumably correspond, in order, to the
quantifiers *, *?, +, +?, ? and ?? - confirm against the code that indexes
these tables. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
78 
79 
80 
#ifdef DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested. Characters for which isprint() is false are written as
\xhh escapes. The function exists only when DEBUG is defined.

Arguments:
  p           points to characters
  length      number to print
  is_subject  TRUE if printing from within md->start_subject
  md          pointer to matching data block, if is_subject is TRUE

Returns:     nothing
*/

static void
pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
{
unsigned int c;

/* When printing from the subject, never run past its end. md is consulted only
in this case. The pointer difference is narrowed to int explicitly; it is
smaller than length here, so it fits. */

if (is_subject && length > md->end_subject - p)
  length = (int)(md->end_subject - p);

while (length-- > 0)
  {
  c = *(p++);
  if (isprint(c)) printf("%c", c); else printf("\\x%02x", c);
  }
}
#endif
107 
108 
109 
110 /*************************************************
111 *          Match a back-reference                *
112 *************************************************/
113 
114 /* If a back reference hasn't been set, the length that is passed is greater
115 than the number of characters left in the string, so the match fails.
116 
117 Arguments:
118   offset      index into the offset vector
119   eptr        points into the subject
120   length      length to be matched
121   md          points to match data block
122   ims         the ims flags
123 
124 Returns:      TRUE if matched
125 */
126 
127 static BOOL
match_ref(int offset,register USPTR eptr,int length,match_data * md,unsigned long int ims)128 match_ref(int offset, register USPTR eptr, int length, match_data *md,
129   unsigned long int ims)
130 {
131 USPTR p = md->start_subject + md->offset_vector[offset];
132 
133 #ifdef DEBUG
134 if (eptr >= md->end_subject)
135   printf("matching subject <null>");
136 else
137   {
138   printf("matching subject ");
139   pchars(eptr, length, TRUE, md);
140   }
141 printf(" against backref ");
142 pchars(p, length, FALSE, md);
143 printf("\n");
144 #endif
145 
146 /* Always fail if not enough characters left */
147 
148 if (length > md->end_subject - eptr) return FALSE;
149 
150 /* Separate the caselesss case for speed */
151 
152 if ((ims & PCRE_CASELESS) != 0)
153   {
154   while (length-- > 0)
155     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
156   }
157 else
158   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
159 
160 return TRUE;
161 }
162 
163 
164 
/***************************************************************************
****************************************************************************
                   RECURSION IN THE match() FUNCTION

The match() function is highly recursive, though not every recursive call
increases the recursive depth. Nevertheless, some regular expressions can cause
it to recurse to a great depth. I was writing for Unix, so I just let it call
itself recursively. This uses the stack for saving everything that has to be
saved for a recursive call. On Unix, the stack can be large, and this works
fine.

It turns out that on some non-Unix-like systems there are problems with
programs that use a lot of stack. (This despite the fact that every last chip
has oodles of memory these days, and techniques for extending the stack have
been known for decades.) So....

There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
calls by keeping local variables that need to be preserved in blocks of memory
obtained from malloc() instead of on the stack. Macros are used to achieve this
so that the actual code doesn't look very different to what it always used to.
****************************************************************************
***************************************************************************/
188 
189 
/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions.

RMATCH(rx,ra,rb,rc,rd,re,rf,rg) makes one "recursive" call of match(), passing
ra..rg as its eptr, ecode, offset_top, md, ims, eptrb and flags arguments, with
a recursion depth one greater than the current one, and leaves the result in
rx. RRETURN(ra) returns ra from the current incarnation of match().

Several of the expansions are brace-enclosed blocks rather than expressions, so
every use must be a complete statement, and a use that is directly followed by
"else" has to be wrapped in braces of its own. */

#ifndef NO_RECURSE
#define REGISTER register
#ifdef DEBUG
#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }
#else
#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return ra
#endif

#else


/* These versions of the macros manage a private stack on the heap. Note
that the rd argument of RMATCH isn't actually used. It's the md argument of
match(), which never changes.

RMATCH obtains a new frame, records the resumption point in the current frame
with setjmp(), fills in the argument fields of the new frame, makes it the
current frame, and jumps to HEAP_RECURSE. When the "callee" finishes, RRETURN
frees its frame, stores the result in the previous frame, and longjmp()s back
to the recorded point, where RMATCH copies the result into rx. If there is no
previous frame, RRETURN does a real return from match().

If a new frame cannot be obtained, RMATCH abandons the current incarnation
with PCRE_ERROR_NOMEMORY instead of writing through a NULL pointer. REGISTER
is empty here because the variables it qualifies become fields of the frame. */

#define REGISTER

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  if (setjmp(frame->Xwhere) == 0)\
    {\
    newframe->Xeptr = ra;\
    newframe->Xecode = rb;\
    newframe->Xoffset_top = rc;\
    newframe->Xims = re;\
    newframe->Xeptrb = rf;\
    newframe->Xflags = rg;\
    newframe->Xrdepth = frame->Xrdepth + 1;\
    newframe->Xprevframe = frame;\
    frame = newframe;\
    DPRINTF(("restarting from line %d\n", __LINE__));\
    goto HEAP_RECURSE;\
    }\
  else\
    {\
    DPRINTF(("longjumped back to line %d\n", __LINE__));\
    frame = md->thisframe;\
    rx = frame->Xresult;\
    }\
  }

#define RRETURN(ra)\
  {\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    frame->Xresult = ra;\
    md->thisframe = frame;\
    longjmp(frame->Xwhere, 1);\
    }\
  return ra;\
  }


/* Structure for remembering the local variables in a private frame. Each
field X<name> holds the value that the variable <name> has in the version of
match() that uses true recursion. */

typedef struct heapframe {
  struct heapframe *Xprevframe;   /* Frame to go back to; NULL at top level */

  /* Function arguments that may change */

  const uschar *Xeptr;
  const uschar *Xecode;
  int Xoffset_top;
  unsigned long int Xims;         /* Same type as the ims argument of match() */
  eptrblock *Xeptrb;
  int Xflags;
  unsigned int Xrdepth;

  /* Function local variables */

  const uschar *Xcallpat;
  const uschar *Xcharptr;
  const uschar *Xdata;
  const uschar *Xnext;
  const uschar *Xpp;
  const uschar *Xprev;
  const uschar *Xsaved_eptr;

  recursion_info Xnew_recursive;

  BOOL Xcur_is_word;
  BOOL Xcondition;
  BOOL Xprev_is_word;

  unsigned long int Xoriginal_ims;

#ifdef SUPPORT_UCP
  int Xprop_type;
  int Xprop_value;
  int Xprop_fail_result;
  int Xprop_category;
  int Xprop_chartype;
  int Xprop_script;
#endif

  int Xctype;
  unsigned int Xfc;
  int Xfi;
  int Xlength;
  int Xmax;
  int Xmin;
  int Xnumber;
  int Xoffset;
  int Xop;
  int Xsave_capture_last;
  int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  int Xstacksave[REC_STACK_SAVE_MAX];

  eptrblock Xnewptrb;

  /* Place to pass back result, and where to jump back to */

  int  Xresult;
  jmp_buf Xwhere;

} heapframe;

#endif
327 
328 
329 /***************************************************************************
330 ***************************************************************************/
331 
332 
333 
334 /*************************************************
335 *         Match from current position            *
336 *************************************************/
337 
338 /* This function is called recursively in many circumstances. Whenever it
339 returns a negative (error) response, the outer incarnation must also return the
340 same response.
341 
342 Performance note: It might be tempting to extract commonly used fields from the
343 md structure (e.g. utf8, end_subject) into individual variables to improve
344 performance. Tests using gcc on a SPARC disproved this; in the first case, it
345 made performance worse.
346 
347 Arguments:
348    eptr        pointer to current character in subject
349    ecode       pointer to current position in compiled code
350    offset_top  current top pointer
351    md          pointer to "static" info for the match
352    ims         current /i, /m, and /s options
353    eptrb       pointer to chain of blocks containing eptr at start of
354                  brackets - for testing for empty matches
355    flags       can contain
356                  match_condassert - this is an assertion condition
357                  match_cbegroup - this is the start of an unlimited repeat
358                    group that can match an empty string
359                  match_tail_recursed - this is a tail_recursed group
360    rdepth      the recursion depth
361 
362 Returns:       MATCH_MATCH if matched            )  these values are >= 0
363                MATCH_NOMATCH if failed to match  )
364                a negative PCRE_ERROR_xxx value if aborted by an error condition
365                  (e.g. stopped by repeated call or recursion limit)
366 */
367 
368 static int
match(REGISTER USPTR eptr,REGISTER const uschar * ecode,int offset_top,match_data * md,unsigned long int ims,eptrblock * eptrb,int flags,unsigned int rdepth)369 match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
370   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
371   int flags, unsigned int rdepth)
372 {
373 /* These variables do not need to be preserved over recursion in this function,
374 so they can be ordinary variables in all cases. Mark some of them with
375 "register" because they are used a lot in loops. */
376 
377 register int  rrc;         /* Returns from recursive calls */
378 register int  i;           /* Used for loops not involving calls to RMATCH() */
379 register unsigned int c;   /* Character values not kept over RMATCH() calls */
380 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
381 
382 BOOL minimize, possessive; /* Quantifier options */
383 
384 /* When recursion is not being used, all "local" variables that have to be
385 preserved over calls to RMATCH() are part of a "frame" which is obtained from
386 heap storage. Set up the top-level frame here; others are obtained from the
387 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
388 
389 #ifdef NO_RECURSE
390 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
391 frame->Xprevframe = NULL;            /* Marks the top level */
392 
393 /* Copy in the original argument variables */
394 
395 frame->Xeptr = eptr;
396 frame->Xecode = ecode;
397 frame->Xoffset_top = offset_top;
398 frame->Xims = ims;
399 frame->Xeptrb = eptrb;
400 frame->Xflags = flags;
401 frame->Xrdepth = rdepth;
402 
/* This is where control jumps back to in order to effect "recursion" */
404 
405 HEAP_RECURSE:
406 
407 /* Macros make the argument variables come from the current frame */
408 
409 #define eptr               frame->Xeptr
410 #define ecode              frame->Xecode
411 #define offset_top         frame->Xoffset_top
412 #define ims                frame->Xims
413 #define eptrb              frame->Xeptrb
414 #define flags              frame->Xflags
415 #define rdepth             frame->Xrdepth
416 
417 /* Ditto for the local variables */
418 
419 #ifdef SUPPORT_UTF8
420 #define charptr            frame->Xcharptr
421 #endif
422 #define callpat            frame->Xcallpat
423 #define data               frame->Xdata
424 #define next               frame->Xnext
425 #define pp                 frame->Xpp
426 #define prev               frame->Xprev
427 #define saved_eptr         frame->Xsaved_eptr
428 
429 #define new_recursive      frame->Xnew_recursive
430 
431 #define cur_is_word        frame->Xcur_is_word
432 #define condition          frame->Xcondition
433 #define prev_is_word       frame->Xprev_is_word
434 
435 #define original_ims       frame->Xoriginal_ims
436 
437 #ifdef SUPPORT_UCP
438 #define prop_type          frame->Xprop_type
439 #define prop_value         frame->Xprop_value
440 #define prop_fail_result   frame->Xprop_fail_result
441 #define prop_category      frame->Xprop_category
442 #define prop_chartype      frame->Xprop_chartype
443 #define prop_script        frame->Xprop_script
444 #endif
445 
446 #define ctype              frame->Xctype
447 #define fc                 frame->Xfc
448 #define fi                 frame->Xfi
449 #define length             frame->Xlength
450 #define max                frame->Xmax
451 #define min                frame->Xmin
452 #define number             frame->Xnumber
453 #define offset             frame->Xoffset
454 #define op                 frame->Xop
455 #define save_capture_last  frame->Xsave_capture_last
456 #define save_offset1       frame->Xsave_offset1
457 #define save_offset2       frame->Xsave_offset2
458 #define save_offset3       frame->Xsave_offset3
459 #define stacksave          frame->Xstacksave
460 
461 #define newptrb            frame->Xnewptrb
462 
463 /* When recursion is being used, local variables are allocated on the stack and
464 get preserved during recursion in the normal way. In this environment, fi and
465 i, and fc and c, can be the same variables. */
466 
467 #else         /* NO_RECURSE not defined */
468 #define fi i
469 #define fc c
470 
471 
472 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
473 const uschar *charptr;             /* in small blocks of the code. My normal */
474 #endif                             /* style of coding would have declared    */
475 const uschar *callpat;             /* them within each of those blocks.      */
476 const uschar *data;                /* However, in order to accommodate the   */
477 const uschar *next;                /* version of this code that uses an      */
478 USPTR         pp;                  /* external "stack" implemented on the    */
479 const uschar *prev;                /* heap, it is easier to declare them all */
480 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
481                                    /* out in a block. The only declarations  */
482 recursion_info new_recursive;      /* within blocks below are for variables  */
483                                    /* that do not have to be preserved over  */
484 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
485 BOOL condition;
486 BOOL prev_is_word;
487 
488 unsigned long int original_ims;
489 
490 #ifdef SUPPORT_UCP
491 int prop_type;
492 int prop_value;
493 int prop_fail_result;
494 int prop_category;
495 int prop_chartype;
496 int prop_script;
497 #endif
498 
499 int ctype;
500 int length;
501 int max;
502 int min;
503 int number;
504 int offset;
505 int op;
506 int save_capture_last;
507 int save_offset1, save_offset2, save_offset3;
508 int stacksave[REC_STACK_SAVE_MAX];
509 
510 eptrblock newptrb;
511 #endif     /* NO_RECURSE */
512 
/* These statements are here to stop the compiler complaining about
uninitialized variables. */
515 
516 #ifdef SUPPORT_UCP
517 prop_value = 0;
518 prop_fail_result = 0;
519 #endif
520 
521 
522 /* This label is used for tail recursion, which is used in a few cases even
523 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
524 used. Thanks to Ian Taylor for noticing this possibility and sending the
525 original patch. */
526 
527 TAIL_RECURSE:
528 
529 /* OK, now we can get on with the real code of the function. Recursive calls
530 are specified by the macro RMATCH and RRETURN is used to return. When
531 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
532 and a "return", respectively (possibly with some debugging if DEBUG is
533 defined). However, RMATCH isn't like a function call because it's quite a
534 complicated macro. It has to be used in one particular way. This shouldn't,
535 however, impact performance when true recursion is being used. */
536 
537 /* First check that we haven't called match() too many times, or that we
538 haven't exceeded the recursive call limit. */
539 
540 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
541 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
542 
543 original_ims = ims;    /* Save for resetting on ')' */
544 
545 #ifdef SUPPORT_UTF8
546 utf8 = md->utf8;       /* Local copy of the flag */
547 #else
548 utf8 = FALSE;
549 #endif
550 
551 /* At the start of a group with an unlimited repeat that may match an empty
552 string, the match_cbegroup flag is set. When this is the case, add the current
553 subject pointer to the chain of such remembered pointers, to be checked when we
554 hit the closing ket, in order to break infinite loops that match no characters.
555 When match() is called in other circumstances, don't add to the chain. If this
556 is a tail recursion, use a block from the workspace, as the one on the stack is
557 already used. */
558 
559 if ((flags & match_cbegroup) != 0)
560   {
561   eptrblock *p;
562   if ((flags & match_tail_recursed) != 0)
563     {
564     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
565     p = md->eptrchain + md->eptrn++;
566     }
567   else p = &newptrb;
568   p->epb_saved_eptr = eptr;
569   p->epb_prev = eptrb;
570   eptrb = p;
571   }
572 
573 /* Now start processing the opcodes. */
574 
575 for (;;)
576   {
577   minimize = possessive = FALSE;
578   op = *ecode;
579 
580   /* For partial matching, remember if we ever hit the end of the subject after
581   matching at least one subject character. */
582 
583   if (md->partial &&
584       eptr >= md->end_subject &&
585       eptr > md->start_match)
586     md->hitend = TRUE;
587 
588   switch(op)
589     {
590     /* Handle a capturing bracket. If there is space in the offset vector, save
591     the current subject position in the working slot at the top of the vector.
592     We mustn't change the current values of the data slot, because they may be
593     set from a previous iteration of this group, and be referred to by a
594     reference inside the group.
595 
596     If the bracket fails to match, we need to restore this value and also the
597     values of the final offsets, in case they were set by a previous iteration
598     of the same bracket.
599 
600     If there isn't enough space in the offset vector, treat this as if it were
601     a non-capturing bracket. Don't worry about setting the flag for the error
602     case here; that is handled in the code for KET. */
603 
604     case OP_CBRA:
605     case OP_SCBRA:
606     number = GET2(ecode, 1+LINK_SIZE);
607     offset = number << 1;
608 
609 #ifdef DEBUG
610     printf("start bracket %d\n", number);
611     printf("subject=");
612     pchars(eptr, 16, TRUE, md);
613     printf("\n");
614 #endif
615 
616     if (offset < md->offset_max)
617       {
618       save_offset1 = md->offset_vector[offset];
619       save_offset2 = md->offset_vector[offset+1];
620       save_offset3 = md->offset_vector[md->offset_end - number];
621       save_capture_last = md->capture_last;
622 
623       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
624       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
625 
626       flags = (op == OP_SCBRA)? match_cbegroup : 0;
627       do
628         {
629         RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
630           ims, eptrb, flags);
631         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
632         md->capture_last = save_capture_last;
633         ecode += GET(ecode, 1);
634         }
635       while (*ecode == OP_ALT);
636 
637       DPRINTF(("bracket %d failed\n", number));
638 
639       md->offset_vector[offset] = save_offset1;
640       md->offset_vector[offset+1] = save_offset2;
641       md->offset_vector[md->offset_end - number] = save_offset3;
642 
643       RRETURN(MATCH_NOMATCH);
644       }
645 
646     /* Insufficient room for saving captured contents. Treat as a non-capturing
647     bracket. */
648 
649     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
650 
651     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
652     final alternative within the brackets, we would return the result of a
653     recursive call to match() whatever happened. We can reduce stack usage by
654     turning this into a tail recursion. */
655 
656     case OP_BRA:
657     case OP_SBRA:
658     DPRINTF(("start non-capturing bracket\n"));
659     flags = (op >= OP_SBRA)? match_cbegroup : 0;
660     for (;;)
661       {
662       if (ecode[GET(ecode, 1)] != OP_ALT)
663         {
664         ecode += _pcre_OP_lengths[*ecode];
665         flags |= match_tail_recursed;
666         DPRINTF(("bracket 0 tail recursion\n"));
667         goto TAIL_RECURSE;
668         }
669 
670       /* For non-final alternatives, continue the loop for a NOMATCH result;
671       otherwise return. */
672 
673       RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
674         eptrb, flags);
675       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
676       ecode += GET(ecode, 1);
677       }
678     /* Control never reaches here. */
679 
680     /* Conditional group: compilation checked that there are no more than
681     two branches. If the condition is false, skipping the first branch takes us
682     past the end if there is only one branch, but that's OK because that is
683     exactly what going to the ket would do. As there is only one branch to be
684     obeyed, we can use tail recursion to avoid using another stack frame. */
685 
686     case OP_COND:
687     case OP_SCOND:
688     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
689       {
690       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
691       condition = md->recursive != NULL &&
692         (offset == RREF_ANY || offset == md->recursive->group_num);
693       ecode += condition? 3 : GET(ecode, 1);
694       }
695 
696     else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
697       {
698       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
699       condition = offset < offset_top && md->offset_vector[offset] >= 0;
700       ecode += condition? 3 : GET(ecode, 1);
701       }
702 
703     else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
704       {
705       condition = FALSE;
706       ecode += GET(ecode, 1);
707       }
708 
709     /* The condition is an assertion. Call match() to evaluate it - setting
710     the final argument match_condassert causes it to stop at the end of an
711     assertion. */
712 
713     else
714       {
715       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
716           match_condassert);
717       if (rrc == MATCH_MATCH)
718         {
719         condition = TRUE;
720         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
721         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
722         }
723       else if (rrc != MATCH_NOMATCH)
724         {
725         RRETURN(rrc);         /* Need braces because of following else */
726         }
727       else
728         {
729         condition = FALSE;
730         ecode += GET(ecode, 1);
731         }
732       }
733 
734     /* We are now at the branch that is to be obeyed. As there is only one,
735     we can use tail recursion to avoid using another stack frame. If the second
736     alternative doesn't exist, we can just plough on. */
737 
738     if (condition || *ecode == OP_ALT)
739       {
740       ecode += 1 + LINK_SIZE;
741       flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
742       goto TAIL_RECURSE;
743       }
744     else
745       {
746       ecode += 1 + LINK_SIZE;
747       }
748     break;
749 
750 
751     /* End of the pattern. If we are in a top-level recursion, we should
752     restore the offsets appropriately and continue from after the call. */
753 
754     case OP_END:
755     if (md->recursive != NULL && md->recursive->group_num == 0)
756       {
757       recursion_info *rec = md->recursive;
758       DPRINTF(("End of pattern in a (?0) recursion\n"));
759       md->recursive = rec->prevrec;
760       memmove(md->offset_vector, rec->offset_save,
761         rec->saved_max * sizeof(int));
762       md->start_match = rec->save_start;
763       ims = original_ims;
764       ecode = rec->after_call;
765       break;
766       }
767 
768     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
769     string - backtracking will then try other alternatives, if any. */
770 
771     if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);
772     md->end_match_ptr = eptr;          /* Record where we ended */
773     md->end_offset_top = offset_top;   /* and how many extracts were taken */
774     RRETURN(MATCH_MATCH);
775 
776     /* Change option settings */
777 
778     case OP_OPT:
779     ims = ecode[1];
780     ecode += 2;
781     DPRINTF(("ims set to %02lx\n", ims));
782     break;
783 
784     /* Assertion brackets. Check the alternative branches in turn - the
785     matching won't pass the KET for an assertion. If any one branch matches,
786     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
787     start of each branch to move the current point backwards, so the code at
788     this level is identical to the lookahead case. */
789 
790     case OP_ASSERT:
791     case OP_ASSERTBACK:
792     do
793       {
794       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
795       if (rrc == MATCH_MATCH) break;
796       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797       ecode += GET(ecode, 1);
798       }
799     while (*ecode == OP_ALT);
800     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
801 
802     /* If checking an assertion for a condition, return MATCH_MATCH. */
803 
804     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
805 
806     /* Continue from after the assertion, updating the offsets high water
807     mark, since extracts may have been taken during the assertion. */
808 
809     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
810     ecode += 1 + LINK_SIZE;
811     offset_top = md->end_offset_top;
812     continue;
813 
814     /* Negative assertion: all branches must fail to match */
815 
816     case OP_ASSERT_NOT:
817     case OP_ASSERTBACK_NOT:
818     do
819       {
820       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
821       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
822       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
823       ecode += GET(ecode,1);
824       }
825     while (*ecode == OP_ALT);
826 
827     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
828 
829     ecode += 1 + LINK_SIZE;
830     continue;
831 
832     /* Move the subject pointer back. This occurs only at the start of
833     each branch of a lookbehind assertion. If we are too close to the start to
834     move back, this match function fails. When working with UTF-8 we move
835     back a number of characters, not bytes. */
836 
837     case OP_REVERSE:
838 #ifdef SUPPORT_UTF8
839     if (utf8)
840       {
841       i = GET(ecode, 1);
842       while (i-- > 0)
843         {
844         eptr--;
845         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
846         BACKCHAR(eptr)
847         }
848       }
849     else
850 #endif
851 
852     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
853 
854       {
855       eptr -= GET(ecode, 1);
856       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
857       }
858 
859     /* Skip to next op code */
860 
861     ecode += 1 + LINK_SIZE;
862     break;
863 
864     /* The callout item calls an external function, if one is provided, passing
865     details of the match so far. This is mainly for debugging, though the
866     function is able to force a failure. */
867 
868     case OP_CALLOUT:
869     if (pcre_callout != NULL)
870       {
871       pcre_callout_block cb;
872       cb.version          = 1;   /* Version 1 of the callout block */
873       cb.callout_number   = ecode[1];
874       cb.offset_vector    = md->offset_vector;
875       cb.subject          = (PCRE_SPTR)md->start_subject;
876       cb.subject_length   = md->end_subject - md->start_subject;
877       cb.start_match      = md->start_match - md->start_subject;
878       cb.current_position = eptr - md->start_subject;
879       cb.pattern_position = GET(ecode, 2);
880       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
881       cb.capture_top      = offset_top/2;
882       cb.capture_last     = md->capture_last;
883       cb.callout_data     = md->callout_data;
884       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
885       if (rrc < 0) RRETURN(rrc);
886       }
887     ecode += 2 + 2*LINK_SIZE;
888     break;
889 
890     /* Recursion either matches the current regex, or some subexpression. The
891     offset data is the offset to the starting bracket from the start of the
892     whole pattern. (This is so that it works from duplicated subpatterns.)
893 
894     If there are any capturing brackets started but not finished, we have to
895     save their starting points and reinstate them after the recursion. However,
896     we don't know how many such there are (offset_top records the completed
897     total) so we just have to save all the potential data. There may be up to
898     65535 such values, which is too large to put on the stack, but using malloc
899     for small numbers seems expensive. As a compromise, the stack is used when
900     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
901     is used. A problem is what to do if the malloc fails ... there is no way of
902     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
903     values on the stack, and accept that the rest may be wrong.
904 
905     There are also other values that have to be saved. We use a chained
906     sequence of blocks that actually live on the stack. Thanks to Robin Houston
907     for the original version of this logic. */
908 
909     case OP_RECURSE:
910       {
911       callpat = md->start_code + GET(ecode, 1);
912       new_recursive.group_num = (callpat == md->start_code)? 0 :
913         GET2(callpat, 1 + LINK_SIZE);
914 
915       /* Add to "recursing stack" */
916 
917       new_recursive.prevrec = md->recursive;
918       md->recursive = &new_recursive;
919 
920       /* Find where to continue from afterwards */
921 
922       ecode += 1 + LINK_SIZE;
923       new_recursive.after_call = ecode;
924 
925       /* Now save the offset data. */
926 
927       new_recursive.saved_max = md->offset_end;
928       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
929         new_recursive.offset_save = stacksave;
930       else
931         {
932         new_recursive.offset_save =
933           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
934         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
935         }
936 
937       memcpy(new_recursive.offset_save, md->offset_vector,
938             new_recursive.saved_max * sizeof(int));
939       new_recursive.save_start = md->start_match;
940       md->start_match = eptr;
941 
942       /* OK, now we can do the recursion. For each top-level alternative we
943       restore the offset and recursion data. */
944 
945       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
946       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
947       do
948         {
949         RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
950           md, ims, eptrb, flags);
951         if (rrc == MATCH_MATCH)
952           {
953           DPRINTF(("Recursion matched\n"));
954           md->recursive = new_recursive.prevrec;
955           if (new_recursive.offset_save != stacksave)
956             (pcre_free)(new_recursive.offset_save);
957           RRETURN(MATCH_MATCH);
958           }
959         else if (rrc != MATCH_NOMATCH)
960           {
961           DPRINTF(("Recursion gave error %d\n", rrc));
962           RRETURN(rrc);
963           }
964 
965         md->recursive = &new_recursive;
966         memcpy(md->offset_vector, new_recursive.offset_save,
967             new_recursive.saved_max * sizeof(int));
968         callpat += GET(callpat, 1);
969         }
970       while (*callpat == OP_ALT);
971 
972       DPRINTF(("Recursion didn't match\n"));
973       md->recursive = new_recursive.prevrec;
974       if (new_recursive.offset_save != stacksave)
975         (pcre_free)(new_recursive.offset_save);
976       RRETURN(MATCH_NOMATCH);
977       }
978     /* Control never reaches here */
979 
980     /* "Once" brackets are like assertion brackets except that after a match,
981     the point in the subject string is not moved back. Thus there can never be
982     a move back into the brackets. Friedl calls these "atomic" subpatterns.
983     Check the alternative branches in turn - the matching won't pass the KET
984     for this kind of subpattern. If any one branch matches, we carry on as at
985     the end of a normal bracket, leaving the subject pointer. */
986 
987     case OP_ONCE:
988     prev = ecode;
989     saved_eptr = eptr;
990 
991     do
992       {
993       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
994         eptrb, 0);
995       if (rrc == MATCH_MATCH) break;
996       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
997       ecode += GET(ecode,1);
998       }
999     while (*ecode == OP_ALT);
1000 
1001     /* If hit the end of the group (which could be repeated), fail */
1002 
1003     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1004 
1005     /* Continue as from after the assertion, updating the offsets high water
1006     mark, since extracts may have been taken. */
1007 
1008     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1009 
1010     offset_top = md->end_offset_top;
1011     eptr = md->end_match_ptr;
1012 
1013     /* For a non-repeating ket, just continue at this level. This also
1014     happens for a repeating ket if no characters were matched in the group.
1015     This is the forcible breaking of infinite loops as implemented in Perl
1016     5.005. If there is an options reset, it will get obeyed in the normal
1017     course of events. */
1018 
1019     if (*ecode == OP_KET || eptr == saved_eptr)
1020       {
1021       ecode += 1+LINK_SIZE;
1022       break;
1023       }
1024 
1025     /* The repeating kets try the rest of the pattern or restart from the
1026     preceding bracket, in the appropriate order. The second "call" of match()
1027     uses tail recursion, to avoid using another stack frame. We need to reset
1028     any options that changed within the bracket before re-running it, so
1029     check the next opcode. */
1030 
1031     if (ecode[1+LINK_SIZE] == OP_OPT)
1032       {
1033       ims = (ims & ~PCRE_IMS) | ecode[4];
1034       DPRINTF(("ims set to %02lx at group repeat\n", ims));
1035       }
1036 
1037     if (*ecode == OP_KETRMIN)
1038       {
1039       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1040       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1041       ecode = prev;
1042       flags = match_tail_recursed;
1043       goto TAIL_RECURSE;
1044       }
1045     else  /* OP_KETRMAX */
1046       {
1047       RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1048       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1049       ecode += 1 + LINK_SIZE;
1050       flags = match_tail_recursed;
1051       goto TAIL_RECURSE;
1052       }
1053     /* Control never gets here */
1054 
1055     /* An alternation is the end of a branch; scan along to find the end of the
1056     bracketed group and go to there. */
1057 
1058     case OP_ALT:
1059     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1060     break;
1061 
1062     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1063     that it may occur zero times. It may repeat infinitely, or not at all -
1064     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1065     repeat limits are compiled as a number of copies, with the optional ones
1066     preceded by BRAZERO or BRAMINZERO. */
1067 
1068     case OP_BRAZERO:
1069       {
1070       next = ecode+1;
1071       RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1072       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1073       do next += GET(next,1); while (*next == OP_ALT);
1074       ecode = next + 1 + LINK_SIZE;
1075       }
1076     break;
1077 
1078     case OP_BRAMINZERO:
1079       {
1080       next = ecode+1;
1081       do next += GET(next, 1); while (*next == OP_ALT);
1082       RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1083       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1084       ecode++;
1085       }
1086     break;
1087 
1088     /* End of a group, repeated or non-repeating. */
1089 
1090     case OP_KET:
1091     case OP_KETRMIN:
1092     case OP_KETRMAX:
1093     prev = ecode - GET(ecode, 1);
1094 
1095     /* If this was a group that remembered the subject start, in order to break
1096     infinite repeats of empty string matches, retrieve the subject start from
1097     the chain. Otherwise, set it NULL. */
1098 
1099     if (*prev >= OP_SBRA)
1100       {
1101       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1102       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1103       }
1104     else saved_eptr = NULL;
1105 
1106     /* If we are at the end of an assertion group, stop matching and return
1107     MATCH_MATCH, but record the current high water mark for use by positive
1108     assertions. Do this also for the "once" (atomic) groups. */
1109 
1110     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1111         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1112         *prev == OP_ONCE)
1113       {
1114       md->end_match_ptr = eptr;      /* For ONCE */
1115       md->end_offset_top = offset_top;
1116       RRETURN(MATCH_MATCH);
1117       }
1118 
1119     /* For capturing groups we have to check the group number back at the start
1120     and if necessary complete handling an extraction by setting the offsets and
1121     bumping the high water mark. Note that whole-pattern recursion is coded as
1122     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1123     when the OP_END is reached. Other recursion is handled here. */
1124 
1125     if (*prev == OP_CBRA || *prev == OP_SCBRA)
1126       {
1127       number = GET2(prev, 1+LINK_SIZE);
1128       offset = number << 1;
1129 
1130 #ifdef DEBUG
1131       printf("end bracket %d", number);
1132       printf("\n");
1133 #endif
1134 
1135       md->capture_last = number;
1136       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1137         {
1138         md->offset_vector[offset] =
1139           md->offset_vector[md->offset_end - number];
1140         md->offset_vector[offset+1] = eptr - md->start_subject;
1141         if (offset_top <= offset) offset_top = offset + 2;
1142         }
1143 
1144       /* Handle a recursively called group. Restore the offsets
1145       appropriately and continue from after the call. */
1146 
1147       if (md->recursive != NULL && md->recursive->group_num == number)
1148         {
1149         recursion_info *rec = md->recursive;
1150         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1151         md->recursive = rec->prevrec;
1152         md->start_match = rec->save_start;
1153         memcpy(md->offset_vector, rec->offset_save,
1154           rec->saved_max * sizeof(int));
1155         ecode = rec->after_call;
1156         ims = original_ims;
1157         break;
1158         }
1159       }
1160 
1161     /* For both capturing and non-capturing groups, reset the value of the ims
1162     flags, in case they got changed during the group. */
1163 
1164     ims = original_ims;
1165     DPRINTF(("ims reset to %02lx\n", ims));
1166 
1167     /* For a non-repeating ket, just continue at this level. This also
1168     happens for a repeating ket if no characters were matched in the group.
1169     This is the forcible breaking of infinite loops as implemented in Perl
1170     5.005. If there is an options reset, it will get obeyed in the normal
1171     course of events. */
1172 
1173     if (*ecode == OP_KET || eptr == saved_eptr)
1174       {
1175       ecode += 1 + LINK_SIZE;
1176       break;
1177       }
1178 
1179     /* The repeating kets try the rest of the pattern or restart from the
1180     preceding bracket, in the appropriate order. In the second case, we can use
1181     tail recursion to avoid using another stack frame. */
1182 
1183     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1184 
1185     if (*ecode == OP_KETRMIN)
1186       {
1187       RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1188       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1189       ecode = prev;
1190       flags |= match_tail_recursed;
1191       goto TAIL_RECURSE;
1192       }
1193     else  /* OP_KETRMAX */
1194       {
1195       RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1196       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1197       ecode += 1 + LINK_SIZE;
1198       flags = match_tail_recursed;
1199       goto TAIL_RECURSE;
1200       }
1201     /* Control never gets here */
1202 
1203     /* Start of subject unless notbol, or after internal newline if multiline */
1204 
1205     case OP_CIRC:
1206     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1207     if ((ims & PCRE_MULTILINE) != 0)
1208       {
1209       if (eptr != md->start_subject &&
1210           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1211         RRETURN(MATCH_NOMATCH);
1212       ecode++;
1213       break;
1214       }
1215     /* ... else fall through */
1216 
1217     /* Start of subject assertion */
1218 
1219     case OP_SOD:
1220     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1221     ecode++;
1222     break;
1223 
1224     /* Start of match assertion */
1225 
1226     case OP_SOM:
1227     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1228     ecode++;
1229     break;
1230 
1231     /* Assert before internal newline if multiline, or before a terminating
1232     newline unless endonly is set, else end of subject unless noteol is set. */
1233 
1234     case OP_DOLL:
1235     if ((ims & PCRE_MULTILINE) != 0)
1236       {
1237       if (eptr < md->end_subject)
1238         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1239       else
1240         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1241       ecode++;
1242       break;
1243       }
1244     else
1245       {
1246       if (md->noteol) RRETURN(MATCH_NOMATCH);
1247       if (!md->endonly)
1248         {
1249         if (eptr != md->end_subject &&
1250             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1251           RRETURN(MATCH_NOMATCH);
1252         ecode++;
1253         break;
1254         }
1255       }
1256     /* ... else fall through for endonly */
1257 
1258     /* End of subject assertion (\z) */
1259 
1260     case OP_EOD:
1261     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1262     ecode++;
1263     break;
1264 
1265     /* End of subject or ending \n assertion (\Z) */
1266 
1267     case OP_EODN:
1268     if (eptr != md->end_subject &&
1269         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1270       RRETURN(MATCH_NOMATCH);
1271     ecode++;
1272     break;
1273 
1274     /* Word boundary assertions */
1275 
1276     case OP_NOT_WORD_BOUNDARY:
1277     case OP_WORD_BOUNDARY:
1278       {
1279 
1280       /* Find out if the previous and current characters are "word" characters.
1281       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1282       be "non-word" characters. */
1283 
1284 #ifdef SUPPORT_UTF8
1285       if (utf8)
1286         {
1287         if (eptr == md->start_subject) prev_is_word = FALSE; else
1288           {
1289           const uschar *lastptr = eptr - 1;
1290           while((*lastptr & 0xc0) == 0x80) lastptr--;
1291           GETCHAR(c, lastptr);
1292           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1293           }
1294         if (eptr >= md->end_subject) cur_is_word = FALSE; else
1295           {
1296           GETCHAR(c, eptr);
1297           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1298           }
1299         }
1300       else
1301 #endif
1302 
1303       /* More streamlined when not in UTF-8 mode */
1304 
1305         {
1306         prev_is_word = (eptr != md->start_subject) &&
1307           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1308         cur_is_word = (eptr < md->end_subject) &&
1309           ((md->ctypes[*eptr] & ctype_word) != 0);
1310         }
1311 
1312       /* Now see if the situation is what we want */
1313 
1314       if ((*ecode++ == OP_WORD_BOUNDARY)?
1315            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1316         RRETURN(MATCH_NOMATCH);
1317       }
1318     break;
1319 
1320     /* Match a single character type; inline for speed */
1321 
1322     case OP_ANY:
1323     if ((ims & PCRE_DOTALL) == 0)
1324       {
1325       if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1326       }
1327     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1328     if (utf8)
1329       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1330     ecode++;
1331     break;
1332 
1333     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1334     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1335 
1336     case OP_ANYBYTE:
1337     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1338     ecode++;
1339     break;
1340 
1341     case OP_NOT_DIGIT:
1342     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1343     GETCHARINCTEST(c, eptr);
1344     if (
1345 #ifdef SUPPORT_UTF8
1346        c < 256 &&
1347 #endif
1348        (md->ctypes[c] & ctype_digit) != 0
1349        )
1350       RRETURN(MATCH_NOMATCH);
1351     ecode++;
1352     break;
1353 
1354     case OP_DIGIT:
1355     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1356     GETCHARINCTEST(c, eptr);
1357     if (
1358 #ifdef SUPPORT_UTF8
1359        c >= 256 ||
1360 #endif
1361        (md->ctypes[c] & ctype_digit) == 0
1362        )
1363       RRETURN(MATCH_NOMATCH);
1364     ecode++;
1365     break;
1366 
1367     case OP_NOT_WHITESPACE:
1368     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1369     GETCHARINCTEST(c, eptr);
1370     if (
1371 #ifdef SUPPORT_UTF8
1372        c < 256 &&
1373 #endif
1374        (md->ctypes[c] & ctype_space) != 0
1375        )
1376       RRETURN(MATCH_NOMATCH);
1377     ecode++;
1378     break;
1379 
1380     case OP_WHITESPACE:
1381     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1382     GETCHARINCTEST(c, eptr);
1383     if (
1384 #ifdef SUPPORT_UTF8
1385        c >= 256 ||
1386 #endif
1387        (md->ctypes[c] & ctype_space) == 0
1388        )
1389       RRETURN(MATCH_NOMATCH);
1390     ecode++;
1391     break;
1392 
1393     case OP_NOT_WORDCHAR:
1394     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1395     GETCHARINCTEST(c, eptr);
1396     if (
1397 #ifdef SUPPORT_UTF8
1398        c < 256 &&
1399 #endif
1400        (md->ctypes[c] & ctype_word) != 0
1401        )
1402       RRETURN(MATCH_NOMATCH);
1403     ecode++;
1404     break;
1405 
1406     case OP_WORDCHAR:
1407     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1408     GETCHARINCTEST(c, eptr);
1409     if (
1410 #ifdef SUPPORT_UTF8
1411        c >= 256 ||
1412 #endif
1413        (md->ctypes[c] & ctype_word) == 0
1414        )
1415       RRETURN(MATCH_NOMATCH);
1416     ecode++;
1417     break;
1418 
1419     case OP_ANYNL:
1420     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1421     GETCHARINCTEST(c, eptr);
1422     switch(c)
1423       {
1424       default: RRETURN(MATCH_NOMATCH);
1425       case 0x000d:
1426       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1427       break;
1428       case 0x000a:
1429       case 0x000b:
1430       case 0x000c:
1431       case 0x0085:
1432       case 0x2028:
1433       case 0x2029:
1434       break;
1435       }
1436     ecode++;
1437     break;
1438 
1439 #ifdef SUPPORT_UCP
1440     /* Check the next character by Unicode property. We will get here only
1441     if the support is in the binary; otherwise a compile-time error occurs. */
1442 
1443     case OP_PROP:
1444     case OP_NOTPROP:
1445     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1446     GETCHARINCTEST(c, eptr);
1447       {
1448       int chartype, script;
1449       int category = _pcre_ucp_findprop(c, &chartype, &script);
1450 
1451       switch(ecode[1])
1452         {
1453         case PT_ANY:
1454         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1455         break;
1456 
1457         case PT_LAMP:
1458         if ((chartype == ucp_Lu ||
1459              chartype == ucp_Ll ||
1460              chartype == ucp_Lt) == (op == OP_NOTPROP))
1461           RRETURN(MATCH_NOMATCH);
1462          break;
1463 
1464         case PT_GC:
1465         if ((ecode[2] != category) == (op == OP_PROP))
1466           RRETURN(MATCH_NOMATCH);
1467         break;
1468 
1469         case PT_PC:
1470         if ((ecode[2] != chartype) == (op == OP_PROP))
1471           RRETURN(MATCH_NOMATCH);
1472         break;
1473 
1474         case PT_SC:
1475         if ((ecode[2] != script) == (op == OP_PROP))
1476           RRETURN(MATCH_NOMATCH);
1477         break;
1478 
1479         default:
1480         RRETURN(PCRE_ERROR_INTERNAL);
1481         }
1482 
1483       ecode += 3;
1484       }
1485     break;
1486 
1487     /* Match an extended Unicode sequence. We will get here only if the support
1488     is in the binary; otherwise a compile-time error occurs. */
1489 
1490     case OP_EXTUNI:
1491     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1492     GETCHARINCTEST(c, eptr);
1493       {
1494       int chartype, script;
1495       int category = _pcre_ucp_findprop(c, &chartype, &script);
1496       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1497       while (eptr < md->end_subject)
1498         {
1499         int len = 1;
1500         if (!utf8) c = *eptr; else
1501           {
1502           GETCHARLEN(c, eptr, len);
1503           }
1504         category = _pcre_ucp_findprop(c, &chartype, &script);
1505         if (category != ucp_M) break;
1506         eptr += len;
1507         }
1508       }
1509     ecode++;
1510     break;
1511 #endif
1512 
1513 
1514     /* Match a back reference, possibly repeatedly. Look past the end of the
1515     item to see if there is repeat information following. The code is similar
1516     to that for character classes, but repeated for efficiency. Then obey
1517     similar code to character type repeats - written out again for speed.
1518     However, if the referenced string is the empty string, always treat
1519     it as matched, any number of times (otherwise there could be infinite
1520     loops). */
1521 
1522     case OP_REF:
1523       {
1524       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1525       ecode += 3;                                 /* Advance past item */
1526 
1527       /* If the reference is unset, set the length to be longer than the amount
1528       of subject left; this ensures that every attempt at a match fails. We
1529       can't just fail here, because of the possibility of quantifiers with zero
1530       minima. */
1531 
1532       length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1533         md->end_subject - eptr + 1 :
1534         md->offset_vector[offset+1] - md->offset_vector[offset];
1535 
1536       /* Set up for repetition, or handle the non-repeated case */
1537 
1538       switch (*ecode)
1539         {
1540         case OP_CRSTAR:
1541         case OP_CRMINSTAR:
1542         case OP_CRPLUS:
1543         case OP_CRMINPLUS:
1544         case OP_CRQUERY:
1545         case OP_CRMINQUERY:
1546         c = *ecode++ - OP_CRSTAR;
1547         minimize = (c & 1) != 0;
1548         min = rep_min[c];                 /* Pick up values from tables; */
1549         max = rep_max[c];                 /* zero for max => infinity */
1550         if (max == 0) max = INT_MAX;
1551         break;
1552 
1553         case OP_CRRANGE:
1554         case OP_CRMINRANGE:
1555         minimize = (*ecode == OP_CRMINRANGE);
1556         min = GET2(ecode, 1);
1557         max = GET2(ecode, 3);
1558         if (max == 0) max = INT_MAX;
1559         ecode += 5;
1560         break;
1561 
1562         default:               /* No repeat follows */
1563         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1564         eptr += length;
1565         continue;              /* With the main loop */
1566         }
1567 
1568       /* If the length of the reference is zero, just continue with the
1569       main loop. */
1570 
1571       if (length == 0) continue;
1572 
1573       /* First, ensure the minimum number of matches are present. We get back
1574       the length of the reference string explicitly rather than passing the
1575       address of eptr, so that eptr can be a register variable. */
1576 
1577       for (i = 1; i <= min; i++)
1578         {
1579         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1580         eptr += length;
1581         }
1582 
1583       /* If min = max, continue at the same level without recursion.
1584       They are not both allowed to be zero. */
1585 
1586       if (min == max) continue;
1587 
1588       /* If minimizing, keep trying and advancing the pointer */
1589 
1590       if (minimize)
1591         {
1592         for (fi = min;; fi++)
1593           {
1594           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1595           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1596           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1597             RRETURN(MATCH_NOMATCH);
1598           eptr += length;
1599           }
1600         /* Control never gets here */
1601         }
1602 
1603       /* If maximizing, find the longest string and work backwards */
1604 
1605       else
1606         {
1607         pp = eptr;
1608         for (i = min; i < max; i++)
1609           {
1610           if (!match_ref(offset, eptr, length, md, ims)) break;
1611           eptr += length;
1612           }
1613         while (eptr >= pp)
1614           {
1615           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1616           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1617           eptr -= length;
1618           }
1619         RRETURN(MATCH_NOMATCH);
1620         }
1621       }
1622     /* Control never gets here */
1623 
1624 
1625 
1626     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1627     used when all the characters in the class have values in the range 0-255,
1628     and either the matching is caseful, or the characters are in the range
1629     0-127 when UTF-8 processing is enabled. The only difference between
1630     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1631     encountered.
1632 
1633     First, look past the end of the item to see if there is repeat information
1634     following. Then obey similar code to character type repeats - written out
1635     again for speed. */
1636 
1637     case OP_NCLASS:
1638     case OP_CLASS:
1639       {
1640       data = ecode + 1;                /* Save for matching */
1641       ecode += 33;                     /* Advance past the item */
1642 
1643       switch (*ecode)
1644         {
1645         case OP_CRSTAR:
1646         case OP_CRMINSTAR:
1647         case OP_CRPLUS:
1648         case OP_CRMINPLUS:
1649         case OP_CRQUERY:
1650         case OP_CRMINQUERY:
1651         c = *ecode++ - OP_CRSTAR;
1652         minimize = (c & 1) != 0;
1653         min = rep_min[c];                 /* Pick up values from tables; */
1654         max = rep_max[c];                 /* zero for max => infinity */
1655         if (max == 0) max = INT_MAX;
1656         break;
1657 
1658         case OP_CRRANGE:
1659         case OP_CRMINRANGE:
1660         minimize = (*ecode == OP_CRMINRANGE);
1661         min = GET2(ecode, 1);
1662         max = GET2(ecode, 3);
1663         if (max == 0) max = INT_MAX;
1664         ecode += 5;
1665         break;
1666 
1667         default:               /* No repeat follows */
1668         min = max = 1;
1669         break;
1670         }
1671 
1672       /* First, ensure the minimum number of matches are present. */
1673 
1674 #ifdef SUPPORT_UTF8
1675       /* UTF-8 mode */
1676       if (utf8)
1677         {
1678         for (i = 1; i <= min; i++)
1679           {
1680           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1681           GETCHARINC(c, eptr);
1682           if (c > 255)
1683             {
1684             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1685             }
1686           else
1687             {
1688             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1689             }
1690           }
1691         }
1692       else
1693 #endif
1694       /* Not UTF-8 mode */
1695         {
1696         for (i = 1; i <= min; i++)
1697           {
1698           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1699           c = *eptr++;
1700           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1701           }
1702         }
1703 
1704       /* If max == min we can continue with the main loop without the
1705       need to recurse. */
1706 
1707       if (min == max) continue;
1708 
1709       /* If minimizing, keep testing the rest of the expression and advancing
1710       the pointer while it matches the class. */
1711 
1712       if (minimize)
1713         {
1714 #ifdef SUPPORT_UTF8
1715         /* UTF-8 mode */
1716         if (utf8)
1717           {
1718           for (fi = min;; fi++)
1719             {
1720             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1721             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1722             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1723             GETCHARINC(c, eptr);
1724             if (c > 255)
1725               {
1726               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1727               }
1728             else
1729               {
1730               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1731               }
1732             }
1733           }
1734         else
1735 #endif
1736         /* Not UTF-8 mode */
1737           {
1738           for (fi = min;; fi++)
1739             {
1740             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1741             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1742             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1743             c = *eptr++;
1744             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1745             }
1746           }
1747         /* Control never gets here */
1748         }
1749 
1750       /* If maximizing, find the longest possible run, then work backwards. */
1751 
1752       else
1753         {
1754         pp = eptr;
1755 
1756 #ifdef SUPPORT_UTF8
1757         /* UTF-8 mode */
1758         if (utf8)
1759           {
1760           for (i = min; i < max; i++)
1761             {
1762             int len = 1;
1763             if (eptr >= md->end_subject) break;
1764             GETCHARLEN(c, eptr, len);
1765             if (c > 255)
1766               {
1767               if (op == OP_CLASS) break;
1768               }
1769             else
1770               {
1771               if ((data[c/8] & (1 << (c&7))) == 0) break;
1772               }
1773             eptr += len;
1774             }
1775           for (;;)
1776             {
1777             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1778             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1779             if (eptr-- == pp) break;        /* Stop if tried at original pos */
1780             BACKCHAR(eptr);
1781             }
1782           }
1783         else
1784 #endif
1785           /* Not UTF-8 mode */
1786           {
1787           for (i = min; i < max; i++)
1788             {
1789             if (eptr >= md->end_subject) break;
1790             c = *eptr;
1791             if ((data[c/8] & (1 << (c&7))) == 0) break;
1792             eptr++;
1793             }
1794           while (eptr >= pp)
1795             {
1796             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1797             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1798             eptr--;
1799             }
1800           }
1801 
1802         RRETURN(MATCH_NOMATCH);
1803         }
1804       }
1805     /* Control never gets here */
1806 
1807 
1808     /* Match an extended character class. This opcode is encountered only
1809     in UTF-8 mode, because that's the only time it is compiled. */
1810 
1811 #ifdef SUPPORT_UTF8
1812     case OP_XCLASS:
1813       {
1814       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
1815       ecode += GET(ecode, 1);                      /* Advance past the item */
1816 
1817       switch (*ecode)
1818         {
1819         case OP_CRSTAR:
1820         case OP_CRMINSTAR:
1821         case OP_CRPLUS:
1822         case OP_CRMINPLUS:
1823         case OP_CRQUERY:
1824         case OP_CRMINQUERY:
1825         c = *ecode++ - OP_CRSTAR;
1826         minimize = (c & 1) != 0;
1827         min = rep_min[c];                 /* Pick up values from tables; */
1828         max = rep_max[c];                 /* zero for max => infinity */
1829         if (max == 0) max = INT_MAX;
1830         break;
1831 
1832         case OP_CRRANGE:
1833         case OP_CRMINRANGE:
1834         minimize = (*ecode == OP_CRMINRANGE);
1835         min = GET2(ecode, 1);
1836         max = GET2(ecode, 3);
1837         if (max == 0) max = INT_MAX;
1838         ecode += 5;
1839         break;
1840 
1841         default:               /* No repeat follows */
1842         min = max = 1;
1843         break;
1844         }
1845 
1846       /* First, ensure the minimum number of matches are present. */
1847 
1848       for (i = 1; i <= min; i++)
1849         {
1850         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1851         GETCHARINC(c, eptr);
1852         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1853         }
1854 
1855       /* If max == min we can continue with the main loop without the
1856       need to recurse. */
1857 
1858       if (min == max) continue;
1859 
1860       /* If minimizing, keep testing the rest of the expression and advancing
1861       the pointer while it matches the class. */
1862 
1863       if (minimize)
1864         {
1865         for (fi = min;; fi++)
1866           {
1867           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1868           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1869           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1870           GETCHARINC(c, eptr);
1871           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1872           }
1873         /* Control never gets here */
1874         }
1875 
1876       /* If maximizing, find the longest possible run, then work backwards. */
1877 
1878       else
1879         {
1880         pp = eptr;
1881         for (i = min; i < max; i++)
1882           {
1883           int len = 1;
1884           if (eptr >= md->end_subject) break;
1885           GETCHARLEN(c, eptr, len);
1886           if (!_pcre_xclass(c, data)) break;
1887           eptr += len;
1888           }
1889         for(;;)
1890           {
1891           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1892           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1893           if (eptr-- == pp) break;        /* Stop if tried at original pos */
1894           BACKCHAR(eptr)
1895           }
1896         RRETURN(MATCH_NOMATCH);
1897         }
1898 
1899       /* Control never gets here */
1900       }
1901 #endif    /* End of XCLASS */
1902 
1903     /* Match a single character, casefully */
1904 
1905     case OP_CHAR:
1906 #ifdef SUPPORT_UTF8
1907     if (utf8)
1908       {
1909       length = 1;
1910       ecode++;
1911       GETCHARLEN(fc, ecode, length);
1912       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1913       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
1914       }
1915     else
1916 #endif
1917 
1918     /* Non-UTF-8 mode */
1919       {
1920       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1921       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
1922       ecode += 2;
1923       }
1924     break;
1925 
1926     /* Match a single character, caselessly */
1927 
1928     case OP_CHARNC:
1929 #ifdef SUPPORT_UTF8
1930     if (utf8)
1931       {
1932       length = 1;
1933       ecode++;
1934       GETCHARLEN(fc, ecode, length);
1935 
1936       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1937 
1938       /* If the pattern character's value is < 128, we have only one byte, and
1939       can use the fast lookup table. */
1940 
1941       if (fc < 128)
1942         {
1943         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1944         }
1945 
1946       /* Otherwise we must pick up the subject character */
1947 
1948       else
1949         {
1950         unsigned int dc;
1951         GETCHARINC(dc, eptr);
1952         ecode += length;
1953 
1954         /* If we have Unicode property support, we can use it to test the other
1955         case of the character, if there is one. */
1956 
1957         if (fc != dc)
1958           {
1959 #ifdef SUPPORT_UCP
1960           if (dc != _pcre_ucp_othercase(fc))
1961 #endif
1962             RRETURN(MATCH_NOMATCH);
1963           }
1964         }
1965       }
1966     else
1967 #endif   /* SUPPORT_UTF8 */
1968 
1969     /* Non-UTF-8 mode */
1970       {
1971       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1972       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1973       ecode += 2;
1974       }
1975     break;
1976 
1977     /* Match a single character repeatedly. */
1978 
1979     case OP_EXACT:
1980     min = max = GET2(ecode, 1);
1981     ecode += 3;
1982     goto REPEATCHAR;
1983 
1984     case OP_POSUPTO:
1985     possessive = TRUE;
1986     /* Fall through */
1987 
1988     case OP_UPTO:
1989     case OP_MINUPTO:
1990     min = 0;
1991     max = GET2(ecode, 1);
1992     minimize = *ecode == OP_MINUPTO;
1993     ecode += 3;
1994     goto REPEATCHAR;
1995 
1996     case OP_POSSTAR:
1997     possessive = TRUE;
1998     min = 0;
1999     max = INT_MAX;
2000     ecode++;
2001     goto REPEATCHAR;
2002 
2003     case OP_POSPLUS:
2004     possessive = TRUE;
2005     min = 1;
2006     max = INT_MAX;
2007     ecode++;
2008     goto REPEATCHAR;
2009 
2010     case OP_POSQUERY:
2011     possessive = TRUE;
2012     min = 0;
2013     max = 1;
2014     ecode++;
2015     goto REPEATCHAR;
2016 
2017     case OP_STAR:
2018     case OP_MINSTAR:
2019     case OP_PLUS:
2020     case OP_MINPLUS:
2021     case OP_QUERY:
2022     case OP_MINQUERY:
2023     c = *ecode++ - OP_STAR;
2024     minimize = (c & 1) != 0;
2025     min = rep_min[c];                 /* Pick up values from tables; */
2026     max = rep_max[c];                 /* zero for max => infinity */
2027     if (max == 0) max = INT_MAX;
2028 
2029     /* Common code for all repeated single-character matches. We can give
2030     up quickly if there are fewer than the minimum number of characters left in
2031     the subject. */
2032 
2033     REPEATCHAR:
2034 #ifdef SUPPORT_UTF8
2035     if (utf8)
2036       {
2037       length = 1;
2038       charptr = ecode;
2039       GETCHARLEN(fc, ecode, length);
2040       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2041       ecode += length;
2042 
2043       /* Handle multibyte character matching specially here. There is
2044       support for caseless matching if UCP support is present. */
2045 
2046       if (length > 1)
2047         {
2048         int oclength = 0;
2049         uschar occhars[8];
2050 
2051 #ifdef SUPPORT_UCP
2052         unsigned int othercase;
2053         if ((ims & PCRE_CASELESS) != 0 &&
2054             (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2055           oclength = _pcre_ord2utf8(othercase, occhars);
2056 #endif  /* SUPPORT_UCP */
2057 
2058         for (i = 1; i <= min; i++)
2059           {
2060           if (memcmp(eptr, charptr, length) == 0) eptr += length;
2061           /* Need braces because of following else */
2062           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2063           else
2064             {
2065             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2066             eptr += oclength;
2067             }
2068           }
2069 
2070         if (min == max) continue;
2071 
2072         if (minimize)
2073           {
2074           for (fi = min;; fi++)
2075             {
2076             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2077             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2078             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2079             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2080             /* Need braces because of following else */
2081             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2082             else
2083               {
2084               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2085               eptr += oclength;
2086               }
2087             }
2088           /* Control never gets here */
2089           }
2090 
2091         else  /* Maximize */
2092           {
2093           pp = eptr;
2094           for (i = min; i < max; i++)
2095             {
2096             if (eptr > md->end_subject - length) break;
2097             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2098             else if (oclength == 0) break;
2099             else
2100               {
2101               if (memcmp(eptr, occhars, oclength) != 0) break;
2102               eptr += oclength;
2103               }
2104             }
2105 
2106           if (possessive) continue;
2107           while (eptr >= pp)
2108            {
2109            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2110            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2111            eptr -= length;
2112            }
2113           RRETURN(MATCH_NOMATCH);
2114           }
2115         /* Control never gets here */
2116         }
2117 
2118       /* If the length of a UTF-8 character is 1, we fall through here, and
2119       obey the code as for non-UTF-8 characters below, though in this case the
2120       value of fc will always be < 128. */
2121       }
2122     else
2123 #endif  /* SUPPORT_UTF8 */
2124 
2125     /* When not in UTF-8 mode, load a single-byte character. */
2126       {
2127       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2128       fc = *ecode++;
2129       }
2130 
2131     /* The value of fc at this point is always less than 256, though we may or
2132     may not be in UTF-8 mode. The code is duplicated for the caseless and
2133     caseful cases, for speed, since matching characters is likely to be quite
2134     common. First, ensure the minimum number of matches are present. If min =
2135     max, continue at the same level without recursing. Otherwise, if
2136     minimizing, keep trying the rest of the expression and advancing one
2137     matching character if failing, up to the maximum. Alternatively, if
2138     maximizing, find the maximum number of characters and work backwards. */
2139 
2140     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2141       max, eptr));
2142 
2143     if ((ims & PCRE_CASELESS) != 0)
2144       {
2145       fc = md->lcc[fc];
2146       for (i = 1; i <= min; i++)
2147         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2148       if (min == max) continue;
2149       if (minimize)
2150         {
2151         for (fi = min;; fi++)
2152           {
2153           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2154           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2155           if (fi >= max || eptr >= md->end_subject ||
2156               fc != md->lcc[*eptr++])
2157             RRETURN(MATCH_NOMATCH);
2158           }
2159         /* Control never gets here */
2160         }
2161       else  /* Maximize */
2162         {
2163         pp = eptr;
2164         for (i = min; i < max; i++)
2165           {
2166           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2167           eptr++;
2168           }
2169         if (possessive) continue;
2170         while (eptr >= pp)
2171           {
2172           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2173           eptr--;
2174           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2175           }
2176         RRETURN(MATCH_NOMATCH);
2177         }
2178       /* Control never gets here */
2179       }
2180 
    /* Caseful comparisons (single-byte characters only; multibyte UTF-8
    characters have already been fully handled above) */
2182 
2183     else
2184       {
2185       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2186       if (min == max) continue;
2187       if (minimize)
2188         {
2189         for (fi = min;; fi++)
2190           {
2191           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2192           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2193           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2194             RRETURN(MATCH_NOMATCH);
2195           }
2196         /* Control never gets here */
2197         }
2198       else  /* Maximize */
2199         {
2200         pp = eptr;
2201         for (i = min; i < max; i++)
2202           {
2203           if (eptr >= md->end_subject || fc != *eptr) break;
2204           eptr++;
2205           }
2206         if (possessive) continue;
2207         while (eptr >= pp)
2208           {
2209           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2210           eptr--;
2211           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2212           }
2213         RRETURN(MATCH_NOMATCH);
2214         }
2215       }
2216     /* Control never gets here */
2217 
2218     /* Match a negated single one-byte character. The character we are
2219     checking can be multibyte. */
2220 
2221     case OP_NOT:
2222     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2223     ecode++;
2224     GETCHARINCTEST(c, eptr);
2225     if ((ims & PCRE_CASELESS) != 0)
2226       {
2227 #ifdef SUPPORT_UTF8
2228       if (c < 256)
2229 #endif
2230       c = md->lcc[c];
2231       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2232       }
2233     else
2234       {
2235       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2236       }
2237     break;
2238 
2239     /* Match a negated single one-byte character repeatedly. This is almost a
2240     repeat of the code for a repeated single character, but I haven't found a
2241     nice way of commoning these up that doesn't require a test of the
2242     positive/negative option for each character match. Maybe that wouldn't add
2243     very much to the time taken, but character matching *is* what this is all
2244     about... */
2245 
2246     case OP_NOTEXACT:
2247     min = max = GET2(ecode, 1);
2248     ecode += 3;
2249     goto REPEATNOTCHAR;
2250 
2251     case OP_NOTUPTO:
2252     case OP_NOTMINUPTO:
2253     min = 0;
2254     max = GET2(ecode, 1);
2255     minimize = *ecode == OP_NOTMINUPTO;
2256     ecode += 3;
2257     goto REPEATNOTCHAR;
2258 
2259     case OP_NOTPOSSTAR:
2260     possessive = TRUE;
2261     min = 0;
2262     max = INT_MAX;
2263     ecode++;
2264     goto REPEATNOTCHAR;
2265 
2266     case OP_NOTPOSPLUS:
2267     possessive = TRUE;
2268     min = 1;
2269     max = INT_MAX;
2270     ecode++;
2271     goto REPEATNOTCHAR;
2272 
2273     case OP_NOTPOSQUERY:
2274     possessive = TRUE;
2275     min = 0;
2276     max = 1;
2277     ecode++;
2278     goto REPEATNOTCHAR;
2279 
2280     case OP_NOTPOSUPTO:
2281     possessive = TRUE;
2282     min = 0;
2283     max = GET2(ecode, 1);
2284     ecode += 3;
2285     goto REPEATNOTCHAR;
2286 
2287     case OP_NOTSTAR:
2288     case OP_NOTMINSTAR:
2289     case OP_NOTPLUS:
2290     case OP_NOTMINPLUS:
2291     case OP_NOTQUERY:
2292     case OP_NOTMINQUERY:
2293     c = *ecode++ - OP_NOTSTAR;
2294     minimize = (c & 1) != 0;
2295     min = rep_min[c];                 /* Pick up values from tables; */
2296     max = rep_max[c];                 /* zero for max => infinity */
2297     if (max == 0) max = INT_MAX;
2298 
2299     /* Common code for all repeated single-byte matches. We can give up quickly
2300     if there are fewer than the minimum number of bytes left in the
2301     subject. */
2302 
2303     REPEATNOTCHAR:
2304     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2305     fc = *ecode++;
2306 
2307     /* The code is duplicated for the caseless and caseful cases, for speed,
2308     since matching characters is likely to be quite common. First, ensure the
2309     minimum number of matches are present. If min = max, continue at the same
2310     level without recursing. Otherwise, if minimizing, keep trying the rest of
2311     the expression and advancing one matching character if failing, up to the
2312     maximum. Alternatively, if maximizing, find the maximum number of
2313     characters and work backwards. */
2314 
2315     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2316       max, eptr));
2317 
2318     if ((ims & PCRE_CASELESS) != 0)
2319       {
2320       fc = md->lcc[fc];
2321 
2322 #ifdef SUPPORT_UTF8
2323       /* UTF-8 mode */
2324       if (utf8)
2325         {
2326         register unsigned int d;
2327         for (i = 1; i <= min; i++)
2328           {
2329           GETCHARINC(d, eptr);
2330           if (d < 256) d = md->lcc[d];
2331           if (fc == d) RRETURN(MATCH_NOMATCH);
2332           }
2333         }
2334       else
2335 #endif
2336 
2337       /* Not UTF-8 mode */
2338         {
2339         for (i = 1; i <= min; i++)
2340           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2341         }
2342 
2343       if (min == max) continue;
2344 
2345       if (minimize)
2346         {
2347 #ifdef SUPPORT_UTF8
2348         /* UTF-8 mode */
2349         if (utf8)
2350           {
2351           register unsigned int d;
2352           for (fi = min;; fi++)
2353             {
2354             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2355             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2356             GETCHARINC(d, eptr);
2357             if (d < 256) d = md->lcc[d];
2358             if (fi >= max || eptr >= md->end_subject || fc == d)
2359               RRETURN(MATCH_NOMATCH);
2360             }
2361           }
2362         else
2363 #endif
2364         /* Not UTF-8 mode */
2365           {
2366           for (fi = min;; fi++)
2367             {
2368             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2369             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2370             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2371               RRETURN(MATCH_NOMATCH);
2372             }
2373           }
2374         /* Control never gets here */
2375         }
2376 
2377       /* Maximize case */
2378 
2379       else
2380         {
2381         pp = eptr;
2382 
2383 #ifdef SUPPORT_UTF8
2384         /* UTF-8 mode */
2385         if (utf8)
2386           {
2387           register unsigned int d;
2388           for (i = min; i < max; i++)
2389             {
2390             int len = 1;
2391             if (eptr >= md->end_subject) break;
2392             GETCHARLEN(d, eptr, len);
2393             if (d < 256) d = md->lcc[d];
2394             if (fc == d) break;
2395             eptr += len;
2396             }
2397         if (possessive) continue;
2398         for(;;)
2399             {
2400             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2401             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2402             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2403             BACKCHAR(eptr);
2404             }
2405           }
2406         else
2407 #endif
2408         /* Not UTF-8 mode */
2409           {
2410           for (i = min; i < max; i++)
2411             {
2412             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2413             eptr++;
2414             }
2415           if (possessive) continue;
2416           while (eptr >= pp)
2417             {
2418             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2419             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2420             eptr--;
2421             }
2422           }
2423 
2424         RRETURN(MATCH_NOMATCH);
2425         }
2426       /* Control never gets here */
2427       }
2428 
2429     /* Caseful comparisons */
2430 
2431     else
2432       {
2433 #ifdef SUPPORT_UTF8
2434       /* UTF-8 mode */
2435       if (utf8)
2436         {
2437         register unsigned int d;
2438         for (i = 1; i <= min; i++)
2439           {
2440           GETCHARINC(d, eptr);
2441           if (fc == d) RRETURN(MATCH_NOMATCH);
2442           }
2443         }
2444       else
2445 #endif
2446       /* Not UTF-8 mode */
2447         {
2448         for (i = 1; i <= min; i++)
2449           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2450         }
2451 
2452       if (min == max) continue;
2453 
2454       if (minimize)
2455         {
2456 #ifdef SUPPORT_UTF8
2457         /* UTF-8 mode */
2458         if (utf8)
2459           {
2460           register unsigned int d;
2461           for (fi = min;; fi++)
2462             {
2463             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2464             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2465             GETCHARINC(d, eptr);
2466             if (fi >= max || eptr >= md->end_subject || fc == d)
2467               RRETURN(MATCH_NOMATCH);
2468             }
2469           }
2470         else
2471 #endif
2472         /* Not UTF-8 mode */
2473           {
2474           for (fi = min;; fi++)
2475             {
2476             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2477             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2478             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2479               RRETURN(MATCH_NOMATCH);
2480             }
2481           }
2482         /* Control never gets here */
2483         }
2484 
2485       /* Maximize case */
2486 
2487       else
2488         {
2489         pp = eptr;
2490 
2491 #ifdef SUPPORT_UTF8
2492         /* UTF-8 mode */
2493         if (utf8)
2494           {
2495           register unsigned int d;
2496           for (i = min; i < max; i++)
2497             {
2498             int len = 1;
2499             if (eptr >= md->end_subject) break;
2500             GETCHARLEN(d, eptr, len);
2501             if (fc == d) break;
2502             eptr += len;
2503             }
2504           if (possessive) continue;
2505           for(;;)
2506             {
2507             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2508             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2509             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2510             BACKCHAR(eptr);
2511             }
2512           }
2513         else
2514 #endif
2515         /* Not UTF-8 mode */
2516           {
2517           for (i = min; i < max; i++)
2518             {
2519             if (eptr >= md->end_subject || fc == *eptr) break;
2520             eptr++;
2521             }
2522           if (possessive) continue;
2523           while (eptr >= pp)
2524             {
2525             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2526             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527             eptr--;
2528             }
2529           }
2530 
2531         RRETURN(MATCH_NOMATCH);
2532         }
2533       }
2534     /* Control never gets here */
2535 
2536     /* Match a single character type repeatedly; several different opcodes
2537     share code. This is very similar to the code for single characters, but we
2538     repeat it in the interests of efficiency. */
2539 
2540     case OP_TYPEEXACT:
2541     min = max = GET2(ecode, 1);
2542     minimize = TRUE;
2543     ecode += 3;
2544     goto REPEATTYPE;
2545 
2546     case OP_TYPEUPTO:
2547     case OP_TYPEMINUPTO:
2548     min = 0;
2549     max = GET2(ecode, 1);
2550     minimize = *ecode == OP_TYPEMINUPTO;
2551     ecode += 3;
2552     goto REPEATTYPE;
2553 
2554     case OP_TYPEPOSSTAR:
2555     possessive = TRUE;
2556     min = 0;
2557     max = INT_MAX;
2558     ecode++;
2559     goto REPEATTYPE;
2560 
2561     case OP_TYPEPOSPLUS:
2562     possessive = TRUE;
2563     min = 1;
2564     max = INT_MAX;
2565     ecode++;
2566     goto REPEATTYPE;
2567 
2568     case OP_TYPEPOSQUERY:
2569     possessive = TRUE;
2570     min = 0;
2571     max = 1;
2572     ecode++;
2573     goto REPEATTYPE;
2574 
2575     case OP_TYPEPOSUPTO:
2576     possessive = TRUE;
2577     min = 0;
2578     max = GET2(ecode, 1);
2579     ecode += 3;
2580     goto REPEATTYPE;
2581 
2582     case OP_TYPESTAR:
2583     case OP_TYPEMINSTAR:
2584     case OP_TYPEPLUS:
2585     case OP_TYPEMINPLUS:
2586     case OP_TYPEQUERY:
2587     case OP_TYPEMINQUERY:
2588     c = *ecode++ - OP_TYPESTAR;
2589     minimize = (c & 1) != 0;
2590     min = rep_min[c];                 /* Pick up values from tables; */
2591     max = rep_max[c];                 /* zero for max => infinity */
2592     if (max == 0) max = INT_MAX;
2593 
2594     /* Common code for all repeated single character type matches. Note that
2595     in UTF-8 mode, '.' matches a character of any length, but for the other
2596     character types, the valid characters are all one-byte long. */
2597 
2598     REPEATTYPE:
2599     ctype = *ecode++;      /* Code for the character type */
2600 
2601 #ifdef SUPPORT_UCP
2602     if (ctype == OP_PROP || ctype == OP_NOTPROP)
2603       {
2604       prop_fail_result = ctype == OP_NOTPROP;
2605       prop_type = *ecode++;
2606       prop_value = *ecode++;
2607       }
2608     else prop_type = -1;
2609 #endif
2610 
2611     /* First, ensure the minimum number of matches are present. Use inline
2612     code for maximizing the speed, and do the type test once at the start
2613     (i.e. keep it out of the loop). Also we can test that there are at least
2614     the minimum number of bytes before we start. This isn't as effective in
2615     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2616     is tidier. Also separate the UCP code, which can be the same for both UTF-8
2617     and single-bytes. */
2618 
2619     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2620     if (min > 0)
2621       {
2622 #ifdef SUPPORT_UCP
2623       if (prop_type >= 0)
2624         {
2625         switch(prop_type)
2626           {
2627           case PT_ANY:
2628           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2629           for (i = 1; i <= min; i++)
2630             {
2631             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2632             GETCHARINC(c, eptr);
2633             }
2634           break;
2635 
2636           case PT_LAMP:
2637           for (i = 1; i <= min; i++)
2638             {
2639             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2640             GETCHARINC(c, eptr);
2641             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2642             if ((prop_chartype == ucp_Lu ||
2643                  prop_chartype == ucp_Ll ||
2644                  prop_chartype == ucp_Lt) == prop_fail_result)
2645               RRETURN(MATCH_NOMATCH);
2646             }
2647           break;
2648 
2649           case PT_GC:
2650           for (i = 1; i <= min; i++)
2651             {
2652             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2653             GETCHARINC(c, eptr);
2654             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2655             if ((prop_category == prop_value) == prop_fail_result)
2656               RRETURN(MATCH_NOMATCH);
2657             }
2658           break;
2659 
2660           case PT_PC:
2661           for (i = 1; i <= min; i++)
2662             {
2663             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2664             GETCHARINC(c, eptr);
2665             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2666             if ((prop_chartype == prop_value) == prop_fail_result)
2667               RRETURN(MATCH_NOMATCH);
2668             }
2669           break;
2670 
2671           case PT_SC:
2672           for (i = 1; i <= min; i++)
2673             {
2674             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2675             GETCHARINC(c, eptr);
2676             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2677             if ((prop_script == prop_value) == prop_fail_result)
2678               RRETURN(MATCH_NOMATCH);
2679             }
2680           break;
2681 
2682           default:
2683           RRETURN(PCRE_ERROR_INTERNAL);
2684           }
2685         }
2686 
2687       /* Match extended Unicode sequences. We will get here only if the
2688       support is in the binary; otherwise a compile-time error occurs. */
2689 
2690       else if (ctype == OP_EXTUNI)
2691         {
2692         for (i = 1; i <= min; i++)
2693           {
2694           GETCHARINCTEST(c, eptr);
2695           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2696           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2697           while (eptr < md->end_subject)
2698             {
2699             int len = 1;
2700             if (!utf8) c = *eptr; else
2701               {
2702               GETCHARLEN(c, eptr, len);
2703               }
2704             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2705             if (prop_category != ucp_M) break;
2706             eptr += len;
2707             }
2708           }
2709         }
2710 
2711       else
2712 #endif     /* SUPPORT_UCP */
2713 
2714 /* Handle all other cases when the coding is UTF-8 */
2715 
2716 #ifdef SUPPORT_UTF8
2717       if (utf8) switch(ctype)
2718         {
2719         case OP_ANY:
2720         for (i = 1; i <= min; i++)
2721           {
2722           if (eptr >= md->end_subject ||
2723                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2724             RRETURN(MATCH_NOMATCH);
2725           eptr++;
2726           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2727           }
2728         break;
2729 
2730         case OP_ANYBYTE:
2731         eptr += min;
2732         break;
2733 
2734         case OP_ANYNL:
2735         for (i = 1; i <= min; i++)
2736           {
2737           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2738           GETCHARINC(c, eptr);
2739           switch(c)
2740             {
2741             default: RRETURN(MATCH_NOMATCH);
2742             case 0x000d:
2743             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2744             break;
2745             case 0x000a:
2746             case 0x000b:
2747             case 0x000c:
2748             case 0x0085:
2749             case 0x2028:
2750             case 0x2029:
2751             break;
2752             }
2753           }
2754         break;
2755 
2756         case OP_NOT_DIGIT:
2757         for (i = 1; i <= min; i++)
2758           {
2759           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2760           GETCHARINC(c, eptr);
2761           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
2762             RRETURN(MATCH_NOMATCH);
2763           }
2764         break;
2765 
2766         case OP_DIGIT:
2767         for (i = 1; i <= min; i++)
2768           {
2769           if (eptr >= md->end_subject ||
2770              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
2771             RRETURN(MATCH_NOMATCH);
2772           /* No need to skip more bytes - we know it's a 1-byte character */
2773           }
2774         break;
2775 
2776         case OP_NOT_WHITESPACE:
2777         for (i = 1; i <= min; i++)
2778           {
2779           if (eptr >= md->end_subject ||
2780              (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
2781             RRETURN(MATCH_NOMATCH);
2782           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2783           }
2784         break;
2785 
2786         case OP_WHITESPACE:
2787         for (i = 1; i <= min; i++)
2788           {
2789           if (eptr >= md->end_subject ||
2790              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
2791             RRETURN(MATCH_NOMATCH);
2792           /* No need to skip more bytes - we know it's a 1-byte character */
2793           }
2794         break;
2795 
2796         case OP_NOT_WORDCHAR:
2797         for (i = 1; i <= min; i++)
2798           {
2799           if (eptr >= md->end_subject ||
2800              (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
2801             RRETURN(MATCH_NOMATCH);
2802           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2803           }
2804         break;
2805 
2806         case OP_WORDCHAR:
2807         for (i = 1; i <= min; i++)
2808           {
2809           if (eptr >= md->end_subject ||
2810              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
2811             RRETURN(MATCH_NOMATCH);
2812           /* No need to skip more bytes - we know it's a 1-byte character */
2813           }
2814         break;
2815 
2816         default:
2817         RRETURN(PCRE_ERROR_INTERNAL);
2818         }  /* End switch(ctype) */
2819 
2820       else
2821 #endif     /* SUPPORT_UTF8 */
2822 
2823       /* Code for the non-UTF-8 case for minimum matching of operators other
2824       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2825       number of bytes present, as this was tested above. */
2826 
2827       switch(ctype)
2828         {
2829         case OP_ANY:
2830         if ((ims & PCRE_DOTALL) == 0)
2831           {
2832           for (i = 1; i <= min; i++)
2833             {
2834             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2835             eptr++;
2836             }
2837           }
2838         else eptr += min;
2839         break;
2840 
2841         case OP_ANYBYTE:
2842         eptr += min;
2843         break;
2844 
2845         /* Because of the CRLF case, we can't assume the minimum number of
2846         bytes are present in this case. */
2847 
2848         case OP_ANYNL:
2849         for (i = 1; i <= min; i++)
2850           {
2851           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2852           switch(*eptr++)
2853             {
2854             default: RRETURN(MATCH_NOMATCH);
2855             case 0x000d:
2856             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2857             break;
2858             case 0x000a:
2859             case 0x000b:
2860             case 0x000c:
2861             case 0x0085:
2862             break;
2863             }
2864           }
2865         break;
2866 
2867         case OP_NOT_DIGIT:
2868         for (i = 1; i <= min; i++)
2869           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
2870         break;
2871 
2872         case OP_DIGIT:
2873         for (i = 1; i <= min; i++)
2874           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
2875         break;
2876 
2877         case OP_NOT_WHITESPACE:
2878         for (i = 1; i <= min; i++)
2879           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
2880         break;
2881 
2882         case OP_WHITESPACE:
2883         for (i = 1; i <= min; i++)
2884           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
2885         break;
2886 
2887         case OP_NOT_WORDCHAR:
2888         for (i = 1; i <= min; i++)
2889           if ((md->ctypes[*eptr++] & ctype_word) != 0)
2890             RRETURN(MATCH_NOMATCH);
2891         break;
2892 
2893         case OP_WORDCHAR:
2894         for (i = 1; i <= min; i++)
2895           if ((md->ctypes[*eptr++] & ctype_word) == 0)
2896             RRETURN(MATCH_NOMATCH);
2897         break;
2898 
2899         default:
2900         RRETURN(PCRE_ERROR_INTERNAL);
2901         }
2902       }
2903 
2904     /* If min = max, continue at the same level without recursing */
2905 
2906     if (min == max) continue;
2907 
2908     /* If minimizing, we have to test the rest of the pattern before each
2909     subsequent match. Again, separate the UTF-8 case for speed, and also
2910     separate the UCP cases. */
2911 
2912     if (minimize)
2913       {
2914 #ifdef SUPPORT_UCP
2915       if (prop_type >= 0)
2916         {
2917         switch(prop_type)
2918           {
2919           case PT_ANY:
2920           for (fi = min;; fi++)
2921             {
2922             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2923             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2924             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2925             GETCHARINC(c, eptr);
2926             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2927             }
2928           /* Control never gets here */
2929 
2930           case PT_LAMP:
2931           for (fi = min;; fi++)
2932             {
2933             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2934             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2935             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2936             GETCHARINC(c, eptr);
2937             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2938             if ((prop_chartype == ucp_Lu ||
2939                  prop_chartype == ucp_Ll ||
2940                  prop_chartype == ucp_Lt) == prop_fail_result)
2941               RRETURN(MATCH_NOMATCH);
2942             }
2943           /* Control never gets here */
2944 
2945           case PT_GC:
2946           for (fi = min;; fi++)
2947             {
2948             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2949             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2950             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2951             GETCHARINC(c, eptr);
2952             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2953             if ((prop_category == prop_value) == prop_fail_result)
2954               RRETURN(MATCH_NOMATCH);
2955             }
2956           /* Control never gets here */
2957 
2958           case PT_PC:
2959           for (fi = min;; fi++)
2960             {
2961             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2962             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2963             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2964             GETCHARINC(c, eptr);
2965             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2966             if ((prop_chartype == prop_value) == prop_fail_result)
2967               RRETURN(MATCH_NOMATCH);
2968             }
2969           /* Control never gets here */
2970 
2971           case PT_SC:
2972           for (fi = min;; fi++)
2973             {
2974             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2975             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2976             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2977             GETCHARINC(c, eptr);
2978             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2979             if ((prop_script == prop_value) == prop_fail_result)
2980               RRETURN(MATCH_NOMATCH);
2981             }
2982           /* Control never gets here */
2983 
2984           default:
2985           RRETURN(PCRE_ERROR_INTERNAL);
2986           }
2987         }
2988 
2989       /* Match extended Unicode sequences. We will get here only if the
2990       support is in the binary; otherwise a compile-time error occurs. */
2991 
2992       else if (ctype == OP_EXTUNI)
2993         {
2994         for (fi = min;; fi++)
2995           {
2996           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2997           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2998           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2999           GETCHARINCTEST(c, eptr);
3000           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3001           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3002           while (eptr < md->end_subject)
3003             {
3004             int len = 1;
3005             if (!utf8) c = *eptr; else
3006               {
3007               GETCHARLEN(c, eptr, len);
3008               }
3009             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3010             if (prop_category != ucp_M) break;
3011             eptr += len;
3012             }
3013           }
3014         }
3015 
3016       else
3017 #endif     /* SUPPORT_UCP */
3018 
3019 #ifdef SUPPORT_UTF8
3020       /* UTF-8 mode */
3021       if (utf8)
3022         {
3023         for (fi = min;; fi++)
3024           {
3025           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3026           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3027           if (fi >= max || eptr >= md->end_subject ||
3028                (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3029                 IS_NEWLINE(eptr)))
3030             RRETURN(MATCH_NOMATCH);
3031 
3032           GETCHARINC(c, eptr);
3033           switch(ctype)
3034             {
3035             case OP_ANY:        /* This is the DOTALL case */
3036             break;
3037 
3038             case OP_ANYBYTE:
3039             break;
3040 
3041             case OP_ANYNL:
3042             switch(c)
3043               {
3044               default: RRETURN(MATCH_NOMATCH);
3045               case 0x000d:
3046               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3047               break;
3048               case 0x000a:
3049               case 0x000b:
3050               case 0x000c:
3051               case 0x0085:
3052               case 0x2028:
3053               case 0x2029:
3054               break;
3055               }
3056             break;
3057 
3058             case OP_NOT_DIGIT:
3059             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3060               RRETURN(MATCH_NOMATCH);
3061             break;
3062 
3063             case OP_DIGIT:
3064             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3065               RRETURN(MATCH_NOMATCH);
3066             break;
3067 
3068             case OP_NOT_WHITESPACE:
3069             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3070               RRETURN(MATCH_NOMATCH);
3071             break;
3072 
3073             case OP_WHITESPACE:
3074             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3075               RRETURN(MATCH_NOMATCH);
3076             break;
3077 
3078             case OP_NOT_WORDCHAR:
3079             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3080               RRETURN(MATCH_NOMATCH);
3081             break;
3082 
3083             case OP_WORDCHAR:
3084             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3085               RRETURN(MATCH_NOMATCH);
3086             break;
3087 
3088             default:
3089             RRETURN(PCRE_ERROR_INTERNAL);
3090             }
3091           }
3092         }
3093       else
3094 #endif
3095       /* Not UTF-8 mode */
3096         {
3097         for (fi = min;; fi++)
3098           {
3099           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3100           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3101           if (fi >= max || eptr >= md->end_subject ||
3102                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3103             RRETURN(MATCH_NOMATCH);
3104 
3105           c = *eptr++;
3106           switch(ctype)
3107             {
3108             case OP_ANY:   /* This is the DOTALL case */
3109             break;
3110 
3111             case OP_ANYBYTE:
3112             break;
3113 
3114             case OP_ANYNL:
3115             switch(c)
3116               {
3117               default: RRETURN(MATCH_NOMATCH);
3118               case 0x000d:
3119               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3120               break;
3121               case 0x000a:
3122               case 0x000b:
3123               case 0x000c:
3124               case 0x0085:
3125               break;
3126               }
3127             break;
3128 
3129             case OP_NOT_DIGIT:
3130             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3131             break;
3132 
3133             case OP_DIGIT:
3134             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3135             break;
3136 
3137             case OP_NOT_WHITESPACE:
3138             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3139             break;
3140 
3141             case OP_WHITESPACE:
3142             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3143             break;
3144 
3145             case OP_NOT_WORDCHAR:
3146             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3147             break;
3148 
3149             case OP_WORDCHAR:
3150             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3151             break;
3152 
3153             default:
3154             RRETURN(PCRE_ERROR_INTERNAL);
3155             }
3156           }
3157         }
3158       /* Control never gets here */
3159       }
3160 
3161     /* If maximizing, it is worth using inline code for speed, doing the type
3162     test once at the start (i.e. keep it out of the loop). Again, keep the
3163     UTF-8 and UCP stuff separate. */
3164 
3165     else
3166       {
3167       pp = eptr;  /* Remember where we started */
3168 
3169 #ifdef SUPPORT_UCP
3170       if (prop_type >= 0)
3171         {
3172         switch(prop_type)
3173           {
3174           case PT_ANY:
3175           for (i = min; i < max; i++)
3176             {
3177             int len = 1;
3178             if (eptr >= md->end_subject) break;
3179             GETCHARLEN(c, eptr, len);
3180             if (prop_fail_result) break;
3181             eptr+= len;
3182             }
3183           break;
3184 
3185           case PT_LAMP:
3186           for (i = min; i < max; i++)
3187             {
3188             int len = 1;
3189             if (eptr >= md->end_subject) break;
3190             GETCHARLEN(c, eptr, len);
3191             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3192             if ((prop_chartype == ucp_Lu ||
3193                  prop_chartype == ucp_Ll ||
3194                  prop_chartype == ucp_Lt) == prop_fail_result)
3195               break;
3196             eptr+= len;
3197             }
3198           break;
3199 
3200           case PT_GC:
3201           for (i = min; i < max; i++)
3202             {
3203             int len = 1;
3204             if (eptr >= md->end_subject) break;
3205             GETCHARLEN(c, eptr, len);
3206             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3207             if ((prop_category == prop_value) == prop_fail_result)
3208               break;
3209             eptr+= len;
3210             }
3211           break;
3212 
3213           case PT_PC:
3214           for (i = min; i < max; i++)
3215             {
3216             int len = 1;
3217             if (eptr >= md->end_subject) break;
3218             GETCHARLEN(c, eptr, len);
3219             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3220             if ((prop_chartype == prop_value) == prop_fail_result)
3221               break;
3222             eptr+= len;
3223             }
3224           break;
3225 
3226           case PT_SC:
3227           for (i = min; i < max; i++)
3228             {
3229             int len = 1;
3230             if (eptr >= md->end_subject) break;
3231             GETCHARLEN(c, eptr, len);
3232             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3233             if ((prop_script == prop_value) == prop_fail_result)
3234               break;
3235             eptr+= len;
3236             }
3237           break;
3238           }
3239 
3240         /* eptr is now past the end of the maximum run */
3241 
3242         if (possessive) continue;
3243         for(;;)
3244           {
3245           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3246           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3247           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3248           BACKCHAR(eptr);
3249           }
3250         }
3251 
3252       /* Match extended Unicode sequences. We will get here only if the
3253       support is in the binary; otherwise a compile-time error occurs. */
3254 
3255       else if (ctype == OP_EXTUNI)
3256         {
3257         for (i = min; i < max; i++)
3258           {
3259           if (eptr >= md->end_subject) break;
3260           GETCHARINCTEST(c, eptr);
3261           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3262           if (prop_category == ucp_M) break;
3263           while (eptr < md->end_subject)
3264             {
3265             int len = 1;
3266             if (!utf8) c = *eptr; else
3267               {
3268               GETCHARLEN(c, eptr, len);
3269               }
3270             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3271             if (prop_category != ucp_M) break;
3272             eptr += len;
3273             }
3274           }
3275 
3276         /* eptr is now past the end of the maximum run */
3277 
3278         if (possessive) continue;
3279         for(;;)
3280           {
3281           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3282           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3283           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3284           for (;;)                        /* Move back over one extended */
3285             {
3286             int len = 1;
3287             BACKCHAR(eptr);
3288             if (!utf8) c = *eptr; else
3289               {
3290               GETCHARLEN(c, eptr, len);
3291               }
3292             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3293             if (prop_category != ucp_M) break;
3294             eptr--;
3295             }
3296           }
3297         }
3298 
3299       else
3300 #endif   /* SUPPORT_UCP */
3301 
3302 #ifdef SUPPORT_UTF8
3303       /* UTF-8 mode */
3304 
3305       if (utf8)
3306         {
3307         switch(ctype)
3308           {
3309           case OP_ANY:
3310 
3311           /* Special code is required for UTF8, but when the maximum is
3312           unlimited we don't need it, so we repeat the non-UTF8 code. This is
3313           probably worth it, because .* is quite a common idiom. */
3314 
3315           if (max < INT_MAX)
3316             {
3317             if ((ims & PCRE_DOTALL) == 0)
3318               {
3319               for (i = min; i < max; i++)
3320                 {
3321                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3322                 eptr++;
3323                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3324                 }
3325               }
3326             else
3327               {
3328               for (i = min; i < max; i++)
3329                 {
3330                 if (eptr >= md->end_subject) break;
3331                 eptr++;
3332                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3333                 }
3334               }
3335             }
3336 
3337           /* Handle unlimited UTF-8 repeat */
3338 
3339           else
3340             {
3341             if ((ims & PCRE_DOTALL) == 0)
3342               {
3343               for (i = min; i < max; i++)
3344                 {
3345                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3346                 eptr++;
3347                 }
3348               break;
3349               }
3350             else
3351               {
3352               c = max - min;
3353               if (c > (unsigned int)(md->end_subject - eptr))
3354                 c = md->end_subject - eptr;
3355               eptr += c;
3356               }
3357             }
3358           break;
3359 
3360           /* The byte case is the same as non-UTF8 */
3361 
3362           case OP_ANYBYTE:
3363           c = max - min;
3364           if (c > (unsigned int)(md->end_subject - eptr))
3365             c = md->end_subject - eptr;
3366           eptr += c;
3367           break;
3368 
3369           case OP_ANYNL:
3370           for (i = min; i < max; i++)
3371             {
3372             int len = 1;
3373             if (eptr >= md->end_subject) break;
3374             GETCHARLEN(c, eptr, len);
3375             if (c == 0x000d)
3376               {
3377               if (++eptr >= md->end_subject) break;
3378               if (*eptr == 0x000a) eptr++;
3379               }
3380             else
3381               {
3382               if (c != 0x000a && c != 0x000b && c != 0x000c &&
3383                   c != 0x0085 && c != 0x2028 && c != 0x2029)
3384                 break;
3385               eptr += len;
3386               }
3387             }
3388           break;
3389 
3390           case OP_NOT_DIGIT:
3391           for (i = min; i < max; i++)
3392             {
3393             int len = 1;
3394             if (eptr >= md->end_subject) break;
3395             GETCHARLEN(c, eptr, len);
3396             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3397             eptr+= len;
3398             }
3399           break;
3400 
3401           case OP_DIGIT:
3402           for (i = min; i < max; i++)
3403             {
3404             int len = 1;
3405             if (eptr >= md->end_subject) break;
3406             GETCHARLEN(c, eptr, len);
3407             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3408             eptr+= len;
3409             }
3410           break;
3411 
3412           case OP_NOT_WHITESPACE:
3413           for (i = min; i < max; i++)
3414             {
3415             int len = 1;
3416             if (eptr >= md->end_subject) break;
3417             GETCHARLEN(c, eptr, len);
3418             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3419             eptr+= len;
3420             }
3421           break;
3422 
3423           case OP_WHITESPACE:
3424           for (i = min; i < max; i++)
3425             {
3426             int len = 1;
3427             if (eptr >= md->end_subject) break;
3428             GETCHARLEN(c, eptr, len);
3429             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3430             eptr+= len;
3431             }
3432           break;
3433 
3434           case OP_NOT_WORDCHAR:
3435           for (i = min; i < max; i++)
3436             {
3437             int len = 1;
3438             if (eptr >= md->end_subject) break;
3439             GETCHARLEN(c, eptr, len);
3440             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3441             eptr+= len;
3442             }
3443           break;
3444 
3445           case OP_WORDCHAR:
3446           for (i = min; i < max; i++)
3447             {
3448             int len = 1;
3449             if (eptr >= md->end_subject) break;
3450             GETCHARLEN(c, eptr, len);
3451             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3452             eptr+= len;
3453             }
3454           break;
3455 
3456           default:
3457           RRETURN(PCRE_ERROR_INTERNAL);
3458           }
3459 
3460         /* eptr is now past the end of the maximum run */
3461 
3462         if (possessive) continue;
3463         for(;;)
3464           {
3465           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3466           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3467           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3468           BACKCHAR(eptr);
3469           }
3470         }
3471       else
3472 #endif
3473 
3474       /* Not UTF-8 mode */
3475         {
3476         switch(ctype)
3477           {
3478           case OP_ANY:
3479           if ((ims & PCRE_DOTALL) == 0)
3480             {
3481             for (i = min; i < max; i++)
3482               {
3483               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3484               eptr++;
3485               }
3486             break;
3487             }
3488           /* For DOTALL case, fall through and treat as \C */
3489 
3490           case OP_ANYBYTE:
3491           c = max - min;
3492           if (c > (unsigned int)(md->end_subject - eptr))
3493             c = md->end_subject - eptr;
3494           eptr += c;
3495           break;
3496 
3497           case OP_ANYNL:
3498           for (i = min; i < max; i++)
3499             {
3500             if (eptr >= md->end_subject) break;
3501             c = *eptr;
3502             if (c == 0x000d)
3503               {
3504               if (++eptr >= md->end_subject) break;
3505               if (*eptr == 0x000a) eptr++;
3506               }
3507             else
3508               {
3509               if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3510                 break;
3511               eptr++;
3512               }
3513             }
3514           break;
3515 
3516           case OP_NOT_DIGIT:
3517           for (i = min; i < max; i++)
3518             {
3519             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
3520               break;
3521             eptr++;
3522             }
3523           break;
3524 
3525           case OP_DIGIT:
3526           for (i = min; i < max; i++)
3527             {
3528             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
3529               break;
3530             eptr++;
3531             }
3532           break;
3533 
3534           case OP_NOT_WHITESPACE:
3535           for (i = min; i < max; i++)
3536             {
3537             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
3538               break;
3539             eptr++;
3540             }
3541           break;
3542 
3543           case OP_WHITESPACE:
3544           for (i = min; i < max; i++)
3545             {
3546             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
3547               break;
3548             eptr++;
3549             }
3550           break;
3551 
3552           case OP_NOT_WORDCHAR:
3553           for (i = min; i < max; i++)
3554             {
3555             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
3556               break;
3557             eptr++;
3558             }
3559           break;
3560 
3561           case OP_WORDCHAR:
3562           for (i = min; i < max; i++)
3563             {
3564             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
3565               break;
3566             eptr++;
3567             }
3568           break;
3569 
3570           default:
3571           RRETURN(PCRE_ERROR_INTERNAL);
3572           }
3573 
3574         /* eptr is now past the end of the maximum run */
3575 
3576         if (possessive) continue;
3577         while (eptr >= pp)
3578           {
3579           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3580           eptr--;
3581           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3582           }
3583         }
3584 
3585       /* Get here if we can't make it match with any permitted repetitions */
3586 
3587       RRETURN(MATCH_NOMATCH);
3588       }
3589     /* Control never gets here */
3590 
3591     /* There's been some horrible disaster. Arrival here can only mean there is
3592     something seriously wrong in the code above or the OP_xxx definitions. */
3593 
3594     default:
3595     DPRINTF(("Unknown opcode %d\n", *ecode));
3596     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3597     }
3598 
3599   /* Do not stick any code in here without much thought; it is assumed
3600   that "continue" in the code above comes out to here to repeat the main
3601   loop. */
3602 
3603   }             /* End of main loop */
3604 /* Control never reaches here */
3605 }
3606 
3607 
3608 /***************************************************************************
3609 ****************************************************************************
3610                    RECURSION IN THE match() FUNCTION
3611 
3612 Undefine all the macros that were defined above to handle this. */
3613 
3614 #ifdef NO_RECURSE   /* the names in this guarded section are macros only in this build */
3615 #undef eptr
3616 #undef ecode
3617 #undef offset_top
3618 #undef ims
3619 #undef eptrb
3620 #undef flags
3621 
3622 #undef callpat
3623 #undef charptr
3624 #undef data
3625 #undef next
3626 #undef pp
3627 #undef prev
3628 #undef saved_eptr
3629 
3630 #undef new_recursive
3631 
3632 #undef cur_is_word
3633 #undef condition
3634 #undef prev_is_word
3635 
3636 #undef original_ims
3637 /* Note: length is also a parameter name of pcre_exec() below, so that macro must be gone before it */
3638 #undef ctype
3639 #undef length
3640 #undef max
3641 #undef min
3642 #undef number
3643 #undef offset
3644 #undef op
3645 #undef save_capture_last
3646 #undef save_offset1
3647 #undef save_offset2
3648 #undef save_offset3
3649 #undef stacksave
3650 
3651 #undef newptrb
3652 
3653 #endif   /* NO_RECURSE */
3654 
3655 /* These two are defined as macros in both cases (presumably with or without NO_RECURSE), hence outside the #ifdef */
3656 
3657 #undef fc
3658 #undef fi
3659 
3660 /***************************************************************************
3661 ***************************************************************************/
3662 
3663 
3664 
3665 /*************************************************
3666 *         Execute a Regular Expression           *
3667 *************************************************/
3668 
3669 /* This function applies a compiled re to a subject string and picks out
3670 portions of the string if it matches. Two elements in the vector are set for
3671 each substring: the offsets to the start and end of the substring.
3672 
3673 Arguments:
3674   argument_re     points to the compiled expression
3675   extra_data      points to extra data or is NULL
3676   subject         points to the subject string
3677   length          length of subject string (may contain binary zeros)
3678   start_offset    where to start in the subject string
3679   options         option bits
3680   offsets         points to a vector of ints to be filled in with offsets
3681   offsetcount     the number of elements in the vector
3682 
3683 Returns:          > 0 => success; value is the number of elements filled in
3684                   = 0 => success, but offsets is not big enough
3685                    -1 => failed to match
3686                  < -1 => some kind of unexpected problem
3687 */
3688 
3689 PCRE_DATA_SCOPE int
pcre_exec(const pcre * argument_re,const pcre_extra * extra_data,PCRE_SPTR subject,int length,int start_offset,int options,int * offsets,int offsetcount)3690 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3691   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3692   int offsetcount)
3693 {
3694 int rc, resetcount, ocount;
3695 int first_byte = -1;
3696 int req_byte = -1;
3697 int req_byte2 = -1;
3698 int newline;
3699 unsigned long int ims;
3700 BOOL using_temporary_offsets = FALSE;
3701 BOOL anchored;
3702 BOOL startline;
3703 BOOL firstline;
3704 BOOL first_byte_caseless = FALSE;
3705 BOOL req_byte_caseless = FALSE;
3706 BOOL utf8;
3707 match_data match_block;
3708 match_data *md = &match_block;
3709 const uschar *tables;
3710 const uschar *start_bits = NULL;
3711 USPTR start_match = (USPTR)subject + start_offset;
3712 USPTR end_subject;
3713 USPTR req_byte_ptr = start_match - 1;
3714 eptrblock eptrchain[EPTR_WORK_SIZE];
3715 
3716 pcre_study_data internal_study;
3717 const pcre_study_data *study;
3718 
3719 real_pcre internal_re;
3720 const real_pcre *external_re = (const real_pcre *)argument_re;
3721 const real_pcre *re = external_re;
3722 
3723 /* Plausibility checks */
3724 
3725 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3726 if (re == NULL || subject == NULL ||
3727    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3728 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3729 
3730 /* Fish out the optional data from the extra_data structure, first setting
3731 the default values. */
3732 
3733 study = NULL;
3734 md->match_limit = MATCH_LIMIT;
3735 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3736 md->callout_data = NULL;
3737 
3738 /* The table pointer is always in native byte order. */
3739 
3740 tables = external_re->tables;
3741 
3742 if (extra_data != NULL)
3743   {
3744   register unsigned int flags = extra_data->flags;
3745   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3746     study = (const pcre_study_data *)extra_data->study_data;
3747   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3748     md->match_limit = extra_data->match_limit;
3749   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3750     md->match_limit_recursion = extra_data->match_limit_recursion;
3751   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3752     md->callout_data = extra_data->callout_data;
3753   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3754   }
3755 
3756 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
3757 is a feature that makes it possible to save compiled regex and re-use them
3758 in other programs later. */
3759 
3760 if (tables == NULL) tables = _pcre_default_tables;
3761 
3762 /* Check that the first field in the block is the magic number. If it is not,
3763 test for a regex that was compiled on a host of opposite endianness. If this is
3764 the case, flipped values are put in internal_re and internal_study if there was
3765 study data too. */
3766 
3767 if (re->magic_number != MAGIC_NUMBER)
3768   {
3769   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
3770   if (re == NULL) return PCRE_ERROR_BADMAGIC;
3771   if (study != NULL) study = &internal_study;
3772   }
3773 
3774 /* Set up other data */
3775 
3776 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3777 startline = (re->options & PCRE_STARTLINE) != 0;
3778 firstline = (re->options & PCRE_FIRSTLINE) != 0;
3779 
3780 /* The code starts after the real_pcre block and the capture name table. */
3781 
3782 md->start_code = (const uschar *)external_re + re->name_table_offset +
3783   re->name_count * re->name_entry_size;
3784 
3785 md->start_subject = (USPTR)subject;
3786 md->start_offset = start_offset;
3787 md->end_subject = md->start_subject + length;
3788 end_subject = md->end_subject;
3789 
3790 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3791 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3792 
3793 md->notbol = (options & PCRE_NOTBOL) != 0;
3794 md->noteol = (options & PCRE_NOTEOL) != 0;
3795 md->notempty = (options & PCRE_NOTEMPTY) != 0;
3796 md->partial = (options & PCRE_PARTIAL) != 0;
3797 md->hitend = FALSE;
3798 
3799 md->recursive = NULL;                   /* No recursion at top level */
3800 md->eptrchain = eptrchain;              /* Make workspace generally available */
3801 
3802 md->lcc = tables + lcc_offset;
3803 md->ctypes = tables + ctypes_offset;
3804 
3805 /* Handle different types of newline. The two bits give four cases. If nothing
3806 is set at run time, whatever was used at compile time applies. */
3807 
3808 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
3809        PCRE_NEWLINE_BITS)
3810   {
3811   case 0: newline = NEWLINE; break;   /* Compile-time default */
3812   case PCRE_NEWLINE_CR: newline = '\r'; break;
3813   case PCRE_NEWLINE_LF: newline = '\n'; break;
3814   case PCRE_NEWLINE_CR+
3815        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3816   case PCRE_NEWLINE_ANY: newline = -1; break;
3817   default: return PCRE_ERROR_BADNEWLINE;
3818   }
3819 
3820 if (newline < 0)
3821   {
3822   md->nltype = NLTYPE_ANY;
3823   }
3824 else
3825   {
3826   md->nltype = NLTYPE_FIXED;
3827   if (newline > 255)
3828     {
3829     md->nllen = 2;
3830     md->nl[0] = (newline >> 8) & 255;
3831     md->nl[1] = newline & 255;
3832     }
3833   else
3834     {
3835     md->nllen = 1;
3836     md->nl[0] = newline;
3837     }
3838   }
3839 
3840 /* Partial matching is supported only for a restricted set of regexes at the
3841 moment. */
3842 
3843 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3844   return PCRE_ERROR_BADPARTIAL;
3845 
3846 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3847 back the character offset. */
3848 
3849 #ifdef SUPPORT_UTF8
3850 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3851   {
3852   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3853     return PCRE_ERROR_BADUTF8;
3854   if (start_offset > 0 && start_offset < length)
3855     {
3856     int tb = ((uschar *)subject)[start_offset];
3857     if (tb > 127)
3858       {
3859       tb &= 0xc0;
3860       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
3861       }
3862     }
3863   }
3864 #endif
3865 
3866 /* The ims options can vary during the matching as a result of the presence
3867 of (?ims) items in the pattern. They are kept in a local variable so that
3868 restoring at the exit of a group is easy. */
3869 
3870 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
3871 
3872 /* If the expression has got more back references than the offsets supplied can
3873 hold, we get a temporary chunk of working store to use during the matching.
3874 Otherwise, we can use the vector supplied, rounding down its size to a multiple
3875 of 3. */
3876 
3877 ocount = offsetcount - (offsetcount % 3);
3878 
3879 if (re->top_backref > 0 && re->top_backref >= ocount/3)
3880   {
3881   ocount = re->top_backref * 3 + 3;
3882   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3883   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3884   using_temporary_offsets = TRUE;
3885   DPRINTF(("Got memory to hold back references\n"));
3886   }
3887 else md->offset_vector = offsets;
3888 
3889 md->offset_end = ocount;
3890 md->offset_max = (2*ocount)/3;
3891 md->offset_overflow = FALSE;
3892 md->capture_last = -1;
3893 
3894 /* Compute the minimum number of offsets that we need to reset each time. Doing
3895 this makes a huge difference to execution time when there aren't many brackets
3896 in the pattern. */
3897 
3898 resetcount = 2 + re->top_bracket * 2;
3899 if (resetcount > offsetcount) resetcount = ocount;
3900 
3901 /* Reset the working variable associated with each extraction. These should
3902 never be used unless previously set, but they get saved and restored, and so we
3903 initialize them to avoid reading uninitialized locations. */
3904 
3905 if (md->offset_vector != NULL)
3906   {
3907   register int *iptr = md->offset_vector + ocount;
3908   register int *iend = iptr - resetcount/2 + 1;
3909   while (--iptr >= iend) *iptr = -1;
3910   }
3911 
3912 /* Set up the first character to match, if available. The first_byte value is
3913 never set for an anchored regular expression, but the anchoring may be forced
3914 at run time, so we have to test for anchoring. The first char may be unset for
3915 an unanchored pattern, of course. If there's no first char and the pattern was
3916 studied, there may be a bitmap of possible first characters. */
3917 
3918 if (!anchored)
3919   {
3920   if ((re->options & PCRE_FIRSTSET) != 0)
3921     {
3922     first_byte = re->first_byte & 255;
3923     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3924       first_byte = md->lcc[first_byte];
3925     }
3926   else
3927     if (!startline && study != NULL &&
3928       (study->options & PCRE_STUDY_MAPPED) != 0)
3929         start_bits = study->start_bits;
3930   }
3931 
3932 /* For anchored or unanchored matches, there may be a "last known required
3933 character" set. */
3934 
3935 if ((re->options & PCRE_REQCHSET) != 0)
3936   {
3937   req_byte = re->req_byte & 255;
3938   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
3939   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
3940   }
3941 
3942 
3943 /* ==========================================================================*/
3944 
3945 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3946 the loop runs just once. */
3947 
3948 for(;;)
3949   {
3950   USPTR save_end_subject = end_subject;
3951 
3952   /* Reset the maximum number of extractions we might see. */
3953 
3954   if (md->offset_vector != NULL)
3955     {
3956     register int *iptr = md->offset_vector;
3957     register int *iend = iptr + resetcount;
3958     while (iptr < iend) *iptr++ = -1;
3959     }
3960 
3961   /* Advance to a unique first char if possible. If firstline is TRUE, the
3962   start of the match is constrained to the first line of a multiline string.
3963   That is, the match must be before or at the first newline. Implement this by
3964   temporarily adjusting end_subject so that we stop scanning at a newline. If
3965   the match fails at the newline, later code breaks this loop. */
3966 
3967   if (firstline)
3968     {
3969     USPTR t = start_match;
3970     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3971     end_subject = t;
3972     }
3973 
3974   /* Now test for a unique first byte */
3975 
3976   if (first_byte >= 0)
3977     {
3978     if (first_byte_caseless)
3979       while (start_match < end_subject &&
3980              md->lcc[*start_match] != first_byte)
3981         start_match++;
3982     else
3983       while (start_match < end_subject && *start_match != first_byte)
3984         start_match++;
3985     }
3986 
3987   /* Or to just after a linebreak for a multiline match if possible */
3988 
3989   else if (startline)
3990     {
3991     if (start_match > md->start_subject + start_offset)
3992       {
3993       while (start_match <= end_subject && !WAS_NEWLINE(start_match))
3994         start_match++;
3995       }
3996     }
3997 
3998   /* Or to a non-unique first char after study */
3999 
4000   else if (start_bits != NULL)
4001     {
4002     while (start_match < end_subject)
4003       {
4004       register unsigned int c = *start_match;
4005       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4006       }
4007     }
4008 
4009   /* Restore fudged end_subject */
4010 
4011   end_subject = save_end_subject;
4012 
4013 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4014   printf(">>>> Match against: ");
4015   pchars(start_match, end_subject - start_match, TRUE, md);
4016   printf("\n");
4017 #endif
4018 
4019   /* If req_byte is set, we know that that character must appear in the subject
4020   for the match to succeed. If the first character is set, req_byte must be
4021   later in the subject; otherwise the test starts at the match point. This
4022   optimization can save a huge amount of backtracking in patterns with nested
4023   unlimited repeats that aren't going to match. Writing separate code for
4024   cased/caseless versions makes it go faster, as does using an autoincrement
4025   and backing off on a match.
4026 
4027   HOWEVER: when the subject string is very, very long, searching to its end can
4028   take a long time, and give bad performance on quite ordinary patterns. This
4029   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4030   string... so we don't do this when the string is sufficiently long.
4031 
4032   ALSO: this processing is disabled when partial matching is requested.
4033   */
4034 
4035   if (req_byte >= 0 &&
4036       end_subject - start_match < REQ_BYTE_MAX &&
4037       !md->partial)
4038     {
4039     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4040 
4041     /* We don't need to repeat the search if we haven't yet reached the
4042     place we found it at last time. */
4043 
4044     if (p > req_byte_ptr)
4045       {
4046       if (req_byte_caseless)
4047         {
4048         while (p < end_subject)
4049           {
4050           register int pp = *p++;
4051           if (pp == req_byte || pp == req_byte2) { p--; break; }
4052           }
4053         }
4054       else
4055         {
4056         while (p < end_subject)
4057           {
4058           if (*p++ == req_byte) { p--; break; }
4059           }
4060         }
4061 
4062       /* If we can't find the required character, break the matching loop,
4063       forcing a match failure. */
4064 
4065       if (p >= end_subject)
4066         {
4067         rc = MATCH_NOMATCH;
4068         break;
4069         }
4070 
4071       /* If we have found the required character, save the point where we
4072       found it, so that we don't search again next time round the loop if
4073       the start hasn't passed this character yet. */
4074 
4075       req_byte_ptr = p;
4076       }
4077     }
4078 
4079   /* OK, we can now run the match. */
4080 
4081   md->start_match = start_match;
4082   md->match_call_count = 0;
4083   md->eptrn = 0;                          /* Next free eptrchain slot */
4084   rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4085 
4086   /* Any return other than MATCH_NOMATCH breaks the loop. */
4087 
4088   if (rc != MATCH_NOMATCH) break;
4089 
4090   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4091   newline in the subject (though it may continue over the newline). Therefore,
4092   if we have just failed to match, starting at a newline, do not continue. */
4093 
4094   if (firstline && IS_NEWLINE(start_match)) break;
4095 
4096   /* Advance the match position by one character. */
4097 
4098   start_match++;
4099 #ifdef SUPPORT_UTF8
4100   if (utf8)
4101     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4102       start_match++;
4103 #endif
4104 
4105   /* Break the loop if the pattern is anchored or if we have passed the end of
4106   the subject. */
4107 
4108   if (anchored || start_match > end_subject) break;
4109 
4110   /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4111   are now at a LF, advance the match position by one more character. */
4112 
4113   if (start_match[-1] == '\r' &&
4114        (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4115        start_match < end_subject &&
4116        *start_match == '\n')
4117     start_match++;
4118 
4119   }   /* End of for(;;) "bumpalong" loop */
4120 
4121 /* ==========================================================================*/
4122 
4123 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4124 conditions is true:
4125 
4126 (1) The pattern is anchored;
4127 
4128 (2) We are past the end of the subject;
4129 
4130 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4131     this option requests that a match occur at or before the first newline in
4132     the subject.
4133 
4134 When we have a match and the offset vector is big enough to deal with any
4135 backreferences, captured substring offsets will already be set up. In the case
4136 where we had to get some local store to hold offsets for backreference
4137 processing, copy those that we can. In this case there need not be overflow if
4138 certain parts of the pattern were not used, even though there are more
4139 capturing parentheses than vector slots. */
4140 
4141 if (rc == MATCH_MATCH)
4142   {
4143   if (using_temporary_offsets)
4144     {
4145     if (offsetcount >= 4)
4146       {
4147       memcpy(offsets + 2, md->offset_vector + 2,
4148         (offsetcount - 2) * sizeof(int));
4149       DPRINTF(("Copied offsets from temporary memory\n"));
4150       }
4151     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4152     DPRINTF(("Freeing temporary memory\n"));
4153     (pcre_free)(md->offset_vector);
4154     }
4155 
4156   /* Set the return code to the number of captured strings, or 0 if there are
4157   too many to fit into the vector. */
4158 
4159   rc = md->offset_overflow? 0 : md->end_offset_top/2;
4160 
4161   /* If there is space, set up the whole thing as substring 0. */
4162 
4163   if (offsetcount < 2) rc = 0; else
4164     {
4165     offsets[0] = start_match - md->start_subject;
4166     offsets[1] = md->end_match_ptr - md->start_subject;
4167     }
4168 
4169   DPRINTF((">>>> returning %d\n", rc));
4170   return rc;
4171   }
4172 
4173 /* Control gets here if there has been an error, or if the overall match
4174 attempt has failed at all permitted starting positions. */
4175 
4176 if (using_temporary_offsets)
4177   {
4178   DPRINTF(("Freeing temporary memory\n"));
4179   (pcre_free)(md->offset_vector);
4180   }
4181 
4182 if (rc != MATCH_NOMATCH)
4183   {
4184   DPRINTF((">>>> error: returning %d\n", rc));
4185   return rc;
4186   }
4187 else if (md->partial && md->hitend)
4188   {
4189   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4190   return PCRE_ERROR_PARTIAL;
4191   }
4192 else
4193   {
4194   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4195   return PCRE_ERROR_NOMATCH;
4196   }
4197 }
4198 
4199 /* End of pcre_exec.c */
4200