1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2014 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 /* This module contains functions that scan a compiled pattern and change
42 repeats into possessive repeats where possible. */
43 
44 
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48 
49 
50 #include "pcre2_internal.h"
51 
52 
53 /*************************************************
54 *        Tables for auto-possessification        *
55 *************************************************/
56 
57 /* This table is used to check whether auto-possessification is possible
58 between adjacent character-type opcodes. The left-hand (repeated) opcode is
used to select the row, and the right-hand opcode is used to select the column.
60 A value of 1 means that auto-possessification is OK. For example, the second
61 value in the first row means that \D+\d can be turned into \D++\d.
62 
63 The Unicode property types (\P and \p) have to be present to fill out the table
64 because of what their opcode values are, but the table values should always be
65 zero because property types are handled separately in the code. The last four
66 columns apply to items that cannot be repeated, so there is no need to have
rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is
68 *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
69 
/* Table dimensions. Rows cover the opcodes FIRST_AUTOTAB_OP up to
LAST_AUTOTAB_LEFT_OP (items that may be the repeated, left-hand operand);
columns cover FIRST_AUTOTAB_OP up to LAST_AUTOTAB_RIGHT_OP (items that may
follow it). The initializer below supplies 17 rows of 21 values each. */

#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)

/* Each row is labelled on the right with the repeated (left-hand) item and
the heading line labels the columns with the following (right-hand) item.
NOTE(review): the lookup itself lies outside the part of the file reviewed
here; presumably both indexes are opcode values offset by FIRST_AUTOTAB_OP -
confirm at the point of use. */

static const uint8_t autoposstab[APTROWS][APTCOLS] = {
/* \D \d \S \s \W \w  . .+ \C \P \p \R \H \h \V \v \X \Z \z  $ $M */
  { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \D */
  { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \d */
  { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \S */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \s */
  { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \W */
  { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 },  /* \w */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .  */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* .+ */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 },  /* \C */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \P */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },  /* \p */
  { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \R */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 },  /* \H */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \h */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 },  /* \V */
  { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 },  /* \v */
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }   /* \X */
};
93 
94 /* This table is used to check whether auto-possessification is possible
95 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
96 left-hand (repeated) opcode is used to select the row, and the right-hand
97 opcode is used to select the column. The values are as follows:
98 
99   0   Always return FALSE (never auto-possessify)
100   1   Character groups are distinct (possessify if both are OP_PROP)
101   2   Check character categories in the same group (general or particular)
102   3   TRUE if the two opcodes are not the same (PROP vs NOTPROP)
103 
104   4   Check left general category vs right particular category
105   5   Check right general category vs left particular category
106 
107   6   Left alphanum vs right general category
108   7   Left space vs right general category
109   8   Left word vs right general category
110 
111   9   Right alphanum vs left general category
112  10   Right space vs left general category
113  11   Right word vs left general category
114 
115  12   Left alphanum vs right particular category
116  13   Left space vs right particular category
117  14   Left word vs right particular category
118 
119  15   Right alphanum vs left particular category
120  16   Right space vs left particular category
121  17   Right word vs left particular category
122 */
123 
/* Indexed as propposstab[left property type][right property type], where the
property types are the PT_xxx values stored in entry 2 of each opcode's
property list. The value fetched is the case number that compare_opcodes()
switches on to decide whether the pair can be auto-possessified. */

static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
/* ANY LAMP GC  PC  SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_ANY */
  { 0,  3,  0,  0,  0,    3,    1,      1,   0,    0,   0 },  /* PT_LAMP */
  { 0,  0,  2,  4,  0,    9,   10,     10,  11,    0,   0 },  /* PT_GC */
  { 0,  0,  5,  2,  0,   15,   16,     16,  17,    0,   0 },  /* PT_PC */
  { 0,  0,  0,  0,  2,    0,    0,      0,   0,    0,   0 },  /* PT_SC */
  { 0,  3,  6, 12,  0,    3,    1,      1,   0,    0,   0 },  /* PT_ALNUM */
  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_SPACE */
  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_PXSPACE */
  { 0,  0,  8, 14,  0,    0,    1,      1,   3,    0,   0 },  /* PT_WORD */
  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_CLIST */
  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   3 }   /* PT_UCNC */
};
138 
139 /* This table is used to check whether auto-possessification is possible
140 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
141 specifies a general category and the other specifies a particular category. The
142 row is selected by the general category and the column by the particular
143 category. The value is 1 if the particular category is not part of the general
144 category. */
145 
/* Indexed as catposstab[general category][particular category]: 7 rows, one
per general category labelled on the right, and 30 columns, one per particular
category named in the heading line. compare_opcodes() also indexes the rows
with the general categories held in posspropstab. */

static const uint8_t catposstab[7][30] = {
/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
  { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
};
156 
157 /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
158 a general or particular category. The properties in each row are those
159 that apply to the character set in question. Duplication means that a little
160 unnecessary work is done when checking, but this keeps things much simpler
161 because they can all use the same code. For more details see the comment where
162 this table is used.
163 
164 Note: SPACE and PXSPACE used to be different because Perl excluded VT from
165 "space", but from Perl 5.18 it's included, so both categories are treated the
166 same here. */
167 
/* Row 0 is ALNUM, row 1 is SPACE/PXSPACE and row 2 is WORD. compare_opcodes()
selects the row by subtracting 6, 9, 12 or 15 from the value it fetched from
propposstab. Within a row, entries 0 and 1 are general categories that are
always checked; entry 2 (a general category) or entry 3 (a particular
category) is checked as well, depending on the kind of category being
compared. */

static const uint8_t posspropstab[3][4] = {
  { ucp_L, ucp_N, ucp_N, ucp_Nl },  /* ALNUM, 3rd and 4th values redundant */
  { ucp_Z, ucp_Z, ucp_C, ucp_Cc },  /* SPACE and PXSPACE, 2nd value redundant */
  { ucp_L, ucp_N, ucp_P, ucp_Po }   /* WORD */
};
173 
174 /* This table is used when converting repeating opcodes into possessified
175 versions as a result of an explicit possessive quantifier such as ++. A zero
176 value means there is no possessified version - in those cases the item in
177 question must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT
178 because all relevant opcodes are less than that. */
179 
/* Each entry is labelled with the opcode(s) it belongs to; a repeat opcode
maps to its possessive counterpart, everything else to zero. The first 32
entries are all zero, and the entry after them is labelled NOTI.
NOTE(review): no use of this table appears in the part of the file reviewed
here; presumably it is indexed directly by opcode value - confirm at the
point of use. */

static const uint8_t opcode_possessify[] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0 - 15  */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 16 - 31 */

  0,                       /* NOTI */
  OP_POSSTAR, 0,           /* STAR, MINSTAR */
  OP_POSPLUS, 0,           /* PLUS, MINPLUS */
  OP_POSQUERY, 0,          /* QUERY, MINQUERY */
  OP_POSUPTO, 0,           /* UPTO, MINUPTO */
  0,                       /* EXACT */
  0, 0, 0, 0,              /* POS{STAR,PLUS,QUERY,UPTO} */

  OP_POSSTARI, 0,          /* STARI, MINSTARI */
  OP_POSPLUSI, 0,          /* PLUSI, MINPLUSI */
  OP_POSQUERYI, 0,         /* QUERYI, MINQUERYI */
  OP_POSUPTOI, 0,          /* UPTOI, MINUPTOI */
  0,                       /* EXACTI */
  0, 0, 0, 0,              /* POS{STARI,PLUSI,QUERYI,UPTOI} */

  OP_NOTPOSSTAR, 0,        /* NOTSTAR, NOTMINSTAR */
  OP_NOTPOSPLUS, 0,        /* NOTPLUS, NOTMINPLUS */
  OP_NOTPOSQUERY, 0,       /* NOTQUERY, NOTMINQUERY */
  OP_NOTPOSUPTO, 0,        /* NOTUPTO, NOTMINUPTO */
  0,                       /* NOTEXACT */
  0, 0, 0, 0,              /* NOTPOS{STAR,PLUS,QUERY,UPTO} */

  OP_NOTPOSSTARI, 0,       /* NOTSTARI, NOTMINSTARI */
  OP_NOTPOSPLUSI, 0,       /* NOTPLUSI, NOTMINPLUSI */
  OP_NOTPOSQUERYI, 0,      /* NOTQUERYI, NOTMINQUERYI */
  OP_NOTPOSUPTOI, 0,       /* NOTUPTOI, NOTMINUPTOI */
  0,                       /* NOTEXACTI */
  0, 0, 0, 0,              /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */

  OP_TYPEPOSSTAR, 0,       /* TYPESTAR, TYPEMINSTAR */
  OP_TYPEPOSPLUS, 0,       /* TYPEPLUS, TYPEMINPLUS */
  OP_TYPEPOSQUERY, 0,      /* TYPEQUERY, TYPEMINQUERY */
  OP_TYPEPOSUPTO, 0,       /* TYPEUPTO, TYPEMINUPTO */
  0,                       /* TYPEEXACT */
  0, 0, 0, 0,              /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */

  OP_CRPOSSTAR, 0,         /* CRSTAR, CRMINSTAR */
  OP_CRPOSPLUS, 0,         /* CRPLUS, CRMINPLUS */
  OP_CRPOSQUERY, 0,        /* CRQUERY, CRMINQUERY */
  OP_CRPOSRANGE, 0,        /* CRRANGE, CRMINRANGE */
  0, 0, 0, 0,              /* CRPOS{STAR,PLUS,QUERY,RANGE} */

  0, 0, 0,                 /* CLASS, NCLASS, XCLASS */
  0, 0,                    /* REF, REFI */
  0, 0,                    /* DNREF, DNREFI */
  0, 0                     /* RECURSE, CALLOUT */
};
231 
232 
233 
234 #ifdef SUPPORT_UNICODE
235 /*************************************************
236 *        Check a character and a property        *
237 *************************************************/
238 
239 /* This function is called by compare_opcodes() when a property item is
240 adjacent to a fixed character.
241 
242 Arguments:
243   c            the character
244   ptype        the property type
245   pdata        the data for the type
246   negated      TRUE if it's a negated property (\P or \p{^)
247 
248 Returns:       TRUE if auto-possessifying is OK
249 */
250 
251 static BOOL
check_char_prop(uint32_t c,unsigned int ptype,unsigned int pdata,BOOL negated)252 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
253   BOOL negated)
254 {
255 const uint32_t *p;
256 const ucd_record *prop = GET_UCD(c);
257 
258 switch(ptype)
259   {
260   case PT_LAMP:
261   return (prop->chartype == ucp_Lu ||
262           prop->chartype == ucp_Ll ||
263           prop->chartype == ucp_Lt) == negated;
264 
265   case PT_GC:
266   return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
267 
268   case PT_PC:
269   return (pdata == prop->chartype) == negated;
270 
271   case PT_SC:
272   return (pdata == prop->script) == negated;
273 
274   /* These are specials */
275 
276   case PT_ALNUM:
277   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
278           PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
279 
280   /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
281   means that Perl space and POSIX space are now identical. PCRE was changed
282   at release 8.34. */
283 
284   case PT_SPACE:    /* Perl space */
285   case PT_PXSPACE:  /* POSIX space */
286   switch(c)
287     {
288     HSPACE_CASES:
289     VSPACE_CASES:
290     return negated;
291 
292     default:
293     return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
294     }
295   break;  /* Control never reaches here */
296 
297   case PT_WORD:
298   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
299           PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
300           c == CHAR_UNDERSCORE) == negated;
301 
302   case PT_CLIST:
303   p = PRIV(ucd_caseless_sets) + prop->caseset;
304   for (;;)
305     {
306     if (c < *p) return !negated;
307     if (c == *p++) return negated;
308     }
309   break;  /* Control never reaches here */
310   }
311 
312 return FALSE;
313 }
314 #endif  /* SUPPORT_UNICODE */
315 
316 
317 
318 /*************************************************
319 *        Base opcode of repeated opcodes         *
320 *************************************************/
321 
322 /* Returns the base opcode for repeated single character type opcodes. If the
323 opcode is not a repeated character type, it returns with the original value.
324 
325 Arguments:  c opcode
326 Returns:    base opcode for the type
327 */
328 
329 static PCRE2_UCHAR
get_repeat_base(PCRE2_UCHAR c)330 get_repeat_base(PCRE2_UCHAR c)
331 {
332 return (c > OP_TYPEPOSUPTO)? c :
333        (c >= OP_TYPESTAR)?   OP_TYPESTAR :
334        (c >= OP_NOTSTARI)?   OP_NOTSTARI :
335        (c >= OP_NOTSTAR)?    OP_NOTSTAR :
336        (c >= OP_STARI)?      OP_STARI :
337                              OP_STAR;
338 }
339 
340 
341 /*************************************************
342 *        Fill the character property list        *
343 *************************************************/
344 
345 /* Checks whether the code points to an opcode that can take part in auto-
346 possessification, and if so, fills a list with its properties.
347 
348 Arguments:
349   code        points to start of expression
350   utf         TRUE if in UTF mode
351   fcc         points to the case-flipping table
352   list        points to output list
353               list[0] will be filled with the opcode
354               list[1] will be non-zero if this opcode
355                 can match an empty character string
356               list[2..7] depends on the opcode
357 
358 Returns:      points to the start of the next opcode if *code is accepted
359               NULL if *code is not accepted
360 */
361 
362 static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code,BOOL utf,const uint8_t * fcc,uint32_t * list)363 get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
364   uint32_t *list)
365 {
366 PCRE2_UCHAR c = *code;
367 PCRE2_UCHAR base;
368 PCRE2_SPTR end;
369 uint32_t chr;
370 
371 #ifdef SUPPORT_UNICODE
372 uint32_t *clist_dest;
373 const uint32_t *clist_src;
374 #else
375 (void)utf;    /* Suppress "unused parameter" compiler warning */
376 #endif
377 
378 list[0] = c;
379 list[1] = FALSE;
380 code++;
381 
382 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
383   {
384   base = get_repeat_base(c);
385   c -= (base - OP_STAR);
386 
387   if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
388     code += IMM2_SIZE;
389 
390   list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
391              c != OP_POSPLUS);
392 
393   switch(base)
394     {
395     case OP_STAR:
396     list[0] = OP_CHAR;
397     break;
398 
399     case OP_STARI:
400     list[0] = OP_CHARI;
401     break;
402 
403     case OP_NOTSTAR:
404     list[0] = OP_NOT;
405     break;
406 
407     case OP_NOTSTARI:
408     list[0] = OP_NOTI;
409     break;
410 
411     case OP_TYPESTAR:
412     list[0] = *code;
413     code++;
414     break;
415     }
416   c = list[0];
417   }
418 
419 switch(c)
420   {
421   case OP_NOT_DIGIT:
422   case OP_DIGIT:
423   case OP_NOT_WHITESPACE:
424   case OP_WHITESPACE:
425   case OP_NOT_WORDCHAR:
426   case OP_WORDCHAR:
427   case OP_ANY:
428   case OP_ALLANY:
429   case OP_ANYNL:
430   case OP_NOT_HSPACE:
431   case OP_HSPACE:
432   case OP_NOT_VSPACE:
433   case OP_VSPACE:
434   case OP_EXTUNI:
435   case OP_EODN:
436   case OP_EOD:
437   case OP_DOLL:
438   case OP_DOLLM:
439   return code;
440 
441   case OP_CHAR:
442   case OP_NOT:
443   GETCHARINCTEST(chr, code);
444   list[2] = chr;
445   list[3] = NOTACHAR;
446   return code;
447 
448   case OP_CHARI:
449   case OP_NOTI:
450   list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
451   GETCHARINCTEST(chr, code);
452   list[2] = chr;
453 
454 #ifdef SUPPORT_UNICODE
455   if (chr < 128 || (chr < 256 && !utf))
456     list[3] = fcc[chr];
457   else
458     list[3] = UCD_OTHERCASE(chr);
459 #elif defined SUPPORT_WIDE_CHARS
460   list[3] = (chr < 256) ? fcc[chr] : chr;
461 #else
462   list[3] = fcc[chr];
463 #endif
464 
465   /* The othercase might be the same value. */
466 
467   if (chr == list[3])
468     list[3] = NOTACHAR;
469   else
470     list[4] = NOTACHAR;
471   return code;
472 
473 #ifdef SUPPORT_UNICODE
474   case OP_PROP:
475   case OP_NOTPROP:
476   if (code[0] != PT_CLIST)
477     {
478     list[2] = code[0];
479     list[3] = code[1];
480     return code + 2;
481     }
482 
483   /* Convert only if we have enough space. */
484 
485   clist_src = PRIV(ucd_caseless_sets) + code[1];
486   clist_dest = list + 2;
487   code += 2;
488 
489   do {
490      if (clist_dest >= list + 8)
491        {
492        /* Early return if there is not enough space. This should never
493        happen, since all clists are shorter than 5 character now. */
494        list[2] = code[0];
495        list[3] = code[1];
496        return code;
497        }
498      *clist_dest++ = *clist_src;
499      }
500   while(*clist_src++ != NOTACHAR);
501 
502   /* All characters are stored. The terminating NOTACHAR is copied from the
503   clist itself. */
504 
505   list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
506   return code;
507 #endif
508 
509   case OP_NCLASS:
510   case OP_CLASS:
511 #ifdef SUPPORT_WIDE_CHARS
512   case OP_XCLASS:
513   if (c == OP_XCLASS)
514     end = code + GET(code, 0) - 1;
515   else
516 #endif
517     end = code + 32 / sizeof(PCRE2_UCHAR);
518 
519   switch(*end)
520     {
521     case OP_CRSTAR:
522     case OP_CRMINSTAR:
523     case OP_CRQUERY:
524     case OP_CRMINQUERY:
525     case OP_CRPOSSTAR:
526     case OP_CRPOSQUERY:
527     list[1] = TRUE;
528     end++;
529     break;
530 
531     case OP_CRPLUS:
532     case OP_CRMINPLUS:
533     case OP_CRPOSPLUS:
534     end++;
535     break;
536 
537     case OP_CRRANGE:
538     case OP_CRMINRANGE:
539     case OP_CRPOSRANGE:
540     list[1] = (GET2(end, 1) == 0);
541     end += 1 + 2 * IMM2_SIZE;
542     break;
543     }
544   list[2] = (uint32_t)(end - code);
545   return end;
546   }
547 return NULL;    /* Opcode not accepted */
548 }
549 
550 
551 
552 /*************************************************
553 *    Scan further character sets for match       *
554 *************************************************/
555 
556 /* Checks whether the base and the current opcode have a common character, in
557 which case the base cannot be possessified.
558 
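Arguments:
  code        points to the byte code
  utf         TRUE in UTF mode
  cb          compile data block
  base_list   the data list of the base opcode
  base_end    points to the end of the base opcode (when the base is a class,
                its bit map is located at base_end - base_list[2])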
564 
565 Returns:      TRUE if the auto-possessification is possible
566 */
567 
568 static BOOL
compare_opcodes(PCRE2_SPTR code,BOOL utf,const compile_block * cb,const uint32_t * base_list,PCRE2_SPTR base_end)569 compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
570   const uint32_t *base_list, PCRE2_SPTR base_end)
571 {
572 PCRE2_UCHAR c;
573 uint32_t list[8];
574 const uint32_t *chr_ptr;
575 const uint32_t *ochr_ptr;
576 const uint32_t *list_ptr;
577 PCRE2_SPTR next_code;
578 #ifdef SUPPORT_WIDE_CHARS
579 PCRE2_SPTR xclass_flags;
580 #endif
581 const uint8_t *class_bitset;
582 const uint8_t *set1, *set2, *set_end;
583 uint32_t chr;
584 BOOL accepted, invert_bits;
585 BOOL entered_a_group = FALSE;
586 
587 /* Note: the base_list[1] contains whether the current opcode has a greedy
(represented by a non-zero value) quantifier. This is different from
589 other character type lists, which store here that the character iterator
590 matches to an empty string (also represented by a non-zero value). */
591 
592 for(;;)
593   {
594   /* All operations move the code pointer forward.
595   Therefore infinite recursions are not possible. */
596 
597   c = *code;
598 
599   /* Skip over callouts */
600 
601   if (c == OP_CALLOUT)
602     {
603     code += PRIV(OP_lengths)[c];
604     continue;
605     }
606 
607   if (c == OP_ALT)
608     {
609     do code += GET(code, 1); while (*code == OP_ALT);
610     c = *code;
611     }
612 
613   switch(c)
614     {
615     case OP_END:
616     case OP_KETRPOS:
617     /* TRUE only in greedy case. The non-greedy case could be replaced by
618     an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT
619     uses more memory, which we cannot get at this stage.) */
620 
621     return base_list[1] != 0;
622 
623     case OP_KET:
624     /* If the bracket is capturing, and referenced by an OP_RECURSE, or
625     it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
626     cannot be converted to a possessive form. */
627 
628     if (base_list[1] == 0) return FALSE;
629 
630     switch(*(code - GET(code, 1)))
631       {
632       case OP_ASSERT:
633       case OP_ASSERT_NOT:
634       case OP_ASSERTBACK:
635       case OP_ASSERTBACK_NOT:
636       case OP_ONCE:
637       case OP_ONCE_NC:
638       /* Atomic sub-patterns and assertions can always auto-possessify their
639       last iterator. However, if the group was entered as a result of checking
640       a previous iterator, this is not possible. */
641 
642       return !entered_a_group;
643       }
644 
645     code += PRIV(OP_lengths)[c];
646     continue;
647 
648     case OP_ONCE:
649     case OP_ONCE_NC:
650     case OP_BRA:
651     case OP_CBRA:
652     next_code = code + GET(code, 1);
653     code += PRIV(OP_lengths)[c];
654 
655     while (*next_code == OP_ALT)
656       {
657       if (!compare_opcodes(code, utf, cb, base_list, base_end)) return FALSE;
658       code = next_code + 1 + LINK_SIZE;
659       next_code += GET(next_code, 1);
660       }
661 
662     entered_a_group = TRUE;
663     continue;
664 
665     case OP_BRAZERO:
666     case OP_BRAMINZERO:
667 
668     next_code = code + 1;
669     if (*next_code != OP_BRA && *next_code != OP_CBRA
670         && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;
671 
672     do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
673 
674     /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
675 
676     next_code += 1 + LINK_SIZE;
677     if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
678       return FALSE;
679 
680     code += PRIV(OP_lengths)[c];
681     continue;
682 
683     default:
684     break;
685     }
686 
687   /* Check for a supported opcode, and load its properties. */
688 
689   code = get_chr_property_list(code, utf, cb->fcc, list);
690   if (code == NULL) return FALSE;    /* Unsupported */
691 
692   /* If either opcode is a small character list, set pointers for comparing
693   characters from that list with another list, or with a property. */
694 
695   if (base_list[0] == OP_CHAR)
696     {
697     chr_ptr = base_list + 2;
698     list_ptr = list;
699     }
700   else if (list[0] == OP_CHAR)
701     {
702     chr_ptr = list + 2;
703     list_ptr = base_list;
704     }
705 
706   /* Character bitsets can also be compared to certain opcodes. */
707 
708   else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
709 #if PCRE2_CODE_UNIT_WIDTH == 8
710       /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
711       || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
712 #endif
713       )
714     {
715 #if PCRE2_CODE_UNIT_WIDTH == 8
716     if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
717 #else
718     if (base_list[0] == OP_CLASS)
719 #endif
720       {
721       set1 = (uint8_t *)(base_end - base_list[2]);
722       list_ptr = list;
723       }
724     else
725       {
726       set1 = (uint8_t *)(code - list[2]);
727       list_ptr = base_list;
728       }
729 
730     invert_bits = FALSE;
731     switch(list_ptr[0])
732       {
733       case OP_CLASS:
734       case OP_NCLASS:
735       set2 = (uint8_t *)
736         ((list_ptr == list ? code : base_end) - list_ptr[2]);
737       break;
738 
739 #ifdef SUPPORT_WIDE_CHARS
740       case OP_XCLASS:
741       xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
742       if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
743       if ((*xclass_flags & XCL_MAP) == 0)
744         {
745         /* No bits are set for characters < 256. */
746         if (list[1] == 0) return TRUE;
747         /* Might be an empty repeat. */
748         continue;
749         }
750       set2 = (uint8_t *)(xclass_flags + 1);
751       break;
752 #endif
753 
754       case OP_NOT_DIGIT:
755       invert_bits = TRUE;
756       /* Fall through */
757       case OP_DIGIT:
758       set2 = (uint8_t *)(cb->cbits + cbit_digit);
759       break;
760 
761       case OP_NOT_WHITESPACE:
762       invert_bits = TRUE;
763       /* Fall through */
764       case OP_WHITESPACE:
765       set2 = (uint8_t *)(cb->cbits + cbit_space);
766       break;
767 
768       case OP_NOT_WORDCHAR:
769       invert_bits = TRUE;
770       /* Fall through */
771       case OP_WORDCHAR:
772       set2 = (uint8_t *)(cb->cbits + cbit_word);
773       break;
774 
775       default:
776       return FALSE;
777       }
778 
779     /* Because the bit sets are unaligned bytes, we need to perform byte
780     comparison here. */
781 
782     set_end = set1 + 32;
783     if (invert_bits)
784       {
785       do
786         {
787         if ((*set1++ & ~(*set2++)) != 0) return FALSE;
788         }
789       while (set1 < set_end);
790       }
791     else
792       {
793       do
794         {
795         if ((*set1++ & *set2++) != 0) return FALSE;
796         }
797       while (set1 < set_end);
798       }
799 
800     if (list[1] == 0) return TRUE;
801     /* Might be an empty repeat. */
802     continue;
803     }
804 
805   /* Some property combinations also acceptable. Unicode property opcodes are
806   processed specially; the rest can be handled with a lookup table. */
807 
808   else
809     {
810     uint32_t leftop, rightop;
811 
812     leftop = base_list[0];
813     rightop = list[0];
814 
815 #ifdef SUPPORT_UNICODE
816     accepted = FALSE; /* Always set in non-unicode case. */
817     if (leftop == OP_PROP || leftop == OP_NOTPROP)
818       {
819       if (rightop == OP_EOD)
820         accepted = TRUE;
821       else if (rightop == OP_PROP || rightop == OP_NOTPROP)
822         {
823         int n;
824         const uint8_t *p;
825         BOOL same = leftop == rightop;
826         BOOL lisprop = leftop == OP_PROP;
827         BOOL risprop = rightop == OP_PROP;
828         BOOL bothprop = lisprop && risprop;
829 
830         /* There's a table that specifies how each combination is to be
831         processed:
832           0   Always return FALSE (never auto-possessify)
833           1   Character groups are distinct (possessify if both are OP_PROP)
834           2   Check character categories in the same group (general or particular)
835           3   Return TRUE if the two opcodes are not the same
836           ... see comments below
837         */
838 
839         n = propposstab[base_list[2]][list[2]];
840         switch(n)
841           {
842           case 0: break;
843           case 1: accepted = bothprop; break;
844           case 2: accepted = (base_list[3] == list[3]) != same; break;
845           case 3: accepted = !same; break;
846 
847           case 4:  /* Left general category, right particular category */
848           accepted = risprop && catposstab[base_list[3]][list[3]] == same;
849           break;
850 
851           case 5:  /* Right general category, left particular category */
852           accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
853           break;
854 
855           /* This code is logically tricky. Think hard before fiddling with it.
856           The posspropstab table has four entries per row. Each row relates to
857           one of PCRE's special properties such as ALNUM or SPACE or WORD.
858           Only WORD actually needs all four entries, but using repeats for the
859           others means they can all use the same code below.
860 
861           The first two entries in each row are Unicode general categories, and
862           apply always, because all the characters they include are part of the
863           PCRE character set. The third and fourth entries are a general and a
864           particular category, respectively, that include one or more relevant
865           characters. One or the other is used, depending on whether the check
866           is for a general or a particular category. However, in both cases the
867           category contains more characters than the specials that are defined
868           for the property being tested against. Therefore, it cannot be used
869           in a NOTPROP case.
870 
871           Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
872           Underscore is covered by ucp_P or ucp_Po. */
873 
874           case 6:  /* Left alphanum vs right general category */
875           case 7:  /* Left space vs right general category */
876           case 8:  /* Left word vs right general category */
877           p = posspropstab[n-6];
878           accepted = risprop && lisprop ==
879             (list[3] != p[0] &&
880              list[3] != p[1] &&
881             (list[3] != p[2] || !lisprop));
882           break;
883 
884           case 9:   /* Right alphanum vs left general category */
885           case 10:  /* Right space vs left general category */
886           case 11:  /* Right word vs left general category */
887           p = posspropstab[n-9];
888           accepted = lisprop && risprop ==
889             (base_list[3] != p[0] &&
890              base_list[3] != p[1] &&
891             (base_list[3] != p[2] || !risprop));
892           break;
893 
894           case 12:  /* Left alphanum vs right particular category */
895           case 13:  /* Left space vs right particular category */
896           case 14:  /* Left word vs right particular category */
897           p = posspropstab[n-12];
898           accepted = risprop && lisprop ==
899             (catposstab[p[0]][list[3]] &&
900              catposstab[p[1]][list[3]] &&
901             (list[3] != p[3] || !lisprop));
902           break;
903 
904           case 15:  /* Right alphanum vs left particular category */
905           case 16:  /* Right space vs left particular category */
906           case 17:  /* Right word vs left particular category */
907           p = posspropstab[n-15];
908           accepted = lisprop && risprop ==
909             (catposstab[p[0]][base_list[3]] &&
910              catposstab[p[1]][base_list[3]] &&
911             (base_list[3] != p[3] || !risprop));
912           break;
913           }
914         }
915       }
916 
917     else
918 #endif  /* SUPPORT_UNICODE */
919 
920     accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
921            rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
922            autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
923 
924     if (!accepted) return FALSE;
925 
926     if (list[1] == 0) return TRUE;
927     /* Might be an empty repeat. */
928     continue;
929     }
930 
931   /* Control reaches here only if one of the items is a small character list.
932   All characters are checked against the other side. */
933 
934   do
935     {
936     chr = *chr_ptr;
937 
938     switch(list_ptr[0])
939       {
940       case OP_CHAR:
941       ochr_ptr = list_ptr + 2;
942       do
943         {
944         if (chr == *ochr_ptr) return FALSE;
945         ochr_ptr++;
946         }
947       while(*ochr_ptr != NOTACHAR);
948       break;
949 
950       case OP_NOT:
951       ochr_ptr = list_ptr + 2;
952       do
953         {
954         if (chr == *ochr_ptr)
955           break;
956         ochr_ptr++;
957         }
958       while(*ochr_ptr != NOTACHAR);
959       if (*ochr_ptr == NOTACHAR) return FALSE;   /* Not found */
960       break;
961 
962       /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
963       set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
964 
965       case OP_DIGIT:
966       if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
967       break;
968 
969       case OP_NOT_DIGIT:
970       if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
971       break;
972 
973       case OP_WHITESPACE:
974       if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
975       break;
976 
977       case OP_NOT_WHITESPACE:
978       if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
979       break;
980 
981       case OP_WORDCHAR:
982       if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
983       break;
984 
985       case OP_NOT_WORDCHAR:
986       if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
987       break;
988 
989       case OP_HSPACE:
990       switch(chr)
991         {
992         HSPACE_CASES: return FALSE;
993         default: break;
994         }
995       break;
996 
997       case OP_NOT_HSPACE:
998       switch(chr)
999         {
1000         HSPACE_CASES: break;
1001         default: return FALSE;
1002         }
1003       break;
1004 
1005       case OP_ANYNL:
1006       case OP_VSPACE:
1007       switch(chr)
1008         {
1009         VSPACE_CASES: return FALSE;
1010         default: break;
1011         }
1012       break;
1013 
1014       case OP_NOT_VSPACE:
1015       switch(chr)
1016         {
1017         VSPACE_CASES: break;
1018         default: return FALSE;
1019         }
1020       break;
1021 
1022       case OP_DOLL:
1023       case OP_EODN:
1024       switch (chr)
1025         {
1026         case CHAR_CR:
1027         case CHAR_LF:
1028         case CHAR_VT:
1029         case CHAR_FF:
1030         case CHAR_NEL:
1031 #ifndef EBCDIC
1032         case 0x2028:
1033         case 0x2029:
1034 #endif  /* Not EBCDIC */
1035         return FALSE;
1036         }
1037       break;
1038 
1039       case OP_EOD:    /* Can always possessify before \z */
1040       break;
1041 
1042 #ifdef SUPPORT_UNICODE
1043       case OP_PROP:
1044       case OP_NOTPROP:
1045       if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1046             list_ptr[0] == OP_NOTPROP))
1047         return FALSE;
1048       break;
1049 #endif
1050 
1051       case OP_NCLASS:
1052       if (chr > 255) return FALSE;
1053       /* Fall through */
1054 
1055       case OP_CLASS:
1056       if (chr > 255) break;
1057       class_bitset = (uint8_t *)
1058         ((list_ptr == list ? code : base_end) - list_ptr[2]);
1059       if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
1060       break;
1061 
1062 #ifdef SUPPORT_WIDE_CHARS
1063       case OP_XCLASS:
1064       if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1065           list_ptr[2] + LINK_SIZE, utf)) return FALSE;
1066       break;
1067 #endif
1068 
1069       default:
1070       return FALSE;
1071       }
1072 
1073     chr_ptr++;
1074     }
1075   while(*chr_ptr != NOTACHAR);
1076 
1077   /* At least one character must be matched from this opcode. */
1078 
1079   if (list[1] == 0) return TRUE;
1080   }
1081 
/* Control never reaches here. There used to be a fail-safe return FALSE; here,
but some compilers complain about an unreachable statement. */
1084 }
1085 
1086 
1087 
1088 /*************************************************
1089 *    Scan compiled regex for auto-possession     *
1090 *************************************************/
1091 
1092 /* Replaces single character iterations with their possessive alternatives
1093 if appropriate. This function modifies the compiled opcode!
1094 
1095 Arguments:
1096   code        points to start of the byte code
1097   utf         TRUE in UTF mode
1098   cb          compile data block
1099 
1100 Returns:      nothing
1101 */
1102 
1103 void
PRIV(auto_possessify)1104 PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
1105 {
1106 register PCRE2_UCHAR c;
1107 PCRE2_SPTR end;
1108 PCRE2_UCHAR *repeat_opcode;
1109 uint32_t list[8];
1110 
1111 for (;;)
1112   {
1113   c = *code;
1114 
1115   if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1116     {
1117     c -= get_repeat_base(c) - OP_STAR;
1118     end = (c <= OP_MINUPTO) ?
1119       get_chr_property_list(code, utf, cb->fcc, list) : NULL;
1120     list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1121 
1122     if (end != NULL && compare_opcodes(end, utf, cb, list, end))
1123       {
1124       switch(c)
1125         {
1126         case OP_STAR:
1127         *code += OP_POSSTAR - OP_STAR;
1128         break;
1129 
1130         case OP_MINSTAR:
1131         *code += OP_POSSTAR - OP_MINSTAR;
1132         break;
1133 
1134         case OP_PLUS:
1135         *code += OP_POSPLUS - OP_PLUS;
1136         break;
1137 
1138         case OP_MINPLUS:
1139         *code += OP_POSPLUS - OP_MINPLUS;
1140         break;
1141 
1142         case OP_QUERY:
1143         *code += OP_POSQUERY - OP_QUERY;
1144         break;
1145 
1146         case OP_MINQUERY:
1147         *code += OP_POSQUERY - OP_MINQUERY;
1148         break;
1149 
1150         case OP_UPTO:
1151         *code += OP_POSUPTO - OP_UPTO;
1152         break;
1153 
1154         case OP_MINUPTO:
1155         *code += OP_POSUPTO - OP_MINUPTO;
1156         break;
1157         }
1158       }
1159     c = *code;
1160     }
1161   else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
1162     {
1163 #ifdef SUPPORT_WIDE_CHARS
1164     if (c == OP_XCLASS)
1165       repeat_opcode = code + GET(code, 1);
1166     else
1167 #endif
1168       repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1169 
1170     c = *repeat_opcode;
1171     if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1172       {
1173       /* end must not be NULL. */
1174       end = get_chr_property_list(code, utf, cb->fcc, list);
1175 
1176       list[1] = (c & 1) == 0;
1177 
1178       if (compare_opcodes(end, utf, cb, list, end))
1179         {
1180         switch (c)
1181           {
1182           case OP_CRSTAR:
1183           case OP_CRMINSTAR:
1184           *repeat_opcode = OP_CRPOSSTAR;
1185           break;
1186 
1187           case OP_CRPLUS:
1188           case OP_CRMINPLUS:
1189           *repeat_opcode = OP_CRPOSPLUS;
1190           break;
1191 
1192           case OP_CRQUERY:
1193           case OP_CRMINQUERY:
1194           *repeat_opcode = OP_CRPOSQUERY;
1195           break;
1196 
1197           case OP_CRRANGE:
1198           case OP_CRMINRANGE:
1199           *repeat_opcode = OP_CRPOSRANGE;
1200           break;
1201           }
1202         }
1203       }
1204     c = *code;
1205     }
1206 
1207   switch(c)
1208     {
1209     case OP_END:
1210     return;
1211 
1212     case OP_TYPESTAR:
1213     case OP_TYPEMINSTAR:
1214     case OP_TYPEPLUS:
1215     case OP_TYPEMINPLUS:
1216     case OP_TYPEQUERY:
1217     case OP_TYPEMINQUERY:
1218     case OP_TYPEPOSSTAR:
1219     case OP_TYPEPOSPLUS:
1220     case OP_TYPEPOSQUERY:
1221     if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1222     break;
1223 
1224     case OP_TYPEUPTO:
1225     case OP_TYPEMINUPTO:
1226     case OP_TYPEEXACT:
1227     case OP_TYPEPOSUPTO:
1228     if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1229       code += 2;
1230     break;
1231 
1232 #ifdef SUPPORT_WIDE_CHARS
1233     case OP_XCLASS:
1234     code += GET(code, 1);
1235     break;
1236 #endif
1237 
1238     case OP_MARK:
1239     case OP_PRUNE_ARG:
1240     case OP_SKIP_ARG:
1241     case OP_THEN_ARG:
1242     code += code[1];
1243     break;
1244     }
1245 
1246   /* Add in the fixed length from the table */
1247 
1248   code += PRIV(OP_lengths)[c];
1249 
1250   /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1251   followed by a multi-byte character. The length in the table is a minimum, so
1252   we have to arrange to skip the extra code units. */
1253 
1254 #ifdef MAYBE_UTF_MULTI
1255   if (utf) switch(c)
1256     {
1257     case OP_CHAR:
1258     case OP_CHARI:
1259     case OP_NOT:
1260     case OP_NOTI:
1261     case OP_STAR:
1262     case OP_MINSTAR:
1263     case OP_PLUS:
1264     case OP_MINPLUS:
1265     case OP_QUERY:
1266     case OP_MINQUERY:
1267     case OP_UPTO:
1268     case OP_MINUPTO:
1269     case OP_EXACT:
1270     case OP_POSSTAR:
1271     case OP_POSPLUS:
1272     case OP_POSQUERY:
1273     case OP_POSUPTO:
1274     case OP_STARI:
1275     case OP_MINSTARI:
1276     case OP_PLUSI:
1277     case OP_MINPLUSI:
1278     case OP_QUERYI:
1279     case OP_MINQUERYI:
1280     case OP_UPTOI:
1281     case OP_MINUPTOI:
1282     case OP_EXACTI:
1283     case OP_POSSTARI:
1284     case OP_POSPLUSI:
1285     case OP_POSQUERYI:
1286     case OP_POSUPTOI:
1287     case OP_NOTSTAR:
1288     case OP_NOTMINSTAR:
1289     case OP_NOTPLUS:
1290     case OP_NOTMINPLUS:
1291     case OP_NOTQUERY:
1292     case OP_NOTMINQUERY:
1293     case OP_NOTUPTO:
1294     case OP_NOTMINUPTO:
1295     case OP_NOTEXACT:
1296     case OP_NOTPOSSTAR:
1297     case OP_NOTPOSPLUS:
1298     case OP_NOTPOSQUERY:
1299     case OP_NOTPOSUPTO:
1300     case OP_NOTSTARI:
1301     case OP_NOTMINSTARI:
1302     case OP_NOTPLUSI:
1303     case OP_NOTMINPLUSI:
1304     case OP_NOTQUERYI:
1305     case OP_NOTMINQUERYI:
1306     case OP_NOTUPTOI:
1307     case OP_NOTMINUPTOI:
1308     case OP_NOTEXACTI:
1309     case OP_NOTPOSSTARI:
1310     case OP_NOTPOSPLUSI:
1311     case OP_NOTPOSQUERYI:
1312     case OP_NOTPOSUPTOI:
1313     if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1314     break;
1315     }
1316 #else
1317   (void)(utf);  /* Keep compiler happy by referencing function argument */
1318 #endif  /* SUPPORT_WIDE_CHARS */
1319   }
1320 }
1321 
1322 /* End of pcre2_auto_possess.c */
1323