1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2014 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains functions that scan a compiled pattern and change
42 repeats into possessive repeats where possible. */
43
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49
50 #include "pcre2_internal.h"
51
52
53 /*************************************************
54 * Tables for auto-possessification *
55 *************************************************/
56
57 /* This table is used to check whether auto-possessification is possible
58 between adjacent character-type opcodes. The left-hand (repeated) opcode is
59 used to select the row, and the right-hand opcode is use to select the column.
60 A value of 1 means that auto-possessification is OK. For example, the second
61 value in the first row means that \D+\d can be turned into \D++\d.
62
63 The Unicode property types (\P and \p) have to be present to fill out the table
64 because of what their opcode values are, but the table values should always be
65 zero because property types are handled separately in the code. The last four
66 columns apply to items that cannot be repeated, so there is no need to have
67 rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
68 *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
69
70 #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
71 #define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
72
73 static const uint8_t autoposstab[APTROWS][APTCOLS] = {
74 /* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */
75 { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */
76 { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \d */
77 { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \S */
78 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \s */
79 { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \W */
80 { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \w */
81 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* . */
82 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* .+ */
83 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \C */
84 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \P */
85 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \p */
86 { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \R */
87 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \H */
88 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \h */
89 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \V */
90 { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */
91 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */
92 };
93
94 /* This table is used to check whether auto-possessification is possible
95 between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
96 left-hand (repeated) opcode is used to select the row, and the right-hand
97 opcode is used to select the column. The values are as follows:
98
99 0 Always return FALSE (never auto-possessify)
100 1 Character groups are distinct (possessify if both are OP_PROP)
101 2 Check character categories in the same group (general or particular)
102 3 TRUE if the two opcodes are not the same (PROP vs NOTPROP)
103
104 4 Check left general category vs right particular category
105 5 Check right general category vs left particular category
106
107 6 Left alphanum vs right general category
108 7 Left space vs right general category
109 8 Left word vs right general category
110
111 9 Right alphanum vs left general category
112 10 Right space vs left general category
113 11 Right word vs left general category
114
115 12 Left alphanum vs right particular category
116 13 Left space vs right particular category
117 14 Left word vs right particular category
118
119 15 Right alphanum vs left particular category
120 16 Right space vs left particular category
121 17 Right word vs left particular category
122 */
123
124 static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
125 /* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
126 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */
127 { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */
128 { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */
129 { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */
130 { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */
131 { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */
132 { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */
133 { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */
134 { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */
135 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */
136 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */
137 };
138
/* catposstab is consulted when two adjacent Unicode property opcodes (OP_PROP
and OP_NOTPROP) are such that one specifies a general category and the other
specifies a particular category. The general category selects the row and the
particular category selects the column. The value is 1 if the particular
category is not part of the general category. */

static const uint8_t catposstab[7][30] = {
/*          Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
/* C */  {  0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
/* L */  {  1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
/* M */  {  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
/* N */  {  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
/* P */  {  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
/* S */  {  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },
/* Z */  {  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }
};
156
157 /* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against
158 a general or particular category. The properties in each row are those
159 that apply to the character set in question. Duplication means that a little
160 unnecessary work is done when checking, but this keeps things much simpler
161 because they can all use the same code. For more details see the comment where
162 this table is used.
163
164 Note: SPACE and PXSPACE used to be different because Perl excluded VT from
165 "space", but from Perl 5.18 it's included, so both categories are treated the
166 same here. */
167
168 static const uint8_t posspropstab[3][4] = {
169 { ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */
170 { ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */
171 { ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */
172 };
173
174 /* This table is used when converting repeating opcodes into possessified
175 versions as a result of an explicit possessive quantifier such as ++. A zero
176 value means there is no possessified version - in those cases the item in
177 question must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT
178 because all relevant opcodes are less than that. */
179
180 static const uint8_t opcode_possessify[] = {
181 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 15 */
182 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 - 31 */
183
184 0, /* NOTI */
185 OP_POSSTAR, 0, /* STAR, MINSTAR */
186 OP_POSPLUS, 0, /* PLUS, MINPLUS */
187 OP_POSQUERY, 0, /* QUERY, MINQUERY */
188 OP_POSUPTO, 0, /* UPTO, MINUPTO */
189 0, /* EXACT */
190 0, 0, 0, 0, /* POS{STAR,PLUS,QUERY,UPTO} */
191
192 OP_POSSTARI, 0, /* STARI, MINSTARI */
193 OP_POSPLUSI, 0, /* PLUSI, MINPLUSI */
194 OP_POSQUERYI, 0, /* QUERYI, MINQUERYI */
195 OP_POSUPTOI, 0, /* UPTOI, MINUPTOI */
196 0, /* EXACTI */
197 0, 0, 0, 0, /* POS{STARI,PLUSI,QUERYI,UPTOI} */
198
199 OP_NOTPOSSTAR, 0, /* NOTSTAR, NOTMINSTAR */
200 OP_NOTPOSPLUS, 0, /* NOTPLUS, NOTMINPLUS */
201 OP_NOTPOSQUERY, 0, /* NOTQUERY, NOTMINQUERY */
202 OP_NOTPOSUPTO, 0, /* NOTUPTO, NOTMINUPTO */
203 0, /* NOTEXACT */
204 0, 0, 0, 0, /* NOTPOS{STAR,PLUS,QUERY,UPTO} */
205
206 OP_NOTPOSSTARI, 0, /* NOTSTARI, NOTMINSTARI */
207 OP_NOTPOSPLUSI, 0, /* NOTPLUSI, NOTMINPLUSI */
208 OP_NOTPOSQUERYI, 0, /* NOTQUERYI, NOTMINQUERYI */
209 OP_NOTPOSUPTOI, 0, /* NOTUPTOI, NOTMINUPTOI */
210 0, /* NOTEXACTI */
211 0, 0, 0, 0, /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */
212
213 OP_TYPEPOSSTAR, 0, /* TYPESTAR, TYPEMINSTAR */
214 OP_TYPEPOSPLUS, 0, /* TYPEPLUS, TYPEMINPLUS */
215 OP_TYPEPOSQUERY, 0, /* TYPEQUERY, TYPEMINQUERY */
216 OP_TYPEPOSUPTO, 0, /* TYPEUPTO, TYPEMINUPTO */
217 0, /* TYPEEXACT */
218 0, 0, 0, 0, /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */
219
220 OP_CRPOSSTAR, 0, /* CRSTAR, CRMINSTAR */
221 OP_CRPOSPLUS, 0, /* CRPLUS, CRMINPLUS */
222 OP_CRPOSQUERY, 0, /* CRQUERY, CRMINQUERY */
223 OP_CRPOSRANGE, 0, /* CRRANGE, CRMINRANGE */
224 0, 0, 0, 0, /* CRPOS{STAR,PLUS,QUERY,RANGE} */
225
226 0, 0, 0, /* CLASS, NCLASS, XCLASS */
227 0, 0, /* REF, REFI */
228 0, 0, /* DNREF, DNREFI */
229 0, 0 /* RECURSE, CALLOUT */
230 };
231
232
233
#ifdef SUPPORT_UNICODE
/*************************************************
*       Check a character and a property         *
*************************************************/

/* This function is called by compare_opcodes() when a property item is
adjacent to a fixed character. For each supported property type it works out
whether the character has the property and compares that result with the
"negated" flag, so that TRUE is returned exactly when the character cannot be
matched by the (possibly negated) property item.

Arguments:
  c            the character
  ptype        the property type (one of the PT_xxx values)
  pdata        the data for the type
  negated      TRUE if it's a negated property (\P or \p{^)

Returns:       TRUE if auto-possessifying is OK; FALSE otherwise, including
               for any property type that is not handled below
*/

static BOOL
check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
  BOOL negated)
{
const ucd_record *rec = GET_UCD(c);
const uint32_t gentype = PRIV(ucp_gentype)[rec->chartype];
const uint32_t *cset;
BOOL has_prop;

switch(ptype)
  {
  case PT_LAMP:
  has_prop = rec->chartype == ucp_Lu ||
             rec->chartype == ucp_Ll ||
             rec->chartype == ucp_Lt;
  return has_prop == negated;

  case PT_GC:
  has_prop = (pdata == gentype);
  return has_prop == negated;

  case PT_PC:
  has_prop = (pdata == rec->chartype);
  return has_prop == negated;

  case PT_SC:
  has_prop = (pdata == rec->script);
  return has_prop == negated;

  /* These are specials */

  case PT_ALNUM:
  has_prop = gentype == ucp_L || gentype == ucp_N;
  return has_prop == negated;

  /* Perl space used to exclude VT, but from Perl 5.18 it is included, which
  means that Perl space and POSIX space are now identical. PCRE was changed
  at release 8.34. */

  case PT_SPACE:    /* Perl space */
  case PT_PXSPACE:  /* POSIX space */
  switch(c)
    {
    HSPACE_CASES:
    VSPACE_CASES:
    return negated;   /* An explicit space character has the property */

    default:
    break;
    }
  has_prop = (gentype == ucp_Z);
  return has_prop == negated;

  case PT_WORD:
  has_prop = gentype == ucp_L || gentype == ucp_N || c == CHAR_UNDERSCORE;
  return has_prop == negated;

  /* Walk the caseless set that the character's record refers to. The loop has
  no explicit bound: it stops at the first entry that is greater than or equal
  to c, so it depends on the entries being in ascending order and ending with
  a value no character is greater than. */

  case PT_CLIST:
  for (cset = PRIV(ucd_caseless_sets) + rec->caseset; ; cset++)
    {
    if (c < *cset) return !negated;
    if (c == *cset) return negated;
    }
  }

return FALSE;
}
#endif  /* SUPPORT_UNICODE */
315
316
317
318 /*************************************************
319 * Base opcode of repeated opcodes *
320 *************************************************/
321
322 /* Returns the base opcode for repeated single character type opcodes. If the
323 opcode is not a repeated character type, it returns with the original value.
324
325 Arguments: c opcode
326 Returns: base opcode for the type
327 */
328
329 static PCRE2_UCHAR
get_repeat_base(PCRE2_UCHAR c)330 get_repeat_base(PCRE2_UCHAR c)
331 {
332 return (c > OP_TYPEPOSUPTO)? c :
333 (c >= OP_TYPESTAR)? OP_TYPESTAR :
334 (c >= OP_NOTSTARI)? OP_NOTSTARI :
335 (c >= OP_NOTSTAR)? OP_NOTSTAR :
336 (c >= OP_STARI)? OP_STARI :
337 OP_STAR;
338 }
339
340
341 /*************************************************
342 * Fill the character property list *
343 *************************************************/
344
345 /* Checks whether the code points to an opcode that can take part in auto-
346 possessification, and if so, fills a list with its properties.
347
348 Arguments:
349 code points to start of expression
350 utf TRUE if in UTF mode
351 fcc points to the case-flipping table
352 list points to output list
353 list[0] will be filled with the opcode
354 list[1] will be non-zero if this opcode
355 can match an empty character string
356 list[2..7] depends on the opcode
357
358 Returns: points to the start of the next opcode if *code is accepted
359 NULL if *code is not accepted
360 */
361
362 static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code,BOOL utf,const uint8_t * fcc,uint32_t * list)363 get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
364 uint32_t *list)
365 {
366 PCRE2_UCHAR c = *code;
367 PCRE2_UCHAR base;
368 PCRE2_SPTR end;
369 uint32_t chr;
370
371 #ifdef SUPPORT_UNICODE
372 uint32_t *clist_dest;
373 const uint32_t *clist_src;
374 #else
375 (void)utf; /* Suppress "unused parameter" compiler warning */
376 #endif
377
378 list[0] = c;
379 list[1] = FALSE;
380 code++;
381
382 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
383 {
384 base = get_repeat_base(c);
385 c -= (base - OP_STAR);
386
387 if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
388 code += IMM2_SIZE;
389
390 list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
391 c != OP_POSPLUS);
392
393 switch(base)
394 {
395 case OP_STAR:
396 list[0] = OP_CHAR;
397 break;
398
399 case OP_STARI:
400 list[0] = OP_CHARI;
401 break;
402
403 case OP_NOTSTAR:
404 list[0] = OP_NOT;
405 break;
406
407 case OP_NOTSTARI:
408 list[0] = OP_NOTI;
409 break;
410
411 case OP_TYPESTAR:
412 list[0] = *code;
413 code++;
414 break;
415 }
416 c = list[0];
417 }
418
419 switch(c)
420 {
421 case OP_NOT_DIGIT:
422 case OP_DIGIT:
423 case OP_NOT_WHITESPACE:
424 case OP_WHITESPACE:
425 case OP_NOT_WORDCHAR:
426 case OP_WORDCHAR:
427 case OP_ANY:
428 case OP_ALLANY:
429 case OP_ANYNL:
430 case OP_NOT_HSPACE:
431 case OP_HSPACE:
432 case OP_NOT_VSPACE:
433 case OP_VSPACE:
434 case OP_EXTUNI:
435 case OP_EODN:
436 case OP_EOD:
437 case OP_DOLL:
438 case OP_DOLLM:
439 return code;
440
441 case OP_CHAR:
442 case OP_NOT:
443 GETCHARINCTEST(chr, code);
444 list[2] = chr;
445 list[3] = NOTACHAR;
446 return code;
447
448 case OP_CHARI:
449 case OP_NOTI:
450 list[0] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
451 GETCHARINCTEST(chr, code);
452 list[2] = chr;
453
454 #ifdef SUPPORT_UNICODE
455 if (chr < 128 || (chr < 256 && !utf))
456 list[3] = fcc[chr];
457 else
458 list[3] = UCD_OTHERCASE(chr);
459 #elif defined SUPPORT_WIDE_CHARS
460 list[3] = (chr < 256) ? fcc[chr] : chr;
461 #else
462 list[3] = fcc[chr];
463 #endif
464
465 /* The othercase might be the same value. */
466
467 if (chr == list[3])
468 list[3] = NOTACHAR;
469 else
470 list[4] = NOTACHAR;
471 return code;
472
473 #ifdef SUPPORT_UNICODE
474 case OP_PROP:
475 case OP_NOTPROP:
476 if (code[0] != PT_CLIST)
477 {
478 list[2] = code[0];
479 list[3] = code[1];
480 return code + 2;
481 }
482
483 /* Convert only if we have enough space. */
484
485 clist_src = PRIV(ucd_caseless_sets) + code[1];
486 clist_dest = list + 2;
487 code += 2;
488
489 do {
490 if (clist_dest >= list + 8)
491 {
492 /* Early return if there is not enough space. This should never
493 happen, since all clists are shorter than 5 character now. */
494 list[2] = code[0];
495 list[3] = code[1];
496 return code;
497 }
498 *clist_dest++ = *clist_src;
499 }
500 while(*clist_src++ != NOTACHAR);
501
502 /* All characters are stored. The terminating NOTACHAR is copied from the
503 clist itself. */
504
505 list[0] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
506 return code;
507 #endif
508
509 case OP_NCLASS:
510 case OP_CLASS:
511 #ifdef SUPPORT_WIDE_CHARS
512 case OP_XCLASS:
513 if (c == OP_XCLASS)
514 end = code + GET(code, 0) - 1;
515 else
516 #endif
517 end = code + 32 / sizeof(PCRE2_UCHAR);
518
519 switch(*end)
520 {
521 case OP_CRSTAR:
522 case OP_CRMINSTAR:
523 case OP_CRQUERY:
524 case OP_CRMINQUERY:
525 case OP_CRPOSSTAR:
526 case OP_CRPOSQUERY:
527 list[1] = TRUE;
528 end++;
529 break;
530
531 case OP_CRPLUS:
532 case OP_CRMINPLUS:
533 case OP_CRPOSPLUS:
534 end++;
535 break;
536
537 case OP_CRRANGE:
538 case OP_CRMINRANGE:
539 case OP_CRPOSRANGE:
540 list[1] = (GET2(end, 1) == 0);
541 end += 1 + 2 * IMM2_SIZE;
542 break;
543 }
544 list[2] = (uint32_t)(end - code);
545 return end;
546 }
547 return NULL; /* Opcode not accepted */
548 }
549
550
551
552 /*************************************************
553 * Scan further character sets for match *
554 *************************************************/
555
556 /* Checks whether the base and the current opcode have a common character, in
557 which case the base cannot be possessified.
558
559 Arguments:
560 code points to the byte code
561 utf TRUE in UTF mode
562 cb compile data block
563 base_list the data list of the base opcode
564
565 Returns: TRUE if the auto-possessification is possible
566 */
567
568 static BOOL
compare_opcodes(PCRE2_SPTR code,BOOL utf,const compile_block * cb,const uint32_t * base_list,PCRE2_SPTR base_end)569 compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb,
570 const uint32_t *base_list, PCRE2_SPTR base_end)
571 {
572 PCRE2_UCHAR c;
573 uint32_t list[8];
574 const uint32_t *chr_ptr;
575 const uint32_t *ochr_ptr;
576 const uint32_t *list_ptr;
577 PCRE2_SPTR next_code;
578 #ifdef SUPPORT_WIDE_CHARS
579 PCRE2_SPTR xclass_flags;
580 #endif
581 const uint8_t *class_bitset;
582 const uint8_t *set1, *set2, *set_end;
583 uint32_t chr;
584 BOOL accepted, invert_bits;
585 BOOL entered_a_group = FALSE;
586
587 /* Note: the base_list[1] contains whether the current opcode has a greedy
588 (represented by a non-zero value) quantifier. This is a different from
589 other character type lists, which store here that the character iterator
590 matches to an empty string (also represented by a non-zero value). */
591
592 for(;;)
593 {
594 /* All operations move the code pointer forward.
595 Therefore infinite recursions are not possible. */
596
597 c = *code;
598
599 /* Skip over callouts */
600
601 if (c == OP_CALLOUT)
602 {
603 code += PRIV(OP_lengths)[c];
604 continue;
605 }
606
607 if (c == OP_ALT)
608 {
609 do code += GET(code, 1); while (*code == OP_ALT);
610 c = *code;
611 }
612
613 switch(c)
614 {
615 case OP_END:
616 case OP_KETRPOS:
617 /* TRUE only in greedy case. The non-greedy case could be replaced by
618 an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT
619 uses more memory, which we cannot get at this stage.) */
620
621 return base_list[1] != 0;
622
623 case OP_KET:
624 /* If the bracket is capturing, and referenced by an OP_RECURSE, or
625 it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
626 cannot be converted to a possessive form. */
627
628 if (base_list[1] == 0) return FALSE;
629
630 switch(*(code - GET(code, 1)))
631 {
632 case OP_ASSERT:
633 case OP_ASSERT_NOT:
634 case OP_ASSERTBACK:
635 case OP_ASSERTBACK_NOT:
636 case OP_ONCE:
637 case OP_ONCE_NC:
638 /* Atomic sub-patterns and assertions can always auto-possessify their
639 last iterator. However, if the group was entered as a result of checking
640 a previous iterator, this is not possible. */
641
642 return !entered_a_group;
643 }
644
645 code += PRIV(OP_lengths)[c];
646 continue;
647
648 case OP_ONCE:
649 case OP_ONCE_NC:
650 case OP_BRA:
651 case OP_CBRA:
652 next_code = code + GET(code, 1);
653 code += PRIV(OP_lengths)[c];
654
655 while (*next_code == OP_ALT)
656 {
657 if (!compare_opcodes(code, utf, cb, base_list, base_end)) return FALSE;
658 code = next_code + 1 + LINK_SIZE;
659 next_code += GET(next_code, 1);
660 }
661
662 entered_a_group = TRUE;
663 continue;
664
665 case OP_BRAZERO:
666 case OP_BRAMINZERO:
667
668 next_code = code + 1;
669 if (*next_code != OP_BRA && *next_code != OP_CBRA
670 && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE;
671
672 do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
673
674 /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
675
676 next_code += 1 + LINK_SIZE;
677 if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
678 return FALSE;
679
680 code += PRIV(OP_lengths)[c];
681 continue;
682
683 default:
684 break;
685 }
686
687 /* Check for a supported opcode, and load its properties. */
688
689 code = get_chr_property_list(code, utf, cb->fcc, list);
690 if (code == NULL) return FALSE; /* Unsupported */
691
692 /* If either opcode is a small character list, set pointers for comparing
693 characters from that list with another list, or with a property. */
694
695 if (base_list[0] == OP_CHAR)
696 {
697 chr_ptr = base_list + 2;
698 list_ptr = list;
699 }
700 else if (list[0] == OP_CHAR)
701 {
702 chr_ptr = list + 2;
703 list_ptr = base_list;
704 }
705
706 /* Character bitsets can also be compared to certain opcodes. */
707
708 else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS
709 #if PCRE2_CODE_UNIT_WIDTH == 8
710 /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. */
711 || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS))
712 #endif
713 )
714 {
715 #if PCRE2_CODE_UNIT_WIDTH == 8
716 if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS))
717 #else
718 if (base_list[0] == OP_CLASS)
719 #endif
720 {
721 set1 = (uint8_t *)(base_end - base_list[2]);
722 list_ptr = list;
723 }
724 else
725 {
726 set1 = (uint8_t *)(code - list[2]);
727 list_ptr = base_list;
728 }
729
730 invert_bits = FALSE;
731 switch(list_ptr[0])
732 {
733 case OP_CLASS:
734 case OP_NCLASS:
735 set2 = (uint8_t *)
736 ((list_ptr == list ? code : base_end) - list_ptr[2]);
737 break;
738
739 #ifdef SUPPORT_WIDE_CHARS
740 case OP_XCLASS:
741 xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
742 if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
743 if ((*xclass_flags & XCL_MAP) == 0)
744 {
745 /* No bits are set for characters < 256. */
746 if (list[1] == 0) return TRUE;
747 /* Might be an empty repeat. */
748 continue;
749 }
750 set2 = (uint8_t *)(xclass_flags + 1);
751 break;
752 #endif
753
754 case OP_NOT_DIGIT:
755 invert_bits = TRUE;
756 /* Fall through */
757 case OP_DIGIT:
758 set2 = (uint8_t *)(cb->cbits + cbit_digit);
759 break;
760
761 case OP_NOT_WHITESPACE:
762 invert_bits = TRUE;
763 /* Fall through */
764 case OP_WHITESPACE:
765 set2 = (uint8_t *)(cb->cbits + cbit_space);
766 break;
767
768 case OP_NOT_WORDCHAR:
769 invert_bits = TRUE;
770 /* Fall through */
771 case OP_WORDCHAR:
772 set2 = (uint8_t *)(cb->cbits + cbit_word);
773 break;
774
775 default:
776 return FALSE;
777 }
778
779 /* Because the bit sets are unaligned bytes, we need to perform byte
780 comparison here. */
781
782 set_end = set1 + 32;
783 if (invert_bits)
784 {
785 do
786 {
787 if ((*set1++ & ~(*set2++)) != 0) return FALSE;
788 }
789 while (set1 < set_end);
790 }
791 else
792 {
793 do
794 {
795 if ((*set1++ & *set2++) != 0) return FALSE;
796 }
797 while (set1 < set_end);
798 }
799
800 if (list[1] == 0) return TRUE;
801 /* Might be an empty repeat. */
802 continue;
803 }
804
805 /* Some property combinations also acceptable. Unicode property opcodes are
806 processed specially; the rest can be handled with a lookup table. */
807
808 else
809 {
810 uint32_t leftop, rightop;
811
812 leftop = base_list[0];
813 rightop = list[0];
814
815 #ifdef SUPPORT_UNICODE
816 accepted = FALSE; /* Always set in non-unicode case. */
817 if (leftop == OP_PROP || leftop == OP_NOTPROP)
818 {
819 if (rightop == OP_EOD)
820 accepted = TRUE;
821 else if (rightop == OP_PROP || rightop == OP_NOTPROP)
822 {
823 int n;
824 const uint8_t *p;
825 BOOL same = leftop == rightop;
826 BOOL lisprop = leftop == OP_PROP;
827 BOOL risprop = rightop == OP_PROP;
828 BOOL bothprop = lisprop && risprop;
829
830 /* There's a table that specifies how each combination is to be
831 processed:
832 0 Always return FALSE (never auto-possessify)
833 1 Character groups are distinct (possessify if both are OP_PROP)
834 2 Check character categories in the same group (general or particular)
835 3 Return TRUE if the two opcodes are not the same
836 ... see comments below
837 */
838
839 n = propposstab[base_list[2]][list[2]];
840 switch(n)
841 {
842 case 0: break;
843 case 1: accepted = bothprop; break;
844 case 2: accepted = (base_list[3] == list[3]) != same; break;
845 case 3: accepted = !same; break;
846
847 case 4: /* Left general category, right particular category */
848 accepted = risprop && catposstab[base_list[3]][list[3]] == same;
849 break;
850
851 case 5: /* Right general category, left particular category */
852 accepted = lisprop && catposstab[list[3]][base_list[3]] == same;
853 break;
854
855 /* This code is logically tricky. Think hard before fiddling with it.
856 The posspropstab table has four entries per row. Each row relates to
857 one of PCRE's special properties such as ALNUM or SPACE or WORD.
858 Only WORD actually needs all four entries, but using repeats for the
859 others means they can all use the same code below.
860
861 The first two entries in each row are Unicode general categories, and
862 apply always, because all the characters they include are part of the
863 PCRE character set. The third and fourth entries are a general and a
864 particular category, respectively, that include one or more relevant
865 characters. One or the other is used, depending on whether the check
866 is for a general or a particular category. However, in both cases the
867 category contains more characters than the specials that are defined
868 for the property being tested against. Therefore, it cannot be used
869 in a NOTPROP case.
870
871 Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
872 Underscore is covered by ucp_P or ucp_Po. */
873
874 case 6: /* Left alphanum vs right general category */
875 case 7: /* Left space vs right general category */
876 case 8: /* Left word vs right general category */
877 p = posspropstab[n-6];
878 accepted = risprop && lisprop ==
879 (list[3] != p[0] &&
880 list[3] != p[1] &&
881 (list[3] != p[2] || !lisprop));
882 break;
883
884 case 9: /* Right alphanum vs left general category */
885 case 10: /* Right space vs left general category */
886 case 11: /* Right word vs left general category */
887 p = posspropstab[n-9];
888 accepted = lisprop && risprop ==
889 (base_list[3] != p[0] &&
890 base_list[3] != p[1] &&
891 (base_list[3] != p[2] || !risprop));
892 break;
893
894 case 12: /* Left alphanum vs right particular category */
895 case 13: /* Left space vs right particular category */
896 case 14: /* Left word vs right particular category */
897 p = posspropstab[n-12];
898 accepted = risprop && lisprop ==
899 (catposstab[p[0]][list[3]] &&
900 catposstab[p[1]][list[3]] &&
901 (list[3] != p[3] || !lisprop));
902 break;
903
904 case 15: /* Right alphanum vs left particular category */
905 case 16: /* Right space vs left particular category */
906 case 17: /* Right word vs left particular category */
907 p = posspropstab[n-15];
908 accepted = lisprop && risprop ==
909 (catposstab[p[0]][base_list[3]] &&
910 catposstab[p[1]][base_list[3]] &&
911 (base_list[3] != p[3] || !risprop));
912 break;
913 }
914 }
915 }
916
917 else
918 #endif /* SUPPORT_UNICODE */
919
920 accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
921 rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
922 autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
923
924 if (!accepted) return FALSE;
925
926 if (list[1] == 0) return TRUE;
927 /* Might be an empty repeat. */
928 continue;
929 }
930
931 /* Control reaches here only if one of the items is a small character list.
932 All characters are checked against the other side. */
933
934 do
935 {
936 chr = *chr_ptr;
937
938 switch(list_ptr[0])
939 {
940 case OP_CHAR:
941 ochr_ptr = list_ptr + 2;
942 do
943 {
944 if (chr == *ochr_ptr) return FALSE;
945 ochr_ptr++;
946 }
947 while(*ochr_ptr != NOTACHAR);
948 break;
949
950 case OP_NOT:
951 ochr_ptr = list_ptr + 2;
952 do
953 {
954 if (chr == *ochr_ptr)
955 break;
956 ochr_ptr++;
957 }
958 while(*ochr_ptr != NOTACHAR);
959 if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */
960 break;
961
962 /* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is *not*
963 set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
964
965 case OP_DIGIT:
966 if (chr < 256 && (cb->ctypes[chr] & ctype_digit) != 0) return FALSE;
967 break;
968
969 case OP_NOT_DIGIT:
970 if (chr > 255 || (cb->ctypes[chr] & ctype_digit) == 0) return FALSE;
971 break;
972
973 case OP_WHITESPACE:
974 if (chr < 256 && (cb->ctypes[chr] & ctype_space) != 0) return FALSE;
975 break;
976
977 case OP_NOT_WHITESPACE:
978 if (chr > 255 || (cb->ctypes[chr] & ctype_space) == 0) return FALSE;
979 break;
980
981 case OP_WORDCHAR:
982 if (chr < 255 && (cb->ctypes[chr] & ctype_word) != 0) return FALSE;
983 break;
984
985 case OP_NOT_WORDCHAR:
986 if (chr > 255 || (cb->ctypes[chr] & ctype_word) == 0) return FALSE;
987 break;
988
989 case OP_HSPACE:
990 switch(chr)
991 {
992 HSPACE_CASES: return FALSE;
993 default: break;
994 }
995 break;
996
997 case OP_NOT_HSPACE:
998 switch(chr)
999 {
1000 HSPACE_CASES: break;
1001 default: return FALSE;
1002 }
1003 break;
1004
1005 case OP_ANYNL:
1006 case OP_VSPACE:
1007 switch(chr)
1008 {
1009 VSPACE_CASES: return FALSE;
1010 default: break;
1011 }
1012 break;
1013
1014 case OP_NOT_VSPACE:
1015 switch(chr)
1016 {
1017 VSPACE_CASES: break;
1018 default: return FALSE;
1019 }
1020 break;
1021
1022 case OP_DOLL:
1023 case OP_EODN:
1024 switch (chr)
1025 {
1026 case CHAR_CR:
1027 case CHAR_LF:
1028 case CHAR_VT:
1029 case CHAR_FF:
1030 case CHAR_NEL:
1031 #ifndef EBCDIC
1032 case 0x2028:
1033 case 0x2029:
1034 #endif /* Not EBCDIC */
1035 return FALSE;
1036 }
1037 break;
1038
1039 case OP_EOD: /* Can always possessify before \z */
1040 break;
1041
1042 #ifdef SUPPORT_UNICODE
1043 case OP_PROP:
1044 case OP_NOTPROP:
1045 if (!check_char_prop(chr, list_ptr[2], list_ptr[3],
1046 list_ptr[0] == OP_NOTPROP))
1047 return FALSE;
1048 break;
1049 #endif
1050
1051 case OP_NCLASS:
1052 if (chr > 255) return FALSE;
1053 /* Fall through */
1054
1055 case OP_CLASS:
1056 if (chr > 255) break;
1057 class_bitset = (uint8_t *)
1058 ((list_ptr == list ? code : base_end) - list_ptr[2]);
1059 if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
1060 break;
1061
1062 #ifdef SUPPORT_WIDE_CHARS
1063 case OP_XCLASS:
1064 if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1065 list_ptr[2] + LINK_SIZE, utf)) return FALSE;
1066 break;
1067 #endif
1068
1069 default:
1070 return FALSE;
1071 }
1072
1073 chr_ptr++;
1074 }
1075 while(*chr_ptr != NOTACHAR);
1076
1077 /* At least one character must be matched from this opcode. */
1078
1079 if (list[1] == 0) return TRUE;
1080 }
1081
/* Control never reaches here. There used to be a fail-safe return FALSE; here,
but some compilers complain about an unreachable statement. */
1084 }
1085
1086
1087
1088 /*************************************************
1089 * Scan compiled regex for auto-possession *
1090 *************************************************/
1091
1092 /* Replaces single character iterations with their possessive alternatives
1093 if appropriate. This function modifies the compiled opcode!
1094
1095 Arguments:
1096 code points to start of the byte code
1097 utf TRUE in UTF mode
1098 cb compile data block
1099
1100 Returns: nothing
1101 */
1102
1103 void
PRIV(auto_possessify)1104 PRIV(auto_possessify)(PCRE2_UCHAR *code, BOOL utf, const compile_block *cb)
1105 {
1106 register PCRE2_UCHAR c;
1107 PCRE2_SPTR end;
1108 PCRE2_UCHAR *repeat_opcode;
1109 uint32_t list[8];
1110
1111 for (;;)
1112 {
1113 c = *code;
1114
1115 if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1116 {
1117 c -= get_repeat_base(c) - OP_STAR;
1118 end = (c <= OP_MINUPTO) ?
1119 get_chr_property_list(code, utf, cb->fcc, list) : NULL;
1120 list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
1121
1122 if (end != NULL && compare_opcodes(end, utf, cb, list, end))
1123 {
1124 switch(c)
1125 {
1126 case OP_STAR:
1127 *code += OP_POSSTAR - OP_STAR;
1128 break;
1129
1130 case OP_MINSTAR:
1131 *code += OP_POSSTAR - OP_MINSTAR;
1132 break;
1133
1134 case OP_PLUS:
1135 *code += OP_POSPLUS - OP_PLUS;
1136 break;
1137
1138 case OP_MINPLUS:
1139 *code += OP_POSPLUS - OP_MINPLUS;
1140 break;
1141
1142 case OP_QUERY:
1143 *code += OP_POSQUERY - OP_QUERY;
1144 break;
1145
1146 case OP_MINQUERY:
1147 *code += OP_POSQUERY - OP_MINQUERY;
1148 break;
1149
1150 case OP_UPTO:
1151 *code += OP_POSUPTO - OP_UPTO;
1152 break;
1153
1154 case OP_MINUPTO:
1155 *code += OP_POSUPTO - OP_MINUPTO;
1156 break;
1157 }
1158 }
1159 c = *code;
1160 }
1161 else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
1162 {
1163 #ifdef SUPPORT_WIDE_CHARS
1164 if (c == OP_XCLASS)
1165 repeat_opcode = code + GET(code, 1);
1166 else
1167 #endif
1168 repeat_opcode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
1169
1170 c = *repeat_opcode;
1171 if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1172 {
1173 /* end must not be NULL. */
1174 end = get_chr_property_list(code, utf, cb->fcc, list);
1175
1176 list[1] = (c & 1) == 0;
1177
1178 if (compare_opcodes(end, utf, cb, list, end))
1179 {
1180 switch (c)
1181 {
1182 case OP_CRSTAR:
1183 case OP_CRMINSTAR:
1184 *repeat_opcode = OP_CRPOSSTAR;
1185 break;
1186
1187 case OP_CRPLUS:
1188 case OP_CRMINPLUS:
1189 *repeat_opcode = OP_CRPOSPLUS;
1190 break;
1191
1192 case OP_CRQUERY:
1193 case OP_CRMINQUERY:
1194 *repeat_opcode = OP_CRPOSQUERY;
1195 break;
1196
1197 case OP_CRRANGE:
1198 case OP_CRMINRANGE:
1199 *repeat_opcode = OP_CRPOSRANGE;
1200 break;
1201 }
1202 }
1203 }
1204 c = *code;
1205 }
1206
1207 switch(c)
1208 {
1209 case OP_END:
1210 return;
1211
1212 case OP_TYPESTAR:
1213 case OP_TYPEMINSTAR:
1214 case OP_TYPEPLUS:
1215 case OP_TYPEMINPLUS:
1216 case OP_TYPEQUERY:
1217 case OP_TYPEMINQUERY:
1218 case OP_TYPEPOSSTAR:
1219 case OP_TYPEPOSPLUS:
1220 case OP_TYPEPOSQUERY:
1221 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
1222 break;
1223
1224 case OP_TYPEUPTO:
1225 case OP_TYPEMINUPTO:
1226 case OP_TYPEEXACT:
1227 case OP_TYPEPOSUPTO:
1228 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
1229 code += 2;
1230 break;
1231
1232 #ifdef SUPPORT_WIDE_CHARS
1233 case OP_XCLASS:
1234 code += GET(code, 1);
1235 break;
1236 #endif
1237
1238 case OP_MARK:
1239 case OP_PRUNE_ARG:
1240 case OP_SKIP_ARG:
1241 case OP_THEN_ARG:
1242 code += code[1];
1243 break;
1244 }
1245
1246 /* Add in the fixed length from the table */
1247
1248 code += PRIV(OP_lengths)[c];
1249
1250 /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
1251 followed by a multi-byte character. The length in the table is a minimum, so
1252 we have to arrange to skip the extra code units. */
1253
1254 #ifdef MAYBE_UTF_MULTI
1255 if (utf) switch(c)
1256 {
1257 case OP_CHAR:
1258 case OP_CHARI:
1259 case OP_NOT:
1260 case OP_NOTI:
1261 case OP_STAR:
1262 case OP_MINSTAR:
1263 case OP_PLUS:
1264 case OP_MINPLUS:
1265 case OP_QUERY:
1266 case OP_MINQUERY:
1267 case OP_UPTO:
1268 case OP_MINUPTO:
1269 case OP_EXACT:
1270 case OP_POSSTAR:
1271 case OP_POSPLUS:
1272 case OP_POSQUERY:
1273 case OP_POSUPTO:
1274 case OP_STARI:
1275 case OP_MINSTARI:
1276 case OP_PLUSI:
1277 case OP_MINPLUSI:
1278 case OP_QUERYI:
1279 case OP_MINQUERYI:
1280 case OP_UPTOI:
1281 case OP_MINUPTOI:
1282 case OP_EXACTI:
1283 case OP_POSSTARI:
1284 case OP_POSPLUSI:
1285 case OP_POSQUERYI:
1286 case OP_POSUPTOI:
1287 case OP_NOTSTAR:
1288 case OP_NOTMINSTAR:
1289 case OP_NOTPLUS:
1290 case OP_NOTMINPLUS:
1291 case OP_NOTQUERY:
1292 case OP_NOTMINQUERY:
1293 case OP_NOTUPTO:
1294 case OP_NOTMINUPTO:
1295 case OP_NOTEXACT:
1296 case OP_NOTPOSSTAR:
1297 case OP_NOTPOSPLUS:
1298 case OP_NOTPOSQUERY:
1299 case OP_NOTPOSUPTO:
1300 case OP_NOTSTARI:
1301 case OP_NOTMINSTARI:
1302 case OP_NOTPLUSI:
1303 case OP_NOTMINPLUSI:
1304 case OP_NOTQUERYI:
1305 case OP_NOTMINQUERYI:
1306 case OP_NOTUPTOI:
1307 case OP_NOTMINUPTOI:
1308 case OP_NOTEXACTI:
1309 case OP_NOTPOSSTARI:
1310 case OP_NOTPOSPLUSI:
1311 case OP_NOTPOSQUERYI:
1312 case OP_NOTPOSUPTOI:
1313 if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
1314 break;
1315 }
1316 #else
1317 (void)(utf); /* Keep compiler happy by referencing function argument */
1318 #endif /* SUPPORT_WIDE_CHARS */
1319 }
1320 }
1321
1322 /* End of pcre2_auto_possess.c */
1323