1 /*************************************************
2 *             PCRE2 testing program              *
3 *************************************************/
4 
5 /* PCRE2 is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language. In 2014
7 the API was completely revised and '2' was added to the name, because the old
8 API, which had lasted for 16 years, could not accommodate new requirements. At
9 the same time, this testing program was re-designed because its original
10 hacked-up (non-) design had also run out of steam.
11 
12                        Written by Philip Hazel
13      Original code Copyright (c) 1997-2012 University of Cambridge
14     Rewritten code Copyright (c) 2016-2021 University of Cambridge
15 
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
19 
20     * Redistributions of source code must retain the above copyright notice,
21       this list of conditions and the following disclaimer.
22 
23     * Redistributions in binary form must reproduce the above copyright
24       notice, this list of conditions and the following disclaimer in the
25       documentation and/or other materials provided with the distribution.
26 
27     * Neither the name of the University of Cambridge nor the names of its
28       contributors may be used to endorse or promote products derived from
29       this software without specific prior written permission.
30 
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
43 */
44 
45 
46 /* This program supports testing of the 8-bit, 16-bit, and 32-bit PCRE2
47 libraries in a single program, though its input and output are always 8-bit.
48 It is different from modules such as pcre2_compile.c in the library itself,
49 which are compiled separately for each code unit width. If two widths are
50 enabled, for example, pcre2_compile.c is compiled twice. In contrast,
51 pcre2test.c is compiled only once, and linked with all the enabled libraries.
52 Therefore, it must not make use of any of the macros from pcre2.h or
53 pcre2_internal.h that depend on PCRE2_CODE_UNIT_WIDTH. It does, however, make
54 use of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, and SUPPORT_PCRE2_32, to ensure that
55 it references only the enabled library functions. */
56 
57 #ifdef HAVE_CONFIG_H
58 #include "config.h"
59 #endif
60 
61 #include <ctype.h>
62 #include <stdio.h>
63 #include <string.h>
64 #include <stdlib.h>
65 #include <time.h>
66 #include <locale.h>
67 #include <errno.h>
68 
69 #if defined NATIVE_ZOS
70 #include "pcrzoscs.h"
71 /* That header is not included in the main PCRE2 distribution because other
72 apparatus is needed to compile pcre2test for z/OS. The header can be found in
73 the special z/OS distribution, which is available from www.zaconsultants.net or
74 from www.cbttape.org. */
75 #endif
76 
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 
81 /* Debugging code enabler */
82 
83 /* #define DEBUG_SHOW_MALLOC_ADDRESSES */
84 
85 /* Both libreadline and libedit are optionally supported. The user-supplied
86 original patch uses readline/readline.h for libedit, but in at least one system
87 it is installed as editline/readline.h, so the configuration code now looks for
88 that first, falling back to readline/readline.h. */
89 
90 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
91 #if defined(SUPPORT_LIBREADLINE)
92 #include <readline/readline.h>
93 #include <readline/history.h>
94 #else
95 #if defined(HAVE_EDITLINE_READLINE_H)
96 #include <editline/readline.h>
97 #else
98 #include <readline/readline.h>
99 #endif
100 #endif
101 #endif
102 
103 /* Put the test for interactive input into a macro so that it can be changed if
104 required for different environments. */
105 
106 #define INTERACTIVE(f) isatty(fileno(f))
107 
108 
109 /* ---------------------- System-specific definitions ---------------------- */
110 
111 /* A number of things vary for Windows builds. Originally, pcretest opened its
112 input and output without "b"; then I was told that "b" was needed in some
113 environments, so it was added for release 5.0 to both the input and output. (It
114 makes no difference on Unix-like systems.) Later I was told that it is wrong
115 for the input on Windows. I've now abstracted the modes into macros that are
116 set here, to make it easier to fiddle with them, and removed "b" from the input
117 mode under Windows. The BINARY versions are used when saving/restoring compiled
118 patterns. */
119 
120 #if defined(_WIN32) || defined(WIN32)
121 #include <io.h>                /* For _setmode() */
122 #include <fcntl.h>             /* For _O_BINARY */
123 #define INPUT_MODE          "r"
124 #define OUTPUT_MODE         "wb"
125 #define BINARY_INPUT_MODE   "rb"
126 #define BINARY_OUTPUT_MODE  "wb"
127 
128 #ifndef isatty
129 #define isatty _isatty         /* This is what Windows calls them, I'm told, */
130 #endif                         /* though in some environments they seem to   */
131                                /* be already defined, hence the #ifndefs.    */
132 #ifndef fileno
133 #define fileno _fileno
134 #endif
135 
136 /* A user sent this fix for Borland Builder 5 under Windows. */
137 
138 #ifdef __BORLANDC__
139 #define _setmode(handle, mode) setmode(handle, mode)
140 #endif
141 
142 /* Not Windows */
143 
144 #else
145 #include <sys/time.h>          /* These two includes are needed */
146 #include <sys/resource.h>      /* for setrlimit(). */
147 #if defined NATIVE_ZOS         /* z/OS uses non-binary I/O */
148 #define INPUT_MODE   "r"
149 #define OUTPUT_MODE  "w"
150 #define BINARY_INPUT_MODE   "rb"
151 #define BINARY_OUTPUT_MODE  "wb"
152 #else
153 #define INPUT_MODE          "rb"
154 #define OUTPUT_MODE         "wb"
155 #define BINARY_INPUT_MODE   "rb"
156 #define BINARY_OUTPUT_MODE  "wb"
157 #endif
158 #endif
159 
160 /* VMS-specific code was included as suggested by a VMS user [1]. Another VMS
161 user [2] provided alternative code which worked better for him. I have
162 commented out the original, but kept it around just in case. */
163 
164 #ifdef __VMS
165 #include <ssdef.h>
166 /* These two includes came from [2]. */
167 #include descrip
168 #include lib$routines
169 /* void vms_setsymbol( char *, char *, int ); Original code from [1]. */
170 #endif
171 
/* Old VC and older compilers don't support %td or %zu, and even some that
claim to be C99 don't support them (hence DISABLE_PERCENT_ZT). */
174 
175 #if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
176   (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)))
177 #ifdef _WIN64
178 #define PTR_FORM "lld"
179 #define SIZ_FORM "llu"
180 #else
181 #define PTR_FORM "ld"
182 #define SIZ_FORM "lu"
183 #endif
184 #else
185 #define PTR_FORM "td"
186 #define SIZ_FORM "zu"
187 #endif
188 
189 /* ------------------End of system-specific definitions -------------------- */
190 
191 /* Glueing macros that are used in several places below. */
192 
/* Token pasting is done in two steps: G() lets its arguments be macro-expanded
first, and glue() then joins the results into a single token. */

#define glue(x,y) x##y
#define G(x,y) glue(x,y)
195 
196 /* Miscellaneous parameters and manifests */
197 
198 #ifndef CLOCKS_PER_SEC
199 #ifdef CLK_TCK
200 #define CLOCKS_PER_SEC CLK_TCK
201 #else
202 #define CLOCKS_PER_SEC 100
203 #endif
204 #endif
205 
206 #define CFORE_UNSET UINT32_MAX    /* Unset value for startend/cfail/cerror fields */
207 #define CONVERT_UNSET UINT32_MAX  /* Unset value for convert_type field */
208 #define DFA_WS_DIMENSION 1000     /* Size of DFA workspace */
209 #define DEFAULT_OVECCOUNT 15      /* Default ovector count */
210 #define JUNK_OFFSET 0xdeadbeef    /* For initializing ovector */
211 #define LOCALESIZE 32             /* Size of locale name */
212 #define LOOPREPEAT 500000         /* Default loop count for timing */
213 #define MALLOCLISTSIZE 20         /* For remembering mallocs */
214 #define PARENS_NEST_DEFAULT 220   /* Default parentheses nest limit */
215 #define PATSTACKSIZE 20           /* Pattern stack for save/restore testing */
216 #define REPLACE_MODSIZE 100       /* Field for reading 8-bit replacement */
217 #define VERSION_SIZE 64           /* Size of buffer for the version strings */
218 
219 /* Default JIT compile options */
220 
221 #define JIT_DEFAULT (PCRE2_JIT_COMPLETE|\
222                      PCRE2_JIT_PARTIAL_SOFT|\
223                      PCRE2_JIT_PARTIAL_HARD)
224 
225 /* Make sure the buffer into which replacement strings are copied is big enough
226 to hold them as 32-bit code units. */
227 
228 #define REPLACE_BUFFSIZE 1024   /* This is a byte value */
229 
230 /* Execution modes */
231 
232 #define PCRE8_MODE   8
233 #define PCRE16_MODE 16
234 #define PCRE32_MODE 32
235 
236 /* Processing returns */
237 
enum {
  PR_OK,       /* 0 */
  PR_SKIP,     /* 1 */
  PR_ABEND     /* 2 */
};
239 
/* The macro PRINTABLE determines whether to print an output character as-is or
as a hex value when showing compiled patterns. We use it in cases when the
locale has not been explicitly changed, so as to get consistent output from
systems that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK uses isprint() only when use_tables is not NULL and the value is
less than 256; in every other case it falls back to PRINTABLE. The argument is
parenthesized in the comparison so that passing an expression (for example a
conditional) cannot change how the test is parsed. Note that the argument may
be evaluated more than once, so it must not have side effects. */

#define PRINTOK(c) ((use_tables != NULL && (c) < 256)? isprint(c) : PRINTABLE(c))
252 
253 /* We have to include some of the library source files because we need
254 to use some of the macros, internal structure definitions, and other internal
255 values - pcre2test has "inside information" compared to an application program
256 that strictly follows the PCRE2 API.
257 
258 Before including pcre2_internal.h we define PRIV so that it does not get
259 defined therein. This ensures that PRIV names in the included files do not
260 clash with those in the libraries. Also, although pcre2_internal.h does itself
261 include pcre2.h, we explicitly include it beforehand, along with pcre2posix.h,
262 so that the PCRE2_EXP_xxx macros get set appropriately for an application, not
263 for building the library. */
264 
265 #define PRIV(name) name
266 #define PCRE2_CODE_UNIT_WIDTH 0
267 #include "pcre2.h"
268 #include "pcre2posix.h"
269 #include "pcre2_internal.h"
270 
/* We need access to some of the data tables that PCRE2 uses. Defining
PCRE2_PCRE2TEST makes some minor changes in the files. The previous definition
of PRIV avoids name clashes. */
274 
275 #define PCRE2_PCRE2TEST
276 #include "pcre2_tables.c"
277 #include "pcre2_ucd.c"
278 
/* 32-bit integer values in the input are read by strtoul() or strtol(). The
check needed for overflow depends on whether long ints are in fact longer than
ints. They are defined not to be shorter. When long is wider than 32 bits the
value is compared against the 32-bit limits; otherwise the test is for equality
with those limits. The macro argument is parenthesized at every use so that an
expression can safely be passed; it may be evaluated more than once. */

#if ULONG_MAX > UINT32_MAX
#define U32OVERFLOW(x) ((x) > UINT32_MAX)
#else
#define U32OVERFLOW(x) ((x) == UINT32_MAX)
#endif

#if LONG_MAX > INT32_MAX
#define S32OVERFLOW(x) ((x) > INT32_MAX || (x) < INT32_MIN)
#else
#define S32OVERFLOW(x) ((x) == INT32_MAX || (x) == INT32_MIN)
#endif
294 
295 /* When PCRE2_CODE_UNIT_WIDTH is zero, pcre2_internal.h does not include
296 pcre2_intmodedep.h, which is where mode-dependent macros and structures are
297 defined. We can now include it for each supported code unit width. Because
298 PCRE2_CODE_UNIT_WIDTH was defined as zero before including pcre2.h, it will
299 have left PCRE2_SUFFIX defined as a no-op. We must re-define it appropriately
300 while including these files, and then restore it to a no-op. Because LINK_SIZE
301 may be changed in 16-bit mode and forced to 1 in 32-bit mode, the order of
302 these inclusions should not be changed. */
303 
304 #undef PCRE2_SUFFIX
305 #undef PCRE2_CODE_UNIT_WIDTH
306 
307 #ifdef   SUPPORT_PCRE2_8
308 #define  PCRE2_CODE_UNIT_WIDTH 8
309 #define  PCRE2_SUFFIX(a) G(a,8)
310 #include "pcre2_intmodedep.h"
311 #include "pcre2_printint.c"
312 #undef   PCRE2_CODE_UNIT_WIDTH
313 #undef   PCRE2_SUFFIX
314 #endif   /* SUPPORT_PCRE2_8 */
315 
316 #ifdef   SUPPORT_PCRE2_16
317 #define  PCRE2_CODE_UNIT_WIDTH 16
318 #define  PCRE2_SUFFIX(a) G(a,16)
319 #include "pcre2_intmodedep.h"
320 #include "pcre2_printint.c"
321 #undef   PCRE2_CODE_UNIT_WIDTH
322 #undef   PCRE2_SUFFIX
323 #endif   /* SUPPORT_PCRE2_16 */
324 
325 #ifdef   SUPPORT_PCRE2_32
326 #define  PCRE2_CODE_UNIT_WIDTH 32
327 #define  PCRE2_SUFFIX(a) G(a,32)
328 #include "pcre2_intmodedep.h"
329 #include "pcre2_printint.c"
330 #undef   PCRE2_CODE_UNIT_WIDTH
331 #undef   PCRE2_SUFFIX
332 #endif   /* SUPPORT_PCRE2_32 */
333 
334 #define PCRE2_SUFFIX(a) a
335 
336 /* We need to be able to check input text for UTF-8 validity, whatever code
337 widths are actually available, because the input to pcre2test is always in
338 8-bit code units. So we include the UTF validity checking function for 8-bit
339 code units. */
340 
341 extern int valid_utf(PCRE2_SPTR8, PCRE2_SIZE, PCRE2_SIZE *);
342 
343 #define  PCRE2_CODE_UNIT_WIDTH 8
344 #undef   PCRE2_SPTR
345 #define  PCRE2_SPTR PCRE2_SPTR8
346 #include "pcre2_valid_utf.c"
347 #undef   PCRE2_CODE_UNIT_WIDTH
348 #undef   PCRE2_SPTR
349 
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be selected by a command-line option. If there is no 8-bit
support, there must be 16-bit or 32-bit support, so default to one of them. The
config function, JIT stack, contexts, and version string are the same in all
modes, so use the form of the first that is available. */
355 
356 #if defined SUPPORT_PCRE2_8
357 #define DEFAULT_TEST_MODE PCRE8_MODE
358 #define VERSION_TYPE PCRE2_UCHAR8
359 #define PCRE2_CONFIG pcre2_config_8
360 #define PCRE2_JIT_STACK pcre2_jit_stack_8
361 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_8
362 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_8
363 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_8
364 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_8
365 
366 #elif defined SUPPORT_PCRE2_16
367 #define DEFAULT_TEST_MODE PCRE16_MODE
368 #define VERSION_TYPE PCRE2_UCHAR16
369 #define PCRE2_CONFIG pcre2_config_16
370 #define PCRE2_JIT_STACK pcre2_jit_stack_16
371 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_16
372 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_16
373 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_16
374 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_16
375 
376 #elif defined SUPPORT_PCRE2_32
377 #define DEFAULT_TEST_MODE PCRE32_MODE
378 #define VERSION_TYPE PCRE2_UCHAR32
379 #define PCRE2_CONFIG pcre2_config_32
380 #define PCRE2_JIT_STACK pcre2_jit_stack_32
381 #define PCRE2_REAL_GENERAL_CONTEXT pcre2_real_general_context_32
382 #define PCRE2_REAL_COMPILE_CONTEXT pcre2_real_compile_context_32
383 #define PCRE2_REAL_CONVERT_CONTEXT pcre2_real_convert_context_32
384 #define PCRE2_REAL_MATCH_CONTEXT pcre2_real_match_context_32
385 #endif
386 
387 /* ------------- Structure and table for handling #-commands ------------- */
388 
/* A command name paired with its CMD_xxx code. */

typedef struct cmdstruct {
  const char *name;   /* Command name as it appears in cmdlist */
  int value;          /* The CMD_xxx code for that name */
} cmdstruct;

/* Command codes. CMD_UNKNOWN is last and has no entry in cmdlist. */

enum {
  CMD_FORBID_UTF,
  CMD_LOAD,
  CMD_LOADTABLES,
  CMD_NEWLINE_DEFAULT,
  CMD_PATTERN,
  CMD_PERLTEST,
  CMD_POP,
  CMD_POPCOPY,
  CMD_SAVE,
  CMD_SUBJECT,
  CMD_UNKNOWN
};

/* Table that maps each command name to its code. The entries are in the same
order as the enumeration above. */

static cmdstruct cmdlist[] = {
  { "forbid_utf", CMD_FORBID_UTF },
  { "load", CMD_LOAD },
  { "loadtables", CMD_LOADTABLES },
  { "newline_default", CMD_NEWLINE_DEFAULT },
  { "pattern", CMD_PATTERN },
  { "perltest", CMD_PERLTEST },
  { "pop", CMD_POP },
  { "popcopy", CMD_POPCOPY },
  { "save", CMD_SAVE },
  { "subject", CMD_SUBJECT }
};

/* Number of entries in cmdlist. */

#define cmdlistcount (sizeof(cmdlist)/sizeof(cmdlist[0]))
411 
412 /* ------------- Structures and tables for handling modifiers -------------- */
413 
414 /* Table of names for newline types. Must be kept in step with the definitions
415 of PCRE2_NEWLINE_xx in pcre2.h. */
416 
static const char *newlines[] = {
  "DEFAULT",    /* index 0 */
  "CR",         /* index 1 */
  "LF",         /* index 2 */
  "CRLF",       /* index 3 */
  "ANY",        /* index 4 */
  "ANYCRLF",    /* index 5 */
  "NUL"         /* index 6 */
};
419 
420 /* Structure and table for handling pattern conversion types. */
421 
422 typedef struct convertstruct {
423   const char *name;
424   uint32_t option;
425 } convertstruct;
426 
427 static convertstruct convertlist[] = {
428   { "glob",                   PCRE2_CONVERT_GLOB },
429   { "glob_no_starstar",       PCRE2_CONVERT_GLOB_NO_STARSTAR },
430   { "glob_no_wild_separator", PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR },
431   { "posix_basic",            PCRE2_CONVERT_POSIX_BASIC },
432   { "posix_extended",         PCRE2_CONVERT_POSIX_EXTENDED },
433   { "unset",                  CONVERT_UNSET }};
434 
435 #define convertlistcount (sizeof(convertlist)/sizeof(convertstruct))
436 
437 /* Modifier types and applicability */
438 
/* The first group of values says where a modifier applies; the second group
says what kind of value it is. The order of the enumerators is significant only
in that each keeps its original numeric value. */

enum {
  /* Applicability */
  MOD_CTC,     /* Compile context */
  MOD_CTM,     /* Match context */
  MOD_PAT,     /* Pattern */
  MOD_PATP,    /* Pattern, and OK for Perl test */
  MOD_DAT,     /* Data line */
  MOD_PD,      /* Pattern or data line */
  MOD_PDP,     /* As MOD_PD, and OK for Perl test */
  MOD_PND,     /* As MOD_PD, but not for a default pattern */
  MOD_PNDP,    /* As MOD_PND, and OK for Perl test */

  /* Kind of value */
  MOD_CHR,     /* Single character */
  MOD_CON,     /* "convert" type/options list */
  MOD_CTL,     /* Control bit */
  MOD_BSR,     /* BSR value */
  MOD_IN2,     /* One or two unsigned integers */
  MOD_INS,     /* Signed integer */
  MOD_INT,     /* Unsigned integer */
  MOD_IND,     /* Unsigned integer, but no value => default */
  MOD_NL,      /* Newline value */
  MOD_NN,      /* Number or name; more than one may occur */
  MOD_OPT,     /* Option bit */
  MOD_SIZ,     /* PCRE2_SIZE value */
  MOD_STR      /* String */
};
461 
462 /* Control bits. Some apply to compiling, some to matching, but some can be set
463 either on a pattern or a data line, so they must all be distinct. There are now
464 so many of them that they are split into two fields. */
465 
/* First control word: one bit per control, written as a shift count so that
the bit position of each control is explicit. */

#define CTL_AFTERTEXT                    (1u << 0)
#define CTL_ALLAFTERTEXT                 (1u << 1)
#define CTL_ALLCAPTURES                  (1u << 2)
#define CTL_ALLUSEDTEXT                  (1u << 3)
#define CTL_ALTGLOBAL                    (1u << 4)
#define CTL_BINCODE                      (1u << 5)
#define CTL_CALLOUT_CAPTURE              (1u << 6)
#define CTL_CALLOUT_INFO                 (1u << 7)
#define CTL_CALLOUT_NONE                 (1u << 8)
#define CTL_DFA                          (1u << 9)
#define CTL_EXPAND                       (1u << 10)
#define CTL_FINDLIMITS                   (1u << 11)
#define CTL_FRAMESIZE                    (1u << 12)
#define CTL_FULLBINCODE                  (1u << 13)
#define CTL_GETALL                       (1u << 14)
#define CTL_GLOBAL                       (1u << 15)
#define CTL_HEXPAT                       (1u << 16)  /* Same word as USE_LENGTH */
#define CTL_INFO                         (1u << 17)
#define CTL_JITFAST                      (1u << 18)
#define CTL_JITVERIFY                    (1u << 19)
#define CTL_MARK                         (1u << 20)
#define CTL_MEMORY                       (1u << 21)
#define CTL_NULLCONTEXT                  (1u << 22)
#define CTL_POSIX                        (1u << 23)
#define CTL_POSIX_NOSUB                  (1u << 24)
#define CTL_PUSH                         (1u << 25)  /* These three must be */
#define CTL_PUSHCOPY                     (1u << 26)  /*   all in the same */
#define CTL_PUSHTABLESCOPY               (1u << 27)  /*     word. */
#define CTL_STARTCHAR                    (1u << 28)
#define CTL_USE_LENGTH                   (1u << 29)  /* Same word as HEXPAT */
#define CTL_UTF8_INPUT                   (1u << 30)
#define CTL_ZERO_TERMINATE               (1u << 31)

/* Combinations of first-word controls */

#define CTL_DEBUG            (CTL_INFO|CTL_FULLBINCODE)  /* For setting */
#define CTL_ANYINFO          (CTL_BINCODE|CTL_CALLOUT_INFO|CTL_DEBUG)
#define CTL_ANYGLOB          (CTL_GLOBAL|CTL_ALTGLOBAL)

/* Second control word */

#define CTL2_SUBSTITUTE_CALLOUT          (1u << 0)
#define CTL2_SUBSTITUTE_EXTENDED         (1u << 1)
#define CTL2_SUBSTITUTE_LITERAL          (1u << 2)
#define CTL2_SUBSTITUTE_MATCHED          (1u << 3)
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH  (1u << 4)
#define CTL2_SUBSTITUTE_REPLACEMENT_ONLY (1u << 5)
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET    (1u << 6)
#define CTL2_SUBSTITUTE_UNSET_EMPTY      (1u << 7)
#define CTL2_SUBJECT_LITERAL             (1u << 8)
#define CTL2_CALLOUT_NO_WHERE            (1u << 9)
#define CTL2_CALLOUT_EXTRA               (1u << 10)
#define CTL2_ALLVECTOR                   (1u << 11)

#define CTL2_NL_SET                      (1u << 30)  /* Informational */
#define CTL2_BSR_SET                     (1u << 31)  /* Informational */

/* The matching controls that may be set either on a pattern or on a data line.
They are copied from the pattern controls as initial settings for data line
controls. CTL_MEMORY is deliberately left out, because it does different things
in the two cases. */

#define CTL_ALLPD  (CTL_AFTERTEXT|CTL_ALLAFTERTEXT|\
                    CTL_ALLCAPTURES|CTL_ALLUSEDTEXT|\
                    CTL_ALTGLOBAL|CTL_GLOBAL|\
                    CTL_MARK|CTL_STARTCHAR|\
                    CTL_UTF8_INPUT)

#define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|CTL2_SUBSTITUTE_EXTENDED|\
                    CTL2_SUBSTITUTE_LITERAL|CTL2_SUBSTITUTE_MATCHED|\
                    CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
                    CTL2_SUBSTITUTE_REPLACEMENT_ONLY|\
                    CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
                    CTL2_SUBSTITUTE_UNSET_EMPTY|\
                    CTL2_ALLVECTOR)
547 
548 /* Structures for holding modifier information for patterns and subject strings
549 (data). Fields containing modifiers that can be set either for a pattern or a
550 subject must be at the start and in the same order in both cases so that the
551 same offset in the big table below works for both. */
552 
553 typedef struct patctl {       /* Structure for pattern modifiers. */
554   uint32_t  options;          /* Must be in same position as datctl */
555   uint32_t  control;          /* Must be in same position as datctl */
556   uint32_t  control2;         /* Must be in same position as datctl */
557   uint32_t  jitstack;         /* Must be in same position as datctl */
558    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
559   uint32_t  substitute_skip;  /* Must be in same position as patctl */
560   uint32_t  substitute_stop;  /* Must be in same position as patctl */
561   uint32_t  jit;
562   uint32_t  stackguard_test;
563   uint32_t  tables_id;
564   uint32_t  convert_type;
565   uint32_t  convert_length;
566   uint32_t  convert_glob_escape;
567   uint32_t  convert_glob_separator;
568   uint32_t  regerror_buffsize;
569    uint8_t  locale[LOCALESIZE];
570 } patctl;
571 
572 #define MAXCPYGET 10
573 #define LENCPYGET 64
574 
575 typedef struct datctl {       /* Structure for data line modifiers. */
576   uint32_t  options;          /* Must be in same position as patctl */
577   uint32_t  control;          /* Must be in same position as patctl */
578   uint32_t  control2;         /* Must be in same position as patctl */
579   uint32_t  jitstack;         /* Must be in same position as patctl */
580    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
581   uint32_t  substitute_skip;  /* Must be in same position as patctl */
582   uint32_t  substitute_stop;  /* Must be in same position as patctl */
583   uint32_t  startend[2];
584   uint32_t  cerror[2];
585   uint32_t  cfail[2];
586    int32_t  callout_data;
587    int32_t  copy_numbers[MAXCPYGET];
588    int32_t  get_numbers[MAXCPYGET];
589   uint32_t  oveccount;
590   uint32_t  offset;
591   uint8_t   copy_names[LENCPYGET];
592   uint8_t   get_names[LENCPYGET];
593 } datctl;
594 
595 /* Ids for which context to modify. */
596 
enum {
  CTX_PAT,       /* The active pattern context */
  CTX_POPPAT,    /* The same, for a popped pattern */
  CTX_DEFPAT,    /* The default pattern context */
  CTX_DAT,       /* The active data (match) context */
  CTX_DEFDAT     /* The default data (match) context */
};
602 
603 /* Macros to simplify the big table below. */
604 
/* Each macro yields the byte offset of a field: CO and MO within the real
compile and match context structures, PO and PD within patctl, and DO within
datctl. PD gives the same offset as PO. */

#define CO(field) offsetof(PCRE2_REAL_COMPILE_CONTEXT, field)
#define MO(field) offsetof(PCRE2_REAL_MATCH_CONTEXT, field)
#define PO(field) offsetof(patctl, field)
#define PD(field) offsetof(patctl, field)
#define DO(field) offsetof(datctl, field)
610 
611 /* Table of all long-form modifiers. Must be in collating sequence of modifier
612 name because it is searched by binary chop. */
613 
614 typedef struct modstruct {
615   const char   *name;
616   uint16_t      which;
617   uint16_t      type;
618   uint32_t      value;
619   PCRE2_SIZE    offset;
620 } modstruct;
621 
622 static modstruct modlist[] = {
623   { "aftertext",                   MOD_PNDP, MOD_CTL, CTL_AFTERTEXT,              PO(control) },
624   { "allaftertext",                MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT,           PO(control) },
625   { "allcaptures",                 MOD_PND,  MOD_CTL, CTL_ALLCAPTURES,            PO(control) },
626   { "allow_empty_class",           MOD_PAT,  MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS,    PO(options) },
627   { "allow_lookaround_bsk",        MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK, CO(extra_options) },
628   { "allow_surrogate_escapes",     MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
629   { "allusedtext",                 MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT,            PO(control) },
630   { "allvector",                   MOD_PND,  MOD_CTL, CTL2_ALLVECTOR,             PO(control2) },
631   { "alt_bsux",                    MOD_PAT,  MOD_OPT, PCRE2_ALT_BSUX,             PO(options) },
632   { "alt_circumflex",              MOD_PAT,  MOD_OPT, PCRE2_ALT_CIRCUMFLEX,       PO(options) },
633   { "alt_verbnames",               MOD_PAT,  MOD_OPT, PCRE2_ALT_VERBNAMES,        PO(options) },
634   { "altglobal",                   MOD_PND,  MOD_CTL, CTL_ALTGLOBAL,              PO(control) },
635   { "anchored",                    MOD_PD,   MOD_OPT, PCRE2_ANCHORED,             PD(options) },
636   { "auto_callout",                MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,         PO(options) },
637   { "bad_escape_is_literal",       MOD_CTC,  MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
638   { "bincode",                     MOD_PAT,  MOD_CTL, CTL_BINCODE,                PO(control) },
639   { "bsr",                         MOD_CTC,  MOD_BSR, 0,                          CO(bsr_convention) },
640   { "callout_capture",             MOD_DAT,  MOD_CTL, CTL_CALLOUT_CAPTURE,        DO(control) },
641   { "callout_data",                MOD_DAT,  MOD_INS, 0,                          DO(callout_data) },
642   { "callout_error",               MOD_DAT,  MOD_IN2, 0,                          DO(cerror) },
643   { "callout_extra",               MOD_DAT,  MOD_CTL, CTL2_CALLOUT_EXTRA,         DO(control2) },
644   { "callout_fail",                MOD_DAT,  MOD_IN2, 0,                          DO(cfail) },
645   { "callout_info",                MOD_PAT,  MOD_CTL, CTL_CALLOUT_INFO,           PO(control) },
646   { "callout_no_where",            MOD_DAT,  MOD_CTL, CTL2_CALLOUT_NO_WHERE,      DO(control2) },
647   { "callout_none",                MOD_DAT,  MOD_CTL, CTL_CALLOUT_NONE,           DO(control) },
648   { "caseless",                    MOD_PATP, MOD_OPT, PCRE2_CASELESS,             PO(options) },
649   { "convert",                     MOD_PAT,  MOD_CON, 0,                          PO(convert_type) },
650   { "convert_glob_escape",         MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_escape) },
651   { "convert_glob_separator",      MOD_PAT,  MOD_CHR, 0,                          PO(convert_glob_separator) },
652   { "convert_length",              MOD_PAT,  MOD_INT, 0,                          PO(convert_length) },
653   { "copy",                        MOD_DAT,  MOD_NN,  DO(copy_numbers),           DO(copy_names) },
654   { "copy_matched_subject",        MOD_DAT,  MOD_OPT, PCRE2_COPY_MATCHED_SUBJECT, DO(options) },
655   { "debug",                       MOD_PAT,  MOD_CTL, CTL_DEBUG,                  PO(control) },
656   { "depth_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },
657   { "dfa",                         MOD_DAT,  MOD_CTL, CTL_DFA,                    DO(control) },
658   { "dfa_restart",                 MOD_DAT,  MOD_OPT, PCRE2_DFA_RESTART,          DO(options) },
659   { "dfa_shortest",                MOD_DAT,  MOD_OPT, PCRE2_DFA_SHORTEST,         DO(options) },
660   { "dollar_endonly",              MOD_PAT,  MOD_OPT, PCRE2_DOLLAR_ENDONLY,       PO(options) },
661   { "dotall",                      MOD_PATP, MOD_OPT, PCRE2_DOTALL,               PO(options) },
662   { "dupnames",                    MOD_PATP, MOD_OPT, PCRE2_DUPNAMES,             PO(options) },
663   { "endanchored",                 MOD_PD,   MOD_OPT, PCRE2_ENDANCHORED,          PD(options) },
664   { "escaped_cr_is_lf",            MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ESCAPED_CR_IS_LF, CO(extra_options) },
665   { "expand",                      MOD_PAT,  MOD_CTL, CTL_EXPAND,                 PO(control) },
666   { "extended",                    MOD_PATP, MOD_OPT, PCRE2_EXTENDED,             PO(options) },
667   { "extended_more",               MOD_PATP, MOD_OPT, PCRE2_EXTENDED_MORE,        PO(options) },
668   { "extra_alt_bsux",              MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ALT_BSUX,       CO(extra_options) },
669   { "find_limits",                 MOD_DAT,  MOD_CTL, CTL_FINDLIMITS,             DO(control) },
670   { "firstline",                   MOD_PAT,  MOD_OPT, PCRE2_FIRSTLINE,            PO(options) },
671   { "framesize",                   MOD_PAT,  MOD_CTL, CTL_FRAMESIZE,              PO(control) },
672   { "fullbincode",                 MOD_PAT,  MOD_CTL, CTL_FULLBINCODE,            PO(control) },
673   { "get",                         MOD_DAT,  MOD_NN,  DO(get_numbers),            DO(get_names) },
674   { "getall",                      MOD_DAT,  MOD_CTL, CTL_GETALL,                 DO(control) },
675   { "global",                      MOD_PNDP, MOD_CTL, CTL_GLOBAL,                 PO(control) },
676   { "heap_limit",                  MOD_CTM,  MOD_INT, 0,                          MO(heap_limit) },
677   { "hex",                         MOD_PAT,  MOD_CTL, CTL_HEXPAT,                 PO(control) },
678   { "info",                        MOD_PAT,  MOD_CTL, CTL_INFO,                   PO(control) },
679   { "jit",                         MOD_PAT,  MOD_IND, 7,                          PO(jit) },
680   { "jitfast",                     MOD_PAT,  MOD_CTL, CTL_JITFAST,                PO(control) },
681   { "jitstack",                    MOD_PNDP, MOD_INT, 0,                          PO(jitstack) },
682   { "jitverify",                   MOD_PAT,  MOD_CTL, CTL_JITVERIFY,              PO(control) },
683   { "literal",                     MOD_PAT,  MOD_OPT, PCRE2_LITERAL,              PO(options) },
684   { "locale",                      MOD_PAT,  MOD_STR, LOCALESIZE,                 PO(locale) },
685   { "mark",                        MOD_PNDP, MOD_CTL, CTL_MARK,                   PO(control) },
686   { "match_invalid_utf",           MOD_PAT,  MOD_OPT, PCRE2_MATCH_INVALID_UTF,    PO(options) },
687   { "match_limit",                 MOD_CTM,  MOD_INT, 0,                          MO(match_limit) },
688   { "match_line",                  MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_LINE,     CO(extra_options) },
689   { "match_unset_backref",         MOD_PAT,  MOD_OPT, PCRE2_MATCH_UNSET_BACKREF,  PO(options) },
690   { "match_word",                  MOD_CTC,  MOD_OPT, PCRE2_EXTRA_MATCH_WORD,     CO(extra_options) },
691   { "max_pattern_length",          MOD_CTC,  MOD_SIZ, 0,                          CO(max_pattern_length) },
692   { "memory",                      MOD_PD,   MOD_CTL, CTL_MEMORY,                 PD(control) },
693   { "multiline",                   MOD_PATP, MOD_OPT, PCRE2_MULTILINE,            PO(options) },
694   { "never_backslash_c",           MOD_PAT,  MOD_OPT, PCRE2_NEVER_BACKSLASH_C,    PO(options) },
695   { "never_ucp",                   MOD_PAT,  MOD_OPT, PCRE2_NEVER_UCP,            PO(options) },
696   { "never_utf",                   MOD_PAT,  MOD_OPT, PCRE2_NEVER_UTF,            PO(options) },
697   { "newline",                     MOD_CTC,  MOD_NL,  0,                          CO(newline_convention) },
698   { "no_auto_capture",             MOD_PAT,  MOD_OPT, PCRE2_NO_AUTO_CAPTURE,      PO(options) },
699   { "no_auto_possess",             MOD_PATP, MOD_OPT, PCRE2_NO_AUTO_POSSESS,      PO(options) },
700   { "no_dotstar_anchor",           MOD_PAT,  MOD_OPT, PCRE2_NO_DOTSTAR_ANCHOR,    PO(options) },
701   { "no_jit",                      MOD_DAT,  MOD_OPT, PCRE2_NO_JIT,               DO(options) },
702   { "no_start_optimize",           MOD_PATP, MOD_OPT, PCRE2_NO_START_OPTIMIZE,    PO(options) },
703   { "no_utf_check",                MOD_PD,   MOD_OPT, PCRE2_NO_UTF_CHECK,         PD(options) },
704   { "notbol",                      MOD_DAT,  MOD_OPT, PCRE2_NOTBOL,               DO(options) },
705   { "notempty",                    MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY,             DO(options) },
706   { "notempty_atstart",            MOD_DAT,  MOD_OPT, PCRE2_NOTEMPTY_ATSTART,     DO(options) },
707   { "noteol",                      MOD_DAT,  MOD_OPT, PCRE2_NOTEOL,               DO(options) },
708   { "null_context",                MOD_PD,   MOD_CTL, CTL_NULLCONTEXT,            PO(control) },
709   { "offset",                      MOD_DAT,  MOD_INT, 0,                          DO(offset) },
710   { "offset_limit",                MOD_CTM,  MOD_SIZ, 0,                          MO(offset_limit)},
711   { "ovector",                     MOD_DAT,  MOD_INT, 0,                          DO(oveccount) },
712   { "parens_nest_limit",           MOD_CTC,  MOD_INT, 0,                          CO(parens_nest_limit) },
713   { "partial_hard",                MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
714   { "partial_soft",                MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
715   { "ph",                          MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_HARD,         DO(options) },
716   { "posix",                       MOD_PAT,  MOD_CTL, CTL_POSIX,                  PO(control) },
717   { "posix_nosub",                 MOD_PAT,  MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB,  PO(control) },
718   { "posix_startend",              MOD_DAT,  MOD_IN2, 0,                          DO(startend) },
719   { "ps",                          MOD_DAT,  MOD_OPT, PCRE2_PARTIAL_SOFT,         DO(options) },
720   { "push",                        MOD_PAT,  MOD_CTL, CTL_PUSH,                   PO(control) },
721   { "pushcopy",                    MOD_PAT,  MOD_CTL, CTL_PUSHCOPY,               PO(control) },
722   { "pushtablescopy",              MOD_PAT,  MOD_CTL, CTL_PUSHTABLESCOPY,         PO(control) },
723   { "recursion_limit",             MOD_CTM,  MOD_INT, 0,                          MO(depth_limit) },  /* Obsolete synonym */
724   { "regerror_buffsize",           MOD_PAT,  MOD_INT, 0,                          PO(regerror_buffsize) },
725   { "replace",                     MOD_PND,  MOD_STR, REPLACE_MODSIZE,            PO(replacement) },
726   { "stackguard",                  MOD_PAT,  MOD_INT, 0,                          PO(stackguard_test) },
727   { "startchar",                   MOD_PND,  MOD_CTL, CTL_STARTCHAR,              PO(control) },
728   { "startoffset",                 MOD_DAT,  MOD_INT, 0,                          DO(offset) },
729   { "subject_literal",             MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL,       PO(control2) },
730   { "substitute_callout",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_CALLOUT,    PO(control2) },
731   { "substitute_extended",         MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_EXTENDED,   PO(control2) },
732   { "substitute_literal",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_LITERAL,    PO(control2) },
733   { "substitute_matched",          MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_MATCHED,    PO(control2) },
734   { "substitute_overflow_length",  MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
735   { "substitute_replacement_only", MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) },
736   { "substitute_skip",             MOD_PND,  MOD_INT, 0,                          PO(substitute_skip) },
737   { "substitute_stop",             MOD_PND,  MOD_INT, 0,                          PO(substitute_stop) },
738   { "substitute_unknown_unset",    MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
739   { "substitute_unset_empty",      MOD_PND,  MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) },
740   { "tables",                      MOD_PAT,  MOD_INT, 0,                          PO(tables_id) },
741   { "ucp",                         MOD_PATP, MOD_OPT, PCRE2_UCP,                  PO(options) },
742   { "ungreedy",                    MOD_PAT,  MOD_OPT, PCRE2_UNGREEDY,             PO(options) },
743   { "use_length",                  MOD_PAT,  MOD_CTL, CTL_USE_LENGTH,             PO(control) },
744   { "use_offset_limit",            MOD_PAT,  MOD_OPT, PCRE2_USE_OFFSET_LIMIT,     PO(options) },
745   { "utf",                         MOD_PATP, MOD_OPT, PCRE2_UTF,                  PO(options) },
746   { "utf8_input",                  MOD_PAT,  MOD_CTL, CTL_UTF8_INPUT,             PO(control) },
747   { "zero_terminate",              MOD_DAT,  MOD_CTL, CTL_ZERO_TERMINATE,         DO(control) }
748 };
749 
/* Number of entries in modlist. The whole expansion is parenthesized so that
the macro acts as a single operand when it appears inside a larger expression
(for example as a divisor). */

#define MODLISTCOUNT (sizeof(modlist)/sizeof(modstruct))
751 
/* Options and controls that may be used with the POSIX interface. A set that
is defined as (0) is empty. */

#define POSIX_SUPPORTED_COMPILE_OPTIONS \
  (PCRE2_CASELESS  | \
   PCRE2_DOTALL    | \
   PCRE2_LITERAL   | \
   PCRE2_MULTILINE | \
   PCRE2_UCP       | \
   PCRE2_UNGREEDY  | \
   PCRE2_UTF)

#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)

#define POSIX_SUPPORTED_COMPILE_CONTROLS \
  (CTL_AFTERTEXT    | \
   CTL_ALLAFTERTEXT | \
   CTL_EXPAND       | \
   CTL_HEXPAT       | \
   CTL_POSIX        | \
   CTL_POSIX_NOSUB  | \
   CTL_USE_LENGTH)

#define POSIX_SUPPORTED_COMPILE_CONTROLS2 (0)

#define POSIX_SUPPORTED_MATCH_OPTIONS \
  (PCRE2_NOTBOL | PCRE2_NOTEMPTY | PCRE2_NOTEOL)

#define POSIX_SUPPORTED_MATCH_CONTROLS  (CTL_ALLAFTERTEXT | CTL_AFTERTEXT)
#define POSIX_SUPPORTED_MATCH_CONTROLS2 (0)
771 
/* Control bits that are not ignored with 'push', one set for each control
word. */

#define PUSH_SUPPORTED_COMPILE_CONTROLS \
  (CTL_BINCODE        | \
   CTL_CALLOUT_INFO   | \
   CTL_FRAMESIZE      | \
   CTL_FULLBINCODE    | \
   CTL_HEXPAT         | \
   CTL_INFO           | \
   CTL_JITVERIFY      | \
   CTL_MEMORY         | \
   CTL_PUSH           | \
   CTL_PUSHCOPY       | \
   CTL_PUSHTABLESCOPY | \
   CTL_USE_LENGTH)

#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL2_NL_SET | CTL2_BSR_SET)
780 
/* Controls that apply only at compile time when 'push' is in use. The second
control word contributes nothing. */

#define PUSH_COMPILE_ONLY_CONTROLS   (CTL_JITVERIFY)
#define PUSH_COMPILE_ONLY_CONTROLS2  (0)
785 
/* Controls that must not be given with #pop or #popcopy. */

#define NOTPOP_CONTROLS \
  (CTL_HEXPAT         | \
   CTL_POSIX          | \
   CTL_POSIX_NOSUB    | \
   CTL_PUSH           | \
   CTL_PUSHCOPY       | \
   CTL_PUSHTABLESCOPY | \
   CTL_USE_LENGTH)
790 
791 /* Pattern controls that are mutually exclusive. At present these are all in
792 the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
793 CTL_POSIX, so it doesn't need its own entries. */
794 
795 static uint32_t exclusive_pat_controls[] = {
796   CTL_POSIX    | CTL_PUSH,
797   CTL_POSIX    | CTL_PUSHCOPY,
798   CTL_POSIX    | CTL_PUSHTABLESCOPY,
799   CTL_PUSH     | CTL_PUSHCOPY,
800   CTL_PUSH     | CTL_PUSHTABLESCOPY,
801   CTL_PUSHCOPY | CTL_PUSHTABLESCOPY,
802   CTL_EXPAND   | CTL_HEXPAT };
803 
804 /* Data controls that are mutually exclusive. At present these are all in the
805 first control word. */
806 
807 static uint32_t exclusive_dat_controls[] = {
808   CTL_ALLUSEDTEXT | CTL_STARTCHAR,
809   CTL_FINDLIMITS  | CTL_NULLCONTEXT };
810 
/* Single-character abbreviations for modifiers. Every entry starts with an
index of -1; the first time the abbreviation is encountered the index is
replaced by the position of the full entry in modlist, which saves searching
again for later test items. The table is short and is searched serially, so
the order of its entries is not significant. */

struct c1modstruct {
  const char *fullname;   /* Full modifier name */
  uint32_t    onechar;    /* Its one-character abbreviation */
  int         index;      /* Index in modlist, or -1 if not yet known */
};

typedef struct c1modstruct c1modstruct;

static c1modstruct c1modlist[] = {
  { "bincode",         'B', -1 },
  { "info",            'I', -1 },
  { "global",          'g', -1 },
  { "caseless",        'i', -1 },
  { "multiline",       'm', -1 },
  { "no_auto_capture", 'n', -1 },
  { "dotall",          's', -1 },
  { "extended",        'x', -1 }
};
833 
/* Number of entries in c1modlist. The whole expansion is parenthesized so that
the macro acts as a single operand when it appears inside a larger
expression. */

#define C1MODLISTCOUNT (sizeof(c1modlist)/sizeof(c1modstruct))
835 
836 /* Table of arguments for the -C command line option. Use macros to make the
837 table itself easier to read. */
838 
/* Give every build-time feature a macro that is always defined: 1 when the
feature is present and 0 when it is not. */

#ifdef SUPPORT_PCRE2_8
#define SUPPORT_8 1
#elif !defined(SUPPORT_8)
#define SUPPORT_8 0
#endif

#ifdef SUPPORT_PCRE2_16
#define SUPPORT_16 1
#elif !defined(SUPPORT_16)
#define SUPPORT_16 0
#endif

#ifdef SUPPORT_PCRE2_32
#define SUPPORT_32 1
#elif !defined(SUPPORT_32)
#define SUPPORT_32 0
#endif

/* EBCDIC builds also record CHAR_LF as the EBCDIC newline value; other builds
record zero. */

#if defined(EBCDIC)
#define SUPPORT_EBCDIC 1
#define EBCDIC_NL CHAR_LF
#else
#define SUPPORT_EBCDIC 0
#define EBCDIC_NL 0
#endif

/* BACKSLASH_C is 1 unless NEVER_BACKSLASH_C is defined. */

#ifndef NEVER_BACKSLASH_C
#define BACKSLASH_C 1
#else
#define BACKSLASH_C 0
#endif
872 
/* One entry in the table of -C arguments. The type field holds one of the
CONF_xxx values defined below. */

struct coptstruct {
  const char *name;    /* Argument name */
  uint32_t    type;    /* CONF_xxx kind */
  uint32_t    value;   /* Value associated with the argument */
};

typedef struct coptstruct coptstruct;

enum {
  CONF_BSR = 0,
  CONF_FIX = 1,
  CONF_FIZ = 2,
  CONF_INT = 3,
  CONF_NL  = 4
};
885 
886 static coptstruct coptlist[] = {
887   { "backslash-C", CONF_FIX, BACKSLASH_C },
888   { "bsr",         CONF_BSR, PCRE2_CONFIG_BSR },
889   { "ebcdic",      CONF_FIX, SUPPORT_EBCDIC },
890   { "ebcdic-nl",   CONF_FIZ, EBCDIC_NL },
891   { "jit",         CONF_INT, PCRE2_CONFIG_JIT },
892   { "linksize",    CONF_INT, PCRE2_CONFIG_LINKSIZE },
893   { "newline",     CONF_NL,  PCRE2_CONFIG_NEWLINE },
894   { "pcre2-16",    CONF_FIX, SUPPORT_16 },
895   { "pcre2-32",    CONF_FIX, SUPPORT_32 },
896   { "pcre2-8",     CONF_FIX, SUPPORT_8 },
897   { "unicode",     CONF_INT, PCRE2_CONFIG_UNICODE }
898 };
899 
/* Number of entries in coptlist. The whole expansion is parenthesized so that
the macro acts as a single operand when it appears inside a larger
expression. */

#define COPTLISTCOUNT (sizeof(coptlist)/sizeof(coptstruct))
901 
/* The SUPPORT_xxx helper macros are removed again at this point. */

#undef SUPPORT_EBCDIC
#undef SUPPORT_32
#undef SUPPORT_16
#undef SUPPORT_8
906 
907 
908 /* ----------------------- Static variables ------------------------ */
909 
910 static FILE *infile;
911 static FILE *outfile;
912 
913 static const void *last_callout_mark;
914 static PCRE2_JIT_STACK *jit_stack = NULL;
915 static size_t jit_stack_size = 0;
916 
917 static BOOL first_callout;
918 static BOOL jit_was_used;
919 static BOOL restrict_for_perl_test = FALSE;
920 static BOOL show_memory = FALSE;
921 
922 static int code_unit_size;                    /* Bytes */
923 static int jitrc;                             /* Return from JIT compile */
924 static int test_mode = DEFAULT_TEST_MODE;
925 static int timeit = 0;
926 static int timeitm = 0;
927 
/* Accumulated clock() totals. These are used only within this file, so they
are given internal linkage like every other variable in this section, which
keeps them out of the program's global namespace. */

static clock_t total_compile_time = 0;
static clock_t total_jit_compile_time = 0;
static clock_t total_match_time = 0;
931 
932 static uint32_t dfa_matched;
933 static uint32_t forbid_utf = 0;
934 static uint32_t maxlookbehind;
935 static uint32_t max_oveccount;
936 static uint32_t callout_count;
937 static uint32_t maxcapcount;
938 
939 static uint16_t local_newline_default = 0;
940 
941 static VERSION_TYPE jittarget[VERSION_SIZE];
942 static VERSION_TYPE version[VERSION_SIZE];
943 static VERSION_TYPE uversion[VERSION_SIZE];
944 
945 static patctl def_patctl;
946 static patctl pat_patctl;
947 static datctl def_datctl;
948 static datctl dat_datctl;
949 
950 static void *patstack[PATSTACKSIZE];
951 static int patstacknext = 0;
952 
953 static void *malloclist[MALLOCLISTSIZE];
954 static PCRE2_SIZE malloclistlength[MALLOCLISTSIZE];
955 static uint32_t malloclistptr = 0;
956 
/* The regex_t object exists only when the 8-bit library is supported. */

#if defined(SUPPORT_PCRE2_8)
static regex_t preg = { NULL, NULL, 0, 0, 0, 0 };
#endif

/* Workspace and character-table pointers; all start out NULL. */

static int *dfa_workspace = NULL;
static const uint8_t *locale_tables = NULL;
static const uint8_t *use_tables = NULL;
static uint8_t *tables3 = NULL;         /* For binary-loaded tables */
static uint32_t loadtables_length = 0;

static uint8_t locale_name[32];
967 
/* Buffers are needed for building 16-bit and 32-bit strings. 8-bit strings do
not need rebuilding, but the same naming scheme is set up so that macros can
use it. All input lines are read into "buffer", which is the same size as
pbuffer8. Pattern lines are always copied to pbuffer8 for use in callouts,
even when they are actually compiled from pbuffer16 or pbuffer32. */

static uint8_t  *buffer = NULL;
static uint8_t  *pbuffer8 = NULL;
static size_t    pbuffer8_size = 50000;         /* Initial size, bytes */

/* All processed data lines are put in dbuffer, which is cast as needed in
non-8-bit modes. It grows as necessary for long data lines. */

static uint8_t *dbuffer = NULL;
static size_t dbuffer_size = 1u << 14;    /* Initial size, bytes */
983 
984 
985 /* ---------------- Mode-dependent variables -------------------*/
986 
/* 8-bit library objects: compiled code, the contexts (working and default or
copy), and match data. */

#if defined(SUPPORT_PCRE2_8)
static pcre2_code_8             *compiled_code8;
static pcre2_general_context_8  *general_context8;
static pcre2_general_context_8  *general_context_copy8;
static pcre2_compile_context_8  *pat_context8;
static pcre2_compile_context_8  *default_pat_context8;
static pcre2_convert_context_8  *con_context8;
static pcre2_convert_context_8  *default_con_context8;
static pcre2_match_context_8    *dat_context8;
static pcre2_match_context_8    *default_dat_context8;
static pcre2_match_data_8       *match_data8;
#endif
995 
/* 16-bit library objects: compiled code, the contexts (working and default or
copy), match data, and the 16-bit pattern buffer with its size. */

#if defined(SUPPORT_PCRE2_16)
static pcre2_code_16            *compiled_code16;
static pcre2_general_context_16 *general_context16;
static pcre2_general_context_16 *general_context_copy16;
static pcre2_compile_context_16 *pat_context16;
static pcre2_compile_context_16 *default_pat_context16;
static pcre2_convert_context_16 *con_context16;
static pcre2_convert_context_16 *default_con_context16;
static pcre2_match_context_16   *dat_context16;
static pcre2_match_context_16   *default_dat_context16;
static pcre2_match_data_16      *match_data16;
static uint16_t *pbuffer16 = NULL;
static PCRE2_SIZE pbuffer16_size = 0;   /* Set only when needed */
#endif
1006 
/* 32-bit library objects: compiled code, the contexts (working and default or
copy), match data, and the 32-bit pattern buffer with its size. */

#if defined(SUPPORT_PCRE2_32)
static pcre2_code_32            *compiled_code32;
static pcre2_general_context_32 *general_context32;
static pcre2_general_context_32 *general_context_copy32;
static pcre2_compile_context_32 *pat_context32;
static pcre2_compile_context_32 *default_pat_context32;
static pcre2_convert_context_32 *con_context32;
static pcre2_convert_context_32 *default_con_context32;
static pcre2_match_context_32   *dat_context32;
static pcre2_match_context_32   *default_dat_context32;
static pcre2_match_data_32      *match_data32;
static uint32_t *pbuffer32 = NULL;
static PCRE2_SIZE pbuffer32_size = 0;   /* Set only when needed */
#endif
1017 
1018 
1019 /* ---------------- Macros that work in all modes ----------------- */
1020 
/* Shorthands built on the mode-dependent macros: CAST8VAR is CASTVAR with a
uint8_t pointer type; SET and SETPLUS are SETOP with the operators = and +=
respectively. */

#define CAST8VAR(x)   CASTVAR(uint8_t *, x)
#define SET(x,y)      SETOP(x, y, =)
#define SETPLUS(x,y)  SETOP(x, y, +=)
/* strlen() for a string addressed through a non-char pointer. The argument is
parenthesized so that the cast applies to the whole argument expression rather
than only to its first operand. */

#define strlen8(x) strlen((char *)(x))
1025 
1026 
1027 /* ---------------- Mode-dependent, runtime-testing macros ------------------*/
1028 
1029 /* Define macros for variables and functions that must be selected dynamically
1030 depending on the mode setting (8, 16, 32). These are dependent on which modes
1031 are supported. */
1032 
1033 #if (defined (SUPPORT_PCRE2_8) + defined (SUPPORT_PCRE2_16) + \
1034      defined (SUPPORT_PCRE2_32)) >= 2
1035 
1036 /* ----- All three modes supported ----- */
1037 
1038 #if defined(SUPPORT_PCRE2_8) && defined(SUPPORT_PCRE2_16) && defined(SUPPORT_PCRE2_32)
1039 
/* Read field b through the 8-, 16-, or 32-bit variant of pointer variable a
and cast it to type t. Any test_mode other than PCRE8_MODE or PCRE16_MODE
selects the 32-bit variant. */

#define CASTFLD(t,a,b) ( \
  (test_mode == PCRE8_MODE)?   (t)(G(a,8)->b) : \
  ((test_mode == PCRE16_MODE)? (t)(G(a,16)->b) : \
                               (t)(G(a,32)->b)))
1042 
/* Select the 8-, 16-, or 32-bit variant of variable x and cast it to type t.
Any test_mode other than PCRE8_MODE or PCRE16_MODE selects the 32-bit
variant. */

#define CASTVAR(t,x) \
  ((test_mode == PCRE8_MODE)? (t)G(x,8) : \
   ((test_mode == PCRE16_MODE)? (t)G(x,16) : \
                                (t)G(x,32)))
1046 
/* Fetch code unit number b from string a, treating a as a string of 8-, 16-,
or 32-bit code units according to test_mode, and yield it as a uint32_t. Any
test_mode other than PCRE8_MODE or PCRE16_MODE is treated as 32-bit. */

#define CODE_UNIT(a,b) \
  ((test_mode == PCRE8_MODE)? (uint32_t)(((PCRE2_SPTR8)(a))[b]) : \
   ((test_mode == PCRE16_MODE)? (uint32_t)(((PCRE2_SPTR16)(a))[b]) : \
                                (uint32_t)(((PCRE2_SPTR32)(a))[b])))
1051 
/* Copy a convert context: memcpy() from the b variable to the a variable of
the active width, using the size of that width's pcre2_convert_context type.
Any test_mode other than PCRE8_MODE or PCRE16_MODE is treated as 32-bit. */

#define CONCTXCPY(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),G(b,8),sizeof(pcre2_convert_context_8)); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16)); \
    else \
      memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32)); \
  } while (0)
1058 
/* Copy c code units from b into the a buffer of the active width; the byte
count given to memcpy() is c, c*2, or c*4 for 8-, 16-, and 32-bit mode. If
test_mode is none of the three modes, nothing is copied.

The chain ends in "else if" rather than "else", so it is wrapped in
do { } while (0): without the wrapper an "else" written by the caller after
the macro would attach to the final "if" inside the expansion. The b and c
arguments are parenthesized so that compound expressions are cast and used
as a whole. */

#define CONVERT_COPY(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),(char *)(b),(c)); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),(char *)(b),(c)*2); \
    else if (test_mode == PCRE32_MODE) \
      memcpy(G(a,32),(char *)(b),(c)*4); \
  } while (0)
1066 
/* Copy a match context: memcpy() from the b variable to the a variable of the
active width, using the size of that width's pcre2_match_context type. Any
test_mode other than PCRE8_MODE or PCRE16_MODE is treated as 32-bit. */

#define DATCTXCPY(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)); \
    else \
      memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)); \
  } while (0)
1073 
/* Read field b through the 8-, 16-, or 32-bit variant of pointer variable a.
Any test_mode other than PCRE8_MODE or PCRE16_MODE selects the 32-bit
variant. */

#define FLD(a,b) ( \
  (test_mode == PCRE8_MODE)?   G(a,8)->b : \
  ((test_mode == PCRE16_MODE)? G(a,16)->b : \
                               G(a,32)->b))
1076 
/* Copy a compile context: memcpy() from the b variable to the a variable of
the active width, using the size of that width's pcre2_compile_context type.
Any test_mode other than PCRE8_MODE or PCRE16_MODE is treated as 32-bit. */

#define PATCTXCPY(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)); \
    else if (test_mode == PCRE16_MODE) \
      memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \
    else \
      memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)); \
  } while (0)
1083 
/* Call pchars32(), pchars16(), or pchars8() on p advanced by offset code
units, storing the result in lv. Here the 32-bit and 16-bit modes are tested
first, so any other test_mode uses the 8-bit function. */

#define PCHARS(lv, p, offset, len, utf, f) \
  do { \
    if (test_mode == PCRE32_MODE) \
      lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
    else if (test_mode == PCRE16_MODE) \
      lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
    else \
      lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f); \
  } while (0)
1091 
/* As PCHARS, but the value returned by the pchars function is discarded. The
32-bit and 16-bit modes are tested first, so any other test_mode uses the
8-bit function. */

#define PCHARSV(p, offset, len, utf, f) \
  do { \
    if (test_mode == PCRE32_MODE) \
      (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \
    else if (test_mode == PCRE16_MODE) \
      (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \
    else \
      (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f); \
  } while (0)
1099 
/* Call pcre2_callout_enumerate() for the active width on that width's
compiled_code variable, with b cast to the matching callback type; the result
is stored in a. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses the
32-bit function. */

#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_callout_enumerate_8(compiled_code8, \
        (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_callout_enumerate_16(compiled_code16, \
        (int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
    else \
      a = pcre2_callout_enumerate_32(compiled_code32, \
        (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c); \
  } while (0)
1110 
/* Call pcre2_code_copy() for the active width on b and store the result in
the a variable of that width. Any test_mode other than PCRE8_MODE or
PCRE16_MODE uses the 32-bit function. */

#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      G(a,8) = pcre2_code_copy_8(b); \
    else if (test_mode == PCRE16_MODE) \
      G(a,16) = pcre2_code_copy_16(b); \
    else \
      G(a,32) = pcre2_code_copy_32(b); \
  } while (0)
1118 
/* Call pcre2_code_copy() for the active width on the b variable of that width
and store the result, cast to void *, in a. Any test_mode other than
PCRE8_MODE or PCRE16_MODE uses the 32-bit function. */

#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = (void *)pcre2_code_copy_8(G(b,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = (void *)pcre2_code_copy_16(G(b,16)); \
    else \
      a = (void *)pcre2_code_copy_32(G(b,32)); \
  } while (0)
1126 
/* Call pcre2_code_copy_with_tables() for the active width on the b variable
of that width and store the result, cast to void *, in a. Any test_mode other
than PCRE8_MODE or PCRE16_MODE uses the 32-bit function. */

#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
    else \
      a = (void *)pcre2_code_copy_with_tables_32(G(b,32)); \
  } while (0)
1134 
/* Call pcre2_compile() for the active width on the b variable of that width,
passing c to g through unchanged, and store the result in the a variable of
that width. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses the 32-bit
function. */

#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  do { \
    if (test_mode == PCRE8_MODE) \
      G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
    else if (test_mode == PCRE16_MODE) \
      G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g); \
    else \
      G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g); \
  } while (0)
1142 
/* Call pcre2_converted_pattern_free() for the active width, with a cast to
that width's PCRE2_UCHAR pointer type. Any test_mode other than PCRE8_MODE or
PCRE16_MODE uses the 32-bit function. */

#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a); \
    else \
      pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a); \
  } while (0)
1147 
/* Call pcre2_dfa_match() for the active width, using the b and g variables of
that width and casting c to that width's PCRE2_SPTR type; the result is stored
in a. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses the 32-bit
function. */

#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j); \
    else \
      a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j); \
  } while (0)
1155 
/* Call pcre2_get_error_message() for the active width, writing into the b
buffer of that width; the result is stored in r. The length passed is that
buffer's _size variable, divided by 2 for the 16-bit buffer and by 4 for the
32-bit buffer. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses the
32-bit function. */

#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size)); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2)); \
    else \
      r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4)); \
  } while (0)
1163 
/* Call pcre2_get_ovector_count() for the active width on the b variable of
that width; the result is stored in a. Any test_mode other than PCRE8_MODE or
PCRE16_MODE uses the 32-bit function. */

#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_get_ovector_count_8(G(b,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_get_ovector_count_16(G(b,16)); \
    else \
      a = pcre2_get_ovector_count_32(G(b,32)); \
  } while (0)
1171 
/* Call pcre2_get_startchar() for the active width on the b variable of that
width; the result is stored in a. Any test_mode other than PCRE8_MODE or
PCRE16_MODE uses the 32-bit function. */

#define PCRE2_GET_STARTCHAR(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_get_startchar_8(G(b,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_get_startchar_16(G(b,16)); \
    else \
      a = pcre2_get_startchar_32(G(b,32)); \
  } while (0)
1179 
/* Call pcre2_jit_compile() for the active width on the a variable of that
width with argument b; the result is stored in r. Any test_mode other than
PCRE8_MODE or PCRE16_MODE uses the 32-bit function. */

#define PCRE2_JIT_COMPILE(r,a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_jit_compile_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_jit_compile_16(G(a,16),b); \
    else \
      r = pcre2_jit_compile_32(G(a,32),b); \
  } while (0)
1184 
/* Call pcre2_jit_free_unused_memory() for the active width on the a variable
of that width. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses the
32-bit function. */

#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_jit_free_unused_memory_8(G(a,8)); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_jit_free_unused_memory_16(G(a,16)); \
    else \
      pcre2_jit_free_unused_memory_32(G(a,32)); \
  } while (0)
1189 
/* Call pcre2_jit_match() for the active width, using the b and g variables of
that width and casting c to that width's PCRE2_SPTR type; the result is stored
in a. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses the 32-bit
function. */

#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
    else \
      a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h); \
  } while (0)
1197 
/* Call pcre2_jit_stack_create() for the active width with arguments b, c, and
d, and store the result, cast to PCRE2_JIT_STACK *, in a. Any test_mode other
than PCRE8_MODE or PCRE16_MODE uses the 32-bit function.

The expansion does not end with a semicolon of its own: the caller supplies
it, as for the other macros here. With a built-in semicolon the usual
"MACRO(...);" became two statements, so the macro could not be used as the
body of an "if" that has an "else". */

#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
    else if (test_mode == PCRE16_MODE) \
      a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
    else \
      a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d); \
  } while (0)
1205 
/* Call pcre2_jit_stack_assign() for the active width on the a variable of
that width, with b cast to that width's pcre2_jit_callback type. Any test_mode
other than PCRE8_MODE or PCRE16_MODE uses the 32-bit function.

The expansion does not end with a semicolon of its own: the caller supplies
it, as for the other macros here. With a built-in semicolon the usual
"MACRO(...);" became two statements, so the macro could not be used as the
body of an "if" that has an "else". */

#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c); \
    else \
      pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c); \
  } while (0)
1213 
/* Call pcre2_jit_stack_free() for the active width, with a cast to that
width's pcre2_jit_stack pointer type. Any test_mode other than PCRE8_MODE or
PCRE16_MODE uses the 32-bit function.

The expansion does not end with a semicolon of its own: the caller supplies
it, as for the other macros here. With a built-in semicolon the usual
"MACRO(...);" became two statements, so the macro could not be used as the
body of an "if" that has an "else". */

#define PCRE2_JIT_STACK_FREE(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a); \
    else \
      pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a); \
  } while (0)
1221 
/* Call pcre2_maketables() for the active width with a NULL argument; the
result is stored in a. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses
the 32-bit function. */

#define PCRE2_MAKETABLES(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_maketables_8(NULL); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_maketables_16(NULL); \
    else \
      a = pcre2_maketables_32(NULL); \
  } while (0)
1226 
/* Call pcre2_match() for the active width, using the b and g variables of
that width and casting c to that width's PCRE2_SPTR type; the result is stored
in a. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses the 32-bit
function. */

#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h); \
    else \
      a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h); \
  } while (0)
1234 
/* Call pcre2_match_data_create() for the active width with arguments b and c,
and store the result in the a variable of that width. Any test_mode other than
PCRE8_MODE or PCRE16_MODE uses the 32-bit function. */

#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      G(a,8) = pcre2_match_data_create_8(b,c); \
    else if (test_mode == PCRE16_MODE) \
      G(a,16) = pcre2_match_data_create_16(b,c); \
    else \
      G(a,32) = pcre2_match_data_create_32(b,c); \
  } while (0)
1242 
/* Call pcre2_match_data_create_from_pattern() for the active width on the b
variable of that width with argument c, and store the result in the a variable
of that width. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses the
32-bit function. */

#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c); \
    else if (test_mode == PCRE16_MODE) \
      G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c); \
    else \
      G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c); \
  } while (0)
1250 
/* Call pcre2_match_data_free() for the active width on the a variable of that
width. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses the 32-bit
function. */

#define PCRE2_MATCH_DATA_FREE(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_match_data_free_8(G(a,8)); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_match_data_free_16(G(a,16)); \
    else \
      pcre2_match_data_free_32(G(a,32)); \
  } while (0)
1258 
/* Call pcre2_pattern_convert() for the active width, using the b and g
variables of that width and casting e to a pointer to that width's PCRE2_UCHAR
pointer; the result is stored in a. Any test_mode other than PCRE8_MODE or
PCRE16_MODE uses the 32-bit function. */

#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)); \
    else \
      a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32)); \
  } while (0)
1266 
/* Call pcre2_pattern_info() for the active width on the b variable of that
width with arguments c and d; the result is stored in a. Any test_mode other
than PCRE8_MODE or PCRE16_MODE uses the 32-bit function. */

#define PCRE2_PATTERN_INFO(a,b,c,d) \
  do { \
    if (test_mode == PCRE8_MODE) \
      a = pcre2_pattern_info_8(G(b,8),c,d); \
    else if (test_mode == PCRE16_MODE) \
      a = pcre2_pattern_info_16(G(b,16),c,d); \
    else \
      a = pcre2_pattern_info_32(G(b,32),c,d); \
  } while (0)
1274 
/* Call pcre2_printint() for the active width on that width's compiled_code
variable, with outfile and argument a. Any test_mode other than PCRE8_MODE or
PCRE16_MODE uses the 32-bit function. */

#define PCRE2_PRINTINT(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_printint_8(compiled_code8,outfile,a); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_printint_16(compiled_code16,outfile,a); \
    else \
      pcre2_printint_32(compiled_code32,outfile,a); \
  } while (0)
1282 
/* Call pcre2_serialize_decode() for the active width, casting a to a pointer
to that width's pcre2_code pointer and using the d variable of that width; the
result is stored in r. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses
the 32-bit function. */

#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)); \
    else \
      r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32)); \
  } while (0)
1290 
/* Call pcre2_serialize_encode() for the active width, casting a to a pointer
to that width's const pcre2_code pointer and using the e variable of that
width; the result is stored in r. Any test_mode other than PCRE8_MODE or
PCRE16_MODE uses the 32-bit function. */

#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8)); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16)); \
    else \
      r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32)); \
  } while (0)
1298 
/* Call pcre2_serialize_free() for the active width on a. Any test_mode other
than PCRE8_MODE or PCRE16_MODE uses the 32-bit function. */

#define PCRE2_SERIALIZE_FREE(a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_serialize_free_8(a); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_serialize_free_16(a); \
    else \
      pcre2_serialize_free_32(a); \
  } while (0)
1306 
/* Call pcre2_serialize_get_number_of_codes() for the active width on a; the
result is stored in r. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses
the 32-bit function.

The definition ends on its last line with neither a semicolon nor a line
continuation. Previously the final branch carried both, so the expansion
already contained the terminating semicolon (making the macro unusable as the
body of an "if" that has an "else") and the definition ran on into the
following line. */

#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_serialize_get_number_of_codes_8(a); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_serialize_get_number_of_codes_16(a); \
    else \
      r = pcre2_serialize_get_number_of_codes_32(a); \
  } while (0)
1314 
/* Call pcre2_set_callout() for the active width on the a variable of that
width, with b cast to a callout function taking that width's
pcre2_callout_block. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses
the 32-bit function.

The expansion does not end with a semicolon of its own: the caller supplies
it, as for the other macros here. With a built-in semicolon the usual
"MACRO(...);" became two statements, so the macro could not be used as the
body of an "if" that has an "else". */

#define PCRE2_SET_CALLOUT(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c); \
    else \
      pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c); \
  } while (0)
1322 
/* Call pcre2_set_character_tables() for the active width on the a variable of
that width with argument b. Any test_mode other than PCRE8_MODE or
PCRE16_MODE uses the 32-bit function. */

#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_character_tables_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_character_tables_16(G(a,16),b); \
    else \
      pcre2_set_character_tables_32(G(a,32),b); \
  } while (0)
1330 
/* Call pcre2_set_compile_recursion_guard() for the active width on the a
variable of that width with arguments b and c. Any test_mode other than
PCRE8_MODE or PCRE16_MODE uses the 32-bit function. */

#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
    else \
      pcre2_set_compile_recursion_guard_32(G(a,32),b,c); \
  } while (0)
1338 
/* Call pcre2_set_depth_limit() for the active width on the a variable of that
width with argument b. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses
the 32-bit function. */

#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_depth_limit_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_depth_limit_16(G(a,16),b); \
    else \
      pcre2_set_depth_limit_32(G(a,32),b); \
  } while (0)
1346 
/* Call pcre2_set_glob_separator() for the active width on the a variable of
that width with argument b; the result is stored in r. Any test_mode other
than PCRE8_MODE or PCRE16_MODE uses the 32-bit function. */

#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_set_glob_separator_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_set_glob_separator_16(G(a,16),b); \
    else \
      r = pcre2_set_glob_separator_32(G(a,32),b); \
  } while (0)
1354 
/* Call pcre2_set_glob_escape() for the active width on the a variable of that
width with argument b; the result is stored in r. Any test_mode other than
PCRE8_MODE or PCRE16_MODE uses the 32-bit function. */

#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      r = pcre2_set_glob_escape_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      r = pcre2_set_glob_escape_16(G(a,16),b); \
    else \
      r = pcre2_set_glob_escape_32(G(a,32),b); \
  } while (0)
1362 
/* Call pcre2_set_heap_limit() for the active width on the a variable of that
width with argument b. Any test_mode other than PCRE8_MODE or PCRE16_MODE uses
the 32-bit function. */

#define PCRE2_SET_HEAP_LIMIT(a,b) \
  do { \
    if (test_mode == PCRE8_MODE) \
      pcre2_set_heap_limit_8(G(a,8),b); \
    else if (test_mode == PCRE16_MODE) \
      pcre2_set_heap_limit_16(G(a,16),b); \
    else \
      pcre2_set_heap_limit_32(G(a,32),b); \
  } while (0)
1370 
1371 #define PCRE2_SET_MATCH_LIMIT(a,b) \
1372   if (test_mode == PCRE8_MODE) \
1373     pcre2_set_match_limit_8(G(a,8),b); \
1374   else if (test_mode == PCRE16_MODE) \
1375     pcre2_set_match_limit_16(G(a,16),b); \
1376   else \
1377     pcre2_set_match_limit_32(G(a,32),b)
1378 
1379 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
1380   if (test_mode == PCRE8_MODE) \
1381     pcre2_set_max_pattern_length_8(G(a,8),b); \
1382   else if (test_mode == PCRE16_MODE) \
1383     pcre2_set_max_pattern_length_16(G(a,16),b); \
1384   else \
1385     pcre2_set_max_pattern_length_32(G(a,32),b)
1386 
1387 #define PCRE2_SET_OFFSET_LIMIT(a,b) \
1388   if (test_mode == PCRE8_MODE) \
1389     pcre2_set_offset_limit_8(G(a,8),b); \
1390   else if (test_mode == PCRE16_MODE) \
1391     pcre2_set_offset_limit_16(G(a,16),b); \
1392   else \
1393     pcre2_set_offset_limit_32(G(a,32),b)
1394 
1395 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
1396   if (test_mode == PCRE8_MODE) \
1397     pcre2_set_parens_nest_limit_8(G(a,8),b); \
1398   else if (test_mode == PCRE16_MODE) \
1399     pcre2_set_parens_nest_limit_16(G(a,16),b); \
1400   else \
1401     pcre2_set_parens_nest_limit_32(G(a,32),b)
1402 
/* Calls pcre2_set_substitute_callout_N(G(a,N), b, c) for the width chosen by
test_mode. "b" is cast to the callout function pointer type of that width
(taking a pcre2_substitute_callout_block_N pointer and a void pointer), so one
callback expression can be handed to all three variants. The result is
discarded; no trailing semicolon. */
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    pcre2_set_substitute_callout_8(G(a,8), \
      (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_set_substitute_callout_16(G(a,16), \
      (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
  else \
    pcre2_set_substitute_callout_32(G(a,32), \
      (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)

/* Stores in "a" the result of pcre2_substitute_N(). "b" and "g" are names
that get the width suffix via G(); "c" and "i" are cast to PCRE2_SPTRN and "k"
to PCRE2_UCHARN *; the remaining arguments (d,e,f,h,j,l) are passed through
unchanged. The casts apply to the argument text as written (it is not
parenthesised), so pass simple expressions. No trailing semicolon. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
      (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
      (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l); \
  else \
    a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
      (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
1424 
/* a = pcre2_substring_copy_byname_N(G(b,N), G(c,N), (PCRE2_UCHARN *)d, e).
Both "b" and "c" are names that receive the width suffix; "d" is cast to the
code unit pointer type of the selected width. test_mode selects N, with 32 as
the fallback. No trailing semicolon. */
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e); \
  else \
    a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)

/* a = pcre2_substring_copy_bynumber_N(G(b,N), c, (PCRE2_UCHARN *)d, e).
Here only "b" is width-suffixed; "c" is passed as given. */
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e); \
  else \
    a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)

/* Calls pcre2_substring_free_N() on "a" after casting it to PCRE2_UCHARN *.
"a" is used directly (it is not passed through G()), so the same expression is
handed to whichever variant test_mode selects. */
#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == PCRE8_MODE) pcre2_substring_free_8((PCRE2_UCHAR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_free_16((PCRE2_UCHAR16 *)a); \
  else pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
1446 
/* a = pcre2_substring_get_byname_N(G(b,N), G(c,N), (PCRE2_UCHARN **)d, e).
"b" and "c" are width-suffixed names; "d" is cast to a pointer to a code unit
pointer of the selected width. No trailing semicolon. */
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e); \
  else \
    a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)

/* a = pcre2_substring_get_bynumber_N(G(b,N), c, (PCRE2_UCHARN **)d, e).
Only "b" is width-suffixed. */
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e); \
  else \
    a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)

/* a = pcre2_substring_length_byname_N(G(b,N), G(c,N), d). */
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d); \
  else \
    a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)

/* a = pcre2_substring_length_bynumber_N(G(b,N), c, d). */
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_length_bynumber_8(G(b,8),c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_length_bynumber_16(G(b,16),c,d); \
  else \
    a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
1478 
/* a = pcre2_substring_list_get_N(G(b,N), (PCRE2_UCHARN ***)c, d). "c" is used
as written and cast to the triple-pointer type of the selected width. No
trailing semicolon. */
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d); \
  else \
    a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)

/* Calls pcre2_substring_list_free_N() on "a" cast to PCRE2_SPTRN *. "a" is
not width-suffixed. */
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == PCRE8_MODE) \
    pcre2_substring_list_free_8((PCRE2_SPTR8 *)a); \
  else if (test_mode == PCRE16_MODE) \
    pcre2_substring_list_free_16((PCRE2_SPTR16 *)a); \
  else \
    pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)

/* a = pcre2_substring_number_from_name_N(G(b,N), G(c,N)). */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == PCRE8_MODE) \
    a = pcre2_substring_number_from_name_8(G(b,8),G(c,8)); \
  else if (test_mode == PCRE16_MODE) \
    a = pcre2_substring_number_from_name_16(G(b,16),G(c,16)); \
  else \
    a = pcre2_substring_number_from_name_32(G(b,32),G(c,32))
1502 
/* PTR(name) is an expression yielding, as a void pointer, whichever of the
width-suffixed variables name8, name16 or name32 corresponds to test_mode.
Any mode other than PCRE8_MODE or PCRE16_MODE selects the 32-bit variable. */
#define PTR(name) \
  ((test_mode == PCRE8_MODE)? (void *)G(name,8) : \
   ((test_mode == PCRE16_MODE)? (void *)G(name,16) : (void *)G(name,32)))
1507 
/* SETFLD(var,field,value): stores value in the member "field" reached through
the pointer var8, var16 or var32, according to test_mode (32-bit is the
fallback). The expansion is an unbraced if/else chain and does not end with a
semicolon. */
#define SETFLD(var,field,value) \
  if (test_mode == PCRE8_MODE) \
    G(var,8)->field = value; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16)->field = value; \
  else \
    G(var,32)->field = value

/* SETFLDVEC(var,field,index,value): as SETFLD, but the member is a vector and
the element at "index" is the one assigned. */
#define SETFLDVEC(var,field,index,value) \
  if (test_mode == PCRE8_MODE) \
    G(var,8)->field[index] = value; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16)->field[index] = value; \
  else \
    G(var,32)->field[index] = value

/* SETOP(var,operand,op): applies the assignment-style operator "op" with
right-hand side "operand" to var8, var16 or var32. Note the argument order:
the operand comes before the operator. */
#define SETOP(var,operand,op) \
  if (test_mode == PCRE8_MODE) \
    G(var,8) op operand; \
  else if (test_mode == PCRE16_MODE) \
    G(var,16) op operand; \
  else \
    G(var,32) op operand
1522 
/* SETCASTPTR(var,ptr): assigns "ptr" to var8, var16 or var32 (selected by
test_mode, 32-bit as fallback) after casting it to a pointer to uint8_t,
uint16_t or uint32_t respectively. No trailing semicolon. */
#define SETCASTPTR(var,ptr) \
  if (test_mode == PCRE8_MODE) G(var,8) = (uint8_t *)(ptr); \
  else if (test_mode == PCRE16_MODE) G(var,16) = (uint16_t *)(ptr); \
  else G(var,32) = (uint32_t *)(ptr)
1530 
/* STRLEN(str) is an int-valued expression: the result of strlen(),
strlen16() or strlen32() on "str", cast to the character or PCRE2_SPTRnn
pointer type for the current test_mode. The casts bind to the argument text as
written, so pass a simple expression. */
#define STRLEN(str) ( \
  (test_mode == PCRE8_MODE)? (int)strlen((char *)str) : \
  ((test_mode == PCRE16_MODE)? (int)strlen16((PCRE2_SPTR16)str) : \
    (int)strlen32((PCRE2_SPTR32)str)))
1534 
/* SUB1(fn,arg): calls fn8(arg8), fn16(arg16) or fn32(arg32) depending on
test_mode; both the callee name and the argument name get the width suffix.
Any result is discarded. Unbraced if/else chain, no trailing semicolon. */
#define SUB1(fn,arg) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg,16)); \
  else \
    G(fn,32)(G(arg,32))

/* SUB2(fn,arg1,arg2): two-argument form of SUB1. */
#define SUB2(fn,arg1,arg2) \
  if (test_mode == PCRE8_MODE) \
    G(fn,8)(G(arg1,8),G(arg2,8)); \
  else if (test_mode == PCRE16_MODE) \
    G(fn,16)(G(arg1,16),G(arg2,16)); \
  else \
    G(fn,32)(G(arg1,32),G(arg2,32))
1544 
/* TEST(var,op,rhs) is a boolean expression: true when the width-suffixed
variable for the current test_mode compares true against (rhs) using the
relational operator "op". Each mode is tested explicitly, so the result is
false if test_mode is none of the three known modes. */
#define TEST(var,op,rhs) \
  ((test_mode == PCRE8_MODE && G(var,8) op (rhs)) || \
   (test_mode == PCRE16_MODE && G(var,16) op (rhs)) || \
   (test_mode == PCRE32_MODE && G(var,32) op (rhs)))

/* TESTFLD(var,field,op,rhs): as TEST, but compares the member "field" reached
through the width-suffixed pointer. */
#define TESTFLD(var,field,op,rhs) \
  ((test_mode == PCRE8_MODE && G(var,8)->field op (rhs)) || \
   (test_mode == PCRE16_MODE && G(var,16)->field op (rhs)) || \
   (test_mode == PCRE32_MODE && G(var,32)->field op (rhs)))
1554 
1555 
1556 /* ----- Two out of three modes are supported ----- */
1557 
1558 #else
1559 
1560 /* We can use some macro trickery to make a single set of definitions work in
1561 the three different cases. */
1562 
1563 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
1564 
/* Choose the two code unit widths in use. BITONE is always the larger of the
two and BITTWO the smaller; the macros that follow paste these values onto
names via G(), and test for the BITONE mode first. */
#if defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_16)
#define BITONE 32
#define BITTWO 16

/* ----- 32-bit and 8-bit but not 16-bit supported ----- */

#elif defined(SUPPORT_PCRE2_32) && defined(SUPPORT_PCRE2_8)
#define BITONE 32
#define BITTWO 8

/* ----- 16-bit and 8-bit but not 32-bit supported ----- */

/* This branch is reached whenever neither test above holds; it does not
itself check that the 16-bit and 8-bit libraries are both supported. */
#else
#define BITONE 16
#define BITTWO 8
#endif


/* ----- Common macros for two-mode cases ----- */

/* The two widths expressed in bytes rather than bits (for example 16 -> 2). */
#define BYTEONE (BITONE/8)
#define BYTETWO (BITTWO/8)
1587 
/* CASTFLD(type,var,field) is an expression: the member "field" reached
through var<BITONE> or var<BITTWO>, cast to "type". The BITONE variable is
used when test_mode is the BITONE mode; any other mode uses BITTWO. */
#define CASTFLD(type,var,field) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)(G(var,BITONE)->field) : (type)(G(var,BITTWO)->field))

/* CASTVAR(type,var): the width-suffixed variable itself, cast to "type". */
#define CASTVAR(type,var) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (type)G(var,BITONE) : (type)G(var,BITTWO))

/* CODE_UNIT(ptr,idx): the code unit at index "idx" of the string at "ptr",
read through a PCRE2_SPTR of the current width and widened to uint32_t. */
#define CODE_UNIT(ptr,idx) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (uint32_t)(((G(PCRE2_SPTR,BITONE))(ptr))[idx]) : \
    (uint32_t)(((G(PCRE2_SPTR,BITTWO))(ptr))[idx]))
1600 
/* Copies one pcre2_convert_context_N structure's worth of bytes from the
object at G(b,N) to the object at G(a,N), for the width selected by test_mode
(BITONE when test_mode is the BITONE mode, otherwise BITTWO). Statement form,
no trailing semicolon. */
#define CONCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_convert_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_convert_context_,BITTWO)))

/* Copies "c" code units (c times the code unit size in bytes) from "b" to the
buffer at G(a,N). Unlike its neighbours this is a conditional expression, and
it is not wrapped in parentheses, so it is only safe as a complete statement
or a fully parenthesised operand. */
#define CONVERT_COPY(a,b,c) \
  (test_mode == G(G(PCRE,BITONE),_MODE))? \
  memcpy(G(a,BITONE),(char *)b,(c)*BYTEONE) : \
  memcpy(G(a,BITTWO),(char *)b,(c)*BYTETWO)

/* As CONCTXCPY, but the size copied is that of pcre2_match_context_N. */
#define DATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_match_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_match_context_,BITTWO)))
1617 
/* FLD(var,field) is an expression giving the member "field" reached through
var<BITONE> when test_mode is the BITONE mode, and through var<BITTWO>
otherwise. */
#define FLD(var,field) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(var,BITONE)->field : G(var,BITTWO)->field)
1620 
/* Copies sizeof(pcre2_compile_context_N) bytes from the object at G(b,N) to
the object at G(a,N), with N chosen by test_mode (BITONE mode, else BITTWO).
No trailing semicolon. */
#define PATCTXCPY(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    memcpy(G(a,BITONE),G(b,BITONE),sizeof(G(pcre2_compile_context_,BITONE))); \
  else \
    memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO)))

/* lv = pcharsN(p + offset, len, utf, f), where "p" is first cast to
PCRE2_SPTRN so that "offset" counts code units of the selected width. Note
that "offset" itself is not parenthesised in the expansion. */
#define PCHARS(lv, p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
  else \
    lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)

/* Same call as PCHARS, with the result explicitly discarded. */
#define PCHARSV(p, offset, len, utf, f) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \
  else \
    (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
1638 
/* a = pcre2_callout_enumerate_N(compiled_codeN, b, c). The pattern is always
the file-level compiled_code variable of the selected width, not a macro
argument; "b" is cast to the enumeration callback type for that width.
NOTE(review): G(pcre2_callout_enumerate,BITONE) pastes the width directly
after "enumerate" with no underscore, unlike the other wrappers here and the
8-bit-only form (pcre2_callout_enumerate_8) - confirm the intended name. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
     a = G(pcre2_callout_enumerate,BITONE)(G(compiled_code,BITONE), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
     a = G(pcre2_callout_enumerate,BITTWO)(G(compiled_code,BITTWO), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)

/* G(a,N) = pcre2_code_copy_N(b): "a" is a width-suffixed name, "b" is passed
as written. */
#define PCRE2_CODE_COPY_FROM_VOID(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_code_copy_,BITONE)(b); \
  else \
    G(a,BITTWO) = G(pcre2_code_copy_,BITTWO)(b)

/* a = (void *)pcre2_code_copy_N(G(b,N)): the reverse direction, storing the
copy through a void pointer. */
#define PCRE2_CODE_COPY_TO_VOID(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (void *)G(pcre2_code_copy_,BITONE)(G(b,BITONE)); \
  else \
    a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO))

/* a = (void *)pcre2_code_copy_with_tables_N(G(b,N)). */
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \
  else \
    a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO))
1664 
/* G(a,N) = pcre2_compile_N(G(b,N), c, d, e, f, g). Both the destination "a"
and the first argument "b" are width-suffixed names; the other arguments are
passed through as written. No trailing semicolon. */
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \
  else \
    G(a,BITTWO) = G(pcre2_compile_,BITTWO)(G(b,BITTWO),c,d,e,f,g)

/* Calls pcre2_converted_pattern_free_N() on "a" cast to PCRE2_UCHARN *. */
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_converted_pattern_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
  else \
    G(pcre2_converted_pattern_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)

/* a = pcre2_dfa_match_N(G(b,N), (PCRE2_SPTRN)c, d, e, f, G(g,N), h, i, j).
"b" and "g" are width-suffixed names; "c" is cast to the subject pointer type
of the selected width. */
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_dfa_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),h,i,j); \
  else \
    a = G(pcre2_dfa_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),h,i,j)
1684 
/* r = pcre2_get_error_message_N(a, bN, bN_size/BYTE...). The buffer is the
width-suffixed variable named by "b", and its length argument is a companion
variable whose name is that variable's name followed by "_size", divided by
the code unit size in bytes (presumably because the _size variable holds a
byte count - confirm at its definition). */
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_get_error_message_,BITONE)(a,G(b,BITONE),G(G(b,BITONE),_size/BYTEONE)); \
  else \
    r = G(pcre2_get_error_message_,BITTWO)(a,G(b,BITTWO),G(G(b,BITTWO),_size/BYTETWO))

/* a = pcre2_get_ovector_count_N(G(b,N)). */
#define PCRE2_GET_OVECTOR_COUNT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_get_ovector_count_,BITONE)(G(b,BITONE)); \
  else \
    a = G(pcre2_get_ovector_count_,BITTWO)(G(b,BITTWO))

/* a = pcre2_get_startchar_N(G(b,N)). */
#define PCRE2_GET_STARTCHAR(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_get_startchar_,BITONE)(G(b,BITONE)); \
  else \
    a = G(pcre2_get_startchar_,BITTWO)(G(b,BITTWO))
1702 
/* r = pcre2_jit_compile_N(G(a,N), b), with N chosen by test_mode (BITONE
mode, otherwise BITTWO). No trailing semicolon. */
#define PCRE2_JIT_COMPILE(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_jit_compile_,BITONE)(G(a,BITONE),b); \
  else \
    r = G(pcre2_jit_compile_,BITTWO)(G(a,BITTWO),b)

/* Calls pcre2_jit_free_unused_memory_N(G(a,N)). */
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_free_unused_memory_,BITONE)(G(a,BITONE)); \
  else \
    G(pcre2_jit_free_unused_memory_,BITTWO)(G(a,BITTWO))

/* a = pcre2_jit_match_N(G(b,N), (PCRE2_SPTRN)c, d, e, f, G(g,N), h). "b" and
"g" are width-suffixed names; "c" is cast to the subject pointer type. */
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_jit_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),h); \
  else \
    a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),h)
1722 
/* a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_N(b, c, d), with N chosen by
test_mode (BITONE mode, otherwise BITTWO). The result is cast to the
width-neutral PCRE2_JIT_STACK pointer type so that one variable can hold it in
either mode.

The expansion ends with its own semicolon (as the sibling JIT stack macros
do), so a caller's ";" produces a harmless empty statement; do not use this
macro as the unbraced body of an "if" that has an "else". The definition ends
on the last line below: it must not finish with a backslash, otherwise
whatever line follows would be absorbed into the macro body. */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
  else \
    a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d);
1728 
/* Calls pcre2_jit_stack_assign_N(G(a,N), (pcre2_jit_callback_N)b, c). Note
that the expansion already ends with a semicolon, so "MACRO(...);" yields an
extra empty statement and the macro cannot be the unbraced body of an "if"
that is followed by "else". */
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_assign_,BITONE)(G(a,BITONE),(G(pcre2_jit_callback_,BITONE))b,c); \
  else \
    G(pcre2_jit_stack_assign_,BITTWO)(G(a,BITTWO),(G(pcre2_jit_callback_,BITTWO))b,c);

/* Calls pcre2_jit_stack_free_N() on "a" cast to pcre2_jit_stack_N *. This
expansion also carries its own trailing semicolon. */
#define PCRE2_JIT_STACK_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_jit_stack_free_,BITONE)((G(pcre2_jit_stack_,BITONE) *)a); \
  else \
    G(pcre2_jit_stack_free_,BITTWO)((G(pcre2_jit_stack_,BITTWO) *)a);
1740 
/* a = pcre2_maketables_N(NULL); the argument is always NULL here. */
#define PCRE2_MAKETABLES(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_maketables_,BITONE)(NULL); \
  else \
    a = G(pcre2_maketables_,BITTWO)(NULL)

/* a = pcre2_match_N(G(b,N), (PCRE2_SPTRN)c, d, e, f, G(g,N), h). "b" and "g"
are width-suffixed names; "c" is cast to the subject pointer type of the
selected width. */
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_match_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),h); \
  else \
    a = G(pcre2_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),h)

/* G(a,N) = pcre2_match_data_create_N(b, c). */
#define PCRE2_MATCH_DATA_CREATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_match_data_create_,BITONE)(b,c); \
  else \
    G(a,BITTWO) = G(pcre2_match_data_create_,BITTWO)(b,c)

/* G(a,N) = pcre2_match_data_create_from_pattern_N(G(b,N), c). */
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE) = G(pcre2_match_data_create_from_pattern_,BITONE)(G(b,BITONE),c); \
  else \
    G(a,BITTWO) = G(pcre2_match_data_create_from_pattern_,BITTWO)(G(b,BITTWO),c)

/* Calls pcre2_match_data_free_N(G(a,N)). */
#define PCRE2_MATCH_DATA_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_match_data_free_,BITONE)(G(a,BITONE)); \
  else \
    G(pcre2_match_data_free_,BITTWO)(G(a,BITTWO))
1772 
/* a = pcre2_pattern_convert_N(G(b,N), c, d, (PCRE2_UCHARN **)e, f, G(g,N)).
"b" and "g" are width-suffixed names; "e" is cast to a pointer to a code unit
pointer of the selected width. */
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_pattern_convert_,BITONE)(G(b,BITONE),c,d,(G(PCRE2_UCHAR,BITONE) **)e,f,G(g,BITONE)); \
  else \
    a = G(pcre2_pattern_convert_,BITTWO)(G(b,BITTWO),c,d,(G(PCRE2_UCHAR,BITTWO) **)e,f,G(g,BITTWO))

/* a = pcre2_pattern_info_N(G(b,N), c, d). */
#define PCRE2_PATTERN_INFO(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_pattern_info_,BITONE)(G(b,BITONE),c,d); \
  else \
    a = G(pcre2_pattern_info_,BITTWO)(G(b,BITTWO),c,d)

/* Calls pcre2_printint_N(compiled_codeN, outfile, a). Both compiled_codeN and
outfile are names taken from the surrounding scope, not macro arguments. */
#define PCRE2_PRINTINT(a) \
 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,a); \
  else \
    G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,a)
1790 
/* r = pcre2_serialize_decode_N((pcre2_code_N **)a, b, c, G(d,N)). "a" is used
as written and cast; "d" is a width-suffixed name. */
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_decode_,BITONE)((G(pcre2_code_,BITONE) **)a,b,c,G(d,BITONE)); \
  else \
    r = G(pcre2_serialize_decode_,BITTWO)((G(pcre2_code_,BITTWO) **)a,b,c,G(d,BITTWO))

/* r = pcre2_serialize_encode_N((const pcre2_code_N **)a, b, c, d, G(e,N)).
In G(const pcre2_code_,N) only the last token, pcre2_code_, takes part in the
paste, giving "const pcre2_code_N". */
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_encode_,BITONE)((G(const pcre2_code_,BITONE) **)a,b,c,d,G(e,BITONE)); \
  else \
    r = G(pcre2_serialize_encode_,BITTWO)((G(const pcre2_code_,BITTWO) **)a,b,c,d,G(e,BITTWO))

/* Calls pcre2_serialize_free_N(a); "a" is passed as written. */
#define PCRE2_SERIALIZE_FREE(a) \
 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_serialize_free_,BITONE)(a); \
  else \
    G(pcre2_serialize_free_,BITTWO)(a)

/* r = pcre2_serialize_get_number_of_codes_N(a). */
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
 if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_serialize_get_number_of_codes_,BITONE)(a); \
  else \
    r = G(pcre2_serialize_get_number_of_codes_,BITTWO)(a)
1814 
/* Calls pcre2_set_callout_N(G(a,N), b, c), with "b" cast to the callout
function pointer type for the selected width. Unlike the setters that follow,
this expansion ends with its own semicolon, so "MACRO(...);" leaves an extra
empty statement and the macro cannot be the unbraced body of an "if" that is
followed by "else". */
#define PCRE2_SET_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_callout_block_,BITTWO) *, void *))b,c);

/* Calls pcre2_set_character_tables_N(G(a,N), b); result discarded, no
trailing semicolon. */
#define PCRE2_SET_CHARACTER_TABLES(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_character_tables_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)

/* Calls pcre2_set_compile_recursion_guard_N(G(a,N), b, c). */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b,c); \
  else \
    G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c)

/* Calls pcre2_set_depth_limit_N(G(a,N), b). */
#define PCRE2_SET_DEPTH_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_depth_limit_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
1840 
/* r = pcre2_set_glob_escape_N(G(a,N), b): the function result is kept in "r".
N is BITONE when test_mode is the BITONE mode, otherwise BITTWO. */
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_set_glob_escape_,BITONE)(G(a,BITONE),b); \
  else \
    r = G(pcre2_set_glob_escape_,BITTWO)(G(a,BITTWO),b)

/* r = pcre2_set_glob_separator_N(G(a,N), b). */
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    r = G(pcre2_set_glob_separator_,BITONE)(G(a,BITONE),b); \
  else \
    r = G(pcre2_set_glob_separator_,BITTWO)(G(a,BITTWO),b)
1852 
/* Five setters of identical shape: each calls pcre2_set_<what>_N(G(a,N), b),
using BITONE when test_mode is the BITONE mode and BITTWO otherwise. The
function result is ignored and no trailing semicolon is included. */

/* <what> = heap_limit */
#define PCRE2_SET_HEAP_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)

/* <what> = match_limit */
#define PCRE2_SET_MATCH_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b)

/* <what> = max_pattern_length */
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b)

/* <what> = offset_limit */
#define PCRE2_SET_OFFSET_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_offset_limit_,BITTWO)(G(a,BITTWO),b)

/* <what> = parens_nest_limit */
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_parens_nest_limit_,BITONE)(G(a,BITONE),b); \
  else \
    G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)
1882 
/* Calls pcre2_set_substitute_callout_N(G(a,N), b, c), with "b" cast to the
substitute-callout function pointer type for the selected width. No trailing
semicolon. */
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_set_substitute_callout_,BITONE)(G(a,BITONE), \
      (int (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))b,c); \
  else \
    G(pcre2_set_substitute_callout_,BITTWO)(G(a,BITTWO), \
      (int (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))b,c)

/* a = pcre2_substitute_N(...). "b" and "g" are width-suffixed names; "c" and
"i" are cast to PCRE2_SPTRN and "k" to PCRE2_UCHARN *; d, e, f, h, j and l are
passed through unchanged. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
      G(g,BITONE),h,(G(PCRE2_SPTR,BITONE))i,j, \
      (G(PCRE2_UCHAR,BITONE) *)k,l); \
  else \
    a = G(pcre2_substitute_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
      G(g,BITTWO),h,(G(PCRE2_SPTR,BITTWO))i,j, \
      (G(PCRE2_UCHAR,BITTWO) *)k,l)
1900 
/* a = pcre2_substring_copy_byname_N(G(b,N), G(c,N), (PCRE2_UCHARN *)d, e).
"b" and "c" are width-suffixed names; "d" is cast to the code unit pointer
type of the selected width. */
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_copy_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
      (G(PCRE2_UCHAR,BITONE) *)d,e); \
  else \
    a = G(pcre2_substring_copy_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\
      (G(PCRE2_UCHAR,BITTWO) *)d,e)

/* a = pcre2_substring_copy_bynumber_N(G(b,N), c, (PCRE2_UCHARN *)d, e). */
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_copy_bynumber_,BITONE)(G(b,BITONE),c,\
      (G(PCRE2_UCHAR,BITONE) *)d,e); \
  else \
    a = G(pcre2_substring_copy_bynumber_,BITTWO)(G(b,BITTWO),c,\
      (G(PCRE2_UCHAR,BITTWO) *)d,e)

/* Calls pcre2_substring_free_N() on "a" cast to PCRE2_UCHARN *; "a" is used
as written, not width-suffixed. */
#define PCRE2_SUBSTRING_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_free_,BITONE)((G(PCRE2_UCHAR,BITONE) *)a); \
  else G(pcre2_substring_free_,BITTWO)((G(PCRE2_UCHAR,BITTWO) *)a)
1921 
/* a = pcre2_substring_get_byname_N(G(b,N), G(c,N), (PCRE2_UCHARN **)d, e). */
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_get_byname_,BITONE)(G(b,BITONE),G(c,BITONE),\
      (G(PCRE2_UCHAR,BITONE) **)d,e); \
  else \
    a = G(pcre2_substring_get_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),\
      (G(PCRE2_UCHAR,BITTWO) **)d,e)

/* a = pcre2_substring_get_bynumber_N(G(b,N), c, (PCRE2_UCHARN **)d, e). */
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_get_bynumber_,BITONE)(G(b,BITONE),c,\
      (G(PCRE2_UCHAR,BITONE) **)d,e); \
  else \
    a = G(pcre2_substring_get_bynumber_,BITTWO)(G(b,BITTWO),c,\
      (G(PCRE2_UCHAR,BITTWO) **)d,e)

/* a = pcre2_substring_length_byname_N(G(b,N), G(c,N), d). */
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_length_byname_,BITONE)(G(b,BITONE),G(c,BITONE),d); \
  else \
    a = G(pcre2_substring_length_byname_,BITTWO)(G(b,BITTWO),G(c,BITTWO),d)

/* a = pcre2_substring_length_bynumber_N(G(b,N), c, d). */
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_length_bynumber_,BITONE)(G(b,BITONE),c,d); \
  else \
    a = G(pcre2_substring_length_bynumber_,BITTWO)(G(b,BITTWO),c,d)
1949 
/* a = pcre2_substring_list_get_N(G(b,N), (PCRE2_UCHARN ***)c, d). */
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_list_get_,BITONE)(G(b,BITONE), \
      (G(PCRE2_UCHAR,BITONE) ***)c,d); \
  else \
    a = G(pcre2_substring_list_get_,BITTWO)(G(b,BITTWO), \
      (G(PCRE2_UCHAR,BITTWO) ***)c,d)

/* Calls pcre2_substring_list_free_N() on "a" cast to PCRE2_SPTRN *. */
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(pcre2_substring_list_free_,BITONE)((G(PCRE2_SPTR,BITONE) *)a); \
  else \
    G(pcre2_substring_list_free_,BITTWO)((G(PCRE2_SPTR,BITTWO) *)a)

/* a = pcre2_substring_number_from_name_N(G(b,N), G(c,N)). */
#define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    a = G(pcre2_substring_number_from_name_,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    a = G(pcre2_substring_number_from_name_,BITTWO)(G(b,BITTWO),G(c,BITTWO))
1969 
/* PTR(name) is an expression yielding, as a void pointer, the width-suffixed
variable name<BITONE> when test_mode is the BITONE mode and name<BITTWO> in
every other case. */
#define PTR(name) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    (void *)G(name,BITONE) : (void *)G(name,BITTWO))
1973 
/* SETFLD(var,field,value): stores value in the member "field" reached through
var<BITONE> (when test_mode is the BITONE mode) or var<BITTWO> (otherwise).
Unbraced if/else, no trailing semicolon. */
#define SETFLD(var,field,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE)->field = value; \
  else \
    G(var,BITTWO)->field = value

/* SETFLDVEC(var,field,index,value): as SETFLD, assigning element "index" of a
vector member. */
#define SETFLDVEC(var,field,index,value) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE)->field[index] = value; \
  else \
    G(var,BITTWO)->field[index] = value

/* SETOP(var,operand,op): applies the assignment-style operator "op" with
right-hand side "operand" to the width-suffixed variable. The operand is the
second argument and the operator the third. */
#define SETOP(var,operand,op) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(var,BITONE) op operand; \
  else \
    G(var,BITTWO) op operand
1985 
/* SETCASTPTR(var,ptr): assigns "ptr" to var<BITONE> or var<BITTWO> after
casting it to a pointer to the unsigned integer type of that width (the type
name is built as uint<N>_t). No trailing semicolon. */
#define SETCASTPTR(var,ptr) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(var,BITONE) = \
    (G(G(uint,BITONE),_t) *)(ptr); \
  else G(var,BITTWO) = \
    (G(G(uint,BITTWO),_t) *)(ptr)
1991 
/* STRLEN(str) is an expression: the result of strlen<BITONE>() or
strlen<BITTWO>() applied to "str" cast to the PCRE2_SPTR type of that width.
The callee names are built by pasting, so something callable under each of
those names must be in scope (they are not defined in this part of the file).
No cast is applied to the result here. */
#define STRLEN(str) \
  ((test_mode == G(G(PCRE,BITONE),_MODE))? \
    G(strlen,BITONE)((G(PCRE2_SPTR,BITONE))str) : \
    G(strlen,BITTWO)((G(PCRE2_SPTR,BITTWO))str))
1995 
/* SUB1(fn,arg): calls fn<BITONE>(arg<BITONE>) when test_mode is the BITONE
mode and fn<BITTWO>(arg<BITTWO>) otherwise; any result is discarded. No
trailing semicolon. */
#define SUB1(fn,arg) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) G(fn,BITONE)(G(arg,BITONE)); \
  else G(fn,BITTWO)(G(arg,BITTWO))
2001 
/* SUB2(a,b,c): calls a<BITONE>(b<BITONE>, c<BITONE>) when test_mode is the
BITONE mode and a<BITTWO>(b<BITTWO>, c<BITTWO>) otherwise; all three names
receive the width suffix via G() and any result is discarded. This is the
two-argument companion of SUB1. The expansion is an unbraced if/else with no
trailing semicolon.

The callee is written G(a,N)(...), with exactly one closing parenthesis after
the G() invocation; an extra ")" there makes every use of the macro a syntax
error. */
#define SUB2(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
    G(a,BITONE)(G(b,BITONE),G(c,BITONE)); \
  else \
    G(a,BITTWO)(G(b,BITTWO),G(c,BITTWO))
2007 
/* TEST(var,op,rhs) is a boolean expression: true when the width-suffixed
variable for the current test_mode compares true against (rhs) using the
relational operator "op". Both modes are checked explicitly, so the result is
false when test_mode is neither the BITONE nor the BITTWO mode. */
#define TEST(var,op,rhs) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && G(var,BITONE) op (rhs)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && G(var,BITTWO) op (rhs)))

/* TESTFLD(var,field,op,rhs): as TEST, but compares the member "field" reached
through the width-suffixed pointer. */
#define TESTFLD(var,field,op,rhs) \
  ((test_mode == G(G(PCRE,BITONE),_MODE) && G(var,BITONE)->field op (rhs)) || \
   (test_mode == G(G(PCRE,BITTWO),_MODE) && G(var,BITTWO)->field op (rhs)))
2015 
2016 
2017 #endif  /* Two out of three modes */
2018 
2019 /* ----- End of cases where more than one mode is supported ----- */
2020 
2021 
2022 /* ----- Only 8-bit mode is supported ----- */
2023 
2024 #elif defined SUPPORT_PCRE2_8
/* 8-bit-only forms of the generic helpers. There is no test_mode dispatch:
every width-suffixed name is formed with G(...,8) directly. */

/* Member "field" reached through var8, cast to "type". */
#define CASTFLD(type,var,field) (type)(G(var,8)->field)
/* var8 itself, cast to "type". */
#define CASTVAR(type,var) (type)G(var,8)
/* Code unit number "idx" of the 8-bit string at "ptr", widened to uint32_t. */
#define CODE_UNIT(ptr,idx) (uint32_t)(((PCRE2_SPTR8)(ptr))[idx])
/* Copy one pcre2_convert_context_8 from the object at src8 to that at dst8. */
#define CONCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_convert_context_8))
/* Copy "count" bytes from "src" to the buffer dst8. */
#define CONVERT_COPY(dst,src,count) memcpy(G(dst,8),(char *)src, count)
/* Copy one pcre2_match_context_8 from the object at src8 to that at dst8. */
#define DATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_match_context_8))
/* Member "field" reached through var8. */
#define FLD(var,field) G(var,8)->field
/* Copy one pcre2_compile_context_8 from the object at src8 to that at dst8. */
#define PATCTXCPY(dst,src) \
  memcpy(G(dst,8),G(src,8),sizeof(pcre2_compile_context_8))
/* result = pchars8(subject + start, count, utf, out), with "subject" first
cast to PCRE2_SPTR8. */
#define PCHARS(result, subject, start, count, utf, out) \
  result = pchars8((PCRE2_SPTR8)(subject)+start, count, utf, out)
/* The same call as PCHARS with its result discarded. */
#define PCHARSV(subject, start, count, utf, out) \
  (void)pchars8((PCRE2_SPTR8)(subject)+start, count, utf, out)
/* 8-bit-only wrappers: each macro expands directly to the corresponding _8
function call, with no test_mode check. Names passed through G(x,8) receive
the "8" suffix. None of the macros in this group ends with a semicolon. */

/* Always operates on compiled_code8; "b" is cast to the enumeration callback
type. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
   a = pcre2_callout_enumerate_8(compiled_code8, \
     (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8))
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
  G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
#define PCRE2_CONVERTED_PATTERN_FREE(a) \
  pcre2_converted_pattern_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
  a = pcre2_dfa_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h,i,j)
/* The length argument is the variable named <b>8_size, used undivided since
an 8-bit code unit is one byte. */
#define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
  r = pcre2_get_error_message_8(a,G(b,8),G(G(b,8),_size))
#define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_8(G(b,8))
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_8(G(b,8))
#define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_8(G(a,8),b)
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_8(G(a,8))
/* 8-bit-only JIT and matching wrappers; each expands straight to the _8
function. */
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
/* The three JIT stack macros below end with their own semicolon, so a
caller's ";" adds an empty statement and they cannot be the unbraced body of
an "if" that is followed by "else". */
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
  a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d);
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
  pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c);
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a);
/* The argument to pcre2_maketables_8() is always NULL. */
#define PCRE2_MAKETABLES(a) a = pcre2_maketables_8(NULL)
#define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
  a = pcre2_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h)
#define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,8) = pcre2_match_data_create_8(b,c)
#define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
  G(a,8) = pcre2_match_data_create_from_pattern_8(G(b,8),c)
#define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8))
/* 8-bit-only pattern, serialization and context-setter wrappers. Macros with
an "r" or "a" result parameter assign the function result to it; the setters
without one discard it. None ends with a semicolon. */
#define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8))
#define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d)
/* Uses compiled_code8 and outfile from the surrounding scope. */
#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a)
#define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
  r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8))
#define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
  r = pcre2_serialize_encode_8((const pcre2_code_8 **)a,b,c,d,G(e,8))
#define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_8(a)
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
  r = pcre2_serialize_get_number_of_codes_8(a)
/* "b" is cast to the 8-bit callout function pointer type. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c)
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
  pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
#define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_8(G(a,8),b)
#define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_8(G(a,8),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
/* "b" is cast to the 8-bit substitute-callout function pointer type. */
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
  pcre2_set_substitute_callout_8(G(a,8), \
    (int (*)(pcre2_substitute_callout_block_8 *, void *))b,c)
/* 8-bit-only substitution and substring wrappers. Each assigns the result of
the _8 function to "a" (the two free macros have no result). Pointer
arguments are cast to the 8-bit PCRE2_SPTR8 / PCRE2_UCHAR8 types as written,
without parenthesising the argument. None ends with a semicolon. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
  a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),h, \
    (PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
#define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_copy_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 *)d,e)
#define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_8((PCRE2_UCHAR8 *)a)
#define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
  a = pcre2_substring_get_byname_8(G(b,8),G(c,8),(PCRE2_UCHAR8 **)d,e)
#define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_get_bynumber_8(G(b,8),c,(PCRE2_UCHAR8 **)d,e)
#define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
    a = pcre2_substring_length_byname_8(G(b,8),G(c,8),d)
#define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
    a = pcre2_substring_length_bynumber_8(G(b,8),c,d)
#define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
  a = pcre2_substring_list_get_8(G(b,8),(PCRE2_UCHAR8 ***)c,d)
#define PCRE2_SUBSTRING_LIST_FREE(a) \
  pcre2_substring_list_free_8((PCRE2_SPTR8 *)a)
2115 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2116   a = pcre2_substring_number_from_name_8(G(b,8),G(c,8));
2117 #define PTR(x) (void *)G(x,8)
2118 #define SETFLD(x,y,z) G(x,8)->y = z
2119 #define SETFLDVEC(x,y,v,z) G(x,8)->y[v] = z
2120 #define SETOP(x,y,z) G(x,8) z y
2121 #define SETCASTPTR(x,y) G(x,8) = (uint8_t *)(y)
2122 #define STRLEN(p) (int)strlen((char *)p)
2123 #define SUB1(a,b) G(a,8)(G(b,8))
2124 #define SUB2(a,b,c) G(a,8)(G(b,8),G(c,8))
2125 #define TEST(x,r,y) (G(x,8) r (y))
2126 #define TESTFLD(x,f,r,y) (G(x,8)->f r (y))
2127 
2128 
2129 /* ----- Only 16-bit mode is supported ----- */
2130 
2131 #elif defined SUPPORT_PCRE2_16
2132 #define CASTFLD(t,a,b) (t)(G(a,16)->b)
2133 #define CASTVAR(t,x) (t)G(x,16)
2134 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR16)(a))[b])
2135 #define CONCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_convert_context_16))
2136 #define CONVERT_COPY(a,b,c) memcpy(G(a,16),(char *)b, (c)*2)
2137 #define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16))
2138 #define FLD(a,b) G(a,16)->b
2139 #define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16))
2140 #define PCHARS(lv, p, offset, len, utf, f) \
2141   lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
2142 #define PCHARSV(p, offset, len, utf, f) \
2143   (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
2144 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
2145    a = pcre2_callout_enumerate_16(compiled_code16, \
2146      (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
2147 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
2148 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
2149 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
2150 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
2151   G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
2152 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
2153   pcre2_converted_pattern_free_16((PCRE2_UCHAR16 *)a)
2154 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
2155   a = pcre2_dfa_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h,i,j)
2156 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
2157   r = pcre2_get_error_message_16(a,G(b,16),G(G(b,16),_size/2))
2158 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_16(G(b,16))
2159 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
2160 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_16(G(a,16),b)
2161 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
2162 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
2163   a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
2164 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
2165   a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d);
2166 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
2167   pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c);
2168 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a);
2169 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_16(NULL)
2170 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
2171   a = pcre2_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h)
2172 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,16) = pcre2_match_data_create_16(b,c)
2173 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
2174   G(a,16) = pcre2_match_data_create_from_pattern_16(G(b,16),c)
2175 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16))
2176 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16))
2177 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d)
2178 #define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a)
2179 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2180   r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16))
2181 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2182   r = pcre2_serialize_encode_16((const pcre2_code_16 **)a,b,c,d,G(e,16))
2183 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_16(a)
2184 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2185   r = pcre2_serialize_get_number_of_codes_16(a)
/* Register a callout function in a 16-bit match context. The expansion has no
trailing semicolon, like the 8-bit and 32-bit variants of this macro: the call
site supplies it, so the macro can be used as the body of an unbraced if/else. */
#define PCRE2_SET_CALLOUT(a,b,c) \
  pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c)
2188 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
2189 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2190   pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
2191 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
2192 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_16(G(a,16),b)
2193 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_16(G(a,16),b)
2194 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
2195 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
2196 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
2197 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
2198 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
2199 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2200   pcre2_set_substitute_callout_16(G(a,16), \
2201     (int (*)(pcre2_substitute_callout_block_16 *, void *))b,c)
2202 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2203   a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),h, \
2204     (PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
2205 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2206   a = pcre2_substring_copy_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 *)d,e)
2207 #define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
2208   a = pcre2_substring_copy_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 *)d,e)
2209 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_16((PCRE2_UCHAR16 *)a)
2210 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2211   a = pcre2_substring_get_byname_16(G(b,16),G(c,16),(PCRE2_UCHAR16 **)d,e)
2212 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2213   a = pcre2_substring_get_bynumber_16(G(b,16),c,(PCRE2_UCHAR16 **)d,e)
2214 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2215     a = pcre2_substring_length_byname_16(G(b,16),G(c,16),d)
2216 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2217     a = pcre2_substring_length_bynumber_16(G(b,16),c,d)
2218 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2219   a = pcre2_substring_list_get_16(G(b,16),(PCRE2_UCHAR16 ***)c,d)
2220 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2221   pcre2_substring_list_free_16((PCRE2_SPTR16 *)a)
2222 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2223   a = pcre2_substring_number_from_name_16(G(b,16),G(c,16));
2224 #define PTR(x) (void *)G(x,16)
2225 #define SETFLD(x,y,z) G(x,16)->y = z
2226 #define SETFLDVEC(x,y,v,z) G(x,16)->y[v] = z
2227 #define SETOP(x,y,z) G(x,16) z y
2228 #define SETCASTPTR(x,y) G(x,16) = (uint16_t *)(y)
2229 #define STRLEN(p) (int)strlen16((PCRE2_SPTR16)p)
2230 #define SUB1(a,b) G(a,16)(G(b,16))
2231 #define SUB2(a,b,c) G(a,16)(G(b,16),G(c,16))
2232 #define TEST(x,r,y) (G(x,16) r (y))
2233 #define TESTFLD(x,f,r,y) (G(x,16)->f r (y))
2234 
2235 
2236 /* ----- Only 32-bit mode is supported ----- */
2237 
2238 #elif defined SUPPORT_PCRE2_32
2239 #define CASTFLD(t,a,b) (t)(G(a,32)->b)
2240 #define CASTVAR(t,x) (t)G(x,32)
2241 #define CODE_UNIT(a,b) (uint32_t)(((PCRE2_SPTR32)(a))[b])
2242 #define CONCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_convert_context_32))
2243 #define CONVERT_COPY(a,b,c) memcpy(G(a,32),(char *)b, (c)*4)
2244 #define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32))
2245 #define FLD(a,b) G(a,32)->b
2246 #define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32))
2247 #define PCHARS(lv, p, offset, len, utf, f) \
2248   lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
2249 #define PCHARSV(p, offset, len, utf, f) \
2250   (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
2251 #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
2252    a = pcre2_callout_enumerate_32(compiled_code32, \
2253      (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
2254 #define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
2255 #define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
2256 #define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
2257 #define PCRE2_COMPILE(a,b,c,d,e,f,g) \
2258   G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
2259 #define PCRE2_CONVERTED_PATTERN_FREE(a) \
2260   pcre2_converted_pattern_free_32((PCRE2_UCHAR32 *)a)
2261 #define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
2262   a = pcre2_dfa_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h,i,j)
2263 #define PCRE2_GET_ERROR_MESSAGE(r,a,b) \
2264   r = pcre2_get_error_message_32(a,G(b,32),G(G(b,32),_size/4))
2265 #define PCRE2_GET_OVECTOR_COUNT(a,b) a = pcre2_get_ovector_count_32(G(b,32))
2266 #define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
2267 #define PCRE2_JIT_COMPILE(r,a,b) r = pcre2_jit_compile_32(G(a,32),b)
2268 #define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
2269 #define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
2270   a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
2271 #define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
2272   a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
2273 #define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
2274   pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
2275 #define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
2276 #define PCRE2_MAKETABLES(a) a = pcre2_maketables_32(NULL)
2277 #define PCRE2_MATCH(a,b,c,d,e,f,g,h) \
2278   a = pcre2_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h)
2279 #define PCRE2_MATCH_DATA_CREATE(a,b,c) G(a,32) = pcre2_match_data_create_32(b,c)
2280 #define PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(a,b,c) \
2281   G(a,32) = pcre2_match_data_create_from_pattern_32(G(b,32),c)
2282 #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32))
2283 #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32))
2284 #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d)
2285 #define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a)
2286 #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \
2287   r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32))
2288 #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \
2289   r = pcre2_serialize_encode_32((const pcre2_code_32 **)a,b,c,d,G(e,32))
2290 #define PCRE2_SERIALIZE_FREE(a) pcre2_serialize_free_32(a)
2291 #define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
2292   r = pcre2_serialize_get_number_of_codes_32(a)
2293 #define PCRE2_SET_CALLOUT(a,b,c) \
2294   pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
2295 #define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
2296 #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
2297   pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
2298 #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
2299 #define PCRE2_SET_GLOB_ESCAPE(r,a,b) r = pcre2_set_glob_escape_32(G(a,32),b)
2300 #define PCRE2_SET_GLOB_SEPARATOR(r,a,b) r = pcre2_set_glob_separator_32(G(a,32),b)
2301 #define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
2302 #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
2303 #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
2304 #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
2305 #define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
2306 #define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
2307   pcre2_set_substitute_callout_32(G(a,32), \
2308     (int (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
2309 #define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
2310   a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),h, \
2311     (PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
2312 #define PCRE2_SUBSTRING_COPY_BYNAME(a,b,c,d,e) \
2313   a = pcre2_substring_copy_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 *)d,e)
/* Copy a numbered substring into a caller-supplied 32-bit buffer, storing the
function's return value in a. The expansion has no trailing semicolon, like the
8-bit and 16-bit variants of this macro: the call site supplies it, so the
macro can be used as the body of an unbraced if/else. */
#define PCRE2_SUBSTRING_COPY_BYNUMBER(a,b,c,d,e) \
  a = pcre2_substring_copy_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 *)d,e)
2316 #define PCRE2_SUBSTRING_FREE(a) pcre2_substring_free_32((PCRE2_UCHAR32 *)a)
2317 #define PCRE2_SUBSTRING_GET_BYNAME(a,b,c,d,e) \
2318   a = pcre2_substring_get_byname_32(G(b,32),G(c,32),(PCRE2_UCHAR32 **)d,e)
2319 #define PCRE2_SUBSTRING_GET_BYNUMBER(a,b,c,d,e) \
2320   a = pcre2_substring_get_bynumber_32(G(b,32),c,(PCRE2_UCHAR32 **)d,e)
2321 #define PCRE2_SUBSTRING_LENGTH_BYNAME(a,b,c,d) \
2322     a = pcre2_substring_length_byname_32(G(b,32),G(c,32),d)
2323 #define PCRE2_SUBSTRING_LENGTH_BYNUMBER(a,b,c,d) \
2324     a = pcre2_substring_length_bynumber_32(G(b,32),c,d)
2325 #define PCRE2_SUBSTRING_LIST_GET(a,b,c,d) \
2326   a = pcre2_substring_list_get_32(G(b,32),(PCRE2_UCHAR32 ***)c,d)
2327 #define PCRE2_SUBSTRING_LIST_FREE(a) \
2328   pcre2_substring_list_free_32((PCRE2_SPTR32 *)a)
2329 #define PCRE2_SUBSTRING_NUMBER_FROM_NAME(a,b,c) \
2330   a = pcre2_substring_number_from_name_32(G(b,32),G(c,32));
2331 #define PTR(x) (void *)G(x,32)
2332 #define SETFLD(x,y,z) G(x,32)->y = z
2333 #define SETFLDVEC(x,y,v,z) G(x,32)->y[v] = z
2334 #define SETOP(x,y,z) G(x,32) z y
2335 #define SETCASTPTR(x,y) G(x,32) = (uint32_t *)(y)
2336 #define STRLEN(p) (int)strlen32((PCRE2_SPTR32)p)
2337 #define SUB1(a,b) G(a,32)(G(b,32))
2338 #define SUB2(a,b,c) G(a,32)(G(b,32),G(c,32))
2339 #define TEST(x,r,y) (G(x,32) r (y))
2340 #define TESTFLD(x,f,r,y) (G(x,32)->f r (y))
2341 
2342 #endif
2343 
2344 /* ----- End of mode-specific function call macros ----- */
2345 
2346 
2347 
2348 
2349 /*************************************************
2350 *         Alternate character tables             *
2351 *************************************************/
2352 
2353 /* By default, the "tables" pointer in the compile context when calling
2354 pcre2_compile() is not set (= NULL), thereby using the default tables of the
2355 library. However, the tables modifier can be used to select alternate sets of
2356 tables, for different kinds of testing. Note that the locale modifier also
2357 adjusts the tables. */
2358 
2359 /* This is the set of tables distributed as default with PCRE2. It recognizes
2360 only ASCII characters. */
2361 
2362 static const uint8_t tables1[] = {
2363 
2364 /* This table is a lower casing table. */
2365 
2366     0,  1,  2,  3,  4,  5,  6,  7,
2367     8,  9, 10, 11, 12, 13, 14, 15,
2368    16, 17, 18, 19, 20, 21, 22, 23,
2369    24, 25, 26, 27, 28, 29, 30, 31,
2370    32, 33, 34, 35, 36, 37, 38, 39,
2371    40, 41, 42, 43, 44, 45, 46, 47,
2372    48, 49, 50, 51, 52, 53, 54, 55,
2373    56, 57, 58, 59, 60, 61, 62, 63,
2374    64, 97, 98, 99,100,101,102,103,
2375   104,105,106,107,108,109,110,111,
2376   112,113,114,115,116,117,118,119,
2377   120,121,122, 91, 92, 93, 94, 95,
2378    96, 97, 98, 99,100,101,102,103,
2379   104,105,106,107,108,109,110,111,
2380   112,113,114,115,116,117,118,119,
2381   120,121,122,123,124,125,126,127,
2382   128,129,130,131,132,133,134,135,
2383   136,137,138,139,140,141,142,143,
2384   144,145,146,147,148,149,150,151,
2385   152,153,154,155,156,157,158,159,
2386   160,161,162,163,164,165,166,167,
2387   168,169,170,171,172,173,174,175,
2388   176,177,178,179,180,181,182,183,
2389   184,185,186,187,188,189,190,191,
2390   192,193,194,195,196,197,198,199,
2391   200,201,202,203,204,205,206,207,
2392   208,209,210,211,212,213,214,215,
2393   216,217,218,219,220,221,222,223,
2394   224,225,226,227,228,229,230,231,
2395   232,233,234,235,236,237,238,239,
2396   240,241,242,243,244,245,246,247,
2397   248,249,250,251,252,253,254,255,
2398 
2399 /* This table is a case flipping table. */
2400 
2401     0,  1,  2,  3,  4,  5,  6,  7,
2402     8,  9, 10, 11, 12, 13, 14, 15,
2403    16, 17, 18, 19, 20, 21, 22, 23,
2404    24, 25, 26, 27, 28, 29, 30, 31,
2405    32, 33, 34, 35, 36, 37, 38, 39,
2406    40, 41, 42, 43, 44, 45, 46, 47,
2407    48, 49, 50, 51, 52, 53, 54, 55,
2408    56, 57, 58, 59, 60, 61, 62, 63,
2409    64, 97, 98, 99,100,101,102,103,
2410   104,105,106,107,108,109,110,111,
2411   112,113,114,115,116,117,118,119,
2412   120,121,122, 91, 92, 93, 94, 95,
2413    96, 65, 66, 67, 68, 69, 70, 71,
2414    72, 73, 74, 75, 76, 77, 78, 79,
2415    80, 81, 82, 83, 84, 85, 86, 87,
2416    88, 89, 90,123,124,125,126,127,
2417   128,129,130,131,132,133,134,135,
2418   136,137,138,139,140,141,142,143,
2419   144,145,146,147,148,149,150,151,
2420   152,153,154,155,156,157,158,159,
2421   160,161,162,163,164,165,166,167,
2422   168,169,170,171,172,173,174,175,
2423   176,177,178,179,180,181,182,183,
2424   184,185,186,187,188,189,190,191,
2425   192,193,194,195,196,197,198,199,
2426   200,201,202,203,204,205,206,207,
2427   208,209,210,211,212,213,214,215,
2428   216,217,218,219,220,221,222,223,
2429   224,225,226,227,228,229,230,231,
2430   232,233,234,235,236,237,238,239,
2431   240,241,242,243,244,245,246,247,
2432   248,249,250,251,252,253,254,255,
2433 
2434 /* This table contains bit maps for various character classes. Each map is 32
2435 bytes long and the bits run from the least significant end of each byte. The
2436 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
2437 graph, print, punct, and cntrl. Other classes are built from combinations. */
2438 
2439   0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
2440   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2441   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2442   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2443 
2444   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2445   0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
2446   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2447   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2448 
2449   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2450   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2451   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2452   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2453 
2454   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2455   0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
2456   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2457   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2458 
2459   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2460   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
2461   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2462   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2463 
2464   0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
2465   0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
2466   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2467   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2468 
2469   0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
2470   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
2471   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2472   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2473 
2474   0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
2475   0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
2476   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2477   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2478 
2479   0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
2480   0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
2481   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2482   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2483 
2484   0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
2485   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
2486   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2487   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
2488 
2489 /* This table identifies various classes of character by individual bits:
2490   0x01   white space character
2491   0x02   letter
2492   0x04   decimal digit
2493   0x08   hexadecimal digit
2494   0x10   alphanumeric or '_'
2495   0x80   regular expression metacharacter or binary zero
2496 */
2497 
2498   0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
2499   0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
2500   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
2501   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
2502   0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
2503   0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
2504   0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
2505   0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
2506   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
2507   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
2508   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
2509   0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
2510   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
2511   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
2512   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
2513   0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
2514   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
2515   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
2516   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
2517   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
2518   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
2519   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
2520   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
2521   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
2522   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
2523   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
2524   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
2525   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
2526   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
2527   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
2528   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
2529   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
2530 
2531 /* This is a set of tables that came originally from a Windows user. It seems
2532 to be at least an approximation of ISO 8859. In particular, there are
2533 characters greater than 128 that are marked as spaces, letters, etc. */
2534 
2535 static const uint8_t tables2[] = {

/* The layout mirrors tables1: 256 bytes of lower casing, 256 bytes of case
flipping, 320 bytes of character class bit maps, and 256 bytes of character
type flags (1088 bytes in all). */

/* Lower casing table. As well as A-Z, codes 192-214 and 216-222 are mapped to
the code 32 higher; 215 and 223 map to themselves. */

2536 0,1,2,3,4,5,6,7,
2537 8,9,10,11,12,13,14,15,
2538 16,17,18,19,20,21,22,23,
2539 24,25,26,27,28,29,30,31,
2540 32,33,34,35,36,37,38,39,
2541 40,41,42,43,44,45,46,47,
2542 48,49,50,51,52,53,54,55,
2543 56,57,58,59,60,61,62,63,
2544 64,97,98,99,100,101,102,103,
2545 104,105,106,107,108,109,110,111,
2546 112,113,114,115,116,117,118,119,
2547 120,121,122,91,92,93,94,95,
2548 96,97,98,99,100,101,102,103,
2549 104,105,106,107,108,109,110,111,
2550 112,113,114,115,116,117,118,119,
2551 120,121,122,123,124,125,126,127,
2552 128,129,130,131,132,133,134,135,
2553 136,137,138,139,140,141,142,143,
2554 144,145,146,147,148,149,150,151,
2555 152,153,154,155,156,157,158,159,
2556 160,161,162,163,164,165,166,167,
2557 168,169,170,171,172,173,174,175,
2558 176,177,178,179,180,181,182,183,
2559 184,185,186,187,188,189,190,191,
2560 224,225,226,227,228,229,230,231,
2561 232,233,234,235,236,237,238,239,
2562 240,241,242,243,244,245,246,215,
2563 248,249,250,251,252,253,254,223,
2564 224,225,226,227,228,229,230,231,
2565 232,233,234,235,236,237,238,239,
2566 240,241,242,243,244,245,246,247,
2567 248,249,250,251,252,253,254,255,

/* Case flipping table. The same upper-to-lower mappings as above, plus the
reverse mappings for a-z, 224-246 and 248-254; 247 and 255 map to
themselves. */

2568 0,1,2,3,4,5,6,7,
2569 8,9,10,11,12,13,14,15,
2570 16,17,18,19,20,21,22,23,
2571 24,25,26,27,28,29,30,31,
2572 32,33,34,35,36,37,38,39,
2573 40,41,42,43,44,45,46,47,
2574 48,49,50,51,52,53,54,55,
2575 56,57,58,59,60,61,62,63,
2576 64,97,98,99,100,101,102,103,
2577 104,105,106,107,108,109,110,111,
2578 112,113,114,115,116,117,118,119,
2579 120,121,122,91,92,93,94,95,
2580 96,65,66,67,68,69,70,71,
2581 72,73,74,75,76,77,78,79,
2582 80,81,82,83,84,85,86,87,
2583 88,89,90,123,124,125,126,127,
2584 128,129,130,131,132,133,134,135,
2585 136,137,138,139,140,141,142,143,
2586 144,145,146,147,148,149,150,151,
2587 152,153,154,155,156,157,158,159,
2588 160,161,162,163,164,165,166,167,
2589 168,169,170,171,172,173,174,175,
2590 176,177,178,179,180,181,182,183,
2591 184,185,186,187,188,189,190,191,
2592 224,225,226,227,228,229,230,231,
2593 232,233,234,235,236,237,238,239,
2594 240,241,242,243,244,245,246,215,
2595 248,249,250,251,252,253,254,223,
2596 192,193,194,195,196,197,198,199,
2597 200,201,202,203,204,205,206,207,
2598 208,209,210,211,212,213,214,247,
2599 216,217,218,219,220,221,222,255,

/* Character class bit maps: ten maps of 32 bytes each. Presumably these are in
the same order as in tables1 (space, xdigit, digit, upper, lower, word, graph,
print, punct, cntrl) - the order is not spelled out here. Unlike tables1,
several maps have bits set for characters above 127. */

2600 0,62,0,0,1,0,0,0,
2601 0,0,0,0,0,0,0,0,
2602 32,0,0,0,1,0,0,0,
2603 0,0,0,0,0,0,0,0,
2604 0,0,0,0,0,0,255,3,
2605 126,0,0,0,126,0,0,0,
2606 0,0,0,0,0,0,0,0,
2607 0,0,0,0,0,0,0,0,
2608 0,0,0,0,0,0,255,3,
2609 0,0,0,0,0,0,0,0,
2610 0,0,0,0,0,0,12,2,
2611 0,0,0,0,0,0,0,0,
2612 0,0,0,0,0,0,0,0,
2613 254,255,255,7,0,0,0,0,
2614 0,0,0,0,0,0,0,0,
2615 255,255,127,127,0,0,0,0,
2616 0,0,0,0,0,0,0,0,
2617 0,0,0,0,254,255,255,7,
2618 0,0,0,0,0,4,32,4,
2619 0,0,0,128,255,255,127,255,
2620 0,0,0,0,0,0,255,3,
2621 254,255,255,135,254,255,255,7,
2622 0,0,0,0,0,4,44,6,
2623 255,255,127,255,255,255,127,255,
2624 0,0,0,0,254,255,255,255,
2625 255,255,255,255,255,255,255,127,
2626 0,0,0,0,254,255,255,255,
2627 255,255,255,255,255,255,255,255,
2628 0,2,0,0,255,255,255,255,
2629 255,255,255,255,255,255,255,127,
2630 0,0,0,0,255,255,255,255,
2631 255,255,255,255,255,255,255,255,
2632 0,0,0,0,254,255,0,252,
2633 1,0,0,248,1,0,0,120,
2634 0,0,0,0,254,255,255,255,
2635 0,0,128,0,0,0,128,0,
2636 255,255,255,255,0,0,0,0,
2637 0,0,0,0,0,0,0,128,
2638 255,255,255,255,0,0,0,0,
2639 0,0,0,0,0,0,0,0,

/* Character type flags, one byte per character. Within 0-127 the values match
the final section of tables1 except for code 11, which is 0 here; some codes
above 127 are also flagged. */

2640 128,0,0,0,0,0,0,0,
2641 0,1,1,0,1,1,0,0,
2642 0,0,0,0,0,0,0,0,
2643 0,0,0,0,0,0,0,0,
2644 1,0,0,0,128,0,0,0,
2645 128,128,128,128,0,0,128,0,
2646 28,28,28,28,28,28,28,28,
2647 28,28,0,0,0,0,0,128,
2648 0,26,26,26,26,26,26,18,
2649 18,18,18,18,18,18,18,18,
2650 18,18,18,18,18,18,18,18,
2651 18,18,18,128,128,0,128,16,
2652 0,26,26,26,26,26,26,18,
2653 18,18,18,18,18,18,18,18,
2654 18,18,18,18,18,18,18,18,
2655 18,18,18,128,128,0,0,0,
2656 0,0,0,0,0,1,0,0,
2657 0,0,0,0,0,0,0,0,
2658 0,0,0,0,0,0,0,0,
2659 0,0,0,0,0,0,0,0,
2660 1,0,0,0,0,0,0,0,
2661 0,0,18,0,0,0,0,0,
2662 0,0,20,20,0,18,0,0,
2663 0,20,18,0,0,0,0,0,
2664 18,18,18,18,18,18,18,18,
2665 18,18,18,18,18,18,18,18,
2666 18,18,18,18,18,18,18,0,
2667 18,18,18,18,18,18,18,18,
2668 18,18,18,18,18,18,18,18,
2669 18,18,18,18,18,18,18,18,
2670 18,18,18,18,18,18,18,0,
2671 18,18,18,18,18,18,18,18
2672 };
2673 
2674 
2675 
2676 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
2677 /*************************************************
2678 *    Emulated memmove() for systems without it   *
2679 *************************************************/
2680 
2681 /* This function can make use of bcopy() if it is available. Otherwise do it by
2682 steam, as there are some non-Unix environments that lack both memmove() and
2683 bcopy(). */
2684 
static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
/* Note the argument order of bcopy(): source first, then destination. */
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;
size_t k;

if (to > from)
  {
  /* The destination lies above the source, so copy from the top down: any
  overlapping bytes are then read before they are overwritten. */
  for (k = n; k > 0; k--) to[k-1] = from[k-1];
  }
else
  {
  /* The destination is at or below the source; an ascending copy is safe. */
  for (k = 0; k < n; k++) to[k] = from[k];
  }

/* Like memmove(), yield the start of the destination. */
return d;
#endif   /* not HAVE_BCOPY */
}
2709 #undef memmove
2710 #define memmove(d,s,n) emulated_memmove(d,s,n)
2711 #endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
2712 
2713 
2714 
2715 #ifndef HAVE_STRERROR
2716 /*************************************************
2717 *     Provide strerror() for non-ANSI libraries  *
2718 *************************************************/
2719 
2720 /* Some old-fashioned systems (e.g. SunOS4) didn't have strerror() in their
2721 libraries. They may no longer be around, but just in case, we can try to
2722 provide the same facility by this simple alternative function. */
2723 
extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
/* Only numbers from 0 up to (but not including) sys_nerr index the table;
anything else gets a fixed fallback text. */
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
2733 #endif /* HAVE_STRERROR */
2734 
2735 
2736 
2737 /*************************************************
2738 *            Local memory functions              *
2739 *************************************************/
2740 
2741 /* Alternative memory functions, to test functionality. */
2742 
/* Wrapper around malloc(). When show_memory is set, each request is reported
on outfile and, while there is room in the list, the block and its size are
remembered in malloclist/malloclistlength so that my_free() can report the size
when the block is released. The data argument is not used.

Returns:   whatever malloc() returned, which may be NULL
*/

static void *my_malloc(PCRE2_SIZE size, void *data)
{
void *block = malloc(size);
(void)data;

if (!show_memory) return block;

if (block == NULL)
  {
  fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", size);
  return block;
  }

fprintf(outfile, "malloc  %5" SIZ_FORM, size);
#ifdef DEBUG_SHOW_MALLOC_ADDRESSES
fprintf(outfile, " %p", block);   /* Not portable */
#endif

if (malloclistptr >= MALLOCLISTSIZE)
  fprintf(outfile, " (not remembered)");
else
  {
  malloclist[malloclistptr] = block;
  malloclistlength[malloclistptr] = size;
  malloclistptr++;
  }

fprintf(outfile, "\n");
return block;
}
2771 
my_free(void * block,void * data)2772 static void my_free(void *block, void *data)
2773 {
2774 (void)data;
2775 if (show_memory)
2776   {
2777   uint32_t i, j;
2778   BOOL found = FALSE;
2779 
2780   fprintf(outfile, "free");
2781   for (i = 0; i < malloclistptr; i++)
2782     {
2783     if (block == malloclist[i])
2784       {
2785       fprintf(outfile, "    %5" SIZ_FORM, malloclistlength[i]);
2786       malloclistptr--;
2787       for (j = i; j < malloclistptr; j++)
2788         {
2789         malloclist[j] = malloclist[j+1];
2790         malloclistlength[j] = malloclistlength[j+1];
2791         }
2792       found = TRUE;
2793       break;
2794       }
2795     }
2796   if (!found) fprintf(outfile, " unremembered block");
2797 #ifdef DEBUG_SHOW_MALLOC_ADDRESSES
2798   fprintf(outfile, " %p", block);  /* Not portable */
2799 #endif
2800   fprintf(outfile, "\n");
2801   }
2802 free(block);
2803 }
2804 
2805 
2806 
2807 /*************************************************
2808 *       Callback function for stack guard        *
2809 *************************************************/
2810 
2811 /* This is set up to be called from pcre2_compile() when the stackguard=n
2812 modifier sets a value greater than zero. The test we do is whether the
2813 parenthesis nesting depth is greater than the value set by the modifier.
2814 
2815 Argument:  the current parenthesis nesting depth
2816 Returns:   non-zero to kill the compilation
2817 */
2818 
2819 static int
stack_guard(uint32_t depth,void * user_data)2820 stack_guard(uint32_t depth, void *user_data)
2821 {
2822 (void)user_data;
2823 return depth > pat_patctl.stackguard_test;
2824 }
2825 
2826 
2827 /*************************************************
2828 *         JIT memory callback                    *
2829 *************************************************/
2830 
2831 static PCRE2_JIT_STACK*
jit_callback(void * arg)2832 jit_callback(void *arg)
2833 {
2834 jit_was_used = TRUE;
2835 return (PCRE2_JIT_STACK *)arg;
2836 }
2837 
2838 
2839 /*************************************************
2840 *      Convert UTF-8 character to code point     *
2841 *************************************************/
2842 
2843 /* This function reads one or more bytes that represent a UTF-8 character,
2844 and returns the codepoint of that character. Note that the function supports
2845 the original UTF-8 definition of RFC 2279, allowing for values in the range 0
2846 to 0x7fffffff, up to 6 bytes long. This makes it possible to generate
2847 codepoints greater than 0x10ffff which are useful for testing PCRE2's error
2848 checking, and also for generating 32-bit non-UTF data values above the UTF
2849 limit.
2850 
2851 Argument:
2852   utf8bytes   a pointer to the byte vector
2853   vptr        a pointer to an int to receive the value
2854 
2855 Returns:      >  0 => the number of bytes consumed
2856               -6 to 0 => malformed UTF-8 character at offset = (-return)
2857 */
2858 
2859 static int
utf82ord(PCRE2_SPTR8 utf8bytes,uint32_t * vptr)2860 utf82ord(PCRE2_SPTR8 utf8bytes, uint32_t *vptr)
2861 {
2862 uint32_t c = *utf8bytes++;
2863 uint32_t d = c;
2864 int i, j, s;
2865 
2866 for (i = -1; i < 6; i++)               /* i is number of additional bytes */
2867   {
2868   if ((d & 0x80) == 0) break;
2869   d <<= 1;
2870   }
2871 
2872 if (i == -1) { *vptr = c; return 1; }  /* ascii character */
2873 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
2874 
2875 /* i now has a value in the range 1-5 */
2876 
2877 s = 6*i;
2878 d = (c & utf8_table3[i]) << s;
2879 
2880 for (j = 0; j < i; j++)
2881   {
2882   c = *utf8bytes++;
2883   if ((c & 0xc0) != 0x80) return -(j+1);
2884   s -= 6;
2885   d |= (c & 0x3f) << s;
2886   }
2887 
2888 /* Check that encoding was the correct unique one */
2889 
2890 for (j = 0; j < utf8_table1_size; j++)
2891   if (d <= (uint32_t)utf8_table1[j]) break;
2892 if (j != i) return -(i+1);
2893 
2894 /* Valid value */
2895 
2896 *vptr = d;
2897 return i+1;
2898 }
2899 
2900 
2901 
2902 /*************************************************
2903 *             Print one character                *
2904 *************************************************/
2905 
2906 /* Print a single character either literally, or as a hex escape, and count how
2907 many printed characters are used.
2908 
2909 Arguments:
2910   c            the character
2911   utf          TRUE in UTF mode
2912   f            the FILE to print to, or NULL just to count characters
2913 
2914 Returns:       number of characters written
2915 */
2916 
2917 static int
pchar(uint32_t c,BOOL utf,FILE * f)2918 pchar(uint32_t c, BOOL utf, FILE *f)
2919 {
2920 int n = 0;
2921 char tempbuffer[16];
2922 
2923 if (PRINTOK(c))
2924   {
2925   if (f != NULL) fprintf(f, "%c", c);
2926   return 1;
2927   }
2928 
2929 if (c < 0x100)
2930   {
2931   if (utf)
2932     {
2933     if (f != NULL) fprintf(f, "\\x{%02x}", c);
2934     return 6;
2935     }
2936   else
2937     {
2938     if (f != NULL) fprintf(f, "\\x%02x", c);
2939     return 4;
2940     }
2941   }
2942 
2943 if (f != NULL) n = fprintf(f, "\\x{%02x}", c);
2944   else n = sprintf(tempbuffer, "\\x{%02x}", c);
2945 
2946 return n >= 0 ? n : 0;
2947 }
2948 
2949 
2950 
2951 #ifdef SUPPORT_PCRE2_16
2952 /*************************************************
2953 *    Find length of 0-terminated 16-bit string   *
2954 *************************************************/
2955 
static size_t strlen16(PCRE2_SPTR16 p)
{
/* Returns the number of 16-bit code units before the terminating zero. The
pointer difference is converted directly to size_t; going through int would
truncate or sign-extend the length of a string longer than INT_MAX units. */

PCRE2_SPTR16 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2962 #endif  /* SUPPORT_PCRE2_16 */
2963 
2964 
2965 
2966 #ifdef SUPPORT_PCRE2_32
2967 /*************************************************
2968 *    Find length of 0-terminated 32-bit string   *
2969 *************************************************/
2970 
static size_t strlen32(PCRE2_SPTR32 p)
{
/* Returns the number of 32-bit code units before the terminating zero. The
pointer difference is converted directly to size_t; going through int would
truncate or sign-extend the length of a string longer than INT_MAX units. */

PCRE2_SPTR32 pp = p;
while (*pp != 0) pp++;
return (size_t)(pp - p);
}
2977 #endif  /* SUPPORT_PCRE2_32 */
2978 
2979 
2980 #ifdef SUPPORT_PCRE2_8
2981 /*************************************************
2982 *         Print 8-bit character string           *
2983 *************************************************/
2984 
2985 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2986 For printing *MARK strings, a negative length is given, indicating that the
2987 length is in the first code unit. If handed a NULL file, this function just
2988 counts chars without printing (because pchar() does that). */
2989 
static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
{
uint32_t c = 0;
int total = 0;
int remaining = (length < 0)? *p++ : length;  /* Negative => length in string */

while (remaining > 0)
  {
  int used = 0;

  /* In UTF mode, try to decode a whole character, accepting it only if it
  lies entirely within what remains of the string. */

  if (utf)
    {
    int rc = utf82ord(p, &c);
    if (rc > 0 && rc <= remaining) used = rc;
    }

  /* Otherwise (non-UTF, malformed, or would run over the end) treat the next
  byte as a character on its own. */

  if (used == 0)
    {
    c = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  total += pchar(c, utf, f);
  }

return total;
}
3014 #endif
3015 
3016 
3017 #ifdef SUPPORT_PCRE2_16
3018 /*************************************************
3019 *           Print 16-bit character string        *
3020 *************************************************/
3021 
3022 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
3023 For printing *MARK strings, a negative length is given, indicating that the
3024 length is in the first code unit. If handed a NULL file, just counts chars
3025 without printing. */
3026 
static int pchars16(PCRE2_SPTR16 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int remaining = (length < 0)? *p++ : length;  /* Negative => length in string */

for (; remaining > 0; remaining--)
  {
  uint32_t c = *p++ & 0xffff;

  /* In UTF mode, a code unit in the range 0xD800-0xDBFF that is followed,
  within the given length, by one in the range 0xDC00-0xDFFF is combined with
  it into a single code point, and both units are consumed. */

  if (utf && remaining > 1 && c >= 0xD800 && c < 0xDC00)
    {
    uint32_t low = *p & 0xffff;
    if (low >= 0xDC00 && low <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
      remaining--;
      p++;
      }
    }

  total += pchar(c, utf, f);
  }

return total;
}
3048 #endif  /* SUPPORT_PCRE2_16 */
3049 
3050 
3051 
3052 #ifdef SUPPORT_PCRE2_32
3053 /*************************************************
3054 *           Print 32-bit character string        *
3055 *************************************************/
3056 
3057 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
3058 For printing *MARK strings, a negative length is given, indicating that the
3059 length is in the first code unit. If handed a NULL file, just counts chars
3060 without printing. */
3061 
static int pchars32(PCRE2_SPTR32 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int remaining = length;

/* A negative length means that the real length is in the first code unit. */

if (remaining < 0) remaining = *p++;

/* Every 32-bit code unit is a complete character, so each one is passed
directly to pchar(). */

while (remaining > 0)
  {
  total += pchar(*p++, utf, f);
  remaining--;
  }

return total;
}
3074 #endif  /* SUPPORT_PCRE2_32 */
3075 
3076 
3077 
3078 
3079 /*************************************************
3080 *       Convert character value to UTF-8         *
3081 *************************************************/
3082 
3083 /* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. It is needed even when the
3085 8-bit library is not supported, to generate UTF-8 output for non-ASCII
3086 characters.
3087 
3088 Arguments:
3089   cvalue     the character value
3090   utf8bytes  pointer to buffer for result - at least 6 bytes long
3091 
Returns:     number of bytes placed in the buffer, or -1 if the value is
             greater than 0x7fffffff
3093 */
3094 
3095 static int
ord2utf8(uint32_t cvalue,uint8_t * utf8bytes)3096 ord2utf8(uint32_t cvalue, uint8_t *utf8bytes)
3097 {
3098 int i, j;
3099 if (cvalue > 0x7fffffffu)
3100   return -1;
3101 for (i = 0; i < utf8_table1_size; i++)
3102   if (cvalue <= (uint32_t)utf8_table1[i]) break;
3103 utf8bytes += i;
3104 for (j = i; j > 0; j--)
3105  {
3106  *utf8bytes-- = 0x80 | (cvalue & 0x3f);
3107  cvalue >>= 6;
3108  }
3109 *utf8bytes = utf8_table2[i] | cvalue;
3110 return i + 1;
3111 }
3112 
3113 
3114 
3115 #ifdef SUPPORT_PCRE2_16
3116 /*************************************************
3117 *           Convert string to 16-bit             *
3118 *************************************************/
3119 
3120 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3121 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3122 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3123 limit of 0x10ffff cause an error. In non-UTF mode the input is interpreted as
3124 UTF-8 if the utf8_input modifier is set, but an error is generated for values
3125 greater than 0xffff.
3126 
3127 If all the input bytes are ASCII, the space needed for a 16-bit string is
3128 exactly double the 8-bit size. Otherwise, the size needed for a 16-bit string
3129 is no more than double, because up to 0xffff uses no more than 3 bytes in UTF-8
3130 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes
3131 in UTF-16. The result is always left in pbuffer16. Impose a minimum size to
3132 save repeated re-sizing.
3133 
3134 Note that this function does not object to surrogate values. This is
3135 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
3136 for the purpose of testing that they are correctly faulted.
3137 
3138 Arguments:
3139   p          points to a byte string
3140   utf        true in UTF mode
3141   lenptr     points to number of bytes in the string (excluding trailing zero)
3142 
3143 Returns:     0 on success, with the length updated to the number of 16-bit
3144                data items used (excluding the trailing zero)
3145              OR -1 if a UTF-8 string is malformed
3146              OR -2 if a value > 0x10ffff is encountered in UTF mode
3147              OR -3 if a value > 0xffff is encountered when not in UTF mode
3148 */
3149 
3150 static PCRE2_SIZE
to16(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3151 to16(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3152 {
3153 uint16_t *pp;
3154 PCRE2_SIZE len = *lenptr;
3155 
3156 if (pbuffer16_size < 2*len + 2)
3157   {
3158   if (pbuffer16 != NULL) free(pbuffer16);
3159   pbuffer16_size = 2*len + 2;
3160   if (pbuffer16_size < 4096) pbuffer16_size = 4096;
3161   pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
3162   if (pbuffer16 == NULL)
3163     {
3164     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
3165       pbuffer16_size);
3166     exit(1);
3167     }
3168   }
3169 
3170 pp = pbuffer16;
3171 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3172   {
3173   for (; len > 0; len--) *pp++ = *p++;
3174   }
3175 else while (len > 0)
3176   {
3177   uint32_t c;
3178   int chlen = utf82ord(p, &c);
3179   if (chlen <= 0) return -1;
3180   if (!utf && c > 0xffff) return -3;
3181   if (c > 0x10ffff) return -2;
3182   p += chlen;
3183   len -= chlen;
3184   if (c < 0x10000) *pp++ = c; else
3185     {
3186     c -= 0x10000;
3187     *pp++ = 0xD800 | (c >> 10);
3188     *pp++ = 0xDC00 | (c & 0x3ff);
3189     }
3190   }
3191 
3192 *pp = 0;
3193 *lenptr = pp - pbuffer16;
3194 return 0;
3195 }
3196 #endif
3197 
3198 
3199 
3200 #ifdef SUPPORT_PCRE2_32
3201 /*************************************************
3202 *           Convert string to 32-bit             *
3203 *************************************************/
3204 
3205 /* In UTF mode the input is always interpreted as a string of UTF-8 bytes using
3206 the original UTF-8 definition of RFC 2279, which allows for up to 6 bytes, and
3207 code values from 0 to 0x7fffffff. However, values greater than the later UTF
3208 limit of 0x10ffff cause an error.
3209 
3210 In non-UTF mode the input is interpreted as UTF-8 if the utf8_input modifier
3211 is set, and no limit is imposed. There is special interpretation of the 0xff
3212 byte (which is illegal in UTF-8) in this case: it causes the top bit of the
3213 next character to be set. This provides a way of generating 32-bit characters
3214 greater than 0x7fffffff.
3215 
3216 If all the input bytes are ASCII, the space needed for a 32-bit string is
3217 exactly four times the 8-bit size. Otherwise, the size needed for a 32-bit
3218 string is no more than four times, because the number of characters must be
3219 less than the number of bytes. The result is always left in pbuffer32. Impose a
3220 minimum size to save repeated re-sizing.
3221 
3222 Note that this function does not object to surrogate values. This is
3223 deliberate; it makes it possible to construct UTF-32 strings that are invalid,
3224 for the purpose of testing that they are correctly faulted.
3225 
3226 Arguments:
3227   p          points to a byte string
3228   utf        true in UTF mode
3229   lenptr     points to number of bytes in the string (excluding trailing zero)
3230 
3231 Returns:     0 on success, with the length updated to the number of 32-bit
3232                data items used (excluding the trailing zero)
3233              OR -1 if a UTF-8 string is malformed
3234              OR -2 if a value > 0x10ffff is encountered in UTF mode
3235 */
3236 
3237 static PCRE2_SIZE
to32(uint8_t * p,int utf,PCRE2_SIZE * lenptr)3238 to32(uint8_t *p, int utf, PCRE2_SIZE *lenptr)
3239 {
3240 uint32_t *pp;
3241 PCRE2_SIZE len = *lenptr;
3242 
3243 if (pbuffer32_size < 4*len + 4)
3244   {
3245   if (pbuffer32 != NULL) free(pbuffer32);
3246   pbuffer32_size = 4*len + 4;
3247   if (pbuffer32_size < 8192) pbuffer32_size = 8192;
3248   pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
3249   if (pbuffer32 == NULL)
3250     {
3251     fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
3252       pbuffer32_size);
3253     exit(1);
3254     }
3255   }
3256 
3257 pp = pbuffer32;
3258 
3259 if (!utf && (pat_patctl.control & CTL_UTF8_INPUT) == 0)
3260   {
3261   for (; len > 0; len--) *pp++ = *p++;
3262   }
3263 
3264 else while (len > 0)
3265   {
3266   int chlen;
3267   uint32_t c;
3268   uint32_t topbit = 0;
3269   if (!utf && *p == 0xff && len > 1)
3270     {
3271     topbit = 0x80000000u;
3272     p++;
3273     len--;
3274     }
3275   chlen = utf82ord(p, &c);
3276   if (chlen <= 0) return -1;
3277   if (utf && c > 0x10ffff) return -2;
3278   p += chlen;
3279   len -= chlen;
3280   *pp++ = c | topbit;
3281   }
3282 
3283 *pp = 0;
3284 *lenptr = pp - pbuffer32;
3285 return 0;
3286 }
3287 #endif /* SUPPORT_PCRE2_32 */
3288 
3289 
3290 
3291 /* This function is no longer used. Keep it around for a while, just in case it
3292 needs to be re-instated. */
3293 
3294 #ifdef NEVERNEVERNEVER
3295 
3296 /*************************************************
3297 *         Move back by so many characters        *
3298 *************************************************/
3299 
3300 /* Given a code unit offset in a subject string, move backwards by a number of
3301 characters, and return the resulting offset.
3302 
3303 Arguments:
3304   subject   pointer to the string
3305   offset    start offset
3306   count     count to move back by
3307   utf       TRUE if in UTF mode
3308 
3309 Returns:   a possibly changed offset
3310 */
3311 
3312 static PCRE2_SIZE
backchars(uint8_t * subject,PCRE2_SIZE offset,uint32_t count,BOOL utf)3313 backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
3314 {
3315 if (!utf || test_mode == PCRE32_MODE)
3316   return (count >= offset)? 0 : (offset - count);
3317 
3318 else if (test_mode == PCRE8_MODE)
3319   {
3320   PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
3321   for (; count > 0 && pp > (PCRE2_SPTR8)subject; count--)
3322     {
3323     pp--;
3324     while ((*pp & 0xc0) == 0x80) pp--;
3325     }
3326   return pp - (PCRE2_SPTR8)subject;
3327   }
3328 
3329 else  /* 16-bit mode */
3330   {
3331   PCRE2_SPTR16 pp = (PCRE2_SPTR16)subject + offset;
3332   for (; count > 0 && pp > (PCRE2_SPTR16)subject; count--)
3333     {
3334     pp--;
3335     if ((*pp & 0xfc00) == 0xdc00) pp--;
3336     }
3337   return pp - (PCRE2_SPTR16)subject;
3338   }
3339 }
3340 #endif  /* NEVERNEVERNEVER */
3341 
3342 
3343 
3344 /*************************************************
3345 *           Expand input buffers                 *
3346 *************************************************/
3347 
3348 /* This function doubles the size of the input buffer and the buffer for
3349 keeping an 8-bit copy of patterns (pbuffer8), and copies the current buffers to
3350 the new ones.
3351 
3352 Arguments: none
3353 Returns:   nothing (aborts if malloc() fails)
3354 */
3355 
3356 static void
expand_input_buffers(void)3357 expand_input_buffers(void)
3358 {
3359 int new_pbuffer8_size = 2*pbuffer8_size;
3360 uint8_t *new_buffer = (uint8_t *)malloc(new_pbuffer8_size);
3361 uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size);
3362 
3363 if (new_buffer == NULL || new_pbuffer8 == NULL)
3364   {
3365   fprintf(stderr, "pcre2test: malloc(%d) failed\n", new_pbuffer8_size);
3366   exit(1);
3367   }
3368 
3369 memcpy(new_buffer, buffer, pbuffer8_size);
3370 memcpy(new_pbuffer8, pbuffer8, pbuffer8_size);
3371 
3372 pbuffer8_size = new_pbuffer8_size;
3373 
3374 free(buffer);
3375 free(pbuffer8);
3376 
3377 buffer = new_buffer;
3378 pbuffer8 = new_pbuffer8;
3379 }
3380 
3381 
3382 
3383 /*************************************************
3384 *        Read or extend an input line            *
3385 *************************************************/
3386 
3387 /* Input lines are read into buffer, but both patterns and data lines can be
3388 continued over multiple input lines. In addition, if the buffer fills up, we
3389 want to automatically expand it so as to be able to handle extremely large
3390 lines that are needed for certain stress tests, although this is less likely
3391 now that there are repetition features for both patterns and data. When the
3392 input buffer is expanded, the other two buffers must also be expanded likewise,
3393 and the contents of pbuffer, which are a copy of the input for callouts, must
3394 be preserved (for when expansion happens for a data line). This is not the most
3395 optimal way of handling this, but hey, this is just a test program!
3396 
3397 Arguments:
3398   f            the file to read
3399   start        where in buffer to start (this *must* be within buffer)
3400   prompt       for stdin or readline()
3401 
3402 Returns:       pointer to the start of new data
3403                could be a copy of start, or could be moved
3404                NULL if no data read and EOF reached
3405 */
3406 
3407 static uint8_t *
extend_inputline(FILE * f,uint8_t * start,const char * prompt)3408 extend_inputline(FILE *f, uint8_t *start, const char *prompt)
3409 {
3410 uint8_t *here = start;
3411 
3412 for (;;)
3413   {
3414   size_t rlen = (size_t)(pbuffer8_size - (here - buffer));
3415 
3416   if (rlen > 1000)
3417     {
3418     size_t dlen;
3419 
3420     /* If libreadline or libedit support is required, use readline() to read a
3421     line if the input is a terminal. Note that readline() removes the trailing
3422     newline, so we must put it back again, to be compatible with fgets(). */
3423 
3424 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
3425     if (INTERACTIVE(f))
3426       {
3427       size_t len;
3428       char *s = readline(prompt);
3429       if (s == NULL) return (here == start)? NULL : start;
3430       len = strlen(s);
3431       if (len > 0) add_history(s);
3432       if (len > rlen - 1) len = rlen - 1;
3433       memcpy(here, s, len);
3434       here[len] = '\n';
3435       here[len+1] = 0;
3436       free(s);
3437       }
3438     else
3439 #endif
3440 
3441     /* Read the next line by normal means, prompting if the file is a tty. */
3442 
3443       {
3444       if (INTERACTIVE(f)) printf("%s", prompt);
3445       if (fgets((char *)here, rlen,  f) == NULL)
3446         return (here == start)? NULL : start;
3447       }
3448 
3449     dlen = strlen((char *)here);
3450     here += dlen;
3451 
3452     /* Check for end of line reached. Take care not to read data from before
3453     start (dlen will be zero for a file starting with a binary zero). */
3454 
3455     if (here > start && here[-1] == '\n') return start;
3456 
3457     /* If we have not read a newline when reading a file, we have either filled
3458     the buffer or reached the end of the file. We can detect the former by
3459     checking that the string fills the buffer, and the latter by feof(). If
3460     neither of these is true, it means we read a binary zero which has caused
3461     strlen() to give a short length. This is a hard error because pcre2test
3462     expects to work with C strings. */
3463 
3464     if (!INTERACTIVE(f) && dlen < rlen - 1 && !feof(f))
3465       {
3466       fprintf(outfile, "** Binary zero encountered in input\n");
3467       fprintf(outfile, "** pcre2test run abandoned\n");
3468       exit(1);
3469       }
3470     }
3471 
3472   else
3473     {
3474     size_t start_offset = start - buffer;
3475     size_t here_offset = here - buffer;
3476     expand_input_buffers();
3477     start = buffer + start_offset;
3478     here = buffer + here_offset;
3479     }
3480   }
3481 
3482 /* Control never gets here */
3483 }
3484 
3485 
3486 
3487 /*************************************************
3488 *         Case-independent strncmp() function    *
3489 *************************************************/
3490 
3491 /*
3492 Arguments:
3493   s         first string
3494   t         second string
3495   n         number of characters to compare
3496 
3497 Returns:    < 0, = 0, or > 0, according to the comparison
3498 */
3499 
static int
strncmpic(const uint8_t *s, const uint8_t *t, int n)
{
/* Compare at most n characters, ignoring case. As with strncmp(), the
comparison also ends when both strings reach their terminating zero at the
same position, so that nothing beyond the end of a string is ever examined. A
count that is zero or negative compares nothing and yields zero. */

while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;    /* Both strings ended together */
  }
return 0;
}
3510 
3511 
3512 
3513 /*************************************************
3514 *          Scan the main modifier list           *
3515 *************************************************/
3516 
3517 /* This function searches the modifier list for a long modifier name.
3518 
Arguments:
  p         start of the name
  len       length of the name
3522 
3523 Returns:    an index in the modifier list, or -1 on failure
3524 */
3525 
3526 static int
scan_modifiers(const uint8_t * p,unsigned int len)3527 scan_modifiers(const uint8_t *p, unsigned int len)
3528 {
3529 int bot = 0;
3530 int top = MODLISTCOUNT;
3531 
3532 while (top > bot)
3533   {
3534   int mid = (bot + top)/2;
3535   unsigned int mlen = strlen(modlist[mid].name);
3536   int c = strncmp((char *)p, modlist[mid].name, (len < mlen)? len : mlen);
3537   if (c == 0)
3538     {
3539     if (len == mlen) return mid;
3540     c = (int)len - (int)mlen;
3541     }
3542   if (c > 0) bot = mid + 1; else top = mid;
3543   }
3544 
3545 return -1;
3546 
3547 }
3548 
3549 
3550 
3551 /*************************************************
*        Check a modifier and find its field     *
3553 *************************************************/
3554 
3555 /* This function is called when a modifier has been identified. We check that
3556 it is allowed here and find the field that is to be changed.
3557 
3558 Arguments:
3559   m          the modifier list entry
3560   ctx        CTX_PAT     => pattern context
3561              CTX_POPPAT  => pattern context for popped pattern
3562              CTX_DEFPAT  => default pattern context
3563              CTX_DAT     => data context
3564              CTX_DEFDAT  => default data context
3565   pctl       point to pattern control block
3566   dctl       point to data control block
3567   c          a single character or 0
3568 
3569 Returns:     a field pointer or NULL
3570 */
3571 
3572 static void *
check_modifier(modstruct * m,int ctx,patctl * pctl,datctl * dctl,uint32_t c)3573 check_modifier(modstruct *m, int ctx, patctl *pctl, datctl *dctl, uint32_t c)
3574 {
3575 void *field = NULL;
3576 PCRE2_SIZE offset = m->offset;
3577 
3578 if (restrict_for_perl_test) switch(m->which)
3579   {
3580   case MOD_PNDP:
3581   case MOD_PATP:
3582   case MOD_PDP:
3583   break;
3584 
3585   default:
3586   fprintf(outfile, "** '%s' is not allowed in a Perl-compatible test\n",
3587     m->name);
3588   return NULL;
3589   }
3590 
3591 switch (m->which)
3592   {
3593   case MOD_CTC:  /* Compile context modifier */
3594   if (ctx == CTX_DEFPAT) field = PTR(default_pat_context);
3595     else if (ctx == CTX_PAT) field = PTR(pat_context);
3596   break;
3597 
3598   case MOD_CTM:  /* Match context modifier */
3599   if (ctx == CTX_DEFDAT) field = PTR(default_dat_context);
3600     else if (ctx == CTX_DAT) field = PTR(dat_context);
3601   break;
3602 
3603   case MOD_DAT:  /* Data line modifier */
3604   if (dctl != NULL) field = dctl;
3605   break;
3606 
3607   case MOD_PAT:    /* Pattern modifier */
3608   case MOD_PATP:   /* Allowed for Perl test */
3609   if (pctl != NULL) field = pctl;
3610   break;
3611 
3612   case MOD_PD:   /* Pattern or data line modifier */
3613   case MOD_PDP:  /* Ditto, allowed for Perl test */
3614   case MOD_PND:  /* Ditto, but not default pattern */
3615   case MOD_PNDP: /* Ditto, allowed for Perl test */
3616   if (dctl != NULL) field = dctl;
3617     else if (pctl != NULL && (m->which == MOD_PD || m->which == MOD_PDP ||
3618              ctx != CTX_DEFPAT))
3619       field = pctl;
3620   break;
3621   }
3622 
3623 if (field == NULL)
3624   {
3625   if (c == 0)
3626     fprintf(outfile, "** '%s' is not valid here\n", m->name);
3627   else
3628     fprintf(outfile, "** /%c is not valid here\n", c);
3629   return NULL;
3630   }
3631 
3632 return (char *)field + offset;
3633 }
3634 
3635 
3636 
3637 /*************************************************
3638 *            Decode a modifier list              *
3639 *************************************************/
3640 
3641 /* A pointer to a control block is NULL when called in cases when that block is
3642 not relevant. They are never all relevant in one call. At least one of patctl
3643 and datctl is NULL. The second argument specifies which context to use for
3644 modifiers that apply to contexts.
3645 
3646 Arguments:
3647   p          point to modifier string
3648   ctx        CTX_PAT     => pattern context
3649              CTX_POPPAT  => pattern context for popped pattern
3650              CTX_DEFPAT  => default pattern context
3651              CTX_DAT     => data context
3652              CTX_DEFDAT  => default data context
3653   pctl       point to pattern control block
3654   dctl       point to data control block
3655 
3656 Returns: TRUE if successful decode, FALSE otherwise
3657 */
3658 
3659 static BOOL
decode_modifiers(uint8_t * p,int ctx,patctl * pctl,datctl * dctl)3660 decode_modifiers(uint8_t *p, int ctx, patctl *pctl, datctl *dctl)
3661 {
3662 uint8_t *ep, *pp;
3663 long li;
3664 unsigned long uli;
3665 BOOL first = TRUE;
3666 
3667 for (;;)
3668   {
3669   void *field;
3670   modstruct *m;
3671   BOOL off = FALSE;
3672   unsigned int i, len;
3673   int index;
3674   char *endptr;
3675 
3676   /* Skip white space and commas. */
3677 
3678   while (isspace(*p) || *p == ',') p++;
3679   if (*p == 0) break;
3680 
3681   /* Find the end of the item; lose trailing whitespace at end of line. */
3682 
3683   for (ep = p; *ep != 0 && *ep != ','; ep++);
3684   if (*ep == 0)
3685     {
3686     while (ep > p && isspace(ep[-1])) ep--;
3687     *ep = 0;
3688     }
3689 
3690   /* Remember if the first character is '-'. */
3691 
3692   if (*p == '-')
3693     {
3694     off = TRUE;
3695     p++;
3696     }
3697 
3698   /* Find the length of a full-length modifier name, and scan for it. */
3699 
3700   pp = p;
3701   while (pp < ep && *pp != '=') pp++;
3702   index = scan_modifiers(p, pp - p);
3703 
3704   /* If the first modifier is unrecognized, try to interpret it as a sequence
3705   of single-character abbreviated modifiers. None of these modifiers have any
3706   associated data. They just set options or control bits. */
3707 
3708   if (index < 0)
3709     {
3710     uint32_t cc;
3711     uint8_t *mp = p;
3712 
3713     if (!first)
3714       {
3715       fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3716       if (ep - p == 1)
3717         fprintf(outfile, "** Single-character modifiers must come first\n");
3718       return FALSE;
3719       }
3720 
3721     for (cc = *p; cc != ',' && cc != '\n' && cc != 0; cc = *(++p))
3722       {
3723       for (i = 0; i < C1MODLISTCOUNT; i++)
3724         if (cc == c1modlist[i].onechar) break;
3725 
3726       if (i >= C1MODLISTCOUNT)
3727         {
3728         fprintf(outfile, "** Unrecognized modifier '%c' in '%.*s'\n",
3729           *p, (int)(ep-mp), mp);
3730         return FALSE;
3731         }
3732 
3733       if (c1modlist[i].index >= 0)
3734         {
3735         index = c1modlist[i].index;
3736         }
3737 
3738       else
3739         {
3740         index = scan_modifiers((uint8_t *)(c1modlist[i].fullname),
3741           strlen(c1modlist[i].fullname));
3742         if (index < 0)
3743           {
3744           fprintf(outfile, "** Internal error: single-character equivalent "
3745             "modifier '%s' not found\n", c1modlist[i].fullname);
3746           return FALSE;
3747           }
3748         c1modlist[i].index = index;     /* Cache for next time */
3749         }
3750 
3751       field = check_modifier(modlist + index, ctx, pctl, dctl, *p);
3752       if (field == NULL) return FALSE;
3753 
3754       /* /x is a special case; a second appearance changes PCRE2_EXTENDED to
3755       PCRE2_EXTENDED_MORE. */
3756 
3757       if (cc == 'x' && (*((uint32_t *)field) & PCRE2_EXTENDED) != 0)
3758         {
3759         *((uint32_t *)field) &= ~PCRE2_EXTENDED;
3760         *((uint32_t *)field) |= PCRE2_EXTENDED_MORE;
3761         }
3762       else
3763         *((uint32_t *)field) |= modlist[index].value;
3764       }
3765 
3766     continue;    /* With tne next (fullname) modifier */
3767     }
3768 
3769   /* We have a match on a full-name modifier. Check for the existence of data
3770   when needed. */
3771 
3772   m = modlist + index;      /* Save typing */
3773   if (m->type != MOD_CTL && m->type != MOD_OPT &&
3774       (m->type != MOD_IND || *pp == '='))
3775     {
3776     if (*pp++ != '=')
3777       {
3778       fprintf(outfile, "** '=' expected after '%s'\n", m->name);
3779       return FALSE;
3780       }
3781     if (off)
3782       {
3783       fprintf(outfile, "** '-' is not valid for '%s'\n", m->name);
3784       return FALSE;
3785       }
3786     }
3787 
3788   /* These on/off types have no data. */
3789 
3790   else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
3791     {
3792     fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p);
3793     return FALSE;
3794     }
3795 
3796   /* Set the data length for those types that have data. Then find the field
3797   that is to be set. If check_modifier() returns NULL, it has already output an
3798   error message. */
3799 
3800   len = ep - pp;
3801   field = check_modifier(m, ctx, pctl, dctl, 0);
3802   if (field == NULL) return FALSE;
3803 
3804   /* Process according to data type. */
3805 
3806   switch (m->type)
3807     {
3808     case MOD_CTL:
3809     case MOD_OPT:
3810     if (off) *((uint32_t *)field) &= ~m->value;
3811       else *((uint32_t *)field) |= m->value;
3812     break;
3813 
3814     case MOD_BSR:
3815     if (len == 7 && strncmpic(pp, (const uint8_t *)"default", 7) == 0)
3816       {
3817 #ifdef BSR_ANYCRLF
3818       *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3819 #else
3820       *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3821 #endif
3822       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_BSR_SET;
3823         else dctl->control2 &= ~CTL2_BSR_SET;
3824       }
3825     else
3826       {
3827       if (len == 7 && strncmpic(pp, (const uint8_t *)"anycrlf", 7) == 0)
3828         *((uint16_t *)field) = PCRE2_BSR_ANYCRLF;
3829       else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
3830         *((uint16_t *)field) = PCRE2_BSR_UNICODE;
3831       else goto INVALID_VALUE;
3832       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_BSR_SET;
3833         else dctl->control2 |= CTL2_BSR_SET;
3834       }
3835     pp = ep;
3836     break;
3837 
3838     case MOD_CHR:  /* A single character */
3839     *((uint32_t *)field) = *pp++;
3840     break;
3841 
3842     case MOD_CON:  /* A convert type/options list */
3843     for (;; pp++)
3844       {
3845       uint8_t *colon = (uint8_t *)strchr((const char *)pp, ':');
3846       len = ((colon != NULL && colon < ep)? colon:ep) - pp;
3847       for (i = 0; i < convertlistcount; i++)
3848         {
3849         if (strncmpic(pp, (const uint8_t *)convertlist[i].name, len) == 0)
3850           {
3851           if (*((uint32_t *)field) == CONVERT_UNSET)
3852             *((uint32_t *)field) = convertlist[i].option;
3853           else
3854             *((uint32_t *)field) |= convertlist[i].option;
3855           break;
3856           }
3857         }
3858       if (i >= convertlistcount) goto INVALID_VALUE;
3859       pp += len;
3860       if (*pp != ':') break;
3861       }
3862     break;
3863 
3864     case MOD_IN2:    /* One or two unsigned integers */
3865     if (!isdigit(*pp)) goto INVALID_VALUE;
3866     uli = strtoul((const char *)pp, &endptr, 10);
3867     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3868     ((uint32_t *)field)[0] = (uint32_t)uli;
3869     if (*endptr == ':')
3870       {
3871       uli = strtoul((const char *)endptr+1, &endptr, 10);
3872       if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3873       ((uint32_t *)field)[1] = (uint32_t)uli;
3874       }
3875     else ((uint32_t *)field)[1] = 0;
3876     pp = (uint8_t *)endptr;
3877     break;
3878 
3879     /* PCRE2_SIZE_MAX is usually SIZE_MAX, which may be greater, equal to, or
3880     less than ULONG_MAX. So first test for overflowing the long int, and then
3881     test for overflowing PCRE2_SIZE_MAX if it is smaller than ULONG_MAX. */
3882 
3883     case MOD_SIZ:    /* PCRE2_SIZE value */
3884     if (!isdigit(*pp)) goto INVALID_VALUE;
3885     uli = strtoul((const char *)pp, &endptr, 10);
3886     if (uli == ULONG_MAX) goto INVALID_VALUE;
3887 #if ULONG_MAX > PCRE2_SIZE_MAX
3888     if (uli > PCRE2_SIZE_MAX) goto INVALID_VALUE;
3889 #endif
3890     *((PCRE2_SIZE *)field) = (PCRE2_SIZE)uli;
3891     pp = (uint8_t *)endptr;
3892     break;
3893 
3894     case MOD_IND:    /* Unsigned integer with default */
3895     if (len == 0)
3896       {
3897       *((uint32_t *)field) = (uint32_t)(m->value);
3898       break;
3899       }
3900     /* Fall through */
3901 
3902     case MOD_INT:    /* Unsigned integer */
3903     if (!isdigit(*pp)) goto INVALID_VALUE;
3904     uli = strtoul((const char *)pp, &endptr, 10);
3905     if (U32OVERFLOW(uli)) goto INVALID_VALUE;
3906     *((uint32_t *)field) = (uint32_t)uli;
3907     pp = (uint8_t *)endptr;
3908     break;
3909 
3910     case MOD_INS:   /* Signed integer */
3911     if (!isdigit(*pp) && *pp != '-') goto INVALID_VALUE;
3912     li = strtol((const char *)pp, &endptr, 10);
3913     if (S32OVERFLOW(li)) goto INVALID_VALUE;
3914     *((int32_t *)field) = (int32_t)li;
3915     pp = (uint8_t *)endptr;
3916     break;
3917 
3918     case MOD_NL:
3919     for (i = 0; i < sizeof(newlines)/sizeof(char *); i++)
3920       if (len == strlen(newlines[i]) &&
3921         strncmpic(pp, (const uint8_t *)newlines[i], len) == 0) break;
3922     if (i >= sizeof(newlines)/sizeof(char *)) goto INVALID_VALUE;
3923     if (i == 0)
3924       {
3925       *((uint16_t *)field) = NEWLINE_DEFAULT;
3926       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL2_NL_SET;
3927         else dctl->control2 &= ~CTL2_NL_SET;
3928       }
3929     else
3930       {
3931       *((uint16_t *)field) = i;
3932       if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL2_NL_SET;
3933         else dctl->control2 |= CTL2_NL_SET;
3934       }
3935     pp = ep;
3936     break;
3937 
3938     case MOD_NN:              /* Name or (signed) number; may be several */
3939     if (isdigit(*pp) || *pp == '-')
3940       {
3941       int ct = MAXCPYGET - 1;
3942       int32_t value;
3943       li = strtol((const char *)pp, &endptr, 10);
3944       if (S32OVERFLOW(li)) goto INVALID_VALUE;
3945       value = (int32_t)li;
3946       field = (char *)field - m->offset + m->value;      /* Adjust field ptr */
3947       if (value >= 0)                                    /* Add new number */
3948         {
3949         while (*((int32_t *)field) >= 0 && ct-- > 0)   /* Skip previous */
3950           field = (char *)field + sizeof(int32_t);
3951         if (ct <= 0)
3952           {
3953           fprintf(outfile, "** Too many numeric '%s' modifiers\n", m->name);
3954           return FALSE;
3955           }
3956         }
3957       *((int32_t *)field) = value;
3958       if (ct > 0) ((int32_t *)field)[1] = -1;
3959       pp = (uint8_t *)endptr;
3960       }
3961 
3962     /* Multiple strings are put end to end. */
3963 
3964     else
3965       {
3966       char *nn = (char *)field;
3967       if (len > 0)                    /* Add new name */
3968         {
3969         if (len > MAX_NAME_SIZE)
3970           {
3971           fprintf(outfile, "** Group name in '%s' is too long\n", m->name);
3972           return FALSE;
3973           }
3974         while (*nn != 0) nn += strlen(nn) + 1;
3975         if (nn + len + 2 - (char *)field > LENCPYGET)
3976           {
3977           fprintf(outfile, "** Too many characters in named '%s' modifiers\n",
3978             m->name);
3979           return FALSE;
3980           }
3981         memcpy(nn, pp, len);
3982         }
3983       nn[len] = 0 ;
3984       nn[len+1] = 0;
3985       pp = ep;
3986       }
3987     break;
3988 
3989     case MOD_STR:
3990     if (len + 1 > m->value)
3991       {
3992       fprintf(outfile, "** Overlong value for '%s' (max %d code units)\n",
3993         m->name, m->value - 1);
3994       return FALSE;
3995       }
3996     memcpy(field, pp, len);
3997     ((uint8_t *)field)[len] = 0;
3998     pp = ep;
3999     break;
4000     }
4001 
4002   if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0)
4003     {
4004     fprintf(outfile, "** Comma expected after modifier item '%s'\n", m->name);
4005     return FALSE;
4006     }
4007 
4008   p = pp;
4009   first = FALSE;
4010 
4011   if (ctx == CTX_POPPAT &&
4012      (pctl->options != 0 ||
4013       pctl->tables_id != 0 ||
4014       pctl->locale[0] != 0 ||
4015       (pctl->control & NOTPOP_CONTROLS) != 0))
4016     {
4017     fprintf(outfile, "** '%s' is not valid here\n", m->name);
4018     return FALSE;
4019     }
4020   }
4021 
4022 return TRUE;
4023 
4024 INVALID_VALUE:
4025 fprintf(outfile, "** Invalid value in '%.*s'\n", (int)(ep-p), p);
4026 return FALSE;
4027 }
4028 
4029 
4030 /*************************************************
4031 *             Get info from a pattern            *
4032 *************************************************/
4033 
4034 /* A wrapped call to pcre2_pattern_info(), applied to the current compiled
4035 pattern.
4036 
4037 Arguments:
4038   what        code for the required information
4039   where       where to put the answer
4040   unsetok     PCRE2_ERROR_UNSET is an "expected" result
4041 
4042 Returns:      the return from pcre2_pattern_info()
4043 */
4044 
4045 static int
pattern_info(int what,void * where,BOOL unsetok)4046 pattern_info(int what, void *where, BOOL unsetok)
4047 {
4048 int rc;
4049 PCRE2_PATTERN_INFO(rc, compiled_code, what, NULL);  /* Exercise the code */
4050 PCRE2_PATTERN_INFO(rc, compiled_code, what, where);
4051 if (rc >= 0) return 0;
4052 if (rc != PCRE2_ERROR_UNSET || !unsetok)
4053   {
4054   fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode,
4055     what);
4056   if (rc == PCRE2_ERROR_BADMODE)
4057     fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
4058       "%d-bit mode\n", test_mode,
4059       8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK));
4060   }
4061 return rc;
4062 }
4063 
4064 
4065 
4066 #ifdef SUPPORT_PCRE2_8
4067 /*************************************************
4068 *             Show something in a list           *
4069 *************************************************/
4070 
4071 /* This function just helps to keep the code that uses it tidier. It's used for
4072 various lists of things where there needs to be introductory text before the
4073 first item. As these calls are all in the POSIX-support code, they happen only
4074 when 8-bit mode is supported. */
4075 
4076 static void
prmsg(const char ** msg,const char * s)4077 prmsg(const char **msg, const char *s)
4078 {
4079 fprintf(outfile, "%s %s", *msg, s);
4080 *msg = "";
4081 }
4082 #endif  /* SUPPORT_PCRE2_8 */
4083 
4084 
4085 
4086 /*************************************************
4087 *                Show control bits               *
4088 *************************************************/
4089 
4090 /* Called for mutually exclusive controls and for unsupported POSIX controls.
4091 Because the bits are unique, this can be used for both pattern and data control
4092 words.
4093 
4094 Arguments:
4095   controls    control bits
4096   controls2   more control bits
4097   before      text to print before
4098 
4099 Returns:      nothing
4100 */
4101 
4102 static void
show_controls(uint32_t controls,uint32_t controls2,const char * before)4103 show_controls(uint32_t controls, uint32_t controls2, const char *before)
4104 {
4105 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4106   before,
4107   ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
4108   ((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
4109   ((controls & CTL_ALLCAPTURES) != 0)? " allcaptures" : "",
4110   ((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
4111   ((controls2 & CTL2_ALLVECTOR) != 0)? " allvector" : "",
4112   ((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
4113   ((controls & CTL_BINCODE) != 0)? " bincode" : "",
4114   ((controls2 & CTL2_BSR_SET) != 0)? " bsr" : "",
4115   ((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
4116   ((controls2 & CTL2_CALLOUT_EXTRA) != 0)? " callout_extra" : "",
4117   ((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
4118   ((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
4119   ((controls2 & CTL2_CALLOUT_NO_WHERE) != 0)? " callout_no_where" : "",
4120   ((controls & CTL_DFA) != 0)? " dfa" : "",
4121   ((controls & CTL_EXPAND) != 0)? " expand" : "",
4122   ((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
4123   ((controls & CTL_FRAMESIZE) != 0)? " framesize" : "",
4124   ((controls & CTL_FULLBINCODE) != 0)? " fullbincode" : "",
4125   ((controls & CTL_GETALL) != 0)? " getall" : "",
4126   ((controls & CTL_GLOBAL) != 0)? " global" : "",
4127   ((controls & CTL_HEXPAT) != 0)? " hex" : "",
4128   ((controls & CTL_INFO) != 0)? " info" : "",
4129   ((controls & CTL_JITFAST) != 0)? " jitfast" : "",
4130   ((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
4131   ((controls & CTL_MARK) != 0)? " mark" : "",
4132   ((controls & CTL_MEMORY) != 0)? " memory" : "",
4133   ((controls2 & CTL2_NL_SET) != 0)? " newline" : "",
4134   ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
4135   ((controls & CTL_POSIX) != 0)? " posix" : "",
4136   ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
4137   ((controls & CTL_PUSH) != 0)? " push" : "",
4138   ((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
4139   ((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
4140   ((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
4141   ((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
4142   ((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
4143   ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
4144   ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "",
4145   ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
4146   ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "",
4147   ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
4148   ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
4149   ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "",
4150   ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "",
4151   ((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
4152 }
4153 
4154 
4155 
4156 /*************************************************
4157 *                Show compile options            *
4158 *************************************************/
4159 
4160 /* Called from show_pattern_info() and for unsupported POSIX options.
4161 
4162 Arguments:
4163   options     an options word
4164   before      text to print before
4165   after       text to print after
4166 
4167 Returns:      nothing
4168 */
4169 
4170 static void
show_compile_options(uint32_t options,const char * before,const char * after)4171 show_compile_options(uint32_t options, const char *before, const char *after)
4172 {
4173 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4174 else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
4175   before,
4176   ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "",
4177   ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "",
4178   ((options & PCRE2_ALT_VERBNAMES) != 0)? " alt_verbnames" : "",
4179   ((options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? " allow_empty_class" : "",
4180   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4181   ((options & PCRE2_AUTO_CALLOUT) != 0)? " auto_callout" : "",
4182   ((options & PCRE2_CASELESS) != 0)? " caseless" : "",
4183   ((options & PCRE2_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4184   ((options & PCRE2_DOTALL) != 0)? " dotall" : "",
4185   ((options & PCRE2_DUPNAMES) != 0)? " dupnames" : "",
4186   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4187   ((options & PCRE2_EXTENDED) != 0)? " extended" : "",
4188   ((options & PCRE2_EXTENDED_MORE) != 0)? " extended_more" : "",
4189   ((options & PCRE2_FIRSTLINE) != 0)? " firstline" : "",
4190   ((options & PCRE2_LITERAL) != 0)? " literal" : "",
4191   ((options & PCRE2_MATCH_INVALID_UTF) != 0)? " match_invalid_utf" : "",
4192   ((options & PCRE2_MATCH_UNSET_BACKREF) != 0)? " match_unset_backref" : "",
4193   ((options & PCRE2_MULTILINE) != 0)? " multiline" : "",
4194   ((options & PCRE2_NEVER_BACKSLASH_C) != 0)? " never_backslash_c" : "",
4195   ((options & PCRE2_NEVER_UCP) != 0)? " never_ucp" : "",
4196   ((options & PCRE2_NEVER_UTF) != 0)? " never_utf" : "",
4197   ((options & PCRE2_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4198   ((options & PCRE2_NO_AUTO_POSSESS) != 0)? " no_auto_possess" : "",
4199   ((options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? " no_dotstar_anchor" : "",
4200   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4201   ((options & PCRE2_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4202   ((options & PCRE2_UCP) != 0)? " ucp" : "",
4203   ((options & PCRE2_UNGREEDY) != 0)? " ungreedy" : "",
4204   ((options & PCRE2_USE_OFFSET_LIMIT) != 0)? " use_offset_limit" : "",
4205   ((options & PCRE2_UTF) != 0)? " utf" : "",
4206   after);
4207 }
4208 
4209 
4210 /*************************************************
4211 *           Show compile extra options           *
4212 *************************************************/
4213 
4214 /* Called from show_pattern_info() and for unsupported POSIX options.
4215 
4216 Arguments:
4217   options     an options word
4218   before      text to print before
4219   after       text to print after
4220 
4221 Returns:      nothing
4222 */
4223 
4224 static void
show_compile_extra_options(uint32_t options,const char * before,const char * after)4225 show_compile_extra_options(uint32_t options, const char *before,
4226   const char *after)
4227 {
4228 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
4229 else fprintf(outfile, "%s%s%s%s%s%s%s%s",
4230   before,
4231   ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
4232   ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
4233   ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
4234   ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
4235   ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
4236   ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
4237   after);
4238 }
4239 
4240 
4241 
4242 #ifdef SUPPORT_PCRE2_8
4243 /*************************************************
4244 *                Show match options              *
4245 *************************************************/
4246 
4247 /* Called for unsupported POSIX options. */
4248 
4249 static void
show_match_options(uint32_t options)4250 show_match_options(uint32_t options)
4251 {
4252 fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
4253   ((options & PCRE2_ANCHORED) != 0)? " anchored" : "",
4254   ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)? " copy_matched_subject" : "",
4255   ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "",
4256   ((options & PCRE2_DFA_SHORTEST) != 0)? " dfa_shortest" : "",
4257   ((options & PCRE2_ENDANCHORED) != 0)? " endanchored" : "",
4258   ((options & PCRE2_NO_JIT) != 0)? " no_jit" : "",
4259   ((options & PCRE2_NO_UTF_CHECK) != 0)? " no_utf_check" : "",
4260   ((options & PCRE2_NOTBOL) != 0)? " notbol" : "",
4261   ((options & PCRE2_NOTEMPTY) != 0)? " notempty" : "",
4262   ((options & PCRE2_NOTEMPTY_ATSTART) != 0)? " notempty_atstart" : "",
4263   ((options & PCRE2_NOTEOL) != 0)? " noteol" : "",
4264   ((options & PCRE2_PARTIAL_HARD) != 0)? " partial_hard" : "",
4265   ((options & PCRE2_PARTIAL_SOFT) != 0)? " partial_soft" : "");
4266 }
4267 #endif  /* SUPPORT_PCRE2_8 */
4268 
4269 
4270 
4271 /*************************************************
4272 *      Show memory usage info for a pattern      *
4273 *************************************************/
4274 
4275 static void
show_memory_info(void)4276 show_memory_info(void)
4277 {
4278 uint32_t name_count, name_entry_size;
4279 size_t size, cblock_size;
4280 
4281 /* One of the test_mode values will always be true, but to stop a compiler
4282 warning we must initialize cblock_size. */
4283 
4284 cblock_size = 0;
4285 #ifdef SUPPORT_PCRE2_8
4286 if (test_mode == PCRE8_MODE) cblock_size = sizeof(pcre2_real_code_8);
4287 #endif
4288 #ifdef SUPPORT_PCRE2_16
4289 if (test_mode == PCRE16_MODE) cblock_size = sizeof(pcre2_real_code_16);
4290 #endif
4291 #ifdef SUPPORT_PCRE2_32
4292 if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32);
4293 #endif
4294 
4295 (void)pattern_info(PCRE2_INFO_SIZE, &size, FALSE);
4296 (void)pattern_info(PCRE2_INFO_NAMECOUNT, &name_count, FALSE);
4297 (void)pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size, FALSE);
4298 fprintf(outfile, "Memory allocation (code space): %d\n",
4299   (int)(size - name_count*name_entry_size*code_unit_size - cblock_size));
4300 if (pat_patctl.jit != 0)
4301   {
4302   (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE);
4303   fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)size);
4304   }
4305 }
4306 
4307 
4308 
4309 /*************************************************
4310 *       Show frame size info for a pattern       *
4311 *************************************************/
4312 
4313 static void
show_framesize(void)4314 show_framesize(void)
4315 {
4316 size_t frame_size;
4317 (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE);
4318 fprintf(outfile, "Frame size for pcre2_match(): %d\n", (int)frame_size);
4319 }
4320 
4321 
4322 
4323 /*************************************************
4324 *         Get and output an error message        *
4325 *************************************************/
4326 
4327 static BOOL
print_error_message(int errorcode,const char * before,const char * after)4328 print_error_message(int errorcode, const char *before, const char *after)
4329 {
4330 int len;
4331 PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
4332 if (len < 0)
4333   {
4334   fprintf(outfile, "\n** pcre2test internal error: cannot interpret error "
4335     "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len);
4336   }
4337 else
4338   {
4339   fprintf(outfile, "%s", before);
4340   PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
4341   fprintf(outfile, "%s", after);
4342   }
4343 return len >= 0;
4344 }
4345 
4346 
4347 /*************************************************
4348 *     Callback function for callout enumeration  *
4349 *************************************************/
4350 
4351 /* The only differences in the callout emumeration block for different code
4352 unit widths are that the pointers to the subject, the most recent MARK, and a
4353 callout argument string point to strings of the appropriate width. Casts can be
4354 used to deal with this.
4355 
4356 Argument:
4357   cb            pointer to enumerate block
4358   callout_data  user data
4359 
4360 Returns:    0
4361 */
4362 
callout_callback(pcre2_callout_enumerate_block_8 * cb,void * callout_data)4363 static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
4364   void *callout_data)
4365 {
4366 uint32_t i;
4367 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4368 
4369 (void)callout_data;  /* Not currently displayed */
4370 
4371 fprintf(outfile, "Callout ");
4372 if (cb->callout_string != NULL)
4373   {
4374   uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
4375   fprintf(outfile, "%c", delimiter);
4376   PCHARSV(cb->callout_string, 0,
4377     cb->callout_string_length, utf, outfile);
4378   for (i = 0; callout_start_delims[i] != 0; i++)
4379     if (delimiter == callout_start_delims[i])
4380       {
4381       delimiter = callout_end_delims[i];
4382       break;
4383       }
4384   fprintf(outfile, "%c  ", delimiter);
4385   }
4386 else fprintf(outfile, "%d  ", cb->callout_number);
4387 
4388 fprintf(outfile, "%.*s\n",
4389   (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
4390   pbuffer8 + cb->pattern_position);
4391 
4392 return 0;
4393 }
4394 
4395 
4396 
4397 /*************************************************
4398 *        Show information about a pattern        *
4399 *************************************************/
4400 
4401 /* This function is called after a pattern has been compiled if any of the
4402 information-requesting controls have been set.
4403 
4404 Arguments:  none
4405 
4406 Returns:    PR_OK     continue processing next line
4407             PR_SKIP   skip to a blank line
4408             PR_ABEND  abort the pcre2test run
4409 */
4410 
4411 static int
show_pattern_info(void)4412 show_pattern_info(void)
4413 {
4414 uint32_t compile_options, overall_options, extra_options;
4415 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
4416 
4417 if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0)
4418   {
4419   fprintf(outfile, "------------------------------------------------------------------\n");
4420   PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0);
4421   }
4422 
4423 if ((pat_patctl.control & CTL_INFO) != 0)
4424   {
4425   int rc;
4426   void *nametable;
4427   uint8_t *start_bits;
4428   BOOL heap_limit_set, match_limit_set, depth_limit_set;
4429   uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
4430     hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
4431     depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
4432     newline_convention;
4433 
4434   /* Exercise the error route. */
4435 
4436   PCRE2_PATTERN_INFO(rc, compiled_code, 999, NULL);
4437   (void)rc;
4438 
4439   /* These info requests may return PCRE2_ERROR_UNSET. */
4440 
4441   switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
4442     {
4443     case 0:
4444     heap_limit_set = TRUE;
4445     break;
4446 
4447     case PCRE2_ERROR_UNSET:
4448     heap_limit_set = FALSE;
4449     break;
4450 
4451     default:
4452     return PR_ABEND;
4453     }
4454 
4455   switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
4456     {
4457     case 0:
4458     match_limit_set = TRUE;
4459     break;
4460 
4461     case PCRE2_ERROR_UNSET:
4462     match_limit_set = FALSE;
4463     break;
4464 
4465     default:
4466     return PR_ABEND;
4467     }
4468 
4469   switch(pattern_info(PCRE2_INFO_DEPTHLIMIT, &depth_limit, TRUE))
4470     {
4471     case 0:
4472     depth_limit_set = TRUE;
4473     break;
4474 
4475     case PCRE2_ERROR_UNSET:
4476     depth_limit_set = FALSE;
4477     break;
4478 
4479     default:
4480     return PR_ABEND;
4481     }
4482 
4483   /* These info requests should always succeed. */
4484 
4485   if (pattern_info(PCRE2_INFO_BACKREFMAX, &backrefmax, FALSE) +
4486       pattern_info(PCRE2_INFO_BSR, &bsr_convention, FALSE) +
4487       pattern_info(PCRE2_INFO_CAPTURECOUNT, &capture_count, FALSE) +
4488       pattern_info(PCRE2_INFO_FIRSTBITMAP, &start_bits, FALSE) +
4489       pattern_info(PCRE2_INFO_FIRSTCODEUNIT, &first_cunit, FALSE) +
4490       pattern_info(PCRE2_INFO_FIRSTCODETYPE, &first_ctype, FALSE) +
4491       pattern_info(PCRE2_INFO_HASBACKSLASHC, &hasbackslashc, FALSE) +
4492       pattern_info(PCRE2_INFO_HASCRORLF, &hascrorlf, FALSE) +
4493       pattern_info(PCRE2_INFO_JCHANGED, &jchanged, FALSE) +
4494       pattern_info(PCRE2_INFO_LASTCODEUNIT, &last_cunit, FALSE) +
4495       pattern_info(PCRE2_INFO_LASTCODETYPE, &last_ctype, FALSE) +
4496       pattern_info(PCRE2_INFO_MATCHEMPTY, &match_empty, FALSE) +
4497       pattern_info(PCRE2_INFO_MINLENGTH, &minlength, FALSE) +
4498       pattern_info(PCRE2_INFO_NAMECOUNT, &namecount, FALSE) +
4499       pattern_info(PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize, FALSE) +
4500       pattern_info(PCRE2_INFO_NAMETABLE, &nametable, FALSE) +
4501       pattern_info(PCRE2_INFO_NEWLINE, &newline_convention, FALSE)
4502       != 0)
4503     return PR_ABEND;
4504 
4505   fprintf(outfile, "Capture group count = %d\n", capture_count);
4506 
4507   if (backrefmax > 0)
4508     fprintf(outfile, "Max back reference = %d\n", backrefmax);
4509 
4510   if (maxlookbehind > 0)
4511     fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4512 
4513   if (heap_limit_set)
4514     fprintf(outfile, "Heap limit = %u\n", heap_limit);
4515 
4516   if (match_limit_set)
4517     fprintf(outfile, "Match limit = %u\n", match_limit);
4518 
4519   if (depth_limit_set)
4520     fprintf(outfile, "Depth limit = %u\n", depth_limit);
4521 
4522   if (namecount > 0)
4523     {
4524     fprintf(outfile, "Named capture groups:\n");
4525     for (; namecount > 0; namecount--)
4526       {
4527       int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
4528       uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
4529       fprintf(outfile, "  ");
4530 
4531       /* In UTF mode the name may be a UTF string containing non-ASCII
4532       letters and digits. We must output it as a UTF-8 string. In non-UTF mode,
4533       use the normal string printing functions, which use escapes for all
4534       non-ASCII characters. */
4535 
4536       if (utf)
4537         {
4538 #ifdef SUPPORT_PCRE2_32
4539         if (test_mode == PCRE32_MODE)
4540           {
4541           PCRE2_SPTR32 nameptr = (PCRE2_SPTR32)nametable + imm2_size;
4542           while (*nameptr != 0)
4543             {
4544             uint8_t u8buff[6];
4545             int len = ord2utf8(*nameptr++, u8buff);
4546             fprintf(outfile, "%.*s", len, u8buff);
4547             }
4548           }
4549 #endif
4550 #ifdef SUPPORT_PCRE2_16
4551         if (test_mode == PCRE16_MODE)
4552           {
4553           PCRE2_SPTR16 nameptr = (PCRE2_SPTR16)nametable + imm2_size;
4554           while (*nameptr != 0)
4555             {
4556             int len;
4557             uint8_t u8buff[6];
4558             uint32_t c = *nameptr++ & 0xffff;
4559             if (c >= 0xD800 && c < 0xDC00)
4560               c = ((c & 0x3ff) << 10) + (*nameptr++ & 0x3ff) + 0x10000;
4561             len = ord2utf8(c, u8buff);
4562             fprintf(outfile, "%.*s", len, u8buff);
4563             }
4564           }
4565 #endif
4566 #ifdef SUPPORT_PCRE2_8
4567         if (test_mode == PCRE8_MODE)
4568           fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size);
4569 #endif
4570         }
4571       else  /* Not UTF mode */
4572         {
4573         PCHARSV(nametable, imm2_size, length, FALSE, outfile);
4574         }
4575 
4576       while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4577 
4578 #ifdef SUPPORT_PCRE2_32
4579       if (test_mode == PCRE32_MODE)
4580         fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0]));
4581 #endif
4582 #ifdef SUPPORT_PCRE2_16
4583       if (test_mode == PCRE16_MODE)
4584         fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0]));
4585 #endif
4586 #ifdef SUPPORT_PCRE2_8
4587       if (test_mode == PCRE8_MODE)
4588         fprintf(outfile, "%3d\n", (int)(
4589         ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1]));
4590 #endif
4591 
4592       nametable = (void*)((PCRE2_SPTR8)nametable + nameentrysize * code_unit_size);
4593       }
4594     }
4595 
4596   if (hascrorlf)     fprintf(outfile, "Contains explicit CR or LF match\n");
4597   if (hasbackslashc) fprintf(outfile, "Contains \\C\n");
4598   if (match_empty)   fprintf(outfile, "May match empty string\n");
4599 
4600   pattern_info(PCRE2_INFO_ARGOPTIONS, &compile_options, FALSE);
4601   pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE);
4602   pattern_info(PCRE2_INFO_EXTRAOPTIONS, &extra_options, FALSE);
4603 
4604   /* Remove UTF/UCP if they were there only because of forbid_utf. This saves
4605   cluttering up the verification output of non-UTF test files. */
4606 
4607   if ((pat_patctl.options & PCRE2_NEVER_UTF) == 0)
4608     {
4609     compile_options &= ~PCRE2_NEVER_UTF;
4610     overall_options &= ~PCRE2_NEVER_UTF;
4611     }
4612 
4613   if ((pat_patctl.options & PCRE2_NEVER_UCP) == 0)
4614     {
4615     compile_options &= ~PCRE2_NEVER_UCP;
4616     overall_options &= ~PCRE2_NEVER_UCP;
4617     }
4618 
4619   if ((compile_options|overall_options) != 0)
4620     {
4621     if (compile_options == overall_options)
4622       show_compile_options(compile_options, "Options:", "\n");
4623     else
4624       {
4625       show_compile_options(compile_options, "Compile options:", "\n");
4626       show_compile_options(overall_options, "Overall options:", "\n");
4627       }
4628     }
4629 
4630   if (extra_options != 0)
4631     show_compile_extra_options(extra_options, "Extra options:", "\n");
4632 
4633   if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4634 
4635   if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 ||
4636       (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
4637     fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
4638       "any Unicode newline" : "CR, LF, or CRLF");
4639 
4640   if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0)
4641     {
4642     switch (newline_convention)
4643       {
4644       case PCRE2_NEWLINE_CR:
4645       fprintf(outfile, "Forced newline is CR\n");
4646       break;
4647 
4648       case PCRE2_NEWLINE_LF:
4649       fprintf(outfile, "Forced newline is LF\n");
4650       break;
4651 
4652       case PCRE2_NEWLINE_CRLF:
4653       fprintf(outfile, "Forced newline is CRLF\n");
4654       break;
4655 
4656       case PCRE2_NEWLINE_ANYCRLF:
4657       fprintf(outfile, "Forced newline is CR, LF, or CRLF\n");
4658       break;
4659 
4660       case PCRE2_NEWLINE_ANY:
4661       fprintf(outfile, "Forced newline is any Unicode newline\n");
4662       break;
4663 
4664       case PCRE2_NEWLINE_NUL:
4665       fprintf(outfile, "Forced newline is NUL\n");
4666       break;
4667 
4668       default:
4669       break;
4670       }
4671     }
4672 
4673   if (first_ctype == 2)
4674     {
4675     fprintf(outfile, "First code unit at start or follows newline\n");
4676     }
4677   else if (first_ctype == 1)
4678     {
4679     const char *caseless =
4680       ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)?
4681       "" : " (caseless)";
4682     if (PRINTOK(first_cunit))
4683       fprintf(outfile, "First code unit = \'%c\'%s\n", first_cunit, caseless);
4684     else
4685       {
4686       fprintf(outfile, "First code unit = ");
4687       pchar(first_cunit, FALSE, outfile);
4688       fprintf(outfile, "%s\n", caseless);
4689       }
4690     }
4691   else if (start_bits != NULL)
4692     {
4693     int i;
4694     int c = 24;
4695     fprintf(outfile, "Starting code units: ");
4696     for (i = 0; i < 256; i++)
4697       {
4698       if ((start_bits[i/8] & (1u << (i&7))) != 0)
4699         {
4700         if (c > 75)
4701           {
4702           fprintf(outfile, "\n  ");
4703           c = 2;
4704           }
4705         if (PRINTOK(i) && i != ' ')
4706           {
4707           fprintf(outfile, "%c ", i);
4708           c += 2;
4709           }
4710         else
4711           {
4712           fprintf(outfile, "\\x%02x ", i);
4713           c += 5;
4714           }
4715         }
4716       }
4717     fprintf(outfile, "\n");
4718     }
4719 
4720   if (last_ctype != 0)
4721     {
4722     const char *caseless =
4723       ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)?
4724       "" : " (caseless)";
4725     if (PRINTOK(last_cunit))
4726       fprintf(outfile, "Last code unit = \'%c\'%s\n", last_cunit, caseless);
4727     else
4728       {
4729       fprintf(outfile, "Last code unit = ");
4730       pchar(last_cunit, FALSE, outfile);
4731       fprintf(outfile, "%s\n", caseless);
4732       }
4733     }
4734 
4735   if ((FLD(compiled_code, overall_options) & PCRE2_NO_START_OPTIMIZE) == 0)
4736     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4737 
4738   if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0)
4739     {
4740     if (FLD(compiled_code, executable_jit) != NULL)
4741       fprintf(outfile, "JIT compilation was successful\n");
4742     else
4743       {
4744 #ifdef SUPPORT_JIT
4745       fprintf(outfile, "JIT compilation was not successful");
4746       if (jitrc != 0 && !print_error_message(jitrc, " (", ")"))
4747         return PR_ABEND;
4748       fprintf(outfile, "\n");
4749 #else
4750       fprintf(outfile, "JIT support is not available in this version of PCRE2\n");
4751 #endif
4752       }
4753     }
4754   }
4755 
4756 if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
4757   {
4758   int errorcode;
4759   PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
4760   if (errorcode != 0)
4761     {
4762     fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
4763     if (errorcode < 0 && !print_error_message(errorcode, "", "\n"))
4764       return PR_ABEND;
4765     return PR_SKIP;
4766     }
4767   }
4768 
4769 return PR_OK;
4770 }
4771 
4772 
4773 
4774 /*************************************************
4775 *              Handle serialization error        *
4776 *************************************************/
4777 
4778 /* Print an error message after a serialization failure.
4779 
4780 Arguments:
4781   rc         the error code
4782   msg        an initial message for what failed
4783 
4784 Returns:     FALSE if print_error_message() fails
4785 */
4786 
4787 static BOOL
serial_error(int rc,const char * msg)4788 serial_error(int rc, const char *msg)
4789 {
4790 fprintf(outfile, "%s failed: error %d: ", msg, rc);
4791 return print_error_message(rc, "", "\n");
4792 }
4793 
4794 
4795 
4796 /*************************************************
4797 *        Open file for save/load commands        *
4798 *************************************************/
4799 
4800 /* This function decodes the file name and opens the file.
4801 
4802 Arguments:
4803   buffptr     point after the #command
4804   mode        open mode
4805   fptr        points to the FILE variable
4806   name        name of # command
4807 
4808 Returns:      PR_OK or PR_ABEND
4809 */
4810 
4811 static int
open_file(uint8_t * buffptr,const char * mode,FILE ** fptr,const char * name)4812 open_file(uint8_t *buffptr, const char *mode, FILE **fptr, const char *name)
4813 {
4814 char *endf;
4815 char *filename = (char *)buffptr;
4816 while (isspace(*filename)) filename++;
4817 endf = filename + strlen8(filename);
4818 while (endf > filename && isspace(endf[-1])) endf--;
4819 
4820 if (endf == filename)
4821   {
4822   fprintf(outfile, "** File name expected after %s\n", name);
4823   return PR_ABEND;
4824   }
4825 
4826 *endf = 0;
4827 *fptr = fopen((const char *)filename, mode);
4828 if (*fptr == NULL)
4829   {
4830   fprintf(outfile, "** Failed to open '%s': %s\n", filename, strerror(errno));
4831   return PR_ABEND;
4832   }
4833 
4834 return PR_OK;
4835 }
4836 
4837 
4838 
4839 /*************************************************
4840 *               Process command line             *
4841 *************************************************/
4842 
4843 /* This function is called for lines beginning with # and a character that is
4844 not ! or whitespace, when encountered between tests, which means that there is
4845 no compiled pattern (compiled_code is NULL). The line is in buffer.
4846 
4847 Arguments:  none
4848 
4849 Returns:    PR_OK     continue processing next line
4850             PR_SKIP   skip to a blank line
4851             PR_ABEND  abort the pcre2test run
4852 */
4853 
4854 static int
process_command(void)4855 process_command(void)
4856 {
4857 FILE *f;
4858 PCRE2_SIZE serial_size;
4859 size_t i;
4860 int rc, cmd, cmdlen, yield;
4861 uint16_t first_listed_newline;
4862 const char *cmdname;
4863 uint8_t *argptr, *serial;
4864 
4865 yield = PR_OK;
4866 cmd = CMD_UNKNOWN;
4867 cmdlen = 0;
4868 
4869 for (i = 0; i < cmdlistcount; i++)
4870   {
4871   cmdname = cmdlist[i].name;
4872   cmdlen = strlen(cmdname);
4873   if (strncmp((char *)(buffer+1), cmdname, cmdlen) == 0 &&
4874       isspace(buffer[cmdlen+1]))
4875     {
4876     cmd = cmdlist[i].value;
4877     break;
4878     }
4879   }
4880 
4881 argptr = buffer + cmdlen + 1;
4882 
4883 if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT)
4884   {
4885   fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname);
4886   return PR_ABEND;
4887   }
4888 
4889 switch(cmd)
4890   {
4891   case CMD_UNKNOWN:
4892   fprintf(outfile, "** Unknown command: %s", buffer);
4893   break;
4894 
4895   case CMD_FORBID_UTF:
4896   forbid_utf = PCRE2_NEVER_UTF|PCRE2_NEVER_UCP;
4897   break;
4898 
4899   case CMD_PERLTEST:
4900   restrict_for_perl_test = TRUE;
4901   break;
4902 
4903   /* Set default pattern modifiers */
4904 
4905   case CMD_PATTERN:
4906   (void)decode_modifiers(argptr, CTX_DEFPAT, &def_patctl, NULL);
4907   if (def_patctl.jit == 0 && (def_patctl.control & CTL_JITVERIFY) != 0)
4908     def_patctl.jit = JIT_DEFAULT;
4909   break;
4910 
4911   /* Set default subject modifiers */
4912 
4913   case CMD_SUBJECT:
4914   (void)decode_modifiers(argptr, CTX_DEFDAT, NULL, &def_datctl);
4915   break;
4916 
4917   /* Check the default newline, and if not one of those listed, set up the
4918   first one to be forced. An empty list unsets. */
4919 
4920   case CMD_NEWLINE_DEFAULT:
4921   local_newline_default = 0;   /* Unset */
4922   first_listed_newline = 0;
4923   for (;;)
4924     {
4925     while (isspace(*argptr)) argptr++;
4926     if (*argptr == 0) break;
4927     for (i = 1; i < sizeof(newlines)/sizeof(char *); i++)
4928       {
4929       size_t nlen = strlen(newlines[i]);
4930       if (strncmpic(argptr, (const uint8_t *)newlines[i], nlen) == 0 &&
4931           isspace(argptr[nlen]))
4932         {
4933         if (i == NEWLINE_DEFAULT) return PR_OK;  /* Default is valid */
4934         if (first_listed_newline == 0) first_listed_newline = i;
4935         }
4936       }
4937     while (*argptr != 0 && !isspace(*argptr)) argptr++;
4938     }
4939   local_newline_default = first_listed_newline;
4940   break;
4941 
4942   /* Pop or copy a compiled pattern off the stack. Modifiers that do not affect
4943   the compiled pattern (e.g. to give information) are permitted. The default
4944   pattern modifiers are ignored. */
4945 
4946   case CMD_POP:
4947   case CMD_POPCOPY:
4948   if (patstacknext <= 0)
4949     {
4950     fprintf(outfile, "** Can't pop off an empty stack\n");
4951     return PR_SKIP;
4952     }
4953   memset(&pat_patctl, 0, sizeof(patctl));   /* Completely unset */
4954   if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL))
4955     return PR_SKIP;
4956 
4957   if (cmd == CMD_POP)
4958     {
4959     SET(compiled_code, patstack[--patstacknext]);
4960     }
4961   else
4962     {
4963     PCRE2_CODE_COPY_FROM_VOID(compiled_code, patstack[patstacknext - 1]);
4964     }
4965 
4966   if (pat_patctl.jit != 0)
4967     {
4968     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
4969     }
4970   if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
4971   if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
4972   if ((pat_patctl.control & CTL_ANYINFO) != 0)
4973     {
4974     rc = show_pattern_info();
4975     if (rc != PR_OK) return rc;
4976     }
4977   break;
4978 
4979   /* Save the stack of compiled patterns to a file, then empty the stack. */
4980 
4981   case CMD_SAVE:
4982   if (patstacknext <= 0)
4983     {
4984     fprintf(outfile, "** No stacked patterns to save\n");
4985     return PR_OK;
4986     }
4987 
4988   rc = open_file(argptr+1, BINARY_OUTPUT_MODE, &f, "#save");
4989   if (rc != PR_OK) return rc;
4990 
4991   PCRE2_SERIALIZE_ENCODE(rc, patstack, patstacknext, &serial, &serial_size,
4992     general_context);
4993   if (rc < 0)
4994     {
4995     fclose(f);
4996     if (!serial_error(rc, "Serialization")) return PR_ABEND;
4997     break;
4998     }
4999 
5000   /* Write the length at the start of the file to make it straightforward to
5001   get the right memory when re-loading. This saves having to read the file size
5002   in different operating systems. To allow for different endianness (even
5003   though reloading with the opposite endianness does not work), write the
5004   length byte-by-byte. */
5005 
5006   for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f);
5007   if (fwrite(serial, 1, serial_size, f) != serial_size)
5008     {
5009     fprintf(outfile, "** Wrong return from fwrite()\n");
5010     fclose(f);
5011     return PR_ABEND;
5012     }
5013 
5014   fclose(f);
5015   PCRE2_SERIALIZE_FREE(serial);
5016   while(patstacknext > 0)
5017     {
5018     SET(compiled_code, patstack[--patstacknext]);
5019     SUB1(pcre2_code_free, compiled_code);
5020     }
5021   SET(compiled_code, NULL);
5022   break;
5023 
5024   /* Load a set of compiled patterns from a file onto the stack */
5025 
5026   case CMD_LOAD:
5027   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#load");
5028   if (rc != PR_OK) return rc;
5029 
5030   serial_size = 0;
5031   for (i = 0; i < 4; i++) serial_size |= fgetc(f) << (i*8);
5032 
5033   serial = malloc(serial_size);
5034   if (serial == NULL)
5035     {
5036     fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n",
5037       serial_size);
5038     fclose(f);
5039     return PR_ABEND;
5040     }
5041 
5042   i = fread(serial, 1, serial_size, f);
5043   fclose(f);
5044 
5045   if (i != serial_size)
5046     {
5047     fprintf(outfile, "** Wrong return from fread()\n");
5048     yield = PR_ABEND;
5049     }
5050   else
5051     {
5052     PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(rc, serial);
5053     if (rc < 0)
5054       {
5055       if (!serial_error(rc, "Get number of codes")) yield = PR_ABEND;
5056       }
5057     else
5058       {
5059       if (rc + patstacknext > PATSTACKSIZE)
5060         {
5061         fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n",
5062           rc, (rc == 1)? "" : "s");
5063         rc = PATSTACKSIZE - patstacknext;
5064         fprintf(outfile, "** Decoding %d pattern%s\n", rc,
5065           (rc == 1)? "" : "s");
5066         }
5067       PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial,
5068         general_context);
5069       if (rc < 0)
5070         {
5071         if (!serial_error(rc, "Deserialization")) yield = PR_ABEND;
5072         }
5073       else patstacknext += rc;
5074       }
5075     }
5076 
5077   free(serial);
5078   break;
5079 
5080   /* Load a set of binary tables into tables3. */
5081 
5082   case CMD_LOADTABLES:
5083   rc = open_file(argptr+1, BINARY_INPUT_MODE, &f, "#loadtables");
5084   if (rc != PR_OK) return rc;
5085 
5086   if (tables3 == NULL)
5087     {
5088     (void)PCRE2_CONFIG(PCRE2_CONFIG_TABLES_LENGTH, &loadtables_length);
5089     tables3 = malloc(loadtables_length);
5090     }
5091 
5092   if (tables3 == NULL)
5093     {
5094     fprintf(outfile, "** Failed: malloc failed for #loadtables\n");
5095     yield = PR_ABEND;
5096     }
5097   else if (fread(tables3, 1, loadtables_length, f) != loadtables_length)
5098     {
5099     fprintf(outfile, "** Wrong return from fread()\n");
5100     yield = PR_ABEND;
5101     }
5102 
5103   fclose(f);
5104   break;
5105   }
5106 
5107 return yield;
5108 }
5109 
5110 
5111 
5112 /*************************************************
5113 *               Process pattern line             *
5114 *************************************************/
5115 
5116 /* This function is called when the input buffer contains the start of a
5117 pattern. The first character is known to be a valid delimiter. The pattern is
5118 read, modifiers are interpreted, and a suitable local context is set up for
5119 this test. The pattern is then compiled.
5120 
5121 Arguments:  none
5122 
5123 Returns:    PR_OK     continue processing next line
5124             PR_SKIP   skip to a blank line
5125             PR_ABEND  abort the pcre2test run
5126 */
5127 
5128 static int
process_pattern(void)5129 process_pattern(void)
5130 {
5131 BOOL utf;
5132 uint32_t k;
5133 uint8_t *p = buffer;
5134 unsigned int delimiter = *p++;
5135 int errorcode;
5136 void *use_pat_context;
5137 uint32_t use_forbid_utf = forbid_utf;
5138 PCRE2_SIZE patlen;
5139 PCRE2_SIZE valgrind_access_length;
5140 PCRE2_SIZE erroroffset;
5141 
5142 /* The perltest.sh script supports only / as a delimiter. */
5143 
5144 if (restrict_for_perl_test && delimiter != '/')
5145   {
5146   fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n");
5147   return PR_ABEND;
5148   }
5149 
5150 /* Initialize the context and pattern/data controls for this test from the
5151 defaults. */
5152 
5153 PATCTXCPY(pat_context, default_pat_context);
5154 memcpy(&pat_patctl, &def_patctl, sizeof(patctl));
5155 
5156 /* Find the end of the pattern, reading more lines if necessary. */
5157 
5158 for(;;)
5159   {
5160   while (*p != 0)
5161     {
5162     if (*p == '\\' && p[1] != 0) p++;
5163       else if (*p == delimiter) break;
5164     p++;
5165     }
5166   if (*p != 0) break;
5167   if ((p = extend_inputline(infile, p, "    > ")) == NULL)
5168     {
5169     fprintf(outfile, "** Unexpected EOF\n");
5170     return PR_ABEND;
5171     }
5172   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p);
5173   }
5174 
5175 /* If the first character after the delimiter is backslash, make the pattern
5176 end with backslash. This is purely to provide a way of testing for the error
5177 message when a pattern ends with backslash. */
5178 
5179 if (p[1] == '\\') *p++ = '\\';
5180 
5181 /* Terminate the pattern at the delimiter, and compute the length. */
5182 
5183 *p++ = 0;
5184 patlen = p - buffer - 2;
5185 
5186 /* Look for modifiers and options after the final delimiter. */
5187 
5188 if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
5189 
5190 /* Note that the match_invalid_utf option also sets utf when passed to
5191 pcre2_compile(). */
5192 
5193 utf = (pat_patctl.options & (PCRE2_UTF|PCRE2_MATCH_INVALID_UTF)) != 0;
5194 
5195 /* The utf8_input modifier is not allowed in 8-bit mode, and is mutually
5196 exclusive with the utf modifier. */
5197 
5198 if ((pat_patctl.control & CTL_UTF8_INPUT) != 0)
5199   {
5200   if (test_mode == PCRE8_MODE)
5201     {
5202     fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n");
5203     return PR_SKIP;
5204     }
5205   if (utf)
5206     {
5207     fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n");
5208     return PR_SKIP;
5209     }
5210   }
5211 
5212 /* The convert and posix modifiers are mutually exclusive. */
5213 
5214 if (pat_patctl.convert_type != CONVERT_UNSET &&
5215     (pat_patctl.control & CTL_POSIX) != 0)
5216   {
5217   fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n");
5218   return PR_SKIP;
5219   }
5220 
5221 /* Check for mutually exclusive control modifiers. At present, these are all in
5222 the first control word. */
5223 
5224 for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++)
5225   {
5226   uint32_t c = pat_patctl.control & exclusive_pat_controls[k];
5227   if (c != 0 && c != (c & (~c+1)))
5228     {
5229     show_controls(c, 0, "** Not allowed together:");
5230     fprintf(outfile, "\n");
5231     return PR_SKIP;
5232     }
5233   }
5234 
5235 /* Assume full JIT compile for jitverify and/or jitfast if nothing else was
5236 specified. */
5237 
5238 if (pat_patctl.jit == 0 &&
5239     (pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
5240   pat_patctl.jit = JIT_DEFAULT;
5241 
5242 /* Now copy the pattern to pbuffer8 for use in 8-bit testing and for reflecting
5243 in callouts. Convert from hex if requested (literal strings in quotes may be
5244 present within the hexadecimal pairs). The result must necessarily be fewer
5245 characters so will always fit in pbuffer8. */
5246 
5247 if ((pat_patctl.control & CTL_HEXPAT) != 0)
5248   {
5249   uint8_t *pp, *pt;
5250   uint32_t c, d;
5251 
5252   pt = pbuffer8;
5253   for (pp = buffer + 1; *pp != 0; pp++)
5254     {
5255     if (isspace(*pp)) continue;
5256     c = *pp++;
5257 
5258     /* Handle a literal substring */
5259 
5260     if (c == '\'' || c == '"')
5261       {
5262       uint8_t *pq = pp;
5263       for (;; pp++)
5264         {
5265         d = *pp;
5266         if (d == 0)
5267           {
5268           fprintf(outfile, "** Missing closing quote in hex pattern: "
5269             "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2);
5270           return PR_SKIP;
5271           }
5272         if (d == c) break;
5273         *pt++ = d;
5274         }
5275       }
5276 
5277     /* Expect a hex pair */
5278 
5279     else
5280       {
5281       if (!isxdigit(c))
5282         {
5283         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5284           PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2);
5285         return PR_SKIP;
5286         }
5287       if (*pp == 0)
5288         {
5289         fprintf(outfile, "** Odd number of digits in hex pattern\n");
5290         return PR_SKIP;
5291         }
5292       d = *pp;
5293       if (!isxdigit(d))
5294         {
5295         fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %"
5296           PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1);
5297         return PR_SKIP;
5298         }
5299       c = toupper(c);
5300       d = toupper(d);
5301       *pt++ = ((isdigit(c)? (c - '0') : (c - 'A' + 10)) << 4) +
5302                (isdigit(d)? (d - '0') : (d - 'A' + 10));
5303       }
5304     }
5305   *pt = 0;
5306   patlen = pt - pbuffer8;
5307   }
5308 
5309 /* If not a hex string, process for repetition expansion if requested. */
5310 
5311 else if ((pat_patctl.control & CTL_EXPAND) != 0)
5312   {
5313   uint8_t *pp, *pt;
5314 
5315   pt = pbuffer8;
5316   for (pp = buffer + 1; *pp != 0; pp++)
5317     {
5318     uint8_t *pc = pp;
5319     uint32_t count = 1;
5320     size_t length = 1;
5321 
5322     /* Check for replication syntax; if not found, the defaults just set will
5323     prevail and one character will be copied. */
5324 
5325     if (pp[0] == '\\' && pp[1] == '[')
5326       {
5327       uint8_t *pe;
5328       for (pe = pp + 2; *pe != 0; pe++)
5329         {
5330         if (pe[0] == ']' && pe[1] == '{')
5331           {
5332           uint32_t clen = pe - pc - 2;
5333           uint32_t i = 0;
5334           unsigned long uli;
5335           char *endptr;
5336 
5337           pe += 2;
5338           uli = strtoul((const char *)pe, &endptr, 10);
5339           if (U32OVERFLOW(uli))
5340             {
5341             fprintf(outfile, "** Pattern repeat count too large\n");
5342             return PR_SKIP;
5343             }
5344 
5345           i = (uint32_t)uli;
5346           pe = (uint8_t *)endptr;
5347           if (*pe == '}')
5348             {
5349             if (i == 0)
5350               {
5351               fprintf(outfile, "** Zero repeat not allowed\n");
5352               return PR_SKIP;
5353               }
5354             pc += 2;
5355             count = i;
5356             length = clen;
5357             pp = pe;
5358             break;
5359             }
5360           }
5361         }
5362       }
5363 
5364     /* Add to output. If the buffer is too small expand it. The function for
5365     expanding buffers always keeps buffer and pbuffer8 in step as far as their
5366     size goes. */
5367 
5368     while (pt + count * length > pbuffer8 + pbuffer8_size)
5369       {
5370       size_t pc_offset = pc - buffer;
5371       size_t pp_offset = pp - buffer;
5372       size_t pt_offset = pt - pbuffer8;
5373       expand_input_buffers();
5374       pc = buffer + pc_offset;
5375       pp = buffer + pp_offset;
5376       pt = pbuffer8 + pt_offset;
5377       }
5378 
5379     for (; count > 0; count--)
5380       {
5381       memcpy(pt, pc, length);
5382       pt += length;
5383       }
5384     }
5385 
5386   *pt = 0;
5387   patlen = pt - pbuffer8;
5388 
5389   if ((pat_patctl.control & CTL_INFO) != 0)
5390     fprintf(outfile, "Expanded: %s\n", pbuffer8);
5391   }
5392 
5393 /* Neither hex nor expanded, just copy the input verbatim. */
5394 
5395 else
5396   {
5397   strncpy((char *)pbuffer8, (char *)(buffer+1), patlen + 1);
5398   }
5399 
5400 /* Sort out character tables */
5401 
5402 if (pat_patctl.locale[0] != 0)
5403   {
5404   if (pat_patctl.tables_id != 0)
5405     {
5406     fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n");
5407     return PR_SKIP;
5408     }
5409   if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL)
5410     {
5411     fprintf(outfile, "** Failed to set locale '%s'\n", pat_patctl.locale);
5412     return PR_SKIP;
5413     }
5414   if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0)
5415     {
5416     strcpy((char *)locale_name, (char *)pat_patctl.locale);
5417     if (locale_tables != NULL) free((void *)locale_tables);
5418     PCRE2_MAKETABLES(locale_tables);
5419     }
5420   use_tables = locale_tables;
5421   }
5422 
5423 else switch (pat_patctl.tables_id)
5424   {
5425   case 0: use_tables = NULL; break;
5426   case 1: use_tables = tables1; break;
5427   case 2: use_tables = tables2; break;
5428 
5429   case 3:
5430   if (tables3 == NULL)
5431     {
5432     fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not "
5433       "been loaded\n");
5434     return PR_SKIP;
5435     }
5436   use_tables = tables3;
5437   break;
5438 
5439   default:
5440   fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n");
5441   return PR_SKIP;
5442   }
5443 
5444 PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
5445 
5446 /* Set up for the stackguard test. */
5447 
5448 if (pat_patctl.stackguard_test != 0)
5449   {
5450   PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
5451   }
5452 
5453 /* Handle compiling via the POSIX interface, which doesn't support the
5454 timing, showing, or debugging options, nor the ability to pass over
5455 local character tables. Neither does it have 16-bit or 32-bit support. */
5456 
5457 if ((pat_patctl.control & CTL_POSIX) != 0)
5458   {
5459 #ifdef SUPPORT_PCRE2_8
5460   int rc;
5461   int cflags = 0;
5462   const char *msg = "** Ignored with POSIX interface:";
5463 #endif
5464 
5465   if (test_mode != PCRE8_MODE)
5466     {
5467     fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n");
5468     return PR_SKIP;
5469     }
5470 
5471 #ifdef SUPPORT_PCRE2_8
5472   /* Check for features that the POSIX interface does not support. */
5473 
5474   if (pat_patctl.locale[0] != 0) prmsg(&msg, "locale");
5475   if (pat_patctl.replacement[0] != 0) prmsg(&msg, "replace");
5476   if (pat_patctl.tables_id != 0) prmsg(&msg, "tables");
5477   if (pat_patctl.stackguard_test != 0) prmsg(&msg, "stackguard");
5478   if (timeit > 0) prmsg(&msg, "timing");
5479   if (pat_patctl.jit != 0) prmsg(&msg, "JIT");
5480 
5481   if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0)
5482     {
5483     show_compile_options(
5484       pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
5485     msg = "";
5486     }
5487 
5488   if ((FLD(pat_context, extra_options) &
5489        ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
5490     {
5491     show_compile_extra_options(
5492       FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
5493         msg, "");
5494     msg = "";
5495     }
5496 
5497   if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5498       (pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
5499     {
5500     show_controls(pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS,
5501       pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2, msg);
5502     msg = "";
5503     }
5504 
5505   if (local_newline_default != 0) prmsg(&msg, "#newline_default");
5506   if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
5507     prmsg(&msg, "max_pattern_length");
5508   if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
5509     prmsg(&msg, "parens_nest_limit");
5510 
5511   if (msg[0] == 0) fprintf(outfile, "\n");
5512 
5513   /* Translate PCRE2 options to POSIX options and then compile. */
5514 
5515   if (utf) cflags |= REG_UTF;
5516   if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) cflags |= REG_NOSUB;
5517   if ((pat_patctl.options & PCRE2_UCP) != 0) cflags |= REG_UCP;
5518   if ((pat_patctl.options & PCRE2_CASELESS) != 0) cflags |= REG_ICASE;
5519   if ((pat_patctl.options & PCRE2_LITERAL) != 0) cflags |= REG_NOSPEC;
5520   if ((pat_patctl.options & PCRE2_MULTILINE) != 0) cflags |= REG_NEWLINE;
5521   if ((pat_patctl.options & PCRE2_DOTALL) != 0) cflags |= REG_DOTALL;
5522   if ((pat_patctl.options & PCRE2_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
5523 
5524   if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) != 0)
5525     {
5526     preg.re_endp = (char *)pbuffer8 + patlen;
5527     cflags |= REG_PEND;
5528     }
5529 
5530   rc = regcomp(&preg, (char *)pbuffer8, cflags);
5531 
5532   /* Compiling failed */
5533 
5534   if (rc != 0)
5535     {
5536     size_t bsize, usize;
5537     int psize;
5538 
5539     preg.re_pcre2_code = NULL;     /* In case something was left in there */
5540     preg.re_match_data = NULL;
5541 
5542     bsize = (pat_patctl.regerror_buffsize != 0)?
5543       pat_patctl.regerror_buffsize : pbuffer8_size;
5544     if (bsize + 8 < pbuffer8_size)
5545       memcpy(pbuffer8 + bsize, "DEADBEEF", 8);
5546     usize = regerror(rc, &preg, (char *)pbuffer8, bsize);
5547 
5548     /* Inside regerror(), snprintf() is used. If the buffer is too small, some
5549     versions of snprintf() put a zero byte at the end, but others do not.
5550     Therefore, we print a maximum of one less than the size of the buffer. */
5551 
5552     psize = (int)bsize - 1;
5553     fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8);
5554     if (usize > bsize)
5555       {
5556       fprintf(outfile, "** regerror() message truncated\n");
5557       if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0)
5558         fprintf(outfile, "** regerror() buffer overflow\n");
5559       }
5560     return PR_SKIP;
5561     }
5562 
5563   /* Compiling succeeded. Check that the values in the preg block are sensible.
5564   It can happen that pcre2test is accidentally linked with a different POSIX
5565   library which succeeds, but of course puts different things into preg. In
5566   this situation, calling regfree() may cause a segfault (or invalid free() in
5567   valgrind), so ensure that preg.re_pcre2_code is NULL, which suppresses the
5568   calling of regfree() on exit. */
5569 
5570   if (preg.re_pcre2_code == NULL ||
5571       ((pcre2_real_code_8 *)preg.re_pcre2_code)->magic_number != MAGIC_NUMBER ||
5572       ((pcre2_real_code_8 *)preg.re_pcre2_code)->top_bracket != preg.re_nsub ||
5573       preg.re_match_data == NULL ||
5574       preg.re_cflags != cflags)
5575     {
5576     fprintf(outfile,
5577       "** The regcomp() function returned zero (success), but the values set\n"
5578       "** in the preg block are not valid for PCRE2. Check that pcre2test is\n"
5579       "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n"
5580       "** some other POSIX regex library.\n**\n");
5581     preg.re_pcre2_code = NULL;
5582     return PR_ABEND;
5583     }
5584 
5585   return PR_OK;
5586 #endif  /* SUPPORT_PCRE2_8 */
5587   }
5588 
5589 /* Handle compiling via the native interface. Controls that act later are
5590 ignored with "push". Replacements are locked out. */
5591 
5592 if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5593   {
5594   if (pat_patctl.replacement[0] != 0)
5595     {
5596     fprintf(outfile, "** Replacement text is not supported with 'push'.\n");
5597     return PR_OK;
5598     }
5599   if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 ||
5600       (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0)
5601     {
5602     show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS,
5603                   pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2,
5604       "** Ignored when compiled pattern is stacked with 'push':");
5605     fprintf(outfile, "\n");
5606     }
5607   if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 ||
5608       (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0)
5609     {
5610     show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS,
5611                   pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2,
5612       "** Applies only to compile when pattern is stacked with 'push':");
5613     fprintf(outfile, "\n");
5614     }
5615   }
5616 
5617 /* Convert the input in non-8-bit modes. */
5618 
5619 errorcode = 0;
5620 
5621 #ifdef SUPPORT_PCRE2_16
5622 if (test_mode == PCRE16_MODE) errorcode = to16(pbuffer8, utf, &patlen);
5623 #endif
5624 
5625 #ifdef SUPPORT_PCRE2_32
5626 if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen);
5627 #endif
5628 
5629 switch(errorcode)
5630   {
5631   case -1:
5632   fprintf(outfile, "** Failed: invalid UTF-8 string cannot be "
5633     "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32);
5634   return PR_SKIP;
5635 
5636   case -2:
5637   fprintf(outfile, "** Failed: character value greater than 0x10ffff "
5638     "cannot be converted to UTF\n");
5639   return PR_SKIP;
5640 
5641   case -3:
5642   fprintf(outfile, "** Failed: character value greater than 0xffff "
5643     "cannot be converted to 16-bit in non-UTF mode\n");
5644   return PR_SKIP;
5645 
5646   default:
5647   break;
5648   }
5649 
5650 /* The pattern is now in pbuffer[8|16|32], with the length in code units in
5651 patlen. If it is to be converted, copy the result back afterwards so that it
5652 ends up back in the usual place. */
5653 
5654 if (pat_patctl.convert_type != CONVERT_UNSET)
5655   {
5656   int rc;
5657   int convert_return = PR_OK;
5658   uint32_t convert_options = pat_patctl.convert_type;
5659   void *converted_pattern;
5660   PCRE2_SIZE converted_length;
5661 
5662   if (pat_patctl.convert_length != 0)
5663     {
5664     converted_length = pat_patctl.convert_length;
5665     converted_pattern = malloc(converted_length * code_unit_size);
5666     if (converted_pattern == NULL)
5667       {
5668       fprintf(outfile, "** Failed: malloc failed for converted pattern\n");
5669       return PR_SKIP;
5670       }
5671     }
5672   else converted_pattern = NULL;  /* Let the library allocate */
5673 
5674   if (utf) convert_options |= PCRE2_CONVERT_UTF;
5675   if ((pat_patctl.options & PCRE2_NO_UTF_CHECK) != 0)
5676     convert_options |= PCRE2_CONVERT_NO_UTF_CHECK;
5677 
5678   CONCTXCPY(con_context, default_con_context);
5679 
5680   if (pat_patctl.convert_glob_escape != 0)
5681     {
5682     uint32_t escape = (pat_patctl.convert_glob_escape == '0')? 0 :
5683       pat_patctl.convert_glob_escape;
5684     PCRE2_SET_GLOB_ESCAPE(rc, con_context, escape);
5685     if (rc != 0)
5686       {
5687       fprintf(outfile, "** Invalid glob escape '%c'\n",
5688         pat_patctl.convert_glob_escape);
5689       convert_return = PR_SKIP;
5690       goto CONVERT_FINISH;
5691       }
5692     }
5693 
5694   if (pat_patctl.convert_glob_separator != 0)
5695     {
5696     PCRE2_SET_GLOB_SEPARATOR(rc, con_context, pat_patctl.convert_glob_separator);
5697     if (rc != 0)
5698       {
5699       fprintf(outfile, "** Invalid glob separator '%c'\n",
5700         pat_patctl.convert_glob_separator);
5701       convert_return = PR_SKIP;
5702       goto CONVERT_FINISH;
5703       }
5704     }
5705 
5706   PCRE2_PATTERN_CONVERT(rc, pbuffer, patlen, convert_options,
5707     &converted_pattern, &converted_length, con_context);
5708 
5709   if (rc != 0)
5710     {
5711     fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ",
5712       converted_length);
5713     convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND;
5714     }
5715 
5716   /* Output the converted pattern, then copy it. */
5717 
5718   else
5719     {
5720     PCHARSV(converted_pattern, 0, converted_length, utf, outfile);
5721     fprintf(outfile, "\n");
5722     patlen = converted_length;
5723     CONVERT_COPY(pbuffer, converted_pattern, converted_length + 1);
5724     }
5725 
5726   /* Free the converted pattern. */
5727 
5728   CONVERT_FINISH:
5729   if (pat_patctl.convert_length != 0)
5730     free(converted_pattern);
5731   else
5732     PCRE2_CONVERTED_PATTERN_FREE(converted_pattern);
5733 
5734   /* Return if conversion was unsuccessful. */
5735 
5736   if (convert_return != PR_OK) return convert_return;
5737   }
5738 
5739 /* By default we pass a zero-terminated pattern, but a length is passed if
5740 "use_length" was specified or this is a hex pattern (which might contain binary
5741 zeros). When valgrind is supported, arrange for the unused part of the buffer
5742 to be marked as no access. */
5743 
5744 valgrind_access_length = patlen;
5745 if ((pat_patctl.control & (CTL_HEXPAT|CTL_USE_LENGTH)) == 0)
5746   {
5747   patlen = PCRE2_ZERO_TERMINATED;
5748   valgrind_access_length += 1;  /* For the terminating zero */
5749   }
5750 
5751 #ifdef SUPPORT_VALGRIND
5752 #ifdef SUPPORT_PCRE2_8
5753 if (test_mode == PCRE8_MODE && pbuffer8 != NULL)
5754   {
5755   VALGRIND_MAKE_MEM_NOACCESS(pbuffer8 + valgrind_access_length,
5756     pbuffer8_size - valgrind_access_length);
5757   }
5758 #endif
5759 #ifdef SUPPORT_PCRE2_16
5760 if (test_mode == PCRE16_MODE && pbuffer16 != NULL)
5761   {
5762   VALGRIND_MAKE_MEM_NOACCESS(pbuffer16 + valgrind_access_length,
5763     pbuffer16_size - valgrind_access_length*sizeof(uint16_t));
5764   }
5765 #endif
5766 #ifdef SUPPORT_PCRE2_32
5767 if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
5768   {
5769   VALGRIND_MAKE_MEM_NOACCESS(pbuffer32 + valgrind_access_length,
5770     pbuffer32_size - valgrind_access_length*sizeof(uint32_t));
5771   }
5772 #endif
5773 #else  /* Valgrind not supported */
5774 (void)valgrind_access_length;  /* Avoid compiler warning */
5775 #endif
5776 
5777 /* If #newline_default has been used and the library was not compiled with an
5778 appropriate default newline setting, local_newline_default will be non-zero. We
5779 use this if there is no explicit newline modifier. */
5780 
5781 if ((pat_patctl.control2 & CTL2_NL_SET) == 0 && local_newline_default != 0)
5782   {
5783   SETFLD(pat_context, newline_convention, local_newline_default);
5784   }
5785 
5786 /* The null_context modifier is used to test calling pcre2_compile() with a
5787 NULL context. */
5788 
5789 use_pat_context = ((pat_patctl.control & CTL_NULLCONTEXT) != 0)?
5790   NULL : PTR(pat_context);
5791 
5792 /* If PCRE2_LITERAL is set, set use_forbid_utf zero because PCRE2_NEVER_UTF
5793 and PCRE2_NEVER_UCP are invalid with it. */
5794 
5795 if ((pat_patctl.options & PCRE2_LITERAL) != 0) use_forbid_utf = 0;
5796 
5797 /* Compile many times when timing. */
5798 
5799 if (timeit > 0)
5800   {
5801   int i;
5802   clock_t time_taken = 0;
5803   for (i = 0; i < timeit; i++)
5804     {
5805     clock_t start_time = clock();
5806     PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5807       pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5808         use_pat_context);
5809     time_taken += clock() - start_time;
5810     if (TEST(compiled_code, !=, NULL))
5811       { SUB1(pcre2_code_free, compiled_code); }
5812     }
5813   total_compile_time += time_taken;
5814   fprintf(outfile, "Compile time %.4f milliseconds\n",
5815     (((double)time_taken * 1000.0) / (double)timeit) /
5816       (double)CLOCKS_PER_SEC);
5817   }
5818 
5819 /* A final compile that is used "for real". */
5820 
5821 PCRE2_COMPILE(compiled_code, pbuffer, patlen, pat_patctl.options|use_forbid_utf,
5822   &errorcode, &erroroffset, use_pat_context);
5823 
5824 /* Call the JIT compiler if requested. When timing, we must free and recompile
5825 the pattern each time because that is the only way to free the JIT compiled
5826 code. We know that compilation will always succeed. */
5827 
5828 if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0)
5829   {
5830   if (timeit > 0)
5831     {
5832     int i;
5833     clock_t time_taken = 0;
5834 
5835     for (i = 0; i < timeit; i++)
5836       {
5837       clock_t start_time;
5838       SUB1(pcre2_code_free, compiled_code);
5839       PCRE2_COMPILE(compiled_code, pbuffer, patlen,
5840         pat_patctl.options|use_forbid_utf, &errorcode, &erroroffset,
5841         use_pat_context);
5842       start_time = clock();
5843       PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5844       time_taken += clock() - start_time;
5845       }
5846     total_jit_compile_time += time_taken;
5847     fprintf(outfile, "JIT compile  %.4f milliseconds\n",
5848       (((double)time_taken * 1000.0) / (double)timeit) /
5849         (double)CLOCKS_PER_SEC);
5850     }
5851   else
5852     {
5853     PCRE2_JIT_COMPILE(jitrc, compiled_code, pat_patctl.jit);
5854     }
5855   }
5856 
5857 /* If valgrind is supported, mark the pbuffer as accessible again. The 16-bit
5858 and 32-bit buffers can be marked completely undefined, but we must leave the
5859 pattern in the 8-bit buffer defined because it may be read from a callout
5860 during matching. */
5861 
5862 #ifdef SUPPORT_VALGRIND
5863 #ifdef SUPPORT_PCRE2_8
5864 if (test_mode == PCRE8_MODE)
5865   {
5866   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer8 + valgrind_access_length,
5867     pbuffer8_size - valgrind_access_length);
5868   }
5869 #endif
5870 #ifdef SUPPORT_PCRE2_16
5871 if (test_mode == PCRE16_MODE)
5872   {
5873   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer16, pbuffer16_size);
5874   }
5875 #endif
5876 #ifdef SUPPORT_PCRE2_32
5877 if (test_mode == PCRE32_MODE)
5878   {
5879   VALGRIND_MAKE_MEM_UNDEFINED(pbuffer32, pbuffer32_size);
5880   }
5881 #endif
5882 #endif
5883 
5884 /* Compilation failed; go back for another re, skipping to blank line
5885 if non-interactive. */
5886 
5887 if (TEST(compiled_code, ==, NULL))
5888   {
5889   fprintf(outfile, "Failed: error %d at offset %d: ", errorcode,
5890     (int)erroroffset);
5891   if (!print_error_message(errorcode, "", "\n")) return PR_ABEND;
5892   return PR_SKIP;
5893   }
5894 
5895 /* If forbid_utf is non-zero, we are running a non-UTF test. UTF and UCP are
5896 locked out at compile time, but we must also check for occurrences of \P, \p,
5897 and \X, which are only supported when Unicode is supported. */
5898 
5899 if (forbid_utf != 0)
5900   {
5901   if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0)
5902     {
5903     fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the "
5904       "#forbid_utf command\n");
5905     return PR_SKIP;
5906     }
5907   }
5908 
5909 /* Remember the maximum lookbehind, for partial matching. */
5910 
5911 if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
5912   return PR_ABEND;
5913 
5914 /* Remember the number of captures. */
5915 
5916 if (pattern_info(PCRE2_INFO_CAPTURECOUNT, &maxcapcount, FALSE) < 0)
5917   return PR_ABEND;
5918 
5919 /* If an explicit newline modifier was given, set the information flag in the
5920 pattern so that it is preserved over push/pop. */
5921 
5922 if ((pat_patctl.control2 & CTL2_NL_SET) != 0)
5923   {
5924   SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
5925   }
5926 
5927 /* Output code size and other information if requested. */
5928 
5929 if ((pat_patctl.control & CTL_MEMORY) != 0) show_memory_info();
5930 if ((pat_patctl.control & CTL_FRAMESIZE) != 0) show_framesize();
5931 if ((pat_patctl.control & CTL_ANYINFO) != 0)
5932   {
5933   int rc = show_pattern_info();
5934   if (rc != PR_OK) return rc;
5935   }
5936 
5937 /* The "push" control requests that the compiled pattern be remembered on a
5938 stack. This is mainly for testing the serialization functionality. */
5939 
5940 if ((pat_patctl.control & CTL_PUSH) != 0)
5941   {
5942   if (patstacknext >= PATSTACKSIZE)
5943     {
5944     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5945     return PR_ABEND;
5946     }
5947   patstack[patstacknext++] = PTR(compiled_code);
5948   SET(compiled_code, NULL);
5949   }
5950 
5951 /* The "pushcopy" and "pushtablescopy" controls are similar, but push a
5952 copy of the pattern, the latter with a copy of its character tables. This tests
5953 the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
5954 
5955 if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
5956   {
5957   if (patstacknext >= PATSTACKSIZE)
5958     {
5959     fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
5960     return PR_ABEND;
5961     }
5962   if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
5963     {
5964     PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
5965     }
5966   else
5967     {
5968     PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
5969       compiled_code); }
5970   }
5971 
5972 return PR_OK;
5973 }
5974 
5975 
5976 
5977 /*************************************************
5978 *          Check heap, match or depth limit      *
5979 *************************************************/
5980 
5981 /* This is used for DFA, normal, and JIT fast matching. For DFA matching it
5982 should only be called with the third argument set to PCRE2_ERROR_DEPTHLIMIT.
5983 
5984 Arguments:
5985   pp        the subject string
5986   ulen      length of subject or PCRE2_ZERO_TERMINATED
5987   errnumber defines which limit to test
5988   msg       string to include in final message
5989 
5990 Returns:    the return from the final match function call
5991 */
5992 
5993 static int
check_match_limit(uint8_t * pp,PCRE2_SIZE ulen,int errnumber,const char * msg)5994 check_match_limit(uint8_t *pp, PCRE2_SIZE ulen, int errnumber, const char *msg)
5995 {
5996 int capcount;
5997 uint32_t min = 0;
5998 uint32_t mid = 64;
5999 uint32_t max = UINT32_MAX;
6000 
6001 PCRE2_SET_MATCH_LIMIT(dat_context, max);
6002 PCRE2_SET_DEPTH_LIMIT(dat_context, max);
6003 PCRE2_SET_HEAP_LIMIT(dat_context, max);
6004 
6005 for (;;)
6006   {
6007   uint32_t stack_start = 0;
6008 
6009   if (errnumber == PCRE2_ERROR_HEAPLIMIT)
6010     {
6011     PCRE2_SET_HEAP_LIMIT(dat_context, mid);
6012     }
6013   else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
6014     {
6015     PCRE2_SET_MATCH_LIMIT(dat_context, mid);
6016     }
6017   else
6018     {
6019     PCRE2_SET_DEPTH_LIMIT(dat_context, mid);
6020     }
6021 
6022   if ((dat_datctl.control & CTL_DFA) != 0)
6023     {
6024     stack_start = DFA_START_RWS_SIZE/1024;
6025     if (dfa_workspace == NULL)
6026       dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
6027     if (dfa_matched++ == 0)
6028       dfa_workspace[0] = -1;  /* To catch bad restart */
6029     PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6030       dat_datctl.options, match_data,
6031       PTR(dat_context), dfa_workspace, DFA_WS_DIMENSION);
6032     }
6033 
6034   else if ((pat_patctl.control & CTL_JITFAST) != 0)
6035     PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6036       dat_datctl.options, match_data, PTR(dat_context));
6037 
6038   else
6039     {
6040     stack_start = START_FRAMES_SIZE/1024;
6041     PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
6042       dat_datctl.options, match_data, PTR(dat_context));
6043     }
6044 
6045   if (capcount == errnumber)
6046     {
6047     if ((mid & 0x80000000u) != 0)
6048       {
6049       fprintf(outfile, "Can't find minimum %s limit: check pattern for "
6050         "restriction\n", msg);
6051       break;
6052       }
6053 
6054     min = mid;
6055     mid = (mid == max - 1)? max : (max != UINT32_MAX)? (min + max)/2 : mid*2;
6056     }
6057   else if (capcount >= 0 ||
6058            capcount == PCRE2_ERROR_NOMATCH ||
6059            capcount == PCRE2_ERROR_PARTIAL)
6060     {
6061     /* If we've not hit the error with a heap limit less than the size of the
6062     initial stack frame vector (for pcre2_match()) or the initial stack
6063     workspace vector (for pcre2_dfa_match()), the heap is not being used, so
6064     the minimum limit is zero; there's no need to go on. The other limits are
6065     always greater than zero. */
6066 
6067     if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start)
6068       {
6069       fprintf(outfile, "Minimum %s limit = 0\n", msg);
6070       break;
6071       }
6072     if (mid == min + 1)
6073       {
6074       fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
6075       break;
6076       }
6077     max = mid;
6078     mid = (min + max)/2;
6079     }
6080   else break;    /* Some other error */
6081   }
6082 
6083 return capcount;
6084 }
6085 
6086 
6087 
6088 /*************************************************
6089 *        Substitute callout function             *
6090 *************************************************/
6091 
6092 /* Called from pcre2_substitute() when the substitute_callout modifier is set.
6093 Print out the data that is passed back. The substitute callout block is
6094 identical for all code unit widths, so we just pick one.
6095 
6096 Arguments:
6097   scb         pointer to substitute callout block
6098   data_ptr    callout data
6099 
6100 Returns:      nothing
6101 */
6102 
6103 static int
substitute_callout_function(pcre2_substitute_callout_block_8 * scb,void * data_ptr)6104 substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
6105   void *data_ptr)
6106 {
6107 int yield = 0;
6108 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6109 (void)data_ptr;   /* Not used */
6110 
6111 fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"",
6112   scb->subscount, scb->oveccount,
6113   scb->ovector[0], scb->ovector[1]);
6114 
6115 PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0],
6116   utf, outfile);
6117 
6118 fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"",
6119   scb->output_offsets[0], scb->output_offsets[1]);
6120 
6121 PCHARSV(scb->output, scb->output_offsets[0],
6122   scb->output_offsets[1] - scb->output_offsets[0], utf, outfile);
6123 
6124 if (scb->subscount == dat_datctl.substitute_stop)
6125   {
6126   yield = -1;
6127   fprintf(outfile, " STOPPED");
6128   }
6129 else if (scb->subscount == dat_datctl.substitute_skip)
6130   {
6131   yield = +1;
6132   fprintf(outfile, " SKIPPED");
6133   }
6134 
6135 fprintf(outfile, "\"\n");
6136 return yield;
6137 }
6138 
6139 
/*************************************************
*              Callout function                  *
*************************************************/

/* Called from a PCRE2 library as a result of the (?C) item. We print out where
we are in the match (unless suppressed). Yield zero unless more callouts than
the fail count, or the callout data is not zero. The only differences in the
callout block for different code unit widths are that the pointers to the
subject, the most recent MARK, and a callout argument string point to strings
of the appropriate width. Casts can be used to deal with this.

All output goes to outfile. As side effects, first_callout is cleared,
last_callout_mark is updated, and callout_count is incremented (the latter only
when non-zero callout data does not cause an early return).

Arguments:
  cb                a pointer to a callout block
  callout_data_ptr  the provided callout data

Returns:            the callout data value, if a non-NULL pointer to a
                      non-zero value was provided
                    PCRE2_ERROR_CALLOUT if the callout number equals cerror[0]
                      and the callout count has reached cerror[1]
                    1 if the callout number equals cfail[0] and the callout
                      count has reached cfail[1]
                    0 otherwise
*/

static int
callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
{
FILE *f, *fdefault;
uint32_t i, pre_start, post_start, subject_length;
PCRE2_SIZE current_position;
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
BOOL callout_where = (dat_datctl.control2 & CTL2_CALLOUT_NO_WHERE) == 0;

/* The FILE f is used for echoing the subject string if it is non-NULL. This
happens only once in simple cases, but we want to repeat after any additional
output caused by CALLOUT_EXTRA. The default is NULL (no echo) only when this
is not the first callout, captures are not being shown, and the callout has no
string argument. */

fdefault = (!first_callout && !callout_capture && cb->callout_string == NULL)?
  NULL : outfile;

/* With callout_extra, report backtracks and new match attempts, and force the
subject to be echoed after such a report. */

if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0)
  {
  f = outfile;
  switch (cb->callout_flags)
    {
    case PCRE2_CALLOUT_BACKTRACK:
    fprintf(f, "Backtrack\n");
    break;

    case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK:
    fprintf(f, "Backtrack\nNo other matching paths\n");
    /* Fall through */

    case PCRE2_CALLOUT_STARTMATCH:
    fprintf(f, "New match attempt\n");
    break;

    default:
    f = fdefault;
    break;
    }
  }
else f = fdefault;

/* For a callout with a string argument, show the string first because there
isn't a tidy way to fit it in the rest of the data. The opening delimiter is
the code unit immediately before the string; the closing delimiter is looked
up in the parallel callout_start_delims/callout_end_delims tables, and is the
same as the opening one if no entry is found. */

if (cb->callout_string != NULL)
  {
  uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
  fprintf(outfile, "Callout (%" SIZ_FORM "): %c",
    cb->callout_string_offset, delimiter);
  PCHARSV(cb->callout_string, 0,
    cb->callout_string_length, utf, outfile);
  for (i = 0; callout_start_delims[i] != 0; i++)
    if (delimiter == callout_start_delims[i])
      {
      delimiter = callout_end_delims[i];
      break;
      }
  fprintf(outfile, "%c", delimiter);
  if (!callout_capture) fprintf(outfile, "\n");
  }

/* Show captured strings if required. Group 0 is not shown; the loop starts at
the ovector pair for group 1. */

if (callout_capture)
  {
  if (cb->callout_string == NULL)
    fprintf(outfile, "Callout %d:", cb->callout_number);
  fprintf(outfile, " last capture = %d\n", cb->capture_last);
  for (i = 2; i < cb->capture_top * 2; i += 2)
    {
    fprintf(outfile, "%2d: ", i/2);
    if (cb->offset_vector[i] == PCRE2_UNSET)
      fprintf(outfile, "<unset>");
    else
      {
      /* This passes f rather than outfile, but within this block they are the
      same: when callout_capture is set fdefault is outfile, and f is either
      outfile or fdefault. */

      PCHARSV(cb->subject, cb->offset_vector[i],
        cb->offset_vector[i+1] - cb->offset_vector[i], utf, f);
      }
    fprintf(outfile, "\n");
    }
  }

/* Unless suppressed, re-print the subject in canonical form (with escapes for
non-printing characters), the first time, or if giving full details. On
subsequent calls in the same match, we use PCHARS() just to find the printed
lengths of the substrings. */

if (callout_where)
  {
  if (f != NULL) fprintf(f, "--->");

  /* The subject before the match start. */

  PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);

  /* If a lookbehind is involved, the current position may be earlier than the
  match start. If so, use the match start instead. */

  current_position = (cb->current_position >= cb->start_match)?
    cb->current_position : cb->start_match;

  /* The subject between the match start and the current position. */

  PCHARS(post_start, cb->subject, cb->start_match,
    current_position - cb->start_match, utf, f);

  /* Print from the current position to the end. */

  PCHARSV(cb->subject, current_position, cb->subject_length - current_position,
    utf, f);

  /* Calculate the total subject printed length (no print). */

  PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);

  if (f != NULL) fprintf(f, "\n");

  /* For automatic callouts, show the pattern offset. Otherwise, for a
  numerical callout whose number has not already been shown with captured
  strings, show the number here. A callout with a string argument has been
  displayed above. An offset that needs more than two digits is put on a line
  of its own, followed by four spaces on the next line. */

  if (cb->callout_number == 255)
    {
    fprintf(outfile, "%+3d ", (int)cb->pattern_position);
    if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
    }
  else
    {
    if (callout_capture || cb->callout_string != NULL) fprintf(outfile, "    ");
      else fprintf(outfile, "%3d ", cb->callout_number);
    }

  /* Now show position indicators: a circumflex under the match start, and, if
  the current position is beyond it, another under the current position. */

  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
  fprintf(outfile, "^");

  if (post_start > 0)
    {
    for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
    fprintf(outfile, "^");
    }

  /* Pad to four columns beyond the printed length of the subject. */

  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
    fprintf(outfile, " ");

  /* Show the next pattern item, taken as next_item_length bytes at the
  pattern offset in the 8-bit pattern buffer. */

  if (cb->next_item_length != 0)
    fprintf(outfile, "%.*s", (int)(cb->next_item_length),
      pbuffer8 + cb->pattern_position);
  else
    fprintf(outfile, "End of pattern");

  fprintf(outfile, "\n");
  }

first_callout = FALSE;

/* Show any mark info, but only when the mark pointer differs from the one
seen at the previous callout. */

if (cb->mark != last_callout_mark)
  {
  if (cb->mark == NULL)
    fprintf(outfile, "Latest Mark: <unset>\n");
  else
    {
    fprintf(outfile, "Latest Mark: ");
    PCHARSV(cb->mark, -1, -1, utf, outfile);
    putc('\n', outfile);
    }
  last_callout_mark = cb->mark;
  }

/* Show callout data. A non-zero value is returned immediately, before the
callout count is incremented. */

if (callout_data_ptr != NULL)
  {
  int callout_data = *((int32_t *)callout_data_ptr);
  if (callout_data != 0)
    {
    fprintf(outfile, "Callout data = %d\n", callout_data);
    return callout_data;
    }
  }

/* Keep count and give the appropriate return code. The error test is made
before the fail test. */

callout_count++;

if (cb->callout_number == dat_datctl.cerror[0] &&
    callout_count >= dat_datctl.cerror[1])
  return PCRE2_ERROR_CALLOUT;

if (cb->callout_number == dat_datctl.cfail[0] &&
    callout_count >= dat_datctl.cfail[1])
  return 1;

return 0;
}
6357 
6358 
6359 
6360 /*************************************************
6361 *       Handle *MARK and copy/get tests          *
6362 *************************************************/
6363 
6364 /* This function is called after complete and partial matches. It runs the
6365 tests for substring extraction.
6366 
6367 Arguments:
6368   utf       TRUE for utf
6369   capcount  return from pcre2_match()
6370 
6371 Returns:    FALSE if print_error_message() fails
6372 */
6373 
6374 static BOOL
copy_and_get(BOOL utf,int capcount)6375 copy_and_get(BOOL utf, int capcount)
6376 {
6377 int i;
6378 uint8_t *nptr;
6379 
6380 /* Test copy strings by number */
6381 
6382 for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++)
6383   {
6384   int rc;
6385   PCRE2_SIZE length, length2;
6386   uint32_t copybuffer[256];
6387   uint32_t n = (uint32_t)(dat_datctl.copy_numbers[i]);
6388   length = sizeof(copybuffer)/code_unit_size;
6389   PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length);
6390   if (rc < 0)
6391     {
6392     fprintf(outfile, "Copy substring %d failed (%d): ", n, rc);
6393     if (!print_error_message(rc, "", "\n")) return FALSE;
6394     }
6395   else
6396     {
6397     PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2);
6398     if (rc < 0)
6399       {
6400       fprintf(outfile, "Get substring %d length failed (%d): ", n, rc);
6401       if (!print_error_message(rc, "", "\n")) return FALSE;
6402       }
6403     else if (length2 != length)
6404       {
6405       fprintf(outfile, "Mismatched substring lengths: %"
6406         SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6407       }
6408     fprintf(outfile, "%2dC ", n);
6409     PCHARSV(copybuffer, 0, length, utf, outfile);
6410     fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6411     }
6412   }
6413 
6414 /* Test copy strings by name */
6415 
6416 nptr = dat_datctl.copy_names;
6417 for (;;)
6418   {
6419   int rc;
6420   int groupnumber;
6421   PCRE2_SIZE length, length2;
6422   uint32_t copybuffer[256];
6423   int namelen = strlen((const char *)nptr);
6424 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6425   PCRE2_SIZE cnl = namelen;
6426 #endif
6427   if (namelen == 0) break;
6428 
6429 #ifdef SUPPORT_PCRE2_8
6430   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6431 #endif
6432 #ifdef SUPPORT_PCRE2_16
6433   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6434 #endif
6435 #ifdef SUPPORT_PCRE2_32
6436   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6437 #endif
6438 
6439   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6440   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6441     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6442 
6443   length = sizeof(copybuffer)/code_unit_size;
6444   PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length);
6445   if (rc < 0)
6446     {
6447     fprintf(outfile, "Copy substring '%s' failed (%d): ", nptr, rc);
6448     if (!print_error_message(rc, "", "\n")) return FALSE;
6449     }
6450   else
6451     {
6452     PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2);
6453     if (rc < 0)
6454       {
6455       fprintf(outfile, "Get substring '%s' length failed (%d): ", nptr, rc);
6456       if (!print_error_message(rc, "", "\n")) return FALSE;
6457       }
6458     else if (length2 != length)
6459       {
6460       fprintf(outfile, "Mismatched substring lengths: %"
6461         SIZ_FORM " %" SIZ_FORM "\n", length, length2);
6462       }
6463     fprintf(outfile, "  C ");
6464     PCHARSV(copybuffer, 0, length, utf, outfile);
6465     fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6466     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6467       else fprintf(outfile, " (non-unique)\n");
6468     }
6469   nptr += namelen + 1;
6470   }
6471 
6472 /* Test get strings by number */
6473 
6474 for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++)
6475   {
6476   int rc;
6477   PCRE2_SIZE length;
6478   void *gotbuffer;
6479   uint32_t n = (uint32_t)(dat_datctl.get_numbers[i]);
6480   PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length);
6481   if (rc < 0)
6482     {
6483     fprintf(outfile, "Get substring %d failed (%d): ", n, rc);
6484     if (!print_error_message(rc, "", "\n")) return FALSE;
6485     }
6486   else
6487     {
6488     fprintf(outfile, "%2dG ", n);
6489     PCHARSV(gotbuffer, 0, length, utf, outfile);
6490     fprintf(outfile, " (%" SIZ_FORM ")\n", length);
6491     PCRE2_SUBSTRING_FREE(gotbuffer);
6492     }
6493   }
6494 
6495 /* Test get strings by name */
6496 
6497 nptr = dat_datctl.get_names;
6498 for (;;)
6499   {
6500   PCRE2_SIZE length;
6501   void *gotbuffer;
6502   int rc;
6503   int groupnumber;
6504   int namelen = strlen((const char *)nptr);
6505 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
6506   PCRE2_SIZE cnl = namelen;
6507 #endif
6508   if (namelen == 0) break;
6509 
6510 #ifdef SUPPORT_PCRE2_8
6511   if (test_mode == PCRE8_MODE) strcpy((char *)pbuffer8, (char *)nptr);
6512 #endif
6513 #ifdef SUPPORT_PCRE2_16
6514   if (test_mode == PCRE16_MODE)(void)to16(nptr, utf, &cnl);
6515 #endif
6516 #ifdef SUPPORT_PCRE2_32
6517   if (test_mode == PCRE32_MODE)(void)to32(nptr, utf, &cnl);
6518 #endif
6519 
6520   PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer);
6521   if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING)
6522     fprintf(outfile, "Number not found for group '%s'\n", nptr);
6523 
6524   PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length);
6525   if (rc < 0)
6526     {
6527     fprintf(outfile, "Get substring '%s' failed (%d): ", nptr, rc);
6528     if (!print_error_message(rc, "", "\n")) return FALSE;
6529     }
6530   else
6531     {
6532     fprintf(outfile, "  G ");
6533     PCHARSV(gotbuffer, 0, length, utf, outfile);
6534     fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr);
6535     if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber);
6536       else fprintf(outfile, " (non-unique)\n");
6537     PCRE2_SUBSTRING_FREE(gotbuffer);
6538     }
6539   nptr += namelen + 1;
6540   }
6541 
6542 /* Test getting the complete list of captured strings. */
6543 
6544 if ((dat_datctl.control & CTL_GETALL) != 0)
6545   {
6546   int rc;
6547   void **stringlist;
6548   PCRE2_SIZE *lengths;
6549   PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths);
6550   if (rc < 0)
6551     {
6552     fprintf(outfile, "get substring list failed (%d): ", rc);
6553     if (!print_error_message(rc, "", "\n")) return FALSE;
6554     }
6555   else
6556     {
6557     for (i = 0; i < capcount; i++)
6558       {
6559       fprintf(outfile, "%2dL ", i);
6560       PCHARSV(stringlist[i], 0, lengths[i], utf, outfile);
6561       putc('\n', outfile);
6562       }
6563     if (stringlist[i] != NULL)
6564       fprintf(outfile, "string list not terminated by NULL\n");
6565     PCRE2_SUBSTRING_LIST_FREE(stringlist);
6566     }
6567   }
6568 
6569 return TRUE;
6570 }
6571 
6572 
6573 
6574 /*************************************************
6575 *            Show an entire ovector              *
6576 *************************************************/
6577 
6578 /* This function is called after partial matching or match failure, when the
6579 "allvector" modifier is set. It is a means of checking the contents of the
6580 entire ovector, to ensure no modification of fields that should be unchanged.
6581 
6582 Arguments:
6583   ovector      points to the ovector
6584   oveccount    number of pairs
6585 
6586 Returns:       nothing
6587 */
6588 
6589 static void
show_ovector(PCRE2_SIZE * ovector,uint32_t oveccount)6590 show_ovector(PCRE2_SIZE *ovector, uint32_t oveccount)
6591 {
6592 uint32_t i;
6593 for (i = 0; i < 2*oveccount; i += 2)
6594   {
6595   PCRE2_SIZE start = ovector[i];
6596   PCRE2_SIZE end = ovector[i+1];
6597 
6598   fprintf(outfile, "%2d: ", i/2);
6599   if (start == PCRE2_UNSET && end == PCRE2_UNSET)
6600     fprintf(outfile, "<unset>\n");
6601   else if (start == JUNK_OFFSET && end == JUNK_OFFSET)
6602     fprintf(outfile, "<unchanged>\n");
6603   else
6604     fprintf(outfile, "%ld %ld\n", (unsigned long int)start,
6605       (unsigned long int)end);
6606   }
6607 }
6608 
6609 
6610 /*************************************************
6611 *               Process a data line              *
6612 *************************************************/
6613 
6614 /* The line is in buffer; it will not be empty.
6615 
6616 Arguments:  none
6617 
6618 Returns:    PR_OK     continue processing next line
6619             PR_SKIP   skip to a blank line
6620             PR_ABEND  abort the pcre2test run
6621 */
6622 
6623 static int
process_data(void)6624 process_data(void)
6625 {
6626 PCRE2_SIZE len, ulen, arg_ulen;
6627 uint32_t gmatched;
6628 uint32_t c, k;
6629 uint32_t g_notempty = 0;
6630 uint8_t *p, *pp, *start_rep;
6631 size_t needlen;
6632 void *use_dat_context;
6633 BOOL utf;
6634 BOOL subject_literal;
6635 
6636 PCRE2_SIZE *ovector;
6637 PCRE2_SIZE ovecsave[3];
6638 uint32_t oveccount;
6639 
6640 #ifdef SUPPORT_PCRE2_8
6641 uint8_t *q8 = NULL;
6642 #endif
6643 #ifdef SUPPORT_PCRE2_16
6644 uint16_t *q16 = NULL;
6645 #endif
6646 #ifdef SUPPORT_PCRE2_32
6647 uint32_t *q32 = NULL;
6648 #endif
6649 
6650 subject_literal = (pat_patctl.control2 & CTL2_SUBJECT_LITERAL) != 0;
6651 
6652 /* Copy the default context and data control blocks to the active ones. Then
6653 copy from the pattern the controls that can be set in either the pattern or the
6654 data. This allows them to be overridden in the data line. We do not do this for
6655 options because those that are common apply separately to compiling and
6656 matching. */
6657 
6658 DATCTXCPY(dat_context, default_dat_context);
6659 memcpy(&dat_datctl, &def_datctl, sizeof(datctl));
6660 dat_datctl.control |= (pat_patctl.control & CTL_ALLPD);
6661 dat_datctl.control2 |= (pat_patctl.control2 & CTL2_ALLPD);
6662 strcpy((char *)dat_datctl.replacement, (char *)pat_patctl.replacement);
6663 if (dat_datctl.jitstack == 0) dat_datctl.jitstack = pat_patctl.jitstack;
6664 
6665 if (dat_datctl.substitute_skip == 0)
6666     dat_datctl.substitute_skip = pat_patctl.substitute_skip;
6667 if (dat_datctl.substitute_stop == 0)
6668     dat_datctl.substitute_stop = pat_patctl.substitute_stop;
6669 
6670 /* Initialize for scanning the data line. */
6671 
6672 #ifdef SUPPORT_PCRE2_8
6673 utf = ((((pat_patctl.control & CTL_POSIX) != 0)?
6674   ((pcre2_real_code_8 *)preg.re_pcre2_code)->overall_options :
6675   FLD(compiled_code, overall_options)) & PCRE2_UTF) != 0;
6676 #else
6677 utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
6678 #endif
6679 
6680 start_rep = NULL;
6681 len = strlen((const char *)buffer);
6682 while (len > 0 && isspace(buffer[len-1])) len--;
6683 buffer[len] = 0;
6684 p = buffer;
6685 while (isspace(*p)) p++;
6686 
6687 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
6688 invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */
6689 
6690 if (utf)
6691   {
6692   uint8_t *q;
6693   uint32_t cc;
6694   int n = 1;
6695   for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
6696   if (n <= 0)
6697     {
6698     fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input "
6699       "in UTF mode\n");
6700     return PR_OK;
6701     }
6702   }
6703 
6704 #ifdef SUPPORT_VALGRIND
6705 /* Mark the dbuffer as addressable but undefined again. */
6706 if (dbuffer != NULL)
6707   {
6708   VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size);
6709   }
6710 #endif
6711 
6712 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
6713 the number of code units that will be needed (though the buffer may have to be
6714 extended if replication is involved). */
6715 
6716 needlen = (size_t)((len+1) * code_unit_size);
6717 if (dbuffer == NULL || needlen >= dbuffer_size)
6718   {
6719   while (needlen >= dbuffer_size) dbuffer_size *= 2;
6720   dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6721   if (dbuffer == NULL)
6722     {
6723     fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6724     exit(1);
6725     }
6726   }
6727 SETCASTPTR(q, dbuffer);  /* Sets q8, q16, or q32, as appropriate. */
6728 
6729 /* Scan the data line, interpreting data escapes, and put the result into a
6730 buffer of the appropriate width. In UTF mode, input is always UTF-8; otherwise,
6731 in 16- and 32-bit modes, it can be forced to UTF-8 by the utf8_input modifier.
6732 */
6733 
6734 while ((c = *p++) != 0)
6735   {
6736   int32_t i = 0;
6737   size_t replen;
6738 
6739   /* ] may mark the end of a replicated sequence */
6740 
6741   if (c == ']' && start_rep != NULL)
6742     {
6743     long li;
6744     char *endptr;
6745     size_t qoffset = CAST8VAR(q) - dbuffer;
6746     size_t rep_offset = start_rep - dbuffer;
6747 
6748     if (*p++ != '{')
6749       {
6750       fprintf(outfile, "** Expected '{' after \\[....]\n");
6751       return PR_OK;
6752       }
6753 
6754     li = strtol((const char *)p, &endptr, 10);
6755     if (S32OVERFLOW(li))
6756       {
6757       fprintf(outfile, "** Repeat count too large\n");
6758       return PR_OK;
6759       }
6760 
6761     p = (uint8_t *)endptr;
6762     if (*p++ != '}')
6763       {
6764       fprintf(outfile, "** Expected '}' after \\[...]{...\n");
6765       return PR_OK;
6766       }
6767 
6768     i = (int32_t)li;
6769     if (i-- == 0)
6770       {
6771       fprintf(outfile, "** Zero repeat not allowed\n");
6772       return PR_OK;
6773       }
6774 
6775     replen = CAST8VAR(q) - start_rep;
6776     needlen += replen * i;
6777 
6778     if (needlen >= dbuffer_size)
6779       {
6780       while (needlen >= dbuffer_size) dbuffer_size *= 2;
6781       dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size);
6782       if (dbuffer == NULL)
6783         {
6784         fprintf(stderr, "pcre2test: realloc(%d) failed\n", (int)dbuffer_size);
6785         exit(1);
6786         }
6787       SETCASTPTR(q, dbuffer + qoffset);
6788       start_rep = dbuffer + rep_offset;
6789       }
6790 
6791     while (i-- > 0)
6792       {
6793       memcpy(CAST8VAR(q), start_rep, replen);
6794       SETPLUS(q, replen/code_unit_size);
6795       }
6796 
6797     start_rep = NULL;
6798     continue;
6799     }
6800 
6801   /* Handle a non-escaped character. In non-UTF 32-bit mode with utf8_input
6802   set, do the fudge for setting the top bit. */
6803 
6804   if (c != '\\' || subject_literal)
6805     {
6806     uint32_t topbit = 0;
6807     if (test_mode == PCRE32_MODE && c == 0xff && *p != 0)
6808       {
6809       topbit = 0x80000000;
6810       c = *p++;
6811       }
6812     if ((utf || (pat_patctl.control & CTL_UTF8_INPUT) != 0) &&
6813       HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
6814     c |= topbit;
6815     }
6816 
6817   /* Handle backslash escapes */
6818 
6819   else switch ((c = *p++))
6820     {
6821     case '\\': break;
6822     case 'a': c = CHAR_BEL; break;
6823     case 'b': c = '\b'; break;
6824     case 'e': c = CHAR_ESC; break;
6825     case 'f': c = '\f'; break;
6826     case 'n': c = '\n'; break;
6827     case 'r': c = '\r'; break;
6828     case 't': c = '\t'; break;
6829     case 'v': c = '\v'; break;
6830 
6831     case '0': case '1': case '2': case '3':
6832     case '4': case '5': case '6': case '7':
6833     c -= '0';
6834     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
6835       c = c * 8 + *p++ - '0';
6836     break;
6837 
6838     case 'o':
6839     if (*p == '{')
6840       {
6841       uint8_t *pt = p;
6842       c = 0;
6843       for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
6844         {
6845         if (++i == 12)
6846           fprintf(outfile, "** Too many octal digits in \\o{...} item; "
6847                            "using only the first twelve.\n");
6848         else c = c * 8 + *pt - '0';
6849         }
6850       if (*pt == '}') p = pt + 1;
6851         else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
6852       }
6853     break;
6854 
6855     case 'x':
6856     if (*p == '{')
6857       {
6858       uint8_t *pt = p;
6859       c = 0;
6860 
6861       /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
6862       when isxdigit() is a macro that refers to its argument more than
6863       once. This is banned by the C Standard, but apparently happens in at
6864       least one MacOS environment. */
6865 
6866       for (pt++; isxdigit(*pt); pt++)
6867         {
6868         if (++i == 9)
6869           fprintf(outfile, "** Too many hex digits in \\x{...} item; "
6870                            "using only the first eight.\n");
6871         else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
6872         }
6873       if (*pt == '}')
6874         {
6875         p = pt + 1;
6876         break;
6877         }
6878       /* Not correct form for \x{...}; fall through */
6879       }
6880 
6881     /* \x without {} always defines just one byte in 8-bit mode. This
6882     allows UTF-8 characters to be constructed byte by byte, and also allows
6883     invalid UTF-8 sequences to be made. Just copy the byte in UTF-8 mode.
6884     Otherwise, pass it down as data. */
6885 
6886     c = 0;
6887     while (i++ < 2 && isxdigit(*p))
6888       {
6889       c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
6890       p++;
6891       }
6892 #if defined SUPPORT_PCRE2_8
6893     if (utf && (test_mode == PCRE8_MODE))
6894       {
6895       *q8++ = c;
6896       continue;
6897       }
6898 #endif
6899     break;
6900 
6901     case 0:     /* \ followed by EOF allows for an empty line */
6902     p--;
6903     continue;
6904 
6905     case '=':   /* \= terminates the data, starts modifiers */
6906     goto ENDSTRING;
6907 
6908     case '[':   /* \[ introduces a replicated character sequence */
6909     if (start_rep != NULL)
6910       {
6911       fprintf(outfile, "** Nested replication is not supported\n");
6912       return PR_OK;
6913       }
6914     start_rep = CAST8VAR(q);
6915     continue;
6916 
6917     default:
6918     if (isalnum(c))
6919       {
6920       fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c);
6921       return PR_OK;
6922       }
6923     }
6924 
6925   /* We now have a character value in c that may be greater than 255.
6926   In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
6927   than 127 in UTF mode must have come from \x{...} or octal constructs
6928   because values from \x.. get this far only in non-UTF mode. */
6929 
6930 #ifdef SUPPORT_PCRE2_8
6931   if (test_mode == PCRE8_MODE)
6932     {
6933     if (utf)
6934       {
6935       if (c > 0x7fffffff)
6936         {
6937         fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
6938           "and so cannot be converted to UTF-8\n", c);
6939         return PR_OK;
6940         }
6941       q8 += ord2utf8(c, q8);
6942       }
6943     else
6944       {
6945       if (c > 0xffu)
6946         {
6947         fprintf(outfile, "** Character \\x{%x} is greater than 255 "
6948           "and UTF-8 mode is not enabled.\n", c);
6949         fprintf(outfile, "** Truncation will probably give the wrong "
6950           "result.\n");
6951         }
6952       *q8++ = (uint8_t)c;
6953       }
6954     }
6955 #endif
6956 #ifdef SUPPORT_PCRE2_16
6957   if (test_mode == PCRE16_MODE)
6958     {
6959     if (utf)
6960       {
6961       if (c > 0x10ffffu)
6962         {
6963         fprintf(outfile, "** Failed: character \\x{%x} is greater than "
6964           "0x10ffff and so cannot be converted to UTF-16\n", c);
6965         return PR_OK;
6966         }
6967       else if (c >= 0x10000u)
6968         {
6969         c-= 0x10000u;
6970         *q16++ = 0xD800 | (c >> 10);
6971         *q16++ = 0xDC00 | (c & 0x3ff);
6972         }
6973       else
6974         *q16++ = c;
6975       }
6976     else
6977       {
6978       if (c > 0xffffu)
6979         {
6980         fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
6981           "and UTF-16 mode is not enabled.\n", c);
6982         fprintf(outfile, "** Truncation will probably give the wrong "
6983           "result.\n");
6984         }
6985 
6986       *q16++ = (uint16_t)c;
6987       }
6988     }
6989 #endif
6990 #ifdef SUPPORT_PCRE2_32
6991   if (test_mode == PCRE32_MODE)
6992     {
6993     *q32++ = c;
6994     }
6995 #endif
6996   }
6997 
6998 ENDSTRING:
6999 SET(*q, 0);
7000 len = CASTVAR(uint8_t *, q) - dbuffer;    /* Length in bytes */
7001 ulen = len/code_unit_size;                /* Length in code units */
7002 arg_ulen = ulen;                          /* Value to use in match arg */
7003 
7004 /* If the string was terminated by \= we must now interpret modifiers. */
7005 
7006 if (p[-1] != 0 && !decode_modifiers(p, CTX_DAT, NULL, &dat_datctl))
7007   return PR_OK;
7008 
7009 /* Setting substitute_{skip,stop} implies a substitute callout. */
7010 
7011 if (dat_datctl.substitute_skip != 0 || dat_datctl.substitute_stop != 0)
7012   dat_datctl.control2 |= CTL2_SUBSTITUTE_CALLOUT;
7013 
7014 /* Check for mutually exclusive modifiers. At present, these are all in the
7015 first control word. */
7016 
7017 for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++)
7018   {
7019   c = dat_datctl.control & exclusive_dat_controls[k];
7020   if (c != 0 && c != (c & (~c+1)))
7021     {
7022     show_controls(c, 0, "** Not allowed together:");
7023     fprintf(outfile, "\n");
7024     return PR_OK;
7025     }
7026   }
7027 
7028 if (pat_patctl.replacement[0] != 0)
7029   {
7030   if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 &&
7031       (dat_datctl.control & CTL_NULLCONTEXT) != 0)
7032     {
7033     fprintf(outfile, "** Replacement callouts are not supported with null_context.\n");
7034     return PR_OK;
7035     }
7036 
7037   if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7038     fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
7039   }
7040 
7041 /* Warn for modifiers that are ignored for DFA. */
7042 
7043 if ((dat_datctl.control & CTL_DFA) != 0)
7044   {
7045   if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
7046     fprintf(outfile, "** Ignored after DFA matching: allcaptures\n");
7047   }
7048 
7049 /* We now have the subject in dbuffer, with len containing the byte length, and
7050 ulen containing the code unit length, with a copy in arg_ulen for use in match
7051 function arguments (this gets changed to PCRE2_ZERO_TERMINATED when the
7052 zero_terminate modifier is present).
7053 
7054 Move the data to the end of the buffer so that a read over the end can be
7055 caught by valgrind or other means. If we have explicit valgrind support, mark
7056 the unused start of the buffer unaddressable. If we are using the POSIX
7057 interface, or testing zero-termination, we must include the terminating zero in
7058 the usable data. */
7059 
7060 c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
7061                        (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
7062 pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
7063 #ifdef SUPPORT_VALGRIND
7064   VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
7065 #endif
7066 
7067 /* Now pp points to the subject string. POSIX matching is only possible in
7068 8-bit mode, and it does not support timing or other fancy features. Some were
7069 checked at compile time, but we need to check the match-time settings here. */
7070 
7071 #ifdef SUPPORT_PCRE2_8
7072 if ((pat_patctl.control & CTL_POSIX) != 0)
7073   {
7074   int rc;
7075   int eflags = 0;
7076   regmatch_t *pmatch = NULL;
7077   const char *msg = "** Ignored with POSIX interface:";
7078 
7079   if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
7080     prmsg(&msg, "callout_error");
7081   if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
7082     prmsg(&msg, "callout_fail");
7083   if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
7084     prmsg(&msg, "copy");
7085   if (dat_datctl.get_numbers[0] >= 0 || dat_datctl.get_names[0] != 0)
7086     prmsg(&msg, "get");
7087   if (dat_datctl.jitstack != 0) prmsg(&msg, "jitstack");
7088   if (dat_datctl.offset != 0) prmsg(&msg, "offset");
7089 
7090   if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0)
7091     {
7092     fprintf(outfile, "%s", msg);
7093     show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS);
7094     msg = "";
7095     }
7096   if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 ||
7097       (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0)
7098     {
7099     show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS,
7100                   dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg);
7101     msg = "";
7102     }
7103 
7104   if (msg[0] == 0) fprintf(outfile, "\n");
7105 
7106   if (dat_datctl.oveccount > 0)
7107     {
7108     pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount);
7109     if (pmatch == NULL)
7110       {
7111       fprintf(outfile, "** Failed to get memory for recording matching "
7112         "information (size set = %du)\n", dat_datctl.oveccount);
7113       return PR_OK;
7114       }
7115     }
7116 
7117   if (dat_datctl.startend[0] != CFORE_UNSET)
7118     {
7119     pmatch[0].rm_so = dat_datctl.startend[0];
7120     pmatch[0].rm_eo = (dat_datctl.startend[1] != 0)?
7121       dat_datctl.startend[1] : len;
7122     eflags |= REG_STARTEND;
7123     }
7124 
7125   if ((dat_datctl.options & PCRE2_NOTBOL) != 0) eflags |= REG_NOTBOL;
7126   if ((dat_datctl.options & PCRE2_NOTEOL) != 0) eflags |= REG_NOTEOL;
7127   if ((dat_datctl.options & PCRE2_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
7128 
7129   rc = regexec(&preg, (const char *)pp, dat_datctl.oveccount, pmatch, eflags);
7130   if (rc != 0)
7131     {
7132     (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size);
7133     fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8);
7134     }
7135   else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0)
7136     fprintf(outfile, "Matched with REG_NOSUB\n");
7137   else if (dat_datctl.oveccount == 0)
7138     fprintf(outfile, "Matched without capture\n");
7139   else
7140     {
7141     size_t i, j;
7142     size_t last_printed = (size_t)dat_datctl.oveccount;
7143     for (i = 0; i < (size_t)dat_datctl.oveccount; i++)
7144       {
7145       if (pmatch[i].rm_so >= 0)
7146         {
7147         PCRE2_SIZE start = pmatch[i].rm_so;
7148         PCRE2_SIZE end = pmatch[i].rm_eo;
7149         for (j = last_printed + 1; j < i; j++)
7150           fprintf(outfile, "%2d: <unset>\n", (int)j);
7151         last_printed = i;
7152         if (start > end)
7153           {
7154           start = pmatch[i].rm_eo;
7155           end = pmatch[i].rm_so;
7156           fprintf(outfile, "Start of matched string is beyond its end - "
7157             "displaying from end to start.\n");
7158           }
7159         fprintf(outfile, "%2d: ", (int)i);
7160         PCHARSV(pp, start, end - start, utf, outfile);
7161         fprintf(outfile, "\n");
7162 
7163         if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) ||
7164             (dat_datctl.control & CTL_ALLAFTERTEXT) != 0)
7165           {
7166           fprintf(outfile, "%2d+ ", (int)i);
7167           /* Note: don't use the start/end variables here because we want to
7168           show the text from what is reported as the end. */
7169           PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile);
7170           fprintf(outfile, "\n"); }
7171         }
7172       }
7173     }
7174   free(pmatch);
7175   return PR_OK;
7176   }
7177 #endif  /* SUPPORT_PCRE2_8 */
7178 
7179  /* Handle matching via the native interface. Check for consistency of
7180 modifiers. */
7181 
7182 if (dat_datctl.startend[0] != CFORE_UNSET)
7183   fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n");
7184 
7185 /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA
7186 matching, even if the JIT compiler was used. */
7187 
7188 if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT &&
7189     FLD(compiled_code, executable_jit) != NULL)
7190   {
7191   fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n");
7192   dat_datctl.control &= ~CTL_ALLUSEDTEXT;
7193   }
7194 
7195 /* Handle passing the subject as zero-terminated. */
7196 
7197 if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7198   arg_ulen = PCRE2_ZERO_TERMINATED;
7199 
7200 /* The nullcontext modifier is used to test calling pcre2_[jit_]match() with a
7201 NULL context. */
7202 
7203 use_dat_context = ((dat_datctl.control & CTL_NULLCONTEXT) != 0)?
7204   NULL : PTR(dat_context);
7205 
7206 /* Enable display of malloc/free if wanted. We can do this only if either the
7207 pattern or the subject is processed with a context. */
7208 
7209 show_memory = (dat_datctl.control & CTL_MEMORY) != 0;
7210 
7211 if (show_memory &&
7212     (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0)
7213   fprintf(outfile, "** \\=memory requires either a pattern or a subject "
7214     "context: ignored\n");
7215 
7216 /* Create and assign a JIT stack if requested. */
7217 
7218 if (dat_datctl.jitstack != 0)
7219   {
7220   if (dat_datctl.jitstack != jit_stack_size)
7221     {
7222     PCRE2_JIT_STACK_FREE(jit_stack);
7223     PCRE2_JIT_STACK_CREATE(jit_stack, 1, dat_datctl.jitstack * 1024, NULL);
7224     jit_stack_size = dat_datctl.jitstack;
7225     }
7226   PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, jit_stack);
7227   }
7228 
7229 /* Or de-assign */
7230 
7231 else if (jit_stack != NULL)
7232   {
7233   PCRE2_JIT_STACK_ASSIGN(dat_context, NULL, NULL);
7234   PCRE2_JIT_STACK_FREE(jit_stack);
7235   jit_stack = NULL;
7236   jit_stack_size = 0;
7237   }
7238 
7239 /* When no JIT stack is assigned, we must ensure that there is a JIT callback
7240 if we want to verify that JIT was actually used. */
7241 
7242 if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_stack == NULL)
7243    {
7244    PCRE2_JIT_STACK_ASSIGN(dat_context, jit_callback, NULL);
7245    }
7246 
7247 /* Adjust match_data according to size of offsets required. A size of zero
7248 causes a new match data block to be obtained that exactly fits the pattern. */
7249 
7250 if (dat_datctl.oveccount == 0)
7251   {
7252   PCRE2_MATCH_DATA_FREE(match_data);
7253   PCRE2_MATCH_DATA_CREATE_FROM_PATTERN(match_data, compiled_code, NULL);
7254   PCRE2_GET_OVECTOR_COUNT(max_oveccount, match_data);
7255   }
7256 else if (dat_datctl.oveccount <= max_oveccount)
7257   {
7258   SETFLD(match_data, oveccount, dat_datctl.oveccount);
7259   }
7260 else
7261   {
7262   max_oveccount = dat_datctl.oveccount;
7263   PCRE2_MATCH_DATA_FREE(match_data);
7264   PCRE2_MATCH_DATA_CREATE(match_data, max_oveccount, NULL);
7265   }
7266 
7267 if (CASTVAR(void *, match_data) == NULL)
7268   {
7269   fprintf(outfile, "** Failed to get memory for recording matching "
7270     "information (size requested: %d)\n", dat_datctl.oveccount);
7271   max_oveccount = 0;
7272   return PR_OK;
7273   }
7274 
7275 ovector = FLD(match_data, ovector);
7276 PCRE2_GET_OVECTOR_COUNT(oveccount, match_data);
7277 
7278 /* Replacement processing is ignored for DFA matching. */
7279 
7280 if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0)
7281   {
7282   fprintf(outfile, "** Ignored for DFA matching: replace\n");
7283   dat_datctl.replacement[0] = 0;
7284   }
7285 
7286 /* If a replacement string is provided, call pcre2_substitute() instead of one
7287 of the matching functions. First we have to convert the replacement string to
7288 the appropriate width. */
7289 
7290 if (dat_datctl.replacement[0] != 0)
7291   {
7292   int rc;
7293   uint8_t *pr;
7294   uint8_t rbuffer[REPLACE_BUFFSIZE];
7295   uint8_t nbuffer[REPLACE_BUFFSIZE];
7296   uint32_t xoptions;
7297   uint32_t emoption;  /* External match option */
7298   PCRE2_SIZE j, rlen, nsize, erroroffset;
7299   BOOL badutf = FALSE;
7300 
7301 #ifdef SUPPORT_PCRE2_8
7302   uint8_t *r8 = NULL;
7303 #endif
7304 #ifdef SUPPORT_PCRE2_16
7305   uint16_t *r16 = NULL;
7306 #endif
7307 #ifdef SUPPORT_PCRE2_32
7308   uint32_t *r32 = NULL;
7309 #endif
7310 
7311   /* Fill the ovector with junk to detect elements that do not get set
7312   when they should be (relevant only when "allvector" is specified). */
7313 
7314   for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7315 
7316   if (timeitm)
7317     fprintf(outfile, "** Timing is not supported with replace: ignored\n");
7318 
7319   if ((dat_datctl.control & CTL_ALTGLOBAL) != 0)
7320     fprintf(outfile, "** Altglobal is not supported with replace: ignored\n");
7321 
7322   /* Check for a test that does substitution after an initial external match.
7323   If this is set, we run the external match, but leave the interpretation of
7324   its output to pcre2_substitute(). */
7325 
7326   emoption = ((dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0)? 0 :
7327     PCRE2_SUBSTITUTE_MATCHED;
7328 
7329   if (emoption != 0)
7330     {
7331     PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7332       dat_datctl.options, match_data, use_dat_context);
7333     }
7334 
7335   xoptions = emoption |
7336              (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
7337                 PCRE2_SUBSTITUTE_GLOBAL) |
7338              (((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
7339                 PCRE2_SUBSTITUTE_EXTENDED) |
7340              (((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
7341                 PCRE2_SUBSTITUTE_LITERAL) |
7342              (((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
7343                 PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
7344              (((dat_datctl.control2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) == 0)? 0 :
7345                 PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) |
7346              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
7347                 PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
7348              (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
7349                 PCRE2_SUBSTITUTE_UNSET_EMPTY);
7350 
7351   SETCASTPTR(r, rbuffer);  /* Sets r8, r16, or r32, as appropriate. */
7352   pr = dat_datctl.replacement;
7353 
7354   /* If the replacement starts with '[<number>]' we interpret that as length
7355   value for the replacement buffer. */
7356 
7357   nsize = REPLACE_BUFFSIZE/code_unit_size;
7358   if (*pr == '[')
7359     {
7360     PCRE2_SIZE n = 0;
7361     while ((c = *(++pr)) >= CHAR_0 && c <= CHAR_9) n = n * 10 + c - CHAR_0;
7362     if (*pr++ != ']')
7363       {
7364       fprintf(outfile, "Bad buffer size in replacement string\n");
7365       return PR_OK;
7366       }
7367     if (n > nsize)
7368       {
7369       fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too "
7370         "large (max %" SIZ_FORM ")\n", n, nsize);
7371       return PR_OK;
7372       }
7373     nsize = n;
7374     }
7375 
7376   /* Now copy the replacement string to a buffer of the appropriate width. No
7377   escape processing is done for replacements. In UTF mode, check for an invalid
7378   UTF-8 input string, and if it is invalid, just copy its code units without
7379   UTF interpretation. This provides a means of checking that an invalid string
7380   is detected. Otherwise, UTF-8 can be used to include wide characters in a
7381   replacement. */
7382 
7383   if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset);
7384 
7385   /* Not UTF or invalid UTF-8: just copy the code units. */
7386 
7387   if (!utf || badutf)
7388     {
7389     while ((c = *pr++) != 0)
7390       {
7391 #ifdef SUPPORT_PCRE2_8
7392       if (test_mode == PCRE8_MODE) *r8++ = c;
7393 #endif
7394 #ifdef SUPPORT_PCRE2_16
7395       if (test_mode == PCRE16_MODE) *r16++ = c;
7396 #endif
7397 #ifdef SUPPORT_PCRE2_32
7398       if (test_mode == PCRE32_MODE) *r32++ = c;
7399 #endif
7400       }
7401     }
7402 
7403   /* Valid UTF-8 replacement string */
7404 
7405   else while ((c = *pr++) != 0)
7406     {
7407     if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); }
7408 
7409 #ifdef SUPPORT_PCRE2_8
7410     if (test_mode == PCRE8_MODE) r8 += ord2utf8(c, r8);
7411 #endif
7412 
7413 #ifdef SUPPORT_PCRE2_16
7414     if (test_mode == PCRE16_MODE)
7415       {
7416       if (c >= 0x10000u)
7417         {
7418         c-= 0x10000u;
7419         *r16++ = 0xD800 | (c >> 10);
7420         *r16++ = 0xDC00 | (c & 0x3ff);
7421         }
7422       else *r16++ = c;
7423       }
7424 #endif
7425 
7426 #ifdef SUPPORT_PCRE2_32
7427     if (test_mode == PCRE32_MODE) *r32++ = c;
7428 #endif
7429     }
7430 
7431   SET(*r, 0);
7432   if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0)
7433     rlen = PCRE2_ZERO_TERMINATED;
7434   else
7435     rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
7436 
7437   if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
7438     {
7439     PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
7440     }
7441   else
7442     {
7443     PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7444     }
7445 
7446   PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
7447     dat_datctl.options|xoptions, match_data, use_dat_context,
7448     rbuffer, rlen, nbuffer, &nsize);
7449 
7450   if (rc < 0)
7451     {
7452     fprintf(outfile, "Failed: error %d", rc);
7453     if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET)
7454       fprintf(outfile, " at offset %ld in replacement", (long int)nsize);
7455     fprintf(outfile, ": ");
7456     if (!print_error_message(rc, "", "")) return PR_ABEND;
7457     if (rc == PCRE2_ERROR_NOMEMORY &&
7458         (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)
7459       fprintf(outfile, ": %ld code units are needed", (long int)nsize);
7460     }
7461   else
7462     {
7463     fprintf(outfile, "%2d: ", rc);
7464     PCHARSV(nbuffer, 0, nsize, utf, outfile);
7465     }
7466 
7467   fprintf(outfile, "\n");
7468   show_memory = FALSE;
7469 
7470   /* Show final ovector contents if requested. */
7471 
7472   if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7473     show_ovector(ovector, oveccount);
7474 
7475   return PR_OK;
7476   }   /* End of substitution handling */
7477 
7478 /* When a replacement string is not provided, run a loop for global matching
7479 with one of the basic matching functions. For altglobal (or first time round
7480 the loop), set an "unset" value for the previous match info. */
7481 
7482 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
7483 
7484 for (gmatched = 0;; gmatched++)
7485   {
7486   PCRE2_SIZE j;
7487   int capcount;
7488 
7489   /* Fill the ovector with junk to detect elements that do not get set
7490   when they should be. */
7491 
7492   for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET;
7493 
7494   /* When matching is via pcre2_match(), we will detect the use of JIT via the
7495   stack callback function. */
7496 
7497   jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
7498 
7499   /* Do timing if required. */
7500 
7501   if (timeitm > 0)
7502     {
7503     int i;
7504     clock_t start_time, time_taken;
7505 
7506     if ((dat_datctl.control & CTL_DFA) != 0)
7507       {
7508       if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0)
7509         {
7510         fprintf(outfile, "Timing DFA restarts is not supported\n");
7511         return PR_OK;
7512         }
7513       if (dfa_workspace == NULL)
7514         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7515       start_time = clock();
7516       for (i = 0; i < timeitm; i++)
7517         {
7518         PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7519           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7520           use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7521         }
7522       }
7523 
7524     else if ((pat_patctl.control & CTL_JITFAST) != 0)
7525       {
7526       start_time = clock();
7527       for (i = 0; i < timeitm; i++)
7528         {
7529         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen,
7530           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7531           use_dat_context);
7532         }
7533       }
7534 
7535     else
7536       {
7537       start_time = clock();
7538       for (i = 0; i < timeitm; i++)
7539         {
7540         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen,
7541           dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7542           use_dat_context);
7543         }
7544       }
7545     total_match_time += (time_taken = clock() - start_time);
7546     fprintf(outfile, "Match time %.4f milliseconds\n",
7547       (((double)time_taken * 1000.0) / (double)timeitm) /
7548         (double)CLOCKS_PER_SEC);
7549     }
7550 
7551   /* Find the heap, match and depth limits if requested. The depth and heap
7552   limits are not relevant for JIT. The return from check_match_limit() is the
7553   return from the final call to pcre2_match() or pcre2_dfa_match(). */
7554 
7555   if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
7556     {
7557     capcount = 0;  /* This stops compiler warnings */
7558 
7559     if (FLD(compiled_code, executable_jit) == NULL ||
7560           (dat_datctl.options & PCRE2_NO_JIT) != 0)
7561       {
7562       (void)check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT, "heap");
7563       }
7564 
7565     capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
7566       "match");
7567 
7568     if (FLD(compiled_code, executable_jit) == NULL ||
7569         (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
7570         (dat_datctl.control & CTL_DFA) != 0)
7571       {
7572       capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
7573         "depth");
7574       }
7575 
7576     if (capcount == 0)
7577       {
7578       fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7579       capcount = dat_datctl.oveccount;
7580       }
7581     }
7582 
7583   /* Otherwise just run a single match, setting up a callout if required (the
7584   default). There is a copy of the pattern in pbuffer8 for use by callouts. */
7585 
7586   else
7587     {
7588     if ((dat_datctl.control & CTL_CALLOUT_NONE) == 0)
7589       {
7590       PCRE2_SET_CALLOUT(dat_context, callout_function,
7591         (void *)(&dat_datctl.callout_data));
7592       first_callout = TRUE;
7593       last_callout_mark = NULL;
7594       callout_count = 0;
7595       }
7596     else
7597       {
7598       PCRE2_SET_CALLOUT(dat_context, NULL, NULL);  /* No callout */
7599       }
7600 
7601     /* Run a single DFA or NFA match. */
7602 
7603     if ((dat_datctl.control & CTL_DFA) != 0)
7604       {
7605       if (dfa_workspace == NULL)
7606         dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
7607       if (dfa_matched++ == 0)
7608         dfa_workspace[0] = -1;  /* To catch bad restart */
7609       PCRE2_DFA_MATCH(capcount, compiled_code, pp, arg_ulen,
7610         dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
7611         use_dat_context, dfa_workspace, DFA_WS_DIMENSION);
7612       if (capcount == 0)
7613         {
7614         fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
7615         capcount = dat_datctl.oveccount;
7616         }
7617       }
7618     else
7619       {
7620       if ((pat_patctl.control & CTL_JITFAST) != 0)
7621         PCRE2_JIT_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7622           dat_datctl.options | g_notempty, match_data, use_dat_context);
7623       else
7624         PCRE2_MATCH(capcount, compiled_code, pp, arg_ulen, dat_datctl.offset,
7625           dat_datctl.options | g_notempty, match_data, use_dat_context);
7626       if (capcount == 0)
7627         {
7628         fprintf(outfile, "Matched, but too many substrings\n");
7629         capcount = dat_datctl.oveccount;
7630         }
7631       }
7632     }
7633 
7634   /* The result of the match is now in capcount. First handle a successful
7635   match. */
7636 
7637   if (capcount >= 0)
7638     {
7639     int i;
7640 
7641     if (capcount > (int)oveccount)   /* Check for lunatic return value */
7642       {
7643       fprintf(outfile,
7644         "** PCRE2 error: returned count %d is too big for ovector count %d\n",
7645         capcount, oveccount);
7646       capcount = oveccount;
7647       if ((dat_datctl.control & CTL_ANYGLOB) != 0)
7648         {
7649         fprintf(outfile, "** Global loop abandoned\n");
7650         dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7651         }
7652       }
7653 
7654     /* If PCRE2_COPY_MATCHED_SUBJECT was set, check that things are as they
7655     should be, but not for fast JIT, where it isn't supported. */
7656 
7657     if ((dat_datctl.options & PCRE2_COPY_MATCHED_SUBJECT) != 0 &&
7658         (pat_patctl.control & CTL_JITFAST) == 0)
7659       {
7660       if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0)
7661         fprintf(outfile,
7662           "** PCRE2 error: flag not set after copy_matched_subject\n");
7663 
7664       if (CASTFLD(void *, match_data, subject) == pp)
7665         fprintf(outfile,
7666           "** PCRE2 error: copy_matched_subject has not copied\n");
7667 
7668       if (memcmp(CASTFLD(void *, match_data, subject), pp, ulen) != 0)
7669         fprintf(outfile,
7670           "** PCRE2 error: copy_matched_subject mismatch\n");
7671       }
7672 
7673     /* If this is not the first time round a global loop, check that the
7674     returned string has changed. If it has not, check for an empty string match
7675     at different starting offset from the previous match. This is a failed test
7676     retry for null-matching patterns that don't match at their starting offset,
7677     for example /(?<=\G.)/. A repeated match at the same point is not such a
7678     pattern, and must be discarded, and we then proceed to seek a non-null
7679     match at the current point. For any other repeated match, there is a bug
7680     somewhere and we must break the loop because it will go on for ever. We
7681     know that there are always at least two elements in the ovector. */
7682 
7683     if (gmatched > 0 && ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
7684       {
7685       if (ovector[0] == ovector[1] && ovecsave[2] != dat_datctl.offset)
7686         {
7687         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
7688         ovecsave[2] = dat_datctl.offset;
7689         continue;    /* Back to the top of the loop */
7690         }
7691       fprintf(outfile,
7692         "** PCRE2 error: global repeat returned the same string as previous\n");
7693       fprintf(outfile, "** Global loop abandoned\n");
7694       dat_datctl.control &= ~CTL_ANYGLOB;        /* Break g/G loop */
7695       }
7696 
7697     /* "allcaptures" requests showing of all captures in the pattern, to check
7698     unset ones at the end. It may be set on the pattern or the data. Implement
7699     by setting capcount to the maximum. This is not relevant for DFA matching,
7700     so ignore it (warning given above). */
7701 
7702     if ((dat_datctl.control & (CTL_ALLCAPTURES|CTL_DFA)) == CTL_ALLCAPTURES)
7703       {
7704       capcount = maxcapcount + 1;   /* Allow for full match */
7705       if (capcount > (int)oveccount) capcount = oveccount;
7706       }
7707 
7708     /* "allvector" request showing the entire ovector. */
7709 
7710     if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0) capcount = oveccount;
7711 
7712     /* Output the captured substrings. Note that, for the matched string,
7713     the use of \K in an assertion can make the start later than the end. */
7714 
7715     for (i = 0; i < 2*capcount; i += 2)
7716       {
7717       PCRE2_SIZE lleft, lmiddle, lright;
7718       PCRE2_SIZE start = ovector[i];
7719       PCRE2_SIZE end = ovector[i+1];
7720 
7721       if (start > end)
7722         {
7723         start = ovector[i+1];
7724         end = ovector[i];
7725         fprintf(outfile, "Start of matched string is beyond its end - "
7726           "displaying from end to start.\n");
7727         }
7728 
7729       fprintf(outfile, "%2d: ", i/2);
7730 
7731       /* Check for an unset group */
7732 
7733       if (start == PCRE2_UNSET && end == PCRE2_UNSET)
7734         {
7735         fprintf(outfile, "<unset>\n");
7736         continue;
7737         }
7738 
7739       /* Check for silly offsets, in particular, values that have not been
7740       set when they should have been. However, if we are past the end of the
7741       captures for this pattern ("allvector" causes this), or if we are DFA
7742       matching, it isn't an error if the entry is unchanged. */
7743 
7744       if (start > ulen || end > ulen)
7745         {
7746         if (((dat_datctl.control & CTL_DFA) != 0 ||
7747               i >= (int)(2*maxcapcount + 2)) &&
7748             start == JUNK_OFFSET && end == JUNK_OFFSET)
7749           fprintf(outfile, "<unchanged>\n");
7750         else
7751           fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
7752             (unsigned long int)start, (unsigned long int)end);
7753         continue;
7754         }
7755 
      /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
7757       JIT, it is disabled above, with a comment.) When the match is done by the
7758       interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
7759       set, and if the leftmost consulted character is before the start of the
7760       match or the rightmost consulted character is past the end of the match,
7761       we want to show all consulted characters for the main matched string, and
7762       indicate which were lookarounds. */
7763 
7764       if (i == 0)
7765         {
7766         BOOL showallused;
7767         PCRE2_SIZE leftchar, rightchar;
7768 
7769         if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7770           {
7771           leftchar = FLD(match_data, leftchar);
7772           rightchar = FLD(match_data, rightchar);
7773           showallused = i == 0 && (leftchar < start || rightchar > end);
7774           }
7775         else showallused = FALSE;
7776 
7777         if (showallused)
7778           {
7779           PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
7780           PCHARS(lmiddle, pp, start, end - start, utf, outfile);
7781           PCHARS(lright, pp, end, rightchar - end, utf, outfile);
7782           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7783             fprintf(outfile, " (JIT)");
7784           fprintf(outfile, "\n    ");
7785           for (j = 0; j < lleft; j++) fprintf(outfile, "<");
7786           for (j = 0; j < lmiddle; j++) fprintf(outfile, " ");
7787           for (j = 0; j < lright; j++) fprintf(outfile, ">");
7788           }
7789 
7790         /* When a pattern contains \K, the start of match position may be
7791         different to the start of the matched string. When this is the case,
7792         show it when requested. */
7793 
7794         else if ((dat_datctl.control & CTL_STARTCHAR) != 0)
7795           {
7796           PCRE2_SIZE startchar;
7797           PCRE2_GET_STARTCHAR(startchar, match_data);
7798           PCHARS(lleft, pp, startchar, start - startchar, utf, outfile);
7799           PCHARSV(pp, start, end - start, utf, outfile);
7800           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7801             fprintf(outfile, " (JIT)");
7802           if (startchar != start)
7803             {
7804             fprintf(outfile, "\n    ");
7805             for (j = 0; j < lleft; j++) fprintf(outfile, "^");
7806             }
7807           }
7808 
7809         /* Otherwise, just show the matched string. */
7810 
7811         else
7812           {
7813           PCHARSV(pp, start, end - start, utf, outfile);
7814           if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7815             fprintf(outfile, " (JIT)");
7816           }
7817         }
7818 
7819       /* Not the main matched string. Just show it unadorned. */
7820 
7821       else
7822         {
7823         PCHARSV(pp, start, end - start, utf, outfile);
7824         }
7825 
7826       fprintf(outfile, "\n");
7827 
7828       /* Note: don't use the start/end variables here because we want to
7829       show the text from what is reported as the end. */
7830 
7831       if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 ||
7832           (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0))
7833         {
7834         fprintf(outfile, "%2d+ ", i/2);
7835         PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile);
7836         fprintf(outfile, "\n");
7837         }
7838       }
7839 
7840     /* Output (*MARK) data if requested */
7841 
7842     if ((dat_datctl.control & CTL_MARK) != 0 &&
7843          TESTFLD(match_data, mark, !=, NULL))
7844       {
7845       fprintf(outfile, "MK: ");
7846       PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7847       fprintf(outfile, "\n");
7848       }
7849 
7850     /* Process copy/get strings */
7851 
7852     if (!copy_and_get(utf, capcount)) return PR_ABEND;
7853 
7854     }    /* End of handling a successful match */
7855 
7856   /* There was a partial match. The value of ovector[0] is the bumpalong point,
7857   that is, startchar, not any \K point that might have been passed. When JIT is
7858   not in use, "allusedtext" may be set, in which case we indicate the leftmost
7859   consulted character. */
7860 
7861   else if (capcount == PCRE2_ERROR_PARTIAL)
7862     {
7863     PCRE2_SIZE leftchar;
7864     int backlength;
7865     int rubriclength = 0;
7866 
7867     if ((dat_datctl.control & CTL_ALLUSEDTEXT) != 0)
7868       {
7869       leftchar = FLD(match_data, leftchar);
7870       }
7871     else leftchar = ovector[0];
7872 
7873     fprintf(outfile, "Partial match");
7874     if ((dat_datctl.control & CTL_MARK) != 0 &&
7875          TESTFLD(match_data, mark, !=, NULL))
7876       {
7877       fprintf(outfile, ", mark=");
7878       PCHARS(rubriclength, CASTFLD(void *, match_data, mark), -1, -1, utf,
7879         outfile);
7880       rubriclength += 7;
7881       }
7882     fprintf(outfile, ": ");
7883     rubriclength += 15;
7884 
7885     PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile);
7886     PCHARSV(pp, ovector[0], ulen - ovector[0], utf, outfile);
7887 
7888     if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7889       fprintf(outfile, " (JIT)");
7890     fprintf(outfile, "\n");
7891 
7892     if (backlength != 0)
7893       {
7894       int i;
7895       for (i = 0; i < rubriclength; i++) fprintf(outfile, " ");
7896       for (i = 0; i < backlength; i++) fprintf(outfile, "<");
7897       fprintf(outfile, "\n");
7898       }
7899 
7900     if (ulen != ovector[1])
7901       fprintf(outfile, "** ovector[1] is not equal to the subject length: "
7902         "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen);
7903 
7904     /* Process copy/get strings */
7905 
7906     if (!copy_and_get(utf, 1)) return PR_ABEND;
7907 
7908     /* "allvector" outputs the entire vector */
7909 
7910     if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7911       show_ovector(ovector, oveccount);
7912 
7913     break;  /* Out of the /g loop */
7914     }       /* End of handling partial match */
7915 
7916   /* Failed to match. If this is a /g or /G loop, we might previously have
7917   set g_notempty (to PCRE2_NOTEMPTY_ATSTART|PCRE2_ANCHORED) after a null match.
7918   If that is the case, this is not necessarily the end. We want to advance the
7919   start offset, and continue. We won't be at the end of the string - that was
7920   checked before setting g_notempty. We achieve the effect by pretending that a
7921   single character was matched.
7922 
7923   Complication arises in the case when the newline convention is "any", "crlf",
7924   or "anycrlf". If the previous match was at the end of a line terminated by
7925   CRLF, an advance of one character just passes the CR, whereas we should
7926   prefer the longer newline sequence, as does the code in pcre2_match().
7927 
7928   Otherwise, in the case of UTF-8 or UTF-16 matching, the advance must be one
7929   character, not one byte. */
7930 
7931   else if (g_notempty != 0)   /* There was a previous null match */
7932     {
7933     uint16_t nl = FLD(compiled_code, newline_convention);
7934     PCRE2_SIZE start_offset = dat_datctl.offset;    /* Where the match was */
7935     PCRE2_SIZE end_offset = start_offset + 1;
7936 
7937     if ((nl == PCRE2_NEWLINE_CRLF || nl == PCRE2_NEWLINE_ANY ||
7938          nl == PCRE2_NEWLINE_ANYCRLF) &&
7939         start_offset < ulen - 1 &&
7940         CODE_UNIT(pp, start_offset) == '\r' &&
7941         CODE_UNIT(pp, end_offset) == '\n')
7942       end_offset++;
7943 
7944     else if (utf && test_mode != PCRE32_MODE)
7945       {
7946       if (test_mode == PCRE8_MODE)
7947         {
7948         for (; end_offset < ulen; end_offset++)
7949           if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
7950         }
7951       else  /* 16-bit mode */
7952         {
7953         for (; end_offset < ulen; end_offset++)
7954           if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
7955         }
7956       }
7957 
7958     SETFLDVEC(match_data, ovector, 0, start_offset);
7959     SETFLDVEC(match_data, ovector, 1, end_offset);
7960     }  /* End of handling null match in a global loop */
7961 
7962   /* A "normal" match failure. There will be a negative error number in
7963   capcount. */
7964 
7965   else
7966     {
7967     switch(capcount)
7968       {
7969       case PCRE2_ERROR_NOMATCH:
7970       if (gmatched == 0)
7971         {
7972         fprintf(outfile, "No match");
7973         if ((dat_datctl.control & CTL_MARK) != 0 &&
7974              TESTFLD(match_data, mark, !=, NULL))
7975           {
7976           fprintf(outfile, ", mark = ");
7977           PCHARSV(CASTFLD(void *, match_data, mark), -1, -1, utf, outfile);
7978           }
7979         if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used)
7980           fprintf(outfile, " (JIT)");
7981         fprintf(outfile, "\n");
7982 
7983         /* "allvector" outputs the entire vector */
7984 
7985         if ((dat_datctl.control2 & CTL2_ALLVECTOR) != 0)
7986           show_ovector(ovector, oveccount);
7987         }
7988       break;
7989 
7990       case PCRE2_ERROR_BADUTFOFFSET:
7991       fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode);
7992       break;
7993 
7994       default:
7995       fprintf(outfile, "Failed: error %d: ", capcount);
7996       if (!print_error_message(capcount, "", "")) return PR_ABEND;
7997       if (capcount <= PCRE2_ERROR_UTF8_ERR1 &&
7998           capcount >= PCRE2_ERROR_UTF32_ERR2)
7999         {
8000         PCRE2_SIZE startchar;
8001         PCRE2_GET_STARTCHAR(startchar, match_data);
8002         fprintf(outfile, " at offset %" SIZ_FORM, startchar);
8003         }
8004       fprintf(outfile, "\n");
8005       break;
8006       }
8007 
8008     break;  /* Out of the /g loop */
8009     }       /* End of failed match handling */
8010 
8011   /* Control reaches here in two circumstances: (a) after a match, and (b)
8012   after a non-match that immediately followed a match on an empty string when
8013   doing a global search. Such a match is done with PCRE2_NOTEMPTY_ATSTART and
8014   PCRE2_ANCHORED set in g_notempty. The code above turns it into a fake match
8015   of one character. So effectively we get here only after a match. If we
8016   are not doing a global search, we are done. */
8017 
8018   if ((dat_datctl.control & CTL_ANYGLOB) == 0) break; else
8019     {
8020     PCRE2_SIZE match_offset = FLD(match_data, ovector)[0];
8021     PCRE2_SIZE end_offset = FLD(match_data, ovector)[1];
8022 
8023     /* We must now set up for the next iteration of a global search. If we have
8024     matched an empty string, first check to see if we are at the end of the
8025     subject. If so, the loop is over. Otherwise, mimic what Perl's /g option
8026     does. Set PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED and try the match again
8027     at the same point. If this fails it will be picked up above, where a fake
8028     match is set up so that at this point we advance to the next character.
8029 
8030     However, in order to cope with patterns that never match at their starting
8031     offset (e.g. /(?<=\G.)/) we don't do this when the match offset is greater
8032     than the starting offset. This means there will be a retry with the
8033     starting offset at the match offset. If this returns the same match again,
8034     it is picked up above and ignored, and the special action is then taken. */
8035 
8036     if (match_offset == end_offset)
8037       {
8038       if (end_offset == ulen) break;           /* End of subject */
8039       if (match_offset <= dat_datctl.offset)
8040         g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
8041       }
8042 
8043     /* However, even after matching a non-empty string, there is still one
8044     tricky case. If a pattern contains \K within a lookbehind assertion at the
8045     start, the end of the matched string can be at the offset where the match
8046     started. In the case of a normal /g iteration without special action, this
8047     leads to a loop that keeps on returning the same substring. The loop would
8048     be caught above, but we really want to move on to the next match. */
8049 
8050     else
8051       {
8052       g_notempty = 0;   /* Set for a "normal" repeat */
8053       if ((dat_datctl.control & CTL_GLOBAL) != 0)
8054         {
8055         PCRE2_SIZE startchar;
8056         PCRE2_GET_STARTCHAR(startchar, match_data);
8057         if (end_offset <= startchar)
8058           {
8059           if (startchar >= ulen) break;       /* End of subject */
8060           end_offset = startchar + 1;
8061           if (utf && test_mode != PCRE32_MODE)
8062             {
8063             if (test_mode == PCRE8_MODE)
8064               {
8065               for (; end_offset < ulen; end_offset++)
8066                 if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
8067               }
8068             else  /* 16-bit mode */
8069               {
8070               for (; end_offset < ulen; end_offset++)
8071                 if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
8072               }
8073             }
8074           }
8075         }
8076       }
8077 
8078     /* For a normal global (/g) iteration, save the current ovector[0,1] and
8079     the starting offset so that we can check that they do change each time.
8080     Otherwise a matching bug that returns the same string causes an infinite
8081     loop. It has happened! Then update the start offset, leaving other
8082     parameters alone. */
8083 
8084     if ((dat_datctl.control & CTL_GLOBAL) != 0)
8085       {
8086       ovecsave[0] = ovector[0];
8087       ovecsave[1] = ovector[1];
8088       ovecsave[2] = dat_datctl.offset;
8089       dat_datctl.offset = end_offset;
8090       }
8091 
8092     /* For altglobal, just update the pointer and length. */
8093 
8094     else
8095       {
8096       pp += end_offset * code_unit_size;
8097       len -= end_offset * code_unit_size;
8098       ulen -= end_offset;
8099       if (arg_ulen != PCRE2_ZERO_TERMINATED) arg_ulen -= end_offset;
8100       }
8101     }
8102   }  /* End of global loop */
8103 
8104 show_memory = FALSE;
8105 return PR_OK;
8106 }
8107 
8108 
8109 
8110 
8111 /*************************************************
8112 *               Print PCRE2 version              *
8113 *************************************************/
8114 
8115 static void
print_version(FILE * f)8116 print_version(FILE *f)
8117 {
8118 VERSION_TYPE *vp;
8119 fprintf(f, "PCRE2 version ");
8120 for (vp = version; *vp != 0; vp++) fprintf(f, "%c", *vp);
8121 fprintf(f, "\n");
8122 }
8123 
8124 
8125 
8126 /*************************************************
8127 *               Print Unicode version            *
8128 *************************************************/
8129 
8130 static void
print_unicode_version(FILE * f)8131 print_unicode_version(FILE *f)
8132 {
8133 VERSION_TYPE *vp;
8134 fprintf(f, "Unicode version ");
8135 for (vp = uversion; *vp != 0; vp++) fprintf(f, "%c", *vp);
8136 }
8137 
8138 
8139 
8140 /*************************************************
8141 *               Print JIT target                 *
8142 *************************************************/
8143 
8144 static void
print_jit_target(FILE * f)8145 print_jit_target(FILE *f)
8146 {
8147 VERSION_TYPE *vp;
8148 for (vp = jittarget; *vp != 0; vp++) fprintf(f, "%c", *vp);
8149 }
8150 
8151 
8152 
8153 /*************************************************
8154 *       Print newline configuration              *
8155 *************************************************/
8156 
8157 /* Output is always to stdout.
8158 
8159 Arguments:
8160   rc         the return code from PCRE2_CONFIG_NEWLINE
8161   isc        TRUE if called from "-C newline"
8162 Returns:     nothing
8163 */
8164 
8165 static void
print_newline_config(uint32_t optval,BOOL isc)8166 print_newline_config(uint32_t optval, BOOL isc)
8167 {
8168 if (!isc) printf("  Default newline sequence is ");
8169 if (optval < sizeof(newlines)/sizeof(char *))
8170   printf("%s\n", newlines[optval]);
8171 else
8172   printf("a non-standard value: %d\n", optval);
8173 }
8174 
8175 
8176 
8177 /*************************************************
8178 *             Usage function                     *
8179 *************************************************/
8180 
/* Print the command-line help text on stdout: the synopsis, a note saying
whether readline() is used, and the list of options. The lines for -8, -16
and -32 are output only when the corresponding SUPPORT_PCRE2_8,
SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 macro is defined. None of the text
contains a conversion specification, so it is written verbatim.

Arguments:  none
Returns:    nothing
*/

static void
usage(void)
{
/* Option descriptions that do not depend on how pcre2test was built, in the
order in which they are output. */

static const char *const common_options[] = {
  "  -ac           set default pattern modifier PCRE2_AUTO_CALLOUT\n",
  "  -AC           as -ac, but also set subject 'callout_extra' modifier\n",
  "  -b            set default pattern modifier 'fullbincode'\n",
  "  -C            show PCRE2 compile-time options and exit\n",
  "  -C arg        show a specific compile-time option and exit with its\n",
  "                  value if numeric (else 0). The arg can be:\n",
  "     backslash-C    use of \\C is enabled [0, 1]\n",
  "     bsr            \\R type [ANYCRLF, ANY]\n",
  "     ebcdic         compiled for EBCDIC character code [0,1]\n",
  "     ebcdic-nl      NL code if compiled for EBCDIC\n",
  "     jit            just-in-time compiler supported [0, 1]\n",
  "     linksize       internal link size [2, 3, 4]\n",
  "     newline        newline type [CR, LF, CRLF, ANYCRLF, ANY, NUL]\n",
  "     pcre2-8        8 bit library support enabled [0, 1]\n",
  "     pcre2-16       16 bit library support enabled [0, 1]\n",
  "     pcre2-32       32 bit library support enabled [0, 1]\n",
  "     unicode        Unicode and UTF support enabled [0, 1]\n",
  "  -d            set default pattern modifier 'debug'\n",
  "  -dfa          set default subject modifier 'dfa'\n",
  "  -error <n,m,..>  show messages for error numbers, then exit\n",
  "  -help         show usage information\n",
  "  -i            set default pattern modifier 'info'\n",
  "  -jit          set default pattern modifier 'jit'\n",
  "  -jitfast      set default pattern modifier 'jitfast'\n",
  "  -jitverify    set default pattern modifier 'jitverify'\n",
  "  -LM           list pattern and subject modifiers, then exit\n",
  "  -q            quiet: do not output PCRE2 version number at start\n",
  "  -pattern <s>  set default pattern modifier fields\n",
  "  -subject <s>  set default subject modifier fields\n",
  "  -S <n>        set stack size to <n> mebibytes\n",
  "  -t [<n>]      time compilation and execution, repeating <n> times\n",
  "  -tm [<n>]     time execution (matching) only, repeating <n> times\n",
  "  -T            same as -t, but show total times at the end\n",
  "  -TM           same as -tm, but show total time at the end\n",
  "  -version      show PCRE2 version and exit\n"
  };
unsigned int k;

fputs("Usage:     pcre2test [options] [<input file> [<output file>]]\n\n",
  stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcre2test is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

/* The library-width selectors come first, one for each supported width. */

#ifdef SUPPORT_PCRE2_8
fputs("  -8            use the 8-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_16
fputs("  -16           use the 16-bit library\n", stdout);
#endif
#ifdef SUPPORT_PCRE2_32
fputs("  -32           use the 32-bit library\n", stdout);
#endif

for (k = 0; k < sizeof(common_options)/sizeof(common_options[0]); k++)
  fputs(common_options[k], stdout);
}
8237 
8238 
8239 
8240 /*************************************************
8241 *             Handle -C option                   *
8242 *************************************************/
8243 
8244 /* This option outputs configuration options and sets an appropriate return
8245 code when asked for a single option. The code is abstracted into a separate
8246 function because of its size. Use whichever pcre2_config() function is
8247 available.
8248 
8249 Argument:   an option name or NULL
8250 Returns:    the return code
8251 */
8252 
8253 static int
c_option(const char * arg)8254 c_option(const char *arg)
8255 {
8256 uint32_t optval;
8257 unsigned int i = COPTLISTCOUNT;
8258 int yield = 0;
8259 
8260 if (arg != NULL && arg[0] != CHAR_MINUS)
8261   {
8262   for (i = 0; i < COPTLISTCOUNT; i++)
8263     if (strcmp(arg, coptlist[i].name) == 0) break;
8264 
8265   if (i >= COPTLISTCOUNT)
8266     {
8267     fprintf(stderr, "** Unknown -C option '%s'\n", arg);
8268     return 0;
8269     }
8270 
8271   switch (coptlist[i].type)
8272     {
8273     case CONF_BSR:
8274     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8275     printf("%s\n", (optval == PCRE2_BSR_ANYCRLF)? "ANYCRLF" : "ANY");
8276     break;
8277 
8278     case CONF_FIX:
8279     yield = coptlist[i].value;
8280     printf("%d\n", yield);
8281     break;
8282 
8283     case CONF_FIZ:
8284     optval = coptlist[i].value;
8285     printf("%d\n", optval);
8286     break;
8287 
8288     case CONF_INT:
8289     (void)PCRE2_CONFIG(coptlist[i].value, &yield);
8290     printf("%d\n", yield);
8291     break;
8292 
8293     case CONF_NL:
8294     (void)PCRE2_CONFIG(coptlist[i].value, &optval);
8295     print_newline_config(optval, TRUE);
8296     break;
8297     }
8298 
8299 /* For VMS, return the value by setting a symbol, for certain values only. This
8300 is contributed code which the PCRE2 developers have no means of testing. */
8301 
8302 #ifdef __VMS
8303 
8304 /* This is the original code provided by the first VMS contributor. */
8305 #ifdef NEVER
8306   if (copytlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8307     {
8308     char ucname[16];
8309     strcpy(ucname, coptlist[i].name);
8310     for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i]];
8311     vms_setsymbol(ucname, 0, optval);
8312     }
8313 #endif
8314 
8315 /* This is the new code, provided by a second VMS contributor. */
8316 
8317   if (coptlist[i].type == CONF_FIX || coptlist[i].type == CONF_INT)
8318     {
8319     char nam_buf[22], val_buf[4];
8320     $DESCRIPTOR(nam, nam_buf);
8321     $DESCRIPTOR(val, val_buf);
8322 
8323     strcpy(nam_buf, coptlist[i].name);
8324     nam.dsc$w_length = strlen(nam_buf);
8325     sprintf(val_buf, "%d", yield);
8326     val.dsc$w_length = strlen(val_buf);
8327     lib$set_symbol(&nam, &val);
8328     }
8329 #endif  /* __VMS */
8330 
8331   return yield;
8332   }
8333 
8334 /* No argument for -C: output all configuration information. */
8335 
8336 print_version(stdout);
8337 printf("Compiled with\n");
8338 
8339 #ifdef EBCDIC
8340 printf("  EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
8341 #if defined NATIVE_ZOS
8342 printf("  EBCDIC code page %s or similar\n", pcrz_cpversion());
8343 #endif
8344 #endif
8345 
8346 (void)PCRE2_CONFIG(PCRE2_CONFIG_COMPILED_WIDTHS, &optval);
8347 if (optval & 1) printf("  8-bit support\n");
8348 if (optval & 2) printf("  16-bit support\n");
8349 if (optval & 4) printf("  32-bit support\n");
8350 
8351 #ifdef SUPPORT_VALGRIND
8352 printf("  Valgrind support\n");
8353 #endif
8354 
8355 (void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
8356 if (optval != 0)
8357   {
8358   printf("  UTF and UCP support (");
8359   print_unicode_version(stdout);
8360   printf(")\n");
8361   }
8362 else printf("  No Unicode support\n");
8363 
8364 (void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
8365 if (optval != 0)
8366   {
8367   printf("  Just-in-time compiler support: ");
8368   print_jit_target(stdout);
8369   printf("\n");
8370   }
8371 else
8372   {
8373   printf("  No just-in-time compiler support\n");
8374   }
8375 
8376 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
8377 print_newline_config(optval, FALSE);
8378 (void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
8379 printf("  \\R matches %s\n",
8380   (optval == PCRE2_BSR_ANYCRLF)? "CR, LF, or CRLF only" :
8381                                  "all Unicode newlines");
8382 (void)PCRE2_CONFIG(PCRE2_CONFIG_NEVER_BACKSLASH_C, &optval);
8383 printf("  \\C is %ssupported\n", optval? "not ":"");
8384 (void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
8385 printf("  Internal link size = %d\n", optval);
8386 (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
8387 printf("  Parentheses nest limit = %d\n", optval);
8388 (void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
8389 printf("  Default heap limit = %d kibibytes\n", optval);
8390 (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
8391 printf("  Default match limit = %d\n", optval);
8392 (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
8393 printf("  Default depth limit = %d\n", optval);
8394 
8395 #if defined SUPPORT_LIBREADLINE
8396 printf("  pcre2test has libreadline support\n");
8397 #elif defined SUPPORT_LIBEDIT
8398 printf("  pcre2test has libedit support\n");
8399 #else
8400 printf("  pcre2test has neither libreadline nor libedit support\n");
8401 #endif
8402 
8403 return 0;
8404 }
8405 
8406 
8407 
8408 /*************************************************
8409 *              Display one modifier              *
8410 *************************************************/
8411 
8412 static void
display_one_modifier(modstruct * m,BOOL for_pattern)8413 display_one_modifier(modstruct *m, BOOL for_pattern)
8414 {
8415 uint32_t c = (!for_pattern && (m->which == MOD_PND || m->which == MOD_PNDP))?
8416   '*' : ' ';
8417 printf("%c%s", c, m->name);
8418 }
8419 
8420 
8421 
8422 /*************************************************
8423 *       Display pattern or subject modifiers     *
8424 *************************************************/
8425 
8426 /* In order to print in two columns, first scan without printing to get a list
8427 of the modifiers that are required.
8428 
8429 Arguments:
8430   for_pattern   TRUE for pattern modifiers, FALSE for subject modifiers
8431   title         string to be used in title
8432 
8433 Returns:        nothing
8434 */
8435 
8436 static void
display_selected_modifiers(BOOL for_pattern,const char * title)8437 display_selected_modifiers(BOOL for_pattern, const char *title)
8438 {
8439 uint32_t i, j;
8440 uint32_t n = 0;
8441 uint32_t list[MODLISTCOUNT];
8442 
8443 for (i = 0; i < MODLISTCOUNT; i++)
8444   {
8445   BOOL is_pattern = TRUE;
8446   modstruct *m = modlist + i;
8447 
8448   switch (m->which)
8449     {
8450     case MOD_CTC:       /* Compile context */
8451     case MOD_PAT:       /* Pattern */
8452     case MOD_PATP:      /* Pattern, OK for Perl-compatible test */
8453     break;
8454 
8455     /* The MOD_PND and MOD_PNDP modifiers are precisely those that affect
8456     subjects, but can be given with a pattern. We list them as subject
8457     modifiers, but marked with an asterisk.*/
8458 
8459     case MOD_CTM:       /* Match context */
8460     case MOD_DAT:       /* Subject line */
8461     case MOD_PND:       /* As PD, but not default pattern */
8462     case MOD_PNDP:      /* As PND, OK for Perl-compatible test */
8463     is_pattern = FALSE;
8464     break;
8465 
8466     default: printf("** Unknown type for modifier '%s'\n", m->name);
8467     /* Fall through */
8468     case MOD_PD:        /* Pattern or subject */
8469     case MOD_PDP:       /* As PD, OK for Perl-compatible test */
8470     is_pattern = for_pattern;
8471     break;
8472     }
8473 
8474   if (for_pattern == is_pattern) list[n++] = i;
8475   }
8476 
8477 /* Now print from the list in two columns. */
8478 
8479 printf("-------------- %s MODIFIERS --------------\n", title);
8480 
8481 for (i = 0, j = (n+1)/2; i < (n+1)/2; i++, j++)
8482   {
8483   modstruct *m = modlist + list[i];
8484   display_one_modifier(m, for_pattern);
8485   if (j < n)
8486     {
8487     uint32_t k = 27 - strlen(m->name);
8488     while (k-- > 0) printf(" ");
8489     display_one_modifier(modlist + list[j], for_pattern);
8490     }
8491   printf("\n");
8492   }
8493 }
8494 
8495 
8496 
8497 /*************************************************
8498 *          Display the list of modifiers         *
8499 *************************************************/
8500 
8501 static void
display_modifiers(void)8502 display_modifiers(void)
8503 {
8504 printf(
8505   "An asterisk on a subject modifier means that it may be given on a pattern\n"
8506   "line, in order to apply to all subjects matched by that pattern. Modifiers\n"
8507   "that are listed for both patterns and subjects have different effects in\n"
8508   "each case.\n\n");
8509 display_selected_modifiers(TRUE, "PATTERN");
8510 printf("\n");
8511 display_selected_modifiers(FALSE, "SUBJECT");
8512 }
8513 
8514 
8515 
8516 /*************************************************
8517 *                Main Program                    *
8518 *************************************************/
8519 
8520 int
main(int argc,char ** argv)8521 main(int argc, char **argv)
8522 {
8523 uint32_t temp;
8524 uint32_t yield = 0;
8525 uint32_t op = 1;
8526 BOOL notdone = TRUE;
8527 BOOL quiet = FALSE;
8528 BOOL showtotaltimes = FALSE;
8529 BOOL skipping = FALSE;
8530 char *arg_subject = NULL;
8531 char *arg_pattern = NULL;
8532 char *arg_error = NULL;
8533 
8534 /* The offsets to the options and control bits fields of the pattern and data
8535 control blocks must be the same so that common options and controls such as
8536 "anchored" or "memory" can work for either of them from a single table entry.
8537 We cannot test this till runtime because "offsetof" does not work in the
8538 preprocessor. */
8539 
8540 if (PO(options) != DO(options) || PO(control) != DO(control) ||
8541     PO(control2) != DO(control2))
8542   {
8543   fprintf(stderr, "** Coding error: "
8544     "options and control offsets for pattern and data must be the same.\n");
8545   return 1;
8546   }
8547 
8548 /* Get the PCRE2 and Unicode version number and JIT target information, at the
8549 same time checking that a request for the length gives the same answer. Also
8550 check lengths for non-string items. */
8551 
8552 if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
8553     PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
8554 
8555     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
8556     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
8557 
8558     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
8559     PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
8560 
8561     PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) ||
8562     PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t))
8563   {
8564   fprintf(stderr, "** Error in pcre2_config(): bad length\n");
8565   return 1;
8566   }
8567 
8568 /* Check that bad options are diagnosed. */
8569 
8570 if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION ||
8571     PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION)
8572   {
8573   fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n");
8574   return 1;
8575   }
8576 
8577 /* This configuration option is now obsolete, but running a quick check ensures
8578 that its code is covered. */
8579 
8580 (void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &temp);
8581 
8582 /* Get buffers from malloc() so that valgrind will check their misuse when
8583 debugging. They grow automatically when very long lines are read. The 16-
8584 and 32-bit buffers (pbuffer16, pbuffer32) are obtained only if needed. */
8585 
8586 buffer = (uint8_t *)malloc(pbuffer8_size);
8587 pbuffer8 = (uint8_t *)malloc(pbuffer8_size);
8588 
8589 /* The following  _setmode() stuff is some Windows magic that tells its runtime
8590 library to translate CRLF into a single LF character. At least, that's what
8591 I've been told: never having used Windows I take this all on trust. Originally
8592 it set 0x8000, but then I was advised that _O_BINARY was better. */
8593 
8594 #if defined(_WIN32) || defined(WIN32)
8595 _setmode( _fileno( stdout ), _O_BINARY );
8596 #endif
8597 
8598 /* Initialization that does not depend on the running mode. */
8599 
8600 locale_name[0] = 0;
8601 
8602 memset(&def_patctl, 0, sizeof(patctl));
8603 def_patctl.convert_type = CONVERT_UNSET;
8604 
8605 memset(&def_datctl, 0, sizeof(datctl));
8606 def_datctl.oveccount = DEFAULT_OVECCOUNT;
8607 def_datctl.copy_numbers[0] = -1;
8608 def_datctl.get_numbers[0] = -1;
8609 def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET;
8610 def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
8611 def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
8612 
8613 /* Scan command line options. */
8614 
8615 while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
8616   {
8617   char *endptr;
8618   char *arg = argv[op];
8619   unsigned long uli;
8620 
8621   /* List modifiers and exit. */
8622 
8623   if (strcmp(arg, "-LM") == 0)
8624     {
8625     display_modifiers();
8626     goto EXIT;
8627     }
8628 
8629   /* Display and/or set return code for configuration options. */
8630 
8631   if (strcmp(arg, "-C") == 0)
8632     {
8633     yield = c_option(argv[op + 1]);
8634     goto EXIT;
8635     }
8636 
8637   /* Select operating mode. Ensure that pcre2_config() is called in 16-bit
8638   and 32-bit modes because that won't happen naturally when 8-bit is also
8639   configured. Also call some other functions that are not otherwise used. This
8640   means that a coverage report won't claim there are uncalled functions. */
8641 
8642   if (strcmp(arg, "-8") == 0)
8643     {
8644 #ifdef SUPPORT_PCRE2_8
8645     test_mode = PCRE8_MODE;
8646     (void)pcre2_set_bsr_8(pat_context8, 999);
8647     (void)pcre2_set_newline_8(pat_context8, 999);
8648 #else
8649     fprintf(stderr,
8650       "** This version of PCRE2 was built without 8-bit support\n");
8651     exit(1);
8652 #endif
8653     }
8654 
8655   else if (strcmp(arg, "-16") == 0)
8656     {
8657 #ifdef SUPPORT_PCRE2_16
8658     test_mode = PCRE16_MODE;
8659     (void)pcre2_config_16(PCRE2_CONFIG_VERSION, NULL);
8660     (void)pcre2_set_bsr_16(pat_context16, 999);
8661     (void)pcre2_set_newline_16(pat_context16, 999);
8662 #else
8663     fprintf(stderr,
8664       "** This version of PCRE2 was built without 16-bit support\n");
8665     exit(1);
8666 #endif
8667     }
8668 
8669   else if (strcmp(arg, "-32") == 0)
8670     {
8671 #ifdef SUPPORT_PCRE2_32
8672     test_mode = PCRE32_MODE;
8673     (void)pcre2_config_32(PCRE2_CONFIG_VERSION, NULL);
8674     (void)pcre2_set_bsr_32(pat_context32, 999);
8675     (void)pcre2_set_newline_32(pat_context32, 999);
8676 #else
8677     fprintf(stderr,
8678       "** This version of PCRE2 was built without 32-bit support\n");
8679     exit(1);
8680 #endif
8681     }
8682 
8683   /* Set quiet (no version verification) */
8684 
8685   else if (strcmp(arg, "-q") == 0) quiet = TRUE;
8686 
8687   /* Set system stack size */
8688 
8689   else if (strcmp(arg, "-S") == 0 && argc > 2 &&
8690       ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))
8691     {
8692 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
8693     fprintf(stderr, "pcre2test: -S is not supported on this OS\n");
8694     exit(1);
8695 #else
8696     int rc;
8697     uint32_t stack_size;
8698     struct rlimit rlim;
8699     if (U32OVERFLOW(uli))
8700       {
8701       fprintf(stderr, "** Argument for -S is too big\n");
8702       exit(1);
8703       }
8704     stack_size = (uint32_t)uli;
8705     getrlimit(RLIMIT_STACK, &rlim);
8706     rlim.rlim_cur = stack_size * 1024 * 1024;
8707     if (rlim.rlim_cur > rlim.rlim_max)
8708       {
8709       fprintf(stderr,
8710         "pcre2test: requested stack size %luMiB is greater than hard limit "
8711           "%luMiB\n", (unsigned long int)stack_size,
8712           (unsigned long int)(rlim.rlim_max));
8713       exit(1);
8714       }
8715     rc = setrlimit(RLIMIT_STACK, &rlim);
8716     if (rc != 0)
8717       {
8718       fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n",
8719         (unsigned long int)stack_size, strerror(errno));
8720       exit(1);
8721       }
8722     op++;
8723     argc--;
8724 #endif
8725     }
8726 
8727   /* Set some common pattern and subject controls */
8728 
8729   else if (strcmp(arg, "-AC") == 0)
8730     {
8731     def_patctl.options |= PCRE2_AUTO_CALLOUT;
8732     def_datctl.control2 |= CTL2_CALLOUT_EXTRA;
8733     }
8734   else if (strcmp(arg, "-ac") == 0)  def_patctl.options |= PCRE2_AUTO_CALLOUT;
8735   else if (strcmp(arg, "-b") == 0)   def_patctl.control |= CTL_FULLBINCODE;
8736   else if (strcmp(arg, "-d") == 0)   def_patctl.control |= CTL_DEBUG;
8737   else if (strcmp(arg, "-dfa") == 0) def_datctl.control |= CTL_DFA;
8738   else if (strcmp(arg, "-i") == 0)   def_patctl.control |= CTL_INFO;
8739   else if (strcmp(arg, "-jit") == 0 || strcmp(arg, "-jitverify") == 0 ||
8740            strcmp(arg, "-jitfast") == 0)
8741     {
8742     if (arg[4] == 'v') def_patctl.control |= CTL_JITVERIFY;
8743       else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST;
8744     def_patctl.jit = JIT_DEFAULT;  /* full & partial */
8745 #ifndef SUPPORT_JIT
8746     fprintf(stderr, "** Warning: JIT support is not available: "
8747                     "-jit[fast|verify] calls functions that do nothing.\n");
8748 #endif
8749     }
8750 
8751   /* Set timing parameters */
8752 
8753   else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
8754            strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
8755     {
8756     int both = arg[2] == 0;
8757     showtotaltimes = arg[1] == 'T';
8758     if (argc > 2 && (uli = strtoul(argv[op+1], &endptr, 10), *endptr == 0))
8759       {
8760       if (uli == 0)
8761         {
8762         fprintf(stderr, "** Argument for %s must not be zero\n", arg);
8763         exit(1);
8764         }
8765       if (U32OVERFLOW(uli))
8766         {
8767         fprintf(stderr, "** Argument for %s is too big\n", arg);
8768         exit(1);
8769         }
8770       timeitm = (int)uli;
8771       op++;
8772       argc--;
8773       }
8774     else timeitm = LOOPREPEAT;
8775     if (both) timeit = timeitm;
8776     }
8777 
8778   /* Give help */
8779 
8780   else if (strcmp(arg, "-help") == 0 ||
8781            strcmp(arg, "--help") == 0)
8782     {
8783     usage();
8784     goto EXIT;
8785     }
8786 
8787   /* Show version */
8788 
8789   else if (strcmp(arg, "-version") == 0 ||
8790            strcmp(arg, "--version") == 0)
8791     {
8792     print_version(stdout);
8793     goto EXIT;
8794     }
8795 
8796   /* The following options save their data for processing once we know what
8797   the running mode is. */
8798 
8799   else if (strcmp(arg, "-error") == 0)
8800     {
8801     arg_error = argv[op+1];
8802     goto CHECK_VALUE_EXISTS;
8803     }
8804 
8805   else if (strcmp(arg, "-subject") == 0)
8806     {
8807     arg_subject = argv[op+1];
8808     goto CHECK_VALUE_EXISTS;
8809     }
8810 
8811   else if (strcmp(arg, "-pattern") == 0)
8812     {
8813     arg_pattern = argv[op+1];
8814     CHECK_VALUE_EXISTS:
8815     if (argc <= 2)
8816       {
8817       fprintf(stderr, "** Missing value for %s\n", arg);
8818       yield = 1;
8819       goto EXIT;
8820       }
8821     op++;
8822     argc--;
8823     }
8824 
8825   /* Unrecognized option */
8826 
8827   else
8828     {
8829     fprintf(stderr, "** Unknown or malformed option '%s'\n", arg);
8830     usage();
8831     yield = 1;
8832     goto EXIT;
8833     }
8834   op++;
8835   argc--;
8836   }
8837 
8838 /* If -error was present, get the error numbers, show the messages, and exit.
8839 We wait to do this until we know which mode we are in. */
8840 
8841 if (arg_error != NULL)
8842   {
8843   int len;
8844   int errcode;
8845   char *endptr;
8846 
8847 /* Ensure the relevant non-8-bit buffer is available. Ensure that it is at
8848 least 128 code units, because it is used for retrieving error messages. */
8849 
8850 #ifdef SUPPORT_PCRE2_16
8851   if (test_mode == PCRE16_MODE)
8852     {
8853     pbuffer16_size = 256;
8854     pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
8855     if (pbuffer16 == NULL)
8856       {
8857       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n",
8858         pbuffer16_size);
8859       yield = 1;
8860       goto EXIT;
8861       }
8862     }
8863 #endif
8864 
8865 #ifdef SUPPORT_PCRE2_32
8866   if (test_mode == PCRE32_MODE)
8867     {
8868     pbuffer32_size = 512;
8869     pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
8870     if (pbuffer32 == NULL)
8871       {
8872       fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n",
8873         pbuffer32_size);
8874       yield = 1;
8875       goto EXIT;
8876       }
8877     }
8878 #endif
8879 
8880   /* Loop along a list of error numbers. */
8881 
8882   for (;;)
8883     {
8884     errcode = strtol(arg_error, &endptr, 10);
8885     if (*endptr != 0 && *endptr != CHAR_COMMA)
8886       {
8887       fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
8888       yield = 1;
8889       goto EXIT;
8890       }
8891     printf("Error %d: ", errcode);
8892     PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
8893     if (len < 0)
8894       {
8895       switch (len)
8896         {
8897         case PCRE2_ERROR_BADDATA:
8898         printf("PCRE2_ERROR_BADDATA (unknown error number)");
8899         break;
8900 
8901         case PCRE2_ERROR_NOMEMORY:
8902         printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
8903         break;
8904 
8905         default:
8906         printf("Unexpected return (%d) from pcre2_get_error_message()", len);
8907         break;
8908         }
8909       }
8910     else
8911       {
8912       PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
8913       }
8914     printf("\n");
8915     if (*endptr == 0) goto EXIT;
8916     arg_error = endptr + 1;
8917     }
8918   /* Control never reaches here */
8919   }  /* End of -error handling */
8920 
8921 /* Initialize things that cannot be done until we know which test mode we are
8922 running in. Exercise the general context copying and match data size functions,
8923 which are not otherwise used. */
8924 
8925 code_unit_size = test_mode/8;
8926 max_oveccount = DEFAULT_OVECCOUNT;
8927 
8928 /* Use macros to save a lot of duplication. */
8929 
8930 #define CREATECONTEXTS \
8931   G(general_context,BITS) = G(pcre2_general_context_create_,BITS)(&my_malloc, &my_free, NULL); \
8932   G(general_context_copy,BITS) = G(pcre2_general_context_copy_,BITS)(G(general_context,BITS)); \
8933   G(default_pat_context,BITS) = G(pcre2_compile_context_create_,BITS)(G(general_context,BITS)); \
8934   G(pat_context,BITS) = G(pcre2_compile_context_copy_,BITS)(G(default_pat_context,BITS)); \
8935   G(default_dat_context,BITS) = G(pcre2_match_context_create_,BITS)(G(general_context,BITS)); \
8936   G(dat_context,BITS) = G(pcre2_match_context_copy_,BITS)(G(default_dat_context,BITS)); \
8937   G(default_con_context,BITS) = G(pcre2_convert_context_create_,BITS)(G(general_context,BITS)); \
8938   G(con_context,BITS) = G(pcre2_convert_context_copy_,BITS)(G(default_con_context,BITS)); \
8939   G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
8940 
8941 #define CONTEXTTESTS \
8942   (void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
8943   (void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
8944   (void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
8945   (void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL); \
8946   (void)G(pcre2_get_match_data_size_,BITS)(G(match_data,BITS))
8947 
8948 
8949 /* Call the appropriate functions for the current mode, and exercise some
8950 functions that are not otherwise called. */
8951 
8952 #ifdef SUPPORT_PCRE2_8
8953 #undef BITS
8954 #define BITS 8
8955 if (test_mode == PCRE8_MODE)
8956   {
8957   CREATECONTEXTS;
8958   CONTEXTTESTS;
8959   }
8960 #endif
8961 
8962 #ifdef SUPPORT_PCRE2_16
8963 #undef BITS
8964 #define BITS 16
8965 if (test_mode == PCRE16_MODE)
8966   {
8967   CREATECONTEXTS;
8968   CONTEXTTESTS;
8969   }
8970 #endif
8971 
8972 #ifdef SUPPORT_PCRE2_32
8973 #undef BITS
8974 #define BITS 32
8975 if (test_mode == PCRE32_MODE)
8976   {
8977   CREATECONTEXTS;
8978   CONTEXTTESTS;
8979   }
8980 #endif
8981 
8982 /* Set a default parentheses nest limit that is large enough to run the
8983 standard tests (this also exercises the function). */
8984 
8985 PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
8986 
8987 /* Handle command line modifier settings, sending any error messages to
8988 stderr. We need to know the mode before modifying the context, and it is tidier
8989 to do them all in the same way. */
8990 
8991 outfile = stderr;
8992 if ((arg_pattern != NULL &&
8993     !decode_modifiers((uint8_t *)arg_pattern, CTX_DEFPAT, &def_patctl, NULL)) ||
8994     (arg_subject != NULL &&
8995     !decode_modifiers((uint8_t *)arg_subject, CTX_DEFDAT, NULL, &def_datctl)))
8996   {
8997   yield = 1;
8998   goto EXIT;
8999   }
9000 
9001 /* Sort out the input and output files, defaulting to stdin/stdout. */
9002 
9003 infile = stdin;
9004 outfile = stdout;
9005 
9006 if (argc > 1 && strcmp(argv[op], "-") != 0)
9007   {
9008   infile = fopen(argv[op], INPUT_MODE);
9009   if (infile == NULL)
9010     {
9011     printf("** Failed to open '%s': %s\n", argv[op], strerror(errno));
9012     yield = 1;
9013     goto EXIT;
9014     }
9015   }
9016 
9017 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9018 if (INTERACTIVE(infile)) using_history();
9019 #endif
9020 
9021 if (argc > 2)
9022   {
9023   outfile = fopen(argv[op+1], OUTPUT_MODE);
9024   if (outfile == NULL)
9025     {
9026     printf("** Failed to open '%s': %s\n", argv[op+1], strerror(errno));
9027     yield = 1;
9028     goto EXIT;
9029     }
9030   }
9031 
9032 /* Output a heading line unless quiet, then process input lines. */
9033 
9034 if (!quiet) print_version(outfile);
9035 
9036 SET(compiled_code, NULL);
9037 
9038 #ifdef SUPPORT_PCRE2_8
9039 preg.re_pcre2_code = NULL;
9040 preg.re_match_data = NULL;
9041 #endif
9042 
9043 while (notdone)
9044   {
9045   uint8_t *p;
9046   int rc = PR_OK;
9047   BOOL expectdata = TEST(compiled_code, !=, NULL);
9048 #ifdef SUPPORT_PCRE2_8
9049   expectdata |= preg.re_pcre2_code != NULL;
9050 #endif
9051 
9052   if (extend_inputline(infile, buffer, expectdata? "data> " : "  re> ") == NULL)
9053     break;
9054   if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer);
9055   fflush(outfile);
9056   p = buffer;
9057 
9058   /* If we have a pattern set up for testing, or we are skipping after a
9059   compile failure, a blank line terminates this test. */
9060 
9061   if (expectdata || skipping)
9062     {
9063     while (isspace(*p)) p++;
9064     if (*p == 0)
9065       {
9066 #ifdef SUPPORT_PCRE2_8
9067       if (preg.re_pcre2_code != NULL)
9068         {
9069         regfree(&preg);
9070         preg.re_pcre2_code = NULL;
9071         preg.re_match_data = NULL;
9072         }
9073 #endif  /* SUPPORT_PCRE2_8 */
9074       if (TEST(compiled_code, !=, NULL))
9075         {
9076         SUB1(pcre2_code_free, compiled_code);
9077         SET(compiled_code, NULL);
9078         }
9079       skipping = FALSE;
9080       setlocale(LC_CTYPE, "C");
9081       }
9082 
9083     /* Otherwise, if we are not skipping, and the line is not a data comment
9084     line starting with "\=", process a data line. */
9085 
9086     else if (!skipping && !(p[0] == '\\' && p[1] == '=' && isspace(p[2])))
9087       {
9088       rc = process_data();
9089       }
9090     }
9091 
9092   /* We do not have a pattern set up for testing. Lines starting with # are
9093   either comments or special commands. Blank lines are ignored. Otherwise, the
9094   line must start with a valid delimiter. It is then processed as a pattern
9095   line. A copy of the pattern is left in pbuffer8 for use by callouts. Under
9096   valgrind, make the unused part of the buffer undefined, to catch overruns. */
9097 
9098   else if (*p == '#')
9099     {
9100     if (isspace(p[1]) || p[1] == '!' || p[1] == 0) continue;
9101     rc = process_command();
9102     }
9103 
9104   else if (strchr("/!\"'`%&-=_:;,@~", *p) != NULL)
9105     {
9106     rc = process_pattern();
9107     dfa_matched = 0;
9108     }
9109 
9110   else
9111     {
9112     while (isspace(*p)) p++;
9113     if (*p != 0)
9114       {
9115       fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer,
9116         *buffer);
9117       rc = PR_SKIP;
9118       }
9119     }
9120 
9121   if (rc == PR_SKIP && !INTERACTIVE(infile)) skipping = TRUE;
9122   else if (rc == PR_ABEND)
9123     {
9124     fprintf(outfile, "** pcre2test run abandoned\n");
9125     yield = 1;
9126     goto EXIT;
9127     }
9128   }
9129 
9130 /* Finish off a normal run. */
9131 
9132 if (INTERACTIVE(infile)) fprintf(outfile, "\n");
9133 
9134 if (showtotaltimes)
9135   {
9136   const char *pad = "";
9137   fprintf(outfile, "--------------------------------------\n");
9138   if (timeit > 0)
9139     {
9140     fprintf(outfile, "Total compile time %.4f milliseconds\n",
9141       (((double)total_compile_time * 1000.0) / (double)timeit) /
9142         (double)CLOCKS_PER_SEC);
9143     if (total_jit_compile_time > 0)
9144       fprintf(outfile, "Total JIT compile  %.4f milliseconds\n",
9145         (((double)total_jit_compile_time * 1000.0) / (double)timeit) /
9146           (double)CLOCKS_PER_SEC);
9147     pad = "  ";
9148     }
9149   fprintf(outfile, "Total match time %s%.4f milliseconds\n", pad,
9150     (((double)total_match_time * 1000.0) / (double)timeitm) /
9151       (double)CLOCKS_PER_SEC);
9152   }
9153 
9154 
9155 EXIT:
9156 
9157 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
9158 if (infile != NULL && INTERACTIVE(infile)) clear_history();
9159 #endif
9160 
9161 if (infile != NULL && infile != stdin) fclose(infile);
9162 if (outfile != NULL && outfile != stdout) fclose(outfile);
9163 
9164 free(buffer);
9165 free(dbuffer);
9166 free(pbuffer8);
9167 free(dfa_workspace);
9168 free((void *)locale_tables);
9169 free(tables3);
9170 PCRE2_MATCH_DATA_FREE(match_data);
9171 SUB1(pcre2_code_free, compiled_code);
9172 
9173 while(patstacknext-- > 0)
9174   {
9175   SET(compiled_code, patstack[patstacknext]);
9176   SUB1(pcre2_code_free, compiled_code);
9177   }
9178 
9179 PCRE2_JIT_FREE_UNUSED_MEMORY(general_context);
9180 if (jit_stack != NULL)
9181   {
9182   PCRE2_JIT_STACK_FREE(jit_stack);
9183   }
9184 
9185 #define FREECONTEXTS \
9186   G(pcre2_general_context_free_,BITS)(G(general_context,BITS)); \
9187   G(pcre2_general_context_free_,BITS)(G(general_context_copy,BITS)); \
9188   G(pcre2_compile_context_free_,BITS)(G(pat_context,BITS)); \
9189   G(pcre2_compile_context_free_,BITS)(G(default_pat_context,BITS)); \
9190   G(pcre2_match_context_free_,BITS)(G(dat_context,BITS)); \
9191   G(pcre2_match_context_free_,BITS)(G(default_dat_context,BITS)); \
9192   G(pcre2_convert_context_free_,BITS)(G(default_con_context,BITS)); \
9193   G(pcre2_convert_context_free_,BITS)(G(con_context,BITS));
9194 
9195 #ifdef SUPPORT_PCRE2_8
9196 #undef BITS
9197 #define BITS 8
9198 if (preg.re_pcre2_code != NULL) regfree(&preg);
9199 FREECONTEXTS;
9200 #endif
9201 
9202 #ifdef SUPPORT_PCRE2_16
9203 #undef BITS
9204 #define BITS 16
9205 free(pbuffer16);
9206 FREECONTEXTS;
9207 #endif
9208 
9209 #ifdef SUPPORT_PCRE2_32
9210 #undef BITS
9211 #define BITS 32
9212 free(pbuffer32);
9213 FREECONTEXTS;
9214 #endif
9215 
9216 #if defined(__VMS)
9217   yield = SS$_NORMAL;  /* Return values via DCL symbols */
9218 #endif
9219 
9220 return yield;
9221 }
9222 
9223 /* End of pcre2test.c */
9224