1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places. The
8 addition of 16-bit support has made it even worse. :-(
9
10 -----------------------------------------------------------------------------
11 Redistribution and use in source and binary forms, with or without
12 modification, are permitted provided that the following conditions are met:
13
14 * Redistributions of source code must retain the above copyright notice,
15 this list of conditions and the following disclaimer.
16
17 * Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in the
19 documentation and/or other materials provided with the distribution.
20
21 * Neither the name of the University of Cambridge nor the names of its
22 contributors may be used to endorse or promote products derived from
23 this software without specific prior written permission.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36 -----------------------------------------------------------------------------
37 */
38
39 /* This program now supports the testing of all of the 8-bit, 16-bit, and
40 32-bit PCRE libraries in a single program. This is different from the modules
41 such as pcre_compile.c in the library itself, which are compiled separately for
42 each mode. If two modes are enabled, for example, pcre_compile.c is compiled
43 twice. By contrast, pcretest.c is compiled only once. Therefore, it must not
44 make use of any of the macros from pcre_internal.h that depend on
45 COMPILE_PCRE8, COMPILE_PCRE16, or COMPILE_PCRE32. It does, however, make use of
46 SUPPORT_PCRE8, SUPPORT_PCRE16, and SUPPORT_PCRE32 to ensure that it calls only
47 supported library functions. */
48
49 #ifdef HAVE_CONFIG_H
50 #include "config.h"
51 #endif
52
53 #include <ctype.h>
54 #include <stdio.h>
55 #include <string.h>
56 #include <stdlib.h>
57 #include <time.h>
58 #include <locale.h>
59 #include <errno.h>
60
61 /* Both libreadline and libedit are optionally supported. The user-supplied
62 original patch uses readline/readline.h for libedit, but in at least one system
63 it is installed as editline/readline.h, so the configuration code now looks for
64 that first, falling back to readline/readline.h. */
65
66 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67 #ifdef HAVE_UNISTD_H
68 #include <unistd.h>
69 #endif
70 #if defined(SUPPORT_LIBREADLINE)
71 #include <readline/readline.h>
72 #include <readline/history.h>
73 #else
74 #if defined(HAVE_EDITLINE_READLINE_H)
75 #include <editline/readline.h>
76 #else
77 #include <readline/readline.h>
78 #endif
79 #endif
80 #endif
81
82 /* A number of things vary for Windows builds. Originally, pcretest opened its
83 input and output without "b"; then I was told that "b" was needed in some
84 environments, so it was added for release 5.0 to both the input and output. (It
85 makes no difference on Unix-like systems.) Later I was told that it is wrong
86 for the input on Windows. I've now abstracted the modes into two macros that
87 are set here, to make it easier to fiddle with them, and removed "b" from the
88 input mode under Windows. */
89
#if !defined(_WIN32) && !defined(WIN32)

/* Not Windows */

#include <sys/time.h>       /* These two includes are needed */
#include <sys/resource.h>   /* for setrlimit(). */

#ifdef NATIVE_ZOS           /* z/OS uses non-binary I/O */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "w"
#else                       /* everything else opens both streams in binary */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif

#else  /* Windows */

#include <io.h>             /* For _setmode() */
#include <fcntl.h>          /* For _O_BINARY */

/* Under Windows the input is opened without "b", the output with it. */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

/* Windows spells these functions with a leading underscore, though in some
environments the plain names are already defined, hence the #ifndefs. */

#ifndef isatty
#define isatty _isatty
#endif

#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

#endif  /* Windows / not Windows */
123
/* VMS builds need the system-service definitions and a helper for setting a
symbol, which is only declared here. */

#ifdef __VMS
#include <ssdef.h>
void vms_setsymbol(char *, char *, int);
#endif


/* PRIV() expands to its argument unchanged in this program. */

#define PRIV(symbol) symbol
131
132 /* We have to include pcre_internal.h because we need the internal info for
133 displaying the results of pcre_study() and we also need to know about the
134 internal macros, structures, and other internal data values; pcretest has
135 "inside information" compared to a program that strictly follows the PCRE API.
136
137 Although pcre_internal.h does itself include pcre.h, we explicitly include it
138 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
139 appropriately for an application, not for building PCRE. */
140
141 #include "pcre.h"
142 #include "pcre_internal.h"
143
144 /* The pcre_printint() function, which prints the internal form of a compiled
145 regex, is held in a separate file so that (a) it can be compiled in either
146 8-, 16- or 32-bit mode, and (b) it can be #included directly in pcre_compile.c
147 when that is compiled in debug mode. */
148
/* One printer is declared for each library width that is compiled in. All
three share the same signature: the compiled pattern, the stream to write to,
and a flag selecting whether lengths are printed. */

#ifdef SUPPORT_PCRE8
void pcre_printint(pcre *compiled_re, FILE *out, BOOL show_lengths);
#endif

#ifdef SUPPORT_PCRE16
void pcre16_printint(pcre *compiled_re, FILE *out, BOOL show_lengths);
#endif

#ifdef SUPPORT_PCRE32
void pcre32_printint(pcre *compiled_re, FILE *out, BOOL show_lengths);
#endif
158
159 /* We need access to some of the data tables that PCRE uses. So as not to have
160 to keep two copies, we include the source files here, changing the names of the
161 external symbols to prevent clashes. */
162
163 #define PCRE_INCLUDED
164
165 #include "pcre_tables.c"
166 #include "pcre_ucd.c"
167
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
the same as in the printint.src file. We use it here in cases when the locale
has not been explicitly changed, so as to get consistent output from systems
that differ in their output from isprint() even in the "C" locale. */
173
/* PRINTABLE(ch) is a fixed, locale-independent range test; the range differs
between EBCDIC and non-EBCDIC builds. Note that the argument is evaluated more
than once. */

#ifndef EBCDIC
#define PRINTABLE(ch) (32 <= (ch) && (ch) < 127)
#else
#define PRINTABLE(ch) (64 <= (ch) && (ch) < 255)
#endif

/* PRINTOK(ch) defers to isprint() (for values below 256 only) once
locale_set is non-zero, and to the fixed PRINTABLE() range otherwise. */

#define PRINTOK(ch) \
  (locale_set ? ((ch) < 256 && isprint(ch)) : PRINTABLE(ch))
181
182 /* Posix support is disabled in 16 or 32 bit only mode. */
/* The POSIX wrapper is only tested when the 8-bit library is available, so
force NOPOSIX in 16-bit-only or 32-bit-only builds. */

#if !defined(SUPPORT_PCRE8) && !defined(NOPOSIX)
#define NOPOSIX
#endif

/* This test program can also be compiled without support for testing the
POSIX interface by defining NOPOSIX by hand, though this is not available via
the standard Makefile. */

#ifndef NOPOSIX
#include "pcreposix.h"
#endif

/* Originally for the benefit of a version that was imported into Exim, it is
also possible to build pcretest without support for UTF8 or UTF16 (define
NOUTF) and without the interface to the DFA matcher (define NODFA). UTF support
is cut out automatically when PCRE itself was built without it. */

#if !defined(SUPPORT_UTF) && !defined(NOUTF)
#define NOUTF
#endif
205
/* To make the code a bit tidier for 8/16/32-bit support, we define macros
for all the pcre[16|32]_xxx functions (except pcre16_fullinfo, which is called
only from one place and is handled differently). I couldn't dream up any way of
using a single macro to do this in a generic way, because of the many different
argument requirements. We know that at least one of SUPPORT_PCRE8,
SUPPORT_PCRE16, and SUPPORT_PCRE32 must be set. First define macros for each
individual mode; then use these in the definitions of generic macros.

**** Special note about the PCHARSxxx macros: the address of the string to be
printed is always given as two arguments: a base address followed by an offset.
The base address is cast to the correct data size for 8, 16, or 32 bit data;
the offset is in units of this size. If the string were given as base+offset in
one argument, the casting might be incorrectly applied. */
219
220 #ifdef SUPPORT_PCRE8
221
/* ----- Wrappers for the 8-bit library -----

Each macro forwards to the corresponding pcre_xxx() function (or pcretest
helper), casting pcretest's generic pointers to the char-based types of the
8-bit API. Where a macro produces a result, the result is assigned to the
variable named by the macro's result parameter (lv, p, re, rc, n, count or
extra). Every argument that sits next to a cast or an operator is
parenthesized so that compound expressions are converted as a whole. */

/* Print len characters starting at p + offset to f; result goes to lv. */
#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + (offset), len, f)

/* As PCHARS8, but the result is discarded. */
#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + (offset), len, f)

/* Only cn8 is used here; cn16 and cn32 exist so that all widths share one
argument list. */
#define READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re) \
  p = read_capture_name8(p, cn8, re)

#define STRLEN8(p) ((int)strlen((char *)(p)))

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

#define SET_PCRE_STACK_GUARD8(stack_guard) \
  pcre_stack_guard = stack_guard

#define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
  pcre_assign_jit_stack(extra, callback, userdata)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)(pat), options, error, erroffset, tables)

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)(bptr), offsets, count, \
    (char *)(namesptr), cbuffer, size)

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)(bptr), offsets, count, i, cbuffer, size)

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)(bptr), len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)(bptr), len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)(bptr), offsets, count, \
    (char *)(getnamesptr), subsptr)

/* The second parameter is the compiled pattern. It used to be called rc and
was ignored, the expansion picking up whatever variable named "re" happened to
be in scope at the point of use. */
#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)(ptr))

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)(bptr), offsets, count, i, subsptr)

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)(bptr), offsets, count, listptr)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
  pcre_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE8(stack) \
  pcre_jit_stack_free(stack)

/* Gives the 8-bit table builder a width-qualified name. */
#define pcre8_maketables pcre_maketables
302
303 #endif /* SUPPORT_PCRE8 */
304
305 /* -----------------------------------------------------------*/
306
307 #ifdef SUPPORT_PCRE16
308
/* ----- Wrappers for the 16-bit library -----

Each macro forwards to the corresponding pcre16_xxx() function (or pcretest
helper), casting pcretest's generic pointers to the 16-bit API types and
casting results back to the generic types. Buffer sizes supplied in bytes are
halved to give a count of 16-bit units. Every argument that sits next to a
cast or an operator is parenthesized so that compound expressions are
converted (or scaled) as a whole. */

/* Print len units starting at p + offset to f; result goes to lv. */
#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* As PCHARS16, but the result is discarded. */
#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + (offset), len, f)

/* Only cn16 is used here; cn8 and cn32 exist so that all widths share one
argument list. */
#define READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re) \
  p = read_capture_name16(p, cn16, re)

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)(p)))

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = (int (*)(pcre16_callout_block *))(callout)

#define SET_PCRE_STACK_GUARD16(stack_guard) \
  pcre16_stack_guard = (int (*)(void))(stack_guard)

#define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
  pcre16_assign_jit_stack((pcre16_extra *)(extra), \
    (pcre16_jit_callback)(callback), userdata)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre16_compile((PCRE_SPTR16)(pat), options, error, erroffset, \
    tables)

/* size is in bytes; the library is given the capacity in 16-bit units. */
#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)(re), (PCRE_SPTR16)(bptr), \
    offsets, count, (PCRE_SPTR16)(namesptr), (PCRE_UCHAR16 *)(cbuffer), \
    (size)/2)

/* size is in bytes; the library is given the capacity in 16-bit units. */
#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_UCHAR16 *)(cbuffer), (size)/2)

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec((pcre16 *)(re), (pcre16_extra *)(extra), \
    (PCRE_SPTR16)(bptr), len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec((pcre16 *)(re), (pcre16_extra *)(extra), \
    (PCRE_SPTR16)(bptr), len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study((pcre16_extra *)(extra))

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)(substring))

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)(listptr))

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring((pcre16 *)(re), (PCRE_SPTR16)(bptr), \
    offsets, count, (PCRE_SPTR16)(getnamesptr), \
    (PCRE_SPTR16 *)(void *)(subsptr))

/* The second parameter is the compiled pattern. It used to be called rc and
was ignored, the expansion picking up whatever variable named "re" happened to
be in scope at the point of use. It is now cast like every other pattern
argument in this group. */
#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)(re), (PCRE_SPTR16)(ptr))

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)(bptr), offsets, count, i, \
    (PCRE_SPTR16 *)(void *)(subsptr))

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)(bptr), offsets, count, \
    (PCRE_SPTR16 **)(void *)(listptr))

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order((pcre16 *)(re), \
    (pcre16_extra *)(extra), tables)

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = (pcre_extra *)pcre16_study((pcre16 *)(re), options, error)

#define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
  (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE16(stack) \
  pcre16_jit_stack_free((pcre16_jit_stack *)(stack))
394
395 #endif /* SUPPORT_PCRE16 */
396
397 /* -----------------------------------------------------------*/
398
399 #ifdef SUPPORT_PCRE32
400
/* ----- Wrappers for the 32-bit library -----

Each macro forwards to the corresponding pcre32_xxx() function (or pcretest
helper), casting pcretest's generic pointers to the 32-bit API types and
casting results back to the generic types. Buffer sizes supplied in bytes are
divided by four to give a count of 32-bit units. Every argument that sits next
to a cast or an operator is parenthesized so that compound expressions are
converted (or scaled) as a whole. */

/* Print len units starting at p + offset to f; result goes to lv. NOTE: a
variable called use_utf must be in scope wherever this macro is used. */
#define PCHARS32(lv, p, offset, len, f) \
  lv = pchars32((PCRE_SPTR32)(p) + (offset), len, use_utf, f)

/* As PCHARS32 (including the use_utf requirement), result discarded. */
#define PCHARSV32(p, offset, len, f) \
  (void)pchars32((PCRE_SPTR32)(p) + (offset), len, use_utf, f)

/* Only cn32 is used here; cn8 and cn16 exist so that all widths share one
argument list. */
#define READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re) \
  p = read_capture_name32(p, cn32, re)

#define STRLEN32(p) ((int)strlen32((PCRE_SPTR32)(p)))

#define SET_PCRE_CALLOUT32(callout) \
  pcre32_callout = (int (*)(pcre32_callout_block *))(callout)

#define SET_PCRE_STACK_GUARD32(stack_guard) \
  pcre32_stack_guard = (int (*)(void))(stack_guard)

#define PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata) \
  pcre32_assign_jit_stack((pcre32_extra *)(extra), \
    (pcre32_jit_callback)(callback), userdata)

#define PCRE_COMPILE32(re, pat, options, error, erroffset, tables) \
  re = (pcre *)pcre32_compile((PCRE_SPTR32)(pat), options, error, erroffset, \
    tables)

/* size is in bytes; the library is given the capacity in 32-bit units. */
#define PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre32_copy_named_substring((pcre32 *)(re), (PCRE_SPTR32)(bptr), \
    offsets, count, (PCRE_SPTR32)(namesptr), (PCRE_UCHAR32 *)(cbuffer), \
    (size)/4)

/* size is in bytes; the library is given the capacity in 32-bit units. */
#define PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre32_copy_substring((PCRE_SPTR32)(bptr), offsets, count, i, \
    (PCRE_UCHAR32 *)(cbuffer), (size)/4)

#define PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre32_dfa_exec((pcre32 *)(re), (pcre32_extra *)(extra), \
    (PCRE_SPTR32)(bptr), len, start_offset, options, offsets, size_offsets, \
    workspace, size_workspace)

#define PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre32_exec((pcre32 *)(re), (pcre32_extra *)(extra), \
    (PCRE_SPTR32)(bptr), len, start_offset, options, offsets, size_offsets)

#define PCRE_FREE_STUDY32(extra) \
  pcre32_free_study((pcre32_extra *)(extra))

#define PCRE_FREE_SUBSTRING32(substring) \
  pcre32_free_substring((PCRE_SPTR32)(substring))

#define PCRE_FREE_SUBSTRING_LIST32(listptr) \
  pcre32_free_substring_list((PCRE_SPTR32 *)(listptr))

#define PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre32_get_named_substring((pcre32 *)(re), (PCRE_SPTR32)(bptr), \
    offsets, count, (PCRE_SPTR32)(getnamesptr), \
    (PCRE_SPTR32 *)(void *)(subsptr))

/* The second parameter is the compiled pattern. It used to be called rc and
was ignored, the expansion picking up whatever variable named "re" happened to
be in scope at the point of use. It is now cast like every other pattern
argument in this group. */
#define PCRE_GET_STRINGNUMBER32(n, re, ptr) \
  n = pcre32_get_stringnumber((pcre32 *)(re), (PCRE_SPTR32)(ptr))

#define PCRE_GET_SUBSTRING32(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre32_get_substring((PCRE_SPTR32)(bptr), offsets, count, i, \
    (PCRE_SPTR32 *)(void *)(subsptr))

#define PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr) \
  rc = pcre32_get_substring_list((PCRE_SPTR32)(bptr), offsets, count, \
    (PCRE_SPTR32 **)(void *)(listptr))

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables) \
  rc = pcre32_pattern_to_host_byte_order((pcre32 *)(re), \
    (pcre32_extra *)(extra), tables)

#define PCRE_PRINTINT32(re, outfile, debug_lengths) \
  pcre32_printint(re, outfile, debug_lengths)

#define PCRE_STUDY32(extra, re, options, error) \
  extra = (pcre_extra *)pcre32_study((pcre32 *)(re), options, error)

#define PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
  (pcre_jit_stack *)pcre32_jit_stack_alloc(startsize, maxsize)

#define PCRE_JIT_STACK_FREE32(stack) \
  pcre32_jit_stack_free((pcre32_jit_stack *)(stack))
486
487 #endif /* SUPPORT_PCRE32 */
488
489
490 /* ----- More than one mode is supported; a runtime test is needed, except for
491 pcre_config(), and the JIT stack functions, when it doesn't matter which
492 available version is called. ----- */
493
/* Selects which library width is being exercised. The numeric values are
significant: CHAR_SIZE is computed as 1U << pcre_mode, so the constants must
stay 0, 1 and 2 in this order. */

enum {
  PCRE8_MODE  = 0,
  PCRE16_MODE = 1,
  PCRE32_MODE = 2
};
499
500 #if (defined (SUPPORT_PCRE8) + defined (SUPPORT_PCRE16) + \
501 defined (SUPPORT_PCRE32)) >= 2
502
/* Shifts 1 left by the current mode value: 1, 2 or 4 for PCRE8_MODE,
PCRE16_MODE and PCRE32_MODE. Reads the variable pcre_mode at the point of
use. */
#define CHAR_SIZE \
  (1U << pcre_mode)
504
505 /* There doesn't seem to be an easy way of writing these macros that can cope
506 with the 3 pairs of bit sizes plus all three bit sizes. So just handle all the
507 cases separately. */
508
509 /* ----- All three modes supported ----- */
510
511 #if defined(SUPPORT_PCRE8) && defined(SUPPORT_PCRE16) && defined(SUPPORT_PCRE32)
512
/* Run exactly one of three statements according to the runtime value of
pcre_mode; anything other than PCRE32_MODE or PCRE16_MODE selects the 8-bit
statement. The do/while(0) wrapper makes every use a single statement that
needs its terminating semicolon, so the generic macros below can be used
safely (and without dangling-else warnings) as the body of an unbraced if or
else. */

#define PCRE_MODE_DISPATCH3(call32, call16, call8) \
  do { \
    if (pcre_mode == PCRE32_MODE) { call32; } \
    else if (pcre_mode == PCRE16_MODE) { call16; } \
    else { call8; } \
  } while (0)

/* Each generic macro below takes the same arguments as its per-mode
counterparts and passes them through unchanged. */

#define PCHARS(lv, p, offset, len, f) \
  PCRE_MODE_DISPATCH3( \
    PCHARS32(lv, p, offset, len, f), \
    PCHARS16(lv, p, offset, len, f), \
    PCHARS8(lv, p, offset, len, f))

#define PCHARSV(p, offset, len, f) \
  PCRE_MODE_DISPATCH3( \
    PCHARSV32(p, offset, len, f), \
    PCHARSV16(p, offset, len, f), \
    PCHARSV8(p, offset, len, f))

#define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
  PCRE_MODE_DISPATCH3( \
    READ_CAPTURE_NAME32(p, cn8, cn16, cn32, re), \
    READ_CAPTURE_NAME16(p, cn8, cn16, cn32, re), \
    READ_CAPTURE_NAME8(p, cn8, cn16, cn32, re))

#define SET_PCRE_CALLOUT(callout) \
  PCRE_MODE_DISPATCH3( \
    SET_PCRE_CALLOUT32(callout), \
    SET_PCRE_CALLOUT16(callout), \
    SET_PCRE_CALLOUT8(callout))

#define SET_PCRE_STACK_GUARD(stack_guard) \
  PCRE_MODE_DISPATCH3( \
    SET_PCRE_STACK_GUARD32(stack_guard), \
    SET_PCRE_STACK_GUARD16(stack_guard), \
    SET_PCRE_STACK_GUARD8(stack_guard))

/* An expression, not a statement: yields the length as an int. */
#define STRLEN(p) \
  (pcre_mode == PCRE32_MODE ? STRLEN32(p) : \
   pcre_mode == PCRE16_MODE ? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  PCRE_MODE_DISPATCH3( \
    PCRE_ASSIGN_JIT_STACK32(extra, callback, userdata), \
    PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata), \
    PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata))

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  PCRE_MODE_DISPATCH3( \
    PCRE_COMPILE32(re, pat, options, error, erroffset, tables), \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables), \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables))

/* No runtime test: the 8-bit function is always used here. */
#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  PCRE_MODE_DISPATCH3( \
    PCRE_COPY_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size), \
    PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size), \
    PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      namesptr, cbuffer, size))

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  PCRE_MODE_DISPATCH3( \
    PCRE_COPY_SUBSTRING32(rc, bptr, offsets, count, i, cbuffer, size), \
    PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size), \
    PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size))

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  PCRE_MODE_DISPATCH3( \
    PCRE_DFA_EXEC32(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace), \
    PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace), \
    PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets, workspace, size_workspace))

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  PCRE_MODE_DISPATCH3( \
    PCRE_EXEC32(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets), \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets), \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets))

#define PCRE_FREE_STUDY(extra) \
  PCRE_MODE_DISPATCH3( \
    PCRE_FREE_STUDY32(extra), \
    PCRE_FREE_STUDY16(extra), \
    PCRE_FREE_STUDY8(extra))

#define PCRE_FREE_SUBSTRING(substring) \
  PCRE_MODE_DISPATCH3( \
    PCRE_FREE_SUBSTRING32(substring), \
    PCRE_FREE_SUBSTRING16(substring), \
    PCRE_FREE_SUBSTRING8(substring))

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  PCRE_MODE_DISPATCH3( \
    PCRE_FREE_SUBSTRING_LIST32(listptr), \
    PCRE_FREE_SUBSTRING_LIST16(listptr), \
    PCRE_FREE_SUBSTRING_LIST8(listptr))

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  PCRE_MODE_DISPATCH3( \
    PCRE_GET_NAMED_SUBSTRING32(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr), \
    PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr), \
    PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
      getnamesptr, subsptr))

/* The second argument is handed on to the per-mode macros as-is. */
#define PCRE_GET_STRINGNUMBER(n, re, ptr) \
  PCRE_MODE_DISPATCH3( \
    PCRE_GET_STRINGNUMBER32(n, re, ptr), \
    PCRE_GET_STRINGNUMBER16(n, re, ptr), \
    PCRE_GET_STRINGNUMBER8(n, re, ptr))

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  PCRE_MODE_DISPATCH3( \
    PCRE_GET_SUBSTRING32(rc, bptr, use_offsets, count, i, subsptr), \
    PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr), \
    PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr))

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  PCRE_MODE_DISPATCH3( \
    PCRE_GET_SUBSTRING_LIST32(rc, bptr, offsets, count, listptr), \
    PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr), \
    PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr))

/* An expression, not a statement: yields the newly allocated stack. */
#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (pcre_mode == PCRE32_MODE ? \
     PCRE_JIT_STACK_ALLOC32(startsize, maxsize) \
   : pcre_mode == PCRE16_MODE ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
   : PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  PCRE_MODE_DISPATCH3( \
    PCRE_JIT_STACK_FREE32(stack), \
    PCRE_JIT_STACK_FREE16(stack), \
    PCRE_JIT_STACK_FREE8(stack))

/* An expression, not a statement: yields the tables pointer. */
#define PCRE_MAKETABLES \
  (pcre_mode == PCRE32_MODE ? pcre32_maketables() : \
   pcre_mode == PCRE16_MODE ? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  PCRE_MODE_DISPATCH3( \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER32(rc, re, extra, tables), \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables), \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables))

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  PCRE_MODE_DISPATCH3( \
    PCRE_PRINTINT32(re, outfile, debug_lengths), \
    PCRE_PRINTINT16(re, outfile, debug_lengths), \
    PCRE_PRINTINT8(re, outfile, debug_lengths))

#define PCRE_STUDY(extra, re, options, error) \
  PCRE_MODE_DISPATCH3( \
    PCRE_STUDY32(extra, re, options, error), \
    PCRE_STUDY16(extra, re, options, error), \
    PCRE_STUDY8(extra, re, options, error))
718
719
720 /* ----- Two out of three modes are supported ----- */
721
722 #else
723
724 /* We can use some macro trickery to make a single set of definitions work in
725 the three different cases. */
726
727 /* ----- 32-bit and 16-bit but not 8-bit supported ----- */
728
729 #if defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE16)
730 #define BITONE 32
731 #define BITTWO 16
732
733 /* ----- 32-bit and 8-bit but not 16-bit supported ----- */
734
735 #elif defined(SUPPORT_PCRE32) && defined(SUPPORT_PCRE8)
736 #define BITONE 32
737 #define BITTWO 8
738
739 /* ----- 16-bit and 8-bit but not 32-bit supported ----- */
740
741 #else
742 #define BITONE 16
743 #define BITTWO 8
744 #endif
745
746 #define glue(a,b) a##b
747 #define G(a,b) glue(a,b)
748
749
750 /* ----- Common macros for two-mode cases ----- */
751
752 #define PCHARS(lv, p, offset, len, f) \
753 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
754 G(PCHARS,BITONE)(lv, p, offset, len, f); \
755 else \
756 G(PCHARS,BITTWO)(lv, p, offset, len, f)
757
758 #define PCHARSV(p, offset, len, f) \
759 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
760 G(PCHARSV,BITONE)(p, offset, len, f); \
761 else \
762 G(PCHARSV,BITTWO)(p, offset, len, f)
763
764 #define READ_CAPTURE_NAME(p, cn8, cn16, cn32, re) \
765 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
766 G(READ_CAPTURE_NAME,BITONE)(p, cn8, cn16, cn32, re); \
767 else \
768 G(READ_CAPTURE_NAME,BITTWO)(p, cn8, cn16, cn32, re)
769
770 #define SET_PCRE_CALLOUT(callout) \
771 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
772 G(SET_PCRE_CALLOUT,BITONE)(callout); \
773 else \
774 G(SET_PCRE_CALLOUT,BITTWO)(callout)
775
776 #define SET_PCRE_STACK_GUARD(stack_guard) \
777 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
778 G(SET_PCRE_STACK_GUARD,BITONE)(stack_guard); \
779 else \
780 G(SET_PCRE_STACK_GUARD,BITTWO)(stack_guard)
781
782 #define STRLEN(p) ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
783 G(STRLEN,BITONE)(p) : G(STRLEN,BITTWO)(p))
784
785 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
786 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
787 G(PCRE_ASSIGN_JIT_STACK,BITONE)(extra, callback, userdata); \
788 else \
789 G(PCRE_ASSIGN_JIT_STACK,BITTWO)(extra, callback, userdata)
790
791 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
792 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
793 G(PCRE_COMPILE,BITONE)(re, pat, options, error, erroffset, tables); \
794 else \
795 G(PCRE_COMPILE,BITTWO)(re, pat, options, error, erroffset, tables)
796
797 #define PCRE_CONFIG G(G(pcre,BITONE),_config)
798
799 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
800 namesptr, cbuffer, size) \
801 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
802 G(PCRE_COPY_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
803 namesptr, cbuffer, size); \
804 else \
805 G(PCRE_COPY_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
806 namesptr, cbuffer, size)
807
808 #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
809 if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
810 G(PCRE_COPY_SUBSTRING,BITONE)(rc, bptr, offsets, count, i, cbuffer, size); \
811 else \
812 G(PCRE_COPY_SUBSTRING,BITTWO)(rc, bptr, offsets, count, i, cbuffer, size)
813
/* Two-mode dispatch of PCRE_DFA_EXEC: the BITONE variant is used when
pcre_mode selects that mode, otherwise the BITTWO variant. Single statement;
the caller supplies the ';'. */

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_DFA_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    else \
      G(PCRE_DFA_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } \
  while (0)
822
/* Two-mode dispatch of PCRE_EXEC: the BITONE variant is used when pcre_mode
selects that mode, otherwise the BITTWO variant. Single statement; the caller
supplies the ';'. */

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_EXEC,BITONE)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      G(PCRE_EXEC,BITTWO)(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } \
  while (0)
831
/* Two-mode dispatch of PCRE_FREE_STUDY: the BITONE variant is used when
pcre_mode selects that mode, otherwise the BITTWO variant. Single statement;
the caller supplies the ';'. */

#define PCRE_FREE_STUDY(extra) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_STUDY,BITONE)(extra); \
    else \
      G(PCRE_FREE_STUDY,BITTWO)(extra); \
    } \
  while (0)
837
/* Two-mode dispatch of PCRE_FREE_SUBSTRING: the BITONE variant is used when
pcre_mode selects that mode, otherwise the BITTWO variant. Single statement;
the caller supplies the ';'. */

#define PCRE_FREE_SUBSTRING(substring) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING,BITONE)(substring); \
    else \
      G(PCRE_FREE_SUBSTRING,BITTWO)(substring); \
    } \
  while (0)
843
/* Two-mode dispatch of PCRE_FREE_SUBSTRING_LIST: the BITONE variant is used
when pcre_mode selects that mode, otherwise the BITTWO variant. Single
statement; the caller supplies the ';'. */

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_FREE_SUBSTRING_LIST,BITONE)(listptr); \
    else \
      G(PCRE_FREE_SUBSTRING_LIST,BITTWO)(listptr); \
    } \
  while (0)
849
/* Two-mode dispatch of PCRE_GET_NAMED_SUBSTRING: the BITONE variant is used
when pcre_mode selects that mode, otherwise the BITTWO variant. Single
statement; the caller supplies the ';'. */

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_NAMED_SUBSTRING,BITONE)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    else \
      G(PCRE_GET_NAMED_SUBSTRING,BITTWO)(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } \
  while (0)
858
/* Two-mode dispatch of PCRE_GET_STRINGNUMBER: the BITONE variant is used when
pcre_mode selects that mode, otherwise the BITTWO variant. Single statement;
the caller supplies the ';'. */

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_STRINGNUMBER,BITONE)(n, rc, ptr); \
    else \
      G(PCRE_GET_STRINGNUMBER,BITTWO)(n, rc, ptr); \
    } \
  while (0)
864
/* Two-mode dispatch of PCRE_GET_SUBSTRING: the BITONE variant is used when
pcre_mode selects that mode, otherwise the BITTWO variant. Single statement;
the caller supplies the ';'. */

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING,BITONE)(rc, bptr, use_offsets, count, i, subsptr); \
    else \
      G(PCRE_GET_SUBSTRING,BITTWO)(rc, bptr, use_offsets, count, i, subsptr); \
    } \
  while (0)
870
/* Two-mode dispatch of PCRE_GET_SUBSTRING_LIST: the BITONE variant is used
when pcre_mode selects that mode, otherwise the BITTWO variant. Single
statement; the caller supplies the ';'. */

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_GET_SUBSTRING_LIST,BITONE)(rc, bptr, offsets, count, listptr); \
    else \
      G(PCRE_GET_SUBSTRING_LIST,BITTWO)(rc, bptr, offsets, count, listptr); \
    } \
  while (0)
876
/* Expression macro: yields the value of the BITONE variant of
PCRE_JIT_STACK_ALLOC when pcre_mode selects that mode, otherwise the value of
the BITTWO variant. The whole conditional is enclosed in parentheses (as STRLEN
already is) so that a cast or operator written next to the macro at a call site
applies to the result rather than to the mode test alone. */

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
     G(PCRE_JIT_STACK_ALLOC,BITONE)(startsize, maxsize) \
    : G(PCRE_JIT_STACK_ALLOC,BITTWO)(startsize, maxsize))
881
/* Two-mode dispatch of PCRE_JIT_STACK_FREE: the BITONE variant is used when
pcre_mode selects that mode, otherwise the BITTWO variant. Single statement;
the caller supplies the ';'. */

#define PCRE_JIT_STACK_FREE(stack) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_JIT_STACK_FREE,BITONE)(stack); \
    else \
      G(PCRE_JIT_STACK_FREE,BITTWO)(stack); \
    } \
  while (0)
887
/* Expression macro: calls the BITONE library's _maketables() when pcre_mode
selects that mode, otherwise the BITTWO library's, and yields the result. The
whole conditional is enclosed in parentheses so that a cast or operator written
next to the macro at a call site applies to the result rather than to the mode
test alone. */

#define PCRE_MAKETABLES \
  ((pcre_mode == G(G(PCRE,BITONE),_MODE)) ? \
    G(G(pcre,BITONE),_maketables)() : G(G(pcre,BITTWO),_maketables)())
891
/* Two-mode dispatch of PCRE_PATTERN_TO_HOST_BYTE_ORDER: the BITONE variant is
used when pcre_mode selects that mode, otherwise the BITTWO variant. Single
statement; the caller supplies the ';'. */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITONE)(rc, re, extra, tables); \
    else \
      G(PCRE_PATTERN_TO_HOST_BYTE_ORDER,BITTWO)(rc, re, extra, tables); \
    } \
  while (0)
897
/* Two-mode dispatch of PCRE_PRINTINT: the BITONE variant is used when
pcre_mode selects that mode, otherwise the BITTWO variant. Single statement;
the caller supplies the ';'. */

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_PRINTINT,BITONE)(re, outfile, debug_lengths); \
    else \
      G(PCRE_PRINTINT,BITTWO)(re, outfile, debug_lengths); \
    } \
  while (0)
903
/* Two-mode dispatch of PCRE_STUDY: the BITONE variant is used when pcre_mode
selects that mode, otherwise the BITTWO variant. Single statement; the caller
supplies the ';'. */

#define PCRE_STUDY(extra, re, options, error) \
  do \
    { \
    if (pcre_mode == G(G(PCRE,BITONE),_MODE)) \
      G(PCRE_STUDY,BITONE)(extra, re, options, error); \
    else \
      G(PCRE_STUDY,BITTWO)(extra, re, options, error); \
    } \
  while (0)
909
910 #endif /* Two out of three modes */
911
912 /* ----- End of cases where more than one mode is supported ----- */
913
914
915 /* ----- Only 8-bit mode is supported ----- */
916
917 #elif defined SUPPORT_PCRE8
/* With only the 8-bit library compiled in, each generic name below is a plain
alias for its 8-bit counterpart, so no run-time test of pcre_mode is made
here. CHAR_SIZE is 1 in this configuration (2 and 4 in the 16-bit-only and
32-bit-only configurations). */

#define CHAR_SIZE 1
#define PCHARS PCHARS8
#define PCHARSV PCHARSV8
#define READ_CAPTURE_NAME READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
#define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD8
#define STRLEN STRLEN8
#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
#define PCRE_COMPILE PCRE_COMPILE8
#define PCRE_CONFIG pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC PCRE_DFA_EXEC8
#define PCRE_EXEC PCRE_EXEC8
#define PCRE_FREE_STUDY PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
#define PCRE_MAKETABLES pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT PCRE_PRINTINT8
#define PCRE_STUDY PCRE_STUDY8
945
946 /* ----- Only 16-bit mode is supported ----- */
947
948 #elif defined SUPPORT_PCRE16
/* With only the 16-bit library compiled in, each generic name below is a plain
alias for its 16-bit counterpart, so no run-time test of pcre_mode is made
here. CHAR_SIZE is 2 in this configuration. */

#define CHAR_SIZE 2
#define PCHARS PCHARS16
#define PCHARSV PCHARSV16
#define READ_CAPTURE_NAME READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
#define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD16
#define STRLEN STRLEN16
#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
#define PCRE_COMPILE PCRE_COMPILE16
#define PCRE_CONFIG pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC PCRE_DFA_EXEC16
#define PCRE_EXEC PCRE_EXEC16
#define PCRE_FREE_STUDY PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
#define PCRE_MAKETABLES pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT PCRE_PRINTINT16
#define PCRE_STUDY PCRE_STUDY16
976
977 /* ----- Only 32-bit mode is supported ----- */
978
979 #elif defined SUPPORT_PCRE32
/* With only the 32-bit library compiled in, each generic name below is a plain
alias for its 32-bit counterpart, so no run-time test of pcre_mode is made
here. CHAR_SIZE is 4 in this configuration. */

#define CHAR_SIZE 4
#define PCHARS PCHARS32
#define PCHARSV PCHARSV32
#define READ_CAPTURE_NAME READ_CAPTURE_NAME32
#define SET_PCRE_CALLOUT SET_PCRE_CALLOUT32
#define SET_PCRE_STACK_GUARD SET_PCRE_STACK_GUARD32
#define STRLEN STRLEN32
#define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK32
#define PCRE_COMPILE PCRE_COMPILE32
#define PCRE_CONFIG pcre32_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING32
#define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING32
#define PCRE_DFA_EXEC PCRE_DFA_EXEC32
#define PCRE_EXEC PCRE_EXEC32
#define PCRE_FREE_STUDY PCRE_FREE_STUDY32
#define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING32
#define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST32
#define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING32
#define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER32
#define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING32
#define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST32
#define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC32
#define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE32
#define PCRE_MAKETABLES pcre32_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER32
#define PCRE_PRINTINT PCRE_PRINTINT32
#define PCRE_STUDY PCRE_STUDY32
1007
1008 #endif
1009
1010 /* ----- End of mode-specific function call macros ----- */
1011
1012
1013 /* Other parameters */
1014
/* If the C library does not define CLOCKS_PER_SEC, fall back to CLK_TCK when
that exists, and to the fixed value 100 when it does not. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Defined only when NODFA is not set. NOTE(review): presumably the number of
elements in the workspace vector passed to the DFA matcher - confirm at the
point of use. */

#if !defined NODFA
#define DFA_WS_DIMENSION 1000
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
1030
1031 /* Static variables */
1032
/* File-scope state. Only log_store, locale_set and last_callout_mark carry an
explicit initializer; the others rely on the zero initialization that static
storage receives. */

static FILE *outfile;
static int log_store = 0;
static int callout_count;
static int callout_extra;
static int callout_fail_count;
static int callout_fail_id;
static int debug_lengths;
static int first_callout;
static int jit_was_used;             /* set to TRUE by jit_callback() */
static int locale_set = 0;
static int show_malloc;
static int stack_guard_return;
static int use_utf;
static const unsigned char *last_callout_mark = NULL;

/* The buffers grow automatically if very long input lines are encountered.
Both pointers start out NULL; buffer_size is the initial size. */

static int buffer_size = 50000;
static pcre_uint8 *buffer = NULL;
static pcre_uint8 *pbuffer = NULL;
1053
1054 /* Just as a safety check, make sure that COMPILE_PCRE[16|32] are *not* set. */
1055
1056 #ifdef COMPILE_PCRE16
1057 #error COMPILE_PCRE16 must not be set when compiling pcretest.c
1058 #endif
1059
1060 #ifdef COMPILE_PCRE32
1061 #error COMPILE_PCRE32 must not be set when compiling pcretest.c
1062 #endif
1063
1064 /* We need buffers for building 16/32-bit strings, and the tables of operator
1065 lengths that are used for 16/32-bit compiling, in order to swap bytes in a
1066 pattern for saving/reloading testing. Luckily, the data for these tables is
1067 defined as a macro. However, we must ensure that LINK_SIZE and IMM2_SIZE (which
1068 are used in the tables) are adjusted appropriately for the 16/32-bit world.
1069 LINK_SIZE is also used later in this program. */
1070
#ifdef SUPPORT_PCRE16
#undef IMM2_SIZE
#define IMM2_SIZE 1

/* LINK_SIZE arrives here as 2, 3 or 4 and is remapped to 1, 2 or 2
respectively; any other value stops the compilation. NOTE(review): presumably
this converts a size in bytes into a size in 16-bit units - confirm against
the library's internal header. */

#if LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#elif LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

/* buffer16 is allocated on demand by to16(), which records the allocation size
in bytes in buffer16_size. OP_lengths16 is built from the OP_LENGTHS macro
using the IMM2_SIZE and LINK_SIZE values set just above. */

static int buffer16_size = 0;
static pcre_uint16 *buffer16 = NULL;
static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE16 */
1089
/* For the 32-bit library both IMM2_SIZE and LINK_SIZE are set to 1. When
SUPPORT_PCRE16 is also defined, this section overrides the values chosen by the
16-bit section, but only after OP_lengths16 has already been built, so each
table sees its own settings. LINK_SIZE keeps the value set here for the rest of
the file. */

#ifdef SUPPORT_PCRE32
#undef IMM2_SIZE
#define IMM2_SIZE 1
#undef LINK_SIZE
#define LINK_SIZE 1

/* buffer32 is allocated on demand by to32(), which records the allocation size
in bytes in buffer32_size. */

static int buffer32_size = 0;
static pcre_uint32 *buffer32 = NULL;
static const pcre_uint32 OP_lengths32[] = { OP_LENGTHS };
#endif /* SUPPORT_PCRE32 */
1100
/* If we have 8-bit support, default to it; if there is also 16- or 32-bit
support, it can be changed by an option. If there is no 8-bit support, there
must be 16- or 32-bit support: the default is then 16-bit mode when that is
available, and 32-bit mode otherwise. */

#if defined SUPPORT_PCRE8
static int pcre_mode = PCRE8_MODE;
#elif defined SUPPORT_PCRE16
static int pcre_mode = PCRE16_MODE;
#elif defined SUPPORT_PCRE32
static int pcre_mode = PCRE32_MODE;
#endif
1112
1113 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
1114
1115 static int jit_study_bits[] =
1116 {
1117 PCRE_STUDY_JIT_COMPILE,
1118 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1119 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
1120 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1121 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1122 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
1123 PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
1124 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
1125 };
1126
1127 #define PCRE_STUDY_ALLJIT (PCRE_STUDY_JIT_COMPILE | \
1128 PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE)
1129
/* Textual explanations for runtime error codes. The number in each trailing
comment is the entry's index in the array; a NULL entry marks a code that is
reported by other means or never returned. NOTE(review): presumably the table
is indexed with the negated error code returned by the matching functions -
confirm at the point of use. */

static const char *errtexts[] = {
  NULL,  /*  0: 0 is no error */
  NULL,  /*  1: NOMATCH is handled specially */
  "NULL argument passed",                                   /*  2 */
  "bad option value",                                       /*  3 */
  "magic number missing",                                   /*  4 */
  "unknown opcode - pattern overwritten?",                  /*  5 */
  "no more memory",                                         /*  6 */
  NULL,  /*  7: never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",                                   /*  8 */
  "callout error code",                                     /*  9 */
  NULL,  /* 10: BADUTF8/16 is handled specially */
  NULL,  /* 11: BADUTF8/16 offset is handled specially */
  NULL,  /* 12: PARTIAL is handled specially */
  "not used - internal error",                              /* 13 */
  "internal error - pattern overwritten?",                  /* 14 */
  "bad count value",                                        /* 15 */
  "item unsupported for DFA matching",                      /* 16 */
  "backreference condition or recursion test not supported for DFA matching",  /* 17 */
  "match limit not supported for DFA matching",             /* 18 */
  "workspace size exceeded in DFA matching",                /* 19 */
  "too much recursion for DFA matching",                    /* 20 */
  "recursion limit exceeded",                               /* 21 */
  "not used - internal error",                              /* 22 */
  "invalid combination of newline options",                 /* 23 */
  "bad offset value",                                       /* 24 */
  NULL,  /* 25: SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",          /* 26 */
  "JIT stack limit reached",                                /* 27 */
  "pattern compiled in wrong mode: 8-bit/16-bit error",     /* 28 */
  "pattern compiled with other endianness",                 /* 29 */
  "invalid data in workspace for DFA restart",              /* 30 */
  "bad JIT option",                                         /* 31 */
  "bad length"                                              /* 32 */
};
1167
1168
1169 /*************************************************
1170 * Alternate character tables *
1171 *************************************************/
1172
1173 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
1174 using the default tables of the library. However, the T option can be used to
1175 select alternate sets of tables, for different kinds of testing. Note also that
1176 the L (locale) option also adjusts the tables. */
1177
1178 /* This is the set of tables distributed as default with PCRE. It recognizes
1179 only ASCII characters. */
1180
/* Layout: 256 bytes of lower-casing data, 256 bytes of case-flipping data,
ten 32-byte class bit maps (320 bytes), then 256 bytes of per-character type
flags. */

static const pcre_uint8 tables0[] = {

/* This table is a lower casing table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table is a case flipping table. */

    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
   16, 17, 18, 19, 20, 21, 22, 23,
   24, 25, 26, 27, 28, 29, 30, 31,
   32, 33, 34, 35, 36, 37, 38, 39,
   40, 41, 42, 43, 44, 45, 46, 47,
   48, 49, 50, 51, 52, 53, 54, 55,
   56, 57, 58, 59, 60, 61, 62, 63,
   64, 97, 98, 99,100,101,102,103,
  104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,
  120,121,122, 91, 92, 93, 94, 95,
   96, 65, 66, 67, 68, 69, 70, 71,
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90,123,124,125,126,127,
  128,129,130,131,132,133,134,135,
  136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,
  152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,
  168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,
  184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,
  200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,
  216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,
  232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,
  248,249,250,251,252,253,254,255,

/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */

  /* space */
  0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* xdigit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* digit */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* upper */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* lower */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* word */
  0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
  0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* graph */
  0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* print */
  0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* punct */
  0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
  0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

  /* cntrl */
  0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,

/* This table identifies various classes of character by individual bits:
  0x01   white space character
  0x02   letter
  0x04   decimal digit
  0x08   hexadecimal digit
  0x10   alphanumeric or '_'
  0x80   regular expression metacharacter or binary zero
*/

  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
1349
1350 /* This is a set of tables that came originally from a Windows user. It seems
1351 to be at least an approximation of ISO 8859. In particular, there are
1352 characters greater than 128 that are marked as spaces, letters, etc. */
1353
/* The section and class labels below are inferred from the byte counts, which
match the layout documented for tables0 (256 + 256 + 10*32 + 256 bytes), and
from the class order listed there. NOTE(review): confirm against the code that
generated these tables. */

static const pcre_uint8 tables1[] = {

/* Lower casing table (256 bytes) */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,

/* Case flipping table (256 bytes) */

0,1,2,3,4,5,6,7,
8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,
24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,
40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,
56,57,58,59,60,61,62,63,
64,97,98,99,100,101,102,103,
104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,
120,121,122,91,92,93,94,95,
96,65,66,67,68,69,70,71,
72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,
88,89,90,123,124,125,126,127,
128,129,130,131,132,133,134,135,
136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,
152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,
168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,
184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,
232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,215,
248,249,250,251,252,253,254,223,
192,193,194,195,196,197,198,199,
200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,247,
216,217,218,219,220,221,222,255,

/* Character class bit maps, 32 bytes each */

/* space */
0,62,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,
32,0,0,0,1,0,0,0,
0,0,0,0,0,0,0,0,

/* xdigit */
0,0,0,0,0,0,255,3,
126,0,0,0,126,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,

/* digit */
0,0,0,0,0,0,255,3,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,12,2,
0,0,0,0,0,0,0,0,

/* upper */
0,0,0,0,0,0,0,0,
254,255,255,7,0,0,0,0,
0,0,0,0,0,0,0,0,
255,255,127,127,0,0,0,0,

/* lower */
0,0,0,0,0,0,0,0,
0,0,0,0,254,255,255,7,
0,0,0,0,0,4,32,4,
0,0,0,128,255,255,127,255,

/* word */
0,0,0,0,0,0,255,3,
254,255,255,135,254,255,255,7,
0,0,0,0,0,4,44,6,
255,255,127,255,255,255,127,255,

/* graph */
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,254,255,255,255,
255,255,255,255,255,255,255,255,

/* print */
0,2,0,0,255,255,255,255,
255,255,255,255,255,255,255,127,
0,0,0,0,255,255,255,255,
255,255,255,255,255,255,255,255,

/* punct */
0,0,0,0,254,255,0,252,
1,0,0,248,1,0,0,120,
0,0,0,0,254,255,255,255,
0,0,128,0,0,0,128,0,

/* cntrl */
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,128,
255,255,255,255,0,0,0,0,
0,0,0,0,0,0,0,0,

/* Per-character type flags (256 bytes) */

128,0,0,0,0,0,0,0,
0,1,1,0,1,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,128,0,0,0,
128,128,128,128,0,0,128,0,
28,28,28,28,28,28,28,28,
28,28,0,0,0,0,0,128,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,128,16,
0,26,26,26,26,26,26,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,128,128,0,0,0,
0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,
0,0,18,0,0,0,0,0,
0,0,20,20,0,18,0,0,
0,20,18,0,0,0,0,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,18,
18,18,18,18,18,18,18,0,
18,18,18,18,18,18,18,18
};
1492
1493
1494
1495
1496 #ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Stand-in for strerror() on old systems (e.g. SunOS4) whose C library lacks
it but does export the sys_nerr / sys_errlist[] pair.

Argument:   n   an error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise a fixed
            "unknown error number" text
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
1514 #endif /* HAVE_STRERROR */
1515
1516
1517
1518 /*************************************************
1519 * Print newline configuration *
1520 *************************************************/
1521
1522 /*
1523 Arguments:
1524 rc the return code from PCRE_CONFIG_NEWLINE
1525 isc TRUE if called from "-C newline"
1526 Returns: nothing
1527 */
1528
1529 static void
print_newline_config(int rc,BOOL isc)1530 print_newline_config(int rc, BOOL isc)
1531 {
1532 const char *s = NULL;
1533 if (!isc) printf(" Newline sequence is ");
1534 switch(rc)
1535 {
1536 case CHAR_CR: s = "CR"; break;
1537 case CHAR_LF: s = "LF"; break;
1538 case (CHAR_CR<<8 | CHAR_LF): s = "CRLF"; break;
1539 case -1: s = "ANY"; break;
1540 case -2: s = "ANYCRLF"; break;
1541
1542 default:
1543 printf("a non-standard value: 0x%04x\n", rc);
1544 return;
1545 }
1546
1547 printf("%s\n", s);
1548 }
1549
1550
1551
1552 /*************************************************
1553 * JIT memory callback *
1554 *************************************************/
1555
jit_callback(void * arg)1556 static pcre_jit_stack* jit_callback(void *arg)
1557 {
1558 jit_was_used = TRUE;
1559 return (pcre_jit_stack *)arg;
1560 }
1561
1562
1563 #if !defined NOUTF || defined SUPPORT_PCRE16 || defined SUPPORT_PCRE32
1564 /*************************************************
1565 * Convert UTF-8 string to value *
1566 *************************************************/
1567
1568 /* This function takes one or more bytes that represents a UTF-8 character,
1569 and returns the value of the character.
1570
1571 Argument:
1572 utf8bytes a pointer to the byte vector
1573 vptr a pointer to an int to receive the value
1574
1575 Returns: > 0 => the number of bytes consumed
1576 -6 to 0 => malformed UTF-8 character at offset = (-return)
1577 */
1578
1579 static int
utf82ord(pcre_uint8 * utf8bytes,pcre_uint32 * vptr)1580 utf82ord(pcre_uint8 *utf8bytes, pcre_uint32 *vptr)
1581 {
1582 pcre_uint32 c = *utf8bytes++;
1583 pcre_uint32 d = c;
1584 int i, j, s;
1585
1586 for (i = -1; i < 6; i++) /* i is number of additional bytes */
1587 {
1588 if ((d & 0x80) == 0) break;
1589 d <<= 1;
1590 }
1591
1592 if (i == -1) { *vptr = c; return 1; } /* ascii character */
1593 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1594
1595 /* i now has a value in the range 1-5 */
1596
1597 s = 6*i;
1598 d = (c & utf8_table3[i]) << s;
1599
1600 for (j = 0; j < i; j++)
1601 {
1602 c = *utf8bytes++;
1603 if ((c & 0xc0) != 0x80) return -(j+1);
1604 s -= 6;
1605 d |= (c & 0x3f) << s;
1606 }
1607
1608 /* Check that encoding was the correct unique one */
1609
1610 for (j = 0; j < utf8_table1_size; j++)
1611 if (d <= (pcre_uint32)utf8_table1[j]) break;
1612 if (j != i) return -(i+1);
1613
1614 /* Valid value */
1615
1616 *vptr = d;
1617 return i+1;
1618 }
#endif /* !NOUTF || SUPPORT_PCRE16 || SUPPORT_PCRE32 */
1620
1621
1622
1623 #if defined SUPPORT_PCRE8 && !defined NOUTF
1624 /*************************************************
1625 * Convert character value to UTF-8 *
1626 *************************************************/
1627
1628 /* This function takes an integer value in the range 0 - 0x7fffffff
1629 and encodes it as a UTF-8 character in 0 to 6 bytes.
1630
1631 Arguments:
1632 cvalue the character value
1633 utf8bytes pointer to buffer for result - at least 6 bytes long
1634
1635 Returns: number of characters placed in the buffer
1636 */
1637
1638 static int
ord2utf8(pcre_uint32 cvalue,pcre_uint8 * utf8bytes)1639 ord2utf8(pcre_uint32 cvalue, pcre_uint8 *utf8bytes)
1640 {
1641 register int i, j;
1642 if (cvalue > 0x7fffffffu)
1643 return -1;
1644 for (i = 0; i < utf8_table1_size; i++)
1645 if (cvalue <= (pcre_uint32)utf8_table1[i]) break;
1646 utf8bytes += i;
1647 for (j = i; j > 0; j--)
1648 {
1649 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1650 cvalue >>= 6;
1651 }
1652 *utf8bytes = utf8_table2[i] | cvalue;
1653 return i + 1;
1654 }
1655 #endif
1656
1657
1658 #ifdef SUPPORT_PCRE16
1659 /*************************************************
1660 * Convert a string to 16-bit *
1661 *************************************************/
1662
1663 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1664 8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1665 double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1666 in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1667 result is always left in buffer16.
1668
1669 Note that this function does not object to surrogate values. This is
1670 deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1671 for the purpose of testing that they are correctly faulted.
1672
1673 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1674 in UTF-8 so that values greater than 255 can be handled.
1675
1676 Arguments:
1677 data TRUE if converting a data line; FALSE for a regex
1678 p points to a byte string
1679 utf true if UTF-8 (to be converted to UTF-16)
1680 len number of bytes in the string (excluding trailing zero)
1681
1682 Returns: number of 16-bit data items used (excluding trailing zero)
1683 OR -1 if a UTF-8 string is malformed
1684 OR -2 if a value > 0x10ffff is encountered
1685 OR -3 if a value > 0xffff is encountered when not in UTF mode
1686 */
1687
1688 static int
to16(int data,pcre_uint8 * p,int utf,int len)1689 to16(int data, pcre_uint8 *p, int utf, int len)
1690 {
1691 pcre_uint16 *pp;
1692
1693 if (buffer16_size < 2*len + 2)
1694 {
1695 if (buffer16 != NULL) free(buffer16);
1696 buffer16_size = 2*len + 2;
1697 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1698 if (buffer16 == NULL)
1699 {
1700 fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1701 exit(1);
1702 }
1703 }
1704
1705 pp = buffer16;
1706
1707 if (!utf && !data)
1708 {
1709 while (len-- > 0) *pp++ = *p++;
1710 }
1711
1712 else
1713 {
1714 pcre_uint32 c = 0;
1715 while (len > 0)
1716 {
1717 int chlen = utf82ord(p, &c);
1718 if (chlen <= 0) return -1;
1719 if (c > 0x10ffff) return -2;
1720 p += chlen;
1721 len -= chlen;
1722 if (c < 0x10000) *pp++ = c; else
1723 {
1724 if (!utf) return -3;
1725 c -= 0x10000;
1726 *pp++ = 0xD800 | (c >> 10);
1727 *pp++ = 0xDC00 | (c & 0x3ff);
1728 }
1729 }
1730 }
1731
1732 *pp = 0;
1733 return pp - buffer16;
1734 }
1735 #endif
1736
1737 #ifdef SUPPORT_PCRE32
1738 /*************************************************
1739 * Convert a string to 32-bit *
1740 *************************************************/
1741
1742 /* In non-UTF mode, the space needed for a 32-bit string is exactly four times the
1743 8-bit size. For a UTF-8 string, the size needed for UTF-32 is no more than four
1744 times, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1745 in UTF-32. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-32. The
1746 result is always left in buffer32.
1747
Note that, for data lines, this function does not object to surrogate values.
This is deliberate; it makes it possible to construct UTF-32 strings that are
invalid, for the purpose of testing that they are correctly faulted. A
surrogate in a UTF-mode pattern is rejected with a -3 return.
1751
1752 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1753 in UTF-8 so that values greater than 255 can be handled.
1754
1755 Arguments:
1756 data TRUE if converting a data line; FALSE for a regex
1757 p points to a byte string
1758 utf true if UTF-8 (to be converted to UTF-32)
1759 len number of bytes in the string (excluding trailing zero)
1760
1761 Returns: number of 32-bit data items used (excluding trailing zero)
1762 OR -1 if a UTF-8 string is malformed
1763 OR -2 if a value > 0x10ffff is encountered
1764 OR -3 if an ill-formed value is encountered (i.e. a surrogate)
1765 */
1766
1767 static int
to32(int data,pcre_uint8 * p,int utf,int len)1768 to32(int data, pcre_uint8 *p, int utf, int len)
1769 {
1770 pcre_uint32 *pp;
1771
1772 if (buffer32_size < 4*len + 4)
1773 {
1774 if (buffer32 != NULL) free(buffer32);
1775 buffer32_size = 4*len + 4;
1776 buffer32 = (pcre_uint32 *)malloc(buffer32_size);
1777 if (buffer32 == NULL)
1778 {
1779 fprintf(stderr, "pcretest: malloc(%d) failed for buffer32\n", buffer32_size);
1780 exit(1);
1781 }
1782 }
1783
1784 pp = buffer32;
1785
1786 if (!utf && !data)
1787 {
1788 while (len-- > 0) *pp++ = *p++;
1789 }
1790
1791 else
1792 {
1793 pcre_uint32 c = 0;
1794 while (len > 0)
1795 {
1796 int chlen = utf82ord(p, &c);
1797 if (chlen <= 0) return -1;
1798 if (utf)
1799 {
1800 if (c > 0x10ffff) return -2;
1801 if (!data && (c & 0xfffff800u) == 0xd800u) return -3;
1802 }
1803
1804 p += chlen;
1805 len -= chlen;
1806 *pp++ = c;
1807 }
1808 }
1809
1810 *pp = 0;
1811 return pp - buffer32;
1812 }
1813
1814 /* Check that a 32-bit character string is valid UTF-32.
1815
1816 Arguments:
1817 string points to the string
1818 length length of string, or -1 if the string is zero-terminated
1819
1820 Returns: TRUE if the string is a valid UTF-32 string
1821 FALSE otherwise
1822 */
1823
1824 #ifdef NEVER /* Not used */
1825 #ifdef SUPPORT_UTF
1826 static BOOL
valid_utf32(pcre_uint32 * string,int length)1827 valid_utf32(pcre_uint32 *string, int length)
1828 {
1829 register pcre_uint32 *p;
1830 register pcre_uint32 c;
1831
1832 for (p = string; length-- > 0; p++)
1833 {
1834 c = *p;
1835 if (c > 0x10ffffu) return FALSE; /* Too big */
1836 if ((c & 0xfffff800u) == 0xd800u) return FALSE; /* Surrogate */
1837 }
1838
1839 return TRUE;
1840 }
1841 #endif /* SUPPORT_UTF */
1842 #endif /* NEVER */
1843 #endif /* SUPPORT_PCRE32 */
1844
1845
1846 /*************************************************
1847 * Read or extend an input line *
1848 *************************************************/
1849
1850 /* Input lines are read into buffer, but both patterns and data lines can be
1851 continued over multiple input lines. In addition, if the buffer fills up, we
1852 want to automatically expand it so as to be able to handle extremely large
1853 lines that are needed for certain stress tests. When the input buffer is
1854 expanded, the other two buffers must also be expanded likewise, and the
1855 contents of pbuffer, which are a copy of the input for callouts, must be
1856 preserved (for when expansion happens for a data line). This is not the most
1857 optimal way of handling this, but hey, this is just a test program!
1858
1859 Arguments:
1860 f the file to read
1861 start where in buffer to start (this *must* be within buffer)
1862 prompt for stdin or readline()
1863
1864 Returns: pointer to the start of new data
1865 could be a copy of start, or could be moved
1866 NULL if no data read and EOF reached
1867 */
1868
1869 static pcre_uint8 *
extend_inputline(FILE * f,pcre_uint8 * start,const char * prompt)1870 extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1871 {
1872 pcre_uint8 *here = start;
1873
1874 for (;;)
1875 {
1876 size_t rlen = (size_t)(buffer_size - (here - buffer));
1877
1878 if (rlen > 1000)
1879 {
1880 int dlen;
1881
1882 /* If libreadline or libedit support is required, use readline() to read a
1883 line if the input is a terminal. Note that readline() removes the trailing
1884 newline, so we must put it back again, to be compatible with fgets(). */
1885
1886 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1887 if (isatty(fileno(f)))
1888 {
1889 size_t len;
1890 char *s = readline(prompt);
1891 if (s == NULL) return (here == start)? NULL : start;
1892 len = strlen(s);
1893 if (len > 0) add_history(s);
1894 if (len > rlen - 1) len = rlen - 1;
1895 memcpy(here, s, len);
1896 here[len] = '\n';
1897 here[len+1] = 0;
1898 free(s);
1899 }
1900 else
1901 #endif
1902
1903 /* Read the next line by normal means, prompting if the file is stdin. */
1904
1905 {
1906 if (f == stdin) printf("%s", prompt);
1907 if (fgets((char *)here, rlen, f) == NULL)
1908 return (here == start)? NULL : start;
1909 }
1910
1911 dlen = (int)strlen((char *)here);
1912 if (dlen > 0 && here[dlen - 1] == '\n') return start;
1913 here += dlen;
1914 }
1915
1916 else
1917 {
1918 int new_buffer_size = 2*buffer_size;
1919 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1920 pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1921
1922 if (new_buffer == NULL || new_pbuffer == NULL)
1923 {
1924 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1925 exit(1);
1926 }
1927
1928 memcpy(new_buffer, buffer, buffer_size);
1929 memcpy(new_pbuffer, pbuffer, buffer_size);
1930
1931 buffer_size = new_buffer_size;
1932
1933 start = new_buffer + (start - buffer);
1934 here = new_buffer + (here - buffer);
1935
1936 free(buffer);
1937 free(pbuffer);
1938
1939 buffer = new_buffer;
1940 pbuffer = new_pbuffer;
1941 }
1942 }
1943
1944 /* Control never gets here */
1945 }
1946
1947
1948
1949 /*************************************************
1950 * Read number from string *
1951 *************************************************/
1952
1953 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1954 around with conditional compilation, just do the job by hand. It is only used
1955 for unpicking arguments, so just keep it simple.
1956
1957 Arguments:
1958 str string to be converted
1959 endptr where to put the end pointer
1960
Returns:    the value, as an int
1962 */
1963
1964 static int
get_value(pcre_uint8 * str,pcre_uint8 ** endptr)1965 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1966 {
1967 int result = 0;
1968 while(*str != 0 && isspace(*str)) str++;
1969 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1970 *endptr = str;
1971 return(result);
1972 }
1973
1974
1975
1976 /*************************************************
1977 * Print one character *
1978 *************************************************/
1979
1980 /* Print a single character either literally, or as a hex escape. */
1981
static int pchar(pcre_uint32 c, FILE *f)
{
char scratch[16];
int printed;

/* A printable character is output as itself and counts as one. */

if (PRINTOK(c))
  {
  if (f != NULL) fprintf(f, "%c", c);
  return 1;
  }

/* Values of 0x100 and above always use the braced form. Its length varies,
so take it from the formatting function, formatting into a scratch buffer when
there is no file to print to. */

if (c >= 0x100)
  {
  if (f != NULL) printed = fprintf(f, "\\x{%02x}", c);
    else printed = sprintf(scratch, "\\x{%02x}", c);
  return (printed < 0)? 0 : printed;
  }

/* Non-printing values below 0x100 have a fixed length: six characters in the
braced form used in UTF mode, four otherwise. */

if (use_utf)
  {
  if (f != NULL) fprintf(f, "\\x{%02x}", c);
  return 6;
  }

if (f != NULL) fprintf(f, "\\x%02x", c);
return 4;
}
2011
2012
2013
2014 #ifdef SUPPORT_PCRE8
2015 /*************************************************
2016 * Print 8-bit character string *
2017 *************************************************/
2018
2019 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
2020 If handed a NULL file, just counts chars without printing. */
2021
static int pchars(pcre_uint8 *p, int length, FILE *f)
{
pcre_uint32 c = 0;
int total = 0;

if (length < 0)
  length = strlen((char *)p);

while (length > 0)
  {
  int used = 0;   /* Bytes taken by a UTF-8 character; 0 if none decoded */

#if !defined NOUTF
  if (use_utf)
    {
    used = utf82ord(p, &c);
    if (used <= 0 || used > length) used = 0;   /* Mustn't run over the end */
    }
#endif

  /* Not in UTF mode, or no usable UTF-8 character here: take a single byte. */

  if (used == 0)
    {
    c = *p;
    used = 1;
    }

  p += used;
  length -= used;
  total += pchar(c, f);
  }

return total;
}
2051 #endif
2052
2053
2054
2055 #ifdef SUPPORT_PCRE16
2056 /*************************************************
2057 * Find length of 0-terminated 16-bit string *
2058 *************************************************/
2059
static int strlen16(PCRE_SPTR16 p)
{
int count = 0;

/* Count the 16-bit items that precede the terminating zero. */

while (p[count] != 0) count++;
return count;
}
2066 #endif /* SUPPORT_PCRE16 */
2067
2068
2069
2070 #ifdef SUPPORT_PCRE32
2071 /*************************************************
2072 * Find length of 0-terminated 32-bit string *
2073 *************************************************/
2074
static int strlen32(PCRE_SPTR32 p)
{
int count = 0;

/* Count the 32-bit items that precede the terminating zero. */

while (p[count] != 0) count++;
return count;
}
2081 #endif /* SUPPORT_PCRE32 */
2082
2083
2084
2085 #ifdef SUPPORT_PCRE16
2086 /*************************************************
2087 * Print 16-bit character string *
2088 *************************************************/
2089
2090 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
2091 If handed a NULL file, just counts chars without printing. */
2092
static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int total = 0;

if (length < 0)
  length = strlen16(p);

for (; length > 0; length--)
  {
  pcre_uint32 c = *p++ & 0xffff;

#if !defined NOUTF
  /* In UTF mode, join a high surrogate with the low surrogate that follows
  it. Here length still counts the item just read, so a value above 1 means
  that at least one more item is available. */

  if (use_utf && length > 1 && (c & 0xfc00) == 0xd800)
    {
    pcre_uint32 low = *p & 0xffff;
    if ((low & 0xfc00) == 0xdc00)
      {
      c = ((c & 0x3ff) << 10) + (low & 0x3ff) + 0x10000;
      p++;
      length--;
      }
    }
#endif

  total += pchar(c, f);
  }

return total;
}
2120 #endif /* SUPPORT_PCRE16 */
2121
2122
2123
2124 #ifdef SUPPORT_PCRE32
2125 /*************************************************
2126 * Print 32-bit character string *
2127 *************************************************/
2128
2129 /* Must handle UTF-32 strings in utf mode. Yields number of characters printed.
2130 If handed a NULL file, just counts chars without printing. */
2131
static int pchars32(PCRE_SPTR32 p, int length, BOOL utf, FILE *f)
{
int total = 0;
int i;

(void)(utf);  /* Avoid compiler warning */

if (length < 0)
  length = strlen32(p);

/* Every 32-bit item is one character, whatever the mode. */

for (i = 0; i < length; i++)
  {
  pcre_uint32 c = p[i];
  total += pchar(c, f);
  }

return total;
}
2149 #endif /* SUPPORT_PCRE32 */
2150
2151
2152
2153 #ifdef SUPPORT_PCRE8
2154 /*************************************************
2155 * Read a capture name (8-bit) and check it *
2156 *************************************************/
2157
2158 static pcre_uint8 *
read_capture_name8(pcre_uint8 * p,pcre_uint8 ** pp,pcre * re)2159 read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
2160 {
2161 pcre_uint8 *npp = *pp;
2162 while (isalnum(*p)) *npp++ = *p++;
2163 *npp++ = 0;
2164 *npp = 0;
2165 if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
2166 {
2167 fprintf(outfile, "no parentheses with name \"");
2168 PCHARSV(*pp, 0, -1, outfile);
2169 fprintf(outfile, "\"\n");
2170 }
2171
2172 *pp = npp;
2173 return p;
2174 }
2175 #endif /* SUPPORT_PCRE8 */
2176
2177
2178
2179 #ifdef SUPPORT_PCRE16
2180 /*************************************************
2181 * Read a capture name (16-bit) and check it *
2182 *************************************************/
2183
2184 /* Note that the text being read is 8-bit. */
2185
2186 static pcre_uint8 *
read_capture_name16(pcre_uint8 * p,pcre_uint16 ** pp,pcre * re)2187 read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
2188 {
2189 pcre_uint16 *npp = *pp;
2190 while (isalnum(*p)) *npp++ = *p++;
2191 *npp++ = 0;
2192 *npp = 0;
2193 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
2194 {
2195 fprintf(outfile, "no parentheses with name \"");
2196 PCHARSV(*pp, 0, -1, outfile);
2197 fprintf(outfile, "\"\n");
2198 }
2199 *pp = npp;
2200 return p;
2201 }
2202 #endif /* SUPPORT_PCRE16 */
2203
2204
2205
2206 #ifdef SUPPORT_PCRE32
2207 /*************************************************
2208 * Read a capture name (32-bit) and check it *
2209 *************************************************/
2210
2211 /* Note that the text being read is 8-bit. */
2212
2213 static pcre_uint8 *
read_capture_name32(pcre_uint8 * p,pcre_uint32 ** pp,pcre * re)2214 read_capture_name32(pcre_uint8 *p, pcre_uint32 **pp, pcre *re)
2215 {
2216 pcre_uint32 *npp = *pp;
2217 while (isalnum(*p)) *npp++ = *p++;
2218 *npp++ = 0;
2219 *npp = 0;
2220 if (pcre32_get_stringnumber((pcre32 *)re, (PCRE_SPTR32)(*pp)) < 0)
2221 {
2222 fprintf(outfile, "no parentheses with name \"");
2223 PCHARSV(*pp, 0, -1, outfile);
2224 fprintf(outfile, "\"\n");
2225 }
2226 *pp = npp;
2227 return p;
2228 }
2229 #endif /* SUPPORT_PCRE32 */
2230
2231
2232
2233 /*************************************************
2234 * Stack guard function *
2235 *************************************************/
2236
2237 /* Called from PCRE when set in pcre_stack_guard. We give an error (non-zero)
2238 return when a count overflows. */
2239
stack_guard(void)2240 static int stack_guard(void)
2241 {
2242 return stack_guard_return;
2243 }
2244
2245 /*************************************************
2246 * Callout function *
2247 *************************************************/
2248
2249 /* Called from PCRE as a result of the (?C) item. We print out where we are in
2250 the match. Yield zero unless more callouts than the fail count, or the callout
2251 data is not zero. */
2252
/* Print a trace of one callout: optionally the captured substrings, then the
subject, then a line of markers and the next pattern item, and any change of
mark. Returns the callout data value if one is supplied and is non-zero;
otherwise 1 when the fail count is reached for callout_fail_id, else 0. */

static int callout(pcre_callout_block *cb)
{
/* The subject text is printed only on the first callout or when extra detail
is wanted; a NULL file makes the printing macros just measure. */

FILE *f = (first_callout | callout_extra)? outfile : NULL;
int i, current_position, pre_start, post_start, subject_length;

/* With extra detail, show the last capture number and every captured
substring (or <unset>) up to capture_top. */

if (callout_extra)
  {
  fprintf(f, "Callout %d: last capture = %d\n",
    cb->callout_number, cb->capture_last);

  if (cb->offset_vector != NULL)
    {
    for (i = 0; i < cb->capture_top * 2; i += 2)
      {
      if (cb->offset_vector[i] < 0)
        fprintf(f, "%2d: <unset>\n", i/2);
      else
        {
        fprintf(f, "%2d: ", i/2);
        PCHARSV(cb->subject, cb->offset_vector[i],
          cb->offset_vector[i+1] - cb->offset_vector[i], f);
        fprintf(f, "\n");
        }
      }
    }
  }

/* Re-print the subject in canonical form, the first time or if giving full
details. On subsequent calls in the same match, we use pchars just to find the
printed lengths of the substrings. */

if (f != NULL) fprintf(f, "--->");

/* If a lookbehind is involved, the current position may be earlier than the
match start. If so, use the match start instead. */

current_position = (cb->current_position >= cb->start_match)?
  cb->current_position : cb->start_match;

/* pre_start and post_start receive the printed widths of the text before the
match start and between the match start and the current position. */

PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
PCHARS(post_start, cb->subject, cb->start_match,
  current_position - cb->start_match, f);

/* Printed width of the whole subject; the NULL file means nothing is output. */

PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);

PCHARSV(cb->subject, current_position, cb->subject_length - current_position, f);

if (f != NULL) fprintf(f, "\n");

/* Always print appropriate indicators, with callout number if not already
shown. For automatic callouts, show the pattern offset. */

if (cb->callout_number == 255)
  {
  fprintf(outfile, "%+3d ", cb->pattern_position);
  if (cb->pattern_position > 99) fprintf(outfile, "\n ");
  }
else
  {
  if (callout_extra) fprintf(outfile, " ");
  else fprintf(outfile, "%3d ", cb->callout_number);
  }

/* One caret under the match start and, if the current position is beyond it,
a second caret under the current position. */

for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
fprintf(outfile, "^");

if (post_start > 0)
  {
  for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
  fprintf(outfile, "^");
  }

/* Pad beyond the end of the subject, then show the next pattern item, taken
from the copy of the pattern held in pbuffer (at least one character). */

for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
  fprintf(outfile, " ");

fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
  pbuffer + cb->pattern_position);

fprintf(outfile, "\n");
first_callout = 0;

/* Report the mark only when it differs from the one last reported. */

if (cb->mark != last_callout_mark)
  {
  if (cb->mark == NULL)
    fprintf(outfile, "Latest Mark: <unset>\n");
  else
    {
    fprintf(outfile, "Latest Mark: ");
    PCHARSV(cb->mark, 0, -1, outfile);
    putc('\n', outfile);
    }
  last_callout_mark = cb->mark;
  }

/* Non-zero callout data is printed and becomes the return value. */

if (cb->callout_data != NULL)
  {
  int callout_data = *((int *)(cb->callout_data));
  if (callout_data != 0)
    {
    fprintf(outfile, "Callout data = %d\n", callout_data);
    return callout_data;
    }
  }

/* Only callouts numbered callout_fail_id are counted; yield 1 once the count
reaches callout_fail_count. */

return (cb->callout_number != callout_fail_id)? 0 :
  (++callout_count >= callout_fail_count)? 1 : 0;
}
2360
2361
2362 /*************************************************
2363 * Local malloc functions *
2364 *************************************************/
2365
2366 /* Alternative malloc function, to test functionality and save the size of a
2367 compiled re, which is the first store request that pcre_compile() makes. The
2368 show_malloc variable is set only during matching. */
2369
new_malloc(size_t size)2370 static void *new_malloc(size_t size)
2371 {
2372 void *block = malloc(size);
2373 if (show_malloc)
2374 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
2375 return block;
2376 }
2377
new_free(void * block)2378 static void new_free(void *block)
2379 {
2380 if (show_malloc)
2381 fprintf(outfile, "free %p\n", block);
2382 free(block);
2383 }
2384
2385 /* For recursion malloc/free, to test stacking calls */
2386
stack_malloc(size_t size)2387 static void *stack_malloc(size_t size)
2388 {
2389 void *block = malloc(size);
2390 if (show_malloc)
2391 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
2392 return block;
2393 }
2394
stack_free(void * block)2395 static void stack_free(void *block)
2396 {
2397 if (show_malloc)
2398 fprintf(outfile, "stack_free %p\n", block);
2399 free(block);
2400 }
2401
2402
2403 /*************************************************
2404 * Call pcre_fullinfo() *
2405 *************************************************/
2406
2407 /* Get one piece of information from the pcre_fullinfo() function. When only
2408 one of 8-, 16- or 32-bit is supported, pcre_mode should always have the correct
2409 value, but the code is defensive.
2410
2411 Arguments:
2412 re compiled regex
2413 study study data
2414 option PCRE_INFO_xxx option
2415 ptr where to put the data
2416
2417 Returns: 0 when OK, < 0 on error
2418 */
2419
2420 static int
new_info(pcre * re,pcre_extra * study,int option,void * ptr)2421 new_info(pcre *re, pcre_extra *study, int option, void *ptr)
2422 {
2423 int rc;
2424
2425 if (pcre_mode == PCRE32_MODE)
2426 #ifdef SUPPORT_PCRE32
2427 rc = pcre32_fullinfo((pcre32 *)re, (pcre32_extra *)study, option, ptr);
2428 #else
2429 rc = PCRE_ERROR_BADMODE;
2430 #endif
2431 else if (pcre_mode == PCRE16_MODE)
2432 #ifdef SUPPORT_PCRE16
2433 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
2434 #else
2435 rc = PCRE_ERROR_BADMODE;
2436 #endif
2437 else
2438 #ifdef SUPPORT_PCRE8
2439 rc = pcre_fullinfo(re, study, option, ptr);
2440 #else
2441 rc = PCRE_ERROR_BADMODE;
2442 #endif
2443
2444 if (rc < 0 && rc != PCRE_ERROR_UNSET)
2445 {
2446 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2447 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "", option);
2448 if (rc == PCRE_ERROR_BADMODE)
2449 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
2450 "%d-bit mode\n", 8 * CHAR_SIZE,
2451 8 * (REAL_PCRE_FLAGS(re) & PCRE_MODE_MASK));
2452 }
2453
2454 return rc;
2455 }
2456
2457
2458
2459 /*************************************************
2460 * Swap byte functions *
2461 *************************************************/
2462
2463 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
2464 value, respectively.
2465
2466 Arguments:
2467 value any number
2468
2469 Returns: the byte swapped value
2470 */
2471
2472 static pcre_uint32
swap_uint32(pcre_uint32 value)2473 swap_uint32(pcre_uint32 value)
2474 {
2475 return ((value & 0x000000ff) << 24) |
2476 ((value & 0x0000ff00) << 8) |
2477 ((value & 0x00ff0000) >> 8) |
2478 (value >> 24);
2479 }
2480
2481 static pcre_uint16
swap_uint16(pcre_uint16 value)2482 swap_uint16(pcre_uint16 value)
2483 {
2484 return (value >> 8) | (value << 8);
2485 }
2486
2487
2488
2489 /*************************************************
2490 * Flip bytes in a compiled pattern *
2491 *************************************************/
2492
2493 /* This function is called if the 'F' option was present on a pattern that is
2494 to be written to a file. We flip the bytes of all the integer fields in the
2495 regex data block and the study block. In 16-bit mode this also flips relevant
2496 bytes in the pattern itself. This is to make it possible to test PCRE's
2497 ability to reload byte-flipped patterns, e.g. those compiled on a different
2498 architecture. */
2499
2500 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2501 static void
regexflip8_or_16(pcre * ere,pcre_extra * extra)2502 regexflip8_or_16(pcre *ere, pcre_extra *extra)
2503 {
2504 real_pcre8_or_16 *re = (real_pcre8_or_16 *)ere;
2505 #ifdef SUPPORT_PCRE16
2506 int op;
2507 pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
2508 int length = re->name_count * re->name_entry_size;
2509 #ifdef SUPPORT_UTF
2510 BOOL utf = (re->options & PCRE_UTF16) != 0;
2511 BOOL utf16_char = FALSE;
2512 #endif /* SUPPORT_UTF */
2513 #endif /* SUPPORT_PCRE16 */
2514
2515 /* Always flip the bytes in the main data block and study blocks. */
2516
2517 re->magic_number = REVERSED_MAGIC_NUMBER;
2518 re->size = swap_uint32(re->size);
2519 re->options = swap_uint32(re->options);
2520 re->flags = swap_uint32(re->flags);
2521 re->limit_match = swap_uint32(re->limit_match);
2522 re->limit_recursion = swap_uint32(re->limit_recursion);
2523 re->first_char = swap_uint16(re->first_char);
2524 re->req_char = swap_uint16(re->req_char);
2525 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2526 re->top_bracket = swap_uint16(re->top_bracket);
2527 re->top_backref = swap_uint16(re->top_backref);
2528 re->name_table_offset = swap_uint16(re->name_table_offset);
2529 re->name_entry_size = swap_uint16(re->name_entry_size);
2530 re->name_count = swap_uint16(re->name_count);
2531 re->ref_count = swap_uint16(re->ref_count);
2532
2533 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2534 {
2535 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2536 rsd->size = swap_uint32(rsd->size);
2537 rsd->flags = swap_uint32(rsd->flags);
2538 rsd->minlength = swap_uint32(rsd->minlength);
2539 }
2540
2541 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
2542 in the name table, if present, and then in the pattern itself. */
2543
2544 #ifdef SUPPORT_PCRE16
2545 if (pcre_mode != PCRE16_MODE) return;
2546
2547 while(TRUE)
2548 {
2549 /* Swap previous characters. */
2550 while (length-- > 0)
2551 {
2552 *ptr = swap_uint16(*ptr);
2553 ptr++;
2554 }
2555 #ifdef SUPPORT_UTF
2556 if (utf16_char)
2557 {
2558 if ((ptr[-1] & 0xfc00) == 0xd800)
2559 {
2560 /* We know that there is only one extra character in UTF-16. */
2561 *ptr = swap_uint16(*ptr);
2562 ptr++;
2563 }
2564 }
2565 utf16_char = FALSE;
2566 #endif /* SUPPORT_UTF */
2567
2568 /* Get next opcode. */
2569
2570 length = 0;
2571 op = *ptr;
2572 *ptr++ = swap_uint16(op);
2573
2574 switch (op)
2575 {
2576 case OP_END:
2577 return;
2578
2579 #ifdef SUPPORT_UTF
2580 case OP_CHAR:
2581 case OP_CHARI:
2582 case OP_NOT:
2583 case OP_NOTI:
2584 case OP_STAR:
2585 case OP_MINSTAR:
2586 case OP_PLUS:
2587 case OP_MINPLUS:
2588 case OP_QUERY:
2589 case OP_MINQUERY:
2590 case OP_UPTO:
2591 case OP_MINUPTO:
2592 case OP_EXACT:
2593 case OP_POSSTAR:
2594 case OP_POSPLUS:
2595 case OP_POSQUERY:
2596 case OP_POSUPTO:
2597 case OP_STARI:
2598 case OP_MINSTARI:
2599 case OP_PLUSI:
2600 case OP_MINPLUSI:
2601 case OP_QUERYI:
2602 case OP_MINQUERYI:
2603 case OP_UPTOI:
2604 case OP_MINUPTOI:
2605 case OP_EXACTI:
2606 case OP_POSSTARI:
2607 case OP_POSPLUSI:
2608 case OP_POSQUERYI:
2609 case OP_POSUPTOI:
2610 case OP_NOTSTAR:
2611 case OP_NOTMINSTAR:
2612 case OP_NOTPLUS:
2613 case OP_NOTMINPLUS:
2614 case OP_NOTQUERY:
2615 case OP_NOTMINQUERY:
2616 case OP_NOTUPTO:
2617 case OP_NOTMINUPTO:
2618 case OP_NOTEXACT:
2619 case OP_NOTPOSSTAR:
2620 case OP_NOTPOSPLUS:
2621 case OP_NOTPOSQUERY:
2622 case OP_NOTPOSUPTO:
2623 case OP_NOTSTARI:
2624 case OP_NOTMINSTARI:
2625 case OP_NOTPLUSI:
2626 case OP_NOTMINPLUSI:
2627 case OP_NOTQUERYI:
2628 case OP_NOTMINQUERYI:
2629 case OP_NOTUPTOI:
2630 case OP_NOTMINUPTOI:
2631 case OP_NOTEXACTI:
2632 case OP_NOTPOSSTARI:
2633 case OP_NOTPOSPLUSI:
2634 case OP_NOTPOSQUERYI:
2635 case OP_NOTPOSUPTOI:
2636 if (utf) utf16_char = TRUE;
2637 #endif
2638 /* Fall through. */
2639
2640 default:
2641 length = OP_lengths16[op] - 1;
2642 break;
2643
2644 case OP_CLASS:
2645 case OP_NCLASS:
2646 /* Skip the character bit map. */
2647 ptr += 32/sizeof(pcre_uint16);
2648 length = 0;
2649 break;
2650
2651 case OP_XCLASS:
2652 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
2653 if (LINK_SIZE > 1)
2654 length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
2655 - (1 + LINK_SIZE + 1));
2656 else
2657 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2658
2659 /* Reverse the size of the XCLASS instance. */
2660 *ptr = swap_uint16(*ptr);
2661 ptr++;
2662 if (LINK_SIZE > 1)
2663 {
2664 *ptr = swap_uint16(*ptr);
2665 ptr++;
2666 }
2667
2668 op = *ptr;
2669 *ptr = swap_uint16(op);
2670 ptr++;
2671 if ((op & XCL_MAP) != 0)
2672 {
2673 /* Skip the character bit map. */
2674 ptr += 32/sizeof(pcre_uint16);
2675 length -= 32/sizeof(pcre_uint16);
2676 }
2677 break;
2678 }
2679 }
2680 /* Control should never reach here in 16 bit mode. */
2681 #endif /* SUPPORT_PCRE16 */
2682 }
2683 #endif /* SUPPORT_PCRE[8|16] */
2684
2685
2686
2687 #if defined SUPPORT_PCRE32
2688 static void
regexflip_32(pcre * ere,pcre_extra * extra)2689 regexflip_32(pcre *ere, pcre_extra *extra)
2690 {
2691 real_pcre32 *re = (real_pcre32 *)ere;
2692 int op;
2693 pcre_uint32 *ptr = (pcre_uint32 *)re + re->name_table_offset;
2694 int length = re->name_count * re->name_entry_size;
2695
2696 /* Always flip the bytes in the main data block and study blocks. */
2697
2698 re->magic_number = REVERSED_MAGIC_NUMBER;
2699 re->size = swap_uint32(re->size);
2700 re->options = swap_uint32(re->options);
2701 re->flags = swap_uint32(re->flags);
2702 re->limit_match = swap_uint32(re->limit_match);
2703 re->limit_recursion = swap_uint32(re->limit_recursion);
2704 re->first_char = swap_uint32(re->first_char);
2705 re->req_char = swap_uint32(re->req_char);
2706 re->max_lookbehind = swap_uint16(re->max_lookbehind);
2707 re->top_bracket = swap_uint16(re->top_bracket);
2708 re->top_backref = swap_uint16(re->top_backref);
2709 re->name_table_offset = swap_uint16(re->name_table_offset);
2710 re->name_entry_size = swap_uint16(re->name_entry_size);
2711 re->name_count = swap_uint16(re->name_count);
2712 re->ref_count = swap_uint16(re->ref_count);
2713
2714 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
2715 {
2716 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2717 rsd->size = swap_uint32(rsd->size);
2718 rsd->flags = swap_uint32(rsd->flags);
2719 rsd->minlength = swap_uint32(rsd->minlength);
2720 }
2721
2722 /* In 32-bit mode we must swap bytes in the name table, if present, and then in
2723 the pattern itself. */
2724
2725 while(TRUE)
2726 {
2727 /* Swap previous characters. */
2728 while (length-- > 0)
2729 {
2730 *ptr = swap_uint32(*ptr);
2731 ptr++;
2732 }
2733
2734 /* Get next opcode. */
2735
2736 length = 0;
2737 op = *ptr;
2738 *ptr++ = swap_uint32(op);
2739
2740 switch (op)
2741 {
2742 case OP_END:
2743 return;
2744
2745 default:
2746 length = OP_lengths32[op] - 1;
2747 break;
2748
2749 case OP_CLASS:
2750 case OP_NCLASS:
2751 /* Skip the character bit map. */
2752 ptr += 32/sizeof(pcre_uint32);
2753 length = 0;
2754 break;
2755
2756 case OP_XCLASS:
2757 /* LINK_SIZE can only be 1 in 32-bit mode. */
2758 length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
2759
2760 /* Reverse the size of the XCLASS instance. */
2761 *ptr = swap_uint32(*ptr);
2762 ptr++;
2763
2764 op = *ptr;
2765 *ptr = swap_uint32(op);
2766 ptr++;
2767 if ((op & XCL_MAP) != 0)
2768 {
2769 /* Skip the character bit map. */
2770 ptr += 32/sizeof(pcre_uint32);
2771 length -= 32/sizeof(pcre_uint32);
2772 }
2773 break;
2774 }
2775 }
2776 /* Control should never reach here in 32 bit mode. */
2777 }
2778
2779 #endif /* SUPPORT_PCRE32 */
2780
2781
2782
2783 static void
regexflip(pcre * ere,pcre_extra * extra)2784 regexflip(pcre *ere, pcre_extra *extra)
2785 {
2786 #if defined SUPPORT_PCRE32
2787 if (REAL_PCRE_FLAGS(ere) & PCRE_MODE32)
2788 regexflip_32(ere, extra);
2789 #endif
2790 #if defined SUPPORT_PCRE8 || defined SUPPORT_PCRE16
2791 if (REAL_PCRE_FLAGS(ere) & (PCRE_MODE8 | PCRE_MODE16))
2792 regexflip8_or_16(ere, extra);
2793 #endif
2794 }
2795
2796
2797
2798 /*************************************************
2799 * Check match or recursion limit *
2800 *************************************************/
2801
2802 static int
check_match_limit(pcre * re,pcre_extra * extra,pcre_uint8 * bptr,int len,int start_offset,int options,int * use_offsets,int use_size_offsets,int flag,unsigned long int * limit,int errnumber,const char * msg)2803 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2804 int start_offset, int options, int *use_offsets, int use_size_offsets,
2805 int flag, unsigned long int *limit, int errnumber, const char *msg)
2806 {
2807 int count;
2808 int min = 0;
2809 int mid = 64;
2810 int max = -1;
2811
2812 extra->flags |= flag;
2813
2814 for (;;)
2815 {
2816 *limit = mid;
2817
2818 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2819 use_offsets, use_size_offsets);
2820
2821 if (count == errnumber)
2822 {
2823 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2824 min = mid;
2825 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2826 }
2827
2828 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2829 count == PCRE_ERROR_PARTIAL)
2830 {
2831 if (mid == min + 1)
2832 {
2833 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2834 break;
2835 }
2836 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2837 max = mid;
2838 mid = (min + mid)/2;
2839 }
2840 else break; /* Some other error */
2841 }
2842
2843 extra->flags &= ~flag;
2844 return count;
2845 }
2846
2847
2848
2849 /*************************************************
2850 * Case-independent strncmp() function *
2851 *************************************************/
2852
2853 /*
2854 Arguments:
2855 s first string
2856 t second string
2857 n number of characters to compare
2858
2859 Returns: < 0, = 0, or > 0, according to the comparison
2860 */
2861
/* Compare at most n characters of two zero-terminated strings, ignoring
case. As with strncmp(), the comparison stops at the end of the strings:
nothing beyond a terminating zero is examined, so two equal strings that are
shorter than n compare equal. A zero or negative n compares nothing and
yields 0.

Arguments:
  s          first string
  t          second string
  n          maximum number of characters to compare

Returns:     < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
  for (; n > 0; n--, s++, t++)
    {
    int diff = tolower(*s) - tolower(*t);
    if (diff != 0) return diff;

    /* The characters matched; if this one is the terminator, both strings
    have ended and there is nothing more that may legitimately be read. */

    if (*s == 0) break;
    }
  return 0;
}
2872
2873
2874
2875 /*************************************************
2876 * Check multicharacter option *
2877 *************************************************/
2878
2879 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2880 a message and return 0 if there is no match.
2881
2882 Arguments:
2883 p points after the leading '<'
2884 f file for error message
2885 nl TRUE to check only for newline settings
2886 stype "modifier" or "escape sequence"
2887
2888 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2889 */
2890
2891 static int
check_mc_option(pcre_uint8 * p,FILE * f,BOOL nl,const char * stype)2892 check_mc_option(pcre_uint8 *p, FILE *f, BOOL nl, const char *stype)
2893 {
2894 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2895 if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2896 if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2897 if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2898 if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2899 if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2900 if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2901
2902 if (!nl)
2903 {
2904 if (strncmpic(p, (pcre_uint8 *)"JS>", 3) == 0) return PCRE_JAVASCRIPT_COMPAT;
2905 }
2906
2907 fprintf(f, "Unknown %s at: <%s\n", stype, p);
2908 return 0;
2909 }
2910
2911
2912
2913 /*************************************************
2914 * Usage function *
2915 *************************************************/
2916
/* Write the command-line help text to stdout. Lines for options that depend
on how pcretest was built (-8, -16, -32, -dfa, -p, and the readline note) are
included only when the corresponding support is compiled in. The list covers
every option and every "-C arg" keyword that the option scanner in main()
accepts, including -8 and the ebcdic/ebcdic-nl keywords.

Arguments:   none
Returns:     nothing
*/

static void
usage(void)
{
  printf("Usage: pcretest [options] [<input file> [<output file>]]\n\n");
  printf("Input and output default to stdin and stdout.\n");
#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
  printf("If input is a terminal, readline() is used to read from it.\n");
#else
  printf("This version of pcretest is not linked with readline().\n");
#endif
  printf("\nOptions:\n");
#ifdef SUPPORT_PCRE8
  printf(" -8 use the 8-bit library\n");
#endif
#ifdef SUPPORT_PCRE16
  printf(" -16 use the 16-bit library\n");
#endif
#ifdef SUPPORT_PCRE32
  printf(" -32 use the 32-bit library\n");
#endif
  printf(" -b show compiled code\n");
  printf(" -C show PCRE compile-time options and exit\n");
  printf(" -C arg show a specific compile-time option and exit\n");
  printf(" with its value if numeric (else 0). The arg can be:\n");
  printf(" linksize internal link size [2, 3, 4]\n");
  printf(" pcre8 8 bit library support enabled [0, 1]\n");
  printf(" pcre16 16 bit library support enabled [0, 1]\n");
  printf(" pcre32 32 bit library support enabled [0, 1]\n");
  printf(" utf Unicode Transformation Format supported [0, 1]\n");
  printf(" ucp Unicode Properties supported [0, 1]\n");
  printf(" jit Just-in-time compiler supported [0, 1]\n");
  printf(" newline Newline type [CR, LF, CRLF, ANYCRLF, ANY]\n");
  printf(" bsr \\R type [ANYCRLF, ANY]\n");
  /* These two keywords are handled by the -C code in main() as well. */
  printf(" ebcdic EBCDIC code support [0, 1]\n");
  printf(" ebcdic-nl code for LF in EBCDIC, in hexadecimal (else 0)\n");
  printf(" -d debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
  printf(" -dfa force DFA matching for all subjects\n");
#endif
  printf(" -help show usage information\n");
  printf(" -i show information about compiled patterns\n"
         " -M find MATCH_LIMIT minimum for each subject\n"
         " -m output memory used information\n"
         " -O set PCRE_NO_AUTO_POSSESS on each pattern\n"
         " -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
  printf(" -p use POSIX interface\n");
#endif
  printf(" -q quiet: do not output PCRE version number at start\n");
  printf(" -S <n> set stack size to <n> megabytes\n");
  printf(" -s force each pattern to be studied at basic level\n"
         " -s+ force each pattern to be studied, using JIT if available\n"
         " -s++ ditto, verifying when JIT was actually used\n"
         " -s+n force each pattern to be studied, using JIT if available,\n"
         " where 1 <= n <= 7 selects JIT options\n"
         " -s++n ditto, verifying when JIT was actually used\n"
         " -t time compilation and execution\n");
  printf(" -t <n> time compilation and execution, repeating <n> times\n");
  printf(" -tm time execution (matching) only\n");
  printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
  printf(" -T same as -t, but show total times at the end\n");
  printf(" -TM same as -tm, but show total time at the end\n");
}
2975
2976
2977
2978 /*************************************************
2979 * Main Program *
2980 *************************************************/
2981
2982 /* Read lines from named file or stdin and write to named file or stdout; lines
2983 consist of a regular expression, in delimiters and optionally followed by
2984 options, followed by a set of test data, terminated by an empty line. */
2985
main(int argc,char ** argv)2986 int main(int argc, char **argv)
2987 {
2988 FILE *infile = stdin;
2989 const char *version;
2990 int options = 0;
2991 int study_options = 0;
2992 int default_find_match_limit = FALSE;
2993 pcre_uint32 default_options = 0;
2994 int op = 1;
2995 int timeit = 0;
2996 int timeitm = 0;
2997 int showtotaltimes = 0;
2998 int showinfo = 0;
2999 int showstore = 0;
3000 int force_study = -1;
3001 int force_study_options = 0;
3002 int quiet = 0;
3003 int size_offsets = 45;
3004 int size_offsets_max;
3005 int *offsets = NULL;
3006 int debug = 0;
3007 int done = 0;
3008 int all_use_dfa = 0;
3009 int verify_jit = 0;
3010 int yield = 0;
3011 int stack_size;
3012 pcre_uint8 *dbuffer = NULL;
3013 pcre_uint8 lockout[24] = { 0 };
3014 size_t dbuffer_size = 1u << 14;
3015 clock_t total_compile_time = 0;
3016 clock_t total_study_time = 0;
3017 clock_t total_match_time = 0;
3018
3019 #if !defined NOPOSIX
3020 int posix = 0;
3021 #endif
3022 #if !defined NODFA
3023 int *dfa_workspace = NULL;
3024 #endif
3025
3026 pcre_jit_stack *jit_stack = NULL;
3027
3028 /* These vectors store, end-to-end, a list of zero-terminated captured
3029 substring names, each list itself being terminated by an empty name. Assume
3030 that 1024 is plenty long enough for the few names we'll be testing. It is
3031 easiest to keep separate 8-, 16- and 32-bit versions, using the 32-bit version
3032 for the actual memory, to ensure alignment. */
3033
3034 pcre_uint32 copynames[1024];
3035 pcre_uint32 getnames[1024];
3036
3037 #ifdef SUPPORT_PCRE32
3038 pcre_uint32 *cn32ptr;
3039 pcre_uint32 *gn32ptr;
3040 #endif
3041
3042 #ifdef SUPPORT_PCRE16
3043 pcre_uint16 *copynames16 = (pcre_uint16 *)copynames;
3044 pcre_uint16 *getnames16 = (pcre_uint16 *)getnames;
3045 pcre_uint16 *cn16ptr;
3046 pcre_uint16 *gn16ptr;
3047 #endif
3048
3049 #ifdef SUPPORT_PCRE8
3050 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
3051 pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
3052 pcre_uint8 *cn8ptr;
3053 pcre_uint8 *gn8ptr;
3054 #endif
3055
3056 /* Get buffers from malloc() so that valgrind will check their misuse when
3057 debugging. They grow automatically when very long lines are read. The 16-
3058 and 32-bit buffers (buffer16, buffer32) are obtained only if needed. */
3059
3060 buffer = (pcre_uint8 *)malloc(buffer_size);
3061 pbuffer = (pcre_uint8 *)malloc(buffer_size);
3062
3063 /* The outfile variable is static so that new_malloc can use it. */
3064
3065 outfile = stdout;
3066
3067 /* The following _setmode() stuff is some Windows magic that tells its runtime
3068 library to translate CRLF into a single LF character. At least, that's what
3069 I've been told: never having used Windows I take this all on trust. Originally
3070 it set 0x8000, but then I was advised that _O_BINARY was better. */
3071
3072 #if defined(_WIN32) || defined(WIN32)
3073 _setmode( _fileno( stdout ), _O_BINARY );
3074 #endif
3075
3076 /* Get the version number: both pcre_version() and pcre16_version() give the
3077 same answer. We just need to ensure that we call one that is available. */
3078
3079 #if defined SUPPORT_PCRE8
3080 version = pcre_version();
3081 #elif defined SUPPORT_PCRE16
3082 version = pcre16_version();
3083 #elif defined SUPPORT_PCRE32
3084 version = pcre32_version();
3085 #endif
3086
3087 /* Scan options */
3088
3089 while (argc > 1 && argv[op][0] == '-')
3090 {
3091 pcre_uint8 *endptr;
3092 char *arg = argv[op];
3093
3094 if (strcmp(arg, "-m") == 0) showstore = 1;
3095 else if (strcmp(arg, "-s") == 0) force_study = 0;
3096
3097 else if (strncmp(arg, "-s+", 3) == 0)
3098 {
3099 arg += 3;
3100 if (*arg == '+') { arg++; verify_jit = TRUE; }
3101 force_study = 1;
3102 if (*arg == 0)
3103 force_study_options = jit_study_bits[6];
3104 else if (*arg >= '1' && *arg <= '7')
3105 force_study_options = jit_study_bits[*arg - '1'];
3106 else goto BAD_ARG;
3107 }
3108 else if (strcmp(arg, "-8") == 0)
3109 {
3110 #ifdef SUPPORT_PCRE8
3111 pcre_mode = PCRE8_MODE;
3112 #else
3113 printf("** This version of PCRE was built without 8-bit support\n");
3114 exit(1);
3115 #endif
3116 }
3117 else if (strcmp(arg, "-16") == 0)
3118 {
3119 #ifdef SUPPORT_PCRE16
3120 pcre_mode = PCRE16_MODE;
3121 #else
3122 printf("** This version of PCRE was built without 16-bit support\n");
3123 exit(1);
3124 #endif
3125 }
3126 else if (strcmp(arg, "-32") == 0)
3127 {
3128 #ifdef SUPPORT_PCRE32
3129 pcre_mode = PCRE32_MODE;
3130 #else
3131 printf("** This version of PCRE was built without 32-bit support\n");
3132 exit(1);
3133 #endif
3134 }
3135 else if (strcmp(arg, "-q") == 0) quiet = 1;
3136 else if (strcmp(arg, "-b") == 0) debug = 1;
3137 else if (strcmp(arg, "-i") == 0) showinfo = 1;
3138 else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
3139 else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
3140 else if (strcmp(arg, "-O") == 0) default_options |= PCRE_NO_AUTO_POSSESS;
3141 #if !defined NODFA
3142 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
3143 #endif
3144 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
3145 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3146 *endptr == 0))
3147 {
3148 op++;
3149 argc--;
3150 }
3151 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0 ||
3152 strcmp(arg, "-T") == 0 || strcmp(arg, "-TM") == 0)
3153 {
3154 int temp;
3155 int both = arg[2] == 0;
3156 showtotaltimes = arg[1] == 'T';
3157 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
3158 *endptr == 0))
3159 {
3160 timeitm = temp;
3161 op++;
3162 argc--;
3163 }
3164 else timeitm = LOOPREPEAT;
3165 if (both) timeit = timeitm;
3166 }
3167 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
3168 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
3169 *endptr == 0))
3170 {
3171 #if defined(_WIN32) || defined(WIN32) || defined(__minix) || defined(NATIVE_ZOS) || defined(__VMS)
3172 printf("PCRE: -S not supported on this OS\n");
3173 exit(1);
3174 #else
3175 int rc;
3176 struct rlimit rlim;
3177 getrlimit(RLIMIT_STACK, &rlim);
3178 rlim.rlim_cur = stack_size * 1024 * 1024;
3179 rc = setrlimit(RLIMIT_STACK, &rlim);
3180 if (rc != 0)
3181 {
3182 printf("PCRE: setrlimit() failed with error %d\n", rc);
3183 exit(1);
3184 }
3185 op++;
3186 argc--;
3187 #endif
3188 }
3189 #if !defined NOPOSIX
3190 else if (strcmp(arg, "-p") == 0) posix = 1;
3191 #endif
3192 else if (strcmp(arg, "-C") == 0)
3193 {
3194 int rc;
3195 unsigned long int lrc;
3196
3197 if (argc > 2)
3198 {
3199 if (strcmp(argv[op + 1], "linksize") == 0)
3200 {
3201 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3202 printf("%d\n", rc);
3203 yield = rc;
3204
3205 #ifdef __VMS
3206 vms_setsymbol("LINKSIZE",0,yield );
3207 #endif
3208 }
3209 else if (strcmp(argv[op + 1], "pcre8") == 0)
3210 {
3211 #ifdef SUPPORT_PCRE8
3212 printf("1\n");
3213 yield = 1;
3214 #else
3215 printf("0\n");
3216 yield = 0;
3217 #endif
3218 #ifdef __VMS
3219 vms_setsymbol("PCRE8",0,yield );
3220 #endif
3221 }
3222 else if (strcmp(argv[op + 1], "pcre16") == 0)
3223 {
3224 #ifdef SUPPORT_PCRE16
3225 printf("1\n");
3226 yield = 1;
3227 #else
3228 printf("0\n");
3229 yield = 0;
3230 #endif
3231 #ifdef __VMS
3232 vms_setsymbol("PCRE16",0,yield );
3233 #endif
3234 }
3235 else if (strcmp(argv[op + 1], "pcre32") == 0)
3236 {
3237 #ifdef SUPPORT_PCRE32
3238 printf("1\n");
3239 yield = 1;
3240 #else
3241 printf("0\n");
3242 yield = 0;
3243 #endif
3244 #ifdef __VMS
3245 vms_setsymbol("PCRE32",0,yield );
3246 #endif
3247 }
3248 else if (strcmp(argv[op + 1], "utf") == 0)
3249 {
3250 #ifdef SUPPORT_PCRE8
3251 if (pcre_mode == PCRE8_MODE)
3252 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3253 #endif
3254 #ifdef SUPPORT_PCRE16
3255 if (pcre_mode == PCRE16_MODE)
3256 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3257 #endif
3258 #ifdef SUPPORT_PCRE32
3259 if (pcre_mode == PCRE32_MODE)
3260 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3261 #endif
3262 printf("%d\n", rc);
3263 yield = rc;
3264 #ifdef __VMS
3265 vms_setsymbol("UTF",0,yield );
3266 #endif
3267 }
3268 else if (strcmp(argv[op + 1], "ucp") == 0)
3269 {
3270 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3271 printf("%d\n", rc);
3272 yield = rc;
3273 }
3274 else if (strcmp(argv[op + 1], "jit") == 0)
3275 {
3276 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3277 printf("%d\n", rc);
3278 yield = rc;
3279 }
3280 else if (strcmp(argv[op + 1], "newline") == 0)
3281 {
3282 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3283 print_newline_config(rc, TRUE);
3284 }
3285 else if (strcmp(argv[op + 1], "bsr") == 0)
3286 {
3287 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3288 printf("%s\n", rc? "ANYCRLF" : "ANY");
3289 }
3290 else if (strcmp(argv[op + 1], "ebcdic") == 0)
3291 {
3292 #ifdef EBCDIC
3293 printf("1\n");
3294 yield = 1;
3295 #else
3296 printf("0\n");
3297 #endif
3298 }
3299 else if (strcmp(argv[op + 1], "ebcdic-nl") == 0)
3300 {
3301 #ifdef EBCDIC
3302 printf("0x%02x\n", CHAR_LF);
3303 #else
3304 printf("0\n");
3305 #endif
3306 }
3307 else
3308 {
3309 printf("Unknown -C option: %s\n", argv[op + 1]);
3310 }
3311 goto EXIT;
3312 }
3313
3314 /* No argument for -C: output all configuration information. */
3315
3316 printf("PCRE version %s\n", version);
3317 printf("Compiled with\n");
3318
3319 #ifdef EBCDIC
3320 printf(" EBCDIC code support: LF is 0x%02x\n", CHAR_LF);
3321 #endif
3322
3323 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
3324 are set, either both UTFs are supported or both are not supported. */
3325
3326 #ifdef SUPPORT_PCRE8
3327 printf(" 8-bit support\n");
3328 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
3329 printf (" %sUTF-8 support\n", rc ? "" : "No ");
3330 #endif
3331 #ifdef SUPPORT_PCRE16
3332 printf(" 16-bit support\n");
3333 (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
3334 printf (" %sUTF-16 support\n", rc ? "" : "No ");
3335 #endif
3336 #ifdef SUPPORT_PCRE32
3337 printf(" 32-bit support\n");
3338 (void)pcre32_config(PCRE_CONFIG_UTF32, &rc);
3339 printf (" %sUTF-32 support\n", rc ? "" : "No ");
3340 #endif
3341
3342 (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
3343 printf(" %sUnicode properties support\n", rc? "" : "No ");
3344 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
3345 if (rc)
3346 {
3347 const char *arch;
3348 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
3349 printf(" Just-in-time compiler support: %s\n", arch);
3350 }
3351 else
3352 printf(" No just-in-time compiler support\n");
3353 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
3354 print_newline_config(rc, FALSE);
3355 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
3356 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
3357 "all Unicode newlines");
3358 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
3359 printf(" Internal link size = %d\n", rc);
3360 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
3361 printf(" POSIX malloc threshold = %d\n", rc);
3362 (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
3363 printf(" Parentheses nest limit = %ld\n", lrc);
3364 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
3365 printf(" Default match limit = %ld\n", lrc);
3366 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
3367 printf(" Default recursion depth limit = %ld\n", lrc);
3368 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
3369 printf(" Match recursion uses %s", rc? "stack" : "heap");
3370 if (showstore)
3371 {
3372 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
3373 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
3374 }
3375 printf("\n");
3376 goto EXIT;
3377 }
3378 else if (strcmp(arg, "-help") == 0 ||
3379 strcmp(arg, "--help") == 0)
3380 {
3381 usage();
3382 goto EXIT;
3383 }
3384 else
3385 {
3386 BAD_ARG:
3387 printf("** Unknown or malformed option %s\n", arg);
3388 usage();
3389 yield = 1;
3390 goto EXIT;
3391 }
3392 op++;
3393 argc--;
3394 }
3395
3396 /* Get the store for the offsets vector, and remember what it was */
3397
3398 size_offsets_max = size_offsets;
3399 offsets = (int *)malloc(size_offsets_max * sizeof(int));
3400 if (offsets == NULL)
3401 {
3402 printf("** Failed to get %d bytes of memory for offsets vector\n",
3403 (int)(size_offsets_max * sizeof(int)));
3404 yield = 1;
3405 goto EXIT;
3406 }
3407
3408 /* Sort out the input and output files */
3409
3410 if (argc > 1)
3411 {
3412 infile = fopen(argv[op], INPUT_MODE);
3413 if (infile == NULL)
3414 {
3415 printf("** Failed to open %s\n", argv[op]);
3416 yield = 1;
3417 goto EXIT;
3418 }
3419 }
3420
3421 if (argc > 2)
3422 {
3423 outfile = fopen(argv[op+1], OUTPUT_MODE);
3424 if (outfile == NULL)
3425 {
3426 printf("** Failed to open %s\n", argv[op+1]);
3427 yield = 1;
3428 goto EXIT;
3429 }
3430 }
3431
3432 /* Set alternative malloc function */
3433
3434 #ifdef SUPPORT_PCRE8
3435 pcre_malloc = new_malloc;
3436 pcre_free = new_free;
3437 pcre_stack_malloc = stack_malloc;
3438 pcre_stack_free = stack_free;
3439 #endif
3440
3441 #ifdef SUPPORT_PCRE16
3442 pcre16_malloc = new_malloc;
3443 pcre16_free = new_free;
3444 pcre16_stack_malloc = stack_malloc;
3445 pcre16_stack_free = stack_free;
3446 #endif
3447
3448 #ifdef SUPPORT_PCRE32
3449 pcre32_malloc = new_malloc;
3450 pcre32_free = new_free;
3451 pcre32_stack_malloc = stack_malloc;
3452 pcre32_stack_free = stack_free;
3453 #endif
3454
3455 /* Heading line unless quiet */
3456
3457 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
3458
3459 /* Main loop */
3460
3461 while (!done)
3462 {
3463 pcre *re = NULL;
3464 pcre_extra *extra = NULL;
3465
3466 #if !defined NOPOSIX /* There are still compilers that require no indent */
3467 regex_t preg = { NULL, 0, 0} ;
3468 int do_posix = 0;
3469 #endif
3470
3471 const char *error;
3472 pcre_uint8 *markptr;
3473 pcre_uint8 *p, *pp, *ppp;
3474 pcre_uint8 *to_file = NULL;
3475 const pcre_uint8 *tables = NULL;
3476 unsigned long int get_options;
3477 unsigned long int true_size, true_study_size = 0;
3478 size_t size;
3479 int do_allcaps = 0;
3480 int do_mark = 0;
3481 int do_study = 0;
3482 int no_force_study = 0;
3483 int do_debug = debug;
3484 int do_G = 0;
3485 int do_g = 0;
3486 int do_showinfo = showinfo;
3487 int do_showrest = 0;
3488 int do_showcaprest = 0;
3489 int do_flip = 0;
3490 int erroroffset, len, delimiter, poffset;
3491
3492 #if !defined NODFA
3493 int dfa_matched = 0;
3494 #endif
3495
3496 use_utf = 0;
3497 debug_lengths = 1;
3498 SET_PCRE_STACK_GUARD(NULL);
3499
3500 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
3501 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3502 fflush(outfile);
3503
3504 p = buffer;
3505 while (isspace(*p)) p++;
3506 if (*p == 0) continue;
3507
3508 /* Handle option lock-out setting */
3509
3510 if (*p == '<' && p[1] == ' ')
3511 {
3512 p += 2;
3513 while (isspace(*p)) p++;
3514 if (strncmp((char *)p, "forbid ", 7) == 0)
3515 {
3516 p += 7;
3517 while (isspace(*p)) p++;
3518 pp = lockout;
3519 while (!isspace(*p) && pp < lockout + sizeof(lockout) - 1)
3520 *pp++ = *p++;
3521 *pp = 0;
3522 }
3523 else
3524 {
3525 printf("** Unrecognized special command '%s'\n", p);
3526 yield = 1;
3527 goto EXIT;
3528 }
3529 continue;
3530 }
3531
3532 /* See if the pattern is to be loaded pre-compiled from a file. */
3533
3534 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
3535 {
3536 pcre_uint32 magic;
3537 pcre_uint8 sbuf[8];
3538 FILE *f;
3539
3540 p++;
3541 if (*p == '!')
3542 {
3543 do_debug = TRUE;
3544 do_showinfo = TRUE;
3545 p++;
3546 }
3547
3548 pp = p + (int)strlen((char *)p);
3549 while (isspace(pp[-1])) pp--;
3550 *pp = 0;
3551
3552 f = fopen((char *)p, "rb");
3553 if (f == NULL)
3554 {
3555 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
3556 continue;
3557 }
3558 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
3559
3560 true_size =
3561 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
3562 true_study_size =
3563 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
3564
3565 re = (pcre *)new_malloc(true_size);
3566 if (re == NULL)
3567 {
3568 printf("** Failed to get %d bytes of memory for pcre object\n",
3569 (int)true_size);
3570 yield = 1;
3571 goto EXIT;
3572 }
3573 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
3574
3575 magic = REAL_PCRE_MAGIC(re);
3576 if (magic != MAGIC_NUMBER)
3577 {
3578 if (swap_uint32(magic) == MAGIC_NUMBER)
3579 {
3580 do_flip = 1;
3581 }
3582 else
3583 {
3584 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
3585 new_free(re);
3586 fclose(f);
3587 continue;
3588 }
3589 }
3590
3591 /* We hide the byte-invert info for little and big endian tests. */
3592 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
3593 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
3594
3595 /* Now see if there is any following study data. */
3596
3597 if (true_study_size != 0)
3598 {
3599 pcre_study_data *psd;
3600
3601 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
3602 extra->flags = PCRE_EXTRA_STUDY_DATA;
3603
3604 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
3605 extra->study_data = psd;
3606
3607 if (fread(psd, 1, true_study_size, f) != true_study_size)
3608 {
3609 FAIL_READ:
3610 fprintf(outfile, "Failed to read data from %s\n", p);
3611 if (extra != NULL)
3612 {
3613 PCRE_FREE_STUDY(extra);
3614 }
3615 new_free(re);
3616 fclose(f);
3617 continue;
3618 }
3619 fprintf(outfile, "Study data loaded from %s\n", p);
3620 do_study = 1; /* To get the data output if requested */
3621 }
3622 else fprintf(outfile, "No study data\n");
3623
3624 /* Flip the necessary bytes. */
3625 if (do_flip)
3626 {
3627 int rc;
3628 PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
3629 if (rc == PCRE_ERROR_BADMODE)
3630 {
3631 pcre_uint32 flags_in_host_byte_order;
3632 if (REAL_PCRE_MAGIC(re) == MAGIC_NUMBER)
3633 flags_in_host_byte_order = REAL_PCRE_FLAGS(re);
3634 else
3635 flags_in_host_byte_order = swap_uint32(REAL_PCRE_FLAGS(re));
3636 /* Simulate the result of the function call below. */
3637 fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
3638 pcre_mode == PCRE32_MODE ? "32" : pcre_mode == PCRE16_MODE ? "16" : "",
3639 PCRE_INFO_OPTIONS);
3640 fprintf(outfile, "Running in %d-bit mode but pattern was compiled in "
3641 "%d-bit mode\n", 8 * CHAR_SIZE, 8 * (flags_in_host_byte_order & PCRE_MODE_MASK));
3642 new_free(re);
3643 fclose(f);
3644 continue;
3645 }
3646 }
3647
3648 /* Need to know if UTF-8 for printing data strings. */
3649
3650 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3651 {
3652 new_free(re);
3653 fclose(f);
3654 continue;
3655 }
3656 use_utf = (get_options & PCRE_UTF8) != 0;
3657
3658 fclose(f);
3659 goto SHOW_INFO;
3660 }
3661
3662 /* In-line pattern (the usual case). Get the delimiter and seek the end of
3663 the pattern; if it isn't complete, read more. */
3664
3665 delimiter = *p++;
3666
3667 if (isalnum(delimiter) || delimiter == '\\')
3668 {
3669 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
3670 goto SKIP_DATA;
3671 }
3672
3673 pp = p;
3674 poffset = (int)(p - buffer);
3675
3676 for(;;)
3677 {
3678 while (*pp != 0)
3679 {
3680 if (*pp == '\\' && pp[1] != 0) pp++;
3681 else if (*pp == delimiter) break;
3682 pp++;
3683 }
3684 if (*pp != 0) break;
3685 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
3686 {
3687 fprintf(outfile, "** Unexpected EOF\n");
3688 done = 1;
3689 goto CONTINUE;
3690 }
3691 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
3692 }
3693
3694 /* The buffer may have moved while being extended; reset the start of data
3695 pointer to the correct relative point in the buffer. */
3696
3697 p = buffer + poffset;
3698
3699 /* If the first character after the delimiter is backslash, make
3700 the pattern end with backslash. This is purely to provide a way
3701 of testing for the error message when a pattern ends with backslash. */
3702
3703 if (pp[1] == '\\') *pp++ = '\\';
3704
3705 /* Terminate the pattern at the delimiter, and save a copy of the pattern
3706 for callouts. */
3707
3708 *pp++ = 0;
3709 strcpy((char *)pbuffer, (char *)p);
3710
3711 /* Look for modifiers and options after the final delimiter. */
3712
3713 options = default_options;
3714 study_options = force_study_options;
3715 log_store = showstore; /* default from command line */
3716
3717 while (*pp != 0)
3718 {
3719 /* Check to see whether this modifier has been locked out for this file.
3720 This is complicated for the multi-character options that begin with '<'.
3721 If there is no '>' in the lockout string, all multi-character modifiers are
3722 locked out. */
3723
3724 if (strchr((char *)lockout, *pp) != NULL)
3725 {
3726 if (*pp == '<' && strchr((char *)lockout, '>') != NULL)
3727 {
3728 int x = check_mc_option(pp+1, outfile, FALSE, "modifier");
3729 if (x == 0) goto SKIP_DATA;
3730
3731 for (ppp = lockout; *ppp != 0; ppp++)
3732 {
3733 if (*ppp == '<')
3734 {
3735 int y = check_mc_option(ppp+1, outfile, FALSE, "modifier");
3736 if (y == 0)
3737 {
3738 printf("** Error in modifier forbid data - giving up.\n");
3739 yield = 1;
3740 goto EXIT;
3741 }
3742 if (x == y)
3743 {
3744 ppp = pp;
3745 while (*ppp != '>') ppp++;
3746 printf("** The %.*s modifier is locked out - giving up.\n",
3747 (int)(ppp - pp + 1), pp);
3748 yield = 1;
3749 goto EXIT;
3750 }
3751 }
3752 }
3753 }
3754
3755 /* The single-character modifiers are straightforward. */
3756
3757 else
3758 {
3759 printf("** The /%c modifier is locked out - giving up.\n", *pp);
3760 yield = 1;
3761 goto EXIT;
3762 }
3763 }
3764
3765 /* The modifier is not locked out; handle it. */
3766
3767 switch (*pp++)
3768 {
3769 case 'f': options |= PCRE_FIRSTLINE; break;
3770 case 'g': do_g = 1; break;
3771 case 'i': options |= PCRE_CASELESS; break;
3772 case 'm': options |= PCRE_MULTILINE; break;
3773 case 's': options |= PCRE_DOTALL; break;
3774 case 'x': options |= PCRE_EXTENDED; break;
3775
3776 case '+':
3777 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
3778 break;
3779
3780 case '=': do_allcaps = 1; break;
3781 case 'A': options |= PCRE_ANCHORED; break;
3782 case 'B': do_debug = 1; break;
3783 case 'C': options |= PCRE_AUTO_CALLOUT; break;
3784 case 'D': do_debug = do_showinfo = 1; break;
3785 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
3786 case 'F': do_flip = 1; break;
3787 case 'G': do_G = 1; break;
3788 case 'I': do_showinfo = 1; break;
3789 case 'J': options |= PCRE_DUPNAMES; break;
3790 case 'K': do_mark = 1; break;
3791 case 'M': log_store = 1; break;
3792 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
3793 case 'O': options |= PCRE_NO_AUTO_POSSESS; break;
3794
3795 #if !defined NOPOSIX
3796 case 'P': do_posix = 1; break;
3797 #endif
3798
3799 case 'Q':
3800 switch (*pp)
3801 {
3802 case '0':
3803 case '1':
3804 stack_guard_return = *pp++ - '0';
3805 break;
3806
3807 default:
3808 fprintf(outfile, "** Missing 0 or 1 after /Q\n");
3809 goto SKIP_DATA;
3810 }
3811 SET_PCRE_STACK_GUARD(stack_guard);
3812 break;
3813
3814 case 'S':
3815 do_study = 1;
3816 for (;;)
3817 {
3818 switch (*pp++)
3819 {
3820 case 'S':
3821 do_study = 0;
3822 no_force_study = 1;
3823 break;
3824
3825 case '!':
3826 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3827 break;
3828
3829 case '+':
3830 if (*pp == '+')
3831 {
3832 verify_jit = TRUE;
3833 pp++;
3834 }
3835 if (*pp >= '1' && *pp <= '7')
3836 study_options |= jit_study_bits[*pp++ - '1'];
3837 else
3838 study_options |= jit_study_bits[6];
3839 break;
3840
3841 case '-':
3842 study_options &= ~PCRE_STUDY_ALLJIT;
3843 break;
3844
3845 default:
3846 pp--;
3847 goto ENDLOOP;
3848 }
3849 }
3850 ENDLOOP:
3851 break;
3852
3853 case 'U': options |= PCRE_UNGREEDY; break;
3854 case 'W': options |= PCRE_UCP; break;
3855 case 'X': options |= PCRE_EXTRA; break;
3856 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
3857 case 'Z': debug_lengths = 0; break;
3858 case '8': options |= PCRE_UTF8; use_utf = 1; break;
3859 case '9': options |= PCRE_NEVER_UTF; break;
3860 case '?': options |= PCRE_NO_UTF8_CHECK; break;
3861
3862 case 'T':
3863 switch (*pp++)
3864 {
3865 case '0': tables = tables0; break;
3866 case '1': tables = tables1; break;
3867
3868 case '\r':
3869 case '\n':
3870 case ' ':
3871 case 0:
3872 fprintf(outfile, "** Missing table number after /T\n");
3873 goto SKIP_DATA;
3874
3875 default:
3876 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
3877 goto SKIP_DATA;
3878 }
3879 break;
3880
3881 case 'L':
3882 ppp = pp;
3883 /* The '\r' test here is so that it works on Windows. */
3884 /* The '0' test is just in case this is an unterminated line. */
3885 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
3886 *ppp = 0;
3887 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
3888 {
3889 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
3890 goto SKIP_DATA;
3891 }
3892 locale_set = 1;
3893 tables = PCRE_MAKETABLES;
3894 pp = ppp;
3895 break;
3896
3897 case '>':
3898 to_file = pp;
3899 while (*pp != 0) pp++;
3900 while (isspace(pp[-1])) pp--;
3901 *pp = 0;
3902 break;
3903
3904 case '<':
3905 {
3906 int x = check_mc_option(pp, outfile, FALSE, "modifier");
3907 if (x == 0) goto SKIP_DATA;
3908 options |= x;
3909 while (*pp++ != '>');
3910 }
3911 break;
3912
3913 case '\r': /* So that it works in Windows */
3914 case '\n':
3915 case ' ':
3916 break;
3917
3918 default:
3919 fprintf(outfile, "** Unknown modifier '%c'\n", pp[-1]);
3920 goto SKIP_DATA;
3921 }
3922 }
3923
3924 /* Handle compiling via the POSIX interface, which doesn't support the
3925 timing, showing, or debugging options, nor the ability to pass over
3926 local character tables. Neither does it have 16-bit support. */
3927
3928 #if !defined NOPOSIX
3929 if (posix || do_posix)
3930 {
3931 int rc;
3932 int cflags = 0;
3933
3934 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
3935 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
3936 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
3937 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
3938 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
3939 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
3940 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
3941
3942 rc = regcomp(&preg, (char *)p, cflags);
3943
3944 /* Compilation failed; go back for another re, skipping to blank line
3945 if non-interactive. */
3946
3947 if (rc != 0)
3948 {
3949 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3950 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
3951 goto SKIP_DATA;
3952 }
3953 }
3954
3955 /* Handle compiling via the native interface */
3956
3957 else
3958 #endif /* !defined NOPOSIX */
3959
3960 {
3961 /* In 16- or 32-bit mode, convert the input. */
3962
3963 #ifdef SUPPORT_PCRE16
3964 if (pcre_mode == PCRE16_MODE)
3965 {
3966 switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
3967 {
3968 case -1:
3969 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3970 "converted to UTF-16\n");
3971 goto SKIP_DATA;
3972
3973 case -2:
3974 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3975 "cannot be converted to UTF-16\n");
3976 goto SKIP_DATA;
3977
3978 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
3979 fprintf(outfile, "**Failed: character value greater than 0xffff "
3980 "cannot be converted to 16-bit in non-UTF mode\n");
3981 goto SKIP_DATA;
3982
3983 default:
3984 break;
3985 }
3986 p = (pcre_uint8 *)buffer16;
3987 }
3988 #endif
3989
3990 #ifdef SUPPORT_PCRE32
3991 if (pcre_mode == PCRE32_MODE)
3992 {
3993 switch(to32(FALSE, p, options & PCRE_UTF32, (int)strlen((char *)p)))
3994 {
3995 case -1:
3996 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3997 "converted to UTF-32\n");
3998 goto SKIP_DATA;
3999
4000 case -2:
4001 fprintf(outfile, "**Failed: character value greater than 0x10ffff "
4002 "cannot be converted to UTF-32\n");
4003 goto SKIP_DATA;
4004
4005 case -3:
4006 fprintf(outfile, "**Failed: character value is ill-formed UTF-32\n");
4007 goto SKIP_DATA;
4008
4009 default:
4010 break;
4011 }
4012 p = (pcre_uint8 *)buffer32;
4013 }
4014 #endif
4015
4016 /* Compile many times when timing */
4017
4018 if (timeit > 0)
4019 {
4020 register int i;
4021 clock_t time_taken;
4022 clock_t start_time = clock();
4023 for (i = 0; i < timeit; i++)
4024 {
4025 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4026 if (re != NULL) free(re);
4027 }
4028 total_compile_time += (time_taken = clock() - start_time);
4029 fprintf(outfile, "Compile time %.4f milliseconds\n",
4030 (((double)time_taken * 1000.0) / (double)timeit) /
4031 (double)CLOCKS_PER_SEC);
4032 }
4033
4034 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
4035
4036 /* Compilation failed; go back for another re, skipping to blank line
4037 if non-interactive. */
4038
4039 if (re == NULL)
4040 {
4041 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
4042 SKIP_DATA:
4043 if (infile != stdin)
4044 {
4045 for (;;)
4046 {
4047 if (extend_inputline(infile, buffer, NULL) == NULL)
4048 {
4049 done = 1;
4050 goto CONTINUE;
4051 }
4052 len = (int)strlen((char *)buffer);
4053 while (len > 0 && isspace(buffer[len-1])) len--;
4054 if (len == 0) break;
4055 }
4056 fprintf(outfile, "\n");
4057 }
4058 goto CONTINUE;
4059 }
4060
4061 /* Compilation succeeded. It is now possible to set the UTF-8 option from
4062 within the regex; check for this so that we know how to process the data
4063 lines. */
4064
4065 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
4066 goto SKIP_DATA;
4067 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
4068
4069 /* Extract the size for possible writing before possibly flipping it,
4070 and remember the store that was got. */
4071
4072 true_size = REAL_PCRE_SIZE(re);
4073
4074 /* Output code size information if requested */
4075
4076 if (log_store)
4077 {
4078 int name_count, name_entry_size, real_pcre_size;
4079
4080 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &name_count);
4081 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
4082 real_pcre_size = 0;
4083 #ifdef SUPPORT_PCRE8
4084 if (REAL_PCRE_FLAGS(re) & PCRE_MODE8)
4085 real_pcre_size = sizeof(real_pcre);
4086 #endif
4087 #ifdef SUPPORT_PCRE16
4088 if (REAL_PCRE_FLAGS(re) & PCRE_MODE16)
4089 real_pcre_size = sizeof(real_pcre16);
4090 #endif
4091 #ifdef SUPPORT_PCRE32
4092 if (REAL_PCRE_FLAGS(re) & PCRE_MODE32)
4093 real_pcre_size = sizeof(real_pcre32);
4094 #endif
4095 new_info(re, NULL, PCRE_INFO_SIZE, &size);
4096 fprintf(outfile, "Memory allocation (code space): %d\n",
4097 (int)(size - real_pcre_size - name_count * name_entry_size));
4098 }
4099
4100 /* If -s or /S was present, study the regex to generate additional info to
4101 help with the matching, unless the pattern has the SS option, which
4102 suppresses the effect of /S (used for a few test patterns where studying is
4103 never sensible). */
4104
4105 if (do_study || (force_study >= 0 && !no_force_study))
4106 {
4107 if (timeit > 0)
4108 {
4109 register int i;
4110 clock_t time_taken;
4111 clock_t start_time = clock();
4112 for (i = 0; i < timeit; i++)
4113 {
4114 PCRE_STUDY(extra, re, study_options, &error);
4115 }
4116 total_study_time = (time_taken = clock() - start_time);
4117 if (extra != NULL)
4118 {
4119 PCRE_FREE_STUDY(extra);
4120 }
4121 fprintf(outfile, " Study time %.4f milliseconds\n",
4122 (((double)time_taken * 1000.0) / (double)timeit) /
4123 (double)CLOCKS_PER_SEC);
4124 }
4125 PCRE_STUDY(extra, re, study_options, &error);
4126 if (error != NULL)
4127 fprintf(outfile, "Failed to study: %s\n", error);
4128 else if (extra != NULL)
4129 {
4130 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
4131 if (log_store)
4132 {
4133 size_t jitsize;
4134 if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
4135 jitsize != 0)
4136 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
4137 }
4138 }
4139 }
4140
4141 /* If /K was present, we set up for handling MARK data. */
4142
4143 if (do_mark)
4144 {
4145 if (extra == NULL)
4146 {
4147 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4148 extra->flags = 0;
4149 }
4150 extra->mark = &markptr;
4151 extra->flags |= PCRE_EXTRA_MARK;
4152 }
4153
4154 /* Extract and display information from the compiled data if required. */
4155
4156 SHOW_INFO:
4157
4158 if (do_debug)
4159 {
4160 fprintf(outfile, "------------------------------------------------------------------\n");
4161 PCRE_PRINTINT(re, outfile, debug_lengths);
4162 }
4163
4164 /* We already have the options in get_options (see above) */
4165
4166 if (do_showinfo)
4167 {
4168 unsigned long int all_options;
4169 pcre_uint32 first_char, need_char;
4170 pcre_uint32 match_limit, recursion_limit;
4171 int count, backrefmax, first_char_set, need_char_set, okpartial, jchanged,
4172 hascrorlf, maxlookbehind, match_empty;
4173 int nameentrysize, namecount;
4174 const pcre_uint8 *nametable;
4175
4176 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
4177 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
4178 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTER, &first_char) +
4179 new_info(re, NULL, PCRE_INFO_FIRSTCHARACTERFLAGS, &first_char_set) +
4180 new_info(re, NULL, PCRE_INFO_REQUIREDCHAR, &need_char) +
4181 new_info(re, NULL, PCRE_INFO_REQUIREDCHARFLAGS, &need_char_set) +
4182 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
4183 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
4184 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
4185 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
4186 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
4187 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
4188 new_info(re, NULL, PCRE_INFO_MATCH_EMPTY, &match_empty) +
4189 new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
4190 != 0)
4191 goto SKIP_DATA;
4192
4193 fprintf(outfile, "Capturing subpattern count = %d\n", count);
4194
4195 if (backrefmax > 0)
4196 fprintf(outfile, "Max back reference = %d\n", backrefmax);
4197
4198 if (maxlookbehind > 0)
4199 fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
4200
4201 if (new_info(re, NULL, PCRE_INFO_MATCHLIMIT, &match_limit) == 0)
4202 fprintf(outfile, "Match limit = %u\n", match_limit);
4203
4204 if (new_info(re, NULL, PCRE_INFO_RECURSIONLIMIT, &recursion_limit) == 0)
4205 fprintf(outfile, "Recursion limit = %u\n", recursion_limit);
4206
4207 if (namecount > 0)
4208 {
4209 fprintf(outfile, "Named capturing subpatterns:\n");
4210 while (namecount-- > 0)
4211 {
4212 int imm2_size = pcre_mode == PCRE8_MODE ? 2 : 1;
4213 int length = (int)STRLEN(nametable + imm2_size);
4214 fprintf(outfile, " ");
4215 PCHARSV(nametable, imm2_size, length, outfile);
4216 while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
4217 #ifdef SUPPORT_PCRE32
4218 if (pcre_mode == PCRE32_MODE)
4219 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR32)nametable)[0]));
4220 #endif
4221 #ifdef SUPPORT_PCRE16
4222 if (pcre_mode == PCRE16_MODE)
4223 fprintf(outfile, "%3d\n", (int)(((PCRE_SPTR16)nametable)[0]));
4224 #endif
4225 #ifdef SUPPORT_PCRE8
4226 if (pcre_mode == PCRE8_MODE)
4227 fprintf(outfile, "%3d\n", ((int)nametable[0] << 8) | (int)nametable[1]);
4228 #endif
4229 nametable += nameentrysize * CHAR_SIZE;
4230 }
4231 }
4232
4233 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
4234 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
4235 if (match_empty) fprintf(outfile, "May match empty string\n");
4236
4237 all_options = REAL_PCRE_OPTIONS(re);
4238 if (do_flip) all_options = swap_uint32(all_options);
4239
4240 if (get_options == 0) fprintf(outfile, "No options\n");
4241 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4242 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
4243 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
4244 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
4245 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
4246 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
4247 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
4248 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
4249 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
4250 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
4251 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
4252 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
4253 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
4254 ((get_options & PCRE_NO_AUTO_POSSESS) != 0)? " no_auto_possessify" : "",
4255 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
4256 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
4257 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
4258 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
4259 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "",
4260 ((get_options & PCRE_NEVER_UTF) != 0)? " never_utf" : "");
4261
4262 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
4263
4264 switch (get_options & PCRE_NEWLINE_BITS)
4265 {
4266 case PCRE_NEWLINE_CR:
4267 fprintf(outfile, "Forced newline sequence: CR\n");
4268 break;
4269
4270 case PCRE_NEWLINE_LF:
4271 fprintf(outfile, "Forced newline sequence: LF\n");
4272 break;
4273
4274 case PCRE_NEWLINE_CRLF:
4275 fprintf(outfile, "Forced newline sequence: CRLF\n");
4276 break;
4277
4278 case PCRE_NEWLINE_ANYCRLF:
4279 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
4280 break;
4281
4282 case PCRE_NEWLINE_ANY:
4283 fprintf(outfile, "Forced newline sequence: ANY\n");
4284 break;
4285
4286 default:
4287 break;
4288 }
4289
4290 if (first_char_set == 2)
4291 {
4292 fprintf(outfile, "First char at start or follows newline\n");
4293 }
4294 else if (first_char_set == 1)
4295 {
4296 const char *caseless =
4297 ((REAL_PCRE_FLAGS(re) & PCRE_FCH_CASELESS) == 0)?
4298 "" : " (caseless)";
4299
4300 if (PRINTOK(first_char))
4301 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
4302 else
4303 {
4304 fprintf(outfile, "First char = ");
4305 pchar(first_char, outfile);
4306 fprintf(outfile, "%s\n", caseless);
4307 }
4308 }
4309 else
4310 {
4311 fprintf(outfile, "No first char\n");
4312 }
4313
4314 if (need_char_set == 0)
4315 {
4316 fprintf(outfile, "No need char\n");
4317 }
4318 else
4319 {
4320 const char *caseless =
4321 ((REAL_PCRE_FLAGS(re) & PCRE_RCH_CASELESS) == 0)?
4322 "" : " (caseless)";
4323
4324 if (PRINTOK(need_char))
4325 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
4326 else
4327 {
4328 fprintf(outfile, "Need char = ");
4329 pchar(need_char, outfile);
4330 fprintf(outfile, "%s\n", caseless);
4331 }
4332 }
4333
4334 /* Don't output study size; at present it is in any case a fixed
4335 value, but it varies, depending on the computer architecture, and
4336 so messes up the test suite. (And with the /F option, it might be
4337 flipped.) If study was forced by an external -s, don't show this
4338 information unless -i or -d was also present. This means that, except
4339 when auto-callouts are involved, the output from runs with and without
4340 -s should be identical. */
4341
4342 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
4343 {
4344 if (extra == NULL)
4345 fprintf(outfile, "Study returned NULL\n");
4346 else
4347 {
4348 pcre_uint8 *start_bits = NULL;
4349 int minlength;
4350
4351 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
4352 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
4353
4354 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
4355 {
4356 if (start_bits == NULL)
4357 fprintf(outfile, "No starting char list\n");
4358 else
4359 {
4360 int i;
4361 int c = 24;
4362 fprintf(outfile, "Starting chars: ");
4363 for (i = 0; i < 256; i++)
4364 {
4365 if ((start_bits[i/8] & (1<<(i&7))) != 0)
4366 {
4367 if (c > 75)
4368 {
4369 fprintf(outfile, "\n ");
4370 c = 2;
4371 }
4372 if (PRINTOK(i) && i != ' ')
4373 {
4374 fprintf(outfile, "%c ", i);
4375 c += 2;
4376 }
4377 else
4378 {
4379 fprintf(outfile, "\\x%02x ", i);
4380 c += 5;
4381 }
4382 }
4383 }
4384 fprintf(outfile, "\n");
4385 }
4386 }
4387 }
4388
4389 /* Show this only if the JIT was set by /S, not by -s. */
4390
4391 if ((study_options & PCRE_STUDY_ALLJIT) != 0 &&
4392 (force_study_options & PCRE_STUDY_ALLJIT) == 0)
4393 {
4394 int jit;
4395 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
4396 {
4397 if (jit)
4398 fprintf(outfile, "JIT study was successful\n");
4399 else
4400 #ifdef SUPPORT_JIT
4401 fprintf(outfile, "JIT study was not successful\n");
4402 #else
4403 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
4404 #endif
4405 }
4406 }
4407 }
4408 }
4409
4410 /* If the '>' option was present, we write out the regex to a file, and
4411 that is all. The first 8 bytes of the file are the regex length and then
4412 the study length, in big-endian order. */
4413
4414 if (to_file != NULL)
4415 {
4416 FILE *f = fopen((char *)to_file, "wb");
4417 if (f == NULL)
4418 {
4419 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
4420 }
4421 else
4422 {
4423 pcre_uint8 sbuf[8];
4424
4425 if (do_flip) regexflip(re, extra);
4426 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
4427 sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
4428 sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
4429 sbuf[3] = (pcre_uint8)((true_size) & 255);
4430 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
4431 sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
4432 sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
4433 sbuf[7] = (pcre_uint8)((true_study_size) & 255);
4434
4435 if (fwrite(sbuf, 1, 8, f) < 8 ||
4436 fwrite(re, 1, true_size, f) < true_size)
4437 {
4438 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
4439 }
4440 else
4441 {
4442 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
4443
4444 /* If there is study data, write it. */
4445
4446 if (extra != NULL && (extra->flags & PCRE_EXTRA_STUDY_DATA) != 0)
4447 {
4448 if (fwrite(extra->study_data, 1, true_study_size, f) <
4449 true_study_size)
4450 {
4451 fprintf(outfile, "Write error on %s: %s\n", to_file,
4452 strerror(errno));
4453 }
4454 else fprintf(outfile, "Study data written to %s\n", to_file);
4455 }
4456 }
4457 fclose(f);
4458 }
4459
4460 new_free(re);
4461 if (extra != NULL)
4462 {
4463 PCRE_FREE_STUDY(extra);
4464 }
4465 if (locale_set)
4466 {
4467 new_free((void *)tables);
4468 setlocale(LC_CTYPE, "C");
4469 locale_set = 0;
4470 }
4471 continue; /* With next regex */
4472 }
4473 } /* End of non-POSIX compile */
4474
4475 /* Read data lines and test them */
4476
4477 for (;;)
4478 {
4479 #ifdef SUPPORT_PCRE8
4480 pcre_uint8 *q8;
4481 #endif
4482 #ifdef SUPPORT_PCRE16
4483 pcre_uint16 *q16;
4484 #endif
4485 #ifdef SUPPORT_PCRE32
4486 pcre_uint32 *q32;
4487 #endif
4488 pcre_uint8 *bptr;
4489 int *use_offsets = offsets;
4490 int use_size_offsets = size_offsets;
4491 int callout_data = 0;
4492 int callout_data_set = 0;
4493 int count;
4494 pcre_uint32 c;
4495 int copystrings = 0;
4496 int find_match_limit = default_find_match_limit;
4497 int getstrings = 0;
4498 int getlist = 0;
4499 int gmatched = 0;
4500 int start_offset = 0;
4501 int start_offset_sign = 1;
4502 int g_notempty = 0;
4503 int use_dfa = 0;
4504
4505 *copynames = 0;
4506 *getnames = 0;
4507
4508 #ifdef SUPPORT_PCRE32
4509 cn32ptr = copynames;
4510 gn32ptr = getnames;
4511 #endif
4512 #ifdef SUPPORT_PCRE16
4513 cn16ptr = copynames16;
4514 gn16ptr = getnames16;
4515 #endif
4516 #ifdef SUPPORT_PCRE8
4517 cn8ptr = copynames8;
4518 gn8ptr = getnames8;
4519 #endif
4520
4521 SET_PCRE_CALLOUT(callout);
4522 first_callout = 1;
4523 last_callout_mark = NULL;
4524 callout_extra = 0;
4525 callout_count = 0;
4526 callout_fail_count = 999999;
4527 callout_fail_id = -1;
4528 show_malloc = 0;
4529 options = 0;
4530
4531 if (extra != NULL) extra->flags &=
4532 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
4533
4534 len = 0;
4535 for (;;)
4536 {
4537 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
4538 {
4539 if (len > 0) /* Reached EOF without hitting a newline */
4540 {
4541 fprintf(outfile, "\n");
4542 break;
4543 }
4544 done = 1;
4545 goto CONTINUE;
4546 }
4547 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
4548 len = (int)strlen((char *)buffer);
4549 if (buffer[len-1] == '\n') break;
4550 }
4551
4552 while (len > 0 && isspace(buffer[len-1])) len--;
4553 buffer[len] = 0;
4554 if (len == 0) break;
4555
4556 p = buffer;
4557 while (isspace(*p)) p++;
4558
4559 #ifndef NOUTF
4560 /* Check that the data is well-formed UTF-8 if we're in UTF mode. To create
4561 invalid input to pcre_exec, you must use \x?? or \x{} sequences. */
4562
4563 if (use_utf)
4564 {
4565 pcre_uint8 *q;
4566 pcre_uint32 cc;
4567 int n = 1;
4568
4569 for (q = p; n > 0 && *q; q += n) n = utf82ord(q, &cc);
4570 if (n <= 0)
4571 {
4572 fprintf(outfile, "**Failed: invalid UTF-8 string cannot be used as input in UTF mode\n");
4573 goto NEXT_DATA;
4574 }
4575 }
4576 #endif
4577
4578 #ifdef SUPPORT_VALGRIND
4579 /* Mark the dbuffer as addressable but undefined again. */
4580
4581 if (dbuffer != NULL)
4582 {
4583 VALGRIND_MAKE_MEM_UNDEFINED(dbuffer, dbuffer_size * CHAR_SIZE);
4584 }
4585 #endif
4586
4587 /* Allocate a buffer to hold the data line; len+1 is an upper bound on
4588 the number of pcre_uchar units that will be needed. */
4589
4590 while (dbuffer == NULL || (size_t)len >= dbuffer_size)
4591 {
4592 dbuffer_size *= 2;
4593 dbuffer = (pcre_uint8 *)realloc(dbuffer, dbuffer_size * CHAR_SIZE);
4594 if (dbuffer == NULL)
4595 {
4596 fprintf(stderr, "pcretest: realloc(%d) failed\n", (int)dbuffer_size);
4597 exit(1);
4598 }
4599 }
4600
4601 #ifdef SUPPORT_PCRE8
4602 q8 = (pcre_uint8 *) dbuffer;
4603 #endif
4604 #ifdef SUPPORT_PCRE16
4605 q16 = (pcre_uint16 *) dbuffer;
4606 #endif
4607 #ifdef SUPPORT_PCRE32
4608 q32 = (pcre_uint32 *) dbuffer;
4609 #endif
4610
4611 while ((c = *p++) != 0)
4612 {
4613 int i = 0;
4614 int n = 0;
4615
4616 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
4617 In non-UTF mode, allow the value of the byte to fall through to later,
4618 where values greater than 127 are turned into UTF-8 when running in
4619 16-bit or 32-bit mode. */
4620
4621 if (c != '\\')
4622 {
4623 #ifndef NOUTF
4624 if (use_utf && HASUTF8EXTRALEN(c)) { GETUTF8INC(c, p); }
4625 #endif
4626 }
4627
4628 /* Handle backslash escapes */
4629
4630 else switch ((c = *p++))
4631 {
4632 case 'a': c = CHAR_BEL; break;
4633 case 'b': c = '\b'; break;
4634 case 'e': c = CHAR_ESC; break;
4635 case 'f': c = '\f'; break;
4636 case 'n': c = '\n'; break;
4637 case 'r': c = '\r'; break;
4638 case 't': c = '\t'; break;
4639 case 'v': c = '\v'; break;
4640
4641 case '0': case '1': case '2': case '3':
4642 case '4': case '5': case '6': case '7':
4643 c -= '0';
4644 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
4645 c = c * 8 + *p++ - '0';
4646 break;
4647
4648 case 'o':
4649 if (*p == '{')
4650 {
4651 pcre_uint8 *pt = p;
4652 c = 0;
4653 for (pt++; isdigit(*pt) && *pt != '8' && *pt != '9'; pt++)
4654 {
4655 if (++i == 12)
4656 fprintf(outfile, "** Too many octal digits in \\o{...} item; "
4657 "using only the first twelve.\n");
4658 else c = c * 8 + *pt - '0';
4659 }
4660 if (*pt == '}') p = pt + 1;
4661 else fprintf(outfile, "** Missing } after \\o{ (assumed)\n");
4662 }
4663 break;
4664
4665 case 'x':
4666 if (*p == '{')
4667 {
4668 pcre_uint8 *pt = p;
4669 c = 0;
4670
4671 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
4672 when isxdigit() is a macro that refers to its argument more than
4673 once. This is banned by the C Standard, but apparently happens in at
4674 least one MacOS environment. */
4675
4676 for (pt++; isxdigit(*pt); pt++)
4677 {
4678 if (++i == 9)
4679 fprintf(outfile, "** Too many hex digits in \\x{...} item; "
4680 "using only the first eight.\n");
4681 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
4682 }
4683 if (*pt == '}')
4684 {
4685 p = pt + 1;
4686 break;
4687 }
4688 /* Not correct form for \x{...}; fall through */
4689 }
4690
4691 /* \x without {} always defines just one byte in 8-bit mode. This
4692 allows UTF-8 characters to be constructed byte by byte, and also allows
4693 invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
4694 Otherwise, pass it down to later code so that it can be turned into
4695 UTF-8 when running in 16/32-bit mode. */
4696
4697 c = 0;
4698 while (i++ < 2 && isxdigit(*p))
4699 {
4700 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
4701 p++;
4702 }
4703 #if !defined NOUTF && defined SUPPORT_PCRE8
4704 if (use_utf && (pcre_mode == PCRE8_MODE))
4705 {
4706 *q8++ = c;
4707 continue;
4708 }
4709 #endif
4710 break;
4711
4712 case 0: /* \ followed by EOF allows for an empty line */
4713 p--;
4714 continue;
4715
4716 case '>':
4717 if (*p == '-')
4718 {
4719 start_offset_sign = -1;
4720 p++;
4721 }
4722 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
4723 start_offset *= start_offset_sign;
4724 continue;
4725
4726 case 'A': /* Option setting */
4727 options |= PCRE_ANCHORED;
4728 continue;
4729
4730 case 'B':
4731 options |= PCRE_NOTBOL;
4732 continue;
4733
4734 case 'C':
4735 if (isdigit(*p)) /* Set copy string */
4736 {
4737 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4738 copystrings |= 1U << n;
4739 }
4740 else if (isalnum(*p))
4741 {
4742 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, &cn32ptr, re);
4743 }
4744 else if (*p == '+')
4745 {
4746 callout_extra = 1;
4747 p++;
4748 }
4749 else if (*p == '-')
4750 {
4751 SET_PCRE_CALLOUT(NULL);
4752 p++;
4753 }
4754 else if (*p == '!')
4755 {
4756 callout_fail_id = 0;
4757 p++;
4758 while(isdigit(*p))
4759 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
4760 callout_fail_count = 0;
4761 if (*p == '!')
4762 {
4763 p++;
4764 while(isdigit(*p))
4765 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
4766 }
4767 }
4768 else if (*p == '*')
4769 {
4770 int sign = 1;
4771 callout_data = 0;
4772 if (*(++p) == '-') { sign = -1; p++; }
4773 while(isdigit(*p))
4774 callout_data = callout_data * 10 + *p++ - '0';
4775 callout_data *= sign;
4776 callout_data_set = 1;
4777 }
4778 continue;
4779
4780 #if !defined NODFA
4781 case 'D':
4782 #if !defined NOPOSIX
4783 if (posix || do_posix)
4784 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
4785 else
4786 #endif
4787 use_dfa = 1;
4788 continue;
4789 #endif
4790
4791 #if !defined NODFA
4792 case 'F':
4793 options |= PCRE_DFA_SHORTEST;
4794 continue;
4795 #endif
4796
4797 case 'G':
4798 if (isdigit(*p))
4799 {
4800 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4801 getstrings |= 1U << n;
4802 }
4803 else if (isalnum(*p))
4804 {
4805 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, &gn32ptr, re);
4806 }
4807 continue;
4808
4809 case 'J':
4810 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4811 if (extra != NULL
4812 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
4813 && extra->executable_jit != NULL)
4814 {
4815 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
4816 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
4817 PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
4818 }
4819 continue;
4820
4821 case 'L':
4822 getlist = 1;
4823 continue;
4824
4825 case 'M':
4826 find_match_limit = 1;
4827 continue;
4828
4829 case 'N':
4830 if ((options & PCRE_NOTEMPTY) != 0)
4831 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
4832 else
4833 options |= PCRE_NOTEMPTY;
4834 continue;
4835
4836 case 'O':
4837 while(isdigit(*p))
4838 {
4839 if (n > (INT_MAX-10)/10) /* Hack to stop fuzzers */
4840 {
4841 printf("** \\O argument is too big\n");
4842 yield = 1;
4843 goto EXIT;
4844 }
4845 n = n * 10 + *p++ - '0';
4846 }
4847 if (n > size_offsets_max)
4848 {
4849 size_offsets_max = n;
4850 free(offsets);
4851 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
4852 if (offsets == NULL)
4853 {
4854 printf("** Failed to get %d bytes of memory for offsets vector\n",
4855 (int)(size_offsets_max * sizeof(int)));
4856 yield = 1;
4857 goto EXIT;
4858 }
4859 }
4860 use_size_offsets = n;
4861 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
4862 else use_offsets = offsets + size_offsets_max - n; /* To catch overruns */
4863 continue;
4864
4865 case 'P':
4866 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
4867 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
4868 continue;
4869
4870 case 'Q':
4871 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4872 if (extra == NULL)
4873 {
4874 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4875 extra->flags = 0;
4876 }
4877 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
4878 extra->match_limit_recursion = n;
4879 continue;
4880
4881 case 'q':
4882 while(isdigit(*p)) n = n * 10 + *p++ - '0';
4883 if (extra == NULL)
4884 {
4885 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
4886 extra->flags = 0;
4887 }
4888 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
4889 extra->match_limit = n;
4890 continue;
4891
4892 #if !defined NODFA
4893 case 'R':
4894 options |= PCRE_DFA_RESTART;
4895 continue;
4896 #endif
4897
4898 case 'S':
4899 show_malloc = 1;
4900 continue;
4901
4902 case 'Y':
4903 options |= PCRE_NO_START_OPTIMIZE;
4904 continue;
4905
4906 case 'Z':
4907 options |= PCRE_NOTEOL;
4908 continue;
4909
4910 case '?':
4911 options |= PCRE_NO_UTF8_CHECK;
4912 continue;
4913
4914 case '<':
4915 {
4916 int x = check_mc_option(p, outfile, TRUE, "escape sequence");
4917 if (x == 0) goto NEXT_DATA;
4918 options |= x;
4919 while (*p++ != '>');
4920 }
4921 continue;
4922 }
4923
4924 /* We now have a character value in c that may be greater than 255.
4925 In 8-bit mode we convert to UTF-8 if we are in UTF mode. Values greater
4926 than 127 in UTF mode must have come from \x{...} or octal constructs
4927 because values from \x.. get this far only in non-UTF mode. */
4928
4929 #ifdef SUPPORT_PCRE8
4930 if (pcre_mode == PCRE8_MODE)
4931 {
4932 #ifndef NOUTF
4933 if (use_utf)
4934 {
4935 if (c > 0x7fffffff)
4936 {
4937 fprintf(outfile, "** Character \\x{%x} is greater than 0x7fffffff "
4938 "and so cannot be converted to UTF-8\n", c);
4939 goto NEXT_DATA;
4940 }
4941 q8 += ord2utf8(c, q8);
4942 }
4943 else
4944 #endif
4945 {
4946 if (c > 0xffu)
4947 {
4948 fprintf(outfile, "** Character \\x{%x} is greater than 255 "
4949 "and UTF-8 mode is not enabled.\n", c);
4950 fprintf(outfile, "** Truncation will probably give the wrong "
4951 "result.\n");
4952 }
4953 *q8++ = c;
4954 }
4955 }
4956 #endif
4957 #ifdef SUPPORT_PCRE16
4958 if (pcre_mode == PCRE16_MODE)
4959 {
4960 #ifndef NOUTF
4961 if (use_utf)
4962 {
4963 if (c > 0x10ffffu)
4964 {
4965 fprintf(outfile, "** Failed: character \\x{%x} is greater than "
4966 "0x10ffff and so cannot be converted to UTF-16\n", c);
4967 goto NEXT_DATA;
4968 }
4969 else if (c >= 0x10000u)
4970 {
4971 c-= 0x10000u;
4972 *q16++ = 0xD800 | (c >> 10);
4973 *q16++ = 0xDC00 | (c & 0x3ff);
4974 }
4975 else
4976 *q16++ = c;
4977 }
4978 else
4979 #endif
4980 {
4981 if (c > 0xffffu)
4982 {
4983 fprintf(outfile, "** Character \\x{%x} is greater than 0xffff "
4984 "and UTF-16 mode is not enabled.\n", c);
4985 fprintf(outfile, "** Truncation will probably give the wrong "
4986 "result.\n");
4987 }
4988
4989 *q16++ = c;
4990 }
4991 }
4992 #endif
4993 #ifdef SUPPORT_PCRE32
4994 if (pcre_mode == PCRE32_MODE)
4995 {
4996 *q32++ = c;
4997 }
4998 #endif
4999
5000 }
5001
5002 /* Reached end of subject string */
5003
5004 #ifdef SUPPORT_PCRE8
5005 if (pcre_mode == PCRE8_MODE)
5006 {
5007 *q8 = 0;
5008 len = (int)(q8 - (pcre_uint8 *)dbuffer);
5009 }
5010 #endif
5011 #ifdef SUPPORT_PCRE16
5012 if (pcre_mode == PCRE16_MODE)
5013 {
5014 *q16 = 0;
5015 len = (int)(q16 - (pcre_uint16 *)dbuffer);
5016 }
5017 #endif
5018 #ifdef SUPPORT_PCRE32
5019 if (pcre_mode == PCRE32_MODE)
5020 {
5021 *q32 = 0;
5022 len = (int)(q32 - (pcre_uint32 *)dbuffer);
5023 }
5024 #endif
5025
5026 /* If we're compiling with explicit valgrind support, Mark the data from after
5027 its end to the end of the buffer as unaddressable, so that a read over the end
5028 of the buffer will be seen by valgrind, even if it doesn't cause a crash.
5029 If we're not building with valgrind support, at least move the data to the end
5030 of the buffer so that it might at least cause a crash.
5031 If we are using the POSIX interface, we must include the terminating zero. */
5032
5033 bptr = dbuffer;
5034
5035 #if !defined NOPOSIX
5036 if (posix || do_posix)
5037 {
5038 #ifdef SUPPORT_VALGRIND
5039 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len + 1, dbuffer_size - (len + 1));
5040 #else
5041 memmove(bptr + dbuffer_size - len - 1, bptr, len + 1);
5042 bptr += dbuffer_size - len - 1;
5043 #endif
5044 }
5045 else
5046 #endif
5047 {
5048 #ifdef SUPPORT_VALGRIND
5049 VALGRIND_MAKE_MEM_NOACCESS(dbuffer + len * CHAR_SIZE, (dbuffer_size - len) * CHAR_SIZE);
5050 #else
5051 bptr = memmove(bptr + (dbuffer_size - len) * CHAR_SIZE, bptr, len * CHAR_SIZE);
5052 #endif
5053 }
5054
5055 if ((all_use_dfa || use_dfa) && find_match_limit)
5056 {
5057 printf("** Match limit not relevant for DFA matching: ignored\n");
5058 find_match_limit = 0;
5059 }
5060
5061 /* Handle matching via the POSIX interface, which does not
5062 support timing or playing with the match limit or callout data. */
5063
5064 #if !defined NOPOSIX
5065 if (posix || do_posix)
5066 {
5067 int rc;
5068 int eflags = 0;
5069 regmatch_t *pmatch = NULL;
5070 if (use_size_offsets > 0)
5071 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
5072 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
5073 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
5074 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
5075
5076 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
5077
5078 if (rc != 0)
5079 {
5080 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
5081 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
5082 }
5083 else if ((REAL_PCRE_OPTIONS(preg.re_pcre) & PCRE_NO_AUTO_CAPTURE) != 0)
5084 {
5085 fprintf(outfile, "Matched with REG_NOSUB\n");
5086 }
5087 else
5088 {
5089 size_t i;
5090 for (i = 0; i < (size_t)use_size_offsets; i++)
5091 {
5092 if (pmatch[i].rm_so >= 0)
5093 {
5094 fprintf(outfile, "%2d: ", (int)i);
5095 PCHARSV(dbuffer, pmatch[i].rm_so,
5096 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
5097 fprintf(outfile, "\n");
5098 if (do_showcaprest || (i == 0 && do_showrest))
5099 {
5100 fprintf(outfile, "%2d+ ", (int)i);
5101 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
5102 outfile);
5103 fprintf(outfile, "\n");
5104 }
5105 }
5106 }
5107 }
5108 free(pmatch);
5109 goto NEXT_DATA;
5110 }
5111
5112 #endif /* !defined NOPOSIX */
5113
5114 /* Handle matching via the native interface - repeats for /g and /G */
5115
5116 /* Ensure that there is a JIT callback if we want to verify that JIT was
5117 actually used. If jit_stack == NULL, no stack has yet been assigned. */
5118
5119 if (verify_jit && jit_stack == NULL && extra != NULL)
5120 { PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack); }
5121
5122 for (;; gmatched++) /* Loop for /g or /G */
5123 {
5124 markptr = NULL;
5125 jit_was_used = FALSE;
5126
5127 if (timeitm > 0)
5128 {
5129 register int i;
5130 clock_t time_taken;
5131 clock_t start_time = clock();
5132
5133 #if !defined NODFA
5134 if (all_use_dfa || use_dfa)
5135 {
5136 if ((options & PCRE_DFA_RESTART) != 0)
5137 {
5138 fprintf(outfile, "Timing DFA restarts is not supported\n");
5139 break;
5140 }
5141 if (dfa_workspace == NULL)
5142 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5143 for (i = 0; i < timeitm; i++)
5144 {
5145 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5146 (options | g_notempty), use_offsets, use_size_offsets,
5147 dfa_workspace, DFA_WS_DIMENSION);
5148 }
5149 }
5150 else
5151 #endif
5152
5153 for (i = 0; i < timeitm; i++)
5154 {
5155 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5156 (options | g_notempty), use_offsets, use_size_offsets);
5157 }
5158 total_match_time += (time_taken = clock() - start_time);
5159 fprintf(outfile, "Execute time %.4f milliseconds\n",
5160 (((double)time_taken * 1000.0) / (double)timeitm) /
5161 (double)CLOCKS_PER_SEC);
5162 }
5163
5164 /* If find_match_limit is set, we want to do repeated matches with
5165 varying limits in order to find the minimum value for the match limit and
5166 for the recursion limit. The match limits are relevant only to the normal
5167 running of pcre_exec(), so disable the JIT optimization. This makes it
5168 possible to run the same set of tests with and without JIT externally
5169 requested. */
5170
5171 if (find_match_limit)
5172 {
5173 if (extra != NULL) { PCRE_FREE_STUDY(extra); }
5174 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5175 extra->flags = 0;
5176
5177 (void)check_match_limit(re, extra, bptr, len, start_offset,
5178 options|g_notempty, use_offsets, use_size_offsets,
5179 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
5180 PCRE_ERROR_MATCHLIMIT, "match()");
5181
5182 count = check_match_limit(re, extra, bptr, len, start_offset,
5183 options|g_notempty, use_offsets, use_size_offsets,
5184 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
5185 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
5186 }
5187
5188 /* If callout_data is set, use the interface with additional data */
5189
5190 else if (callout_data_set)
5191 {
5192 if (extra == NULL)
5193 {
5194 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
5195 extra->flags = 0;
5196 }
5197 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
5198 extra->callout_data = &callout_data;
5199 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5200 options | g_notempty, use_offsets, use_size_offsets);
5201 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
5202 }
5203
5204 /* The normal case is just to do the match once, with the default
5205 value of match_limit. */
5206
5207 #if !defined NODFA
5208 else if (all_use_dfa || use_dfa)
5209 {
5210 if (dfa_workspace == NULL)
5211 dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
5212 if (dfa_matched++ == 0)
5213 dfa_workspace[0] = -1; /* To catch bad restart */
5214 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
5215 (options | g_notempty), use_offsets, use_size_offsets, dfa_workspace,
5216 DFA_WS_DIMENSION);
5217 if (count == 0)
5218 {
5219 fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n");
5220 count = use_size_offsets/2;
5221 }
5222 }
5223 #endif
5224
5225 else
5226 {
5227 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
5228 options | g_notempty, use_offsets, use_size_offsets);
5229 if (count == 0)
5230 {
5231 fprintf(outfile, "Matched, but too many substrings\n");
5232 /* 2 is a special case; match can be returned */
5233 count = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5234 }
5235 }
5236
5237 /* Matched */
5238
5239 if (count >= 0)
5240 {
5241 int i, maxcount;
5242 void *cnptr, *gnptr;
5243
5244 #if !defined NODFA
5245 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
5246 #endif
5247 /* 2 is a special case; match can be returned */
5248 maxcount = (use_size_offsets == 2)? 1 : use_size_offsets/3;
5249
5250 /* This is a check against a lunatic return value. */
5251
5252 if (count > maxcount)
5253 {
5254 fprintf(outfile,
5255 "** PCRE error: returned count %d is too big for offset size %d\n",
5256 count, use_size_offsets);
5257 count = use_size_offsets/3;
5258 if (do_g || do_G)
5259 {
5260 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
5261 do_g = do_G = FALSE; /* Break g/G loop */
5262 }
5263 }
5264
5265 /* do_allcaps requests showing of all captures in the pattern, to check
5266 unset ones at the end. */
5267
5268 if (do_allcaps)
5269 {
5270 if (all_use_dfa || use_dfa)
5271 {
5272 fprintf(outfile, "** Show all captures ignored after DFA matching\n");
5273 }
5274 else
5275 {
5276 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
5277 goto SKIP_DATA;
5278 count++; /* Allow for full match */
5279 if (count * 2 > use_size_offsets) count = use_size_offsets/2;
5280 }
5281 }
5282
5283 /* Output the captured substrings. Note that, for the matched string,
5284 the use of \K in an assertion can make the start later than the end. */
5285
5286 for (i = 0; i < count * 2; i += 2)
5287 {
5288 if (use_offsets[i] < 0)
5289 {
5290 if (use_offsets[i] != -1)
5291 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5292 use_offsets[i], i);
5293 if (use_offsets[i+1] != -1)
5294 fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
5295 use_offsets[i+1], i+1);
5296 fprintf(outfile, "%2d: <unset>\n", i/2);
5297 }
5298 else
5299 {
5300 int start = use_offsets[i];
5301 int end = use_offsets[i+1];
5302
5303 if (start > end)
5304 {
5305 start = use_offsets[i+1];
5306 end = use_offsets[i];
5307 fprintf(outfile, "Start of matched string is beyond its end - "
5308 "displaying from end to start.\n");
5309 }
5310
5311 fprintf(outfile, "%2d: ", i/2);
5312 PCHARSV(bptr, start, end - start, outfile);
5313 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5314 fprintf(outfile, "\n");
5315
5316 /* Note: don't use the start/end variables here because we want to
5317 show the text from what is reported as the end. */
5318
5319 if (do_showcaprest || (i == 0 && do_showrest))
5320 {
5321 fprintf(outfile, "%2d+ ", i/2);
5322 PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
5323 outfile);
5324 fprintf(outfile, "\n");
5325 }
5326 }
5327 }
5328
5329 if (markptr != NULL)
5330 {
5331 fprintf(outfile, "MK: ");
5332 PCHARSV(markptr, 0, -1, outfile);
5333 fprintf(outfile, "\n");
5334 }
5335
5336 for (i = 0; i < 32; i++)
5337 {
5338 if ((copystrings & (1U << i)) != 0)
5339 {
5340 int rc;
5341 char copybuffer[256];
5342 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
5343 copybuffer, sizeof(copybuffer));
5344 if (rc < 0)
5345 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
5346 else
5347 {
5348 fprintf(outfile, "%2dC ", i);
5349 PCHARSV(copybuffer, 0, rc, outfile);
5350 fprintf(outfile, " (%d)\n", rc);
5351 }
5352 }
5353 }
5354
5355 cnptr = copynames;
5356 for (;;)
5357 {
5358 int rc;
5359 char copybuffer[256];
5360
5361 #ifdef SUPPORT_PCRE32
5362 if (pcre_mode == PCRE32_MODE)
5363 {
5364 if (*(pcre_uint32 *)cnptr == 0) break;
5365 }
5366 #endif
5367 #ifdef SUPPORT_PCRE16
5368 if (pcre_mode == PCRE16_MODE)
5369 {
5370 if (*(pcre_uint16 *)cnptr == 0) break;
5371 }
5372 #endif
5373 #ifdef SUPPORT_PCRE8
5374 if (pcre_mode == PCRE8_MODE)
5375 {
5376 if (*(pcre_uint8 *)cnptr == 0) break;
5377 }
5378 #endif
5379
5380 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5381 cnptr, copybuffer, sizeof(copybuffer));
5382
5383 if (rc < 0)
5384 {
5385 fprintf(outfile, "copy substring ");
5386 PCHARSV(cnptr, 0, -1, outfile);
5387 fprintf(outfile, " failed %d\n", rc);
5388 }
5389 else
5390 {
5391 fprintf(outfile, " C ");
5392 PCHARSV(copybuffer, 0, rc, outfile);
5393 fprintf(outfile, " (%d) ", rc);
5394 PCHARSV(cnptr, 0, -1, outfile);
5395 putc('\n', outfile);
5396 }
5397
5398 cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
5399 }
5400
5401 for (i = 0; i < 32; i++)
5402 {
5403 if ((getstrings & (1U << i)) != 0)
5404 {
5405 int rc;
5406 const char *substring;
5407 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
5408 if (rc < 0)
5409 fprintf(outfile, "get substring %d failed %d\n", i, rc);
5410 else
5411 {
5412 fprintf(outfile, "%2dG ", i);
5413 PCHARSV(substring, 0, rc, outfile);
5414 fprintf(outfile, " (%d)\n", rc);
5415 PCRE_FREE_SUBSTRING(substring);
5416 }
5417 }
5418 }
5419
5420 gnptr = getnames;
5421 for (;;)
5422 {
5423 int rc;
5424 const char *substring;
5425
5426 #ifdef SUPPORT_PCRE32
5427 if (pcre_mode == PCRE32_MODE)
5428 {
5429 if (*(pcre_uint32 *)gnptr == 0) break;
5430 }
5431 #endif
5432 #ifdef SUPPORT_PCRE16
5433 if (pcre_mode == PCRE16_MODE)
5434 {
5435 if (*(pcre_uint16 *)gnptr == 0) break;
5436 }
5437 #endif
5438 #ifdef SUPPORT_PCRE8
5439 if (pcre_mode == PCRE8_MODE)
5440 {
5441 if (*(pcre_uint8 *)gnptr == 0) break;
5442 }
5443 #endif
5444
5445 PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
5446 gnptr, &substring);
5447 if (rc < 0)
5448 {
5449 fprintf(outfile, "get substring ");
5450 PCHARSV(gnptr, 0, -1, outfile);
5451 fprintf(outfile, " failed %d\n", rc);
5452 }
5453 else
5454 {
5455 fprintf(outfile, " G ");
5456 PCHARSV(substring, 0, rc, outfile);
5457 fprintf(outfile, " (%d) ", rc);
5458 PCHARSV(gnptr, 0, -1, outfile);
5459 PCRE_FREE_SUBSTRING(substring);
5460 putc('\n', outfile);
5461 }
5462
5463 gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
5464 }
5465
5466 if (getlist)
5467 {
5468 int rc;
5469 const char **stringlist;
5470 PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
5471 if (rc < 0)
5472 fprintf(outfile, "get substring list failed %d\n", rc);
5473 else
5474 {
5475 for (i = 0; i < count; i++)
5476 {
5477 fprintf(outfile, "%2dL ", i);
5478 PCHARSV(stringlist[i], 0, -1, outfile);
5479 putc('\n', outfile);
5480 }
5481 if (stringlist[i] != NULL)
5482 fprintf(outfile, "string list not terminated by NULL\n");
5483 PCRE_FREE_SUBSTRING_LIST(stringlist);
5484 }
5485 }
5486 }
5487
5488 /* There was a partial match. If the bumpalong point is not the same as
5489 the first inspected character, show the offset explicitly. */
5490
5491 else if (count == PCRE_ERROR_PARTIAL)
5492 {
5493 fprintf(outfile, "Partial match");
5494 if (use_size_offsets > 2 && use_offsets[0] != use_offsets[2])
5495 fprintf(outfile, " at offset %d", use_offsets[2]);
5496 if (markptr != NULL)
5497 {
5498 fprintf(outfile, ", mark=");
5499 PCHARSV(markptr, 0, -1, outfile);
5500 }
5501 if (use_size_offsets > 1)
5502 {
5503 fprintf(outfile, ": ");
5504 PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
5505 outfile);
5506 }
5507 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5508 fprintf(outfile, "\n");
5509 break; /* Out of the /g loop */
5510 }
5511
5512 /* Failed to match. If this is a /g or /G loop and we previously set
5513 g_notempty after a null match, this is not necessarily the end. We want
5514 to advance the start offset, and continue. We won't be at the end of the
5515 string - that was checked before setting g_notempty.
5516
5517 Complication arises in the case when the newline convention is "any",
5518 "crlf", or "anycrlf". If the previous match was at the end of a line
5519 terminated by CRLF, an advance of one character just passes the \r,
5520 whereas we should prefer the longer newline sequence, as does the code in
5521 pcre_exec(). Fudge the offset value to achieve this. We check for a
5522 newline setting in the pattern; if none was set, use PCRE_CONFIG() to
5523 find the default.
5524
5525 Otherwise, in the case of UTF-8 matching, the advance must be one
5526 character, not one byte. */
5527
5528 else
5529 {
5530 if (g_notempty != 0)
5531 {
5532 int onechar = 1;
5533 unsigned int obits = REAL_PCRE_OPTIONS(re);
5534 use_offsets[0] = start_offset;
5535 if ((obits & PCRE_NEWLINE_BITS) == 0)
5536 {
5537 int d;
5538 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
5539 /* Note that these values are always the ASCII ones, even in
5540 EBCDIC environments. CR = 13, NL = 10. */
5541 obits = (d == 13)? PCRE_NEWLINE_CR :
5542 (d == 10)? PCRE_NEWLINE_LF :
5543 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
5544 (d == -2)? PCRE_NEWLINE_ANYCRLF :
5545 (d == -1)? PCRE_NEWLINE_ANY : 0;
5546 }
5547 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
5548 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
5549 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
5550 &&
5551 start_offset < len - 1 && (
5552 #ifdef SUPPORT_PCRE8
5553 (pcre_mode == PCRE8_MODE &&
5554 bptr[start_offset] == '\r' &&
5555 bptr[start_offset + 1] == '\n') ||
5556 #endif
5557 #ifdef SUPPORT_PCRE16
5558 (pcre_mode == PCRE16_MODE &&
5559 ((PCRE_SPTR16)bptr)[start_offset] == '\r' &&
5560 ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n') ||
5561 #endif
5562 #ifdef SUPPORT_PCRE32
5563 (pcre_mode == PCRE32_MODE &&
5564 ((PCRE_SPTR32)bptr)[start_offset] == '\r' &&
5565 ((PCRE_SPTR32)bptr)[start_offset + 1] == '\n') ||
5566 #endif
5567 0))
5568 onechar++;
5569 else if (use_utf)
5570 {
5571 while (start_offset + onechar < len)
5572 {
5573 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
5574 onechar++;
5575 }
5576 }
5577 use_offsets[1] = start_offset + onechar;
5578 }
5579 else
5580 {
5581 switch(count)
5582 {
5583 case PCRE_ERROR_NOMATCH:
5584 if (gmatched == 0)
5585 {
5586 if (markptr == NULL)
5587 {
5588 fprintf(outfile, "No match");
5589 }
5590 else
5591 {
5592 fprintf(outfile, "No match, mark = ");
5593 PCHARSV(markptr, 0, -1, outfile);
5594 }
5595 if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
5596 putc('\n', outfile);
5597 }
5598 break;
5599
5600 case PCRE_ERROR_BADUTF8:
5601 case PCRE_ERROR_SHORTUTF8:
5602 fprintf(outfile, "Error %d (%s UTF-%d string)", count,
5603 (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
5604 8 * CHAR_SIZE);
5605 if (use_size_offsets >= 2)
5606 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
5607 use_offsets[1]);
5608 fprintf(outfile, "\n");
5609 break;
5610
5611 case PCRE_ERROR_BADUTF8_OFFSET:
5612 fprintf(outfile, "Error %d (bad UTF-%d offset)\n", count,
5613 8 * CHAR_SIZE);
5614 break;
5615
5616 default:
5617 if (count < 0 &&
5618 (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
5619 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
5620 else
5621 fprintf(outfile, "Error %d (Unexpected value)\n", count);
5622 break;
5623 }
5624
5625 break; /* Out of the /g loop */
5626 }
5627 }
5628
5629 /* If not /g or /G we are done */
5630
5631 if (!do_g && !do_G) break;
5632
5633 if (use_offsets == NULL)
5634 {
5635 fprintf(outfile, "Cannot do global matching without an ovector\n");
5636 break;
5637 }
5638
5639 if (use_size_offsets < 2)
5640 {
5641 fprintf(outfile, "Cannot do global matching with an ovector size < 2\n");
5642 break;
5643 }
5644
5645 /* If we have matched an empty string, first check to see if we are at
5646 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
5647 Perl's /g option does. This turns out to be rather cunning. First we set
5648 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
5649 same point. If this fails (picked up above) we advance to the next
5650 character. */
5651
5652 g_notempty = 0;
5653
5654 if (use_offsets[0] == use_offsets[1])
5655 {
5656 if (use_offsets[0] == len) break;
5657 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
5658 }
5659
5660 /* For /g, update the start offset, leaving the rest alone. There is a
5661 tricky case when \K is used in a positive lookbehind assertion. This can
5662 cause the end of the match to be less than or equal to the start offset.
5663 In this case we restart at one past the start offset. This may return the
5664 same match if the original start offset was bumped along during the
5665 match, but eventually the new start offset will hit the actual start
5666 offset. (In PCRE2 the true start offset is available, and this can be
5667 done better. It is not worth doing more than making sure we do not loop
5668 at this stage in the life of PCRE1.) */
5669
5670 if (do_g)
5671 {
5672 if (g_notempty == 0 && use_offsets[1] <= start_offset)
5673 {
5674 if (start_offset >= len) break; /* End of subject */
5675 start_offset++;
5676 if (use_utf)
5677 {
5678 while (start_offset < len)
5679 {
5680 if ((bptr[start_offset] & 0xc0) != 0x80) break;
5681 start_offset++;
5682 }
5683 }
5684 }
5685 else start_offset = use_offsets[1];
5686 }
5687
5688 /* For /G, update the pointer and length */
5689
5690 else
5691 {
5692 bptr += use_offsets[1] * CHAR_SIZE;
5693 len -= use_offsets[1];
5694 }
5695 } /* End of loop for /g and /G */
5696
5697 NEXT_DATA: continue;
5698 } /* End of loop for data lines */
5699
5700 CONTINUE:
5701
5702 #if !defined NOPOSIX
5703 if ((posix || do_posix) && preg.re_pcre != 0) regfree(&preg);
5704 #endif
5705
5706 if (re != NULL) new_free(re);
5707 if (extra != NULL)
5708 {
5709 PCRE_FREE_STUDY(extra);
5710 }
5711 if (locale_set)
5712 {
5713 new_free((void *)tables);
5714 setlocale(LC_CTYPE, "C");
5715 locale_set = 0;
5716 }
5717 if (jit_stack != NULL)
5718 {
5719 PCRE_JIT_STACK_FREE(jit_stack);
5720 jit_stack = NULL;
5721 }
5722 }
5723
5724 if (infile == stdin) fprintf(outfile, "\n");
5725
5726 if (showtotaltimes)
5727 {
5728 fprintf(outfile, "--------------------------------------\n");
5729 if (timeit > 0)
5730 {
5731 fprintf(outfile, "Total compile time %.4f milliseconds\n",
5732 (((double)total_compile_time * 1000.0) / (double)timeit) /
5733 (double)CLOCKS_PER_SEC);
5734 fprintf(outfile, "Total study time %.4f milliseconds\n",
5735 (((double)total_study_time * 1000.0) / (double)timeit) /
5736 (double)CLOCKS_PER_SEC);
5737 }
5738 fprintf(outfile, "Total execute time %.4f milliseconds\n",
5739 (((double)total_match_time * 1000.0) / (double)timeitm) /
5740 (double)CLOCKS_PER_SEC);
5741 }
5742
5743 EXIT:
5744
5745 if (infile != NULL && infile != stdin) fclose(infile);
5746 if (outfile != NULL && outfile != stdout) fclose(outfile);
5747
5748 free(buffer);
5749 free(dbuffer);
5750 free(pbuffer);
5751 free(offsets);
5752
5753 #ifdef SUPPORT_PCRE16
5754 if (buffer16 != NULL) free(buffer16);
5755 #endif
5756 #ifdef SUPPORT_PCRE32
5757 if (buffer32 != NULL) free(buffer32);
5758 #endif
5759
5760 #if !defined NODFA
5761 if (dfa_workspace != NULL)
5762 free(dfa_workspace);
5763 #endif
5764
5765 #if defined(__VMS)
5766 yield = SS$_NORMAL; /* Return values via DCL symbols */
5767 #endif
5768
5769 return yield;
5770 }
5771
5772 /* End of pcretest.c */
5773
5774