1 /*************************************************
2 * pcre2grep program *
3 *************************************************/
4
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15
16 Copyright (c) 1997-2020 University of Cambridge
17
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21
22 * Redistributions of source code must retain the above copyright notice,
23 this list of conditions and the following disclaimer.
24
25 * Redistributions in binary form must reproduce the above copyright
26 notice, this list of conditions and the following disclaimer in the
27 documentation and/or other materials provided with the distribution.
28
29 * Neither the name of the University of Cambridge nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57
58 #include <sys/types.h>
59 #include <sys/stat.h>
60
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62 && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65
/* Some CMake configurations still define WIN32 under Cygwin; undo that. */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76
77 #ifdef WIN32
78 #include <io.h> /* For _setmode() */
79 #include <fcntl.h> /* For _O_BINARY */
80 #endif
81
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112
/* Old versions of MSVC and other pre-C99 compilers do not support %td or %zu,
and even some compilers that claim to be C99 do not support them (hence
DISABLE_PERCENT_ZT). */
115
116 #if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
117 (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
118 #ifdef _WIN64
119 #define SIZ_FORM "llu"
120 #else
121 #define SIZ_FORM "lu"
122 #endif
123 #else
124 #define SIZ_FORM "zu"
125 #endif
126
127 #define FALSE 0
128 #define TRUE 1
129
130 typedef int BOOL;
131
132 #define DEFAULT_CAPTURE_MAX 50
133
134 #if BUFSIZ > 8192
135 #define MAXPATLEN BUFSIZ
136 #else
137 #define MAXPATLEN 8192
138 #endif
139
140 #define FNBUFSIZ 2048
141 #define ERRBUFSIZ 256
142
143 /* Values for the "filenames" variable, which specifies options for file name
144 output. The order is important; it is assumed that a file name is wanted for
145 all values greater than FN_DEFAULT. */
146
147 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
148
149 /* File reading styles */
150
151 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
152
153 /* Actions for the -d and -D options */
154
155 enum { dee_READ, dee_SKIP, dee_RECURSE };
156 enum { DEE_READ, DEE_SKIP };
157
158 /* Actions for special processing options (flag bits) */
159
160 #define PO_WORD_MATCH 0x0001
161 #define PO_LINE_MATCH 0x0002
162 #define PO_FIXED_STRINGS 0x0004
163
164 /* Binary file options */
165
166 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
167
168 /* Return values from decode_dollar_escape() */
169
170 enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
171
172 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
173 environments), a warning is issued if the value of fwrite() is ignored.
174 Unfortunately, casting to (void) does not suppress the warning. To get round
175 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
176 apply to fprintf(). */
177
178 #define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
179
180 /* Under Windows, we have to set stdout to be binary, so that it does not
181 convert \r\n at the ends of output lines to \r\r\n. However, that means that
182 any messages written to stdout must have \r\n as their line terminator. This is
183 handled by using STDOUT_NL as the newline string. We also use a normal double
184 quote for the example, as single quotes aren't usually available. */
185
186 #ifdef WIN32
187 #define STDOUT_NL "\r\n"
188 #define STDOUT_NL_LEN 2
189 #define QUOT "\""
190 #else
191 #define STDOUT_NL "\n"
192 #define STDOUT_NL_LEN 1
193 #define QUOT "'"
194 #endif
195
196 /* This code is returned from decode_dollar_escape() when $n is encountered,
197 and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
198 point. */
199
200 #define STDOUT_NL_CODE 0x7fffffffu
201
202
203
204 /*************************************************
205 * Global variables *
206 *************************************************/
207
208 /* Jeffrey Friedl has some debugging requirements that are not part of the
209 regular code. */
210
211 #ifdef JFRIEDL_DEBUG
212 static int S_arg = -1;
213 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
214 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
215 static const char *jfriedl_prefix = "";
216 static const char *jfriedl_postfix = "";
217 #endif
218
219 static const char *colour_string = "1;31";
220 static const char *colour_option = NULL;
221 static const char *dee_option = NULL;
222 static const char *DEE_option = NULL;
223 static const char *locale = NULL;
224 static const char *newline_arg = NULL;
225 static const char *om_separator = NULL;
226 static const char *stdin_name = "(standard input)";
227 static const char *output_text = NULL;
228
229 static char *main_buffer = NULL;
230
231 static int after_context = 0;
232 static int before_context = 0;
233 static int binary_files = BIN_BINARY;
234 static int both_context = 0;
235 static int bufthird = PCRE2GREP_BUFSIZE;
236 static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
237 static int bufsize = 3*PCRE2GREP_BUFSIZE;
238 static int endlinetype;
239
240 static int count_limit = -1; /* Not long, so that it works with OP_NUMBER */
241 static unsigned long int counts_printed = 0;
242 static unsigned long int total_count = 0;
243
244 #ifdef WIN32
245 static int dee_action = dee_SKIP;
246 #else
247 static int dee_action = dee_READ;
248 #endif
249
250 static int DEE_action = DEE_READ;
251 static int error_count = 0;
252 static int filenames = FN_DEFAULT;
253
254 #ifdef SUPPORT_PCRE2GREP_JIT
255 static BOOL use_jit = TRUE;
256 #else
257 static BOOL use_jit = FALSE;
258 #endif
259
260 static const uint8_t *character_tables = NULL;
261
262 static uint32_t pcre2_options = 0;
263 static uint32_t extra_options = 0;
264 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
265 static uint32_t match_limit = 0;
266 static uint32_t depth_limit = 0;
267
268 static pcre2_compile_context *compile_context;
269 static pcre2_match_context *match_context;
270 static pcre2_match_data *match_data;
271 static PCRE2_SIZE *offsets;
272 static uint32_t offset_size;
273 static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
274
275 static BOOL count_only = FALSE;
276 static BOOL do_colour = FALSE;
277 #ifdef WIN32
278 static BOOL do_ansi = FALSE;
279 #endif
280 static BOOL file_offsets = FALSE;
281 static BOOL hyphenpending = FALSE;
282 static BOOL invert = FALSE;
283 static BOOL line_buffered = FALSE;
284 static BOOL line_offsets = FALSE;
285 static BOOL multiline = FALSE;
286 static BOOL number = FALSE;
287 static BOOL omit_zero_count = FALSE;
288 static BOOL resource_error = FALSE;
289 static BOOL quiet = FALSE;
290 static BOOL show_total_count = FALSE;
291 static BOOL silent = FALSE;
292 static BOOL utf = FALSE;
293
294 static uint8_t utf8_buffer[8];
295
296
297 /* Structure for list of --only-matching capturing numbers. */
298
299 typedef struct omstr {
300 struct omstr *next;
301 int groupnum;
302 } omstr;
303
304 static omstr *only_matching = NULL;
305 static omstr *only_matching_last = NULL;
306 static int only_matching_count;
307
308 /* Structure for holding the two variables that describe a number chain. */
309
310 typedef struct omdatastr {
311 omstr **anchor;
312 omstr **lastptr;
313 } omdatastr;
314
315 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
316
317 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
318
319 typedef struct fnstr {
320 struct fnstr *next;
321 char *name;
322 } fnstr;
323
324 static fnstr *exclude_from = NULL;
325 static fnstr *exclude_from_last = NULL;
326 static fnstr *include_from = NULL;
327 static fnstr *include_from_last = NULL;
328
329 static fnstr *file_lists = NULL;
330 static fnstr *file_lists_last = NULL;
331 static fnstr *pattern_files = NULL;
332 static fnstr *pattern_files_last = NULL;
333
334 /* Structure for holding the two variables that describe a file name chain. */
335
336 typedef struct fndatastr {
337 fnstr **anchor;
338 fnstr **lastptr;
339 } fndatastr;
340
341 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
342 static fndatastr include_from_data = { &include_from, &include_from_last };
343 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
344 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
345
346 /* Structure for pattern and its compiled form; used for matching patterns and
347 also for include/exclude patterns. */
348
349 typedef struct patstr {
350 struct patstr *next;
351 char *string;
352 PCRE2_SIZE length;
353 pcre2_code *compiled;
354 } patstr;
355
356 static patstr *patterns = NULL;
357 static patstr *patterns_last = NULL;
358 static patstr *include_patterns = NULL;
359 static patstr *include_patterns_last = NULL;
360 static patstr *exclude_patterns = NULL;
361 static patstr *exclude_patterns_last = NULL;
362 static patstr *include_dir_patterns = NULL;
363 static patstr *include_dir_patterns_last = NULL;
364 static patstr *exclude_dir_patterns = NULL;
365 static patstr *exclude_dir_patterns_last = NULL;
366
367 /* Structure holding the two variables that describe a pattern chain. A pointer
368 to such structures is used for each appropriate option. */
369
370 typedef struct patdatastr {
371 patstr **anchor;
372 patstr **lastptr;
373 } patdatastr;
374
375 static patdatastr match_patdata = { &patterns, &patterns_last };
376 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
377 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
378 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
379 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
380
381 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
382 &include_dir_patterns, &exclude_dir_patterns };
383
384 static const char *incexname[4] = { "--include", "--exclude",
385 "--include-dir", "--exclude-dir" };
386
387 /* Structure for options and list of them */
388
389 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
390 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
391
392 typedef struct option_item {
393 int type;
394 int one_char;
395 void *dataptr;
396 const char *long_name;
397 const char *help_text;
398 } option_item;
399
400 /* Options without a single-letter equivalent get a negative value. This can be
401 used to identify them. */
402
403 #define N_COLOUR (-1)
404 #define N_EXCLUDE (-2)
405 #define N_EXCLUDE_DIR (-3)
406 #define N_HELP (-4)
407 #define N_INCLUDE (-5)
408 #define N_INCLUDE_DIR (-6)
409 #define N_LABEL (-7)
410 #define N_LOCALE (-8)
411 #define N_NULL (-9)
412 #define N_LOFFSETS (-10)
413 #define N_FOFFSETS (-11)
414 #define N_LBUFFER (-12)
415 #define N_H_LIMIT (-13)
416 #define N_M_LIMIT (-14)
417 #define N_M_LIMIT_DEP (-15)
418 #define N_BUFSIZE (-16)
419 #define N_NOJIT (-17)
420 #define N_FILE_LIST (-18)
421 #define N_BINARY_FILES (-19)
422 #define N_EXCLUDE_FROM (-20)
423 #define N_INCLUDE_FROM (-21)
424 #define N_OM_SEPARATOR (-22)
425 #define N_MAX_BUFSIZE (-23)
426 #define N_OM_CAPTURE (-24)
427 #define N_ALLABSK (-25)
428
429 static option_item optionlist[] = {
430 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
431 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
432 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
433 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
434 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
435 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
436 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
437 { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
438 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
439 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
440 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
441 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
442 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
443 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
444 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
445 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
446 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
447 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
448 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
449 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
450 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
451 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
452 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
453 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
454 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
455 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
456 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
457 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
458 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
459 { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
460 { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
461 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
462 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
463 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
464 { OP_NUMBER, 'm', &count_limit, "max-count=number", "stop after <number> matched lines" },
465 { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
466 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
467 #ifdef SUPPORT_PCRE2GREP_JIT
468 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
469 #else
470 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
471 #endif
472 { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
473 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
474 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
475 { OP_U32NUMBER, N_OM_CAPTURE, &capture_max, "om-capture=n", "set capture count for --only-matching" },
476 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
477 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
478 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
479 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
480 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
481 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
482 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
483 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
484 #ifdef JFRIEDL_DEBUG
485 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
486 #endif
487 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
488 { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
489 { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
490 { OP_NODATA, 'U', NULL, "utf-allow-invalid", "use UTF mode, allow for invalid code units" },
491 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
492 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
493 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
494 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
495 { OP_NODATA, N_ALLABSK, NULL, "allow-lookaround-bsk", "allow \\K in lookarounds" },
496 { OP_NODATA, 0, NULL, NULL, NULL }
497 };
498
499 /* Table of names for newline types. Must be kept in step with the definitions
500 of PCRE2_NEWLINE_xx in pcre2.h. */
501
502 static const char *newlines[] = {
503 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
504
505 /* UTF-8 tables */
506
507 const int utf8_table1[] =
508 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
509 const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
510
511 const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
512 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
513
514 const char utf8_table4[] = {
515 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
516 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
517 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
518 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
519
520
521 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
522 /*************************************************
523 * Emulated memmove() for systems without it *
524 *************************************************/
525
526 /* This function can make use of bcopy() if it is available. Otherwise do it by
527 steam, as there are some non-Unix environments that lack both memmove() and
528 bcopy(). */
529
/* Copy n bytes from s to d, allowing the two regions to overlap.

Arguments:
  d         destination
  s         source
  n         number of bytes to copy

Returns:    d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;

if (to > from)
  {
  /* The destination lies above the source, so copy from the top down; that
  way every overlapping byte is read before it is overwritten. */
  size_t left = n;
  while (left-- > 0) to[left] = from[left];
  }
else
  {
  /* Destination at or below the source: a plain ascending copy is safe. */
  size_t k;
  for (k = 0; k < n; k++) to[k] = from[k];
  }

return d;
#endif   /* not HAVE_BCOPY */
}
554 #undef memmove
555 #define memmove(d,s,n) emulated_memmove(d,s,n)
556 #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
557
558
559
560 /*************************************************
561 * Convert code point to UTF-8 *
562 *************************************************/
563
564 /* A static buffer is used. Returns the number of bytes. */
565
566 static int
ord2utf8(uint32_t value)567 ord2utf8(uint32_t value)
568 {
569 int i, j;
570 uint8_t *utf8bytes = utf8_buffer;
571 for (i = 0; i < utf8_table1_size; i++)
572 if (value <= (uint32_t)utf8_table1[i]) break;
573 utf8bytes += i;
574 for (j = i; j > 0; j--)
575 {
576 *utf8bytes-- = 0x80 | (value & 0x3f);
577 value >>= 6;
578 }
579 *utf8bytes = utf8_table2[i] | value;
580 return i + 1;
581 }
582
583
584
585 /*************************************************
586 * Case-independent string compare *
587 *************************************************/
588
/* Compare two zero-terminated strings, ignoring case as defined by tolower()
in the current locale.

Each byte is converted to unsigned char before being passed to tolower(),
because the <ctype.h> functions have undefined behaviour for negative
arguments other than EOF, and plain char may be signed.

Arguments:
  str1      first string
  str2      second string

Returns:    0 if the strings are equal when case is ignored
            1 if str1 is greater at the first difference
           -1 if str1 is less at the first difference
*/

static int
strcmpic(const char *str1, const char *str2)
{
/* The loop continues until both strings are exhausted. If only one has ended,
its terminating zero differs from the other string's character, so the
comparison below returns before either pointer is used again. */

while (*str1 != '\0' || *str2 != '\0')
  {
  unsigned int c1 = (unsigned int)tolower((unsigned char)*str1++);
  unsigned int c2 = (unsigned int)tolower((unsigned char)*str2++);
  if (c1 != c2) return (c1 > c2)? 1 : -1;
  }
return 0;
}
601
602
603 /*************************************************
604 * Parse GREP_COLORS *
605 *************************************************/
606
607 /* Extract ms or mt from GREP_COLORS.
608
609 Argument: the string, possibly NULL
610 Returns: the value of ms or mt, or NULL if neither present
611 */
612
/* Pick out the "ms=" setting (or, failing that, the "mt=" setting) from a
GREP_COLORS-style string. The value runs up to the next colon or the end of
the string and is copied into a static buffer, truncated if it is too long to
fit. */

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *p;
size_t n = 0;

if (gc == NULL) return NULL;

p = strstr(gc, "ms=");
if (p == NULL && (p = strstr(gc, "mt=")) == NULL) return NULL;

/* Skip over the three-character key, then copy the value. */

for (p += 3; *p != ':' && *p != '\0' && n < sizeof(seq) - 1; p++)
  seq[n++] = *p;
seq[n] = '\0';

return seq;
}
630
631
632 /*************************************************
633 * Exit from the program *
634 *************************************************/
635
636 /* If there has been a resource error, give a suitable message.
637
638 Argument: the return code
639 Returns: does not return
640 */
641
642 static void
pcre2grep_exit(int rc)643 pcre2grep_exit(int rc)
644 {
645 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
646 status of 1, which is not helpful. To help with this problem, define a symbol
647 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
648 therein. */
649
650 #ifdef __VMS
651 char val_buf[4];
652 $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
653 $DESCRIPTOR(sym_val, val_buf);
654 sprintf(val_buf, "%d", rc);
655 sym_val.dsc$w_length = strlen(val_buf);
656 lib$set_symbol(&sym_nam, &sym_val);
657 #endif
658
659 if (resource_error)
660 {
661 fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
662 "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
663 PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
664 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
665 }
666 exit(rc);
667 }
668
669
670 /*************************************************
671 * Add item to chain of patterns *
672 *************************************************/
673
674 /* Used to add an item onto a chain, or just return an unconnected item if the
675 "after" argument is NULL.
676
677 Arguments:
678 s pattern string to add
679 patlen length of pattern
680 after if not NULL points to item to insert after
681
682 Returns: new pattern block or NULL on error
683 */
684
685 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)686 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
687 {
688 patstr *p = (patstr *)malloc(sizeof(patstr));
689 if (p == NULL)
690 {
691 fprintf(stderr, "pcre2grep: malloc failed\n");
692 pcre2grep_exit(2);
693 }
694 if (patlen > MAXPATLEN)
695 {
696 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
697 MAXPATLEN);
698 free(p);
699 return NULL;
700 }
701 p->next = NULL;
702 p->string = s;
703 p->length = patlen;
704 p->compiled = NULL;
705
706 if (after != NULL)
707 {
708 p->next = after->next;
709 after->next = p;
710 }
711 return p;
712 }
713
714
715 /*************************************************
716 * Free chain of patterns *
717 *************************************************/
718
719 /* Used for several chains of patterns.
720
721 Argument: pointer to start of chain
722 Returns: nothing
723 */
724
725 static void
free_pattern_chain(patstr * pc)726 free_pattern_chain(patstr *pc)
727 {
728 while (pc != NULL)
729 {
730 patstr *p = pc;
731 pc = p->next;
732 if (p->compiled != NULL) pcre2_code_free(p->compiled);
733 free(p);
734 }
735 }
736
737
738 /*************************************************
739 * Free chain of file names *
740 *************************************************/
741
742 /*
743 Argument: pointer to start of chain
744 Returns: nothing
745 */
746
747 static void
free_file_chain(fnstr * fn)748 free_file_chain(fnstr *fn)
749 {
750 while (fn != NULL)
751 {
752 fnstr *f = fn;
753 fn = f->next;
754 free(f);
755 }
756 }
757
758
759 /*************************************************
760 * OS-specific functions *
761 *************************************************/
762
763 /* These definitions are needed in all Windows environments, even those where
764 Unix-style directory scanning can be used (see below). */
765
766 #ifdef WIN32
767
768 #ifndef STRICT
769 # define STRICT
770 #endif
771 #ifndef WIN32_LEAN_AND_MEAN
772 # define WIN32_LEAN_AND_MEAN
773 #endif
774
775 #include <windows.h>
776
777 #define iswild(name) (strpbrk(name, "*?") != NULL)
778
779 /* Convert ANSI BGR format to RGB used by Windows */
780 #define BGR_RGB(x) ((x & 1 ? 4 : 0) | (x & 2) | (x & 4 ? 1 : 0))
781
782 static HANDLE hstdout;
783 static CONSOLE_SCREEN_BUFFER_INFO csbi;
784 static WORD match_colour;
785
786 static WORD
decode_ANSI_colour(const char * cs)787 decode_ANSI_colour(const char *cs)
788 {
789 WORD result = csbi.wAttributes;
790 while (*cs)
791 {
792 if (isdigit(*cs))
793 {
794 int code = atoi(cs);
795 if (code == 1) result |= 0x08;
796 else if (code == 4) result |= 0x8000;
797 else if (code == 5) result |= 0x80;
798 else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
799 else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
800 else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
801 else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
802 /* aixterm high intensity colour codes */
803 else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
804 else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
805
806 while (isdigit(*cs)) cs++;
807 }
808 if (*cs) cs++;
809 }
810 return result;
811 }
812
813
814 static void
init_colour_output()815 init_colour_output()
816 {
817 if (do_colour)
818 {
819 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
820 /* This fails when redirected to con; try again if so. */
821 if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
822 {
823 HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
824 FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
825 GetConsoleScreenBufferInfo(hcon, &csbi);
826 CloseHandle(hcon);
827 }
828 match_colour = decode_ANSI_colour(colour_string);
829 /* No valid colour found - turn off colouring */
830 if (!match_colour) do_colour = FALSE;
831 }
832 }
833
834 #endif /* WIN32 */
835
836
837 /* The following sets of functions are defined so that they can be made system
838 specific. At present there are versions for Unix-style environments, Windows,
839 native z/OS, and "no support". */
840
841
842 /************* Directory scanning Unix-style and z/OS ***********/
843
844 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
845 #include <sys/types.h>
846 #include <sys/stat.h>
847 #include <dirent.h>
848
849 #if defined NATIVE_ZOS
850 /************* Directory and PDS/E scanning for z/OS ***********/
851 /************* z/OS looks mostly like Unix with USS ************/
852 /* However, z/OS needs the #include statements in this header */
853 #include "pcrzosfs.h"
854 /* That header is not included in the main PCRE distribution because
855 other apparatus is needed to compile pcre2grep for z/OS. The header
856 can be found in the special z/OS distribution, which is available
857 from www.zaconsultants.net or from www.cbttape.org. */
858 #endif
859
860 typedef DIR directory_type;
861 #define FILESEP '/'
862
/* Test whether a path names a directory.

Argument:   the path
Returns:    non-zero if stat() succeeds and reports a directory; zero
            otherwise
*/

static int
isdirectory(char *filename)
{
struct stat sb;

/* A stat() failure yields zero, in the expectation that opening as a file
will fail. */

return (stat(filename, &sb) >= 0)? S_ISDIR(sb.st_mode) : 0;
}
871
872 static directory_type *
opendirectory(char * filename)873 opendirectory(char *filename)
874 {
875 return opendir(filename);
876 }
877
878 static char *
readdirectory(directory_type * dir)879 readdirectory(directory_type *dir)
880 {
881 for (;;)
882 {
883 struct dirent *dent = readdir(dir);
884 if (dent == NULL) return NULL;
885 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
886 return dent->d_name;
887 }
888 /* Control never reaches here */
889 }
890
891 static void
closedirectory(directory_type * dir)892 closedirectory(directory_type *dir)
893 {
894 closedir(dir);
895 }
896
897
898 /************* Test for regular file, Unix-style **********/
899
/* Test whether a path names a regular file.

Argument:   the path
Returns:    non-zero if stat() reports a regular file, and also (as 1) if
            stat() fails; zero for any other kind of file
*/

static int
isregfile(char *filename)
{
struct stat sb;

/* A stat() failure yields 1, in the expectation that opening as a file will
fail. */

return (stat(filename, &sb) >= 0)? S_ISREG(sb.st_mode) : 1;
}
908
909
910 #if defined NATIVE_ZOS
911 /************* Test for a terminal in z/OS **********/
912 /* isatty() does not work in a TSO environment, so always give FALSE.*/
913
/* z/OS version: stdout is never reported as a terminal, so this always
returns FALSE. */

static BOOL
is_stdout_tty(void)
{
  return FALSE;
}
919
920 static BOOL
is_file_tty(FILE * f)921 is_file_tty(FILE *f)
922 {
923 return FALSE;
924 }
925
926
927 /************* Test for a terminal, Unix-style **********/
928
929 #else
930 static BOOL
is_stdout_tty(void)931 is_stdout_tty(void)
932 {
933 return isatty(fileno(stdout));
934 }
935
936 static BOOL
is_file_tty(FILE * f)937 is_file_tty(FILE *f)
938 {
939 return isatty(fileno(f));
940 }
941 #endif
942
943
944 /************* Print optionally coloured match Unix-style and z/OS **********/
945
946 static void
print_match(const void * buf,int length)947 print_match(const void *buf, int length)
948 {
949 if (length == 0) return;
950 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
951 FWRITE_IGNORE(buf, 1, length, stdout);
952 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
953 }
954
955 /* End of Unix-style or native z/OS environment functions. */
956
957
958 /************* Directory scanning in Windows ***********/
959
960 /* I (Philip Hazel) have no means of testing this code. It was contributed by
961 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
962 when it did not exist. David Byron added a patch that moved the #include of
963 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
964 */
965
966 #elif defined WIN32
967
968 #ifndef INVALID_FILE_ATTRIBUTES
969 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
970 #endif
971
/* State of a directory scan under Windows. */
typedef struct directory_type
{
  HANDLE handle;          /* Search handle obtained from FindFirstFile() */
  BOOL first;             /* TRUE until the FindFirstFile() entry has been consumed */
  WIN32_FIND_DATA data;   /* Data for the most recently found entry */
} directory_type;

/* Character that separates the components of a path name. */
#define FILESEP '/'
980
981 int
isdirectory(char * filename)982 isdirectory(char *filename)
983 {
984 DWORD attr = GetFileAttributes(filename);
985 if (attr == INVALID_FILE_ATTRIBUTES)
986 return 0;
987 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
988 }
989
990 directory_type *
opendirectory(char * filename)991 opendirectory(char *filename)
992 {
993 size_t len;
994 char *pattern;
995 directory_type *dir;
996 DWORD err;
997 len = strlen(filename);
998 pattern = (char *)malloc(len + 3);
999 dir = (directory_type *)malloc(sizeof(*dir));
1000 if ((pattern == NULL) || (dir == NULL))
1001 {
1002 fprintf(stderr, "pcre2grep: malloc failed\n");
1003 pcre2grep_exit(2);
1004 }
1005 memcpy(pattern, filename, len);
1006 if (iswild(filename))
1007 pattern[len] = 0;
1008 else
1009 memcpy(&(pattern[len]), "\\*", 3);
1010 dir->handle = FindFirstFile(pattern, &(dir->data));
1011 if (dir->handle != INVALID_HANDLE_VALUE)
1012 {
1013 free(pattern);
1014 dir->first = TRUE;
1015 return dir;
1016 }
1017 err = GetLastError();
1018 free(pattern);
1019 free(dir);
1020 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1021 return NULL;
1022 }
1023
1024 char *
readdirectory(directory_type * dir)1025 readdirectory(directory_type *dir)
1026 {
1027 for (;;)
1028 {
1029 if (!dir->first)
1030 {
1031 if (!FindNextFile(dir->handle, &(dir->data)))
1032 return NULL;
1033 }
1034 else
1035 {
1036 dir->first = FALSE;
1037 }
1038 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1039 return dir->data.cFileName;
1040 }
1041 #ifndef _MSC_VER
1042 return NULL; /* Keep compiler happy; never executed */
1043 #endif
1044 }
1045
1046 void
closedirectory(directory_type * dir)1047 closedirectory(directory_type *dir)
1048 {
1049 FindClose(dir->handle);
1050 free(dir);
1051 }
1052
1053
1054 /************* Test for regular file in Windows **********/
1055
1056 /* I don't know how to do this, or if it can be done; assume all paths are
1057 regular if they are not directories. */
1058
/* Windows version: anything that isdirectory() does not accept is taken to be
a regular file.

Argument:   filename   the path to examine
Returns:    1 if the path is not a directory, 0 if it is
*/

int
isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
1063
1064
1065 /************* Test for a terminal in Windows **********/
1066
1067 static BOOL
is_stdout_tty(void)1068 is_stdout_tty(void)
1069 {
1070 return _isatty(_fileno(stdout));
1071 }
1072
1073 static BOOL
is_file_tty(FILE * f)1074 is_file_tty(FILE *f)
1075 {
1076 return _isatty(_fileno(f));
1077 }
1078
1079
1080 /************* Print optionally coloured match in Windows **********/
1081
1082 static void
print_match(const void * buf,int length)1083 print_match(const void *buf, int length)
1084 {
1085 if (length == 0) return;
1086 if (do_colour)
1087 {
1088 if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1089 else SetConsoleTextAttribute(hstdout, match_colour);
1090 }
1091 FWRITE_IGNORE(buf, 1, length, stdout);
1092 if (do_colour)
1093 {
1094 if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1095 else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1096 }
1097 }
1098
1099 /* End of Windows functions */
1100
1101
1102 /************* Directory scanning when we can't do it ***********/
1103
1104 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1105
1106 #else
1107
/* No directory support is available in this configuration: the separator is
defined as zero and the handle type is void. */
#define FILESEP 0
typedef void directory_type;
1110
/* Without directory support nothing is ever reported as a directory. */
int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}
opendirectory(char * filename)1112 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* Without directory support there are never any entries; always a null pointer. */
char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}
/* Without directory support there is nothing to close. */
void
closedirectory(directory_type *dir)
{
  (void)dir;
}
1115
1116
1117 /************* Test for regular file when we can't do it **********/
1118
1119 /* Assume all files are regular. */
1120
/* Without a way of testing, every path is assumed to be a regular file. */
int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
1122
1123
1124 /************* Test for a terminal when we can't do it **********/
1125
/* Fallback version: with no way of testing, stdout is never reported as a
terminal. */

static BOOL
is_stdout_tty(void)
{
  return FALSE;
}
1131
1132 static BOOL
is_file_tty(FILE * f)1133 is_file_tty(FILE *f)
1134 {
1135 return FALSE;
1136 }
1137
1138
1139 /************* Print optionally coloured match when we can't do it **********/
1140
/* Fallback version: write matched text to stdout with no colouring. Nothing
is written for an empty match.

Arguments:
  buf       start of the matched text
  length    number of bytes to write

Returns:    nothing
*/

static void
print_match(const void *buf, int length)
{
  if (length != 0)
  {
    FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1147
1148 #endif /* End of system-specific functions */
1149
1150
1151
1152 #ifndef HAVE_STRERROR
1153 /*************************************************
1154 * Provide strerror() for non-ANSI libraries *
1155 *************************************************/
1156
1157 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1158 in their libraries, but can provide the same facility by this simple
1159 alternative function. */
1160
/* The table of messages and its size are supplied by the system library. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Look up the text for an error number in sys_errlist.

Argument:   n   the error number
Returns:    the message, or a fixed string if n is outside the table
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
1170 #endif /* HAVE_STRERROR */
1171
1172
1173
1174 /*************************************************
1175 * Usage function *
1176 *************************************************/
1177
1178 static int
usage(int rc)1179 usage(int rc)
1180 {
1181 option_item *op;
1182 fprintf(stderr, "Usage: pcre2grep [-");
1183 for (op = optionlist; op->one_char != 0; op++)
1184 {
1185 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1186 }
1187 fprintf(stderr, "] [long options] [pattern] [files]\n");
1188 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1189 "options.\n");
1190 return rc;
1191 }
1192
1193
1194
1195 /*************************************************
1196 * Help function *
1197 *************************************************/
1198
1199 static void
help(void)1200 help(void)
1201 {
1202 option_item *op;
1203
1204 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1205 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1206 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1207
1208 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1209 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1210 printf("All callout scripts in patterns are supported." STDOUT_NL);
1211 #else
1212 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1213 #endif
1214 #else
1215 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1216 #endif
1217
1218 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1219
1220 #ifdef SUPPORT_LIBZ
1221 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1222 #endif
1223
1224 #ifdef SUPPORT_LIBBZ2
1225 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1226 #endif
1227
1228 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1229 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1230 #else
1231 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1232 #endif
1233
1234 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1235 printf("Options:" STDOUT_NL);
1236
1237 for (op = optionlist; op->one_char != 0; op++)
1238 {
1239 int n;
1240 char s[4];
1241
1242 if (op->one_char > 0 && (op->long_name)[0] == 0)
1243 n = 31 - printf(" -%c", op->one_char);
1244 else
1245 {
1246 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1247 else strcpy(s, " ");
1248 n = 31 - printf(" %s --%s", s, op->long_name);
1249 }
1250
1251 if (n < 1) n = 1;
1252 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1253 }
1254
1255 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1256 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1257 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1258 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1259 printf("space is removed and blank lines are ignored." STDOUT_NL);
1260 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1261
1262 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1263 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1264 }
1265
1266
1267
1268 /*************************************************
1269 * Test exclude/includes *
1270 *************************************************/
1271
1272 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1273 there are no includes, the path must match an include pattern.
1274
1275 Arguments:
1276 path the path to be matched
1277 ip the chain of include patterns
1278 ep the chain of exclude patterns
1279
1280 Returns: TRUE if the path is not excluded
1281 */
1282
1283 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1284 test_incexc(char *path, patstr *ip, patstr *ep)
1285 {
1286 int plen = strlen((const char *)path);
1287
1288 for (; ep != NULL; ep = ep->next)
1289 {
1290 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1291 return FALSE;
1292 }
1293
1294 if (ip == NULL) return TRUE;
1295
1296 for (; ip != NULL; ip = ip->next)
1297 {
1298 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1299 return TRUE;
1300 }
1301
1302 return FALSE;
1303 }
1304
1305
1306
1307 /*************************************************
1308 * Decode integer argument value *
1309 *************************************************/
1310
1311 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1312 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1313 just keep it simple.
1314
1315 Arguments:
1316 option_data the option data string
1317 op the option item (for error messages)
1318 longop TRUE if option given in long form
1319
1320 Returns: a long integer
1321 */
1322
1323 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1324 decode_number(char *option_data, option_item *op, BOOL longop)
1325 {
1326 unsigned long int n = 0;
1327 char *endptr = option_data;
1328 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1329 while (isdigit((unsigned char)(*endptr)))
1330 n = n * 10 + (int)(*endptr++ - '0');
1331 if (toupper(*endptr) == 'K')
1332 {
1333 n *= 1024;
1334 endptr++;
1335 }
1336 else if (toupper(*endptr) == 'M')
1337 {
1338 n *= 1024*1024;
1339 endptr++;
1340 }
1341
1342 if (*endptr != 0) /* Error */
1343 {
1344 if (longop)
1345 {
1346 char *equals = strchr(op->long_name, '=');
1347 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1348 (int)(equals - op->long_name);
1349 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1350 option_data, nlen, op->long_name);
1351 }
1352 else
1353 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1354 option_data, op->one_char);
1355 pcre2grep_exit(usage(2));
1356 }
1357
1358 return n;
1359 }
1360
1361
1362
1363 /*************************************************
1364 * Add item to a chain of numbers *
1365 *************************************************/
1366
1367 /* Used to add an item onto a chain, or just return an unconnected item if the
1368 "after" argument is NULL.
1369
1370 Arguments:
1371 n the number to add
1372 after if not NULL points to item to insert after
1373
1374 Returns: new number block
1375 */
1376
1377 static omstr *
add_number(int n,omstr * after)1378 add_number(int n, omstr *after)
1379 {
1380 omstr *om = (omstr *)malloc(sizeof(omstr));
1381
1382 if (om == NULL)
1383 {
1384 fprintf(stderr, "pcre2grep: malloc failed\n");
1385 pcre2grep_exit(2);
1386 }
1387 om->next = NULL;
1388 om->groupnum = n;
1389
1390 if (after != NULL)
1391 {
1392 om->next = after->next;
1393 after->next = om;
1394 }
1395 return om;
1396 }
1397
1398
1399
1400 /*************************************************
1401 * Read one line of input *
1402 *************************************************/
1403
1404 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1405 BZ2_read) into a large buffer, so many lines may be read at once. However,
1406 doing this for tty input means that no output appears until a lot of input has
1407 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1408 for this, because it does not stop at a binary zero, and therefore there is no
1409 way of telling how many characters it has read, because there may be binary
1410 zeros embedded in the data. This function is also used for reading patterns
1411 from files (the -f option).
1412
1413 Arguments:
1414 buffer the buffer to read into
1415 length the maximum number of characters to read
1416 f the file
1417
1418 Returns: the number of characters read, zero at end of file
1419 */
1420
1421 static PCRE2_SIZE
read_one_line(char * buffer,int length,FILE * f)1422 read_one_line(char *buffer, int length, FILE *f)
1423 {
1424 int c;
1425 int yield = 0;
1426 while ((c = fgetc(f)) != EOF)
1427 {
1428 buffer[yield++] = c;
1429 if (c == '\n' || yield >= length) break;
1430 }
1431 return yield;
1432 }
1433
1434
1435
1436 /*************************************************
1437 * Find end of line *
1438 *************************************************/
1439
1440 /* The length of the endline sequence that is found is set via lenptr. This may
1441 be zero at the very end of the file if there is no line-ending sequence there.
1442
1443 Arguments:
1444 p current position in line
1445 endptr end of available data
1446 lenptr where to put the length of the eol sequence
1447
1448 Returns: pointer after the last byte of the line,
1449 including the newline byte(s)
1450 */
1451
/* Scan forward from p for the end of the current line, according to the
global endlinetype, and report the length of the terminator via lenptr. */

static char *
end_of_line(char *p, char *endptr, int *lenptr)
{
  switch(endlinetype)
  {
    /* The first three cases look for one particular terminating byte. */

    default:      /* Just in case */
    case PCRE2_NEWLINE_LF:
    while (p < endptr && *p != '\n') p++;
    if (p < endptr)
    {
      *lenptr = 1;
      return p + 1;
    }
    *lenptr = 0;
    return endptr;

    case PCRE2_NEWLINE_CR:
    while (p < endptr && *p != '\r') p++;
    if (p < endptr)
    {
      *lenptr = 1;
      return p + 1;
    }
    *lenptr = 0;
    return endptr;

    case PCRE2_NEWLINE_NUL:
    while (p < endptr && *p != '\0') p++;
    if (p < endptr)
    {
      *lenptr = 1;
      return p + 1;
    }
    *lenptr = 0;
    return endptr;

    /* Only CR immediately followed by LF ends a line. A CR that is followed
    by something else is skipped and the search resumes after it. */

    case PCRE2_NEWLINE_CRLF:
    for (;;)
    {
      while (p < endptr && *p != '\r') p++;
      if (++p >= endptr)      /* No CR found, or CR was the last byte */
      {
        *lenptr = 0;
        return endptr;
      }
      if (*p == '\n')
      {
        *lenptr = 2;
        return p + 1;
      }
    }
    break;

    /* CR, LF, or CRLF ends a line. The data is examined one character at a
    time; in UTF mode a multi-byte character is decoded first. */

    case PCRE2_NEWLINE_ANYCRLF:
    while (p < endptr)
    {
      int extra = 0;
      int c = *((unsigned char *)p);

      /* NOTE(review): the additional bytes are read without being compared
      against endptr, and p may be advanced past endptr below - confirm that
      callers guarantee complete characters. */

      if (utf && c >= 0xc0)
      {
        int gcii, gcss;
        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
        gcss = 6*extra;
        c = (c & utf8_table3[extra]) << gcss;
        for (gcii = 1; gcii <= extra; gcii++)
        {
          gcss -= 6;
          c |= (p[gcii] & 0x3f) << gcss;
        }
      }

      p += 1 + extra;      /* Now points after the character */

      switch (c)
      {
        case '\n':
        *lenptr = 1;
        return p;

        case '\r':
        if (p < endptr && *p == '\n')
        {
          *lenptr = 2;
          p++;
        }
        else *lenptr = 1;
        return p;

        default:
        break;
      }
    }   /* End of loop for ANYCRLF case */

    *lenptr = 0;  /* Must have hit the end */
    return endptr;

    /* As ANYCRLF, but VT and FF also end a line, as do NEL, LS, and PS when
    not compiled for EBCDIC. */

    case PCRE2_NEWLINE_ANY:
    while (p < endptr)
    {
      int extra = 0;
      int c = *((unsigned char *)p);

      if (utf && c >= 0xc0)
      {
        int gcii, gcss;
        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
        gcss = 6*extra;
        c = (c & utf8_table3[extra]) << gcss;
        for (gcii = 1; gcii <= extra; gcii++)
        {
          gcss -= 6;
          c |= (p[gcii] & 0x3f) << gcss;
        }
      }

      p += 1 + extra;      /* Now points after the character */

      switch (c)
      {
        case '\n':    /* LF */
        case '\v':    /* VT */
        case '\f':    /* FF */
        *lenptr = 1;
        return p;

        case '\r':    /* CR */
        if (p < endptr && *p == '\n')
        {
          *lenptr = 2;
          p++;
        }
        else *lenptr = 1;
        return p;

#ifndef EBCDIC
        case 0x85:    /* Unicode NEL */
        *lenptr = utf? 2 : 1;     /* Reported as two bytes long in UTF mode */
        return p;

        case 0x2028:  /* Unicode LS */
        case 0x2029:  /* Unicode PS */
        *lenptr = 3;
        return p;
#endif  /* Not EBCDIC */

        default:
        break;
      }
    }   /* End of loop for ANY case */

    *lenptr = 0;  /* Must have hit the end */
    return endptr;
  }     /* End of overall switch */
}
1607
1608
1609
1610 /*************************************************
1611 * Find start of previous line *
1612 *************************************************/
1613
1614 /* This is called when looking back for before lines to print.
1615
1616 Arguments:
1617 p start of the subsequent line
1618 startptr start of available data
1619
1620 Returns: pointer to the start of the previous line
1621 */
1622
1623 static char *
previous_line(char * p,char * startptr)1624 previous_line(char *p, char *startptr)
1625 {
1626 switch(endlinetype)
1627 {
1628 default: /* Just in case */
1629 case PCRE2_NEWLINE_LF:
1630 p--;
1631 while (p > startptr && p[-1] != '\n') p--;
1632 return p;
1633
1634 case PCRE2_NEWLINE_CR:
1635 p--;
1636 while (p > startptr && p[-1] != '\n') p--;
1637 return p;
1638
1639 case PCRE2_NEWLINE_NUL:
1640 p--;
1641 while (p > startptr && p[-1] != '\0') p--;
1642 return p;
1643
1644 case PCRE2_NEWLINE_CRLF:
1645 for (;;)
1646 {
1647 p -= 2;
1648 while (p > startptr && p[-1] != '\n') p--;
1649 if (p <= startptr + 1 || p[-2] == '\r') return p;
1650 }
1651 /* Control can never get here */
1652
1653 case PCRE2_NEWLINE_ANY:
1654 case PCRE2_NEWLINE_ANYCRLF:
1655 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1656 if (utf) while ((*p & 0xc0) == 0x80) p--;
1657
1658 while (p > startptr)
1659 {
1660 unsigned int c;
1661 char *pp = p - 1;
1662
1663 if (utf)
1664 {
1665 int extra = 0;
1666 while ((*pp & 0xc0) == 0x80) pp--;
1667 c = *((unsigned char *)pp);
1668 if (c >= 0xc0)
1669 {
1670 int gcii, gcss;
1671 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1672 gcss = 6*extra;
1673 c = (c & utf8_table3[extra]) << gcss;
1674 for (gcii = 1; gcii <= extra; gcii++)
1675 {
1676 gcss -= 6;
1677 c |= (pp[gcii] & 0x3f) << gcss;
1678 }
1679 }
1680 }
1681 else c = *((unsigned char *)pp);
1682
1683 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1684 {
1685 case '\n': /* LF */
1686 case '\r': /* CR */
1687 return p;
1688
1689 default:
1690 break;
1691 }
1692
1693 else switch (c)
1694 {
1695 case '\n': /* LF */
1696 case '\v': /* VT */
1697 case '\f': /* FF */
1698 case '\r': /* CR */
1699 #ifndef EBCDIC
1700 case 0x85: /* Unicode NEL */
1701 case 0x2028: /* Unicode LS */
1702 case 0x2029: /* Unicode PS */
1703 #endif /* Not EBCDIC */
1704 return p;
1705
1706 default:
1707 break;
1708 }
1709
1710 p = pp; /* Back one character */
1711 } /* End of loop for ANY case */
1712
1713 return startptr; /* Hit start of data */
1714 } /* End of overall switch */
1715 }
1716
1717
1718
1719 /*************************************************
1720 * Output newline at end *
1721 *************************************************/
1722
1723 /* This function is called if the final line of a file has been written to
1724 stdout, but it does not have a terminating newline.
1725
1726 Arguments: none
1727 Returns: nothing
1728 */
1729
1730 static void
write_final_newline(void)1731 write_final_newline(void)
1732 {
1733 switch(endlinetype)
1734 {
1735 default: /* Just in case */
1736 case PCRE2_NEWLINE_LF:
1737 case PCRE2_NEWLINE_ANY:
1738 case PCRE2_NEWLINE_ANYCRLF:
1739 fprintf(stdout, "\n");
1740 break;
1741
1742 case PCRE2_NEWLINE_CR:
1743 fprintf(stdout, "\r");
1744 break;
1745
1746 case PCRE2_NEWLINE_CRLF:
1747 fprintf(stdout, "\r\n");
1748 break;
1749
1750 case PCRE2_NEWLINE_NUL:
1751 fprintf(stdout, "%c", 0);
1752 break;
1753 }
1754 }
1755
1756
1757 /*************************************************
1758 * Print the previous "after" lines *
1759 *************************************************/
1760
1761 /* This is called if we are about to lose said lines because of buffer filling,
1762 and at the end of the file. The data in the line is written using fwrite() so
1763 that a binary zero does not terminate it.
1764
1765 Arguments:
1766 lastmatchnumber the number of the last matching line, plus one
1767 lastmatchrestart where we restarted after the last match
1768 endptr end of available data
1769 printname filename for printing
1770
1771 Returns: nothing
1772 */
1773
1774 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1775 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1776 char *endptr, const char *printname)
1777 {
1778 if (after_context > 0 && lastmatchnumber > 0)
1779 {
1780 int count = 0;
1781 int ellength = 0;
1782 while (lastmatchrestart < endptr && count < after_context)
1783 {
1784 char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1785 if (ellength == 0 && pp == main_buffer + bufsize) break;
1786 if (printname != NULL) fprintf(stdout, "%s-", printname);
1787 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1788 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1789 lastmatchrestart = pp;
1790 count++;
1791 }
1792
1793 /* If we have printed any lines, arrange for a hyphen separator if anything
1794 else follows. Also, if the last line is the final line in the file and it had
1795 no newline, add one. */
1796
1797 if (count > 0)
1798 {
1799 hyphenpending = TRUE;
1800 if (ellength == 0 && lastmatchrestart >= endptr)
1801 write_final_newline();
1802 }
1803 }
1804 }
1805
1806
1807
1808 /*************************************************
1809 * Apply patterns to subject till one matches *
1810 *************************************************/
1811
1812 /* This function is called to run through all patterns, looking for a match. It
1813 is used multiple times for the same subject when colouring is enabled, in order
1814 to find all possible matches.
1815
1816 Arguments:
1817 matchptr the start of the subject
1818 length the length of the subject to match
  options      options for pcre2_match()
1820 startoffset where to start matching
1821 mrc address of where to put the result of pcre2_match()
1822
1823 Returns: TRUE if there was a match
1824 FALSE if there was no match
1825 invert if there was a non-fatal error
1826 */
1827
1828 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1829 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1830 PCRE2_SIZE startoffset, int *mrc)
1831 {
1832 int i;
1833 PCRE2_SIZE slen = length;
1834 patstr *p = patterns;
1835 const char *msg = "this text:\n\n";
1836
1837 if (slen > 200)
1838 {
1839 slen = 200;
1840 msg = "text that starts:\n\n";
1841 }
1842
1843 for (i = 1; p != NULL; p = p->next, i++)
1844 {
1845 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1846 startoffset, options, match_data, match_context);
1847 if (*mrc >= 0) return TRUE;
1848 if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1849 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1850 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1851 fprintf(stderr, "%s", msg);
1852 FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
1853 fprintf(stderr, "\n\n");
1854 if (*mrc <= PCRE2_ERROR_UTF8_ERR1 &&
1855 *mrc >= PCRE2_ERROR_UTF8_ERR21)
1856 {
1857 unsigned char mbuffer[256];
1858 PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1859 (void)pcre2_get_error_message(*mrc, mbuffer, sizeof(mbuffer));
1860 fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
1861 }
1862 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1863 *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1864 resource_error = TRUE;
1865 if (error_count++ > 20)
1866 {
1867 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1868 pcre2grep_exit(2);
1869 }
1870 return invert; /* No more matching; don't show the line again */
1871 }
1872
1873 return FALSE; /* No match, no errors */
1874 }
1875
1876
1877
1878 /*************************************************
1879 * Decode dollar escape sequence *
1880 *************************************************/
1881
1882 /* Called from various places to decode $ escapes in output strings. The escape
1883 sequences are as follows:
1884
1885 $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1886 zero is never returned; '0' is substituted.
1887
1888 $a returns bell.
1889 $b returns backspace.
1890 $e returns escape.
1891 $f returns form feed.
1892 $n returns newline.
1893 $r returns carriage return.
1894 $t returns tab.
1895 $v returns vertical tab.
1896 $o<digits> returns the character represented by the given octal
1897 number; up to three digits are processed.
1898 $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1899 code points.
1900 $x<digits> returns the character represented by the given hexadecimal
1901 number; up to two digits are processed.
$x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
1903 code points.
1904 Any other character is substituted by itself. E.g: $$ is replaced by a single
1905 dollar.
1906
1907 Arguments:
1908 begin the start of the whole string
1909 string points to the $
1910 callout TRUE if in a callout (inhibits error messages)
1911 value where to return a value
1912 last where to return pointer to the last used character
1913
1914 Returns: DDE_ERROR after a syntax error
1915 DDE_CAPTURE if *value is a capture number
1916 DDE_CHAR if *value is a character code
1917 */
1918
1919 static int
decode_dollar_escape(PCRE2_SPTR begin,PCRE2_SPTR string,BOOL callout,uint32_t * value,PCRE2_SPTR * last)1920 decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1921 uint32_t *value, PCRE2_SPTR *last)
1922 {
1923 uint32_t c = 0;
1924 int base = 10;
1925 int dcount;
1926 int rc = DDE_CHAR;
1927 BOOL brace = FALSE;
1928
1929 switch (*(++string))
1930 {
1931 case 0: /* Syntax error: a character must be present after $. */
1932 if (!callout)
1933 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1934 (int)(string - begin), "no character after $");
1935 *last = string;
1936 return DDE_ERROR;
1937
1938 case '{':
1939 brace = TRUE;
1940 string++;
1941 if (!isdigit(*string)) /* Syntax error: a decimal number required. */
1942 {
1943 if (!callout)
1944 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1945 (int)(string - begin), "decimal number expected");
1946 rc = DDE_ERROR;
1947 break;
1948 }
1949
1950 /* Fall through */
1951
1952 /* The maximum capture number is 65535, so any number greater than that will
1953 always be an unknown capture number. We just stop incrementing, in order to
1954 avoid overflow. */
1955
1956 case '0': case '1': case '2': case '3': case '4':
1957 case '5': case '6': case '7': case '8': case '9':
1958 do
1959 {
1960 if (c <= 65535) c = c * 10 + (*string - '0');
1961 string++;
1962 }
1963 while (*string >= '0' && *string <= '9');
1964 string--; /* Point to last digit */
1965
1966 /* In a callout, capture number 0 is not available. No error can be given,
1967 so just return the character '0'. */
1968
1969 if (callout && c == 0)
1970 {
1971 *value = '0';
1972 }
1973 else
1974 {
1975 *value = c;
1976 rc = DDE_CAPTURE;
1977 }
1978 break;
1979
1980 /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
1981 for valid Unicode code points. */
1982
1983 case 'o':
1984 base = 8;
1985 string++;
1986 if (*string == '{')
1987 {
1988 brace = TRUE;
1989 string++;
1990 dcount = 7;
1991 }
1992 else dcount = 3;
1993 for (; dcount > 0; dcount--)
1994 {
1995 if (*string < '0' || *string > '7') break;
1996 c = c * 8 + (*string++ - '0');
1997 }
1998 *value = c;
1999 string--; /* Point to last digit */
2000 break;
2001
2002 /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2003 for valid Unicode code points. */
2004
2005 case 'x':
2006 base = 16;
2007 string++;
2008 if (*string == '{')
2009 {
2010 brace = TRUE;
2011 string++;
2012 dcount = 6;
2013 }
2014 else dcount = 2;
2015 for (; dcount > 0; dcount--)
2016 {
2017 if (!isxdigit(*string)) break;
2018 if (*string >= '0' && *string <= '9')
2019 c = c *16 + *string++ - '0';
2020 else
2021 c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2022 }
2023 *value = c;
2024 string--; /* Point to last digit */
2025 break;
2026
2027 case 'a': *value = '\a'; break;
2028 case 'b': *value = '\b'; break;
2029 #ifndef EBCDIC
2030 case 'e': *value = '\033'; break;
2031 #else
2032 case 'e': *value = '\047'; break;
2033 #endif
2034 case 'f': *value = '\f'; break;
2035 case 'n': *value = STDOUT_NL_CODE; break;
2036 case 'r': *value = '\r'; break;
2037 case 't': *value = '\t'; break;
2038 case 'v': *value = '\v'; break;
2039
2040 default: *value = *string; break;
2041 }
2042
2043 if (brace)
2044 {
2045 c = string[1];
2046 if (c != '}')
2047 {
2048 rc = DDE_ERROR;
2049 if (!callout)
2050 {
2051 if ((base == 8 && c >= '0' && c <= '7') ||
2052 (base == 16 && isxdigit(c)))
2053 {
2054 fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2055 "too many %s digits\n", (int)(string - begin),
2056 (base == 8)? "octal" : "hex");
2057 }
2058 else
2059 {
2060 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2061 (int)(string - begin), "missing closing brace");
2062 }
2063 }
2064 }
2065 else string++;
2066 }
2067
2068 /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2069
2070 if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2071 {
2072 uint32_t max = utf? 0x0010ffffu : 0xffu;
2073 if (*value > max)
2074 {
2075 if (!callout)
2076 fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2077 "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2078 rc = DDE_ERROR;
2079 }
2080 }
2081
2082 *last = string;
2083 return rc;
2084 }
2085
2086
2087
2088 /*************************************************
2089 * Check output text for errors *
2090 *************************************************/
2091
2092 /* Called early, to get errors before doing anything for -O text; also called
2093 from callouts to check before outputting.
2094
2095 Arguments:
2096 string an --output text string
2097 callout TRUE if in a callout (stops printing errors)
2098
2099 Returns: TRUE if OK, FALSE on error
2100 */
2101
2102 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)2103 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2104 {
2105 uint32_t value;
2106 PCRE2_SPTR begin = string;
2107
2108 for (; *string != 0; string++)
2109 {
2110 if (*string == '$' &&
2111 decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2112 return FALSE;
2113 }
2114
2115 return TRUE;
2116 }
2117
2118
2119 /*************************************************
2120 * Display output text *
2121 *************************************************/
2122
2123 /* Display the output text, which is assumed to have already been syntax
2124 checked. Output may contain escape sequences started by the dollar sign.
2125
2126 Arguments:
2127 string: the output text
2128 callout: TRUE for the builtin callout, FALSE for --output
2129 subject the start of the subject
2130 ovector: capture offsets
2131 capture_top: number of captures
2132
2133 Returns: TRUE if something was output, other than newline
2134 FALSE if nothing was output, or newline was last output
2135 */
2136
2137 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)2138 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2139 PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2140 {
2141 uint32_t value;
2142 BOOL printed = FALSE;
2143 PCRE2_SPTR begin = string;
2144
2145 for (; *string != 0; string++)
2146 {
2147 if (*string == '$')
2148 {
2149 switch(decode_dollar_escape(begin, string, callout, &value, &string))
2150 {
2151 case DDE_CHAR:
2152 if (value == STDOUT_NL_CODE)
2153 {
2154 fprintf(stdout, STDOUT_NL);
2155 printed = FALSE;
2156 continue;
2157 }
2158 break; /* Will print value */
2159
2160 case DDE_CAPTURE:
2161 if (value < capture_top)
2162 {
2163 PCRE2_SIZE capturesize;
2164 value *= 2;
2165 capturesize = ovector[value + 1] - ovector[value];
2166 if (capturesize > 0)
2167 {
2168 print_match(subject + ovector[value], capturesize);
2169 printed = TRUE;
2170 }
2171 }
2172 continue;
2173
2174 default: /* Should not occur */
2175 break;
2176 }
2177 }
2178
2179 else value = *string; /* Not a $ escape */
2180
2181 if (utf && value <= 127) fprintf(stdout, "%c", *string); else
2182 {
2183 int i;
2184 int n = ord2utf8(value);
2185 for (i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2186 }
2187
2188 printed = TRUE;
2189 }
2190
2191 return printed;
2192 }
2193
2194
#ifdef SUPPORT_PCRE2GREP_CALLOUT

/*************************************************
*        Parse and execute callout scripts       *
*************************************************/

/* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
string block and executes the program specified by the string. The string is a
list of substrings separated by pipe characters. The first substring represents
the executable name, and the following substrings specify the arguments:

  program_name|param1|param2|...

Any substring (including the program name) can contain escape sequences
started by the dollar character. They are decoded by decode_dollar_escape()
and are substituted as follows:

  $<digits> or ${<digits>} is replaced by the captured substring of the given
  decimal number, which must be greater than zero. If the number is greater
  than the number of capturing substrings, or if the capture is unset, the
  replacement is empty.

  An escape that decodes to a character (for example $n, $t or $x{hh}) is
  replaced by that character. In UTF mode a character greater than 127 is
  inserted as its UTF-8 encoding.

  Any other character is substituted by itself. E.g: $$ is replaced by a single
  dollar or $| replaced by a pipe character.

Alternatively, if string starts with pipe, the remainder is taken as an output
string, same as --output, and is written by display_output_text(). This is the
only form that is supported if SUPPORT_PCRE2GREP_CALLOUT_FORK is not defined.
This function adds no separator or newline after that text.

Example:

  echo -e "abcde\n12345" | pcre2grep \
    '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -

  Output:

    Arg1: [a] [bcd] [d] Arg2: |a| ()
    abcde
    Arg1: [1] [234] [4] Arg2: |1| ()
    12345

Arguments:
  calloutptr  the callout block
  unused      not used

Returns:      0 (the match continues) if there is no callout string, if the
                string is an output string (leading pipe), if the string has
                a syntax error or too many arguments, if memory cannot be
                obtained, or if the external command yields a zero status
              1 (the match fails) if the external command yields a non-zero
                status
*/

static int
pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
{
PCRE2_SIZE length = calloutptr->callout_string_length;
PCRE2_SPTR string = calloutptr->callout_string;
PCRE2_SPTR subject = calloutptr->subject;
PCRE2_SIZE *ovector = calloutptr->offset_vector;
PCRE2_SIZE capture_top = calloutptr->capture_top;

#ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
PCRE2_SIZE argsvectorlen = 2;   /* Program name plus terminating NULL */
PCRE2_SIZE argslen = 1;         /* Terminating zero of the last string */
char *args;
char *argsptr;
char **argsvector;
char **argsvectorptr;
#ifndef WIN32
pid_t pid;
#endif
int result = 0;
#endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */

(void)unused;   /* Avoid compiler warning */

/* Only callouts with strings are supported. */

if (string == NULL || length == 0) return 0;

/* If there's no command, output the remainder directly. */

if (*string == '|')
  {
  string++;
  if (!syntax_check_output_text(string, TRUE)) return 0;
  (void)display_output_text(string, TRUE, subject, ovector, capture_top);
  return 0;
  }

#ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
return 0;
#else

/* Checking syntax and compute the number of string fragments. Callout strings
are silently ignored in the event of a syntax error. Each pass round the loop
adds one to argslen at the bottom; the escape cases adjust argslen so that the
total for an escape is the number of bytes that it will expand to. */

while (length > 0)
  {
  if (*string == '|')
    {
    argsvectorlen++;
    if (argsvectorlen > 10000) return 0;  /* Too many args */
    }

  else if (*string == '$')
    {
    uint32_t value;
    PCRE2_SPTR begin = string;

    switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
      {
      case DDE_CAPTURE:
      if (value < capture_top)
        {
        value *= 2;
        argslen += ovector[value + 1] - ovector[value];
        }
      argslen--;   /* Negate the effect of argslen++ below. */
      break;

      case DDE_CHAR:
      if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
        else if (utf && value > 127) argslen += ord2utf8(value) - 1;
      break;

      default:         /* Should not occur */
      case DDE_ERROR:
      return 0;
      }

    /* The decoder left "string" on the last character of the escape; account
    for the characters before it here and for the last one below. */

    length -= (string - begin);
    }

  string++;
  length--;
  argslen++;
  }

/* Get memory for the argument vector and its strings. */

args = (char*)malloc(argslen);
if (args == NULL) return 0;

argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
if (argsvector == NULL)
  {
  free(args);
  return 0;
  }

/* Now reprocess the string and set up the arguments. */

argsptr = args;
argsvectorptr = argsvector;
*argsvectorptr++ = argsptr;

length = calloutptr->callout_string_length;
string = calloutptr->callout_string;

while (length > 0)
  {
  if (*string == '|')
    {
    *argsptr++ = '\0';
    *argsvectorptr++ = argsptr;
    }

  else if (*string == '$')
    {
    uint32_t value;
    PCRE2_SPTR begin = string;

    switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
      {
      case DDE_CAPTURE:
      if (value < capture_top)
        {
        PCRE2_SIZE capturesize;
        value *= 2;
        capturesize = ovector[value + 1] - ovector[value];

        /* Do not form a source pointer from the offsets of an empty (which
        includes an unset) capture; there is nothing to copy in that case. */

        if (capturesize > 0)
          {
          memcpy(argsptr, subject + ovector[value], capturesize);
          argsptr += capturesize;
          }
        }
      break;

      case DDE_CHAR:
      if (value == STDOUT_NL_CODE)
        {
        memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
        argsptr += STDOUT_NL_LEN;
        }
      else if (utf && value > 127)
        {
        int n = ord2utf8(value);
        memcpy(argsptr, utf8_buffer, n);
        argsptr += n;
        }
      else
        {
        *argsptr++ = (char)value;
        }
      break;

      default:         /* Even though this should not occur, the string having */
      case DDE_ERROR:  /* been checked above, we need to include the free() */
      free(args);      /* calls so that source checkers do not complain. */
      free(argsvector);
      return 0;
      }

    length -= (string - begin);
    }

  else *argsptr++ = *string;

  /* Advance along the string */

  string++;
  length--;
  }

*argsptr++ = '\0';
*argsvectorptr = NULL;

/* Running an external command is system-dependent. Handle Windows and VMS as
necessary, otherwise assume fork(). */

#ifdef WIN32
result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);

#elif defined __VMS
  {
  char cmdbuf[500];
  size_t cmdlen = 0;
  short i = 0;
  BOOL toolong = FALSE;
  int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
  $DESCRIPTOR(cmd, cmdbuf);

  /* Join the arguments with spaces, taking care never to write beyond the end
  of the fixed-size buffer. Each argument needs room for its text, the space
  that follows it, and a terminating zero. */

  cmdbuf[0] = 0;
  while (argsvector[i])
    {
    size_t arglen = strlen(argsvector[i]);
    if (arglen + 2 > sizeof(cmdbuf) - cmdlen)
      {
      toolong = TRUE;
      break;
      }
    memcpy(cmdbuf + cmdlen, argsvector[i], arglen);
    cmdlen += arglen;
    cmdbuf[cmdlen++] = ' ';
    cmdbuf[cmdlen] = 0;
    i++;
    }

  /* A command that does not fit is silently ignored (result remains zero),
  in the same way as a callout string with a syntax error. The length passed
  on excludes the final space. */

  if (!toolong)
    {
    cmd.dsc$w_length = cmdlen - 1;
    status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
    if (!(status & 1)) result = 0;
    else result = retstat & 1 ? 0 : 1;
    }
  }

#else  /* Neither Windows nor VMS */
pid = fork();
if (pid == 0)
  {
  (void)execv(argsvector[0], argsvector);

  /* Control gets here if there is an error, e.g. a non-existent program. Use
  _exit() rather than exit() so that the child does not flush its copy of any
  output that the parent has buffered but not yet written, which would cause
  that output to appear twice. */

  _exit(1);
  }
else if (pid > 0)
  (void)waitpid(pid, &result, 0);
#endif  /* End Windows/VMS/other handling */

free(args);
free(argsvector);

/* Currently negative return values are not supported, only zero (match
continues) or non-zero (match fails). */

return result != 0;
#endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
}
#endif  /* SUPPORT_PCRE2GREP_CALLOUT */
2464
2465
2466
2467 /*************************************************
2468 * Read a portion of the file into buffer *
2469 *************************************************/
2470
2471 static int
fill_buffer(void * handle,int frtype,char * buffer,int length,BOOL input_line_buffered)2472 fill_buffer(void *handle, int frtype, char *buffer, int length,
2473 BOOL input_line_buffered)
2474 {
2475 (void)frtype; /* Avoid warning when not used */
2476
2477 #ifdef SUPPORT_LIBZ
2478 if (frtype == FR_LIBZ)
2479 return gzread((gzFile)handle, buffer, length);
2480 else
2481 #endif
2482
2483 #ifdef SUPPORT_LIBBZ2
2484 if (frtype == FR_LIBBZ2)
2485 return BZ2_bzread((BZFILE *)handle, buffer, length);
2486 else
2487 #endif
2488
2489 return (input_line_buffered ?
2490 read_one_line(buffer, length, (FILE *)handle) :
2491 fread(buffer, 1, length, (FILE *)handle));
2492 }
2493
2494
2495
2496 /*************************************************
2497 * Grep an individual file *
2498 *************************************************/
2499
2500 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2501 times the value of bufthird. The matching point is never allowed to stray into
2502 the top third of the buffer, thus keeping more of the file available for
2503 context printing or for multiline scanning. For large files, the pointer will
2504 be in the middle third most of the time, so the bottom third is available for
2505 "before" context printing.
2506
2507 Arguments:
2508 handle the fopened FILE stream for a normal file
2509 the gzFile pointer when reading is via libz
2510 the BZFILE pointer when reading is via libbz2
2511 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2512 filename the file name or NULL (for errors)
2513 printname the file name if it is to be printed for each match
2514 or NULL if the file name is not to be printed
2515 it cannot be NULL if filenames[_nomatch]_only is set
2516
2517 Returns: 0 if there was at least one match
2518 1 otherwise (no matches)
2519 2 if an overlong line is encountered
2520 3 if there is a read error on a .bz2 file
2521 */
2522
2523 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2524 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2525 {
2526 int rc = 1;
2527 int filepos = 0;
2528 unsigned long int linenumber = 1;
2529 unsigned long int lastmatchnumber = 0;
2530 unsigned long int count = 0;
2531 long int count_matched_lines = 0;
2532 char *lastmatchrestart = main_buffer;
2533 char *ptr = main_buffer;
2534 char *endptr;
2535 PCRE2_SIZE bufflength;
2536 BOOL binary = FALSE;
2537 BOOL endhyphenpending = FALSE;
2538 BOOL lines_printed = FALSE;
2539 BOOL input_line_buffered = line_buffered;
2540 FILE *in = NULL; /* Ensure initialized */
2541
2542 /* Do the first read into the start of the buffer and set up the pointer to end
2543 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2544 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2545 fail. */
2546
2547 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2548 {
2549 in = (FILE *)handle;
2550 if (is_file_tty(in)) input_line_buffered = TRUE;
2551 }
2552 else input_line_buffered = FALSE;
2553
2554 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2555 input_line_buffered);
2556
2557 #ifdef SUPPORT_LIBBZ2
2558 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE */
2559 #endif
2560
2561 endptr = main_buffer + bufflength;
2562
2563 /* Unless binary-files=text, see if we have a binary file. This uses the same
2564 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2565 file. However, when the newline convention is binary zero, we can't do this. */
2566
2567 if (binary_files != BIN_TEXT)
2568 {
2569 if (endlinetype != PCRE2_NEWLINE_NUL)
2570 binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2571 != NULL;
2572 if (binary && binary_files == BIN_NOMATCH) return 1;
2573 }
2574
2575 /* Loop while the current pointer is not at the end of the file. For large
2576 files, endptr will be at the end of the buffer when we are in the middle of the
2577 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2578 way, the buffer is shifted left and re-filled. */
2579
2580 while (ptr < endptr)
2581 {
2582 int endlinelength;
2583 int mrc = 0;
2584 unsigned int options = 0;
2585 BOOL match;
2586 BOOL line_matched = FALSE;
2587 char *t = ptr;
2588 PCRE2_SIZE length, linelength;
2589 PCRE2_SIZE startoffset = 0;
2590
2591 /* If the -m option set a limit for the number of matched or non-matched
2592 lines, check it here. A limit of zero means that no matching is ever done.
2593 For stdin from a file, set the file position. */
2594
2595 if (count_limit >= 0 && count_matched_lines >= count_limit)
2596 {
2597 if (frtype == FR_PLAIN && filename == stdin_name && !is_file_tty(handle))
2598 (void)fseek(handle, (long int)filepos, SEEK_SET);
2599 rc = (count_limit == 0)? 1 : 0;
2600 break;
2601 }
2602
2603 /* At this point, ptr is at the start of a line. We need to find the length
2604 of the subject string to pass to pcre2_match(). In multiline mode, it is the
2605 length remainder of the data in the buffer. Otherwise, it is the length of
2606 the next line, excluding the terminating newline. After matching, we always
2607 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2608 option is used for compiling, so that any match is constrained to be in the
2609 first line. */
2610
2611 t = end_of_line(t, endptr, &endlinelength);
2612 linelength = t - ptr - endlinelength;
2613 length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2614
2615 /* Check to see if the line we are looking at extends right to the very end
2616 of the buffer without a line terminator. This means the line is too long to
2617 handle at the current buffer size. Until the buffer reaches its maximum size,
2618 try doubling it and reading more data. */
2619
2620 if (endlinelength == 0 && t == main_buffer + bufsize)
2621 {
2622 if (bufthird < max_bufthird)
2623 {
2624 char *new_buffer;
2625 int new_bufthird = 2*bufthird;
2626
2627 if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2628 new_buffer = (char *)malloc(3*new_bufthird);
2629
2630 if (new_buffer == NULL)
2631 {
2632 fprintf(stderr,
2633 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2634 "pcre2grep: not enough memory to increase the buffer size to %d\n",
2635 linenumber,
2636 (filename == NULL)? "" : " of file ",
2637 (filename == NULL)? "" : filename,
2638 new_bufthird);
2639 return 2;
2640 }
2641
2642 /* Copy the data and adjust pointers to the new buffer location. */
2643
2644 memcpy(new_buffer, main_buffer, bufsize);
2645 bufthird = new_bufthird;
2646 bufsize = 3*bufthird;
2647 ptr = new_buffer + (ptr - main_buffer);
2648 lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2649 free(main_buffer);
2650 main_buffer = new_buffer;
2651
2652 /* Read more data into the buffer and then try to find the line ending
2653 again. */
2654
2655 bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2656 bufsize - bufflength, input_line_buffered);
2657 endptr = main_buffer + bufflength;
2658 continue;
2659 }
2660 else
2661 {
2662 fprintf(stderr,
2663 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2664 "pcre2grep: the maximum buffer size is %d\n"
2665 "pcre2grep: use the --max-buffer-size option to change it\n",
2666 linenumber,
2667 (filename == NULL)? "" : " of file ",
2668 (filename == NULL)? "" : filename,
2669 bufthird);
2670 return 2;
2671 }
2672 }
2673
2674 /* Extra processing for Jeffrey Friedl's debugging. */
2675
2676 #ifdef JFRIEDL_DEBUG
2677 if (jfriedl_XT || jfriedl_XR)
2678 {
2679 # include <sys/time.h>
2680 # include <time.h>
2681 struct timeval start_time, end_time;
2682 struct timezone dummy;
2683 int i;
2684
2685 if (jfriedl_XT)
2686 {
2687 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2688 const char *orig = ptr;
2689 ptr = malloc(newlen + 1);
2690 if (!ptr) {
2691 printf("out of memory");
2692 pcre2grep_exit(2);
2693 }
2694 endptr = ptr;
2695 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2696 for (i = 0; i < jfriedl_XT; i++) {
2697 strncpy(endptr, orig, length);
2698 endptr += length;
2699 }
2700 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2701 length = newlen;
2702 }
2703
2704 if (gettimeofday(&start_time, &dummy) != 0)
2705 perror("bad gettimeofday");
2706
2707
2708 for (i = 0; i < jfriedl_XR; i++)
2709 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2710 PCRE2_NOTEMPTY, offsets, offset_size) >= 0);
2711
2712 if (gettimeofday(&end_time, &dummy) != 0)
2713 perror("bad gettimeofday");
2714
2715 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2716 -
2717 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2718
2719 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2720 return 0;
2721 }
2722 #endif
2723
2724 /* We come back here after a match when only_matching_count is non-zero, in
2725 order to find any further matches in the same line. This applies to
2726 --only-matching, --file-offsets, and --line-offsets. */
2727
2728 ONLY_MATCHING_RESTART:
2729
2730 /* Run through all the patterns until one matches or there is an error other
2731 than NOMATCH. This code is in a subroutine so that it can be re-used for
2732 finding subsequent matches when colouring matched lines. After finding one
2733 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2734 this line. */
2735
2736 match = match_patterns(ptr, length, options, startoffset, &mrc);
2737 options = PCRE2_NOTEMPTY;
2738
2739 /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2740 only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2741 return code - to output data lines, so that binary zeroes are treated as just
2742 another data character. */
2743
2744 if (match != invert)
2745 {
2746 BOOL hyphenprinted = FALSE;
2747
2748 /* We've failed if we want a file that doesn't have any matches. */
2749
2750 if (filenames == FN_NOMATCH_ONLY) return 1;
2751
2752 /* Remember that this line matched (for counting matched lines) */
2753
2754 line_matched = TRUE;
2755
2756 /* If all we want is a yes/no answer, we can return immediately. */
2757
2758 if (quiet) return 0;
2759
2760 /* Just count if just counting is wanted. */
2761
2762 else if (count_only || show_total_count) count++;
2763
2764 /* When handling a binary file and binary-files==binary, the "binary"
2765 variable will be set true (it's false in all other cases). In this
2766 situation we just want to output the file name. No need to scan further. */
2767
2768 else if (binary)
2769 {
2770 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2771 return 0;
2772 }
2773
2774 /* Likewise, if all we want is a file name, there is no need to scan any
2775 more lines in the file. */
2776
2777 else if (filenames == FN_MATCH_ONLY)
2778 {
2779 fprintf(stdout, "%s" STDOUT_NL, printname);
2780 return 0;
2781 }
2782
2783 /* The --only-matching option prints just the substring that matched,
2784 and/or one or more captured portions of it, as long as these strings are
2785 not empty. The --file-offsets and --line-offsets options output offsets for
2786 the matching substring (all three set only_matching_count non-zero). None
2787 of these mutually exclusive options prints any context. Afterwards, adjust
2788 the start and then jump back to look for further matches in the same line.
2789 If we are in invert mode, however, nothing is printed and we do not restart
2790 - this could still be useful because the return code is set. */
2791
2792 else if (only_matching_count != 0)
2793 {
2794 if (!invert)
2795 {
2796 PCRE2_SIZE oldstartoffset;
2797
2798 if (printname != NULL) fprintf(stdout, "%s:", printname);
2799 if (number) fprintf(stdout, "%lu:", linenumber);
2800
2801 /* Handle --line-offsets */
2802
2803 if (line_offsets)
2804 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2805 (int)(offsets[1] - offsets[0]));
2806
2807 /* Handle --file-offsets */
2808
2809 else if (file_offsets)
2810 fprintf(stdout, "%d,%d" STDOUT_NL,
2811 (int)(filepos + ptr + offsets[0] - ptr),
2812 (int)(offsets[1] - offsets[0]));
2813
2814 /* Handle --output (which has already been syntax checked) */
2815
2816 else if (output_text != NULL)
2817 {
2818 if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2819 (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2820 number)
2821 fprintf(stdout, STDOUT_NL);
2822 }
2823
2824 /* Handle --only-matching, which may occur many times */
2825
2826 else
2827 {
2828 BOOL printed = FALSE;
2829 omstr *om;
2830
2831 for (om = only_matching; om != NULL; om = om->next)
2832 {
2833 int n = om->groupnum;
2834 if (n == 0 || n < mrc)
2835 {
2836 int plen = offsets[2*n + 1] - offsets[2*n];
2837 if (plen > 0)
2838 {
2839 if (printed && om_separator != NULL)
2840 fprintf(stdout, "%s", om_separator);
2841 print_match(ptr + offsets[n*2], plen);
2842 printed = TRUE;
2843 }
2844 }
2845 }
2846
2847 if (printed || printname != NULL || number)
2848 fprintf(stdout, STDOUT_NL);
2849 }
2850
2851 /* Prepare to repeat to find the next match in the line. */
2852
2853 match = FALSE;
2854 if (line_buffered) fflush(stdout);
2855 rc = 0; /* Had some success */
2856
2857 /* If the pattern contained a lookbehind that included \K, it is
2858 possible that the end of the match might be at or before the actual
2859 starting offset we have just used. In this case, start one character
2860 further on. */
2861
2862 startoffset = offsets[1]; /* Restart after the match */
2863 oldstartoffset = pcre2_get_startchar(match_data);
2864 if (startoffset <= oldstartoffset)
2865 {
2866 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2867 startoffset = oldstartoffset + 1;
2868 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2869 }
2870
2871 /* If the current match ended past the end of the line (only possible
2872 in multiline mode), we must move on to the line in which it did end
2873 before searching for more matches. */
2874
2875 while (startoffset > linelength)
2876 {
2877 ptr += linelength + endlinelength;
2878 filepos += (int)(linelength + endlinelength);
2879 linenumber++;
2880 startoffset -= (int)(linelength + endlinelength);
2881 t = end_of_line(ptr, endptr, &endlinelength);
2882 linelength = t - ptr - endlinelength;
2883 length = (PCRE2_SIZE)(endptr - ptr);
2884 }
2885
2886 goto ONLY_MATCHING_RESTART;
2887 }
2888 }
2889
2890 /* This is the default case when none of the above options is set. We print
2891 the matching lines(s), possibly preceded and/or followed by other lines of
2892 context. */
2893
2894 else
2895 {
2896 lines_printed = TRUE;
2897
2898 /* See if there is a requirement to print some "after" lines from a
2899 previous match. We never print any overlaps. */
2900
2901 if (after_context > 0 && lastmatchnumber > 0)
2902 {
2903 int ellength;
2904 int linecount = 0;
2905 char *p = lastmatchrestart;
2906
2907 while (p < ptr && linecount < after_context)
2908 {
2909 p = end_of_line(p, ptr, &ellength);
2910 linecount++;
2911 }
2912
2913 /* It is important to advance lastmatchrestart during this printing so
2914 that it interacts correctly with any "before" printing below. Print
2915 each line's data using fwrite() in case there are binary zeroes. */
2916
2917 while (lastmatchrestart < p)
2918 {
2919 char *pp = lastmatchrestart;
2920 if (printname != NULL) fprintf(stdout, "%s-", printname);
2921 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2922 pp = end_of_line(pp, endptr, &ellength);
2923 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2924 lastmatchrestart = pp;
2925 }
2926 if (lastmatchrestart != ptr) hyphenpending = TRUE;
2927 }
2928
2929 /* If there were non-contiguous lines printed above, insert hyphens. */
2930
2931 if (hyphenpending)
2932 {
2933 fprintf(stdout, "--" STDOUT_NL);
2934 hyphenpending = FALSE;
2935 hyphenprinted = TRUE;
2936 }
2937
2938 /* See if there is a requirement to print some "before" lines for this
2939 match. Again, don't print overlaps. */
2940
2941 if (before_context > 0)
2942 {
2943 int linecount = 0;
2944 char *p = ptr;
2945
2946 while (p > main_buffer &&
2947 (lastmatchnumber == 0 || p > lastmatchrestart) &&
2948 linecount < before_context)
2949 {
2950 linecount++;
2951 p = previous_line(p, main_buffer);
2952 }
2953
2954 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2955 fprintf(stdout, "--" STDOUT_NL);
2956
2957 while (p < ptr)
2958 {
2959 int ellength;
2960 char *pp = p;
2961 if (printname != NULL) fprintf(stdout, "%s-", printname);
2962 if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2963 pp = end_of_line(pp, endptr, &ellength);
2964 FWRITE_IGNORE(p, 1, pp - p, stdout);
2965 p = pp;
2966 }
2967 }
2968
2969 /* Now print the matching line(s); ensure we set hyphenpending at the end
2970 of the file if any context lines are being output. */
2971
2972 if (after_context > 0 || before_context > 0)
2973 endhyphenpending = TRUE;
2974
2975 if (printname != NULL) fprintf(stdout, "%s:", printname);
2976 if (number) fprintf(stdout, "%lu:", linenumber);
2977
2978 /* This extra option, for Jeffrey Friedl's debugging requirements,
2979 replaces the matched string, or a specific captured string if it exists,
2980 with X. When this happens, colouring is ignored. */
2981
2982 #ifdef JFRIEDL_DEBUG
2983 if (S_arg >= 0 && S_arg < mrc)
2984 {
2985 int first = S_arg * 2;
2986 int last = first + 1;
2987 FWRITE_IGNORE(ptr, 1, offsets[first], stdout);
2988 fprintf(stdout, "X");
2989 FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2990 }
2991 else
2992 #endif
2993
2994 /* In multiline mode, or if colouring, we have to split the line(s) up
2995 and search for further matches, but not of course if the line is a
2996 non-match. In multiline mode this is necessary in case there is another
2997 match that spans the end of the current line. When colouring we want to
2998 colour all matches. */
2999
3000 if ((multiline || do_colour) && !invert)
3001 {
3002 int plength;
3003 PCRE2_SIZE endprevious;
3004
3005 /* The use of \K may make the end offset earlier than the start. In
3006 this situation, swap them round. */
3007
3008 if (offsets[0] > offsets[1])
3009 {
3010 PCRE2_SIZE temp = offsets[0];
3011 offsets[0] = offsets[1];
3012 offsets[1] = temp;
3013 }
3014
3015 FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3016 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3017
3018 for (;;)
3019 {
3020 PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3021
3022 endprevious = offsets[1];
3023 startoffset = endprevious; /* Advance after previous match. */
3024
3025 /* If the pattern contained a lookbehind that included \K, it is
3026 possible that the end of the match might be at or before the actual
3027 starting offset we have just used. In this case, start one character
3028 further on. */
3029
3030 if (startoffset <= oldstartoffset)
3031 {
3032 startoffset = oldstartoffset + 1;
3033 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3034 }
3035
3036 /* If the current match ended past the end of the line (only possible
3037 in multiline mode), we must move on to the line in which it did end
3038 before searching for more matches. Because the PCRE2_FIRSTLINE option
3039 is set, the start of the match will always be before the first
3040 newline sequence. */
3041
3042 while (startoffset > linelength + endlinelength)
3043 {
3044 ptr += linelength + endlinelength;
3045 filepos += (int)(linelength + endlinelength);
3046 linenumber++;
3047 startoffset -= (int)(linelength + endlinelength);
3048 endprevious -= (int)(linelength + endlinelength);
3049 t = end_of_line(ptr, endptr, &endlinelength);
3050 linelength = t - ptr - endlinelength;
3051 length = (PCRE2_SIZE)(endptr - ptr);
3052 }
3053
3054 /* If startoffset is at the exact end of the line it means this
3055 complete line was the final part of the match, so there is nothing
3056 more to do. */
3057
3058 if (startoffset == linelength + endlinelength) break;
3059
3060 /* Otherwise, run a match from within the final line, and if found,
3061 loop for any that may follow. */
3062
3063 if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3064
3065 /* The use of \K may make the end offset earlier than the start. In
3066 this situation, swap them round. */
3067
3068 if (offsets[0] > offsets[1])
3069 {
3070 PCRE2_SIZE temp = offsets[0];
3071 offsets[0] = offsets[1];
3072 offsets[1] = temp;
3073 }
3074
3075 FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3076 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3077 }
3078
3079 /* In multiline mode, we may have already printed the complete line
3080 and its line-ending characters (if they matched the pattern), so there
3081 may be no more to print. */
3082
3083 plength = (int)((linelength + endlinelength) - endprevious);
3084 if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3085 }
3086
3087 /* Not colouring or multiline; no need to search for further matches. */
3088
3089 else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3090 }
3091
3092 /* End of doing what has to be done for a match. If --line-buffered was
3093 given, flush the output. */
3094
3095 if (line_buffered) fflush(stdout);
3096 rc = 0; /* Had some success */
3097
3098 /* Remember where the last match happened for after_context. We remember
3099 where we are about to restart, and that line's number. */
3100
3101 lastmatchrestart = ptr + linelength + endlinelength;
3102 lastmatchnumber = linenumber + 1;
3103
3104 /* If a line was printed and we are now at the end of the file and the last
3105 line had no newline, output one. */
3106
3107 if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3108 write_final_newline();
3109 }
3110
3111 /* For a match in multiline inverted mode (which of course did not cause
3112 anything to be printed), we have to move on to the end of the match before
3113 proceeding. */
3114
3115 if (multiline && invert && match)
3116 {
3117 int ellength;
3118 char *endmatch = ptr + offsets[1];
3119 t = ptr;
3120 while (t < endmatch)
3121 {
3122 t = end_of_line(t, endptr, &ellength);
3123 if (t <= endmatch) linenumber++; else break;
3124 }
3125 endmatch = end_of_line(endmatch, endptr, &ellength);
3126 linelength = endmatch - ptr - ellength;
3127 }
3128
3129 /* Advance to after the newline and increment the line number. The file
3130 offset to the current line is maintained in filepos. */
3131
3132 END_ONE_MATCH:
3133 ptr += linelength + endlinelength;
3134 filepos += (int)(linelength + endlinelength);
3135 linenumber++;
3136
3137 /* If there was at least one match (or a non-match, as required) in the line,
3138 increment the count for the -m option. */
3139
3140 if (line_matched) count_matched_lines++;
3141
3142 /* If input is line buffered, and the buffer is not yet full, read another
3143 line and add it into the buffer. */
3144
3145 if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3146 {
3147 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
3148 bufflength += add;
3149 endptr += add;
3150 }
3151
3152 /* If we haven't yet reached the end of the file (the buffer is full), and
3153 the current point is in the top 1/3 of the buffer, slide the buffer down by
3154 1/3 and refill it. Before we do this, if some unprinted "after" lines are
3155 about to be lost, print them. */
3156
3157 if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3158 {
3159 if (after_context > 0 &&
3160 lastmatchnumber > 0 &&
3161 lastmatchrestart < main_buffer + bufthird)
3162 {
3163 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3164 lastmatchnumber = 0; /* Indicates no after lines pending */
3165 }
3166
3167 /* Now do the shuffle */
3168
3169 (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3170 ptr -= bufthird;
3171
3172 bufflength = 2*bufthird + fill_buffer(handle, frtype,
3173 main_buffer + 2*bufthird, bufthird, input_line_buffered);
3174 endptr = main_buffer + bufflength;
3175
3176 /* Adjust any last match point */
3177
3178 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3179 }
3180 } /* Loop through the whole file */
3181
3182 /* End of file; print final "after" lines if wanted; do_after_lines sets
3183 hyphenpending if it prints something. */
3184
3185 if (only_matching_count == 0 && !(count_only|show_total_count))
3186 {
3187 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3188 hyphenpending |= endhyphenpending;
3189 }
3190
3191 /* Print the file name if we are looking for those without matches and there
3192 were none. If we found a match, we won't have got this far. */
3193
3194 if (filenames == FN_NOMATCH_ONLY)
3195 {
3196 fprintf(stdout, "%s" STDOUT_NL, printname);
3197 return 0;
3198 }
3199
3200 /* Print the match count if wanted */
3201
3202 if (count_only && !quiet)
3203 {
3204 if (count > 0 || !omit_zero_count)
3205 {
3206 if (printname != NULL && filenames != FN_NONE)
3207 fprintf(stdout, "%s:", printname);
3208 fprintf(stdout, "%lu" STDOUT_NL, count);
3209 counts_printed++;
3210 }
3211 }
3212
3213 total_count += count; /* Can be set without count_only */
3214 return rc;
3215 }
3216
3217
3218
3219 /*************************************************
3220 * Grep a file or recurse into a directory *
3221 *************************************************/
3222
3223 /* Given a path name, if it's a directory, scan all the files if we are
3224 recursing; if it's a file, grep it.
3225
3226 Arguments:
3227 pathname the path to investigate
3228 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
3229 only_one_at_top TRUE if the path is the only one at toplevel
3230
3231 Returns: -1 the file/directory was skipped
3232 0 if there was at least one match
3233 1 if there were no matches
3234 2 there was some kind of error
3235
3236 However, file opening failures are suppressed if "silent" is set.
3237 */
3238
3239 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3240 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3241 {
3242 int rc = 1;
3243 int frtype;
3244 void *handle;
3245 char *lastcomp;
3246 FILE *in = NULL; /* Ensure initialized */
3247
3248 #ifdef SUPPORT_LIBZ
3249 gzFile ingz = NULL;
3250 #endif
3251
3252 #ifdef SUPPORT_LIBBZ2
3253 BZFILE *inbz2 = NULL;
3254 #endif
3255
3256 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3257 int pathlen;
3258 #endif
3259
3260 #if defined NATIVE_ZOS
3261 int zos_type;
3262 FILE *zos_test_file;
3263 #endif
3264
3265 /* If the file name is "-" we scan stdin */
3266
3267 if (strcmp(pathname, "-") == 0)
3268 {
3269 return pcre2grep(stdin, FR_PLAIN, stdin_name,
3270 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3271 stdin_name : NULL);
3272 }
3273
3274 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3275 directories, whereas --include and --exclude apply to everything else. The test
3276 is against the final component of the path. */
3277
3278 lastcomp = strrchr(pathname, FILESEP);
3279 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3280
3281 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3282 Otherwise, scan the directory and recurse for each path within it. The scanning
3283 code is localized so it can be made system-specific. */
3284
3285
3286 /* For z/OS, determine the file type. */
3287
3288 #if defined NATIVE_ZOS
3289 zos_test_file = fopen(pathname,"rb");
3290
3291 if (zos_test_file == NULL)
3292 {
3293 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3294 pathname, strerror(errno));
3295 return -1;
3296 }
3297 zos_type = identifyzosfiletype (zos_test_file);
3298 fclose (zos_test_file);
3299
3300 /* Handle a PDS in separate code */
3301
3302 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3303 {
3304 return travelonpdsdir (pathname, only_one_at_top);
3305 }
3306
3307 /* Deal with regular files in the normal way below. These types are:
3308 zos_type == __ZOS_PDS_MEMBER
3309 zos_type == __ZOS_PS
3310 zos_type == __ZOS_VSAM_KSDS
3311 zos_type == __ZOS_VSAM_ESDS
3312 zos_type == __ZOS_VSAM_RRDS
3313 */
3314
3315 /* Handle a z/OS directory using common code. */
3316
3317 else if (zos_type == __ZOS_HFS)
3318 {
3319 #endif /* NATIVE_ZOS */
3320
3321
3322 /* Handle directories: common code for all OS */
3323
3324 if (isdirectory(pathname))
3325 {
3326 if (dee_action == dee_SKIP ||
3327 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3328 return -1;
3329
3330 if (dee_action == dee_RECURSE)
3331 {
3332 char childpath[FNBUFSIZ];
3333 char *nextfile;
3334 directory_type *dir = opendirectory(pathname);
3335
3336 if (dir == NULL)
3337 {
3338 if (!silent)
3339 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3340 strerror(errno));
3341 return 2;
3342 }
3343
3344 while ((nextfile = readdirectory(dir)) != NULL)
3345 {
3346 int frc;
3347 int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3348 if (fnlength > FNBUFSIZ)
3349 {
3350 fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3351 rc = 2;
3352 break;
3353 }
3354 sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3355
3356 /* If the realpath() function is available, we can try to prevent endless
3357 recursion caused by a symlink pointing to a parent directory (GitHub
3358 issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann.
3359 Modified to avoid using strlcat() because that isn't a standard C
3360 function, and also modified not to copy back the fully resolved path,
3361 because that affects the output from pcre2grep. */
3362
3363 #ifdef HAVE_REALPATH
3364 {
3365 char resolvedpath[PATH_MAX];
3366 BOOL isSame;
3367 size_t rlen;
3368 if (realpath(childpath, resolvedpath) == NULL)
3369 continue; /* This path is invalid - we can skip processing this */
3370 isSame = strcmp(pathname, resolvedpath) == 0;
3371 if (isSame) continue; /* We have a recursion */
3372 rlen = strlen(resolvedpath);
3373 if (rlen++ < sizeof(resolvedpath) - 3)
3374 {
3375 BOOL contained;
3376 strcat(resolvedpath, "/");
3377 contained = strncmp(pathname, resolvedpath, rlen) == 0;
3378 if (contained) continue; /* We have a recursion */
3379 }
3380 }
3381 #endif /* HAVE_REALPATH */
3382
3383 frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3384 if (frc > 1) rc = frc;
3385 else if (frc == 0 && rc == 1) rc = 0;
3386 }
3387
3388 closedirectory(dir);
3389 return rc;
3390 }
3391 }
3392
3393 #ifdef WIN32
3394 if (iswild(pathname))
3395 {
3396 char buffer[1024];
3397 char *nextfile;
3398 char *name;
3399 directory_type *dir = opendirectory(pathname);
3400
3401 if (dir == NULL)
3402 return 0;
3403
3404 for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3405 if (*nextfile == '/' || *nextfile == '\\')
3406 name = nextfile + 1;
3407 *name = 0;
3408
3409 while ((nextfile = readdirectory(dir)) != NULL)
3410 {
3411 int frc;
3412 sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3413 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3414 if (frc > 1) rc = frc;
3415 else if (frc == 0 && rc == 1) rc = 0;
3416 }
3417
3418 closedirectory(dir);
3419 return rc;
3420 }
3421 #endif
3422
3423 #if defined NATIVE_ZOS
3424 }
3425 #endif
3426
3427 /* If the file is not a directory, check for a regular file, and if it is not,
3428 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3429 exclusion. */
3430
3431 else if (
3432 #if defined NATIVE_ZOS
3433 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3434 #else /* all other OS */
3435 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3436 #endif
3437 !test_incexc(lastcomp, include_patterns, exclude_patterns))
3438 return -1; /* File skipped */
3439
3440 /* Control reaches here if we have a regular file, or if we have a directory
3441 and recursion or skipping was not requested, or if we have anything else and
3442 skipping was not requested. The scan proceeds. If this is the first and only
3443 argument at top level, we don't show the file name, unless we are only showing
3444 the file name, or the filename was forced (-H). */
3445
3446 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3447 pathlen = (int)(strlen(pathname));
3448 #endif
3449
3450 /* Open using zlib if it is supported and the file name ends with .gz. */
3451
3452 #ifdef SUPPORT_LIBZ
3453 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3454 {
3455 ingz = gzopen(pathname, "rb");
3456 if (ingz == NULL)
3457 {
3458 if (!silent)
3459 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3460 strerror(errno));
3461 return 2;
3462 }
3463 handle = (void *)ingz;
3464 frtype = FR_LIBZ;
3465 }
3466 else
3467 #endif
3468
3469 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3470
3471 #ifdef SUPPORT_LIBBZ2
3472 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3473 {
3474 inbz2 = BZ2_bzopen(pathname, "rb");
3475 handle = (void *)inbz2;
3476 frtype = FR_LIBBZ2;
3477 }
3478 else
3479 #endif
3480
3481 /* Otherwise use plain fopen(). The label is so that we can come back here if
3482 an attempt to read a .bz2 file indicates that it really is a plain file. */
3483
3484 #ifdef SUPPORT_LIBBZ2
3485 PLAIN_FILE:
3486 #endif
3487 {
3488 in = fopen(pathname, "rb");
3489 handle = (void *)in;
3490 frtype = FR_PLAIN;
3491 }
3492
3493 /* All the opening methods return errno when they fail. */
3494
3495 if (handle == NULL)
3496 {
3497 if (!silent)
3498 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3499 strerror(errno));
3500 return 2;
3501 }
3502
3503 /* Now grep the file */
3504
3505 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3506 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3507
3508 /* Close in an appropriate manner. */
3509
3510 #ifdef SUPPORT_LIBZ
3511 if (frtype == FR_LIBZ)
3512 gzclose(ingz);
3513 else
3514 #endif
3515
3516 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3517 read failed. If the error indicates that the file isn't in fact bzipped, try
3518 again as a normal file. */
3519
3520 #ifdef SUPPORT_LIBBZ2
3521 if (frtype == FR_LIBBZ2)
3522 {
3523 if (rc == 3)
3524 {
3525 int errnum;
3526 const char *err = BZ2_bzerror(inbz2, &errnum);
3527 if (errnum == BZ_DATA_ERROR_MAGIC)
3528 {
3529 BZ2_bzclose(inbz2);
3530 goto PLAIN_FILE;
3531 }
3532 else if (!silent)
3533 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3534 pathname, err);
3535 rc = 2; /* The normal "something went wrong" code */
3536 }
3537 BZ2_bzclose(inbz2);
3538 }
3539 else
3540 #endif
3541
3542 /* Normal file close */
3543
3544 fclose(in);
3545
3546 /* Pass back the yield from pcre2grep(). */
3547
3548 return rc;
3549 }
3550
3551
3552
3553 /*************************************************
3554 * Handle a no-data option *
3555 *************************************************/
3556
3557 static int
handle_option(int letter,int options)3558 handle_option(int letter, int options)
3559 {
3560 switch(letter)
3561 {
3562 case N_FOFFSETS: file_offsets = TRUE; break;
3563 case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3564 case N_LBUFFER: line_buffered = TRUE; break;
3565 case N_LOFFSETS: line_offsets = number = TRUE; break;
3566 case N_NOJIT: use_jit = FALSE; break;
3567 case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
3568 case 'a': binary_files = BIN_TEXT; break;
3569 case 'c': count_only = TRUE; break;
3570 case 'F': options |= PCRE2_LITERAL; break;
3571 case 'H': filenames = FN_FORCE; break;
3572 case 'I': binary_files = BIN_NOMATCH; break;
3573 case 'h': filenames = FN_NONE; break;
3574 case 'i': options |= PCRE2_CASELESS; break;
3575 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3576 case 'L': filenames = FN_NOMATCH_ONLY; break;
3577 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3578 case 'n': number = TRUE; break;
3579
3580 case 'o':
3581 only_matching_last = add_number(0, only_matching_last);
3582 if (only_matching == NULL) only_matching = only_matching_last;
3583 break;
3584
3585 case 'q': quiet = TRUE; break;
3586 case 'r': dee_action = dee_RECURSE; break;
3587 case 's': silent = TRUE; break;
3588 case 't': show_total_count = TRUE; break;
3589 case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3590 case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
3591 case 'v': invert = TRUE; break;
3592 case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3593 case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3594
3595 case 'V':
3596 {
3597 unsigned char buffer[128];
3598 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3599 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3600 }
3601 pcre2grep_exit(0);
3602 break;
3603
3604 default:
3605 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3606 pcre2grep_exit(usage(2));
3607 }
3608
3609 return options;
3610 }
3611
3612
3613
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. The suffix is chosen from the
last two decimal digits: numbers ending in 11, 12, or 13 take "th"; otherwise a
final digit of 1, 2, or 3 takes "st", "nd", or "rd", and everything else takes
"th".

Argument:   the number
Returns:    pointer to a static buffer holding the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
static const char *const suffixes[] = { "th", "st", "nd", "rd" };
static char buffer[14];
int digits = sprintf(buffer, "%d", n);   /* Offset of the terminating zero */
int lasttwo = n % 100;
int lastone = lasttwo % 10;
int which = 0;                           /* Default is "th" */

if (lastone >= 1 && lastone <= 3 && (lasttwo < 11 || lasttwo > 13))
  which = lastone;

strcpy(buffer + digits, suffixes[which]);
return buffer;
}
3638
3639
3640
3641 /*************************************************
3642 * Compile a single pattern *
3643 *************************************************/
3644
3645 /* Do nothing if the pattern has already been compiled. This is the case for
3646 include/exclude patterns read from a file.
3647
3648 When the -F option has been used, each "pattern" may be a list of strings,
3649 separated by line breaks. They will be matched literally. We split such a
3650 string and compile the first substring, inserting an additional block into the
3651 pattern chain.
3652
3653 Arguments:
3654 p points to the pattern block
3655 options the PCRE options
3656 fromfile TRUE if the pattern was read from a file
3657 fromtext file name or identifying text (e.g. "include")
3658 count 0 if this is the only command line pattern, or
3659 number of the command line pattern, or
3660 linenumber for a pattern from a file
3661
3662 Returns: TRUE on success, FALSE after an error
3663 */
3664
3665 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3666 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3667 int count)
3668 {
3669 char *ps;
3670 int errcode;
3671 PCRE2_SIZE patlen, erroffset;
3672 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3673
3674 if (p->compiled != NULL) return TRUE;
3675 ps = p->string;
3676 patlen = p->length;
3677
3678 if ((options & PCRE2_LITERAL) != 0)
3679 {
3680 int ellength;
3681 char *eop = ps + patlen;
3682 char *pe = end_of_line(ps, eop, &ellength);
3683
3684 if (ellength != 0)
3685 {
3686 patlen = pe - ps - ellength;
3687 if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3688 }
3689 }
3690
3691 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3692 &erroffset, compile_context);
3693
3694 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3695 ignore any JIT compiler errors, relying falling back to interpreting if
3696 anything goes wrong with JIT. */
3697
3698 if (p->compiled != NULL)
3699 {
3700 #ifdef SUPPORT_PCRE2GREP_JIT
3701 if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3702 #endif
3703 return TRUE;
3704 }
3705
3706 /* Handle compile errors */
3707
3708 if (erroffset > patlen) erroffset = patlen;
3709 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3710
3711 if (fromfile)
3712 {
3713 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3714 "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3715 }
3716 else
3717 {
3718 if (count == 0)
3719 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3720 fromtext, (int)erroffset, errmessbuffer);
3721 else
3722 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3723 ordin(count), fromtext, (int)erroffset, errmessbuffer);
3724 }
3725
3726 return FALSE;
3727 }
3728
3729
3730
3731 /*************************************************
3732 * Read and compile a file of patterns *
3733 *************************************************/
3734
3735 /* This is used for --filelist, --include-from, and --exclude-from.
3736
3737 Arguments:
3738 name the name of the file; "-" is stdin
3739 patptr pointer to the pattern chain anchor
3740 patlastptr pointer to the last pattern pointer
3741
3742 Returns: TRUE if all went well
3743 */
3744
3745 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3746 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3747 {
3748 int linenumber = 0;
3749 PCRE2_SIZE patlen;
3750 FILE *f;
3751 const char *filename;
3752 char buffer[MAXPATLEN+20];
3753
3754 if (strcmp(name, "-") == 0)
3755 {
3756 f = stdin;
3757 filename = stdin_name;
3758 }
3759 else
3760 {
3761 f = fopen(name, "r");
3762 if (f == NULL)
3763 {
3764 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3765 return FALSE;
3766 }
3767 filename = name;
3768 }
3769
3770 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3771 {
3772 while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3773 linenumber++;
3774 if (patlen == 0) continue; /* Skip blank lines */
3775
3776 /* Note: this call to add_pattern() puts a pointer to the local variable
3777 "buffer" into the pattern chain. However, that pointer is used only when
3778 compiling the pattern, which happens immediately below, so we flatten it
3779 afterwards, as a precaution against any later code trying to use it. */
3780
3781 *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3782 if (*patlastptr == NULL)
3783 {
3784 if (f != stdin) fclose(f);
3785 return FALSE;
3786 }
3787 if (*patptr == NULL) *patptr = *patlastptr;
3788
3789 /* This loop is needed because compiling a "pattern" when -F is set may add
3790 on additional literal patterns if the original contains a newline. In the
3791 common case, it never will, because read_one_line() stops at a newline.
3792 However, the -N option can be used to give pcre2grep a different newline
3793 setting. */
3794
3795 for(;;)
3796 {
3797 if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3798 linenumber))
3799 {
3800 if (f != stdin) fclose(f);
3801 return FALSE;
3802 }
3803 (*patlastptr)->string = NULL; /* Insurance */
3804 if ((*patlastptr)->next == NULL) break;
3805 *patlastptr = (*patlastptr)->next;
3806 }
3807 }
3808
3809 if (f != stdin) fclose(f);
3810 return TRUE;
3811 }
3812
3813
3814
3815 /*************************************************
3816 * Main program *
3817 *************************************************/
3818
3819 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3820
3821 int
main(int argc,char ** argv)3822 main(int argc, char **argv)
3823 {
3824 int i, j;
3825 int rc = 1;
3826 BOOL only_one_at_top;
3827 patstr *cp;
3828 fnstr *fn;
3829 omstr *om;
3830 const char *locale_from = "--locale";
3831
3832 #ifdef SUPPORT_PCRE2GREP_JIT
3833 pcre2_jit_stack *jit_stack = NULL;
3834 #endif
3835
3836 /* In Windows, stdout is set up as a text stream, which means that \n is
3837 converted to \r\n. This causes output lines that are copied from the input to
3838 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3839 that stdout is a binary stream. Note that this means all other output to stdout
3840 must use STDOUT_NL to terminate lines. */
3841
3842 #ifdef WIN32
3843 _setmode(_fileno(stdout), _O_BINARY);
3844 #endif
3845
3846 /* Process the options */
3847
3848 for (i = 1; i < argc; i++)
3849 {
3850 option_item *op = NULL;
3851 char *option_data = (char *)""; /* default to keep compiler happy */
3852 BOOL longop;
3853 BOOL longopwasequals = FALSE;
3854
3855 if (argv[i][0] != '-') break;
3856
3857 /* If we hit an argument that is just "-", it may be a reference to STDIN,
3858 but only if we have previously had -e or -f to define the patterns. */
3859
3860 if (argv[i][1] == 0)
3861 {
3862 if (pattern_files != NULL || patterns != NULL) break;
3863 else pcre2grep_exit(usage(2));
3864 }
3865
3866 /* Handle a long name option, or -- to terminate the options */
3867
3868 if (argv[i][1] == '-')
3869 {
3870 char *arg = argv[i] + 2;
3871 char *argequals = strchr(arg, '=');
3872
3873 if (*arg == 0) /* -- terminates options */
3874 {
3875 i++;
3876 break; /* out of the options-handling loop */
3877 }
3878
3879 longop = TRUE;
3880
3881 /* Some long options have data that follows after =, for example file=name.
3882 Some options have variations in the long name spelling: specifically, we
3883 allow "regexp" because GNU grep allows it, though I personally go along
3884 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3885 These options are entered in the table as "regex(p)". Options can be in
3886 both these categories. */
3887
3888 for (op = optionlist; op->one_char != 0; op++)
3889 {
3890 char *opbra = strchr(op->long_name, '(');
3891 char *equals = strchr(op->long_name, '=');
3892
3893 /* Handle options with only one spelling of the name */
3894
3895 if (opbra == NULL) /* Does not contain '(' */
3896 {
3897 if (equals == NULL) /* Not thing=data case */
3898 {
3899 if (strcmp(arg, op->long_name) == 0) break;
3900 }
3901 else /* Special case xxx=data */
3902 {
3903 int oplen = (int)(equals - op->long_name);
3904 int arglen = (argequals == NULL)?
3905 (int)strlen(arg) : (int)(argequals - arg);
3906 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3907 {
3908 option_data = arg + arglen;
3909 if (*option_data == '=')
3910 {
3911 option_data++;
3912 longopwasequals = TRUE;
3913 }
3914 break;
3915 }
3916 }
3917 }
3918
3919 /* Handle options with an alternate spelling of the name */
3920
3921 else
3922 {
3923 char buff1[24];
3924 char buff2[24];
3925 int ret;
3926
3927 int baselen = (int)(opbra - op->long_name);
3928 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3929 int arglen = (argequals == NULL || equals == NULL)?
3930 (int)strlen(arg) : (int)(argequals - arg);
3931
3932 if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3933 ret < 0 || ret > (int)sizeof(buff1)) ||
3934 (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3935 fulllen - baselen - 2, opbra + 1),
3936 ret < 0 || ret > (int)sizeof(buff2)))
3937 {
3938 fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3939 op->long_name);
3940 pcre2grep_exit(2);
3941 }
3942
3943 if (strncmp(arg, buff1, arglen) == 0 ||
3944 strncmp(arg, buff2, arglen) == 0)
3945 {
3946 if (equals != NULL && argequals != NULL)
3947 {
3948 option_data = argequals;
3949 if (*option_data == '=')
3950 {
3951 option_data++;
3952 longopwasequals = TRUE;
3953 }
3954 }
3955 break;
3956 }
3957 }
3958 }
3959
3960 if (op->one_char == 0)
3961 {
3962 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3963 pcre2grep_exit(usage(2));
3964 }
3965 }
3966
3967 /* Jeffrey Friedl's debugging harness uses these additional options which
3968 are not in the right form for putting in the option table because they use
3969 only one hyphen, yet are more than one character long. By putting them
3970 separately here, they will not get displayed as part of the help() output,
3971 but I don't think Jeffrey will care about that. */
3972
3973 #ifdef JFRIEDL_DEBUG
3974 else if (strcmp(argv[i], "-pre") == 0) {
3975 jfriedl_prefix = argv[++i];
3976 continue;
3977 } else if (strcmp(argv[i], "-post") == 0) {
3978 jfriedl_postfix = argv[++i];
3979 continue;
3980 } else if (strcmp(argv[i], "-XT") == 0) {
3981 sscanf(argv[++i], "%d", &jfriedl_XT);
3982 continue;
3983 } else if (strcmp(argv[i], "-XR") == 0) {
3984 sscanf(argv[++i], "%d", &jfriedl_XR);
3985 continue;
3986 }
3987 #endif
3988
3989
3990 /* One-char options; many that have no data may be in a single argument; we
3991 continue till we hit the last one or one that needs data. */
3992
3993 else
3994 {
3995 char *s = argv[i] + 1;
3996 longop = FALSE;
3997
3998 while (*s != 0)
3999 {
4000 for (op = optionlist; op->one_char != 0; op++)
4001 {
4002 if (*s == op->one_char) break;
4003 }
4004 if (op->one_char == 0)
4005 {
4006 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
4007 *s, argv[i]);
4008 pcre2grep_exit(usage(2));
4009 }
4010
4011 option_data = s+1;
4012
4013 /* Break out if this is the last character in the string; it's handled
4014 below like a single multi-char option. */
4015
4016 if (*option_data == 0) break;
4017
4018 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
4019 are used for ones that either have a numerical number or defaults, i.e.
4020 the data is optional. If a digit follows, there is data; if not, carry on
4021 with other single-character options in the same string. */
4022
4023 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
4024 {
4025 if (isdigit((unsigned char)s[1])) break;
4026 }
4027 else /* Check for an option with data */
4028 {
4029 if (op->type != OP_NODATA) break;
4030 }
4031
4032 /* Handle a single-character option with no data, then loop for the
4033 next character in the string. */
4034
4035 pcre2_options = handle_option(*s++, pcre2_options);
4036 }
4037 }
4038
4039 /* At this point we should have op pointing to a matched option. If the type
4040 is OP_NODATA, it means that there is no data, and the option might set
4041 something in the PCRE options. */
4042
4043 if (op->type == OP_NODATA)
4044 {
4045 pcre2_options = handle_option(op->one_char, pcre2_options);
4046 continue;
4047 }
4048
4049 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4050 either has a value or defaults to something. It cannot have data in a
4051 separate item. At the moment, the only such options are "colo(u)r",
4052 "only-matching", and Jeffrey Friedl's special -S debugging option. */
4053
4054 if (*option_data == 0 &&
4055 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4056 op->type == OP_OP_NUMBERS))
4057 {
4058 switch (op->one_char)
4059 {
4060 case N_COLOUR:
4061 colour_option = "auto";
4062 break;
4063
4064 case 'o':
4065 only_matching_last = add_number(0, only_matching_last);
4066 if (only_matching == NULL) only_matching = only_matching_last;
4067 break;
4068
4069 #ifdef JFRIEDL_DEBUG
4070 case 'S':
4071 S_arg = 0;
4072 break;
4073 #endif
4074 }
4075 continue;
4076 }
4077
4078 /* Otherwise, find the data string for the option. */
4079
4080 if (*option_data == 0)
4081 {
4082 if (i >= argc - 1 || longopwasequals)
4083 {
4084 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4085 pcre2grep_exit(usage(2));
4086 }
4087 option_data = argv[++i];
4088 }
4089
4090 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4091 added to a chain of numbers. */
4092
4093 if (op->type == OP_OP_NUMBERS)
4094 {
4095 unsigned long int n = decode_number(option_data, op, longop);
4096 omdatastr *omd = (omdatastr *)op->dataptr;
4097 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4098 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4099 }
4100
4101 /* If the option type is OP_PATLIST, it's the -e option, or one of the
4102 include/exclude options, which can be called multiple times to create lists
4103 of patterns. */
4104
4105 else if (op->type == OP_PATLIST)
4106 {
4107 patdatastr *pd = (patdatastr *)op->dataptr;
4108 *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4109 *(pd->lastptr));
4110 if (*(pd->lastptr) == NULL) goto EXIT2;
4111 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4112 }
4113
4114 /* If the option type is OP_FILELIST, it's one of the options that names a
4115 file. */
4116
4117 else if (op->type == OP_FILELIST)
4118 {
4119 fndatastr *fd = (fndatastr *)op->dataptr;
4120 fn = (fnstr *)malloc(sizeof(fnstr));
4121 if (fn == NULL)
4122 {
4123 fprintf(stderr, "pcre2grep: malloc failed\n");
4124 goto EXIT2;
4125 }
4126 fn->next = NULL;
4127 fn->name = option_data;
4128 if (*(fd->anchor) == NULL)
4129 *(fd->anchor) = fn;
4130 else
4131 (*(fd->lastptr))->next = fn;
4132 *(fd->lastptr) = fn;
4133 }
4134
4135 /* Handle OP_BINFILES */
4136
4137 else if (op->type == OP_BINFILES)
4138 {
4139 if (strcmp(option_data, "binary") == 0)
4140 binary_files = BIN_BINARY;
4141 else if (strcmp(option_data, "without-match") == 0)
4142 binary_files = BIN_NOMATCH;
4143 else if (strcmp(option_data, "text") == 0)
4144 binary_files = BIN_TEXT;
4145 else
4146 {
4147 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4148 option_data);
4149 pcre2grep_exit(usage(2));
4150 }
4151 }
4152
4153 /* Otherwise, deal with a single string or numeric data value. */
4154
4155 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4156 op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4157 {
4158 *((char **)op->dataptr) = option_data;
4159 }
4160 else
4161 {
4162 unsigned long int n = decode_number(option_data, op, longop);
4163 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4164 else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4165 else *((int *)op->dataptr) = n;
4166 }
4167 }
4168
4169 /* Options have been decoded. If -C was used, its value is used as a default
4170 for -A and -B. */
4171
4172 if (both_context > 0)
4173 {
4174 if (after_context == 0) after_context = both_context;
4175 if (before_context == 0) before_context = both_context;
4176 }
4177
4178 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4179 permitted. They display, each in their own way, only the data that has matched.
4180 */
4181
4182 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4183 file_offsets + line_offsets;
4184
4185 if (only_matching_count > 1)
4186 {
4187 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4188 "--file-offsets and/or --line-offsets\n");
4189 pcre2grep_exit(usage(2));
4190 }
4191
4192
4193 /* Check that there is a big enough ovector for all -o settings. */
4194
4195 for (om = only_matching; om != NULL; om = om->next)
4196 {
4197 int n = om->groupnum;
4198 if (n > (int)capture_max)
4199 {
4200 fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4201 fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4202 goto EXIT2;
4203 }
4204 }
4205
4206 /* Check the text supplied to --output for errors. */
4207
4208 if (output_text != NULL &&
4209 !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4210 goto EXIT2;
4211
4212 /* Set up default compile and match contexts and a match data block. */
4213
4214 offset_size = capture_max + 1;
4215 compile_context = pcre2_compile_context_create(NULL);
4216 match_context = pcre2_match_context_create(NULL);
4217 match_data = pcre2_match_data_create(offset_size, NULL);
4218 offsets = pcre2_get_ovector_pointer(match_data);
4219
4220 /* If string (script) callouts are supported, set up the callout processing
4221 function. */
4222
4223 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4224 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4225 #endif
4226
4227 /* Put limits into the match context. */
4228
4229 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4230 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4231 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4232
4233 /* If a locale has not been provided as an option, see if the LC_CTYPE or
4234 LC_ALL environment variable is set, and if so, use it. */
4235
4236 if (locale == NULL)
4237 {
4238 locale = getenv("LC_ALL");
4239 locale_from = "LC_ALL";
4240 }
4241
4242 if (locale == NULL)
4243 {
4244 locale = getenv("LC_CTYPE");
4245 locale_from = "LC_CTYPE";
4246 }
4247
4248 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4249 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4250
4251 if (locale != NULL)
4252 {
4253 if (setlocale(LC_CTYPE, locale) == NULL)
4254 {
4255 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4256 locale, locale_from);
4257 goto EXIT2;
4258 }
4259 character_tables = pcre2_maketables(NULL);
4260 pcre2_set_character_tables(compile_context, character_tables);
4261 }
4262
4263 /* Sort out colouring */
4264
4265 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4266 {
4267 if (strcmp(colour_option, "always") == 0)
4268 #ifdef WIN32
4269 do_ansi = !is_stdout_tty(),
4270 #endif
4271 do_colour = TRUE;
4272 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4273 else
4274 {
4275 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4276 colour_option);
4277 goto EXIT2;
4278 }
4279 if (do_colour)
4280 {
4281 char *cs = getenv("PCRE2GREP_COLOUR");
4282 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4283 if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4284 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4285 if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4286 if (cs == NULL) cs = getenv("GREP_COLOR");
4287 if (cs != NULL)
4288 {
4289 if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4290 }
4291 #ifdef WIN32
4292 init_colour_output();
4293 #endif
4294 }
4295 }
4296
4297 /* Sort out a newline setting. */
4298
4299 if (newline_arg != NULL)
4300 {
4301 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4302 endlinetype++)
4303 {
4304 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4305 }
4306 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4307 pcre2_set_newline(compile_context, endlinetype);
4308 else
4309 {
4310 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4311 newline_arg);
4312 goto EXIT2;
4313 }
4314 }
4315
4316 /* Find default newline convention */
4317
4318 else
4319 {
4320 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4321 }
4322
4323 /* Interpret the text values for -d and -D */
4324
4325 if (dee_option != NULL)
4326 {
4327 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4328 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4329 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4330 else
4331 {
4332 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4333 goto EXIT2;
4334 }
4335 }
4336
4337 if (DEE_option != NULL)
4338 {
4339 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4340 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4341 else
4342 {
4343 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4344 goto EXIT2;
4345 }
4346 }
4347
4348 /* Set the extra options */
4349
4350 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4351
4352 /* Check the values for Jeffrey Friedl's debugging options. */
4353
4354 #ifdef JFRIEDL_DEBUG
4355 if (S_arg > 9)
4356 {
4357 fprintf(stderr, "pcre2grep: bad value for -S option\n");
4358 return 2;
4359 }
4360 if (jfriedl_XT != 0 || jfriedl_XR != 0)
4361 {
4362 if (jfriedl_XT == 0) jfriedl_XT = 1;
4363 if (jfriedl_XR == 0) jfriedl_XR = 1;
4364 }
4365 #endif
4366
4367 /* If use_jit is set, check whether JIT is available. If not, do not try
4368 to use JIT. */
4369
4370 if (use_jit)
4371 {
4372 uint32_t answer;
4373 (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4374 if (!answer) use_jit = FALSE;
4375 }
4376
4377 /* Get memory for the main buffer. */
4378
4379 if (bufthird <= 0)
4380 {
4381 fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4382 goto EXIT2;
4383 }
4384
4385 bufsize = 3*bufthird;
4386 main_buffer = (char *)malloc(bufsize);
4387
4388 if (main_buffer == NULL)
4389 {
4390 fprintf(stderr, "pcre2grep: malloc failed\n");
4391 goto EXIT2;
4392 }
4393
4394 /* If no patterns were provided by -e, and there are no files provided by -f,
4395 the first argument is the one and only pattern, and it must exist. */
4396
4397 if (patterns == NULL && pattern_files == NULL)
4398 {
4399 if (i >= argc) return usage(2);
4400 patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4401 NULL);
4402 i++;
4403 if (patterns == NULL) goto EXIT2;
4404 }
4405
4406 /* Compile the patterns that were provided on the command line, either by
4407 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4408 after all the command-line options are read so that we know which PCRE options
4409 to use. When -F is used, compile_pattern() may add another block into the
4410 chain, so we must not access the next pointer till after the compile. */
4411
4412 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4413 {
4414 if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4415 (j == 1 && patterns->next == NULL)? 0 : j))
4416 goto EXIT2;
4417 }
4418
4419 /* Read and compile the regular expressions that are provided in files. */
4420
4421 for (fn = pattern_files; fn != NULL; fn = fn->next)
4422 {
4423 if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4424 }
4425
4426 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4427
4428 #ifdef SUPPORT_PCRE2GREP_JIT
4429 if (use_jit)
4430 {
4431 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4432 if (jit_stack != NULL )
4433 pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4434 }
4435 #endif
4436
4437 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4438 adjust the options. */
4439
4440 pcre2_options &= ~PCRE2_LITERAL;
4441 (void)pcre2_set_compile_extra_options(compile_context, 0);
4442
4443 /* If there are include or exclude patterns read from the command line, compile
4444 them. */
4445
4446 for (j = 0; j < 4; j++)
4447 {
4448 int k;
4449 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4450 {
4451 if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4452 (k == 1 && cp->next == NULL)? 0 : k))
4453 goto EXIT2;
4454 }
4455 }
4456
4457 /* Read and compile include/exclude patterns from files. */
4458
4459 for (fn = include_from; fn != NULL; fn = fn->next)
4460 {
4461 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4462 goto EXIT2;
4463 }
4464
4465 for (fn = exclude_from; fn != NULL; fn = fn->next)
4466 {
4467 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4468 goto EXIT2;
4469 }
4470
4471 /* If there are no files that contain lists of files to search, and there are
4472 no file arguments, search stdin, and then exit. */
4473
4474 if (file_lists == NULL && i >= argc)
4475 {
4476 rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4477 (filenames > FN_DEFAULT)? stdin_name : NULL);
4478 goto EXIT;
4479 }
4480
4481 /* If any files that contain a list of files to search have been specified,
4482 read them line by line and search the given files. */
4483
4484 for (fn = file_lists; fn != NULL; fn = fn->next)
4485 {
4486 char buffer[FNBUFSIZ];
4487 FILE *fl;
4488 if (strcmp(fn->name, "-") == 0) fl = stdin; else
4489 {
4490 fl = fopen(fn->name, "rb");
4491 if (fl == NULL)
4492 {
4493 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4494 strerror(errno));
4495 goto EXIT2;
4496 }
4497 }
4498 while (fgets(buffer, sizeof(buffer), fl) != NULL)
4499 {
4500 int frc;
4501 char *end = buffer + (int)strlen(buffer);
4502 while (end > buffer && isspace(end[-1])) end--;
4503 *end = 0;
4504 if (*buffer != 0)
4505 {
4506 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4507 if (frc > 1) rc = frc;
4508 else if (frc == 0 && rc == 1) rc = 0;
4509 }
4510 }
4511 if (fl != stdin) fclose(fl);
4512 }
4513
4514 /* After handling file-list, work through remaining arguments. Pass in the fact
4515 that there is only one argument at top level - this suppresses the file name if
4516 the argument is not a directory and filenames are not otherwise forced. */
4517
4518 only_one_at_top = i == argc - 1 && file_lists == NULL;
4519
4520 for (; i < argc; i++)
4521 {
4522 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4523 only_one_at_top);
4524 if (frc > 1) rc = frc;
4525 else if (frc == 0 && rc == 1) rc = 0;
4526 }
4527
4528 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4529 /* If separating builtin echo callouts by implicit newline, add one more for
4530 the final item. */
4531
4532 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4533 fprintf(stdout, STDOUT_NL);
4534 #endif
4535
4536 /* Show the total number of matches if requested, but not if only one file's
4537 count was printed. */
4538
4539 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4540 {
4541 if (counts_printed != 0 && filenames >= FN_DEFAULT)
4542 fprintf(stdout, "TOTAL:");
4543 fprintf(stdout, "%lu" STDOUT_NL, total_count);
4544 }
4545
4546 EXIT:
4547 #ifdef SUPPORT_PCRE2GREP_JIT
4548 pcre2_jit_free_unused_memory(NULL);
4549 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4550 #endif
4551
4552 free(main_buffer);
4553 if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4554
4555 pcre2_compile_context_free(compile_context);
4556 pcre2_match_context_free(match_context);
4557 pcre2_match_data_free(match_data);
4558
4559 free_pattern_chain(patterns);
4560 free_pattern_chain(include_patterns);
4561 free_pattern_chain(include_dir_patterns);
4562 free_pattern_chain(exclude_patterns);
4563 free_pattern_chain(exclude_dir_patterns);
4564
4565 free_file_chain(exclude_from);
4566 free_file_chain(include_from);
4567 free_file_chain(pattern_files);
4568 free_file_chain(file_lists);
4569
4570 while (only_matching != NULL)
4571 {
4572 omstr *this = only_matching;
4573 only_matching = this->next;
4574 free(this);
4575 }
4576
4577 pcre2grep_exit(rc);
4578
4579 EXIT2:
4580 rc = 2;
4581 goto EXIT;
4582 }
4583
4584 /* End of pcre2grep */
4585