1 /*************************************************
2 *               pcre2grep program                *
3 *************************************************/
4 
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
8 handle PDS files.
9 
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
15 
16            Copyright (c) 1997-2020 University of Cambridge
17 
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
21 
22     * Redistributions of source code must retain the above copyright notice,
23       this list of conditions and the following disclaimer.
24 
25     * Redistributions in binary form must reproduce the above copyright
26       notice, this list of conditions and the following disclaimer in the
27       documentation and/or other materials provided with the distribution.
28 
29     * Neither the name of the University of Cambridge nor the names of its
30       contributors may be used to endorse or promote products derived from
31       this software without specific prior written permission.
32 
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
45 */
46 
47 #ifdef HAVE_CONFIG_H
48 #include "config.h"
49 #endif
50 
51 #include <ctype.h>
52 #include <locale.h>
53 #include <stdio.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <errno.h>
57 
58 #include <sys/types.h>
59 #include <sys/stat.h>
60 
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62   && !defined WIN32 && !defined(__CYGWIN__)
63 #define WIN32
64 #endif
65 
/* Some CMake configurations still define WIN32 under Cygwin; undo that. */
67 #if defined(__CYGWIN__) && defined(WIN32)
68 #undef WIN32
69 #endif
70 
71 #ifdef __VMS
72 #include clidef
73 #include descrip
74 #include lib$routines
75 #endif
76 
77 #ifdef WIN32
78 #include <io.h>                /* For _setmode() */
79 #include <fcntl.h>             /* For _O_BINARY */
80 #endif
81 
82 #if defined(SUPPORT_PCRE2GREP_CALLOUT) && defined(SUPPORT_PCRE2GREP_CALLOUT_FORK)
83 #ifdef WIN32
84 #include <process.h>
85 #else
86 #include <sys/wait.h>
87 #endif
88 #endif
89 
90 #ifdef HAVE_UNISTD_H
91 #include <unistd.h>
92 #endif
93 
94 #ifdef SUPPORT_LIBZ
95 #include <zlib.h>
96 #endif
97 
98 #ifdef SUPPORT_LIBBZ2
99 #include <bzlib.h>
100 #endif
101 
102 #define PCRE2_CODE_UNIT_WIDTH 8
103 #include "pcre2.h"
104 
105 /* Older versions of MSVC lack snprintf(). This define allows for
106 warning/error-free compilation and testing with MSVC compilers back to at least
107 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
108 
109 #if defined(_MSC_VER) && (_MSC_VER < 1900)
110 #define snprintf _snprintf
111 #endif
112 
/* Old versions of Visual C and other older compilers do not support %td or
%zu, and even some compilers that claim to be C99 do not support them (hence
DISABLE_PERCENT_ZT). */
115 
116 #if defined(DISABLE_PERCENT_ZT) || (defined(_MSC_VER) && (_MSC_VER < 1800)) || \
117   (!defined(_MSC_VER) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L))
118 #ifdef _WIN64
119 #define SIZ_FORM "llu"
120 #else
121 #define SIZ_FORM "lu"
122 #endif
123 #else
124 #define SIZ_FORM "zu"
125 #endif
126 
127 #define FALSE 0
128 #define TRUE 1
129 
130 typedef int BOOL;
131 
132 #define DEFAULT_CAPTURE_MAX 50
133 
134 #if BUFSIZ > 8192
135 #define MAXPATLEN BUFSIZ
136 #else
137 #define MAXPATLEN 8192
138 #endif
139 
140 #define FNBUFSIZ 2048
141 #define ERRBUFSIZ 256
142 
143 /* Values for the "filenames" variable, which specifies options for file name
144 output. The order is important; it is assumed that a file name is wanted for
145 all values greater than FN_DEFAULT. */
146 
147 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
148 
149 /* File reading styles */
150 
151 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
152 
153 /* Actions for the -d and -D options */
154 
155 enum { dee_READ, dee_SKIP, dee_RECURSE };
156 enum { DEE_READ, DEE_SKIP };
157 
158 /* Actions for special processing options (flag bits) */
159 
160 #define PO_WORD_MATCH     0x0001
161 #define PO_LINE_MATCH     0x0002
162 #define PO_FIXED_STRINGS  0x0004
163 
164 /* Binary file options */
165 
166 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
167 
168 /* Return values from decode_dollar_escape() */
169 
170 enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
171 
172 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
173 environments), a warning is issued if the value of fwrite() is ignored.
174 Unfortunately, casting to (void) does not suppress the warning. To get round
175 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
176 apply to fprintf(). */
177 
178 #define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
179 
180 /* Under Windows, we have to set stdout to be binary, so that it does not
181 convert \r\n at the ends of output lines to \r\r\n. However, that means that
182 any messages written to stdout must have \r\n as their line terminator. This is
183 handled by using STDOUT_NL as the newline string. We also use a normal double
184 quote for the example, as single quotes aren't usually available. */
185 
186 #ifdef WIN32
187 #define STDOUT_NL     "\r\n"
188 #define STDOUT_NL_LEN  2
189 #define QUOT          "\""
190 #else
191 #define STDOUT_NL      "\n"
192 #define STDOUT_NL_LEN  1
193 #define QUOT           "'"
194 #endif
195 
196 /* This code is returned from decode_dollar_escape() when $n is encountered,
197 and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
198 point. */
199 
200 #define STDOUT_NL_CODE 0x7fffffffu
201 
202 
203 
204 /*************************************************
205 *               Global variables                 *
206 *************************************************/
207 
208 /* Jeffrey Friedl has some debugging requirements that are not part of the
209 regular code. */
210 
211 #ifdef JFRIEDL_DEBUG
212 static int S_arg = -1;
213 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
214 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
215 static const char *jfriedl_prefix = "";
216 static const char *jfriedl_postfix = "";
217 #endif
218 
219 static const char *colour_string = "1;31";
220 static const char *colour_option = NULL;
221 static const char *dee_option = NULL;
222 static const char *DEE_option = NULL;
223 static const char *locale = NULL;
224 static const char *newline_arg = NULL;
225 static const char *om_separator = NULL;
226 static const char *stdin_name = "(standard input)";
227 static const char *output_text = NULL;
228 
229 static char *main_buffer = NULL;
230 
231 static int after_context = 0;
232 static int before_context = 0;
233 static int binary_files = BIN_BINARY;
234 static int both_context = 0;
235 static int bufthird = PCRE2GREP_BUFSIZE;
236 static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
237 static int bufsize = 3*PCRE2GREP_BUFSIZE;
238 static int endlinetype;
239 
240 static int count_limit = -1;  /* Not long, so that it works with OP_NUMBER */
241 static unsigned long int counts_printed = 0;
242 static unsigned long int total_count = 0;
243 
244 #ifdef WIN32
245 static int dee_action = dee_SKIP;
246 #else
247 static int dee_action = dee_READ;
248 #endif
249 
250 static int DEE_action = DEE_READ;
251 static int error_count = 0;
252 static int filenames = FN_DEFAULT;
253 
254 #ifdef SUPPORT_PCRE2GREP_JIT
255 static BOOL use_jit = TRUE;
256 #else
257 static BOOL use_jit = FALSE;
258 #endif
259 
260 static const uint8_t *character_tables = NULL;
261 
262 static uint32_t pcre2_options = 0;
263 static uint32_t extra_options = 0;
264 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
265 static uint32_t match_limit = 0;
266 static uint32_t depth_limit = 0;
267 
268 static pcre2_compile_context *compile_context;
269 static pcre2_match_context *match_context;
270 static pcre2_match_data *match_data;
271 static PCRE2_SIZE *offsets;
272 static uint32_t offset_size;
273 static uint32_t capture_max = DEFAULT_CAPTURE_MAX;
274 
275 static BOOL count_only = FALSE;
276 static BOOL do_colour = FALSE;
277 #ifdef WIN32
278 static BOOL do_ansi = FALSE;
279 #endif
280 static BOOL file_offsets = FALSE;
281 static BOOL hyphenpending = FALSE;
282 static BOOL invert = FALSE;
283 static BOOL line_buffered = FALSE;
284 static BOOL line_offsets = FALSE;
285 static BOOL multiline = FALSE;
286 static BOOL number = FALSE;
287 static BOOL omit_zero_count = FALSE;
288 static BOOL resource_error = FALSE;
289 static BOOL quiet = FALSE;
290 static BOOL show_total_count = FALSE;
291 static BOOL silent = FALSE;
292 static BOOL utf = FALSE;
293 
294 static uint8_t utf8_buffer[8];
295 
296 
297 /* Structure for list of --only-matching capturing numbers. */
298 
typedef struct omstr {
  struct omstr *next;   /* link to the following item in the chain */
  int groupnum;         /* capturing group number for --only-matching */
} omstr;
303 
304 static omstr *only_matching = NULL;
305 static omstr *only_matching_last = NULL;
306 static int only_matching_count;
307 
308 /* Structure for holding the two variables that describe a number chain. */
309 
typedef struct omdatastr {
  omstr **anchor;       /* address of the pointer to the first item */
  omstr **lastptr;      /* address of the pointer to the last item */
} omdatastr;
314 
315 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
316 
317 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
318 
typedef struct fnstr {
  struct fnstr *next;   /* link to the following item in the chain */
  char *name;           /* the file name */
} fnstr;
323 
324 static fnstr *exclude_from = NULL;
325 static fnstr *exclude_from_last = NULL;
326 static fnstr *include_from = NULL;
327 static fnstr *include_from_last = NULL;
328 
329 static fnstr *file_lists = NULL;
330 static fnstr *file_lists_last = NULL;
331 static fnstr *pattern_files = NULL;
332 static fnstr *pattern_files_last = NULL;
333 
334 /* Structure for holding the two variables that describe a file name chain. */
335 
typedef struct fndatastr {
  fnstr **anchor;       /* address of the pointer to the first item */
  fnstr **lastptr;      /* address of the pointer to the last item */
} fndatastr;
340 
341 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
342 static fndatastr include_from_data = { &include_from, &include_from_last };
343 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
344 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
345 
346 /* Structure for pattern and its compiled form; used for matching patterns and
347 also for include/exclude patterns. */
348 
typedef struct patstr {
  struct patstr *next;  /* link to the following item in the chain */
  char *string;         /* the pattern text */
  PCRE2_SIZE length;    /* length of the pattern text */
  pcre2_code *compiled; /* compiled form; NULL until the pattern is compiled */
} patstr;
355 
356 static patstr *patterns = NULL;
357 static patstr *patterns_last = NULL;
358 static patstr *include_patterns = NULL;
359 static patstr *include_patterns_last = NULL;
360 static patstr *exclude_patterns = NULL;
361 static patstr *exclude_patterns_last = NULL;
362 static patstr *include_dir_patterns = NULL;
363 static patstr *include_dir_patterns_last = NULL;
364 static patstr *exclude_dir_patterns = NULL;
365 static patstr *exclude_dir_patterns_last = NULL;
366 
367 /* Structure holding the two variables that describe a pattern chain. A pointer
368 to such structures is used for each appropriate option. */
369 
typedef struct patdatastr {
  patstr **anchor;      /* address of the pointer to the first item */
  patstr **lastptr;     /* address of the pointer to the last item */
} patdatastr;
374 
375 static patdatastr match_patdata = { &patterns, &patterns_last };
376 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
377 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
378 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
379 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
380 
381 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
382                                  &include_dir_patterns, &exclude_dir_patterns };
383 
384 static const char *incexname[4] = { "--include", "--exclude",
385                                     "--include-dir", "--exclude-dir" };
386 
387 /* Structure for options and list of them */
388 
389 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
390        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
391 
typedef struct option_item {
  int type;               /* one of the OP_xxx values */
  int one_char;           /* single-letter form, or a negative N_xxx value for
                             options that have no single-letter form */
  void *dataptr;          /* where the option's data is stored; NULL for the
                             OP_NODATA entries in the option list */
  const char *long_name;  /* long option name, which may be followed by
                             "=description"; "" if there is no long form */
  const char *help_text;  /* one-line description of the option */
} option_item;
399 
400 /* Options without a single-letter equivalent get a negative value. This can be
401 used to identify them. */
402 
403 #define N_COLOUR       (-1)
404 #define N_EXCLUDE      (-2)
405 #define N_EXCLUDE_DIR  (-3)
406 #define N_HELP         (-4)
407 #define N_INCLUDE      (-5)
408 #define N_INCLUDE_DIR  (-6)
409 #define N_LABEL        (-7)
410 #define N_LOCALE       (-8)
411 #define N_NULL         (-9)
412 #define N_LOFFSETS     (-10)
413 #define N_FOFFSETS     (-11)
414 #define N_LBUFFER      (-12)
415 #define N_H_LIMIT      (-13)
416 #define N_M_LIMIT      (-14)
417 #define N_M_LIMIT_DEP  (-15)
418 #define N_BUFSIZE      (-16)
419 #define N_NOJIT        (-17)
420 #define N_FILE_LIST    (-18)
421 #define N_BINARY_FILES (-19)
422 #define N_EXCLUDE_FROM (-20)
423 #define N_INCLUDE_FROM (-21)
424 #define N_OM_SEPARATOR (-22)
425 #define N_MAX_BUFSIZE  (-23)
426 #define N_OM_CAPTURE   (-24)
427 #define N_ALLABSK      (-25)
428 
429 static option_item optionlist[] = {
430   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
431   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
432   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
433   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
434   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
435   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
436   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer starting size" },
437   { OP_NUMBER,     N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number",  "set processing buffer maximum size" },
438   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
439   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
440   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
441   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
442   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
443   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
444   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
445   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
446   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
447   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
448   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
449   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
450   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
451   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
452   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
453   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
454   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
455   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
456   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
457   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
458   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
459   { OP_SIZE,       N_H_LIMIT, &heap_limit,      "heap-limit=number",  "set PCRE2 heap limit option (kibibytes)" },
460   { OP_U32NUMBER,  N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE2 match limit option" },
461   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
462   { OP_U32NUMBER,  N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
463   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
464   { OP_NUMBER,     'm',      &count_limit,      "max-count=number", "stop after <number> matched lines" },
465   { OP_STRING,     'N',      &newline_arg,      "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
466   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
467 #ifdef SUPPORT_PCRE2GREP_JIT
468   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
469 #else
470   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcre2grep does not support JIT" },
471 #endif
472   { OP_STRING,     'O',      &output_text,       "output=text",   "show only this text (possibly expanded)" },
473   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
474   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
475   { OP_U32NUMBER,  N_OM_CAPTURE, &capture_max,  "om-capture=n",  "set capture count for --only-matching" },
476   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
477   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
478   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
479   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
480   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
481   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
482   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
483   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
484 #ifdef JFRIEDL_DEBUG
485   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
486 #endif
487   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
488   { OP_NODATA,    't',      NULL,              "total-count",   "print total count of matching lines" },
489   { OP_NODATA,    'u',      NULL,              "utf",           "use UTF mode" },
490   { OP_NODATA,    'U',      NULL,              "utf-allow-invalid", "use UTF mode, allow for invalid code units" },
491   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
492   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
493   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
494   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
495   { OP_NODATA,   N_ALLABSK, NULL,              "allow-lookaround-bsk", "allow \\K in lookarounds" },
496   { OP_NODATA,    0,        NULL,               NULL,            NULL }
497 };
498 
499 /* Table of names for newline types. Must be kept in step with the definitions
500 of PCRE2_NEWLINE_xx in pcre2.h. */
501 
502 static const char *newlines[] = {
503   "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
504 
505 /* UTF-8 tables  */
506 
/* Entry i is the largest value that can be encoded in i+1 bytes; ord2utf8()
searches this table to find the encoded length. */

const int utf8_table1[] =
  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);

/* Entry i holds the marker bits that ord2utf8() ORs into the first byte of a
character that has i additional bytes. */

const int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

/* NOTE(review): presumably the mask for the data bits in the first byte of a
character with i additional bytes - not used in this part of the file, confirm
against the decoding code. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* NOTE(review): presumably the number of additional bytes, indexed by the
bottom six bits of the first byte - not used in this part of the file, confirm
against the decoding code. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
519 
520 
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
/*************************************************
*    Emulated memmove() for systems without it   *
*************************************************/

/* This replacement is compiled only when neither VPCOMPAT nor HAVE_MEMMOVE is
defined. If HAVE_BCOPY is defined, the work is handed to bcopy(). Otherwise the
bytes are copied one at a time, with the direction chosen from the relative
position of the two pointers so that overlapping areas are copied correctly.

Arguments:
  d          destination
  s          source
  n          number of bytes to copy

Returns:     d
*/

static void *
emulated_memmove(void *d, const void *s, size_t n)
{
#ifdef HAVE_BCOPY
bcopy(s, d, n);
return d;
#else
unsigned char *to = (unsigned char *)d;
const unsigned char *from = (const unsigned char *)s;

if (to > from)
  {
  /* Destination is above the source: copy from the top downwards. */
  to += n;
  from += n;
  while (n-- > 0) *(--to) = *(--from);
  }
else
  {
  /* Destination is at or below the source: copy from the bottom upwards. */
  while (n-- > 0) *to++ = *from++;
  }
return d;
#endif   /* not HAVE_BCOPY */
}
#undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n)
#endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
557 
558 
559 
560 /*************************************************
561 *           Convert code point to UTF-8          *
562 *************************************************/
563 
564 /* A static buffer is used. Returns the number of bytes. */
565 
566 static int
ord2utf8(uint32_t value)567 ord2utf8(uint32_t value)
568 {
569 int i, j;
570 uint8_t *utf8bytes = utf8_buffer;
571 for (i = 0; i < utf8_table1_size; i++)
572   if (value <= (uint32_t)utf8_table1[i]) break;
573 utf8bytes += i;
574 for (j = i; j > 0; j--)
575   {
576   *utf8bytes-- = 0x80 | (value & 0x3f);
577   value >>= 6;
578   }
579 *utf8bytes = utf8_table2[i] | value;
580 return i + 1;
581 }
582 
583 
584 
585 /*************************************************
586 *         Case-independent string compare        *
587 *************************************************/
588 
/* Compare two zero-terminated strings, treating characters as equal when
tolower() maps them to the same value.

Arguments:
  str1       the first string
  str2       the second string

Returns:     0 if the strings are the same apart from case
            +1 if, at the first difference, the character in str1 is greater
            -1 if, at the first difference, the character in str2 is greater
*/

static int
strcmpic(const char *str1, const char *str2)
{
unsigned int c1, c2;
while (*str1 != '\0' || *str2 != '\0')
  {
  /* The <ctype.h> functions are defined only for values representable as an
  unsigned char (and EOF). Where plain char is signed, a byte with the top bit
  set is negative, so it must be converted before the call. */
  c1 = (unsigned int)tolower((unsigned char)*str1++);
  c2 = (unsigned int)tolower((unsigned char)*str2++);
  if (c1 != c2) return ((c1 > c2) << 1) - 1;
  }
return 0;
}
601 
602 
603 /*************************************************
604 *         Parse GREP_COLORS                      *
605 *************************************************/
606 
/* Pick the colour sequence out of a GREP_COLORS-style string. The value that
follows the first "ms=" is used if there is one, otherwise the value that
follows the first "mt=". The value ends at the next ':' or at the end of the
string, and is truncated if it does not fit in the internal buffer.

Argument:  the string, possibly NULL
Returns:   pointer to a static buffer containing the value (overwritten by
             the next call), or NULL if the argument is NULL or contains
             neither "ms=" nor "mt="
*/

static char *
parse_grep_colors(const char *gc)
{
static char seq[16];
const char *value;
size_t used = 0;

if (gc == NULL) return NULL;

value = strstr(gc, "ms=");
if (value == NULL)
  {
  value = strstr(gc, "mt=");
  if (value == NULL) return NULL;
  }

/* Step over the three-character key, then copy up to the terminator, always
leaving room for the final zero. */

for (value += 3; used < sizeof(seq) - 1; value++)
  {
  if (*value == ':' || *value == '\0') break;
  seq[used++] = *value;
  }
seq[used] = '\0';
return seq;
}
630 
631 
632 /*************************************************
633 *         Exit from the program                  *
634 *************************************************/
635 
636 /* If there has been a resource error, give a suitable message.
637 
638 Argument:  the return code
639 Returns:   does not return
640 */
641 
642 static void
pcre2grep_exit(int rc)643 pcre2grep_exit(int rc)
644 {
645 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
646 status of 1, which is not helpful. To help with this problem, define a symbol
647 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
648 therein. */
649 
650 #ifdef __VMS
651   char val_buf[4];
652   $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
653   $DESCRIPTOR(sym_val, val_buf);
654   sprintf(val_buf, "%d", rc);
655   sym_val.dsc$w_length = strlen(val_buf);
656   lib$set_symbol(&sym_nam, &sym_val);
657 #endif
658 
659 if (resource_error)
660   {
661   fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
662     "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
663     PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
664   fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
665   }
666 exit(rc);
667 }
668 
669 
670 /*************************************************
671 *          Add item to chain of patterns         *
672 *************************************************/
673 
674 /* Used to add an item onto a chain, or just return an unconnected item if the
675 "after" argument is NULL.
676 
677 Arguments:
678   s          pattern string to add
679   patlen     length of pattern
680   after      if not NULL points to item to insert after
681 
682 Returns:     new pattern block or NULL on error
683 */
684 
685 static patstr *
add_pattern(char * s,PCRE2_SIZE patlen,patstr * after)686 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
687 {
688 patstr *p = (patstr *)malloc(sizeof(patstr));
689 if (p == NULL)
690   {
691   fprintf(stderr, "pcre2grep: malloc failed\n");
692   pcre2grep_exit(2);
693   }
694 if (patlen > MAXPATLEN)
695   {
696   fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
697     MAXPATLEN);
698   free(p);
699   return NULL;
700   }
701 p->next = NULL;
702 p->string = s;
703 p->length = patlen;
704 p->compiled = NULL;
705 
706 if (after != NULL)
707   {
708   p->next = after->next;
709   after->next = p;
710   }
711 return p;
712 }
713 
714 
715 /*************************************************
716 *           Free chain of patterns               *
717 *************************************************/
718 
719 /* Used for several chains of patterns.
720 
721 Argument: pointer to start of chain
722 Returns:  nothing
723 */
724 
725 static void
free_pattern_chain(patstr * pc)726 free_pattern_chain(patstr *pc)
727 {
728 while (pc != NULL)
729   {
730   patstr *p = pc;
731   pc = p->next;
732   if (p->compiled != NULL) pcre2_code_free(p->compiled);
733   free(p);
734   }
735 }
736 
737 
738 /*************************************************
739 *           Free chain of file names             *
740 *************************************************/
741 
742 /*
743 Argument: pointer to start of chain
744 Returns:  nothing
745 */
746 
747 static void
free_file_chain(fnstr * fn)748 free_file_chain(fnstr *fn)
749 {
750 while (fn != NULL)
751   {
752   fnstr *f = fn;
753   fn = f->next;
754   free(f);
755   }
756 }
757 
758 
759 /*************************************************
760 *            OS-specific functions               *
761 *************************************************/
762 
763 /* These definitions are needed in all Windows environments, even those where
764 Unix-style directory scanning can be used (see below). */
765 
766 #ifdef WIN32
767 
768 #ifndef STRICT
769 # define STRICT
770 #endif
771 #ifndef WIN32_LEAN_AND_MEAN
772 # define WIN32_LEAN_AND_MEAN
773 #endif
774 
775 #include <windows.h>
776 
777 #define iswild(name) (strpbrk(name, "*?") != NULL)
778 
779 /* Convert ANSI BGR format to RGB used by Windows */
780 #define BGR_RGB(x) ((x & 1 ? 4 : 0) | (x & 2) | (x & 4 ? 1 : 0))
781 
782 static HANDLE hstdout;
783 static CONSOLE_SCREEN_BUFFER_INFO csbi;
784 static WORD match_colour;
785 
786 static WORD
decode_ANSI_colour(const char * cs)787 decode_ANSI_colour(const char *cs)
788 {
789 WORD result = csbi.wAttributes;
790 while (*cs)
791   {
792   if (isdigit(*cs))
793     {
794     int code = atoi(cs);
795     if (code == 1) result |= 0x08;
796     else if (code == 4) result |= 0x8000;
797     else if (code == 5) result |= 0x80;
798     else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
799     else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
800     else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
801     else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
802     /* aixterm high intensity colour codes */
803     else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
804     else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
805 
806     while (isdigit(*cs)) cs++;
807     }
808   if (*cs) cs++;
809   }
810 return result;
811 }
812 
813 
814 static void
init_colour_output()815 init_colour_output()
816 {
817 if (do_colour)
818   {
819   hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
820   /* This fails when redirected to con; try again if so. */
821   if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
822     {
823     HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
824       FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
825     GetConsoleScreenBufferInfo(hcon, &csbi);
826     CloseHandle(hcon);
827     }
828   match_colour = decode_ANSI_colour(colour_string);
829   /* No valid colour found - turn off colouring */
830   if (!match_colour) do_colour = FALSE;
831   }
832 }
833 
834 #endif  /* WIN32 */
835 
836 
837 /* The following sets of functions are defined so that they can be made system
838 specific. At present there are versions for Unix-style environments, Windows,
839 native z/OS, and "no support". */
840 
841 
842 /************* Directory scanning Unix-style and z/OS ***********/
843 
844 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
845 #include <sys/types.h>
846 #include <sys/stat.h>
847 #include <dirent.h>
848 
849 #if defined NATIVE_ZOS
850 /************* Directory and PDS/E scanning for z/OS ***********/
851 /************* z/OS looks mostly like Unix with USS ************/
852 /* However, z/OS needs the #include statements in this header */
853 #include "pcrzosfs.h"
854 /* That header is not included in the main PCRE distribution because
855    other apparatus is needed to compile pcre2grep for z/OS. The header
856    can be found in the special z/OS distribution, which is available
857    from www.zaconsultants.net or from www.cbttape.org. */
858 #endif
859 
860 typedef DIR directory_type;
861 #define FILESEP '/'
862 
/* Test whether a path names a directory. If stat() fails, zero is returned,
in the expectation that opening the path as a file will then fail.

Argument:   filename   the path to test
Returns:    non-zero if the path is a directory, otherwise zero
*/

static int
isdirectory(char *filename)
{
struct stat sb;
return (stat(filename, &sb) >= 0)? S_ISDIR(sb.st_mode) : 0;
}
871 
872 static directory_type *
opendirectory(char * filename)873 opendirectory(char *filename)
874 {
875 return opendir(filename);
876 }
877 
878 static char *
readdirectory(directory_type * dir)879 readdirectory(directory_type *dir)
880 {
881 for (;;)
882   {
883   struct dirent *dent = readdir(dir);
884   if (dent == NULL) return NULL;
885   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
886     return dent->d_name;
887   }
888 /* Control never reaches here */
889 }
890 
891 static void
closedirectory(directory_type * dir)892 closedirectory(directory_type *dir)
893 {
894 closedir(dir);
895 }
896 
897 
898 /************* Test for regular file, Unix-style **********/
899 
/* Test whether a path names a regular file. If stat() fails, 1 is returned,
in the expectation that opening the path as a file will then fail.

Argument:   filename   the path to test
Returns:    non-zero if the path is a regular file or cannot be stat()ed,
            otherwise zero
*/

static int
isregfile(char *filename)
{
struct stat sb;
return (stat(filename, &sb) >= 0)? S_ISREG(sb.st_mode) : 1;
}
908 
909 
910 #if defined NATIVE_ZOS
911 /************* Test for a terminal in z/OS **********/
912 /* isatty() does not work in a TSO environment, so always give FALSE.*/
913 
914 static BOOL
is_stdout_tty(void)915 is_stdout_tty(void)
916 {
917 return FALSE;
918 }
919 
920 static BOOL
is_file_tty(FILE * f)921 is_file_tty(FILE *f)
922 {
923 return FALSE;
924 }
925 
926 
927 /************* Test for a terminal, Unix-style **********/
928 
929 #else
930 static BOOL
is_stdout_tty(void)931 is_stdout_tty(void)
932 {
933 return isatty(fileno(stdout));
934 }
935 
936 static BOOL
is_file_tty(FILE * f)937 is_file_tty(FILE *f)
938 {
939 return isatty(fileno(f));
940 }
941 #endif
942 
943 
944 /************* Print optionally coloured match Unix-style and z/OS **********/
945 
946 static void
print_match(const void * buf,int length)947 print_match(const void *buf, int length)
948 {
949 if (length == 0) return;
950 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
951 FWRITE_IGNORE(buf, 1, length, stdout);
952 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
953 }
954 
955 /* End of Unix-style or native z/OS environment functions. */
956 
957 
958 /************* Directory scanning in Windows ***********/
959 
960 /* I (Philip Hazel) have no means of testing this code. It was contributed by
961 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
962 when it did not exist. David Byron added a patch that moved the #include of
963 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
964 */
965 
966 #elif defined WIN32
967 
968 #ifndef INVALID_FILE_ATTRIBUTES
969 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
970 #endif
971 
/* State for one directory scan in Windows. */
typedef struct directory_type
{
HANDLE handle;          /* Search handle set from FindFirstFile() in opendirectory() */
BOOL first;             /* TRUE until readdirectory() has consumed the first entry */
WIN32_FIND_DATA data;   /* Entry filled in by FindFirstFile()/FindNextFile() */
} directory_type;
978 
979 #define FILESEP '/'
980 
981 int
isdirectory(char * filename)982 isdirectory(char *filename)
983 {
984 DWORD attr = GetFileAttributes(filename);
985 if (attr == INVALID_FILE_ATTRIBUTES)
986   return 0;
987 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
988 }
989 
990 directory_type *
opendirectory(char * filename)991 opendirectory(char *filename)
992 {
993 size_t len;
994 char *pattern;
995 directory_type *dir;
996 DWORD err;
997 len = strlen(filename);
998 pattern = (char *)malloc(len + 3);
999 dir = (directory_type *)malloc(sizeof(*dir));
1000 if ((pattern == NULL) || (dir == NULL))
1001   {
1002   fprintf(stderr, "pcre2grep: malloc failed\n");
1003   pcre2grep_exit(2);
1004   }
1005 memcpy(pattern, filename, len);
1006 if (iswild(filename))
1007   pattern[len] = 0;
1008 else
1009   memcpy(&(pattern[len]), "\\*", 3);
1010 dir->handle = FindFirstFile(pattern, &(dir->data));
1011 if (dir->handle != INVALID_HANDLE_VALUE)
1012   {
1013   free(pattern);
1014   dir->first = TRUE;
1015   return dir;
1016   }
1017 err = GetLastError();
1018 free(pattern);
1019 free(dir);
1020 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
1021 return NULL;
1022 }
1023 
1024 char *
readdirectory(directory_type * dir)1025 readdirectory(directory_type *dir)
1026 {
1027 for (;;)
1028   {
1029   if (!dir->first)
1030     {
1031     if (!FindNextFile(dir->handle, &(dir->data)))
1032       return NULL;
1033     }
1034   else
1035     {
1036     dir->first = FALSE;
1037     }
1038   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
1039     return dir->data.cFileName;
1040   }
1041 #ifndef _MSC_VER
1042 return NULL;   /* Keep compiler happy; never executed */
1043 #endif
1044 }
1045 
1046 void
closedirectory(directory_type * dir)1047 closedirectory(directory_type *dir)
1048 {
1049 FindClose(dir->handle);
1050 free(dir);
1051 }
1052 
1053 
1054 /************* Test for regular file in Windows **********/
1055 
1056 /* I don't know how to do this, or if it can be done; assume all paths are
1057 regular if they are not directories. */
1058 
/* Windows version: anything that isdirectory() does not accept is taken to be
a regular file.

Argument:   filename   the path to test
Returns:    1 if the path is not a directory, otherwise 0
*/

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
1063 
1064 
1065 /************* Test for a terminal in Windows **********/
1066 
1067 static BOOL
is_stdout_tty(void)1068 is_stdout_tty(void)
1069 {
1070 return _isatty(_fileno(stdout));
1071 }
1072 
1073 static BOOL
is_file_tty(FILE * f)1074 is_file_tty(FILE *f)
1075 {
1076 return _isatty(_fileno(f));
1077 }
1078 
1079 
1080 /************* Print optionally coloured match in Windows **********/
1081 
1082 static void
print_match(const void * buf,int length)1083 print_match(const void *buf, int length)
1084 {
1085 if (length == 0) return;
1086 if (do_colour)
1087   {
1088   if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1089     else SetConsoleTextAttribute(hstdout, match_colour);
1090   }
1091 FWRITE_IGNORE(buf, 1, length, stdout);
1092 if (do_colour)
1093   {
1094   if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1095     else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1096   }
1097 }
1098 
1099 /* End of Windows functions */
1100 
1101 
1102 /************* Directory scanning when we can't do it ***********/
1103 
1104 /* The type is void, and apart from isdirectory(), the functions do nothing. */
1105 
1106 #else
1107 
1108 #define FILESEP 0
1109 typedef void directory_type;
1110 
/* No-support version: nothing is ever reported as a directory. */
int
isdirectory(char *filename)
{
(void)filename;   /* Not inspected */
return 0;
}
opendirectory(char * filename)1112 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* No-support version: there are never any entries to read. */
char *
readdirectory(directory_type *dir)
{
(void)dir;   /* Not inspected */
return (char*)0;
}
/* No-support version: there is nothing to close. */
void
closedirectory(directory_type *dir)
{
(void)dir;   /* Not inspected */
}
1115 
1116 
1117 /************* Test for regular file when we can't do it **********/
1118 
1119 /* Assume all files are regular. */
1120 
/* No-support version: every path is reported as a regular file. */
int
isregfile(char *filename)
{
(void)filename;   /* Not inspected */
return 1;
}
1122 
1123 
1124 /************* Test for a terminal when we can't do it **********/
1125 
1126 static BOOL
is_stdout_tty(void)1127 is_stdout_tty(void)
1128 {
1129 return FALSE;
1130 }
1131 
1132 static BOOL
is_file_tty(FILE * f)1133 is_file_tty(FILE *f)
1134 {
1135 return FALSE;
1136 }
1137 
1138 
1139 /************* Print optionally coloured match when we can't do it **********/
1140 
/* No-support version: write matched text to stdout without any colouring. An
empty match writes nothing.

Arguments:
  buf       the bytes to write
  length    the number of bytes

Returns:    nothing
*/

static void
print_match(const void *buf, int length)
{
if (length != 0)
  {
  FWRITE_IGNORE(buf, 1, length, stdout);
  }
}
1147 
1148 #endif  /* End of system-specific functions */
1149 
1150 
1151 
1152 #ifndef HAVE_STRERROR
1153 /*************************************************
1154 *     Provide strerror() for non-ANSI libraries  *
1155 *************************************************/
1156 
1157 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1158 in their libraries, but can provide the same facility by this simple
1159 alternative function. */
1160 
1161 extern int   sys_nerr;
1162 extern char *sys_errlist[];
1163 
1164 char *
strerror(int n)1165 strerror(int n)
1166 {
1167 if (n < 0 || n >= sys_nerr) return "unknown error number";
1168 return sys_errlist[n];
1169 }
1170 #endif /* HAVE_STRERROR */
1171 
1172 
1173 
1174 /*************************************************
1175 *                Usage function                  *
1176 *************************************************/
1177 
1178 static int
usage(int rc)1179 usage(int rc)
1180 {
1181 option_item *op;
1182 fprintf(stderr, "Usage: pcre2grep [-");
1183 for (op = optionlist; op->one_char != 0; op++)
1184   {
1185   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1186   }
1187 fprintf(stderr, "] [long options] [pattern] [files]\n");
1188 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1189   "options.\n");
1190 return rc;
1191 }
1192 
1193 
1194 
1195 /*************************************************
1196 *                Help function                   *
1197 *************************************************/
1198 
1199 static void
help(void)1200 help(void)
1201 {
1202 option_item *op;
1203 
1204 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1205 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1206 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1207 
1208 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1209 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
1210 printf("All callout scripts in patterns are supported." STDOUT_NL);
1211 #else
1212 printf("Non-fork callout scripts in patterns are supported." STDOUT_NL);
1213 #endif
1214 #else
1215 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1216 #endif
1217 
1218 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1219 
1220 #ifdef SUPPORT_LIBZ
1221 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1222 #endif
1223 
1224 #ifdef SUPPORT_LIBBZ2
1225 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1226 #endif
1227 
1228 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1229 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1230 #else
1231 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1232 #endif
1233 
1234 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1235 printf("Options:" STDOUT_NL);
1236 
1237 for (op = optionlist; op->one_char != 0; op++)
1238   {
1239   int n;
1240   char s[4];
1241 
1242   if (op->one_char > 0 && (op->long_name)[0] == 0)
1243     n = 31 - printf("  -%c", op->one_char);
1244   else
1245     {
1246     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1247       else strcpy(s, "   ");
1248     n = 31 - printf("  %s --%s", s, op->long_name);
1249     }
1250 
1251   if (n < 1) n = 1;
1252   printf("%.*s%s" STDOUT_NL, n, "                           ", op->help_text);
1253   }
1254 
1255 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1256 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1257 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1258 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1259 printf("space is removed and blank lines are ignored." STDOUT_NL);
1260 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1261 
1262 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1263 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1264 }
1265 
1266 
1267 
1268 /*************************************************
1269 *            Test exclude/includes               *
1270 *************************************************/
1271 
1272 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1273 there are no includes, the path must match an include pattern.
1274 
1275 Arguments:
1276   path      the path to be matched
1277   ip        the chain of include patterns
1278   ep        the chain of exclude patterns
1279 
1280 Returns:    TRUE if the path is not excluded
1281 */
1282 
1283 static BOOL
test_incexc(char * path,patstr * ip,patstr * ep)1284 test_incexc(char *path, patstr *ip, patstr *ep)
1285 {
1286 int plen = strlen((const char *)path);
1287 
1288 for (; ep != NULL; ep = ep->next)
1289   {
1290   if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1291     return FALSE;
1292   }
1293 
1294 if (ip == NULL) return TRUE;
1295 
1296 for (; ip != NULL; ip = ip->next)
1297   {
1298   if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0)
1299     return TRUE;
1300   }
1301 
1302 return FALSE;
1303 }
1304 
1305 
1306 
1307 /*************************************************
1308 *         Decode integer argument value          *
1309 *************************************************/
1310 
1311 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1312 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1313 just keep it simple.
1314 
1315 Arguments:
1316   option_data   the option data string
1317   op            the option item (for error messages)
1318   longop        TRUE if option given in long form
1319 
1320 Returns:        a long integer
1321 */
1322 
1323 static long int
decode_number(char * option_data,option_item * op,BOOL longop)1324 decode_number(char *option_data, option_item *op, BOOL longop)
1325 {
1326 unsigned long int n = 0;
1327 char *endptr = option_data;
1328 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
1329 while (isdigit((unsigned char)(*endptr)))
1330   n = n * 10 + (int)(*endptr++ - '0');
1331 if (toupper(*endptr) == 'K')
1332   {
1333   n *= 1024;
1334   endptr++;
1335   }
1336 else if (toupper(*endptr) == 'M')
1337   {
1338   n *= 1024*1024;
1339   endptr++;
1340   }
1341 
1342 if (*endptr != 0)   /* Error */
1343   {
1344   if (longop)
1345     {
1346     char *equals = strchr(op->long_name, '=');
1347     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1348       (int)(equals - op->long_name);
1349     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1350       option_data, nlen, op->long_name);
1351     }
1352   else
1353     fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1354       option_data, op->one_char);
1355   pcre2grep_exit(usage(2));
1356   }
1357 
1358 return n;
1359 }
1360 
1361 
1362 
1363 /*************************************************
1364 *       Add item to a chain of numbers           *
1365 *************************************************/
1366 
1367 /* Used to add an item onto a chain, or just return an unconnected item if the
1368 "after" argument is NULL.
1369 
1370 Arguments:
1371   n          the number to add
1372   after      if not NULL points to item to insert after
1373 
1374 Returns:     new number block
1375 */
1376 
1377 static omstr *
add_number(int n,omstr * after)1378 add_number(int n, omstr *after)
1379 {
1380 omstr *om = (omstr *)malloc(sizeof(omstr));
1381 
1382 if (om == NULL)
1383   {
1384   fprintf(stderr, "pcre2grep: malloc failed\n");
1385   pcre2grep_exit(2);
1386   }
1387 om->next = NULL;
1388 om->groupnum = n;
1389 
1390 if (after != NULL)
1391   {
1392   om->next = after->next;
1393   after->next = om;
1394   }
1395 return om;
1396 }
1397 
1398 
1399 
1400 /*************************************************
1401 *            Read one line of input              *
1402 *************************************************/
1403 
1404 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1405 BZ2_read) into a large buffer, so many lines may be read at once. However,
1406 doing this for tty input means that no output appears until a lot of input has
1407 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1408 for this, because it does not stop at a binary zero, and therefore there is no
1409 way of telling how many characters it has read, because there may be binary
1410 zeros embedded in the data. This function is also used for reading patterns
1411 from files (the -f option).
1412 
1413 Arguments:
1414   buffer     the buffer to read into
1415   length     the maximum number of characters to read
1416   f          the file
1417 
1418 Returns:     the number of characters read, zero at end of file
1419 */
1420 
1421 static PCRE2_SIZE
read_one_line(char * buffer,int length,FILE * f)1422 read_one_line(char *buffer, int length, FILE *f)
1423 {
1424 int c;
1425 int yield = 0;
1426 while ((c = fgetc(f)) != EOF)
1427   {
1428   buffer[yield++] = c;
1429   if (c == '\n' || yield >= length) break;
1430   }
1431 return yield;
1432 }
1433 
1434 
1435 
1436 /*************************************************
1437 *             Find end of line                   *
1438 *************************************************/
1439 
1440 /* The length of the endline sequence that is found is set via lenptr. This may
1441 be zero at the very end of the file if there is no line-ending sequence there.
1442 
1443 Arguments:
1444   p         current position in line
1445   endptr    end of available data
1446   lenptr    where to put the length of the eol sequence
1447 
1448 Returns:    pointer after the last byte of the line,
1449             including the newline byte(s)
1450 */
1451 
1452 static char *
end_of_line(char * p,char * endptr,int * lenptr)1453 end_of_line(char *p, char *endptr, int *lenptr)
1454 {
1455 switch(endlinetype)
1456   {
1457   default:      /* Just in case */
1458   case PCRE2_NEWLINE_LF:
1459   while (p < endptr && *p != '\n') p++;
1460   if (p < endptr)
1461     {
1462     *lenptr = 1;
1463     return p + 1;
1464     }
1465   *lenptr = 0;
1466   return endptr;
1467 
1468   case PCRE2_NEWLINE_CR:
1469   while (p < endptr && *p != '\r') p++;
1470   if (p < endptr)
1471     {
1472     *lenptr = 1;
1473     return p + 1;
1474     }
1475   *lenptr = 0;
1476   return endptr;
1477 
1478   case PCRE2_NEWLINE_NUL:
1479   while (p < endptr && *p != '\0') p++;
1480   if (p < endptr)
1481     {
1482     *lenptr = 1;
1483     return p + 1;
1484     }
1485   *lenptr = 0;
1486   return endptr;
1487 
1488   case PCRE2_NEWLINE_CRLF:
1489   for (;;)
1490     {
1491     while (p < endptr && *p != '\r') p++;
1492     if (++p >= endptr)
1493       {
1494       *lenptr = 0;
1495       return endptr;
1496       }
1497     if (*p == '\n')
1498       {
1499       *lenptr = 2;
1500       return p + 1;
1501       }
1502     }
1503   break;
1504 
1505   case PCRE2_NEWLINE_ANYCRLF:
1506   while (p < endptr)
1507     {
1508     int extra = 0;
1509     int c = *((unsigned char *)p);
1510 
1511     if (utf && c >= 0xc0)
1512       {
1513       int gcii, gcss;
1514       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1515       gcss = 6*extra;
1516       c = (c & utf8_table3[extra]) << gcss;
1517       for (gcii = 1; gcii <= extra; gcii++)
1518         {
1519         gcss -= 6;
1520         c |= (p[gcii] & 0x3f) << gcss;
1521         }
1522       }
1523 
1524     p += 1 + extra;
1525 
1526     switch (c)
1527       {
1528       case '\n':
1529       *lenptr = 1;
1530       return p;
1531 
1532       case '\r':
1533       if (p < endptr && *p == '\n')
1534         {
1535         *lenptr = 2;
1536         p++;
1537         }
1538       else *lenptr = 1;
1539       return p;
1540 
1541       default:
1542       break;
1543       }
1544     }   /* End of loop for ANYCRLF case */
1545 
1546   *lenptr = 0;  /* Must have hit the end */
1547   return endptr;
1548 
1549   case PCRE2_NEWLINE_ANY:
1550   while (p < endptr)
1551     {
1552     int extra = 0;
1553     int c = *((unsigned char *)p);
1554 
1555     if (utf && c >= 0xc0)
1556       {
1557       int gcii, gcss;
1558       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1559       gcss = 6*extra;
1560       c = (c & utf8_table3[extra]) << gcss;
1561       for (gcii = 1; gcii <= extra; gcii++)
1562         {
1563         gcss -= 6;
1564         c |= (p[gcii] & 0x3f) << gcss;
1565         }
1566       }
1567 
1568     p += 1 + extra;
1569 
1570     switch (c)
1571       {
1572       case '\n':    /* LF */
1573       case '\v':    /* VT */
1574       case '\f':    /* FF */
1575       *lenptr = 1;
1576       return p;
1577 
1578       case '\r':    /* CR */
1579       if (p < endptr && *p == '\n')
1580         {
1581         *lenptr = 2;
1582         p++;
1583         }
1584       else *lenptr = 1;
1585       return p;
1586 
1587 #ifndef EBCDIC
1588       case 0x85:    /* Unicode NEL */
1589       *lenptr = utf? 2 : 1;
1590       return p;
1591 
1592       case 0x2028:  /* Unicode LS */
1593       case 0x2029:  /* Unicode PS */
1594       *lenptr = 3;
1595       return p;
1596 #endif  /* Not EBCDIC */
1597 
1598       default:
1599       break;
1600       }
1601     }   /* End of loop for ANY case */
1602 
1603   *lenptr = 0;  /* Must have hit the end */
1604   return endptr;
1605   }     /* End of overall switch */
1606 }
1607 
1608 
1609 
1610 /*************************************************
1611 *         Find start of previous line            *
1612 *************************************************/
1613 
1614 /* This is called when looking back for before lines to print.
1615 
1616 Arguments:
1617   p         start of the subsequent line
1618   startptr  start of available data
1619 
1620 Returns:    pointer to the start of the previous line
1621 */
1622 
1623 static char *
previous_line(char * p,char * startptr)1624 previous_line(char *p, char *startptr)
1625 {
1626 switch(endlinetype)
1627   {
1628   default:      /* Just in case */
1629   case PCRE2_NEWLINE_LF:
1630   p--;
1631   while (p > startptr && p[-1] != '\n') p--;
1632   return p;
1633 
1634   case PCRE2_NEWLINE_CR:
1635   p--;
1636   while (p > startptr && p[-1] != '\n') p--;
1637   return p;
1638 
1639   case PCRE2_NEWLINE_NUL:
1640   p--;
1641   while (p > startptr && p[-1] != '\0') p--;
1642   return p;
1643 
1644   case PCRE2_NEWLINE_CRLF:
1645   for (;;)
1646     {
1647     p -= 2;
1648     while (p > startptr && p[-1] != '\n') p--;
1649     if (p <= startptr + 1 || p[-2] == '\r') return p;
1650     }
1651   /* Control can never get here */
1652 
1653   case PCRE2_NEWLINE_ANY:
1654   case PCRE2_NEWLINE_ANYCRLF:
1655   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1656   if (utf) while ((*p & 0xc0) == 0x80) p--;
1657 
1658   while (p > startptr)
1659     {
1660     unsigned int c;
1661     char *pp = p - 1;
1662 
1663     if (utf)
1664       {
1665       int extra = 0;
1666       while ((*pp & 0xc0) == 0x80) pp--;
1667       c = *((unsigned char *)pp);
1668       if (c >= 0xc0)
1669         {
1670         int gcii, gcss;
1671         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1672         gcss = 6*extra;
1673         c = (c & utf8_table3[extra]) << gcss;
1674         for (gcii = 1; gcii <= extra; gcii++)
1675           {
1676           gcss -= 6;
1677           c |= (pp[gcii] & 0x3f) << gcss;
1678           }
1679         }
1680       }
1681     else c = *((unsigned char *)pp);
1682 
1683     if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1684       {
1685       case '\n':    /* LF */
1686       case '\r':    /* CR */
1687       return p;
1688 
1689       default:
1690       break;
1691       }
1692 
1693     else switch (c)
1694       {
1695       case '\n':    /* LF */
1696       case '\v':    /* VT */
1697       case '\f':    /* FF */
1698       case '\r':    /* CR */
1699 #ifndef EBCDIC
1700       case 0x85:    /* Unicode NEL */
1701       case 0x2028:  /* Unicode LS */
1702       case 0x2029:  /* Unicode PS */
1703 #endif  /* Not EBCDIC */
1704       return p;
1705 
1706       default:
1707       break;
1708       }
1709 
1710     p = pp;  /* Back one character */
1711     }        /* End of loop for ANY case */
1712 
1713   return startptr;  /* Hit start of data */
1714   }     /* End of overall switch */
1715 }
1716 
1717 
1718 
1719 /*************************************************
1720 *              Output newline at end             *
1721 *************************************************/
1722 
1723 /* This function is called if the final line of a file has been written to
1724 stdout, but it does not have a terminating newline.
1725 
1726 Arguments:  none
1727 Returns:    nothing
1728 */
1729 
1730 static void
write_final_newline(void)1731 write_final_newline(void)
1732 {
1733 switch(endlinetype)
1734   {
1735   default:      /* Just in case */
1736   case PCRE2_NEWLINE_LF:
1737   case PCRE2_NEWLINE_ANY:
1738   case PCRE2_NEWLINE_ANYCRLF:
1739   fprintf(stdout, "\n");
1740   break;
1741 
1742   case PCRE2_NEWLINE_CR:
1743   fprintf(stdout, "\r");
1744   break;
1745 
1746   case PCRE2_NEWLINE_CRLF:
1747   fprintf(stdout, "\r\n");
1748   break;
1749 
1750   case PCRE2_NEWLINE_NUL:
1751   fprintf(stdout, "%c", 0);
1752   break;
1753   }
1754 }
1755 
1756 
1757 /*************************************************
1758 *       Print the previous "after" lines         *
1759 *************************************************/
1760 
1761 /* This is called if we are about to lose said lines because of buffer filling,
1762 and at the end of the file. The data in the line is written using fwrite() so
1763 that a binary zero does not terminate it.
1764 
1765 Arguments:
1766   lastmatchnumber   the number of the last matching line, plus one
1767   lastmatchrestart  where we restarted after the last match
1768   endptr            end of available data
1769   printname         filename for printing
1770 
1771 Returns:            nothing
1772 */
1773 
1774 static void
do_after_lines(unsigned long int lastmatchnumber,char * lastmatchrestart,char * endptr,const char * printname)1775 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1776   char *endptr, const char *printname)
1777 {
1778 if (after_context > 0 && lastmatchnumber > 0)
1779   {
1780   int count = 0;
1781   int ellength = 0;
1782   while (lastmatchrestart < endptr && count < after_context)
1783     {
1784     char *pp = end_of_line(lastmatchrestart, endptr, &ellength);
1785     if (ellength == 0 && pp == main_buffer + bufsize) break;
1786     if (printname != NULL) fprintf(stdout, "%s-", printname);
1787     if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1788     FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1789     lastmatchrestart = pp;
1790     count++;
1791     }
1792 
1793   /* If we have printed any lines, arrange for a hyphen separator if anything
1794   else follows. Also, if the last line is the final line in the file and it had
1795   no newline, add one. */
1796 
1797   if (count > 0)
1798     {
1799     hyphenpending = TRUE;
1800     if (ellength == 0 && lastmatchrestart >= endptr)
1801       write_final_newline();
1802     }
1803   }
1804 }
1805 
1806 
1807 
1808 /*************************************************
1809 *   Apply patterns to subject till one matches   *
1810 *************************************************/
1811 
1812 /* This function is called to run through all patterns, looking for a match. It
1813 is used multiple times for the same subject when colouring is enabled, in order
1814 to find all possible matches.
1815 
1816 Arguments:
1817   matchptr     the start of the subject
1818   length       the length of the subject to match
1819   options      options for pcre2_match()
1820   startoffset  where to start matching
1821   mrc          address of where to put the result of pcre2_match()
1822 
1823 Returns:      TRUE if there was a match
1824               FALSE if there was no match
1825               invert if there was a non-fatal error
1826 */
1827 
1828 static BOOL
match_patterns(char * matchptr,PCRE2_SIZE length,unsigned int options,PCRE2_SIZE startoffset,int * mrc)1829 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1830   PCRE2_SIZE startoffset, int *mrc)
1831 {
1832 int i;
1833 PCRE2_SIZE slen = length;
1834 patstr *p = patterns;
1835 const char *msg = "this text:\n\n";
1836 
1837 if (slen > 200)
1838   {
1839   slen = 200;
1840   msg = "text that starts:\n\n";
1841   }
1842 
1843 for (i = 1; p != NULL; p = p->next, i++)
1844   {
1845   *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1846     startoffset, options, match_data, match_context);
1847   if (*mrc >= 0) return TRUE;
1848   if (*mrc == PCRE2_ERROR_NOMATCH) continue;
1849   fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1850   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1851   fprintf(stderr, "%s", msg);
1852   FWRITE_IGNORE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1853   fprintf(stderr, "\n\n");
1854   if (*mrc <= PCRE2_ERROR_UTF8_ERR1 &&
1855       *mrc >= PCRE2_ERROR_UTF8_ERR21)
1856     {
1857     unsigned char mbuffer[256];
1858     PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
1859     (void)pcre2_get_error_message(*mrc, mbuffer, sizeof(mbuffer));
1860     fprintf(stderr, "%s at offset %" SIZ_FORM "\n\n", mbuffer, startchar);
1861     }
1862   if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1863       *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1864     resource_error = TRUE;
1865   if (error_count++ > 20)
1866     {
1867     fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1868     pcre2grep_exit(2);
1869     }
1870   return invert;    /* No more matching; don't show the line again */
1871   }
1872 
1873 return FALSE;  /* No match, no errors */
1874 }
1875 
1876 
1877 
1878 /*************************************************
1879 *          Decode dollar escape sequence         *
1880 *************************************************/
1881 
1882 /* Called from various places to decode $ escapes in output strings. The escape
1883 sequences are as follows:
1884 
1885 $<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
1886 zero is never returned; '0' is substituted.
1887 
1888 $a returns bell.
1889 $b returns backspace.
1890 $e returns escape.
1891 $f returns form feed.
1892 $n returns newline.
1893 $r returns carriage return.
1894 $t returns tab.
1895 $v returns vertical tab.
1896 $o<digits> returns the character represented by the given octal
1897   number; up to three digits are processed.
1898 $o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
1899   code points.
1900 $x<digits> returns the character represented by the given hexadecimal
1901   number; up to two digits are processed.
1902 $x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
1903   code points.
1904 Any other character is substituted by itself. E.g: $$ is replaced by a single
1905 dollar.
1906 
1907 Arguments:
1908   begin      the start of the whole string
1909   string     points to the $
1910   callout    TRUE if in a callout (inhibits error messages)
1911   value      where to return a value
1912   last       where to return pointer to the last used character
1913 
1914 Returns:     DDE_ERROR    after a syntax error
1915              DDE_CAPTURE  if *value is a capture number
1916              DDE_CHAR     if *value is a character code
1917 */
1918 
1919 static int
decode_dollar_escape(PCRE2_SPTR begin,PCRE2_SPTR string,BOOL callout,uint32_t * value,PCRE2_SPTR * last)1920 decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
1921   uint32_t *value, PCRE2_SPTR *last)
1922 {
1923 uint32_t c = 0;
1924 int base = 10;
1925 int dcount;
1926 int rc = DDE_CHAR;
1927 BOOL brace = FALSE;
1928 
1929 switch (*(++string))
1930   {
1931   case 0:   /* Syntax error: a character must be present after $. */
1932   if (!callout)
1933     fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1934       (int)(string - begin), "no character after $");
1935   *last = string;
1936   return DDE_ERROR;
1937 
1938   case '{':
1939   brace = TRUE;
1940   string++;
1941   if (!isdigit(*string))  /* Syntax error: a decimal number required. */
1942     {
1943     if (!callout)
1944       fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1945         (int)(string - begin), "decimal number expected");
1946     rc = DDE_ERROR;
1947     break;
1948     }
1949 
1950   /* Fall through */
1951 
1952   /* The maximum capture number is 65535, so any number greater than that will
1953   always be an unknown capture number. We just stop incrementing, in order to
1954   avoid overflow. */
1955 
1956   case '0': case '1': case '2': case '3': case '4':
1957   case '5': case '6': case '7': case '8': case '9':
1958   do
1959     {
1960     if (c <= 65535) c = c * 10 + (*string - '0');
1961     string++;
1962     }
1963   while (*string >= '0' && *string <= '9');
1964   string--;  /* Point to last digit */
1965 
1966   /* In a callout, capture number 0 is not available. No error can be given,
1967   so just return the character '0'. */
1968 
1969   if (callout && c == 0)
1970     {
1971     *value = '0';
1972     }
1973   else
1974     {
1975     *value = c;
1976     rc = DDE_CAPTURE;
1977     }
1978   break;
1979 
1980   /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
1981   for valid Unicode code points. */
1982 
1983   case 'o':
1984   base = 8;
1985   string++;
1986   if (*string == '{')
1987     {
1988     brace = TRUE;
1989     string++;
1990     dcount = 7;
1991     }
1992   else dcount = 3;
1993   for (; dcount > 0; dcount--)
1994     {
1995     if (*string < '0' || *string > '7') break;
1996     c = c * 8 + (*string++ - '0');
1997     }
1998   *value = c;
1999   string--;  /* Point to last digit */
2000   break;
2001 
2002   /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
2003   for valid Unicode code points. */
2004 
2005   case 'x':
2006   base = 16;
2007   string++;
2008   if (*string == '{')
2009     {
2010     brace = TRUE;
2011     string++;
2012     dcount = 6;
2013     }
2014   else dcount = 2;
2015   for (; dcount > 0; dcount--)
2016     {
2017     if (!isxdigit(*string)) break;
2018     if (*string >= '0' && *string <= '9')
2019       c = c *16 + *string++ - '0';
2020     else
2021       c = c * 16 + (*string++ | 0x20) - 'a' + 10;
2022     }
2023   *value = c;
2024   string--;  /* Point to last digit */
2025   break;
2026 
2027   case 'a': *value = '\a'; break;
2028   case 'b': *value = '\b'; break;
2029 #ifndef EBCDIC
2030   case 'e': *value = '\033'; break;
2031 #else
2032   case 'e': *value = '\047'; break;
2033 #endif
2034   case 'f': *value = '\f'; break;
2035   case 'n': *value = STDOUT_NL_CODE; break;
2036   case 'r': *value = '\r'; break;
2037   case 't': *value = '\t'; break;
2038   case 'v': *value = '\v'; break;
2039 
2040   default: *value = *string; break;
2041   }
2042 
2043 if (brace)
2044   {
2045   c = string[1];
2046   if (c != '}')
2047     {
2048     rc = DDE_ERROR;
2049     if (!callout)
2050       {
2051       if ((base == 8 && c >= '0' && c <= '7') ||
2052           (base == 16 && isxdigit(c)))
2053         {
2054         fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2055           "too many %s digits\n", (int)(string - begin),
2056           (base == 8)? "octal" : "hex");
2057         }
2058       else
2059         {
2060         fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
2061           (int)(string - begin), "missing closing brace");
2062         }
2063       }
2064     }
2065   else string++;
2066   }
2067 
2068 /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
2069 
2070 if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
2071   {
2072   uint32_t max = utf? 0x0010ffffu : 0xffu;
2073   if (*value > max)
2074     {
2075     if (!callout)
2076       fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
2077         "code point greater than 0x%x is invalid\n", (int)(string - begin), max);
2078     rc = DDE_ERROR;
2079     }
2080   }
2081 
2082 *last = string;
2083 return rc;
2084 }
2085 
2086 
2087 
2088 /*************************************************
2089 *          Check output text for errors          *
2090 *************************************************/
2091 
2092 /* Called early, to get errors before doing anything for -O text; also called
2093 from callouts to check before outputting.
2094 
2095 Arguments:
2096   string    an --output text string
2097   callout   TRUE if in a callout (stops printing errors)
2098 
2099 Returns:    TRUE if OK, FALSE on error
2100 */
2101 
2102 static BOOL
syntax_check_output_text(PCRE2_SPTR string,BOOL callout)2103 syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
2104 {
2105 uint32_t value;
2106 PCRE2_SPTR begin = string;
2107 
2108 for (; *string != 0; string++)
2109   {
2110   if (*string == '$' &&
2111     decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
2112       return FALSE;
2113   }
2114 
2115 return TRUE;
2116 }
2117 
2118 
2119 /*************************************************
2120 *              Display output text               *
2121 *************************************************/
2122 
2123 /* Display the output text, which is assumed to have already been syntax
2124 checked. Output may contain escape sequences started by the dollar sign.
2125 
2126 Arguments:
2127   string:       the output text
2128   callout:      TRUE for the builtin callout, FALSE for --output
2129   subject       the start of the subject
2130   ovector:      capture offsets
2131   capture_top:  number of captures
2132 
2133 Returns:        TRUE if something was output, other than newline
2134                 FALSE if nothing was output, or newline was last output
2135 */
2136 
2137 static BOOL
display_output_text(PCRE2_SPTR string,BOOL callout,PCRE2_SPTR subject,PCRE2_SIZE * ovector,PCRE2_SIZE capture_top)2138 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
2139   PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
2140 {
2141 uint32_t value;
2142 BOOL printed = FALSE;
2143 PCRE2_SPTR begin = string;
2144 
2145 for (; *string != 0; string++)
2146   {
2147   if (*string == '$')
2148     {
2149     switch(decode_dollar_escape(begin, string, callout, &value, &string))
2150       {
2151       case DDE_CHAR:
2152       if (value == STDOUT_NL_CODE)
2153         {
2154         fprintf(stdout, STDOUT_NL);
2155         printed = FALSE;
2156         continue;
2157         }
2158       break;  /* Will print value */
2159 
2160       case DDE_CAPTURE:
2161       if (value < capture_top)
2162         {
2163         PCRE2_SIZE capturesize;
2164         value *= 2;
2165         capturesize = ovector[value + 1] - ovector[value];
2166         if (capturesize > 0)
2167           {
2168           print_match(subject + ovector[value], capturesize);
2169           printed = TRUE;
2170           }
2171         }
2172       continue;
2173 
2174       default:  /* Should not occur */
2175       break;
2176       }
2177     }
2178 
2179   else value = *string;  /* Not a $ escape */
2180 
2181   if (utf && value <= 127) fprintf(stdout, "%c", *string); else
2182     {
2183     int i;
2184     int n = ord2utf8(value);
2185     for (i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
2186     }
2187 
2188   printed = TRUE;
2189   }
2190 
2191 return printed;
2192 }
2193 
2194 
2195 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2196 
2197 /*************************************************
2198 *        Parse and execute callout scripts       *
2199 *************************************************/
2200 
2201 /* If SUPPORT_PCRE2GREP_CALLOUT_FORK is defined, this function parses a callout
2202 string block and executes the program specified by the string. The string is a
2203 list of substrings separated by pipe characters. The first substring represents
2204 the executable name, and the following substrings specify the arguments:
2205 
2206   program_name|param1|param2|...
2207 
2208 Any substring (including the program name) can contain escape sequences
2209 started by the dollar character. The escape sequences are substituted as
2210 follows:
2211 
2212   $<digits> or ${<digits>} is replaced by the captured substring of the given
2213   decimal number, which must be greater than zero. If the number is greater
2214   than the number of capturing substrings, or if the capture is unset, the
2215   replacement is empty.
2216 
2217   Any other character is substituted by itself. E.g: $$ is replaced by a single
2218   dollar or $| replaced by a pipe character.
2219 
2220 Alternatively, if string starts with pipe, the remainder is taken as an output
2221 string, same as --output. This is the only form that is supported if
2222 SUPPORT_PCRE2GREP_FORK is not defined. In this case, --om-separator is used to
2223 separate each callout, defaulting to newline.
2224 
2225 Example:
2226 
2227   echo -e "abcde\n12345" | pcre2grep \
2228     '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2229 
2230   Output:
2231 
2232     Arg1: [a] [bcd] [d] Arg2: |a| ()
2233     abcde
2234     Arg1: [1] [234] [4] Arg2: |1| ()
2235     12345
2236 
2237 Arguments:
2238   blockptr     the callout block
2239 
2240 Returns:       currently it always returns with 0
2241 */
2242 
2243 static int
pcre2grep_callout(pcre2_callout_block * calloutptr,void * unused)2244 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2245 {
2246 PCRE2_SIZE length = calloutptr->callout_string_length;
2247 PCRE2_SPTR string = calloutptr->callout_string;
2248 PCRE2_SPTR subject = calloutptr->subject;
2249 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2250 PCRE2_SIZE capture_top = calloutptr->capture_top;
2251 
2252 #ifdef SUPPORT_PCRE2GREP_CALLOUT_FORK
2253 PCRE2_SIZE argsvectorlen = 2;
2254 PCRE2_SIZE argslen = 1;
2255 char *args;
2256 char *argsptr;
2257 char **argsvector;
2258 char **argsvectorptr;
2259 #ifndef WIN32
2260 pid_t pid;
2261 #endif
2262 int result = 0;
2263 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2264 
2265 (void)unused;   /* Avoid compiler warning */
2266 
2267 /* Only callouts with strings are supported. */
2268 
2269 if (string == NULL || length == 0) return 0;
2270 
2271 /* If there's no command, output the remainder directly. */
2272 
2273 if (*string == '|')
2274   {
2275   string++;
2276   if (!syntax_check_output_text(string, TRUE)) return 0;
2277   (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2278   return 0;
2279   }
2280 
2281 #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
2282 return 0;
2283 #else
2284 
2285 /* Checking syntax and compute the number of string fragments. Callout strings
2286 are silently ignored in the event of a syntax error. */
2287 
2288 while (length > 0)
2289   {
2290   if (*string == '|')
2291     {
2292     argsvectorlen++;
2293     if (argsvectorlen > 10000) return 0;  /* Too many args */
2294     }
2295 
2296   else if (*string == '$')
2297     {
2298     uint32_t value;
2299     PCRE2_SPTR begin = string;
2300 
2301     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2302       {
2303       case DDE_CAPTURE:
2304       if (value < capture_top)
2305         {
2306         value *= 2;
2307         argslen += ovector[value + 1] - ovector[value];
2308         }
2309       argslen--;   /* Negate the effect of argslen++ below. */
2310       break;
2311 
2312       case DDE_CHAR:
2313       if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
2314         else if (utf && value > 127) argslen += ord2utf8(value) - 1;
2315       break;
2316 
2317       default:         /* Should not occur */
2318       case DDE_ERROR:
2319       return 0;
2320       }
2321 
2322     length -= (string - begin);
2323     }
2324 
2325   string++;
2326   length--;
2327   argslen++;
2328   }
2329 
2330 /* Get memory for the argument vector and its strings. */
2331 
2332 args = (char*)malloc(argslen);
2333 if (args == NULL) return 0;
2334 
2335 argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
2336 if (argsvector == NULL)
2337   {
2338   free(args);
2339   return 0;
2340   }
2341 
2342 /* Now reprocess the string and set up the arguments. */
2343 
2344 argsptr = args;
2345 argsvectorptr = argsvector;
2346 *argsvectorptr++ = argsptr;
2347 
2348 length = calloutptr->callout_string_length;
2349 string = calloutptr->callout_string;
2350 
2351 while (length > 0)
2352   {
2353   if (*string == '|')
2354     {
2355     *argsptr++ = '\0';
2356     *argsvectorptr++ = argsptr;
2357     }
2358 
2359   else if (*string == '$')
2360     {
2361     uint32_t value;
2362     PCRE2_SPTR begin = string;
2363 
2364     switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
2365       {
2366       case DDE_CAPTURE:
2367       if (value < capture_top)
2368         {
2369         PCRE2_SIZE capturesize;
2370         value *= 2;
2371         capturesize = ovector[value + 1] - ovector[value];
2372         memcpy(argsptr, subject + ovector[value], capturesize);
2373         argsptr += capturesize;
2374         }
2375       break;
2376 
2377       case DDE_CHAR:
2378       if (value == STDOUT_NL_CODE)
2379         {
2380         memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
2381         argsptr += STDOUT_NL_LEN;
2382         }
2383       else if (utf && value > 127)
2384         {
2385         int n = ord2utf8(value);
2386         memcpy(argsptr, utf8_buffer, n);
2387         argsptr += n;
2388         }
2389       else
2390         {
2391         *argsptr++ = value;
2392         }
2393       break;
2394 
2395       default:         /* Even though this should not occur, the string having */
2396       case DDE_ERROR:  /* been checked above, we need to include the free() */
2397       free(args);      /* calls so that source checkers do not complain. */
2398       free(argsvector);
2399       return 0;
2400       }
2401 
2402     length -= (string - begin);
2403     }
2404 
2405   else *argsptr++ = *string;
2406 
2407   /* Advance along the string */
2408 
2409   string++;
2410   length--;
2411   }
2412 
2413 *argsptr++ = '\0';
2414 *argsvectorptr = NULL;
2415 
2416 /* Running an external command is system-dependent. Handle Windows and VMS as
2417 necessary, otherwise assume fork(). */
2418 
2419 #ifdef WIN32
2420 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2421 
2422 #elif defined __VMS
2423   {
2424   char cmdbuf[500];
2425   short i = 0;
2426   int flags = CLI$M_NOCLISYM|CLI$M_NOLOGNAM|CLI$M_NOKEYPAD, status, retstat;
2427   $DESCRIPTOR(cmd, cmdbuf);
2428 
2429   cmdbuf[0] = 0;
2430   while (argsvector[i])
2431   {
2432     strcat(cmdbuf, argsvector[i]);
2433     strcat(cmdbuf, " ");
2434     i++;
2435   }
2436   cmd.dsc$w_length = strlen(cmdbuf) - 1;
2437   status = lib$spawn(&cmd, 0,0, &flags, 0,0, &retstat);
2438   if (!(status & 1)) result = 0;
2439   else result = retstat & 1 ? 0 : 1;
2440   }
2441 
2442 #else  /* Neither Windows nor VMS */
2443 pid = fork();
2444 if (pid == 0)
2445   {
2446   (void)execv(argsvector[0], argsvector);
2447   /* Control gets here if there is an error, e.g. a non-existent program */
2448   exit(1);
2449   }
2450 else if (pid > 0)
2451   (void)waitpid(pid, &result, 0);
2452 #endif  /* End Windows/VMS/other handling */
2453 
2454 free(args);
2455 free(argsvector);
2456 
2457 /* Currently negative return values are not supported, only zero (match
2458 continues) or non-zero (match fails). */
2459 
2460 return result != 0;
2461 #endif  /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
2462 }
2463 #endif  /* SUPPORT_PCRE2GREP_CALLOUT */
2464 
2465 
2466 
2467 /*************************************************
2468 *     Read a portion of the file into buffer     *
2469 *************************************************/
2470 
2471 static int
fill_buffer(void * handle,int frtype,char * buffer,int length,BOOL input_line_buffered)2472 fill_buffer(void *handle, int frtype, char *buffer, int length,
2473   BOOL input_line_buffered)
2474 {
2475 (void)frtype;  /* Avoid warning when not used */
2476 
2477 #ifdef SUPPORT_LIBZ
2478 if (frtype == FR_LIBZ)
2479   return gzread((gzFile)handle, buffer, length);
2480 else
2481 #endif
2482 
2483 #ifdef SUPPORT_LIBBZ2
2484 if (frtype == FR_LIBBZ2)
2485   return BZ2_bzread((BZFILE *)handle, buffer, length);
2486 else
2487 #endif
2488 
2489 return (input_line_buffered ?
2490   read_one_line(buffer, length, (FILE *)handle) :
2491   fread(buffer, 1, length, (FILE *)handle));
2492 }
2493 
2494 
2495 
2496 /*************************************************
2497 *            Grep an individual file             *
2498 *************************************************/
2499 
2500 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2501 times the value of bufthird. The matching point is never allowed to stray into
2502 the top third of the buffer, thus keeping more of the file available for
2503 context printing or for multiline scanning. For large files, the pointer will
2504 be in the middle third most of the time, so the bottom third is available for
2505 "before" context printing.
2506 
2507 Arguments:
2508   handle       the fopened FILE stream for a normal file
2509                the gzFile pointer when reading is via libz
2510                the BZFILE pointer when reading is via libbz2
2511   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2512   filename     the file name or NULL (for errors)
2513   printname    the file name if it is to be printed for each match
2514                or NULL if the file name is not to be printed
2515                it cannot be NULL if filenames[_nomatch]_only is set
2516 
2517 Returns:       0 if there was at least one match
2518                1 otherwise (no matches)
2519                2 if an overlong line is encountered
2520                3 if there is a read error on a .bz2 file
2521 */
2522 
2523 static int
pcre2grep(void * handle,int frtype,const char * filename,const char * printname)2524 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2525 {
2526 int rc = 1;
2527 int filepos = 0;
2528 unsigned long int linenumber = 1;
2529 unsigned long int lastmatchnumber = 0;
2530 unsigned long int count = 0;
2531 long int count_matched_lines = 0;
2532 char *lastmatchrestart = main_buffer;
2533 char *ptr = main_buffer;
2534 char *endptr;
2535 PCRE2_SIZE bufflength;
2536 BOOL binary = FALSE;
2537 BOOL endhyphenpending = FALSE;
2538 BOOL lines_printed = FALSE;
2539 BOOL input_line_buffered = line_buffered;
2540 FILE *in = NULL;                    /* Ensure initialized */
2541 
2542 /* Do the first read into the start of the buffer and set up the pointer to end
2543 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2544 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2545 fail. */
2546 
2547 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2548   {
2549   in = (FILE *)handle;
2550   if (is_file_tty(in)) input_line_buffered = TRUE;
2551   }
2552 else input_line_buffered = FALSE;
2553 
2554 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2555   input_line_buffered);
2556 
2557 #ifdef SUPPORT_LIBBZ2
2558 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2;   /* Gotcha: bufflength is PCRE2_SIZE */
2559 #endif
2560 
2561 endptr = main_buffer + bufflength;
2562 
2563 /* Unless binary-files=text, see if we have a binary file. This uses the same
2564 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2565 file. However, when the newline convention is binary zero, we can't do this. */
2566 
2567 if (binary_files != BIN_TEXT)
2568   {
2569   if (endlinetype != PCRE2_NEWLINE_NUL)
2570     binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2571       != NULL;
2572   if (binary && binary_files == BIN_NOMATCH) return 1;
2573   }
2574 
2575 /* Loop while the current pointer is not at the end of the file. For large
2576 files, endptr will be at the end of the buffer when we are in the middle of the
2577 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2578 way, the buffer is shifted left and re-filled. */
2579 
2580 while (ptr < endptr)
2581   {
2582   int endlinelength;
2583   int mrc = 0;
2584   unsigned int options = 0;
2585   BOOL match;
2586   BOOL line_matched = FALSE;
2587   char *t = ptr;
2588   PCRE2_SIZE length, linelength;
2589   PCRE2_SIZE startoffset = 0;
2590 
2591   /* If the -m option set a limit for the number of matched or non-matched
2592   lines, check it here. A limit of zero means that no matching is ever done.
2593   For stdin from a file, set the file position. */
2594 
2595   if (count_limit >= 0 && count_matched_lines >= count_limit)
2596     {
2597     if (frtype == FR_PLAIN && filename == stdin_name && !is_file_tty(handle))
2598       (void)fseek(handle, (long int)filepos, SEEK_SET);
2599     rc = (count_limit == 0)? 1 : 0;
2600     break;
2601     }
2602 
2603   /* At this point, ptr is at the start of a line. We need to find the length
2604   of the subject string to pass to pcre2_match(). In multiline mode, it is the
2605   length remainder of the data in the buffer. Otherwise, it is the length of
2606   the next line, excluding the terminating newline. After matching, we always
2607   advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2608   option is used for compiling, so that any match is constrained to be in the
2609   first line. */
2610 
2611   t = end_of_line(t, endptr, &endlinelength);
2612   linelength = t - ptr - endlinelength;
2613   length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2614 
2615   /* Check to see if the line we are looking at extends right to the very end
2616   of the buffer without a line terminator. This means the line is too long to
2617   handle at the current buffer size. Until the buffer reaches its maximum size,
2618   try doubling it and reading more data. */
2619 
2620   if (endlinelength == 0 && t == main_buffer + bufsize)
2621     {
2622     if (bufthird < max_bufthird)
2623       {
2624       char *new_buffer;
2625       int new_bufthird = 2*bufthird;
2626 
2627       if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2628       new_buffer = (char *)malloc(3*new_bufthird);
2629 
2630       if (new_buffer == NULL)
2631         {
2632         fprintf(stderr,
2633           "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2634           "pcre2grep: not enough memory to increase the buffer size to %d\n",
2635           linenumber,
2636           (filename == NULL)? "" : " of file ",
2637           (filename == NULL)? "" : filename,
2638           new_bufthird);
2639         return 2;
2640         }
2641 
2642       /* Copy the data and adjust pointers to the new buffer location. */
2643 
2644       memcpy(new_buffer, main_buffer, bufsize);
2645       bufthird = new_bufthird;
2646       bufsize = 3*bufthird;
2647       ptr = new_buffer + (ptr - main_buffer);
2648       lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2649       free(main_buffer);
2650       main_buffer = new_buffer;
2651 
2652       /* Read more data into the buffer and then try to find the line ending
2653       again. */
2654 
2655       bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2656         bufsize - bufflength, input_line_buffered);
2657       endptr = main_buffer + bufflength;
2658       continue;
2659       }
2660     else
2661       {
2662       fprintf(stderr,
2663         "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2664         "pcre2grep: the maximum buffer size is %d\n"
2665         "pcre2grep: use the --max-buffer-size option to change it\n",
2666         linenumber,
2667         (filename == NULL)? "" : " of file ",
2668         (filename == NULL)? "" : filename,
2669         bufthird);
2670       return 2;
2671       }
2672     }
2673 
2674   /* Extra processing for Jeffrey Friedl's debugging. */
2675 
2676 #ifdef JFRIEDL_DEBUG
2677   if (jfriedl_XT || jfriedl_XR)
2678   {
2679 #     include <sys/time.h>
2680 #     include <time.h>
2681       struct timeval start_time, end_time;
2682       struct timezone dummy;
2683       int i;
2684 
2685       if (jfriedl_XT)
2686       {
2687           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2688           const char *orig = ptr;
2689           ptr = malloc(newlen + 1);
2690           if (!ptr) {
2691                   printf("out of memory");
2692                   pcre2grep_exit(2);
2693           }
2694           endptr = ptr;
2695           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2696           for (i = 0; i < jfriedl_XT; i++) {
2697                   strncpy(endptr, orig,  length);
2698                   endptr += length;
2699           }
2700           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2701           length = newlen;
2702       }
2703 
2704       if (gettimeofday(&start_time, &dummy) != 0)
2705               perror("bad gettimeofday");
2706 
2707 
2708       for (i = 0; i < jfriedl_XR; i++)
2709           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2710               PCRE2_NOTEMPTY, offsets, offset_size) >= 0);
2711 
2712       if (gettimeofday(&end_time, &dummy) != 0)
2713               perror("bad gettimeofday");
2714 
2715       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2716                       -
2717                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2718 
2719       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2720       return 0;
2721   }
2722 #endif
2723 
2724   /* We come back here after a match when only_matching_count is non-zero, in
2725   order to find any further matches in the same line. This applies to
2726   --only-matching, --file-offsets, and --line-offsets. */
2727 
2728   ONLY_MATCHING_RESTART:
2729 
2730   /* Run through all the patterns until one matches or there is an error other
2731   than NOMATCH. This code is in a subroutine so that it can be re-used for
2732   finding subsequent matches when colouring matched lines. After finding one
2733   match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2734   this line. */
2735 
2736   match = match_patterns(ptr, length, options, startoffset, &mrc);
2737   options = PCRE2_NOTEMPTY;
2738 
2739   /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2740   only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2741   return code - to output data lines, so that binary zeroes are treated as just
2742   another data character. */
2743 
2744   if (match != invert)
2745     {
2746     BOOL hyphenprinted = FALSE;
2747 
2748     /* We've failed if we want a file that doesn't have any matches. */
2749 
2750     if (filenames == FN_NOMATCH_ONLY) return 1;
2751 
2752     /* Remember that this line matched (for counting matched lines) */
2753 
2754     line_matched = TRUE;
2755 
2756     /* If all we want is a yes/no answer, we can return immediately. */
2757 
2758     if (quiet) return 0;
2759 
2760     /* Just count if just counting is wanted. */
2761 
2762     else if (count_only || show_total_count) count++;
2763 
2764     /* When handling a binary file and binary-files==binary, the "binary"
2765     variable will be set true (it's false in all other cases). In this
2766     situation we just want to output the file name. No need to scan further. */
2767 
2768     else if (binary)
2769       {
2770       fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2771       return 0;
2772       }
2773 
2774     /* Likewise, if all we want is a file name, there is no need to scan any
2775     more lines in the file. */
2776 
2777     else if (filenames == FN_MATCH_ONLY)
2778       {
2779       fprintf(stdout, "%s" STDOUT_NL, printname);
2780       return 0;
2781       }
2782 
2783     /* The --only-matching option prints just the substring that matched,
2784     and/or one or more captured portions of it, as long as these strings are
2785     not empty. The --file-offsets and --line-offsets options output offsets for
2786     the matching substring (all three set only_matching_count non-zero). None
2787     of these mutually exclusive options prints any context. Afterwards, adjust
2788     the start and then jump back to look for further matches in the same line.
2789     If we are in invert mode, however, nothing is printed and we do not restart
2790     - this could still be useful because the return code is set. */
2791 
2792     else if (only_matching_count != 0)
2793       {
2794       if (!invert)
2795         {
2796         PCRE2_SIZE oldstartoffset;
2797 
2798         if (printname != NULL) fprintf(stdout, "%s:", printname);
2799         if (number) fprintf(stdout, "%lu:", linenumber);
2800 
2801         /* Handle --line-offsets */
2802 
2803         if (line_offsets)
2804           fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2805             (int)(offsets[1] - offsets[0]));
2806 
2807         /* Handle --file-offsets */
2808 
2809         else if (file_offsets)
2810           fprintf(stdout, "%d,%d" STDOUT_NL,
2811             (int)(filepos + ptr + offsets[0] - ptr),
2812             (int)(offsets[1] - offsets[0]));
2813 
2814         /* Handle --output (which has already been syntax checked) */
2815 
2816         else if (output_text != NULL)
2817           {
2818           if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2819               (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2820               number)
2821             fprintf(stdout, STDOUT_NL);
2822           }
2823 
2824         /* Handle --only-matching, which may occur many times */
2825 
2826         else
2827           {
2828           BOOL printed = FALSE;
2829           omstr *om;
2830 
2831           for (om = only_matching; om != NULL; om = om->next)
2832             {
2833             int n = om->groupnum;
2834             if (n == 0 || n < mrc)
2835               {
2836               int plen = offsets[2*n + 1] - offsets[2*n];
2837               if (plen > 0)
2838                 {
2839                 if (printed && om_separator != NULL)
2840                   fprintf(stdout, "%s", om_separator);
2841                 print_match(ptr + offsets[n*2], plen);
2842                 printed = TRUE;
2843                 }
2844               }
2845             }
2846 
2847           if (printed || printname != NULL || number)
2848             fprintf(stdout, STDOUT_NL);
2849           }
2850 
2851         /* Prepare to repeat to find the next match in the line. */
2852 
2853         match = FALSE;
2854         if (line_buffered) fflush(stdout);
2855         rc = 0;                      /* Had some success */
2856 
2857         /* If the pattern contained a lookbehind that included \K, it is
2858         possible that the end of the match might be at or before the actual
2859         starting offset we have just used. In this case, start one character
2860         further on. */
2861 
2862         startoffset = offsets[1];    /* Restart after the match */
2863         oldstartoffset = pcre2_get_startchar(match_data);
2864         if (startoffset <= oldstartoffset)
2865           {
2866           if (startoffset >= length) goto END_ONE_MATCH;  /* Were at end */
2867           startoffset = oldstartoffset + 1;
2868           if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2869           }
2870 
2871         /* If the current match ended past the end of the line (only possible
2872         in multiline mode), we must move on to the line in which it did end
2873         before searching for more matches. */
2874 
2875         while (startoffset > linelength)
2876           {
2877           ptr += linelength + endlinelength;
2878           filepos += (int)(linelength + endlinelength);
2879           linenumber++;
2880           startoffset -= (int)(linelength + endlinelength);
2881           t = end_of_line(ptr, endptr, &endlinelength);
2882           linelength = t - ptr - endlinelength;
2883           length = (PCRE2_SIZE)(endptr - ptr);
2884           }
2885 
2886         goto ONLY_MATCHING_RESTART;
2887         }
2888       }
2889 
2890     /* This is the default case when none of the above options is set. We print
2891     the matching lines(s), possibly preceded and/or followed by other lines of
2892     context. */
2893 
2894     else
2895       {
2896       lines_printed = TRUE;
2897 
2898       /* See if there is a requirement to print some "after" lines from a
2899       previous match. We never print any overlaps. */
2900 
2901       if (after_context > 0 && lastmatchnumber > 0)
2902         {
2903         int ellength;
2904         int linecount = 0;
2905         char *p = lastmatchrestart;
2906 
2907         while (p < ptr && linecount < after_context)
2908           {
2909           p = end_of_line(p, ptr, &ellength);
2910           linecount++;
2911           }
2912 
2913         /* It is important to advance lastmatchrestart during this printing so
2914         that it interacts correctly with any "before" printing below. Print
2915         each line's data using fwrite() in case there are binary zeroes. */
2916 
2917         while (lastmatchrestart < p)
2918           {
2919           char *pp = lastmatchrestart;
2920           if (printname != NULL) fprintf(stdout, "%s-", printname);
2921           if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2922           pp = end_of_line(pp, endptr, &ellength);
2923           FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2924           lastmatchrestart = pp;
2925           }
2926         if (lastmatchrestart != ptr) hyphenpending = TRUE;
2927         }
2928 
2929       /* If there were non-contiguous lines printed above, insert hyphens. */
2930 
2931       if (hyphenpending)
2932         {
2933         fprintf(stdout, "--" STDOUT_NL);
2934         hyphenpending = FALSE;
2935         hyphenprinted = TRUE;
2936         }
2937 
2938       /* See if there is a requirement to print some "before" lines for this
2939       match. Again, don't print overlaps. */
2940 
2941       if (before_context > 0)
2942         {
2943         int linecount = 0;
2944         char *p = ptr;
2945 
2946         while (p > main_buffer &&
2947                (lastmatchnumber == 0 || p > lastmatchrestart) &&
2948                linecount < before_context)
2949           {
2950           linecount++;
2951           p = previous_line(p, main_buffer);
2952           }
2953 
2954         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2955           fprintf(stdout, "--" STDOUT_NL);
2956 
2957         while (p < ptr)
2958           {
2959           int ellength;
2960           char *pp = p;
2961           if (printname != NULL) fprintf(stdout, "%s-", printname);
2962           if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2963           pp = end_of_line(pp, endptr, &ellength);
2964           FWRITE_IGNORE(p, 1, pp - p, stdout);
2965           p = pp;
2966           }
2967         }
2968 
2969       /* Now print the matching line(s); ensure we set hyphenpending at the end
2970       of the file if any context lines are being output. */
2971 
2972       if (after_context > 0 || before_context > 0)
2973         endhyphenpending = TRUE;
2974 
2975       if (printname != NULL) fprintf(stdout, "%s:", printname);
2976       if (number) fprintf(stdout, "%lu:", linenumber);
2977 
2978       /* This extra option, for Jeffrey Friedl's debugging requirements,
2979       replaces the matched string, or a specific captured string if it exists,
2980       with X. When this happens, colouring is ignored. */
2981 
2982 #ifdef JFRIEDL_DEBUG
2983       if (S_arg >= 0 && S_arg < mrc)
2984         {
2985         int first = S_arg * 2;
2986         int last  = first + 1;
2987         FWRITE_IGNORE(ptr, 1, offsets[first], stdout);
2988         fprintf(stdout, "X");
2989         FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2990         }
2991       else
2992 #endif
2993 
2994       /* In multiline mode, or if colouring, we have to split the line(s) up
2995       and search for further matches, but not of course if the line is a
2996       non-match. In multiline mode this is necessary in case there is another
2997       match that spans the end of the current line. When colouring we want to
2998       colour all matches. */
2999 
3000       if ((multiline || do_colour) && !invert)
3001         {
3002         int plength;
3003         PCRE2_SIZE endprevious;
3004 
3005         /* The use of \K may make the end offset earlier than the start. In
3006         this situation, swap them round. */
3007 
3008         if (offsets[0] > offsets[1])
3009           {
3010           PCRE2_SIZE temp = offsets[0];
3011           offsets[0] = offsets[1];
3012           offsets[1] = temp;
3013           }
3014 
3015         FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
3016         print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3017 
3018         for (;;)
3019           {
3020           PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
3021 
3022           endprevious = offsets[1];
3023           startoffset = endprevious;  /* Advance after previous match. */
3024 
3025           /* If the pattern contained a lookbehind that included \K, it is
3026           possible that the end of the match might be at or before the actual
3027           starting offset we have just used. In this case, start one character
3028           further on. */
3029 
3030           if (startoffset <= oldstartoffset)
3031             {
3032             startoffset = oldstartoffset + 1;
3033             if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
3034             }
3035 
3036           /* If the current match ended past the end of the line (only possible
3037           in multiline mode), we must move on to the line in which it did end
3038           before searching for more matches. Because the PCRE2_FIRSTLINE option
3039           is set, the start of the match will always be before the first
3040           newline sequence. */
3041 
3042           while (startoffset > linelength + endlinelength)
3043             {
3044             ptr += linelength + endlinelength;
3045             filepos += (int)(linelength + endlinelength);
3046             linenumber++;
3047             startoffset -= (int)(linelength + endlinelength);
3048             endprevious -= (int)(linelength + endlinelength);
3049             t = end_of_line(ptr, endptr, &endlinelength);
3050             linelength = t - ptr - endlinelength;
3051             length = (PCRE2_SIZE)(endptr - ptr);
3052             }
3053 
3054           /* If startoffset is at the exact end of the line it means this
3055           complete line was the final part of the match, so there is nothing
3056           more to do. */
3057 
3058           if (startoffset == linelength + endlinelength) break;
3059 
3060           /* Otherwise, run a match from within the final line, and if found,
3061           loop for any that may follow. */
3062 
3063           if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
3064 
3065           /* The use of \K may make the end offset earlier than the start. In
3066           this situation, swap them round. */
3067 
3068           if (offsets[0] > offsets[1])
3069             {
3070             PCRE2_SIZE temp = offsets[0];
3071             offsets[0] = offsets[1];
3072             offsets[1] = temp;
3073             }
3074 
3075           FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
3076           print_match(ptr + offsets[0], offsets[1] - offsets[0]);
3077           }
3078 
3079         /* In multiline mode, we may have already printed the complete line
3080         and its line-ending characters (if they matched the pattern), so there
3081         may be no more to print. */
3082 
3083         plength = (int)((linelength + endlinelength) - endprevious);
3084         if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
3085         }
3086 
3087       /* Not colouring or multiline; no need to search for further matches. */
3088 
3089       else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
3090       }
3091 
3092     /* End of doing what has to be done for a match. If --line-buffered was
3093     given, flush the output. */
3094 
3095     if (line_buffered) fflush(stdout);
3096     rc = 0;    /* Had some success */
3097 
3098     /* Remember where the last match happened for after_context. We remember
3099     where we are about to restart, and that line's number. */
3100 
3101     lastmatchrestart = ptr + linelength + endlinelength;
3102     lastmatchnumber = linenumber + 1;
3103 
3104     /* If a line was printed and we are now at the end of the file and the last
3105     line had no newline, output one. */
3106 
3107     if (lines_printed && lastmatchrestart >= endptr && endlinelength == 0)
3108       write_final_newline();
3109     }
3110 
3111   /* For a match in multiline inverted mode (which of course did not cause
3112   anything to be printed), we have to move on to the end of the match before
3113   proceeding. */
3114 
3115   if (multiline && invert && match)
3116     {
3117     int ellength;
3118     char *endmatch = ptr + offsets[1];
3119     t = ptr;
3120     while (t < endmatch)
3121       {
3122       t = end_of_line(t, endptr, &ellength);
3123       if (t <= endmatch) linenumber++; else break;
3124       }
3125     endmatch = end_of_line(endmatch, endptr, &ellength);
3126     linelength = endmatch - ptr - ellength;
3127     }
3128 
3129   /* Advance to after the newline and increment the line number. The file
3130   offset to the current line is maintained in filepos. */
3131 
3132   END_ONE_MATCH:
3133   ptr += linelength + endlinelength;
3134   filepos += (int)(linelength + endlinelength);
3135   linenumber++;
3136 
3137   /* If there was at least one match (or a non-match, as required) in the line,
3138   increment the count for the -m option. */
3139 
3140   if (line_matched) count_matched_lines++;
3141 
3142   /* If input is line buffered, and the buffer is not yet full, read another
3143   line and add it into the buffer. */
3144 
3145   if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
3146     {
3147     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
3148     bufflength += add;
3149     endptr += add;
3150     }
3151 
3152   /* If we haven't yet reached the end of the file (the buffer is full), and
3153   the current point is in the top 1/3 of the buffer, slide the buffer down by
3154   1/3 and refill it. Before we do this, if some unprinted "after" lines are
3155   about to be lost, print them. */
3156 
3157   if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
3158     {
3159     if (after_context > 0 &&
3160         lastmatchnumber > 0 &&
3161         lastmatchrestart < main_buffer + bufthird)
3162       {
3163       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3164       lastmatchnumber = 0;  /* Indicates no after lines pending */
3165       }
3166 
3167     /* Now do the shuffle */
3168 
3169     (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
3170     ptr -= bufthird;
3171 
3172     bufflength = 2*bufthird + fill_buffer(handle, frtype,
3173       main_buffer + 2*bufthird, bufthird, input_line_buffered);
3174     endptr = main_buffer + bufflength;
3175 
3176     /* Adjust any last match point */
3177 
3178     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
3179     }
3180   }     /* Loop through the whole file */
3181 
3182 /* End of file; print final "after" lines if wanted; do_after_lines sets
3183 hyphenpending if it prints something. */
3184 
3185 if (only_matching_count == 0 && !(count_only|show_total_count))
3186   {
3187   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
3188   hyphenpending |= endhyphenpending;
3189   }
3190 
3191 /* Print the file name if we are looking for those without matches and there
3192 were none. If we found a match, we won't have got this far. */
3193 
3194 if (filenames == FN_NOMATCH_ONLY)
3195   {
3196   fprintf(stdout, "%s" STDOUT_NL, printname);
3197   return 0;
3198   }
3199 
3200 /* Print the match count if wanted */
3201 
3202 if (count_only && !quiet)
3203   {
3204   if (count > 0 || !omit_zero_count)
3205     {
3206     if (printname != NULL && filenames != FN_NONE)
3207       fprintf(stdout, "%s:", printname);
3208     fprintf(stdout, "%lu" STDOUT_NL, count);
3209     counts_printed++;
3210     }
3211   }
3212 
3213 total_count += count;   /* Can be set without count_only */
3214 return rc;
3215 }
3216 
3217 
3218 
3219 /*************************************************
3220 *     Grep a file or recurse into a directory    *
3221 *************************************************/
3222 
3223 /* Given a path name, if it's a directory, scan all the files if we are
3224 recursing; if it's a file, grep it.
3225 
3226 Arguments:
3227   pathname          the path to investigate
3228   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
3229   only_one_at_top   TRUE if the path is the only one at toplevel
3230 
3231 Returns:  -1 the file/directory was skipped
3232            0 if there was at least one match
3233            1 if there were no matches
3234            2 there was some kind of error
3235 
3236 However, file opening failures are suppressed if "silent" is set.
3237 */
3238 
3239 static int
grep_or_recurse(char * pathname,BOOL dir_recurse,BOOL only_one_at_top)3240 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3241 {
3242 int rc = 1;
3243 int frtype;
3244 void *handle;
3245 char *lastcomp;
3246 FILE *in = NULL;           /* Ensure initialized */
3247 
3248 #ifdef SUPPORT_LIBZ
3249 gzFile ingz = NULL;
3250 #endif
3251 
3252 #ifdef SUPPORT_LIBBZ2
3253 BZFILE *inbz2 = NULL;
3254 #endif
3255 
3256 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3257 int pathlen;
3258 #endif
3259 
3260 #if defined NATIVE_ZOS
3261 int zos_type;
3262 FILE *zos_test_file;
3263 #endif
3264 
3265 /* If the file name is "-" we scan stdin */
3266 
3267 if (strcmp(pathname, "-") == 0)
3268   {
3269   return pcre2grep(stdin, FR_PLAIN, stdin_name,
3270     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3271       stdin_name : NULL);
3272   }
3273 
3274 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3275 directories, whereas --include and --exclude apply to everything else. The test
3276 is against the final component of the path. */
3277 
3278 lastcomp = strrchr(pathname, FILESEP);
3279 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3280 
3281 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3282 Otherwise, scan the directory and recurse for each path within it. The scanning
3283 code is localized so it can be made system-specific. */
3284 
3285 
3286 /* For z/OS, determine the file type. */
3287 
3288 #if defined NATIVE_ZOS
3289 zos_test_file =  fopen(pathname,"rb");
3290 
3291 if (zos_test_file == NULL)
3292    {
3293    if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3294      pathname, strerror(errno));
3295    return -1;
3296    }
3297 zos_type = identifyzosfiletype (zos_test_file);
3298 fclose (zos_test_file);
3299 
3300 /* Handle a PDS in separate code */
3301 
3302 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3303    {
3304    return travelonpdsdir (pathname, only_one_at_top);
3305    }
3306 
3307 /* Deal with regular files in the normal way below. These types are:
3308    zos_type == __ZOS_PDS_MEMBER
3309    zos_type == __ZOS_PS
3310    zos_type == __ZOS_VSAM_KSDS
3311    zos_type == __ZOS_VSAM_ESDS
3312    zos_type == __ZOS_VSAM_RRDS
3313 */
3314 
3315 /* Handle a z/OS directory using common code. */
3316 
3317 else if (zos_type == __ZOS_HFS)
3318  {
3319 #endif  /* NATIVE_ZOS */
3320 
3321 
3322 /* Handle directories: common code for all OS */
3323 
3324 if (isdirectory(pathname))
3325   {
3326   if (dee_action == dee_SKIP ||
3327       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3328     return -1;
3329 
3330   if (dee_action == dee_RECURSE)
3331     {
3332     char childpath[FNBUFSIZ];
3333     char *nextfile;
3334     directory_type *dir = opendirectory(pathname);
3335 
3336     if (dir == NULL)
3337       {
3338       if (!silent)
3339         fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3340           strerror(errno));
3341       return 2;
3342       }
3343 
3344     while ((nextfile = readdirectory(dir)) != NULL)
3345       {
3346       int frc;
3347       int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3348       if (fnlength > FNBUFSIZ)
3349         {
3350         fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3351         rc = 2;
3352         break;
3353         }
3354       sprintf(childpath, "%s%c%s", pathname, FILESEP, nextfile);
3355 
3356       /* If the realpath() function is available, we can try to prevent endless
3357       recursion caused by a symlink pointing to a parent directory (GitHub
3358       issue #2 (old Bugzilla #2794). Original patch from Thomas Tempelmann.
3359       Modified to avoid using strlcat() because that isn't a standard C
3360       function, and also modified not to copy back the fully resolved path,
3361       because that affects the output from pcre2grep. */
3362 
3363 #ifdef HAVE_REALPATH
3364       {
3365       char resolvedpath[PATH_MAX];
3366       BOOL isSame;
3367       size_t rlen;
3368       if (realpath(childpath, resolvedpath) == NULL)
3369         continue;     /* This path is invalid - we can skip processing this */
3370       isSame = strcmp(pathname, resolvedpath) == 0;
3371       if (isSame) continue;    /* We have a recursion */
3372       rlen = strlen(resolvedpath);
3373       if (rlen++ < sizeof(resolvedpath) - 3)
3374         {
3375         BOOL contained;
3376         strcat(resolvedpath, "/");
3377         contained = strncmp(pathname, resolvedpath, rlen) == 0;
3378         if (contained) continue;    /* We have a recursion */
3379         }
3380       }
3381 #endif  /* HAVE_REALPATH */
3382 
3383       frc = grep_or_recurse(childpath, dir_recurse, FALSE);
3384       if (frc > 1) rc = frc;
3385        else if (frc == 0 && rc == 1) rc = 0;
3386       }
3387 
3388     closedirectory(dir);
3389     return rc;
3390     }
3391   }
3392 
3393 #ifdef WIN32
3394 if (iswild(pathname))
3395   {
3396   char buffer[1024];
3397   char *nextfile;
3398   char *name;
3399   directory_type *dir = opendirectory(pathname);
3400 
3401   if (dir == NULL)
3402     return 0;
3403 
3404   for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3405     if (*nextfile == '/' || *nextfile == '\\')
3406       name = nextfile + 1;
3407   *name = 0;
3408 
3409   while ((nextfile = readdirectory(dir)) != NULL)
3410     {
3411     int frc;
3412     sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3413     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3414     if (frc > 1) rc = frc;
3415      else if (frc == 0 && rc == 1) rc = 0;
3416     }
3417 
3418   closedirectory(dir);
3419   return rc;
3420   }
3421 #endif
3422 
3423 #if defined NATIVE_ZOS
3424  }
3425 #endif
3426 
3427 /* If the file is not a directory, check for a regular file, and if it is not,
3428 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3429 exclusion. */
3430 
3431 else if (
3432 #if defined NATIVE_ZOS
3433         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3434 #else  /* all other OS */
3435         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3436 #endif
3437         !test_incexc(lastcomp, include_patterns, exclude_patterns))
3438   return -1;  /* File skipped */
3439 
3440 /* Control reaches here if we have a regular file, or if we have a directory
3441 and recursion or skipping was not requested, or if we have anything else and
3442 skipping was not requested. The scan proceeds. If this is the first and only
3443 argument at top level, we don't show the file name, unless we are only showing
3444 the file name, or the filename was forced (-H). */
3445 
3446 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3447 pathlen = (int)(strlen(pathname));
3448 #endif
3449 
3450 /* Open using zlib if it is supported and the file name ends with .gz. */
3451 
3452 #ifdef SUPPORT_LIBZ
3453 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3454   {
3455   ingz = gzopen(pathname, "rb");
3456   if (ingz == NULL)
3457     {
3458     if (!silent)
3459       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3460         strerror(errno));
3461     return 2;
3462     }
3463   handle = (void *)ingz;
3464   frtype = FR_LIBZ;
3465   }
3466 else
3467 #endif
3468 
3469 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3470 
3471 #ifdef SUPPORT_LIBBZ2
3472 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3473   {
3474   inbz2 = BZ2_bzopen(pathname, "rb");
3475   handle = (void *)inbz2;
3476   frtype = FR_LIBBZ2;
3477   }
3478 else
3479 #endif
3480 
3481 /* Otherwise use plain fopen(). The label is so that we can come back here if
3482 an attempt to read a .bz2 file indicates that it really is a plain file. */
3483 
3484 #ifdef SUPPORT_LIBBZ2
3485 PLAIN_FILE:
3486 #endif
3487   {
3488   in = fopen(pathname, "rb");
3489   handle = (void *)in;
3490   frtype = FR_PLAIN;
3491   }
3492 
3493 /* All the opening methods return errno when they fail. */
3494 
3495 if (handle == NULL)
3496   {
3497   if (!silent)
3498     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3499       strerror(errno));
3500   return 2;
3501   }
3502 
3503 /* Now grep the file */
3504 
3505 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3506   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3507 
3508 /* Close in an appropriate manner. */
3509 
3510 #ifdef SUPPORT_LIBZ
3511 if (frtype == FR_LIBZ)
3512   gzclose(ingz);
3513 else
3514 #endif
3515 
3516 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3517 read failed. If the error indicates that the file isn't in fact bzipped, try
3518 again as a normal file. */
3519 
3520 #ifdef SUPPORT_LIBBZ2
3521 if (frtype == FR_LIBBZ2)
3522   {
3523   if (rc == 3)
3524     {
3525     int errnum;
3526     const char *err = BZ2_bzerror(inbz2, &errnum);
3527     if (errnum == BZ_DATA_ERROR_MAGIC)
3528       {
3529       BZ2_bzclose(inbz2);
3530       goto PLAIN_FILE;
3531       }
3532     else if (!silent)
3533       fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3534         pathname, err);
3535     rc = 2;    /* The normal "something went wrong" code */
3536     }
3537   BZ2_bzclose(inbz2);
3538   }
3539 else
3540 #endif
3541 
3542 /* Normal file close */
3543 
3544 fclose(in);
3545 
3546 /* Pass back the yield from pcre2grep(). */
3547 
3548 return rc;
3549 }
3550 
3551 
3552 
3553 /*************************************************
3554 *          Handle a no-data option               *
3555 *************************************************/
3556 
3557 static int
handle_option(int letter,int options)3558 handle_option(int letter, int options)
3559 {
3560 switch(letter)
3561   {
3562   case N_FOFFSETS: file_offsets = TRUE; break;
3563   case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3564   case N_LBUFFER: line_buffered = TRUE; break;
3565   case N_LOFFSETS: line_offsets = number = TRUE; break;
3566   case N_NOJIT: use_jit = FALSE; break;
3567   case N_ALLABSK: extra_options |= PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK; break;
3568   case 'a': binary_files = BIN_TEXT; break;
3569   case 'c': count_only = TRUE; break;
3570   case 'F': options |= PCRE2_LITERAL; break;
3571   case 'H': filenames = FN_FORCE; break;
3572   case 'I': binary_files = BIN_NOMATCH; break;
3573   case 'h': filenames = FN_NONE; break;
3574   case 'i': options |= PCRE2_CASELESS; break;
3575   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3576   case 'L': filenames = FN_NOMATCH_ONLY; break;
3577   case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3578   case 'n': number = TRUE; break;
3579 
3580   case 'o':
3581   only_matching_last = add_number(0, only_matching_last);
3582   if (only_matching == NULL) only_matching = only_matching_last;
3583   break;
3584 
3585   case 'q': quiet = TRUE; break;
3586   case 'r': dee_action = dee_RECURSE; break;
3587   case 's': silent = TRUE; break;
3588   case 't': show_total_count = TRUE; break;
3589   case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3590   case 'U': options |= PCRE2_UTF|PCRE2_MATCH_INVALID_UTF; utf = TRUE; break;
3591   case 'v': invert = TRUE; break;
3592   case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3593   case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
3594 
3595   case 'V':
3596     {
3597     unsigned char buffer[128];
3598     (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3599     fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3600     }
3601   pcre2grep_exit(0);
3602   break;
3603 
3604   default:
3605   fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3606   pcre2grep_exit(usage(2));
3607   }
3608 
3609 return options;
3610 }
3611 
3612 
3613 
3614 /*************************************************
3615 *          Construct printed ordinal             *
3616 *************************************************/
3617 
3618 /* This turns a number into "1st", "3rd", etc. */
3619 
static char *
ordin(int n)
{
/* The yield points to a static buffer that is overwritten by each call. Its
size allows for the longest 32-bit decimal, a two-letter suffix, and the
terminating zero. */

static char buffer[14];
const char *suffix = "th";
int digits = sprintf(buffer, "%d", n);   /* Number of characters written */
int lasttwo = n % 100;

/* 11, 12, and 13 take "th" in spite of their final digit; for everything else
the final digit decides. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (lasttwo % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + digits, suffix);
return buffer;
}
3638 
3639 
3640 
3641 /*************************************************
3642 *          Compile a single pattern              *
3643 *************************************************/
3644 
3645 /* Do nothing if the pattern has already been compiled. This is the case for
3646 include/exclude patterns read from a file.
3647 
3648 When the -F option has been used, each "pattern" may be a list of strings,
3649 separated by line breaks. They will be matched literally. We split such a
3650 string and compile the first substring, inserting an additional block into the
3651 pattern chain.
3652 
3653 Arguments:
3654   p              points to the pattern block
3655   options        the PCRE options
3656   fromfile       TRUE if the pattern was read from a file
3657   fromtext       file name or identifying text (e.g. "include")
3658   count          0 if this is the only command line pattern, or
3659                  number of the command line pattern, or
3660                  linenumber for a pattern from a file
3661 
3662 Returns:         TRUE on success, FALSE after an error
3663 */
3664 
3665 static BOOL
compile_pattern(patstr * p,int options,int fromfile,const char * fromtext,int count)3666 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3667   int count)
3668 {
3669 char *ps;
3670 int errcode;
3671 PCRE2_SIZE patlen, erroffset;
3672 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3673 
3674 if (p->compiled != NULL) return TRUE;
3675 ps = p->string;
3676 patlen = p->length;
3677 
3678 if ((options & PCRE2_LITERAL) != 0)
3679   {
3680   int ellength;
3681   char *eop = ps + patlen;
3682   char *pe = end_of_line(ps, eop, &ellength);
3683 
3684   if (ellength != 0)
3685     {
3686     patlen = pe - ps - ellength;
3687     if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3688     }
3689   }
3690 
3691 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3692   &erroffset, compile_context);
3693 
3694 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3695 ignore any JIT compiler errors, relying falling back to interpreting if
3696 anything goes wrong with JIT. */
3697 
3698 if (p->compiled != NULL)
3699   {
3700 #ifdef SUPPORT_PCRE2GREP_JIT
3701   if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3702 #endif
3703   return TRUE;
3704   }
3705 
3706 /* Handle compile errors */
3707 
3708 if (erroffset > patlen) erroffset = patlen;
3709 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
3710 
3711 if (fromfile)
3712   {
3713   fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3714     "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3715   }
3716 else
3717   {
3718   if (count == 0)
3719     fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3720       fromtext, (int)erroffset, errmessbuffer);
3721   else
3722     fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3723       ordin(count), fromtext, (int)erroffset, errmessbuffer);
3724   }
3725 
3726 return FALSE;
3727 }
3728 
3729 
3730 
3731 /*************************************************
3732 *     Read and compile a file of patterns        *
3733 *************************************************/
3734 
3735 /* This is used for --filelist, --include-from, and --exclude-from.
3736 
3737 Arguments:
3738   name         the name of the file; "-" is stdin
3739   patptr       pointer to the pattern chain anchor
3740   patlastptr   pointer to the last pattern pointer
3741 
3742 Returns:       TRUE if all went well
3743 */
3744 
3745 static BOOL
read_pattern_file(char * name,patstr ** patptr,patstr ** patlastptr)3746 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3747 {
3748 int linenumber = 0;
3749 PCRE2_SIZE patlen;
3750 FILE *f;
3751 const char *filename;
3752 char buffer[MAXPATLEN+20];
3753 
3754 if (strcmp(name, "-") == 0)
3755   {
3756   f = stdin;
3757   filename = stdin_name;
3758   }
3759 else
3760   {
3761   f = fopen(name, "r");
3762   if (f == NULL)
3763     {
3764     fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3765     return FALSE;
3766     }
3767   filename = name;
3768   }
3769 
3770 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3771   {
3772   while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3773   linenumber++;
3774   if (patlen == 0) continue;   /* Skip blank lines */
3775 
3776   /* Note: this call to add_pattern() puts a pointer to the local variable
3777   "buffer" into the pattern chain. However, that pointer is used only when
3778   compiling the pattern, which happens immediately below, so we flatten it
3779   afterwards, as a precaution against any later code trying to use it. */
3780 
3781   *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3782   if (*patlastptr == NULL)
3783     {
3784     if (f != stdin) fclose(f);
3785     return FALSE;
3786     }
3787   if (*patptr == NULL) *patptr = *patlastptr;
3788 
3789   /* This loop is needed because compiling a "pattern" when -F is set may add
3790   on additional literal patterns if the original contains a newline. In the
3791   common case, it never will, because read_one_line() stops at a newline.
3792   However, the -N option can be used to give pcre2grep a different newline
3793   setting. */
3794 
3795   for(;;)
3796     {
3797     if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3798         linenumber))
3799       {
3800       if (f != stdin) fclose(f);
3801       return FALSE;
3802       }
3803     (*patlastptr)->string = NULL;            /* Insurance */
3804     if ((*patlastptr)->next == NULL) break;
3805     *patlastptr = (*patlastptr)->next;
3806     }
3807   }
3808 
3809 if (f != stdin) fclose(f);
3810 return TRUE;
3811 }
3812 
3813 
3814 
3815 /*************************************************
3816 *                Main program                    *
3817 *************************************************/
3818 
3819 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3820 
3821 int
main(int argc,char ** argv)3822 main(int argc, char **argv)
3823 {
3824 int i, j;
3825 int rc = 1;
3826 BOOL only_one_at_top;
3827 patstr *cp;
3828 fnstr *fn;
3829 omstr *om;
3830 const char *locale_from = "--locale";
3831 
3832 #ifdef SUPPORT_PCRE2GREP_JIT
3833 pcre2_jit_stack *jit_stack = NULL;
3834 #endif
3835 
3836 /* In Windows, stdout is set up as a text stream, which means that \n is
3837 converted to \r\n. This causes output lines that are copied from the input to
3838 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3839 that stdout is a binary stream. Note that this means all other output to stdout
3840 must use STDOUT_NL to terminate lines. */
3841 
3842 #ifdef WIN32
3843 _setmode(_fileno(stdout), _O_BINARY);
3844 #endif
3845 
3846 /* Process the options */
3847 
3848 for (i = 1; i < argc; i++)
3849   {
3850   option_item *op = NULL;
3851   char *option_data = (char *)"";    /* default to keep compiler happy */
3852   BOOL longop;
3853   BOOL longopwasequals = FALSE;
3854 
3855   if (argv[i][0] != '-') break;
3856 
3857   /* If we hit an argument that is just "-", it may be a reference to STDIN,
3858   but only if we have previously had -e or -f to define the patterns. */
3859 
3860   if (argv[i][1] == 0)
3861     {
3862     if (pattern_files != NULL || patterns != NULL) break;
3863       else pcre2grep_exit(usage(2));
3864     }
3865 
3866   /* Handle a long name option, or -- to terminate the options */
3867 
3868   if (argv[i][1] == '-')
3869     {
3870     char *arg = argv[i] + 2;
3871     char *argequals = strchr(arg, '=');
3872 
3873     if (*arg == 0)    /* -- terminates options */
3874       {
3875       i++;
3876       break;                /* out of the options-handling loop */
3877       }
3878 
3879     longop = TRUE;
3880 
3881     /* Some long options have data that follows after =, for example file=name.
3882     Some options have variations in the long name spelling: specifically, we
3883     allow "regexp" because GNU grep allows it, though I personally go along
3884     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3885     These options are entered in the table as "regex(p)". Options can be in
3886     both these categories. */
3887 
3888     for (op = optionlist; op->one_char != 0; op++)
3889       {
3890       char *opbra = strchr(op->long_name, '(');
3891       char *equals = strchr(op->long_name, '=');
3892 
3893       /* Handle options with only one spelling of the name */
3894 
3895       if (opbra == NULL)     /* Does not contain '(' */
3896         {
3897         if (equals == NULL)  /* Not thing=data case */
3898           {
3899           if (strcmp(arg, op->long_name) == 0) break;
3900           }
3901         else                 /* Special case xxx=data */
3902           {
3903           int oplen = (int)(equals - op->long_name);
3904           int arglen = (argequals == NULL)?
3905             (int)strlen(arg) : (int)(argequals - arg);
3906           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3907             {
3908             option_data = arg + arglen;
3909             if (*option_data == '=')
3910               {
3911               option_data++;
3912               longopwasequals = TRUE;
3913               }
3914             break;
3915             }
3916           }
3917         }
3918 
3919       /* Handle options with an alternate spelling of the name */
3920 
3921       else
3922         {
3923         char buff1[24];
3924         char buff2[24];
3925         int ret;
3926 
3927         int baselen = (int)(opbra - op->long_name);
3928         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3929         int arglen = (argequals == NULL || equals == NULL)?
3930           (int)strlen(arg) : (int)(argequals - arg);
3931 
3932         if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3933              ret < 0 || ret > (int)sizeof(buff1)) ||
3934             (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3935                      fulllen - baselen - 2, opbra + 1),
3936              ret < 0 || ret > (int)sizeof(buff2)))
3937           {
3938           fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3939             op->long_name);
3940           pcre2grep_exit(2);
3941           }
3942 
3943         if (strncmp(arg, buff1, arglen) == 0 ||
3944            strncmp(arg, buff2, arglen) == 0)
3945           {
3946           if (equals != NULL && argequals != NULL)
3947             {
3948             option_data = argequals;
3949             if (*option_data == '=')
3950               {
3951               option_data++;
3952               longopwasequals = TRUE;
3953               }
3954             }
3955           break;
3956           }
3957         }
3958       }
3959 
3960     if (op->one_char == 0)
3961       {
3962       fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3963       pcre2grep_exit(usage(2));
3964       }
3965     }
3966 
3967   /* Jeffrey Friedl's debugging harness uses these additional options which
3968   are not in the right form for putting in the option table because they use
3969   only one hyphen, yet are more than one character long. By putting them
3970   separately here, they will not get displayed as part of the help() output,
3971   but I don't think Jeffrey will care about that. */
3972 
3973 #ifdef JFRIEDL_DEBUG
3974   else if (strcmp(argv[i], "-pre") == 0) {
3975           jfriedl_prefix = argv[++i];
3976           continue;
3977   } else if (strcmp(argv[i], "-post") == 0) {
3978           jfriedl_postfix = argv[++i];
3979           continue;
3980   } else if (strcmp(argv[i], "-XT") == 0) {
3981           sscanf(argv[++i], "%d", &jfriedl_XT);
3982           continue;
3983   } else if (strcmp(argv[i], "-XR") == 0) {
3984           sscanf(argv[++i], "%d", &jfriedl_XR);
3985           continue;
3986   }
3987 #endif
3988 
3989 
3990   /* One-char options; many that have no data may be in a single argument; we
3991   continue till we hit the last one or one that needs data. */
3992 
3993   else
3994     {
3995     char *s = argv[i] + 1;
3996     longop = FALSE;
3997 
3998     while (*s != 0)
3999       {
4000       for (op = optionlist; op->one_char != 0; op++)
4001         {
4002         if (*s == op->one_char) break;
4003         }
4004       if (op->one_char == 0)
4005         {
4006         fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
4007           *s, argv[i]);
4008         pcre2grep_exit(usage(2));
4009         }
4010 
4011       option_data = s+1;
4012 
4013       /* Break out if this is the last character in the string; it's handled
4014       below like a single multi-char option. */
4015 
4016       if (*option_data == 0) break;
4017 
4018       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
4019       are used for ones that either have a numerical number or defaults, i.e.
4020       the data is optional. If a digit follows, there is data; if not, carry on
4021       with other single-character options in the same string. */
4022 
4023       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
4024         {
4025         if (isdigit((unsigned char)s[1])) break;
4026         }
4027       else   /* Check for an option with data */
4028         {
4029         if (op->type != OP_NODATA) break;
4030         }
4031 
4032       /* Handle a single-character option with no data, then loop for the
4033       next character in the string. */
4034 
4035       pcre2_options = handle_option(*s++, pcre2_options);
4036       }
4037     }
4038 
4039   /* At this point we should have op pointing to a matched option. If the type
4040   is NO_DATA, it means that there is no data, and the option might set
4041   something in the PCRE options. */
4042 
4043   if (op->type == OP_NODATA)
4044     {
4045     pcre2_options = handle_option(op->one_char, pcre2_options);
4046     continue;
4047     }
4048 
4049   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
4050   either has a value or defaults to something. It cannot have data in a
4051   separate item. At the moment, the only such options are "colo(u)r",
4052   "only-matching", and Jeffrey Friedl's special -S debugging option. */
4053 
4054   if (*option_data == 0 &&
4055       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
4056        op->type == OP_OP_NUMBERS))
4057     {
4058     switch (op->one_char)
4059       {
4060       case N_COLOUR:
4061       colour_option = "auto";
4062       break;
4063 
4064       case 'o':
4065       only_matching_last = add_number(0, only_matching_last);
4066       if (only_matching == NULL) only_matching = only_matching_last;
4067       break;
4068 
4069 #ifdef JFRIEDL_DEBUG
4070       case 'S':
4071       S_arg = 0;
4072       break;
4073 #endif
4074       }
4075     continue;
4076     }
4077 
4078   /* Otherwise, find the data string for the option. */
4079 
4080   if (*option_data == 0)
4081     {
4082     if (i >= argc - 1 || longopwasequals)
4083       {
4084       fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
4085       pcre2grep_exit(usage(2));
4086       }
4087     option_data = argv[++i];
4088     }
4089 
4090   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
4091   added to a chain of numbers. */
4092 
4093   if (op->type == OP_OP_NUMBERS)
4094     {
4095     unsigned long int n = decode_number(option_data, op, longop);
4096     omdatastr *omd = (omdatastr *)op->dataptr;
4097     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
4098     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
4099     }
4100 
4101   /* If the option type is OP_PATLIST, it's the -e option, or one of the
4102   include/exclude options, which can be called multiple times to create lists
4103   of patterns. */
4104 
4105   else if (op->type == OP_PATLIST)
4106     {
4107     patdatastr *pd = (patdatastr *)op->dataptr;
4108     *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
4109       *(pd->lastptr));
4110     if (*(pd->lastptr) == NULL) goto EXIT2;
4111     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
4112     }
4113 
4114   /* If the option type is OP_FILELIST, it's one of the options that names a
4115   file. */
4116 
4117   else if (op->type == OP_FILELIST)
4118     {
4119     fndatastr *fd = (fndatastr *)op->dataptr;
4120     fn = (fnstr *)malloc(sizeof(fnstr));
4121     if (fn == NULL)
4122       {
4123       fprintf(stderr, "pcre2grep: malloc failed\n");
4124       goto EXIT2;
4125       }
4126     fn->next = NULL;
4127     fn->name = option_data;
4128     if (*(fd->anchor) == NULL)
4129       *(fd->anchor) = fn;
4130     else
4131       (*(fd->lastptr))->next = fn;
4132     *(fd->lastptr) = fn;
4133     }
4134 
4135   /* Handle OP_BINARY_FILES */
4136 
4137   else if (op->type == OP_BINFILES)
4138     {
4139     if (strcmp(option_data, "binary") == 0)
4140       binary_files = BIN_BINARY;
4141     else if (strcmp(option_data, "without-match") == 0)
4142       binary_files = BIN_NOMATCH;
4143     else if (strcmp(option_data, "text") == 0)
4144       binary_files = BIN_TEXT;
4145     else
4146       {
4147       fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
4148         option_data);
4149       pcre2grep_exit(usage(2));
4150       }
4151     }
4152 
4153   /* Otherwise, deal with a single string or numeric data value. */
4154 
4155   else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
4156            op->type != OP_OP_NUMBER && op->type != OP_SIZE)
4157     {
4158     *((char **)op->dataptr) = option_data;
4159     }
4160   else
4161     {
4162     unsigned long int n = decode_number(option_data, op, longop);
4163     if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
4164       else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
4165       else *((int *)op->dataptr) = n;
4166     }
4167   }
4168 
4169 /* Options have been decoded. If -C was used, its value is used as a default
4170 for -A and -B. */
4171 
4172 if (both_context > 0)
4173   {
4174   if (after_context == 0) after_context = both_context;
4175   if (before_context == 0) before_context = both_context;
4176   }
4177 
4178 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
4179 permitted. They display, each in their own way, only the data that has matched.
4180 */
4181 
4182 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
4183   file_offsets + line_offsets;
4184 
4185 if (only_matching_count > 1)
4186   {
4187   fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
4188     "--file-offsets and/or --line-offsets\n");
4189   pcre2grep_exit(usage(2));
4190   }
4191 
4192 
4193 /* Check that there is a big enough ovector for all -o settings. */
4194 
4195 for (om = only_matching; om != NULL; om = om->next)
4196   {
4197   int n = om->groupnum;
4198   if (n > (int)capture_max)
4199     {
4200     fprintf(stderr, "pcre2grep: Requested group %d cannot be captured.\n", n);
4201     fprintf(stderr, "pcre2grep: Use --om-capture to increase the size of the capture vector.\n");
4202     goto EXIT2;
4203     }
4204   }
4205 
4206 /* Check the text supplied to --output for errors. */
4207 
4208 if (output_text != NULL &&
4209     !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
4210   goto EXIT2;
4211 
4212 /* Set up default compile and match contexts and a match data block. */
4213 
4214 offset_size = capture_max + 1;
4215 compile_context = pcre2_compile_context_create(NULL);
4216 match_context = pcre2_match_context_create(NULL);
4217 match_data = pcre2_match_data_create(offset_size, NULL);
4218 offsets = pcre2_get_ovector_pointer(match_data);
4219 
4220 /* If string (script) callouts are supported, set up the callout processing
4221 function. */
4222 
4223 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4224 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
4225 #endif
4226 
4227 /* Put limits into the match data block. */
4228 
4229 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
4230 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
4231 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
4232 
4233 /* If a locale has not been provided as an option, see if the LC_CTYPE or
4234 LC_ALL environment variable is set, and if so, use it. */
4235 
4236 if (locale == NULL)
4237   {
4238   locale = getenv("LC_ALL");
4239   locale_from = "LC_ALL";
4240   }
4241 
4242 if (locale == NULL)
4243   {
4244   locale = getenv("LC_CTYPE");
4245   locale_from = "LC_CTYPE";
4246   }
4247 
4248 /* If a locale is set, use it to generate the tables the PCRE needs. Passing
4249 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4250 
4251 if (locale != NULL)
4252   {
4253   if (setlocale(LC_CTYPE, locale) == NULL)
4254     {
4255     fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4256       locale, locale_from);
4257     goto EXIT2;
4258     }
4259   character_tables = pcre2_maketables(NULL);
4260   pcre2_set_character_tables(compile_context, character_tables);
4261   }
4262 
4263 /* Sort out colouring */
4264 
4265 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4266   {
4267   if (strcmp(colour_option, "always") == 0)
4268 #ifdef WIN32
4269     do_ansi = !is_stdout_tty(),
4270 #endif
4271     do_colour = TRUE;
4272   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4273   else
4274     {
4275     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4276       colour_option);
4277     goto EXIT2;
4278     }
4279   if (do_colour)
4280     {
4281     char *cs = getenv("PCRE2GREP_COLOUR");
4282     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4283     if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4284     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4285     if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4286     if (cs == NULL) cs = getenv("GREP_COLOR");
4287     if (cs != NULL)
4288       {
4289       if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4290       }
4291 #ifdef WIN32
4292     init_colour_output();
4293 #endif
4294     }
4295   }
4296 
4297 /* Sort out a newline setting. */
4298 
4299 if (newline_arg != NULL)
4300   {
4301   for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4302        endlinetype++)
4303     {
4304     if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4305     }
4306   if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4307     pcre2_set_newline(compile_context, endlinetype);
4308   else
4309     {
4310     fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4311       newline_arg);
4312     goto EXIT2;
4313     }
4314   }
4315 
4316 /* Find default newline convention */
4317 
4318 else
4319   {
4320   (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4321   }
4322 
4323 /* Interpret the text values for -d and -D */
4324 
4325 if (dee_option != NULL)
4326   {
4327   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4328   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4329   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4330   else
4331     {
4332     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4333     goto EXIT2;
4334     }
4335   }
4336 
4337 if (DEE_option != NULL)
4338   {
4339   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4340   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4341   else
4342     {
4343     fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4344     goto EXIT2;
4345     }
4346   }
4347 
4348 /* Set the extra options */
4349 
4350 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4351 
4352 /* Check the values for Jeffrey Friedl's debugging options. */
4353 
4354 #ifdef JFRIEDL_DEBUG
4355 if (S_arg > 9)
4356   {
4357   fprintf(stderr, "pcre2grep: bad value for -S option\n");
4358   return 2;
4359   }
4360 if (jfriedl_XT != 0 || jfriedl_XR != 0)
4361   {
4362   if (jfriedl_XT == 0) jfriedl_XT = 1;
4363   if (jfriedl_XR == 0) jfriedl_XR = 1;
4364   }
4365 #endif
4366 
4367 /* If use_jit is set, check whether JIT is available. If not, do not try
4368 to use JIT. */
4369 
4370 if (use_jit)
4371   {
4372   uint32_t answer;
4373   (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4374   if (!answer) use_jit = FALSE;
4375   }
4376 
4377 /* Get memory for the main buffer. */
4378 
4379 if (bufthird <= 0)
4380   {
4381   fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4382   goto EXIT2;
4383   }
4384 
4385 bufsize = 3*bufthird;
4386 main_buffer = (char *)malloc(bufsize);
4387 
4388 if (main_buffer == NULL)
4389   {
4390   fprintf(stderr, "pcre2grep: malloc failed\n");
4391   goto EXIT2;
4392   }
4393 
4394 /* If no patterns were provided by -e, and there are no files provided by -f,
4395 the first argument is the one and only pattern, and it must exist. */
4396 
4397 if (patterns == NULL && pattern_files == NULL)
4398   {
4399   if (i >= argc) return usage(2);
4400   patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4401     NULL);
4402   i++;
4403   if (patterns == NULL) goto EXIT2;
4404   }
4405 
4406 /* Compile the patterns that were provided on the command line, either by
4407 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4408 after all the command-line options are read so that we know which PCRE options
4409 to use. When -F is used, compile_pattern() may add another block into the
4410 chain, so we must not access the next pointer till after the compile. */
4411 
4412 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4413   {
4414   if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4415        (j == 1 && patterns->next == NULL)? 0 : j))
4416     goto EXIT2;
4417   }
4418 
4419 /* Read and compile the regular expressions that are provided in files. */
4420 
4421 for (fn = pattern_files; fn != NULL; fn = fn->next)
4422   {
4423   if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4424   }
4425 
4426 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4427 
4428 #ifdef SUPPORT_PCRE2GREP_JIT
4429 if (use_jit)
4430   {
4431   jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4432   if (jit_stack != NULL                        )
4433     pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4434   }
4435 #endif
4436 
4437 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4438 adjust the options. */
4439 
4440 pcre2_options &= ~PCRE2_LITERAL;
4441 (void)pcre2_set_compile_extra_options(compile_context, 0);
4442 
4443 /* If there are include or exclude patterns read from the command line, compile
4444 them. */
4445 
4446 for (j = 0; j < 4; j++)
4447   {
4448   int k;
4449   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4450     {
4451     if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4452          (k == 1 && cp->next == NULL)? 0 : k))
4453       goto EXIT2;
4454     }
4455   }
4456 
4457 /* Read and compile include/exclude patterns from files. */
4458 
4459 for (fn = include_from; fn != NULL; fn = fn->next)
4460   {
4461   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4462     goto EXIT2;
4463   }
4464 
4465 for (fn = exclude_from; fn != NULL; fn = fn->next)
4466   {
4467   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4468     goto EXIT2;
4469   }
4470 
4471 /* If there are no files that contain lists of files to search, and there are
4472 no file arguments, search stdin, and then exit. */
4473 
4474 if (file_lists == NULL && i >= argc)
4475   {
4476   rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4477     (filenames > FN_DEFAULT)? stdin_name : NULL);
4478   goto EXIT;
4479   }
4480 
4481 /* If any files that contains a list of files to search have been specified,
4482 read them line by line and search the given files. */
4483 
4484 for (fn = file_lists; fn != NULL; fn = fn->next)
4485   {
4486   char buffer[FNBUFSIZ];
4487   FILE *fl;
4488   if (strcmp(fn->name, "-") == 0) fl = stdin; else
4489     {
4490     fl = fopen(fn->name, "rb");
4491     if (fl == NULL)
4492       {
4493       fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4494         strerror(errno));
4495       goto EXIT2;
4496       }
4497     }
4498   while (fgets(buffer, sizeof(buffer), fl) != NULL)
4499     {
4500     int frc;
4501     char *end = buffer + (int)strlen(buffer);
4502     while (end > buffer && isspace(end[-1])) end--;
4503     *end = 0;
4504     if (*buffer != 0)
4505       {
4506       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4507       if (frc > 1) rc = frc;
4508         else if (frc == 0 && rc == 1) rc = 0;
4509       }
4510     }
4511   if (fl != stdin) fclose(fl);
4512   }
4513 
4514 /* After handling file-list, work through remaining arguments. Pass in the fact
4515 that there is only one argument at top level - this suppresses the file name if
4516 the argument is not a directory and filenames are not otherwise forced. */
4517 
4518 only_one_at_top = i == argc - 1 && file_lists == NULL;
4519 
4520 for (; i < argc; i++)
4521   {
4522   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4523     only_one_at_top);
4524   if (frc > 1) rc = frc;
4525     else if (frc == 0 && rc == 1) rc = 0;
4526   }
4527 
4528 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4529 /* If separating builtin echo callouts by implicit newline, add one more for
4530 the final item. */
4531 
4532 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4533   fprintf(stdout, STDOUT_NL);
4534 #endif
4535 
4536 /* Show the total number of matches if requested, but not if only one file's
4537 count was printed. */
4538 
4539 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4540   {
4541   if (counts_printed != 0 && filenames >= FN_DEFAULT)
4542     fprintf(stdout, "TOTAL:");
4543   fprintf(stdout, "%lu" STDOUT_NL, total_count);
4544   }
4545 
4546 EXIT:
4547 #ifdef SUPPORT_PCRE2GREP_JIT
4548 pcre2_jit_free_unused_memory(NULL);
4549 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4550 #endif
4551 
4552 free(main_buffer);
4553 if (character_tables != NULL) pcre2_maketables_free(NULL, character_tables);
4554 
4555 pcre2_compile_context_free(compile_context);
4556 pcre2_match_context_free(match_context);
4557 pcre2_match_data_free(match_data);
4558 
4559 free_pattern_chain(patterns);
4560 free_pattern_chain(include_patterns);
4561 free_pattern_chain(include_dir_patterns);
4562 free_pattern_chain(exclude_patterns);
4563 free_pattern_chain(exclude_dir_patterns);
4564 
4565 free_file_chain(exclude_from);
4566 free_file_chain(include_from);
4567 free_file_chain(pattern_files);
4568 free_file_chain(file_lists);
4569 
4570 while (only_matching != NULL)
4571   {
4572   omstr *this = only_matching;
4573   only_matching = this->next;
4574   free(this);
4575   }
4576 
4577 pcre2grep_exit(rc);
4578 
4579 EXIT2:
4580 rc = 2;
4581 goto EXIT;
4582 }
4583 
4584 /* End of pcre2grep */
4585