1 /******************************************************************************\
2 * Copyright (c) 2019, Robert van Engelen, Genivia Inc. All rights reserved.    *
3 *                                                                              *
4 * Redistribution and use in source and binary forms, with or without           *
5 * modification, are permitted provided that the following conditions are met:  *
6 *                                                                              *
7 *   (1) Redistributions of source code must retain the above copyright notice, *
8 *       this list of conditions and the following disclaimer.                  *
9 *                                                                              *
10 *   (2) Redistributions in binary form must reproduce the above copyright      *
11 *       notice, this list of conditions and the following disclaimer in the    *
12 *       documentation and/or other materials provided with the distribution.   *
13 *                                                                              *
14 *   (3) The name of the author may not be used to endorse or promote products  *
15 *       derived from this software without specific prior written permission.  *
16 *                                                                              *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED *
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF         *
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO   *
20 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,       *
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;  *
23 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,     *
24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR      *
25 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF       *
26 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                   *
27 \******************************************************************************/
28 
29 /**
30 @file      ugrep.cpp
31 @brief     a pattern search utility written in C++11
32 @author    Robert van Engelen - engelen@genivia.com
33 @copyright (c) 2019-2021, Robert van Engelen, Genivia Inc. All rights reserved.
34 @copyright (c) BSD-3 License - see LICENSE.txt
35 
36 For download and installation instructions:
37 
38   https://github.com/Genivia/ugrep
39 
40 This program uses RE/flex:
41 
42   https://github.com/Genivia/RE-flex
43 
44 Optional libraries to support options -P and -z:
45 
46   -P: PCRE2 or Boost.Regex
47   -z: zlib (.gz)
48   -z: libbz2 (.bz, .bz2, .bzip2)
49   -z: liblzma (.lzma, .xz)
50   -z: liblz4 (.lz4)
51   -z: libzstd (.zst, .zstd)
52 
53 Build ugrep as follows:
54 
55   $ ./configure --enable-colors
56   $ make -j
57 
58 Git does not preserve time stamps so ./configure may fail, in that case do:
59 
60   $ autoreconf -fi
61   $ ./configure --enable-colors
62   $ make -j
63 
64 After this, you may want to test ugrep and install it (optional):
65 
66   $ make test
67   $ sudo make install
68 
69 */
70 
71 #include "ugrep.hpp"
72 #include "glob.hpp"
73 #include "mmap.hpp"
74 #include "output.hpp"
75 #include "query.hpp"
76 #include "stats.hpp"
77 #include <reflex/matcher.h>
78 #include <reflex/fuzzymatcher.h>
79 #include <iomanip>
80 #include <cctype>
81 #include <limits>
82 #include <functional>
83 #include <list>
84 #include <deque>
85 #include <thread>
86 #include <memory>
87 #include <mutex>
88 #include <condition_variable>
89 #include <chrono>
90 #include <sstream>
91 
92 #ifdef OS_WIN
93 
94 // compiling for a windows OS, except Cygwin and MinGW
95 
96 // optionally enable --color=auto by default
97 // #define WITH_COLOR
98 
99 // optionally enable PCRE2 for -P
100 // #define HAVE_PCRE2
101 
102 // optionally enable Boost.Regex for -P
103 // #define HAVE_BOOST_REGEX
104 
105 // optionally enable zlib for -z
106 // #define HAVE_LIBZ
107 
108 // optionally enable libbz2 for -z
109 // #define HAVE_LIBBZ2
110 
111 // optionally enable liblzma for -z
112 // #define HAVE_LIBLZMA
113 
114 // optionally enable liblz4 for -z
115 // #define HAVE_LIBLZ4
116 
117 // optionally enable libzstd for -z
118 // #define HAVE_LIBZSTD
119 
120 #include <stringapiset.h>
121 #include <direct.h>
122 
123 #else
124 
125 // not compiling for a windows OS
126 
127 #include <signal.h>
128 #include <dirent.h>
129 #include <sys/select.h>
130 #include <sys/stat.h>
131 #include <unistd.h>
132 
133 #endif
134 
135 // use PCRE2 for option -P
136 #ifdef HAVE_PCRE2
137 # include <reflex/pcre2matcher.h>
138 #else
139 // use Boost.Regex for option -P
140 # ifdef HAVE_BOOST_REGEX
141 #  include <reflex/boostmatcher.h>
142 # endif
143 #endif
144 
145 // optional: specify an optimal decompression block size, default is 65536, must be larger than 1024 for tar extraction
146 // #define Z_BUF_LEN 16384
147 // #define Z_BUF_LEN 32768
148 
149 // use zlib, libbz2, liblzma for option -z
150 #ifdef HAVE_LIBZ
151 # include "zstream.hpp"
152 #endif
153 
154 // ugrep exit codes
155 #define EXIT_OK    0 // One or more lines were selected
156 #define EXIT_FAIL  1 // No lines were selected
157 #define EXIT_ERROR 2 // An error occurred
158 
159 // limit the total number of threads spawned (i.e. limit spawn overhead), because grepping is practically IO bound
160 #ifndef MAX_JOBS
161 # define MAX_JOBS 16U
162 #endif
163 
164 // limit the job queue size to wait to give the worker threads some slack
165 #ifndef MAX_JOB_QUEUE_SIZE
166 # define MAX_JOB_QUEUE_SIZE 65536
167 #endif
168 
169 // a hard limit on the recursive search depth
170 #ifndef MAX_DEPTH
171 # define MAX_DEPTH 100
172 #endif
173 
174 // --min-steal default, the minimum co-worker's queue size of pending jobs to steal a job from, smaller values result in higher job stealing rates, should not be less than 3
175 #ifndef MIN_STEAL
176 # define MIN_STEAL 3U
177 #endif
178 
179 // use dirent d_type when available to improve performance
180 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
181 # define DIRENT_TYPE_UNKNOWN DT_UNKNOWN
182 # define DIRENT_TYPE_LNK     DT_LNK
183 # define DIRENT_TYPE_DIR     DT_DIR
184 # define DIRENT_TYPE_REG     DT_REG
185 #else
186 # define DIRENT_TYPE_UNKNOWN 0
187 # define DIRENT_TYPE_LNK     1
188 # define DIRENT_TYPE_DIR     1
189 # define DIRENT_TYPE_REG     1
190 #endif
191 
192 // the -M MAGIC pattern DFA constructed before threads start, read-only afterwards
193 reflex::Pattern magic_pattern; // concurrent access is thread safe
194 reflex::Matcher magic_matcher; // concurrent access is not thread safe
195 
196 // the --filter-magic-label pattern DFA
197 reflex::Pattern filter_magic_pattern; // concurrent access is thread safe
198 
199 // TTY detected
200 bool tty_term = false;
201 
202 // color term detected
203 bool color_term = false;
204 
205 #ifdef OS_WIN
206 
207 // CTRL-C handler
sigint(DWORD signal)208 BOOL WINAPI sigint(DWORD signal)
209 {
210   if (signal == CTRL_C_EVENT || signal == CTRL_BREAK_EVENT)
211   {
212     // be nice, reset colors on interrupt when sending output to a color TTY
213     if (color_term)
214       color_term = write(1, "\033[m", 3) > 0; // appease -Wunused-result
215   }
216 
217   // return FALSE to invoke the next handler (when applicable) or just exit
218   return FALSE;
219 }
220 
221 #else
222 
223 // SIGINT and SIGTERM handler
sigint(int sig)224 static void sigint(int sig)
225 {
226   // reset to the default handler
227   signal(sig, SIG_DFL);
228 
229   // be nice, reset colors on interrupt when sending output to a color TTY
230   if (color_term)
231     color_term = write(1, "\033[m", 3) > 0; // appease -Wunused-result
232 
233   // signal again
234   kill(getpid(), sig);
235 }
236 
237 #endif
238 
239 // full home directory path
240 const char *home_dir = NULL;
241 
242 // ANSI SGR substrings extracted from GREP_COLORS
243 char color_sl[COLORLEN]; // selected line
244 char color_cx[COLORLEN]; // context line
245 char color_mt[COLORLEN]; // matched text in any matched line
246 char color_ms[COLORLEN]; // matched text in a selected line
247 char color_mc[COLORLEN]; // matched text in a context line
248 char color_fn[COLORLEN]; // file name
249 char color_ln[COLORLEN]; // line number
250 char color_cn[COLORLEN]; // column number
251 char color_bn[COLORLEN]; // byte offset
252 char color_se[COLORLEN]; // separator
253 
254 char match_ms[COLORLEN];  // --match or --tag: matched text in a selected line
255 char match_mc[COLORLEN];  // --match or --tag: matched text in a context line
256 char match_off[COLORLEN]; // --match or --tag: off
257 
258 std::string color_wd; // hyperlink working directory path
259 
260 const char *color_hl      = NULL; // hyperlink
261 const char *color_st      = NULL; // ST
262 
263 const char *color_del     = ""; // erase line after the cursor
264 const char *color_off     = ""; // disable colors
265 
266 const char *color_high    = ""; // stderr highlighted text
267 const char *color_error   = ""; // stderr error text
268 const char *color_warning = ""; // stderr warning text
269 const char *color_message = ""; // stderr error or warning message text
270 
271 // number of concurrent threads for workers
272 size_t threads;
273 
274 // number of warnings given
275 std::atomic_size_t warnings;
276 
277 // redirectable source is standard input by default or a pipe
278 FILE *source = stdin;
279 
280 // redirectable output destination is standard output by default or a pipe
281 FILE *output = stdout;
282 
283 // Grep object handle, to cancel the search with cancel_ugrep()
284 struct Grep *grep_handle = NULL;
285 
286 std::mutex grep_handle_mutex;
287 
288 // set/clear the handle to be able to use cancel_ugrep()
289 void set_grep_handle(struct Grep*);
290 void clear_grep_handle();
291 
292 #ifndef OS_WIN
293 
294 // output file stat is available when stat() result is true
295 bool output_stat_result  = false;
296 bool output_stat_regular = false;
297 struct stat output_stat;
298 
299 // container of inodes to detect directory cycles when symlinks are traversed with --dereference
300 std::set<ino_t> visited;
301 
302 #ifdef HAVE_STATVFS
303 // containers of file system ids to exclude from recursive searches or include in recursive searches
304 std::set<uint64_t> exclude_fs_ids, include_fs_ids;
305 #endif
306 
307 #endif
308 
309 // ugrep command-line options
310 bool flag_all_threads              = false;
311 bool flag_any_line                 = false;
312 bool flag_basic_regexp             = false;
313 bool flag_bool                     = false;
314 bool flag_confirm                  = DEFAULT_CONFIRM;
315 bool flag_count                    = false;
316 bool flag_cpp                      = false;
317 bool flag_csv                      = false;
318 bool flag_decompress               = false;
319 bool flag_dereference              = false;
320 bool flag_files                    = false;
321 bool flag_files_with_matches       = false;
322 bool flag_files_without_match      = false;
323 bool flag_fixed_strings            = false;
324 bool flag_hex_ast                  = false;
325 bool flag_hex_cbr                  = true;
326 bool flag_hex_chr                  = true;
327 bool flag_hex_hbr                  = true;
328 bool flag_hidden                   = DEFAULT_HIDDEN;
329 bool flag_invert_match             = false;
330 bool flag_json                     = false;
331 bool flag_line_buffered            = false;
332 bool flag_line_regexp              = false;
333 bool flag_match                    = false;
334 bool flag_no_dereference           = false;
335 bool flag_no_header                = false;
336 bool flag_no_messages              = false;
337 bool flag_not                      = false;
338 bool flag_null                     = false;
339 bool flag_only_line_number         = false;
340 bool flag_only_matching            = false;
341 bool flag_perl_regexp              = false;
342 bool flag_pretty                   = DEFAULT_PRETTY;
343 bool flag_quiet                    = false;
344 bool flag_sort_rev                 = false;
345 bool flag_stdin                    = false;
346 bool flag_usage_warnings           = false;
347 bool flag_word_regexp              = false;
348 bool flag_xml                      = false;
349 bool flag_hex                      = false;
350 bool flag_with_hex                 = false;
351 bool flag_no_filename              = false;
352 bool flag_with_filename            = false;
353 Flag flag_binary;
354 Flag flag_binary_without_match;
355 Flag flag_break;
356 Flag flag_byte_offset;
357 Flag flag_column_number;
358 Flag flag_empty;
359 Flag flag_dotall;
360 Flag flag_free_space;
361 Flag flag_heading;
362 Flag flag_ignore_case;
363 Flag flag_initial_tab;
364 Flag flag_line_number;
365 Flag flag_smart_case;
366 Flag flag_text;
367 Flag flag_ungroup;
368 Sort flag_sort_key                 = Sort::NA;
369 Action flag_devices_action         = Action::SKIP;
370 Action flag_directories_action     = Action::SKIP;
371 size_t flag_after_context          = 0;
372 size_t flag_before_context         = 0;
373 size_t flag_fuzzy                  = 0;
374 size_t flag_hex_columns            = 16;
375 size_t flag_jobs                   = 0;
376 size_t flag_max_count              = 0;
377 size_t flag_max_depth              = 0;
378 size_t flag_max_files              = 0;
379 size_t flag_max_line               = 0;
380 size_t flag_max_mmap               = DEFAULT_MAX_MMAP_SIZE;
381 size_t flag_min_depth              = 0;
382 size_t flag_min_line               = 0;
383 size_t flag_min_magic              = 1;
384 size_t flag_min_steal              = MIN_STEAL;
385 size_t flag_not_magic              = 0;
386 size_t flag_query                  = 0;
387 size_t flag_tabs                   = DEFAULT_TABS;
388 const char *flag_apply_color       = NULL;
389 const char *flag_binary_files      = "binary";
390 const char *flag_color             = DEFAULT_COLOR;
391 const char *flag_colors            = NULL;
392 const char *flag_config            = NULL;
393 const char *flag_devices           = "skip";
394 const char *flag_directories       = "skip";
395 const char *flag_encoding          = NULL;
396 const char *flag_filter            = NULL;
397 const char *flag_format            = NULL;
398 const char *flag_format_begin      = NULL;
399 const char *flag_format_close      = NULL;
400 const char *flag_format_end        = NULL;
401 const char *flag_format_open       = NULL;
402 const char *flag_group_separator   = "--";
403 const char *flag_hexdump           = NULL;
404 const char *flag_label             = "(standard input)";
405 const char *flag_pager             = DEFAULT_PAGER;
406 const char *flag_view              = "";
407 const char *flag_save_config       = NULL;
408 const char *flag_separator         = ":";
409 const char *flag_sort              = NULL;
410 const char *flag_stats             = NULL;
411 const char *flag_tag               = NULL;
412 std::string              flag_config_file;
413 std::set<std::string>    flag_config_options;
414 std::vector<std::string> flag_regexp;
415 std::vector<std::string> flag_file;
416 std::vector<std::string> flag_file_type;
417 std::vector<std::string> flag_file_extension;
418 std::vector<std::string> flag_file_magic;
419 std::vector<std::string> flag_filter_magic_label;
420 std::vector<std::string> flag_glob;
421 std::vector<std::string> flag_ignore_files;
422 std::vector<std::string> flag_include;
423 std::vector<std::string> flag_include_dir;
424 std::vector<std::string> flag_include_from;
425 std::vector<std::string> flag_include_fs;
426 std::vector<std::string> flag_exclude;
427 std::vector<std::string> flag_exclude_dir;
428 std::vector<std::string> flag_exclude_from;
429 std::vector<std::string> flag_exclude_fs;
430 std::vector<std::string> flag_all_include;
431 std::vector<std::string> flag_all_include_dir;
432 std::vector<std::string> flag_all_exclude;
433 std::vector<std::string> flag_all_exclude_dir;
434 reflex::Input::file_encoding_type flag_encoding_type = reflex::Input::file_encoding::plain;
435 
436 // the CNF of Boolean search queries and patterns
437 CNF bcnf;
438 
439 // ugrep command-line arguments pointing to argv[]
440 const char *arg_pattern = NULL;
441 std::vector<const char*> arg_files;
442 
443 #ifdef OS_WIN
444 // store UTF-8 arguments decoded from wargv[] in strings to re-populate argv[] with pointers
445 std::list<std::string> arg_strings;
446 #endif
447 
448 // function protos
449 void options(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int argc, const char **argv);
450 void option_regexp(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg, bool is_neg = false);
451 void option_and(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv);
452 void option_and(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg);
453 void option_andnot(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv);
454 void option_andnot(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg);
455 void option_not(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv);
456 void option_not(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg);
457 void init(int argc, const char **argv);
458 void set_color(const char *colors, const char *parameter, char color[COLORLEN]);
459 void trim(std::string& line);
460 void trim_pathname_arg(const char *arg);
461 bool is_output(ino_t inode);
462 size_t strtonum(const char *string, const char *message);
463 size_t strtopos(const char *string, const char *message);
464 void strtopos2(const char *string, size_t& pos1, size_t& pos2, const char *message, bool optional_first = false);
465 size_t strtofuzzy(const char *string, const char *message);
466 void split_globs(FILE *file, std::vector<std::string>& files, std::vector<std::string>& dirs);
467 void format(const char *format, size_t matches);
468 void usage(const char *message, const char *arg = NULL, const char *valid = NULL);
469 void help(std::ostream& out);
470 void help(const char *what = NULL);
471 void version();
472 void is_directory(const char *pathname);
473 void cannot_decompress(const char *pathname, const char *message);
474 
475 // open a file where - means stdin/stdout and an initial ~ expands to home directory
fopen_smart(FILE ** file,const char * filename,const char * mode)476 int fopen_smart(FILE **file, const char *filename, const char *mode)
477 {
478   if (filename == NULL || *filename == '\0')
479     return errno = ENOENT;
480 
481   if (strcmp(filename, "-") == 0)
482   {
483     *file = strchr(mode, 'w') == NULL ? stdin : stdout;
484     return 0;
485   }
486 
487   if (*filename == '~')
488     return fopenw_s(file, std::string(home_dir).append(filename + 1).c_str(), mode);
489 
490   return fopenw_s(file, filename, mode);
491 }
492 
493 // read a line from buffered input, returns true when eof
getline(reflex::BufferedInput & input,std::string & line)494 inline bool getline(reflex::BufferedInput& input, std::string& line)
495 {
496   int ch;
497 
498   line.erase();
499   while ((ch = input.get()) != EOF)
500   {
501     if (ch == '\n')
502       break;
503     line.push_back(ch);
504   }
505   if (!line.empty() && line.back() == '\r')
506     line.pop_back();
507   return ch == EOF && line.empty();
508 }
509 
// skip over a line in mmap memory, returns true when eof (nothing left), otherwise
// advances here past the line (including its \n when present) and reduces left accordingly
inline bool getline(const char*& here, size_t& left)
{
  // nothing left to read
  if (left == 0)
    return true;

  // the line ends just after the next newline or, when there is none, at the end of the memory
  const char *newline = static_cast<const char*>(memchr(here, '\n', left));
  const char *next = newline == NULL ? here + left : newline + 1;

  left -= next - here;
  here = next;

  return false;
}
528 
529 // read a line from mmap memory or from buffered input or from unbuffered input, returns true when eof
getline(const char * & here,size_t & left,reflex::BufferedInput & buffered_input,reflex::Input & input,std::string & line)530 inline bool getline(const char*& here, size_t& left, reflex::BufferedInput& buffered_input, reflex::Input& input, std::string& line)
531 {
532   if (here != NULL)
533   {
534     // read line from mmap memory
535     if (left == 0)
536       return true;
537     const char *s = static_cast<const char*>(memchr(here, '\n', left));
538     if (s == NULL)
539       s = here + left;
540     else
541       ++s;
542     line.assign(here, s - here);
543     left -= s - here;
544     here = s;
545     return false;
546   }
547 
548   int ch;
549 
550   line.erase();
551 
552   if (buffered_input.assigned())
553   {
554     // read line from buffered input
555     while ((ch = buffered_input.get()) != EOF)
556     {
557       line.push_back(ch);
558       if (ch == '\n')
559         break;
560     }
561     return ch == EOF && line.empty();
562   }
563 
564   // read line from unbuffered input
565   while ((ch = input.get()) != EOF)
566   {
567     line.push_back(ch);
568     if (ch == '\n')
569       break;
570   }
571 
572   return ch == EOF && line.empty();
573 }
574 
// return true if s[0..n-1] contains a NUL or is non-displayable invalid UTF-8
inline bool is_binary(const char *s, size_t n)
{
  // a single byte is binary when it is a NUL or a UTF-8 continuation byte 10xxxxxx
  if (n == 1)
    return *s == '\0' || (*s & 0xc0) == 0x80;

  // any NUL makes the input binary
  if (memchr(s, '\0', n) != NULL)
    return true;

  const char *end = s + n;

  while (s < end)
  {
    const int top_bits = *s & 0xc0;

    // a continuation byte that does not follow a lead byte is invalid
    if (top_bits == 0x80)
      return true;

    // a byte that is not a lead byte 11xxxxxx needs no further checking
    if (top_bits != 0xc0)
    {
      ++s;
      continue;
    }

    // a lead byte must be followed by at least one continuation byte
    ++s;
    if (s >= end || (*s & 0xc0) != 0x80)
      return true;
    ++s;

    // accept up to three more continuation bytes, any further one is caught as invalid in the next iteration
    for (int extra = 0; extra < 3 && s < end && (*s & 0xc0) == 0x80; ++extra)
      ++s;
  }

  return false;
}
608 
609 // check if a file's inode is the current output file
is_output(ino_t inode)610 inline bool is_output(ino_t inode)
611 {
612 #ifdef OS_WIN
613   return false; // TODO check that two FILE* on Windows are the same, is this possible?
614 #else
615   return output_stat_regular && inode == output_stat.st_ino;
616 #endif
617 }
618 
619 // specify a line of input for the matcher to read, matcher must not use text() or rest() to keep the line contents unmodified
read_line(reflex::AbstractMatcher * matcher,const char * line,size_t size)620 inline void read_line(reflex::AbstractMatcher *matcher, const char *line, size_t size)
621 {
622   // safe cast: buffer() is read-only if no matcher.text() and matcher.rest() are used, size + 1 to include final \0
623   matcher->buffer(const_cast<char*>(line), size + 1);
624 }
625 
626 // specify a line of input for the matcher to read, matcher must not use text() or rest() to keep the line contents unmodified
read_line(reflex::AbstractMatcher * matcher,const std::string & line)627 inline void read_line(reflex::AbstractMatcher *matcher, const std::string& line)
628 {
629   // safe cast: buffer() is read-only if no matcher.text() and matcher.rest() are used, size + 1 to include final \0
630   matcher->buffer(const_cast<char*>(line.c_str()), line.size() + 1);
631 }
632 
633 // copy color buffers
copy_color(char to[COLORLEN],const char from[COLORLEN])634 inline void copy_color(char to[COLORLEN], const char from[COLORLEN])
635 {
636   size_t len = std::min(strlen(from), static_cast<size_t>(COLORLEN - 1));
637 
638   memcpy(to, from, len);
639   to[len] = '\0';
640 
641   char *comma = strchr(to, ',');
642   if (comma != NULL)
643     *comma = '\0';
644 }
645 
646 // grep manages output, matcher, input, and decompression
647 struct Grep {
648 
649   // CNF of AND/OR/NOT matchers
650   typedef std::list<std::list<std::unique_ptr<reflex::AbstractMatcher>>> Matchers;
651 
652   // exit search exception
653   struct EXIT_SEARCH : public std::exception { };
654 
655   // entry type
656   enum class Type { SKIP, DIRECTORY, OTHER };
657 
658   // entry data extracted from directory contents
659   struct Entry {
EntryGrep::Entry660     Entry(std::string& pathname, ino_t inode, uint64_t info)
661       :
662         pathname(std::move(pathname)),
663         inode(inode),
664         info(info),
665         cost(0)
666     { }
667 
668     std::string pathname;
669     ino_t       inode;
670     uint64_t    info;
671     uint16_t    cost;
672 
673 #ifndef OS_WIN
674     // get sortable info from stat buf
sort_infoGrep::Entry675     static uint64_t sort_info(const struct stat& buf)
676     {
677 #if defined(HAVE_STAT_ST_ATIM) && defined(HAVE_STAT_ST_MTIM) && defined(HAVE_STAT_ST_CTIM)
678       // tv_sec may be 64 bit, but value is small enough to multiply by 1000000 to fit in 64 bits
679       return static_cast<uint64_t>(flag_sort_key == Sort::SIZE ? buf.st_size : flag_sort_key == Sort::USED ? static_cast<uint64_t>(buf.st_atim.tv_sec) * 1000000 + buf.st_atim.tv_nsec / 1000 : flag_sort_key == Sort::CHANGED ? static_cast<uint64_t>(buf.st_mtim.tv_sec) * 1000000 + buf.st_mtim.tv_nsec / 1000 : flag_sort_key == Sort::CREATED ? static_cast<uint64_t>(buf.st_ctim.tv_sec) * 1000000 + buf.st_ctim.tv_nsec / 1000 : 0);
680 #elif defined(HAVE_STAT_ST_ATIMESPEC) && defined(HAVE_STAT_ST_MTIMESPEC) && defined(HAVE_STAT_ST_CTIMESPEC)
681       // tv_sec may be 64 bit, but value is small enough to multiply by 1000000 to fit in 64 bits
682       return static_cast<uint64_t>(flag_sort_key == Sort::SIZE ? buf.st_size : flag_sort_key == Sort::USED ? static_cast<uint64_t>(buf.st_atimespec.tv_sec) * 1000000 + buf.st_atimespec.tv_nsec / 1000 : flag_sort_key == Sort::CHANGED ? static_cast<uint64_t>(buf.st_mtimespec.tv_sec) * 1000000 + buf.st_mtimespec.tv_nsec / 1000 : flag_sort_key == Sort::CREATED ? static_cast<uint64_t>(buf.st_ctimespec.tv_sec) * 1000000 + buf.st_ctimespec.tv_nsec / 1000 : 0);
683 #else
684       return static_cast<uint64_t>(flag_sort_key == Sort::SIZE ? buf.st_size : flag_sort_key == Sort::USED ? buf.st_atime : flag_sort_key == Sort::CHANGED ? buf.st_mtime : flag_sort_key == Sort::CREATED ? buf.st_ctime : 0);
685 #endif
686     }
687 #endif
688 
689     // compare two entries by pathname
comp_by_pathGrep::Entry690     static bool comp_by_path(const Entry& a, const Entry& b)
691     {
692       return a.pathname < b.pathname;
693     }
694 
695     // compare two entries by size or time (atime, mtime, or ctime), if equal compare by pathname
comp_by_infoGrep::Entry696     static bool comp_by_info(const Entry& a, const Entry& b)
697     {
698       return a.info < b.info || (a.info == b.info && a.pathname < b.pathname);
699     }
700 
701     // compare two entries by edit distance cost
comp_by_bestGrep::Entry702     static bool comp_by_best(const Entry& a, const Entry& b)
703     {
704       return a.cost < b.cost || (a.cost == b.cost && a.pathname < b.pathname);
705     }
706 
707     // reverse compare two entries by pathname
rev_comp_by_pathGrep::Entry708     static bool rev_comp_by_path(const Entry& a, const Entry& b)
709     {
710       return a.pathname > b.pathname;
711     }
712 
713     // reverse compare two entries by size or time (atime, mtime, or ctime), if equal reverse compare by pathname
rev_comp_by_infoGrep::Entry714     static bool rev_comp_by_info(const Entry& a, const Entry& b)
715     {
716       return a.info > b.info || (a.info == b.info && a.pathname > b.pathname);
717     }
718 
719     // reverse compare two entries by edit distance cost
rev_comp_by_bestGrep::Entry720     static bool rev_comp_by_best(const Entry& a, const Entry& b)
721     {
722       return a.cost > b.cost || (a.cost == b.cost && a.pathname > b.pathname);
723     }
724   };
725 
726 #ifndef OS_WIN
727   // extend the reflex::Input::Handler to handle stdin from a TTY or a slow pipe
728   struct StdInHandler : public reflex::Input::Handler {
729 
StdInHandlerGrep::StdInHandler730     StdInHandler(Grep *grep)
731       :
732         grep(grep)
733     { }
734 
735     Grep *grep;
736 
operator ()Grep::StdInHandler737     int operator()()
738     {
739       grep->out.flush();
740 
741       while (true)
742       {
743         struct timeval tv;
744         fd_set rfds, efds;
745         FD_ZERO(&rfds);
746         FD_ZERO(&efds);
747         FD_SET(0, &rfds);
748         FD_SET(0, &efds);
749         tv.tv_sec = 1;
750         tv.tv_usec = 0;
751         int r = ::select(1, &rfds, NULL, &efds, &tv);
752         if (r < 0 && errno != EINTR)
753           return 0;
754         if (r > 0 && FD_ISSET(0, &efds))
755           return 0;
756         if (r > 0)
757           return 1;
758       }
759     }
760   };
761 #endif
762 
763   // extend the reflex::AbstractMatcher::Handler with a grep object reference and references to some of the grep::search locals
764   struct GrepHandler : public reflex::AbstractMatcher::Handler {
765 
GrepHandlerGrep::GrepHandler766     GrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop)
767       :
768         grep(grep),
769         pathname(pathname),
770         lineno(lineno),
771         binfile(binfile),
772         hex(hex),
773         binary(binary),
774         matches(matches),
775         stop(stop)
776     { }
777 
778     Grep&        grep;     // grep object
779     const char*& pathname; // grep::search argument pathname
780     size_t&      lineno;   // grep::search lineno local variable
781     bool&        binfile;  // grep::search binfile local variable
782     bool&        hex;      // grep::search hex local variable
783     bool&        binary;   // grep::search binary local variable
784     size_t&      matches;  // grep::search matches local variable
785     bool&        stop;     // grep::search stop local variable
786 
787     // get the start of the before context, if present
begin_beforeGrep::GrepHandler788     void begin_before(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num, const char*& ptr, size_t& size, size_t& offset)
789     {
790       ptr = NULL;
791       size = 0;
792       offset = 0;
793 
794       if (len == 0)
795         return;
796 
797       size_t current = matcher.lineno();
798       size_t between = current - lineno;
799 
800       if (between > 1)
801       {
802         const char *s = buf + len;
803         const char *e = s;
804 
805         if (buf[len - 1] != '\n')
806           --between;
807 
808         while (--s >= buf)
809         {
810           if (*s == '\n')
811           {
812             if (--between == 0)
813               break;;
814             e = s + 1;
815           }
816         }
817 
818         ptr = ++s;
819         size = e - s;
820         offset = s - buf + num;
821 
822         ++lineno;
823       }
824     }
825 
826     // advance to the next before context, if present
next_beforeGrep::GrepHandler827     void next_before(const char *buf, size_t len, size_t num, const char*& ptr, size_t& size, size_t& offset)
828     {
829       if (ptr == NULL)
830         return;
831 
832       const char *s = ptr + size;
833       const char *e = buf + len;
834 
835       if (s >= e)
836       {
837         ptr = NULL;
838       }
839       else
840       {
841         e = static_cast<const char*>(memchr(s, '\n', e - s));
842 
843         if (e == NULL)
844           e = buf + len;
845         else
846           ++e;
847 
848         ptr = s;
849         size = e - s;
850         offset = s - buf + num;
851 
852         ++lineno;
853       }
854     }
855   };
856 
857   // extend event GrepHandler to output invert match lines for -v
858   struct InvertMatchGrepHandler : public GrepHandler {
859 
InvertMatchGrepHandlerGrep::InvertMatchGrepHandler860     InvertMatchGrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop)
861       :
862         GrepHandler(grep, pathname, lineno, binfile, hex, binary, matches, stop)
863     { }
864 
865     // functor invoked by the reflex::AbstractMatcher when the buffer contents are shifted out, also called explicitly in grep::search
operator ()Grep::InvertMatchGrepHandler866     virtual void operator()(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num) override
867     {
868       const char *ptr;
869       size_t size;
870       size_t offset;
871 
872       begin_before(matcher, buf, len, num, ptr, size, offset);
873 
874       while (ptr != NULL)
875       {
876         // --range: max line exceeded?
877         if (flag_max_line > 0 && lineno > flag_max_line)
878           break;
879 
880         // --max-files: max reached?
881         if (matches == 0 && !Stats::found_part())
882         {
883           stop = true;
884           break;
885         }
886 
887         // -m: max number of matches reached?
888         if (flag_max_count > 0 && matches >= flag_max_count)
889           break;
890 
891         // output blocked?
892         if (grep.out.eof)
893           break;
894 
895         ++matches;
896 
897         if (flag_with_hex)
898           binary = false;
899 
900         binary = binary || flag_hex || (!flag_text && is_binary(ptr, size));
901 
902         if (binfile || (binary && !flag_hex && !flag_with_hex))
903           break;
904 
905         if (hex && !binary)
906           grep.out.dump.done();
907 
908         if (!flag_no_header)
909           grep.out.header(pathname, grep.partname, lineno, NULL, offset, flag_separator, binary);
910 
911         hex = binary;
912 
913         if (binary)
914         {
915           grep.out.dump.hex(Output::Dump::HEX_LINE, offset, ptr, size);
916         }
917         else
918         {
919           bool lf_only = false;
920           if (size > 0)
921           {
922             lf_only = ptr[size - 1] == '\n';
923             size_t sizen = size - lf_only;
924             if (sizen > 0)
925             {
926               grep.out.str(color_sl);
927               grep.out.str(ptr, sizen);
928               grep.out.str(color_off);
929             }
930           }
931           grep.out.nl(lf_only);
932         }
933 
934         next_before(buf, len, num, ptr, size, offset);
935       }
936     }
937   };
938 
939   // extend event GrepHandler to output formatted invert match lines for --format -v
940   struct FormatInvertMatchGrepHandler : public GrepHandler {
941 
FormatInvertMatchGrepHandlerGrep::FormatInvertMatchGrepHandler942     FormatInvertMatchGrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop)
943       :
944         GrepHandler(grep, pathname, lineno, binfile, hex, binary, matches, stop)
945     { }
946 
947     // functor invoked by the reflex::AbstractMatcher when the buffer contents are shifted out, also called explicitly in grep::search
operator ()Grep::FormatInvertMatchGrepHandler948     virtual void operator()(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num) override
949     {
950       const char *ptr;
951       size_t size;
952       size_t offset;
953 
954       begin_before(matcher, buf, len, num, ptr, size, offset);
955 
956       while (ptr != NULL)
957       {
958         // --range: max line exceeded?
959         if (flag_max_line > 0 && lineno > flag_max_line)
960           break;
961 
962         // output --format-open
963         if (matches == 0)
964         {
965           // --format-open or --format-close: we must acquire lock early before Stats::found_part()
966           if (flag_format_open != NULL || flag_format_close != NULL)
967             grep.out.acquire();
968 
969           // --max-files: max reached?
970           if (!Stats::found_part())
971           {
972             stop = true;
973             break;
974           }
975 
976           if (flag_format_open != NULL)
977             grep.out.format(flag_format_open, pathname, grep.partname, Stats::found_parts(), &matcher, false, Stats::found_parts() > 1);
978         }
979 
980         // -m: max number of matches reached?
981         if (flag_max_count > 0 && matches >= flag_max_count)
982           break;
983 
984         // output blocked?
985         if (grep.out.eof)
986           break;
987 
988         ++matches;
989 
990         // output --format
991         grep.out.format_invert(flag_format, pathname, grep.partname, matches, lineno, offset, ptr, size - (size > 0 && ptr[size - 1] == '\n'), matches > 1);
992 
993         next_before(buf, len, num, ptr, size, offset);
994       }
995     }
996   };
997 
998   // extend event GrepHandler to output any context lines for -y
999   struct AnyLineGrepHandler : public GrepHandler {
1000 
AnyLineGrepHandlerGrep::AnyLineGrepHandler1001     AnyLineGrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop, const char*& rest_line_data, size_t& rest_line_size, size_t& rest_line_last)
1002       :
1003         GrepHandler(grep, pathname, lineno, binfile, hex, binary, matches, stop),
1004         rest_line_data(rest_line_data),
1005         rest_line_size(rest_line_size),
1006         rest_line_last(rest_line_last)
1007     { }
1008 
1009     // functor invoked by the reflex::AbstractMatcher when the buffer contents are shifted out, also called explicitly in grep::search
operator ()Grep::AnyLineGrepHandler1010     virtual void operator()(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num) override
1011     {
1012       const char *ptr;
1013       size_t size;
1014       size_t offset;
1015 
1016       begin_before(matcher, buf, len, num, ptr, size, offset);
1017 
1018       // display the rest of the matching line before the context lines
1019       if (rest_line_data != NULL && (lineno != matcher.lineno() || flag_ungroup))
1020       {
1021         if (binary)
1022         {
1023           grep.out.dump.hex(flag_invert_match ? Output::Dump::HEX_CONTEXT_LINE : Output::Dump::HEX_LINE, rest_line_last, rest_line_data, rest_line_size);
1024           grep.out.dump.done();
1025         }
1026         else
1027         {
1028           bool lf_only = false;
1029           if (rest_line_size > 0)
1030           {
1031             lf_only = rest_line_data[rest_line_size - 1] == '\n';
1032             rest_line_size -= lf_only;
1033             if (rest_line_size > 0)
1034             {
1035               grep.out.str(flag_invert_match ? color_cx : color_sl);
1036               grep.out.str(rest_line_data, rest_line_size);
1037               grep.out.str(color_off);
1038             }
1039           }
1040           grep.out.nl(lf_only);
1041         }
1042 
1043         rest_line_data = NULL;
1044       }
1045 
1046       // context colors with or without -v
1047       short v_hex_context_line = flag_invert_match ? Output::Dump::HEX_LINE : Output::Dump::HEX_CONTEXT_LINE;
1048       const char *v_color_cx = flag_invert_match ? color_sl : color_cx;
1049       const char *separator = flag_invert_match ? flag_separator : "-";
1050 
1051       while (ptr != NULL)
1052       {
1053         // --range: max line exceeded?
1054         if (flag_max_line > 0 && lineno > flag_max_line)
1055           break;
1056 
1057         if (matches == 0 && flag_invert_match)
1058         {
1059           // --max-files: max reached?
1060           if (!Stats::found_part())
1061           {
1062             stop = true;
1063             break;
1064           }
1065         }
1066 
1067         // -m: max number of matches reached?
1068         if (flag_invert_match && flag_max_count > 0 && matches >= flag_max_count)
1069         {
1070           stop = true;
1071           break;
1072         }
1073 
1074         // output blocked?
1075         if (grep.out.eof)
1076           break;
1077 
1078         if (flag_with_hex)
1079           binary = false;
1080 
1081         if (flag_invert_match)
1082           ++matches;
1083 
1084         binary = binary || flag_hex || (!flag_text && is_binary(ptr, size));
1085 
1086         if (binfile || (binary && !flag_hex && !flag_with_hex))
1087           break;
1088 
1089         if (hex && !binary)
1090           grep.out.dump.done();
1091 
1092         if (!flag_no_header)
1093           grep.out.header(pathname, grep.partname, lineno, NULL, offset, separator, binary);
1094 
1095         hex = binary;
1096 
1097         if (binary)
1098         {
1099           grep.out.dump.hex(v_hex_context_line, offset, ptr, size);
1100         }
1101         else
1102         {
1103           bool lf_only = false;
1104           if (size > 0)
1105           {
1106             lf_only = ptr[size - 1] == '\n';
1107             size_t sizen = size - lf_only;
1108             if (sizen > 0)
1109             {
1110               grep.out.str(v_color_cx);
1111               grep.out.str(ptr, sizen);
1112               grep.out.str(color_off);
1113             }
1114           }
1115           grep.out.nl(lf_only);
1116         }
1117 
1118         next_before(buf, len, num, ptr, size, offset);
1119       }
1120     }
1121 
1122     const char*& rest_line_data;
1123     size_t&      rest_line_size;
1124     size_t&      rest_line_last;
1125 
1126   };
1127 
  // extend event AnyLineGrepHandler to output specific context lines for -A, -B, and -C
  //
  // each line produced by begin_before() and next_before() is either output at once as an after
  // context line (while after_lineno is nonzero and fewer than flag_after_context lines were output)
  // or saved in a rotating buffer of at most flag_before_context lines, which is output later by
  // output_before_context()
  struct ContextGrepHandler : public AnyLineGrepHandler {

    // context state to track context lines before and after a match
    struct ContextState {

      // start with an empty before context; after_lineno is zero and after_length equals
      // flag_after_context, so the after context condition in operator() is false until both are
      // changed, as set_after_lineno() does
      ContextState()
        :
          before_index(0),
          before_length(0),
          after_lineno(0),
          after_length(flag_after_context)
      {
        // one slot per before context line, the slots are reused in rotation
        before_binary.resize(flag_before_context);
        before_offset.resize(flag_before_context);
        before_line.resize(flag_before_context);
      }

      size_t                   before_index;  // before context rotation index
      size_t                   before_length; // accumulated length of the before context
      std::vector<bool>        before_binary; // before context binary line
      std::vector<size_t>      before_offset; // before context offset of line
      std::vector<std::string> before_line;   // before context line data
      size_t                   after_lineno;  // after context line number
      size_t                   after_length;  // accumulated length of the after context

    };

    // pass all references on to the base AnyLineGrepHandler, the context state is default constructed
    ContextGrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop, const char*& rest_line_data, size_t& rest_line_size, size_t& rest_line_last)
      :
        AnyLineGrepHandler(grep, pathname, lineno, binfile, hex, binary, matches, stop, rest_line_data, rest_line_size, rest_line_last)
    { }

    // display the before context: output the group separator when lines were skipped, then output
    // the saved before context lines from oldest to newest and empty the before context
    void output_before_context()
    {
      // the group separator indicates lines skipped, like GNU grep
      // lines were skipped when the after context output so far ends before the first saved before context line
      if (state.after_lineno > 0 && state.after_lineno + state.after_length < grep.matcher->lineno() - state.before_length)
      {
        // end a hex dump in progress, this is done even when no group separator is set
        if (hex)
          grep.out.dump.done();

        if (flag_group_separator != NULL)
        {
          grep.out.str(color_se);
          grep.out.str(flag_group_separator);
          grep.out.str(color_off);
          grep.out.nl();
        }
      }

      // output the before context
      if (state.before_length > 0)
      {
        // the first line number of the before context
        size_t before_lineno = grep.matcher->lineno() - state.before_length;

        for (size_t i = 0; i < state.before_length; ++i)
        {
          // before_index is one past the most recently saved slot, so rotating from it visits the slots oldest first
          size_t j = (state.before_index + i) % state.before_length;

          // switching from hex to text output ends the hex dump
          if (hex && !state.before_binary[j])
            grep.out.dump.done();

          if (!flag_no_header)
            grep.out.header(pathname, grep.partname, before_lineno + i, NULL, state.before_offset[j], "-", state.before_binary[j]);

          hex = state.before_binary[j];

          const char *ptr = state.before_line[j].c_str();
          size_t size = state.before_line[j].size();

          if (hex)
          {
            grep.out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, state.before_offset[j], ptr, size);
          }
          else
          {
            // output the line without its trailing newline, if any, then end the line
            bool lf_only = false;
            if (size > 0)
            {
              lf_only = ptr[size - 1] == '\n';
              size -= lf_only;
              if (size > 0)
              {
                grep.out.str(color_cx);
                grep.out.str(ptr, size);
                grep.out.str(color_off);
              }
            }
            grep.out.nl(lf_only);
          }
        }
      }

      // reset the before context state
      state.before_index = 0;
      state.before_length = 0;
    }

    // set the after context
    void set_after_lineno(size_t lineno)
    {
      // set the after context state with the first after context line number
      // resetting after_length to zero lets operator() output up to flag_after_context lines again
      state.after_length = 0;
      state.after_lineno = lineno;
    }

    // functor invoked by the reflex::AbstractMatcher when the buffer contents are shifted out, also called explicitly in grep::search
    virtual void operator()(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num) override
    {
      const char *ptr;
      size_t size;
      size_t offset;

      // this is called before the rest line check below, it may increment lineno
      begin_before(matcher, buf, len, num, ptr, size, offset);

      // display the rest of the matching line before the context lines
      if (rest_line_data != NULL && (lineno != matcher.lineno() || flag_ungroup))
      {
        if (binary)
        {
          grep.out.dump.hex(flag_invert_match ? Output::Dump::HEX_CONTEXT_LINE : Output::Dump::HEX_LINE, rest_line_last, rest_line_data, rest_line_size);
        }
        else
        {
          // output the rest line without its trailing newline, if any, then end the line
          // note that rest_line_size is a reference and is decremented here
          bool lf_only = false;
          if (rest_line_size > 0)
          {
            lf_only = rest_line_data[rest_line_size - 1] == '\n';
            rest_line_size -= lf_only;
            if (rest_line_size > 0)
            {
              grep.out.str(flag_invert_match ? color_cx : color_sl);
              grep.out.str(rest_line_data, rest_line_size);
              grep.out.str(color_off);
            }
          }
          grep.out.nl(lf_only);
        }

        // the rest line is output only once
        rest_line_data = NULL;
      }

      // the loop is left with a break or when next_before() sets ptr to NULL
      while (ptr != NULL)
      {
        // --range: max line exceeded?
        if (flag_max_line > 0 && lineno > flag_max_line)
          break;

        if (matches == 0 && flag_invert_match)
        {
          // --max-files: max reached?
          if (!Stats::found_part())
          {
            stop = true;
            break;
          }
        }

        // -m: max number of matches reached?
        if (flag_invert_match && flag_max_count > 0 && matches >= flag_max_count)
        {
          stop = true;
          break;
        }

        // output blocked?
        if (grep.out.eof)
          break;

        // with -v each line counts as a match
        if (flag_invert_match)
          ++matches;

        // with flag_with_hex the binary state is determined anew for each line
        if (flag_with_hex)
          binary = false;

        binary = binary || flag_hex || (!flag_text && is_binary(ptr, size));

        // stop at a binary file, or at a binary line when hex output is not enabled
        if (binfile || (binary && !flag_hex && !flag_with_hex))
          break;

        if (state.after_lineno > 0 && state.after_length < flag_after_context)
        {
          // the after context is not complete yet: output this line as an after context line
          ++state.after_length;

          // switching from hex to text output ends the hex dump
          if (hex && !binary)
            grep.out.dump.done();

          if (!flag_no_header)
            grep.out.header(pathname, grep.partname, lineno, NULL, offset, "-", binary);

          hex = binary;

          if (binary)
          {
            grep.out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, offset, ptr, size);
          }
          else
          {
            // output the line without its trailing newline, if any, then end the line
            bool lf_only = false;
            if (size > 0)
            {
              lf_only = ptr[size - 1] == '\n';
              size_t sizen = size - lf_only;
              if (sizen > 0)
              {
                grep.out.str(color_cx);
                grep.out.str(ptr, sizen);
                grep.out.str(color_off);
              }
            }
            grep.out.nl(lf_only);
          }
        }
        else if (flag_before_context > 0)
        {
          // save this line in the before context, overwriting the oldest line when the
          // before context already holds flag_before_context lines
          if (state.before_length < flag_before_context)
            ++state.before_length;
          state.before_index %= state.before_length;
          state.before_binary[state.before_index] = binary;
          state.before_offset[state.before_index] = offset;
          state.before_line[state.before_index].assign(ptr, size);
          ++state.before_index;
        }
        else
        {
          // no after context to output and no before context to save
          break;
        }

        next_before(buf, len, num, ptr, size, offset);
      }
    }

    ContextState state; // the before and after context state, a public member of this handler

  };
1365 
  // extend event AnyLineGrepHandler to output specific context lines for -A, -B, and -C with -v
  //
  // each line produced by begin_before() and next_before() is output as a selected line that counts
  // as a match; the before context consists of lines and their match fragments that are added with
  // add_before_context_line() and add_before_context_match() and is output by output_before_context()
  struct InvertContextGrepHandler : public AnyLineGrepHandler {

    // a match fragment on a before context line
    struct InvertContextMatch {

      InvertContextMatch(size_t pos, size_t size, size_t offset)
        :
          pos(pos),
          size(size),
          offset(offset)
      { }

      size_t pos;    // position on the line
      size_t size;   // size of the match
      size_t offset; // offset of the match, passed as the offset to the header and hex dump output

    };

    typedef std::vector<InvertContextMatch> InvertContextMatches;

    // context state to track matching lines before non-matching lines
    struct InvertContextState {

      // start with an empty before context and no after context line number
      InvertContextState()
        :
          before_index(0),
          before_length(0),
          after_lineno(0)
      {
        // one slot per before context line, the slots are reused in rotation
        before_binary.resize(flag_before_context);
        before_columno.resize(flag_before_context);
        before_offset.resize(flag_before_context);
        before_line.resize(flag_before_context);
        before_match.resize(flag_before_context);
      }

      size_t                            before_index;   // before context rotation index
      size_t                            before_length;  // accumulated length of the before context
      std::vector<bool>                 before_binary;  // before context binary line
      std::vector<size_t>               before_columno; // before context column number of first match
      std::vector<size_t>               before_offset;  // before context offset of first match
      std::vector<std::string>          before_line;    // before context line data
      std::vector<InvertContextMatches> before_match;   // before context matches per line
      size_t                            after_lineno;   // the after context line number

    };

    // pass all references on to the base AnyLineGrepHandler, the context state is default constructed
    InvertContextGrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop, const char*& rest_line_data, size_t& rest_line_size, size_t& rest_line_last)
      :
        AnyLineGrepHandler(grep, pathname, lineno, binfile, hex, binary, matches, stop, rest_line_data, rest_line_size, rest_line_last)
    { }

    // display the before context: output the group separator when lines were skipped, then output
    // the saved before context lines from oldest to newest with their match fragments, and finally
    // empty the before context and set after_lineno to lineno
    void output_before_context()
    {
      // the group separator indicates lines skipped, like GNU grep
      if (state.after_lineno > 0 && state.after_lineno + flag_after_context + flag_before_context < lineno && flag_group_separator != NULL)
      {
        // end a hex dump in progress before the separator
        if (hex)
          grep.out.dump.done();

        grep.out.str(color_se);
        grep.out.str(flag_group_separator);
        grep.out.str(color_off);
        grep.out.nl();
      }

      // output the before context
      if (state.before_length > 0)
      {
        // the first line number of the before context
        size_t before_lineno = lineno - state.before_length;

        for (size_t i = 0; i < state.before_length; ++i)
        {
          // before_index is one past the most recently saved slot, so rotating from it visits the slots oldest first
          size_t j = (state.before_index + i) % state.before_length;
          // the header shows the offset of the first match on the line, or the saved offset when the line has no match fragments
          size_t offset = state.before_match[j].empty() ? state.before_offset[j] : state.before_match[j].front().offset;

          // switching from hex to text output ends the hex dump
          if (hex && !state.before_binary[j])
            grep.out.dump.done();

          if (!flag_no_header)
            grep.out.header(pathname, grep.partname, before_lineno + i, NULL, offset, "-", state.before_binary[j]);

          hex = state.before_binary[j];

          const char *ptr = state.before_line[j].c_str();
          size_t size = state.before_line[j].size();
          size_t pos = 0; // position on the line up to which the line was output

          // output each match fragment preceded by the part of the line between the previous fragment and this one
          for (auto& match : state.before_match[j])
          {
            if (hex)
            {
              grep.out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, match.offset - (match.pos - pos), ptr + pos, match.pos - pos);
              grep.out.dump.hex(Output::Dump::HEX_CONTEXT_MATCH, match.offset, ptr + match.pos, match.size);
            }
            else
            {
              if (match.pos > pos)
              {
                grep.out.str(color_cx);
                grep.out.str(ptr + pos, match.pos - pos);
                grep.out.str(color_off);
              }

              if (match.size > 0)
              {
                // output the match without its trailing newline, if any
                size_t sizen = match.size - (ptr[match.pos + match.size - 1] == '\n');
                if (sizen > 0)
                {
                  grep.out.str(match_mc);
                  grep.out.str(ptr + match.pos, sizen);
                  grep.out.str(match_off);
                }
              }
            }

            pos = match.pos + match.size;
          }

          // output the rest of the line after the last match fragment
          if (hex)
          {
            grep.out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, state.before_offset[j] + pos, ptr + pos, size - pos);
          }
          else
          {
            // output the rest without its trailing newline, if any, then end the line
            bool lf_only = false;
            if (size > pos)
            {
              lf_only = ptr[size - 1] == '\n';
              size -= lf_only;
              if (size > pos)
              {
                grep.out.str(color_cx);
                grep.out.str(ptr + pos, size - pos);
                grep.out.str(color_off);
              }
            }
            grep.out.nl(lf_only);
          }
        }
      }

      // reset the context state
      state.before_index = 0;
      state.before_length = 0;
      state.after_lineno = lineno;
    }

    // add line with the first match to the before context
    // the line spans bol to eol and overwrites the oldest saved line when the before context
    // already holds flag_before_context lines; the match fragments of the reused slot are cleared
    void add_before_context_line(const char *bol, const char *eol, size_t columno, size_t offset)
    {
      if (state.before_length < flag_before_context)
        ++state.before_length;
      state.before_index %= state.before_length;
      state.before_binary[state.before_index] = binary;
      state.before_columno[state.before_index] = columno;
      state.before_offset[state.before_index] = offset;
      state.before_line[state.before_index].assign(bol, eol - bol);
      state.before_match[state.before_index].clear();
      ++state.before_index;
    }

    // add match fragment to the before context
    void add_before_context_match(size_t pos, size_t size, size_t offset)
    {
      // only add a match if we have a before line, i.e. not an after line with a multiline match
      if (state.before_length > 0)
      {
        // the most recently saved line is in the slot before before_index, with wrap around
        size_t index = (state.before_index + state.before_length - 1) % state.before_length;
        state.before_match[index].emplace_back(pos, size, offset);
      }
    }

    // set the after context
    void set_after_lineno(size_t lineno)
    {
      state.after_lineno = lineno;
    }

    // functor invoked by the reflex::AbstractMatcher when the buffer contents are shifted out, also called explicitly in grep::search
    virtual void operator()(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num) override
    {
      const char *ptr;
      size_t size;
      size_t offset;

      // this is called before the rest line check below, it may increment lineno
      begin_before(matcher, buf, len, num, ptr, size, offset);

      // display the rest of the "after" matching line
      if (rest_line_data != NULL && (lineno != matcher.lineno() || flag_ungroup))
      {
        if (binary)
        {
          grep.out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, rest_line_last, rest_line_data, rest_line_size);
        }
        else
        {
          // output the rest line without its trailing newline, if any, then end the line
          // note that rest_line_size is a reference and is decremented here
          bool lf_only = false;
          if (rest_line_size > 0)
          {
            lf_only = rest_line_data[rest_line_size - 1] == '\n';
            rest_line_size -= lf_only;
            if (rest_line_size > 0)
            {
              grep.out.str(color_cx);
              grep.out.str(rest_line_data, rest_line_size);
              grep.out.str(color_off);
            }
          }
          grep.out.nl(lf_only);
        }

        // the rest line is output only once
        rest_line_data = NULL;
      }

      // there is a line to output: output the saved before context first
      if (ptr != NULL)
        output_before_context();

      // the loop is left with a break or when next_before() sets ptr to NULL
      while (ptr != NULL)
      {
        // the after context starts at the line after this line, this is set before any of the breaks below
        state.after_lineno = lineno + 1;

        // --range: max line exceeded?
        if (flag_max_line > 0 && lineno > flag_max_line)
          break;

        if (matches == 0)
        {
          // --max-files: max reached?
          if (!Stats::found_part())
          {
            stop = true;
            break;
          }
        }

        // -m: max number of matches reached?
        if (flag_invert_match && flag_max_count > 0 && matches >= flag_max_count)
        {
          stop = true;
          break;
        }

        // output blocked?
        if (grep.out.eof)
          break;

        ++matches;

        // with flag_with_hex the binary state is determined anew for each line
        if (flag_with_hex)
          binary = false;

        binary = binary || flag_hex || (!flag_text && is_binary(ptr, size));

        // stop at a binary file, or at a binary line when hex output is not enabled
        if (binfile || (binary && !flag_hex && !flag_with_hex))
          break;

        // switching from hex to text output ends the hex dump
        if (hex && !binary)
          grep.out.dump.done();

        if (!flag_no_header)
          grep.out.header(pathname, grep.partname, lineno, NULL, offset, flag_separator, binary);

        hex = binary;

        if (binary)
        {
          grep.out.dump.hex(Output::Dump::HEX_LINE, offset, ptr, size);
        }
        else
        {
          // output the line without its trailing newline, if any, then end the line
          bool lf_only = false;
          if (size > 0)
          {
            lf_only = ptr[size - 1] == '\n';
            size_t sizen = size - lf_only;
            if (sizen > 0)
            {
              grep.out.str(color_sl);
              grep.out.str(ptr, sizen);
              grep.out.str(color_off);
            }
          }
          grep.out.nl(lf_only);
        }

        next_before(buf, len, num, ptr, size, offset);
      }
    }

    InvertContextState state; // the before and after context state, a public member of this handler

  };
1661 
  // construct a Grep that outputs to the specified file and uses the specified matcher and matchers
  //
  // file: passed to the out member; note that the file member of Grep is a different object that is set to NULL
  // matcher, matchers: the pointers are stored as given
  //
  // the remaining members depend on the build configuration: stdin_handler is initialized with this
  // Grep unless OS_WIN is defined, the zstream and stream pointers are set to NULL when HAVE_LIBZ is
  // defined, and the decompression thread flags are cleared when WITH_DECOMPRESSION_THREAD is defined too
  Grep(FILE *file, reflex::AbstractMatcher *matcher, Matchers *matchers)
    :
      out(file),
      matcher(matcher),
      matchers(matchers),
      file(NULL)
#ifndef OS_WIN
    , stdin_handler(this)
#endif
#ifdef HAVE_LIBZ
    , zstream(NULL),
      stream(NULL)
#ifdef WITH_DECOMPRESSION_THREAD
    , thread_end(false),
      extracting(false),
      waiting(false)
#endif
#endif
  {
    restline.reserve(256); // pre-reserve a "rest line" of input to display matches to limit heap allocs
  }
1683 
~GrepGrep1684   virtual ~Grep()
1685   {
1686 #ifdef HAVE_LIBZ
1687 
1688 #ifdef WITH_DECOMPRESSION_THREAD
1689     if (thread.joinable())
1690     {
1691       thread_end = true;
1692 
1693       std::unique_lock<std::mutex> lock(pipe_mutex);
1694       if (waiting)
1695         pipe_zstrm.notify_one();
1696       lock.unlock();
1697 
1698       thread.join();
1699     }
1700 #endif
1701 
1702     if (stream != NULL)
1703     {
1704       delete stream;
1705       stream = NULL;
1706     }
1707 
1708     if (zstream != NULL)
1709     {
1710       delete zstream;
1711       zstream = NULL;
1712     }
1713 #endif
1714   }
1715 
  // cancel all active searches, this only calls cancel() on the out member of this Grep
  void cancel()
  {
    // global cancellation is forced by cancelling the shared output
    out.cancel();
  }
1722 
1723   // search the specified files or standard input for pattern matches
1724   virtual void ugrep();
1725 
1726   // search file or directory for pattern matches
1727   Type select(size_t level, const char *pathname, const char *basename, int type, ino_t& inode, uint64_t& info, bool is_argument = false);
1728 
1729   // recurse a directory
1730   virtual void recurse(size_t level, const char *pathname);
1731 
1732   // -Z and --sort=best: perform a presearch to determine edit distance cost, return cost of pathname file, 65535 when no match is found
1733   uint16_t cost(const char *pathname);
1734 
1735   // search a file
1736   virtual void search(const char *pathname);
1737 
1738   // check CNF AND/OR/NOT conditions are met for the line(s) spanning bol to eol
cnf_matchingGrep1739   bool cnf_matching(const char *bol, const char *eol, bool acquire = false)
1740   {
1741     if (flag_files)
1742     {
1743       if (out.holding())
1744       {
1745         size_t k = 0;    // iterate over matching[] bitmask
1746         bool all = true; // all terms matched
1747 
1748         // for each AND term check if the AND term was matched before or has a match this time
1749         for (const auto& i : *matchers)
1750         {
1751           // an OR term hasn't matched before
1752           if (!matching[k])
1753           {
1754             auto j = i.begin();
1755             auto e = i.end();
1756 
1757             if (j != e)
1758             {
1759               // check OR terms
1760               if (*j && (*j)->buffer(const_cast<char*>(bol), eol - bol + 1).find() != 0)
1761               {
1762                 matching[k] = true;
1763                 ++j;
1764               }
1765               else
1766               {
1767                 // check OR NOT terms
1768                 size_t l = 0;     // iterate over notmaching[k] bitmask
1769                 bool none = true; // all not-terms matched
1770 
1771                 while (++j != e)
1772                 {
1773                   if (*j && !notmatching[k][l])
1774                   {
1775                     if ((*j)->buffer(const_cast<char*>(bol), eol - bol + 1).find() != 0)
1776                       notmatching[k][l] = true;
1777                     else
1778                       all = none = false;
1779                   }
1780 
1781                   ++l;
1782                 }
1783 
1784                 if (none)
1785                 {
1786                   // when all not-terms matched and we don't have a positive alternative then stop searching this file
1787                   if (!*i.begin())
1788                     throw EXIT_SEARCH();
1789 
1790                   all = false;
1791                 }
1792               }
1793             }
1794           }
1795           ++k;
1796         }
1797 
1798         // if all terms matched globally per file then remove the hold to launch output
1799         if (all)
1800         {
1801           if (acquire)
1802             out.acquire();
1803 
1804           // --max-files: max reached?
1805           if (!Stats::found_part())
1806             throw EXIT_SEARCH();
1807 
1808           out.launch();
1809         }
1810       }
1811     }
1812     else
1813     {
1814       // for each AND term check if the line has a match
1815       for (const auto& i : *matchers)
1816       {
1817         auto j = i.begin();
1818         auto e = i.end();
1819 
1820         if (j != e)
1821         {
1822           // check OR terms
1823           if (*j && (*j)->buffer(const_cast<char*>(bol), eol - bol + 1).find() != 0)
1824             continue;
1825 
1826           // check OR NOT terms
1827           while (++j != e)
1828             if (*j && (*j)->buffer(const_cast<char*>(bol), eol - bol + 1).find() == 0)
1829               break;
1830 
1831           if (j == e)
1832             return false;
1833         }
1834       }
1835     }
1836 
1837     return true;
1838   }
1839 
1840   // if CNF AND/OR/NOT conditions are met globally then launch output after searching a file with --files
cnf_satisfiedGrep1841   bool cnf_satisfied(bool acquire = false)
1842   {
1843     if (out.holding())
1844     {
1845       size_t k = 0; // iterate over matching[] bitmask
1846 
1847       // for each AND term check if the term was matched before
1848       for (const auto& i : *matchers)
1849       {
1850         // an OR term hasn't matched
1851         if (!matching[k])
1852         {
1853           // return if there are no OR NOT terms to check
1854           if (i.size() <= 1)
1855             return false;
1856 
1857           auto j = i.begin();
1858           auto e = i.end();
1859 
1860           // check if not all of the OR NOT terms matched
1861           if (j != e)
1862           {
1863             size_t l = 0; // iterate over notmaching[k] bitmask
1864             while (++j != e)
1865             {
1866               if (*j && !notmatching[k][l])
1867                 break;
1868               ++l;
1869             }
1870             // return if all OR NOT terms matched
1871             if (j == e)
1872               return false;
1873           }
1874         }
1875         ++k;
1876       }
1877 
1878       if (acquire)
1879         out.acquire();
1880 
1881       // --max-files: max reached?
1882       if (!Stats::found_part())
1883         throw EXIT_SEARCH();
1884 
1885       out.launch();
1886     }
1887 
1888     return true;
1889   }
1890 
1891   // open a file for (binary) reading and assign input, decompress the file when -z, --decompress specified, may throw bad_alloc
open_fileGrep1892   bool open_file(const char *pathname)
1893   {
1894     if (pathname == NULL)
1895     {
1896       if (source == NULL)
1897         return false;
1898 
1899       pathname = flag_label;
1900       file = source;
1901 
1902 #ifdef OS_WIN
1903       _setmode(fileno(source), _O_BINARY);
1904 #endif
1905     }
1906     else if (fopenw_s(&file, pathname, "rb") != 0)
1907     {
1908       warning("cannot read", pathname);
1909 
1910       return false;
1911     }
1912 
1913     // --filter: fork process to filter file, when applicable
1914     if (!filter(file, pathname))
1915       return false;
1916 
1917 #ifdef HAVE_LIBZ
1918     if (flag_decompress)
1919     {
1920 #ifdef WITH_DECOMPRESSION_THREAD
1921 
1922       pipe_fd[0] = -1;
1923       pipe_fd[1] = -1;
1924 
1925       FILE *pipe_in = NULL;
1926 
1927       // open pipe between worker and decompression thread, then start decompression thread
1928       if (pipe(pipe_fd) == 0 && (pipe_in = fdopen(pipe_fd[0], "rb")) != NULL)
1929       {
1930         // create or open a new zstreambuf to (re)start the decompression thread
1931         if (zstream == NULL)
1932           zstream = new zstreambuf(pathname, file);
1933         else
1934           zstream->open(pathname, file);
1935 
1936         if (thread.joinable())
1937         {
1938           pipe_zstrm.notify_one();
1939         }
1940         else
1941         {
1942           try
1943           {
1944             thread_end = false;
1945             extracting = false;
1946             waiting = false;
1947 
1948             thread = std::thread(&Grep::decompress, this);
1949           }
1950 
1951           catch (std::system_error&)
1952           {
1953             fclose(pipe_in);
1954             close(pipe_fd[1]);
1955             pipe_fd[0] = -1;
1956             pipe_fd[1] = -1;
1957 
1958             warning("cannot create thread to decompress",  pathname);
1959 
1960             return false;
1961           }
1962         }
1963       }
1964       else
1965       {
1966         if (pipe_fd[0] != -1)
1967         {
1968           close(pipe_fd[0]);
1969           close(pipe_fd[1]);
1970           pipe_fd[0] = -1;
1971           pipe_fd[1] = -1;
1972         }
1973 
1974         warning("cannot create pipe to decompress",  pathname);
1975 
1976         return false;
1977       }
1978 
1979       input = reflex::Input(pipe_in, flag_encoding_type);
1980 
1981 #else
1982 
1983       // create or open a new zstreambuf
1984       if (zstream == NULL)
1985         zstream = new zstreambuf(pathname, file);
1986       else
1987         zstream->open(pathname, file);
1988 
1989       if (stream != NULL)
1990         delete stream;
1991 
1992       stream = new std::istream(zstream);
1993 
1994       input = stream;
1995 
1996 #endif
1997     }
1998     else
1999 #endif
2000     {
2001       input = reflex::Input(file, flag_encoding_type);
2002     }
2003 
2004     return true;
2005   }
2006 
2007   // return true on success, create a pipe to replace file input if filtering files in a forked process
filterGrep2008   bool filter(FILE *& in, const char *pathname)
2009   {
2010 #ifndef OS_WIN
2011 
2012     // --filter
2013     if (flag_filter != NULL && in != NULL)
2014     {
2015       const char *basename = strrchr(pathname, PATHSEPCHR);
2016       if (basename == NULL)
2017         basename = pathname;
2018       else
2019         ++basename;
2020 
2021       // get the basenames's extension suffix
2022       const char *suffix = strrchr(basename, '.');
2023 
2024       // don't consider . at the front of basename, otherwise skip .
2025       if (suffix == basename)
2026         suffix = NULL;
2027       else if (suffix != NULL)
2028         ++suffix;
2029 
2030       // --filter-magic-label: if the file is seekable, then check for a magic pattern match
2031       if (!flag_filter_magic_label.empty() && fseek(in, 0, SEEK_CUR) == 0)
2032       {
2033         bool is_plus = false;
2034 
2035         // --filter-magic-label: check for overriding +
2036         if (suffix != NULL)
2037         {
2038           for (const auto& i : flag_filter_magic_label)
2039           {
2040             if (i.front() == '+')
2041             {
2042               is_plus = true;
2043 
2044               break;
2045             }
2046           }
2047         }
2048 
2049         // --filter-magic-label: if the basename has no suffix or a +LABEL + then check magic bytes
2050         if (suffix == NULL || is_plus)
2051         {
2052           // create a matcher to match the magic pattern
2053           size_t match = reflex::Matcher(filter_magic_pattern, in).scan();
2054 
2055           // rewind the input after scan
2056           rewind(in);
2057 
2058           if (match > 0 && match <= flag_filter_magic_label.size())
2059           {
2060             suffix = flag_filter_magic_label[match - 1].c_str();
2061 
2062             if (*suffix == '+')
2063               ++suffix;
2064           }
2065         }
2066       }
2067 
2068       // basenames without a suffix get "*" as a suffix
2069       if (suffix == NULL || *suffix == '\0')
2070         suffix = "*";
2071 
2072       size_t sep = strlen(suffix);
2073 
2074       const char *command = flag_filter;
2075       const char *default_command = NULL;
2076 
2077       // find the command corresponding to the suffix
2078       while (true)
2079       {
2080         while (isspace(*command))
2081           ++command;
2082 
2083         if (*command == '*')
2084           default_command = strchr(command, ':');
2085 
2086         if (strncmp(suffix, command, sep) == 0 && (command[sep] == ':' || command[sep] == ',' || isspace(command[sep])))
2087         {
2088           command = strchr(command, ':');
2089           break;
2090         }
2091 
2092         command = strchr(command, ',');
2093         if (command == NULL)
2094           break;
2095 
2096         ++command;
2097       }
2098 
2099       // if no matching command, use the *:command if specified
2100       if (command == NULL)
2101         command = default_command;
2102 
2103       // suffix has a command to execute
2104       if (command != NULL)
2105       {
2106         // skip over the ':'
2107         ++command;
2108 
2109         int fd[2];
2110 
2111         if (pipe(fd) == 0)
2112         {
2113           int pid;
2114 
2115           if ((pid = fork()) == 0)
2116           {
2117             // child process
2118 
2119             // close the reading end of the pipe
2120             close(fd[0]);
2121 
2122             // dup the input file to stdin unless reading stdin
2123             if (in != stdin)
2124             {
2125               dup2(fileno(in), STDIN_FILENO);
2126               fclose(in);
2127             }
2128 
2129             // dup the writing end of the pipe to stdout
2130             dup2(fd[1], STDOUT_FILENO);
2131             close(fd[1]);
2132 
2133             // populate argv[] with the command and its arguments, thereby destroying flag_filter
2134             std::vector<const char*> args;
2135 
2136             char *arg = const_cast<char*>(command);
2137 
2138             while (*arg != '\0' && *arg != ',')
2139             {
2140               while (isspace(*arg))
2141                 ++arg;
2142 
2143               char *p = arg;
2144 
2145               while (*p != '\0' && *p != ',' && !isspace(*p))
2146                 ++p;
2147 
2148               if (p > arg)
2149               {
2150                 if (p - arg == 1 && *arg == '%')
2151                   args.push_back(in == stdin ? "-" : pathname);
2152                 else
2153                   args.push_back(arg);
2154               }
2155 
2156               if (*p == '\0')
2157                 break;
2158 
2159               if (*p == ',')
2160               {
2161                 *p = '\0';
2162                 break;
2163               }
2164 
2165               *p = '\0';
2166 
2167               arg = p + 1;
2168             }
2169 
2170             // silently bail out if there is no command
2171             if (args.empty())
2172               exit(EXIT_SUCCESS);
2173 
2174             // add sentinel
2175             args.push_back(NULL);
2176 
2177             // get argv[] array data
2178             char * const *argv = const_cast<char * const *>(args.data());
2179 
2180             // execute
2181             execvp(argv[0], argv);
2182 
2183             error("--filter: cannot exec", argv[0]);
2184           }
2185 
2186           // close the writing end of the pipe
2187           close(fd[1]);
2188 
2189           // close the file and use the reading end of the pipe
2190           if (in != stdin)
2191             fclose(in);
2192           in = fdopen(fd[0], "r");
2193         }
2194         else
2195         {
2196           if (in != stdin)
2197             fclose(in);
2198           in = NULL;
2199 
2200           warning("--filter: cannot create pipe", flag_filter);
2201 
2202           return false;
2203         }
2204       }
2205     }
2206 
2207 #endif
2208 
2209     return true;
2210   }
2211 
2212 #ifdef HAVE_LIBZ
2213 #ifdef WITH_DECOMPRESSION_THREAD
2214 
2215   // decompression thread
// Body of the decompression thread.
//
// Runs until thread_end is set. Each pass of the outer loop handles the current
// zstream: blocks of decompressed data are written to pipe_fd[1], zip archives
// are extracted part by part, and tar/cpio content is delegated to filter_tar()
// and filter_cpio(). After the zstream is exhausted the writing end of the pipe
// is closed and the thread waits on pipe_zstrm until it is notified.
// The flags `extracting` and `waiting` are updated along the way; `waiting` is
// set around each wait on a condition variable.
// NOTE(review): the waits below have no predicate, so a spurious wakeup is
// treated like a notification - confirm this is acceptable.
void decompress()
{
  while (!thread_end)
  {
    // use the zstreambuf internal buffer to hold decompressed data
    unsigned char *buf;
    size_t maxlen;
    zstream->get_buffer(buf, maxlen);

    // to hold the path (prefix + name) extracted from the zip file
    std::string path;

    // reset flags
    extracting = false;
    waiting = false;

    // extract the parts of a zip file, one by one, if zip file detected
    while (!thread_end)
    {
      // a regular file, may be reset when unzipping a directory
      bool is_regular = true;

      // non-NULL zip info means the zstream is positioned in a zip archive
      const zstreambuf::ZipInfo *zipinfo = zstream->zipinfo();

      if (zipinfo != NULL)
      {
        // extracting a zip file
        extracting = true;

        if (!zipinfo->name.empty() && zipinfo->name.back() == '/')
        {
          // skip zip directories
          is_regular = false;
        }
        else
        {
          path.assign(zipinfo->name);

          // produce headers with zip file pathnames for each archived part (Grep::partname)
          if (!flag_no_filename)
            flag_no_header = false;
        }
      }

      // decompress a block of data into the buffer
      std::streamsize len = zstream->decompress(buf, maxlen);

      // a negative length ends the processing of this zstream
      if (len < 0)
        break;

      bool is_selected = true;

      // try tar first, then cpio; each returns true when it handled the data as an archive
      if (!filter_tar(*zstream, path, buf, maxlen, len) && !filter_cpio(*zstream, path, buf, maxlen, len))
      {
        // not a tar/cpio file, decompress the data into pipe, if not unzipping or if zipped file meets selection criteria
        is_selected = is_regular && (zipinfo == NULL || select_matching(path.c_str(), buf, static_cast<size_t>(len), true));

        if (is_selected)
        {
          // if pipe is closed, then reopen it
          if (pipe_fd[1] == -1)
          {
            // signal close and wait until the main grep thread created a new pipe in close_file()
            std::unique_lock<std::mutex> lock(pipe_mutex);
            pipe_close.notify_one();
            waiting = true;
            pipe_ready.wait(lock);
            waiting = false;
            lock.unlock();

            // failed to create a pipe in close_file()
            if (pipe_fd[1] == -1)
              break;
          }

          // assign the Grep::partname (synchronized on pipe_mutex and pipe), before sending to the (new) pipe
          partname.swap(path);
        }

        // push decompressed data into pipe
        // (when not selected nothing is written, the data is only decompressed and dropped)
        while (len > 0)
        {
          // write buffer data to the pipe, if the pipe is broken then the receiver is waiting for this thread to join
          // (a write of fewer than len bytes also ends the loop)
          if (is_selected && write(pipe_fd[1], buf, static_cast<size_t>(len)) < len)
            break;

          // decompress the next block of data into the buffer
          len = zstream->decompress(buf, maxlen);
        }
      }

      // break if not unzipping or if no more files to unzip
      if (zstream->zipinfo() == NULL)
        break;

      // extracting a zip file
      extracting = true;

      // after unzipping the selected zip file, close our end of the pipe and loop for the next file
      if (is_selected && pipe_fd[1] != -1)
      {
        close(pipe_fd[1]);
        pipe_fd[1] = -1;
      }
    }

    // done with this zstream
    extracting = false;

    if (pipe_fd[1] != -1)
    {
      // close our end of the pipe
      close(pipe_fd[1]);
      pipe_fd[1] = -1;
    }

    if (!thread_end)
    {
      // wait until a new zstream is ready
      // (pipe_zstrm is notified by open_file() and by the destructor)
      std::unique_lock<std::mutex> lock(pipe_mutex);
      pipe_close.notify_one();
      waiting = true;
      pipe_zstrm.wait(lock);
      waiting = false;
      lock.unlock();
    }
  }
}
2342 
2343   // if tar file, extract regular file contents and push into pipes one by one, return true when done
filter_tarGrep2344   bool filter_tar(zstreambuf& zstream, const std::string& partprefix, unsigned char *buf, size_t maxlen, std::streamsize len)
2345   {
2346     const int BLOCKSIZE = 512;
2347 
2348     if (len > BLOCKSIZE)
2349     {
2350       // v7 and ustar formats
2351       const char ustar_magic[8] = { 'u', 's', 't', 'a', 'r', 0, '0', '0' };
2352 
2353       // gnu and oldgnu formats
2354       const char gnutar_magic[8] = { 'u', 's', 't', 'a', 'r', ' ', ' ', 0 };
2355 
2356       // is this a tar archive?
2357       if (*buf != '\0' && (memcmp(buf + 257, ustar_magic, 8) == 0 || memcmp(buf + 257, gnutar_magic, 8) == 0))
2358       {
2359         // produce headers with tar file pathnames for each archived part (Grep::partname)
2360         if (!flag_no_filename)
2361           flag_no_header = false;
2362 
2363         // inform the main grep thread we are extracting an archive
2364         extracting = true;
2365 
2366         // to hold the path (prefix + name) extracted from the header
2367         std::string path;
2368 
2369         // to hold long path extracted from the previous header block that is marked with typeflag 'x' or 'L'
2370         std::string long_path;
2371 
2372         while (true)
2373         {
2374           // extract tar header fields (name and prefix strings are not \0-terminated!!)
2375           const char *name = reinterpret_cast<const char*>(buf);
2376           const char *prefix = reinterpret_cast<const char*>(buf + 345);
2377           size_t size = strtoul(reinterpret_cast<const char*>(buf + 124), NULL, 8);
2378           int padding = (BLOCKSIZE - size % BLOCKSIZE) % BLOCKSIZE;
2379           unsigned char typeflag = buf[156];
2380 
2381           // header types
2382           bool is_regular = typeflag == '0' || typeflag == '\0';
2383           bool is_xhd = typeflag == 'x';
2384           bool is_extended = typeflag == 'L';
2385 
2386           // assign the (long) tar pathname
2387           path.clear();
2388           if (long_path.empty())
2389           {
2390             if (*prefix != '\0')
2391             {
2392               if (prefix[154] == '\0')
2393                 path.assign(prefix);
2394               else
2395                 path.assign(prefix, 155);
2396               path.push_back('/');
2397             }
2398             if (name[99] == '\0')
2399               path.append(name);
2400             else
2401               path.append(name, 100);
2402           }
2403           else
2404           {
2405             path.swap(long_path);
2406           }
2407 
2408           // remove header to advance to the body
2409           len -= BLOCKSIZE;
2410           memmove(buf, buf + BLOCKSIZE, static_cast<size_t>(len));
2411 
2412           // check if archived file meets selection criteria
2413           size_t minlen = std::min(static_cast<size_t>(len), size);
2414           bool is_selected = select_matching(path.c_str(), buf, minlen, is_regular);
2415 
2416           // if extended headers are present
2417           if (is_xhd)
2418           {
2419             // typeflag 'x': extract the long path from the pax extended header block in the body
2420             const char *b = reinterpret_cast<const char*>(buf);
2421             const char *e = b + minlen;
2422             const char *t = "path=";
2423             const char *s = std::search(b, e, t, t + 5);
2424             if (s != NULL)
2425             {
2426               e = static_cast<const char*>(memchr(s, '\n', e - s));
2427               if (e != NULL)
2428                 long_path.assign(s + 5, e - s - 5);
2429             }
2430           }
2431           else if (is_extended)
2432           {
2433             // typeflag 'L': get long name from the body
2434             long_path.assign(reinterpret_cast<const char*>(buf), minlen);
2435           }
2436 
2437           // if the pipe is closed, then get a new pipe to search the next part in the archive
2438           if (is_selected && pipe_fd[1] == -1)
2439           {
2440             // signal close and wait until the main grep thread created a new pipe in close_file()
2441             std::unique_lock<std::mutex> lock(pipe_mutex);
2442             pipe_close.notify_one();
2443             waiting = true;
2444             pipe_ready.wait(lock);
2445             waiting = false;
2446             lock.unlock();
2447 
2448             // failed to create a pipe in close_file()
2449             if (pipe_fd[1] == -1)
2450               break;
2451           }
2452 
2453           // assign the Grep::partname (synchronized on pipe_mutex and pipe), before sending to the (new) pipe
2454           if (is_selected)
2455           {
2456             if (!partprefix.empty())
2457               partname.assign(partprefix).append(":").append(path);
2458             else
2459               partname.swap(path);
2460           }
2461 
2462           // it is ok to push the body into the pipe for the main thread to search
2463           bool ok = is_selected;
2464 
2465           while (len > 0)
2466           {
2467             size_t len_out = std::min(static_cast<size_t>(len), size);
2468 
2469             if (ok)
2470             {
2471               // write decompressed data to the pipe, if the pipe is broken then stop pushing more data into this pipe
2472               if (write(pipe_fd[1], buf, len_out) < static_cast<ssize_t>(len_out))
2473                 ok = false;
2474             }
2475 
2476             size -= len_out;
2477 
2478             // reached the end of the tar body?
2479             if (size == 0)
2480             {
2481               len -= len_out;
2482               memmove(buf, buf + len_out, static_cast<size_t>(len));
2483 
2484               break;
2485             }
2486 
2487             // decompress the next block of data into the buffer
2488             len = zstream.decompress(buf, maxlen);
2489           }
2490 
2491           // error?
2492           if (len < 0)
2493             break;
2494 
2495           // fill the rest of the buffer with decompressed data
2496           if (static_cast<size_t>(len) < maxlen)
2497           {
2498             std::streamsize len_in = zstream.decompress(buf + len, maxlen - static_cast<size_t>(len));
2499 
2500             // error?
2501             if (len_in < 0)
2502               break;
2503 
2504             len += len_in;
2505           }
2506 
2507           // skip padding
2508           if (len > padding)
2509           {
2510             len -= padding;
2511             memmove(buf, buf + padding, static_cast<size_t>(len));
2512           }
2513 
2514           // rest of the file is too short, something is wrong
2515           if (len <= BLOCKSIZE)
2516             break;
2517 
2518           // no more parts to extract?
2519           if (*buf == '\0' || (memcmp(buf + 257, ustar_magic, 8) != 0 && memcmp(buf + 257, gnutar_magic, 8) != 0))
2520             break;
2521 
2522           // get a new pipe to search the next part in the archive, if the previous part was a regular file
2523           if (is_selected)
2524           {
2525             // close our end of the pipe
2526             close(pipe_fd[1]);
2527             pipe_fd[1] = -1;
2528           }
2529         }
2530 
2531         // done extracting the tar file
2532         return true;
2533       }
2534     }
2535 
2536     // not a tar file
2537     return false;
2538   }
2539 
2540   // if cpio file, extract regular file contents and push into pipes one by one, return true when done
filter_cpioGrep2541   bool filter_cpio(zstreambuf& zstream, const std::string& partprefix, unsigned char *buf, size_t maxlen, std::streamsize len)
2542   {
2543     const int HEADERSIZE = 110;
2544 
2545     if (len > HEADERSIZE)
2546     {
2547       // cpio odc format
2548       const char odc_magic[6] = { '0', '7', '0', '7', '0', '7' };
2549 
2550       // cpio newc format
2551       const char newc_magic[6] = { '0', '7', '0', '7', '0', '1' };
2552 
2553       // cpio newc+crc format
2554       const char newc_crc_magic[6] = { '0', '7', '0', '7', '0', '2' };
2555 
2556       // is this a cpio archive?
2557       if (memcmp(buf, odc_magic, 6) == 0 || memcmp(buf, newc_magic, 6) == 0 || memcmp(buf, newc_crc_magic, 6) == 0)
2558       {
2559         // produce headers with cpio file pathnames for each archived part (Grep::partname)
2560         if (!flag_no_filename)
2561           flag_no_header = false;
2562 
2563         // inform the main grep thread we are extracting an archive
2564         extracting = true;
2565 
2566         // to hold the path (prefix + name) extracted from the header
2567         std::string path;
2568 
2569         // need a new pipe, close current pipe first to create a new pipe
2570         bool in_progress = false;
2571 
2572         while (true)
2573         {
2574           // true if odc format, false if newc format
2575           bool is_odc = buf[5] == '7';
2576 
2577           // odc header length is 76, newc header length is 110
2578           int header_len = is_odc ? 76 : 110;
2579 
2580           char tmp[16];
2581           char *rest;
2582 
2583           // get the namesize
2584           size_t namesize;
2585           if (is_odc)
2586           {
2587             memcpy(tmp, buf + 59, 6);
2588             tmp[6] = '\0';
2589             namesize = strtoul(tmp, &rest, 8);
2590           }
2591           else
2592           {
2593             memcpy(tmp, buf + 94, 8);
2594             tmp[8] = '\0';
2595             namesize = strtoul(tmp, &rest, 16);
2596           }
2597 
2598           // if not a valid mode value, then something is wrong
2599           if (rest == NULL || *rest != '\0')
2600           {
2601             // data was read, stop reading more
2602             if (in_progress)
2603               break;
2604 
2605             // assume this is not a cpio file and return false
2606             return false;
2607           }
2608 
2609           // pathnames with trailing \0 cannot be empty or too large
2610           if (namesize <= 1 || namesize >= 65536)
2611             break;
2612 
2613           // get the filesize
2614           size_t filesize;
2615           if (is_odc)
2616           {
2617             memcpy(tmp, buf + 65, 11);
2618             tmp[11] = '\0';
2619             filesize = strtoul(tmp, &rest, 8);
2620           }
2621           else
2622           {
2623             memcpy(tmp, buf + 54, 8);
2624             tmp[8] = '\0';
2625             filesize = strtoul(tmp, &rest, 16);
2626           }
2627 
2628           // if not a valid mode value, then something is wrong
2629           if (rest == NULL || *rest != '\0')
2630           {
2631             // data was read, stop reading more
2632             if (in_progress)
2633               break;
2634 
2635             // assume this is not a cpio file and return false
2636             return false;
2637           }
2638 
2639           // true if this is a regular file when (mode & 0170000) == 0100000
2640           bool is_regular;
2641           if (is_odc)
2642           {
2643             memcpy(tmp, buf + 18, 6);
2644             tmp[6] = '\0';
2645             is_regular = (strtoul(tmp, &rest, 8) & 0170000) == 0100000;
2646           }
2647           else
2648           {
2649             memcpy(tmp, buf + 14, 8);
2650             tmp[8] = '\0';
2651             is_regular = (strtoul(tmp, &rest, 16) & 0170000) == 0100000;
2652           }
2653 
2654           // if not a valid mode value, then something is wrong
2655           if (rest == NULL || *rest != '\0')
2656           {
2657             // data was read, stop reading more
2658             if (in_progress)
2659               break;
2660 
2661             // assume this is not a cpio file and return false
2662             return false;
2663           }
2664 
2665           // remove header to advance to the body
2666           len -= header_len;
2667           memmove(buf, buf + header_len, static_cast<size_t>(len));
2668 
2669           // assign the cpio pathname
2670           path.clear();
2671 
2672           size_t size = namesize;
2673 
2674           while (len > 0)
2675           {
2676             size_t n = std::min(static_cast<size_t>(len), size);
2677             char *b = reinterpret_cast<char*>(buf);
2678 
2679             path.append(b, n);
2680             size -= n;
2681 
2682             if (size == 0)
2683             {
2684               // remove pathname to advance to the body
2685               len -= n;
2686               memmove(buf, buf + n, static_cast<size_t>(len));
2687 
2688               break;
2689             }
2690 
2691             // decompress the next block of data into the buffer
2692             len = zstream.decompress(buf, maxlen);
2693           }
2694 
2695           // error?
2696           if (len < 0)
2697             break;
2698 
2699           // remove trailing \0
2700           if (path.back() == '\0')
2701             path.pop_back();
2702 
2703           // reached the end of the cpio archive?
2704           if (path == "TRAILER!!!")
2705             break;
2706 
2707           // fill the rest of the buffer with decompressed data
2708           if (static_cast<size_t>(len) < maxlen)
2709           {
2710             std::streamsize len_in = zstream.decompress(buf + len, maxlen - static_cast<size_t>(len));
2711 
2712             // error?
2713             if (len_in < 0)
2714               break;
2715 
2716             len += len_in;
2717           }
2718 
2719           // skip newc format \0 padding after the pathname
2720           if (!is_odc && len > 3)
2721           {
2722             size_t n = 4 - (110 + namesize) % 4;
2723             len -= n;
2724             memmove(buf, buf + n, static_cast<size_t>(len));
2725           }
2726 
2727           // check if archived file meets selection criteria
2728           size_t minlen = std::min(static_cast<size_t>(len), filesize);
2729           bool is_selected = select_matching(path.c_str(), buf, minlen, is_regular);
2730 
2731           // if the pipe is closed, then get a new pipe to search the next part in the archive
2732           if (is_selected && pipe_fd[1] == -1)
2733           {
2734             // signal close and wait until the main grep thread created a new pipe in close_file()
2735             std::unique_lock<std::mutex> lock(pipe_mutex);
2736             pipe_close.notify_one();
2737             waiting = true;
2738             pipe_ready.wait(lock);
2739             waiting = false;
2740             lock.unlock();
2741 
2742             // failed to create a pipe in close_file()
2743             if (pipe_fd[1] == -1)
2744               break;
2745           }
2746 
2747           // assign the Grep::partname (synchronized on pipe_mutex and pipe), before sending to the (new) pipe
2748           if (is_selected)
2749           {
2750             if (!partprefix.empty())
2751               partname.assign(partprefix).append(":").append(path);
2752             else
2753               partname.swap(path);
2754           }
2755 
2756           // it is ok to push the body into the pipe for the main thread to search
2757           bool ok = is_selected;
2758 
2759           size = filesize;
2760 
2761           while (len > 0)
2762           {
2763             size_t len_out = std::min(static_cast<size_t>(len), size);
2764 
2765             if (ok)
2766             {
2767               // write decompressed data to the pipe, if the pipe is broken then stop pushing more data into this pipe
2768               if (write(pipe_fd[1], buf, len_out) < static_cast<ssize_t>(len_out))
2769                 ok = false;
2770             }
2771 
2772             size -= len_out;
2773 
2774             // reached the end of the cpio body?
2775             if (size == 0)
2776             {
2777               len -= len_out;
2778               memmove(buf, buf + len_out, static_cast<size_t>(len));
2779 
2780               break;
2781             }
2782 
2783             // decompress the next block of data into the buffer
2784             len = zstream.decompress(buf, maxlen);
2785           }
2786 
2787           // error?
2788           if (len < 0)
2789             break;
2790 
2791           if (static_cast<size_t>(len) < maxlen)
2792           {
2793             // fill the rest of the buffer with decompressed data
2794             std::streamsize len_in = zstream.decompress(buf + len, maxlen - static_cast<size_t>(len));
2795 
2796             // error?
2797             if (len_in < 0)
2798               break;
2799 
2800             len += len_in;
2801           }
2802 
2803           // skip newc format \0 padding
2804           if (!is_odc && len > 2)
2805           {
2806             size_t n = (4 - filesize % 4) % 4;
2807             len -= n;
2808             memmove(buf, buf + n, static_cast<size_t>(len));
2809           }
2810 
2811           // rest of the file is too short, something is wrong
2812           if (len <= HEADERSIZE)
2813             break;
2814 
2815           // quit if this is not valid cpio header magic
2816           if (memcmp(buf, odc_magic, 6) != 0 && memcmp(buf, newc_magic, 6) != 0 && memcmp(buf, newc_crc_magic, 6) != 0)
2817             break;
2818 
2819           // get a new pipe to search the next part in the archive, if the previous part was a regular file
2820           if (is_selected)
2821           {
2822             // close our end of the pipe
2823             close(pipe_fd[1]);
2824             pipe_fd[1] = -1;
2825 
2826             in_progress = true;
2827           }
2828         }
2829 
2830         // done extracting the cpio file
2831         return true;
2832       }
2833     }
2834 
2835     // not a cpio file
2836     return false;
2837   }
2838 
2839   // true if path matches search constraints or buf contains magic bytes
select_matchingGrep2840   bool select_matching(const char *path, const unsigned char *buf, size_t len, bool is_regular)
2841   {
2842     bool is_selected = is_regular;
2843 
2844     if (is_selected)
2845     {
2846       const char *basename = strrchr(path, '/');
2847       if (basename == NULL)
2848         basename = path;
2849       else
2850         ++basename;
2851 
2852       if (*basename == '.' && !flag_hidden)
2853         return false;
2854 
2855       // -O, -t, and -g (--include and --exclude): check if pathname or basename matches globs, is_selected = false if not
2856       if (!flag_all_exclude.empty() || !flag_all_include.empty())
2857       {
2858         // exclude files whose basename matches any one of the --exclude globs
2859         for (const auto& glob : flag_all_exclude)
2860           if (!(is_selected = !glob_match(path, basename, glob.c_str())))
2861             break;
2862 
2863         // include only if not excluded
2864         if (is_selected)
2865         {
2866           // include files whose basename matches any one of the --include globs
2867           for (const auto& glob : flag_all_include)
2868             if ((is_selected = glob_match(path, basename, glob.c_str())))
2869               break;
2870         }
2871       }
2872 
2873       // -M: check magic bytes, requires sufficiently large len of buf[] to match patterns, which is fine when Z_BUF_LEN is large e.g. 64K
2874       if (buf != NULL && !flag_file_magic.empty() && (flag_all_include.empty() || !is_selected))
2875       {
2876         // create a matcher to match the magic pattern, we cannot use magic_matcher because it is not thread safe
2877         reflex::Matcher magic(magic_pattern);
2878         magic.buffer(const_cast<char*>(reinterpret_cast<const char*>(buf)), len + 1);
2879         size_t match = magic.scan();
2880         is_selected = match == flag_not_magic || match >= flag_min_magic;
2881       }
2882     }
2883 
2884     return is_selected;
2885   }
2886 
2887 #endif
2888 #endif
2889 
  // close the file and clear input, return true if next file is extracted from an archive to search
  //
  // pathname is only used in the warning issued when a new decompression pipe cannot be opened.
  //
  // When decompressing through a pipe, this closes our read end and synchronizes with the decompression thread
  // on pipe_mutex: the thread signals pipe_close and sets waiting when it needs a new pipe, and we answer with
  // pipe_ready once a new pipe is available (pipe_fd[1] != -1) or cannot be provided (pipe_fd[1] == -1).
  // A true return means that input was replaced by the read end of a new pipe and nothing else was closed;
  // a false return means the stream and file were closed and input was cleared.
  bool close_file(const char *pathname)
  {
    (void)pathname; // appease -Wunused-parameter

#ifdef HAVE_LIBZ

#ifdef WITH_DECOMPRESSION_THREAD

    // only when decompressing and our read end of the pipe is still open
    if (flag_decompress && pipe_fd[0] != -1)
    {
      // close the FILE* and its underlying pipe created with pipe() and fdopen()
      if (input.file() != NULL)
      {
        fclose(input.file());
        input = static_cast<FILE*>(NULL);
      }

      // our end of the pipe is now closed
      pipe_fd[0] = -1;

      // if extracting and the decompression filter thread is not yet waiting, then wait until the other end closed the pipe
      // NOTE(review): this wait has no predicate loop, so a spurious wakeup would let us continue early - confirm this is acceptable
      std::unique_lock<std::mutex> lock(pipe_mutex);
      if (!waiting)
        pipe_close.wait(lock);
      lock.unlock();

      // extract the next file from the archive when applicable, e.g. zip format
      if (extracting)
      {
        // output is not blocked or cancelled
        if (!out.eof && !out.cancelled())
        {
          FILE *pipe_in = NULL;

          // open a new pipe between this worker and the decompression thread and wrap our read end in a FILE*
          if (pipe(pipe_fd) == 0 && (pipe_in = fdopen(pipe_fd[0], "rb")) != NULL)
          {
            // notify the decompression filter thread of the new pipe
            pipe_ready.notify_one();

            input = reflex::Input(pipe_in, flag_encoding_type);

            // loop back in search() to start searching the next file in the archive
            return true;
          }

          // failed to create a new pipe
          warning("cannot open decompression pipe while reading", pathname);

          // pipe_fd[0] is still -1 when pipe() itself failed, otherwise fdopen() failed and both ends must be closed
          if (pipe_fd[0] != -1)
          {
            close(pipe_fd[0]);
            close(pipe_fd[1]);
          }
        }

        // no new pipe: the decompression thread stops extracting when it finds pipe_fd[1] == -1 after waking up
        pipe_fd[0] = -1;
        pipe_fd[1] = -1;

        // notify the decompression thread filter_tar/filter_cpio
        pipe_ready.notify_one();
      }
    }

#endif

    // delete the input stream layered on the decompressed stream
    if (stream != NULL)
    {
      delete stream;
      stream = NULL;
    }

#endif

#ifdef WITH_STDIN_DRAIN
    // drain stdin until eof
    if (file == stdin && !feof(stdin))
    {
      // when stdin cannot be positioned at its end with fseek, read and discard the remaining input
      if (fseek(stdin, 0, SEEK_END) != 0)
      {
        char buf[16384];
        while (true)
        {
          size_t r = fread(buf, 1, sizeof(buf), stdin);

          // a full buffer was read, there may be more
          if (r == sizeof(buf))
            continue;
          if (feof(stdin))
            break;

          // NOTE(review): r is a size_t so r >= 0 is always true and the else-branch below is never taken,
          // a read error is presumably meant to be detected here (e.g. with ferror) - confirm the intent
          if (r >= 0)
          {
            // a short read on blocking stdin ends the draining
            if (!(fcntl(0, F_GETFL) & O_NONBLOCK))
              break;

            // stdin is nonblocking: wait up to one second until stdin is readable or has an exceptional condition
            struct timeval tv;
            fd_set rfds, efds;
            FD_ZERO(&rfds);
            FD_ZERO(&efds);
            FD_SET(0, &rfds);
            FD_SET(0, &efds);
            tv.tv_sec = 1;
            tv.tv_usec = 0;

            // this int r hides the size_t r declared above
            int r = ::select(1, &rfds, NULL, &efds, &tv);
            if (r < 0 && errno != EINTR)
              break;
            if (r > 0 && FD_ISSET(0, &efds))
              break;
          }
          else if (errno != EINTR)
          {
            break;
          }
        }
      }
    }
#endif

    // close the file, except for stdin and the source file
    if (file != NULL && file != stdin && file != source)
    {
      fclose(file);
      file = NULL;
    }

    input.clear();

    // no next file to search
    return false;
  }
3017 
3018   // specify input to read for matcher, when input is a regular file then try mmap for zero copy overhead
init_readGrep3019   bool init_read()
3020   {
3021     const char *base;
3022     size_t size;
3023 
3024     // attempt to mmap the input file
3025     if (mmap.file(input, base, size))
3026     {
3027       // matcher reads directly from protected mmap memory (cast is safe: base[0..size] is not modified!)
3028       matcher->buffer(const_cast<char*>(base), size + 1);
3029     }
3030     else
3031     {
3032       matcher->input(input);
3033 
3034 #if !defined(HAVE_PCRE2) && defined(HAVE_BOOST_REGEX)
3035       // buffer all input to work around Boost.Regex partial matching bug, but this may throw std::bad_alloc if the file is too large
3036       if (flag_perl_regexp)
3037         matcher->buffer();
3038 #endif
3039 
3040 #ifndef OS_WIN
3041       if (input == stdin)
3042       {
3043         struct stat buf;
3044         bool interactive = fstat(0, &buf) == 0 && (S_ISCHR(buf.st_mode) || S_ISFIFO(buf.st_mode));
3045 
3046         // if input is a TTY or pipe, then make stdin nonblocking and register a stdin handler to continue reading and to flush results to output
3047         if (interactive)
3048         {
3049           fcntl(0, F_SETFL, fcntl(0, F_GETFL) | O_NONBLOCK);
3050           matcher->in.set_handler(&stdin_handler);
3051         }
3052       }
3053 #endif
3054     }
3055 
3056     // -I: do not match binary
3057     if (flag_binary_without_match && init_is_binary())
3058       return false;
3059 
3060     // --range=NUM1[,NUM2]: start searching at line NUM1
3061     for (size_t i = flag_min_line; i > 1; --i)
3062       if (!matcher->skip('\n'))
3063         break;
3064 
3065     return true;
3066   }
3067 
3068   // after opening a file with init_read, check if it is binary
init_is_binaryGrep3069   bool init_is_binary()
3070   {
3071     // limit checking to first buffer filled with input up to 16K, which should suffice, to improve performance
3072     size_t avail = matcher->avail();
3073     return is_binary(matcher->begin(), avail < 16384 ? avail : 16384);
3074   }
3075 
  const char                    *filename;      // the name of the file being searched
  std::string                    partname;      // the name of an extracted file from an archive
  std::string                    restline;      // a buffer to store the rest of a line to search
  Output                         out;           // asynchronous output
  reflex::AbstractMatcher       *matcher;       // the pattern matcher we're using, never NULL
  Matchers                      *matchers;      // the CNF of AND/OR/NOT matchers or NULL
  std::vector<bool>              matching;      // bitmap to keep track of globally matching CNF terms
  std::vector<std::vector<bool>> notmatching;   // bitmap to keep track of globally matching OR NOT CNF terms
  MMap                           mmap;          // mmap state
  reflex::Input                  input;         // input to the matcher
  FILE                          *file;          // the current input file
#ifndef OS_WIN
  StdInHandler                   stdin_handler; // a handler to handle non-blocking stdin from a TTY or a slow pipe
#endif
#ifdef HAVE_LIBZ
  zstreambuf                    *zstream;       // the decompressed stream from the current input file
  std::istream                  *stream;        // input stream layered on the decompressed stream
#ifdef WITH_DECOMPRESSION_THREAD
  std::thread                    thread;        // decompression thread
  std::atomic_bool               thread_end;    // true if decompression thread should terminate
  int                            pipe_fd[2];    // decompressed stream pipe: [0] is read by the searching thread, [1] is written by the decompression thread, -1 when closed
  std::mutex                     pipe_mutex;    // mutex to extract files in thread, guards the pipe handshake with the cvs below
  std::condition_variable        pipe_zstrm;    // cv to control new pipe creation
  std::condition_variable        pipe_ready;    // cv notified by close_file() when a new pipe was created or could not be created
  std::condition_variable        pipe_close;    // cv notified by the decompression thread when it needs a new pipe, close_file() waits on it
  // NOTE(review): volatile does not synchronize threads; the visible accesses to waiting hold pipe_mutex, confirm for all other accesses
  volatile bool                  extracting;    // true if extracting files from TAR or ZIP archive
  volatile bool                  waiting;       // true if decompression thread is waiting on pipe_ready for a new pipe
#endif
#endif
3105 
3106 };
3107 
3108 // a job in the job queue
3109 struct Job {
3110 
3111   // sentinel job NONE
3112   static const size_t NONE = UNDEFINED_SIZE;
3113 
JobJob3114   Job()
3115     :
3116       pathname(),
3117       slot(NONE)
3118   { }
3119 
JobJob3120   Job(const char *pathname, size_t slot)
3121     :
3122       pathname(pathname),
3123       slot(slot)
3124   { }
3125 
noneJob3126   bool none()
3127   {
3128     return slot == NONE;
3129   }
3130 
3131   std::string pathname;
3132   size_t      slot;
3133 };
3134 
3135 struct GrepWorker;
3136 
3137 // master submits jobs to workers and implements operations to support lock-free job stealing
3138 struct GrepMaster : public Grep {
3139 
GrepMasterGrepMaster3140   GrepMaster(FILE *file, reflex::AbstractMatcher *matcher, Matchers *matchers)
3141     :
3142       Grep(file, matcher, matchers),
3143       sync(flag_sort_key == Sort::NA ? Output::Sync::Mode::UNORDERED : Output::Sync::Mode::ORDERED)
3144   {
3145     // master and workers synchronize their output
3146     out.sync_on(&sync);
3147 
3148     // set global handle to be able to call cancel_ugrep()
3149     set_grep_handle(this);
3150 
3151     start_workers();
3152 
3153     iworker = workers.begin();
3154   }
3155 
~GrepMasterGrepMaster3156   virtual ~GrepMaster()
3157   {
3158     stop_workers();
3159     clear_grep_handle();
3160   }
3161 
3162   // clone the pattern matcher - the caller is responsible to deallocate the returned matcher
matcher_cloneGrepMaster3163   reflex::AbstractMatcher *matcher_clone() const
3164   {
3165     return matcher->clone();
3166   }
3167 
3168   // clone the CNF of AND/OR/NOT matchers - the caller is responsible to deallocate the returned list of matchers if not NULL
matchers_cloneGrepMaster3169   Matchers *matchers_clone() const
3170   {
3171     if (matchers == NULL)
3172       return NULL;
3173 
3174     auto *new_matchers = new Matchers;
3175 
3176     for (const auto& i : *matchers)
3177     {
3178       new_matchers->emplace_back();
3179 
3180       auto& last = new_matchers->back();
3181 
3182       for (const auto& j : i)
3183       {
3184         if (j)
3185           last.emplace_back(j->clone());
3186         else
3187           last.emplace_back();
3188       }
3189     }
3190 
3191     return new_matchers;
3192   }
3193 
3194   // search a file by submitting it as a job to a worker
searchGrepMaster3195   void search(const char *pathname) override
3196   {
3197     submit(pathname);
3198   }
3199 
3200   // start worker threads
3201   void start_workers();
3202 
3203   // stop all workers
3204   void stop_workers();
3205 
3206   // submit a job with a pathname to a worker, workers are visited round-robin
3207   void submit(const char *pathname);
3208 
3209   // lock-free job stealing on behalf of a worker from a co-worker with at least --min-steal jobs still to do
3210   bool steal(GrepWorker *worker);
3211 
3212   std::list<GrepWorker>           workers; // workers running threads
3213   std::list<GrepWorker>::iterator iworker; // the next worker to submit a job to
3214   Output::Sync                    sync;    // sync output of workers
3215 
3216 };
3217 
3218 // worker runs a thread to execute jobs submitted by the master
3219 struct GrepWorker : public Grep {
3220 
GrepWorkerGrepWorker3221   GrepWorker(FILE *file, GrepMaster *master)
3222     :
3223       Grep(file, master->matcher_clone(), master->matchers_clone()),
3224       master(master),
3225       todo(0)
3226   {
3227     // all workers synchronize their output on the master's sync object
3228     out.sync_on(&master->sync);
3229 
3230     // run worker thread executing jobs assigned to its queue
3231     thread = std::thread(&GrepWorker::execute, this);
3232   }
3233 
~GrepWorkerGrepWorker3234   virtual ~GrepWorker()
3235   {
3236     // delete the cloned matcher
3237     delete matcher;
3238 
3239     // delete the cloned matchers, if any
3240     if (matchers != NULL)
3241       delete matchers;
3242   }
3243 
3244   // worker thread execution
3245   void execute();
3246 
3247   // submit Job::NONE sentinel to this worker
submit_jobGrepWorker3248   void submit_job()
3249   {
3250     while (todo >= MAX_JOB_QUEUE_SIZE && !out.eof && !out.cancelled())
3251       std::this_thread::sleep_for(std::chrono::milliseconds(100)); // give the worker threads some slack
3252 
3253     std::unique_lock<std::mutex> lock(queue_mutex);
3254 
3255     jobs.emplace_back();
3256     ++todo;
3257 
3258     queue_work.notify_one();
3259   }
3260 
3261   // submit a job to this worker
submit_jobGrepWorker3262   void submit_job(const char *pathname, size_t slot)
3263   {
3264     while (todo >= MAX_JOB_QUEUE_SIZE && !out.eof && !out.cancelled())
3265       std::this_thread::sleep_for(std::chrono::milliseconds(100)); // give the worker threads some slack
3266 
3267     std::unique_lock<std::mutex> lock(queue_mutex);
3268 
3269     jobs.emplace_back(pathname, slot);
3270     ++todo;
3271 
3272     queue_work.notify_one();
3273   }
3274 
3275   // move a stolen job to this worker, maintaining job slot order
move_jobGrepWorker3276   void move_job(Job& job)
3277   {
3278     std::unique_lock<std::mutex> lock(queue_mutex);
3279 
3280     bool inserted = false;
3281 
3282     // insert job in the queue to maintain job order
3283     for (auto j = jobs.begin(); j != jobs.end(); ++j)
3284     {
3285       if (j->slot > job.slot)
3286       {
3287         jobs.insert(j, std::move(job));
3288         inserted = true;
3289         break;
3290       }
3291     }
3292 
3293     if (!inserted)
3294       jobs.emplace_back(std::move(job));
3295 
3296     ++todo;
3297 
3298     queue_work.notify_one();
3299   }
3300 
3301   // receive a job for this worker, wait until one arrives
next_jobGrepWorker3302   void next_job(Job& job)
3303   {
3304     std::unique_lock<std::mutex> lock(queue_mutex);
3305 
3306     while (jobs.empty())
3307       queue_work.wait(lock);
3308 
3309     job = jobs.front();
3310 
3311     jobs.pop_front();
3312     --todo;
3313 
3314     // if we popped a Job::NONE sentinel but the queue has some jobs, then move the sentinel to the back of the queue
3315     if (job.none() && !jobs.empty())
3316     {
3317       jobs.emplace_back();
3318       job = jobs.front();
3319       jobs.pop_front();
3320     }
3321   }
3322 
3323   // steal a job from this worker, if at least --min-steal jobs to do, returns true if successful
steal_jobGrepWorker3324   bool steal_job(Job& job)
3325   {
3326     // not enough jobs in the queue to steal from
3327     if (todo < flag_min_steal)
3328       return false;
3329 
3330     std::unique_lock<std::mutex> lock(queue_mutex);
3331 
3332     if (jobs.empty())
3333       return false;
3334 
3335     job = jobs.front();
3336 
3337     // we cannot steal a Job::NONE sentinel
3338     if (job.none())
3339       return false;
3340 
3341     jobs.pop_front();
3342     --todo;
3343 
3344     return true;
3345   }
3346 
3347   // submit Job::NONE sentinel to stop this worker
stopGrepWorker3348   void stop()
3349   {
3350     submit_job();
3351   }
3352 
3353   std::thread             thread;      // thread of this worker, spawns GrepWorker::execute()
3354   GrepMaster             *master;      // the master of this worker
3355   std::mutex              queue_mutex; // job queue mutex
3356   std::condition_variable queue_work;  // cv to control the job queue
3357   std::deque<Job>         jobs;        // queue of pending jobs submitted to this worker
3358   std::atomic_size_t      todo;        // number of jobs in the queue, atomic for lock-free job stealing
3359 
3360 };
3361 
3362 // start worker threads
start_workers()3363 void GrepMaster::start_workers()
3364 {
3365   size_t num;
3366 
3367   // create worker threads
3368   try
3369   {
3370     for (num = 0; num < threads; ++num)
3371       workers.emplace(workers.end(), out.file, this);
3372   }
3373 
3374   // if sufficient resources are not available then reduce the number of threads to the number of active workers created
3375   catch (std::system_error& error)
3376   {
3377     if (error.code() != std::errc::resource_unavailable_try_again)
3378       throw;
3379 
3380     threads = num;
3381   }
3382 }
3383 
3384 // stop all workers
stop_workers()3385 void GrepMaster::stop_workers()
3386 {
3387   // submit Job::NONE sentinel to workers
3388   for (auto& worker : workers)
3389     worker.stop();
3390 
3391   // wait for workers to join
3392   for (auto& worker : workers)
3393     worker.thread.join();
3394 }
3395 
3396 // submit a job with a pathname to a worker, workers are visited round-robin
submit(const char * pathname)3397 void GrepMaster::submit(const char *pathname)
3398 {
3399   iworker->submit_job(pathname, sync.next++);
3400 
3401   // around we go
3402   ++iworker;
3403   if (iworker == workers.end())
3404     iworker = workers.begin();
3405 }
3406 
3407 // lock-free job stealing on behalf of a worker from a co-worker with at least --min-steal jobs still to do
steal(GrepWorker * worker)3408 bool GrepMaster::steal(GrepWorker *worker)
3409 {
3410   // pick a random co-worker using thread-safe std::chrono::high_resolution_clock as a simple RNG
3411   size_t n = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch()).count() % threads;
3412   auto iworker = workers.begin();
3413 
3414   while (n > 0)
3415   {
3416     ++iworker;
3417     --n;
3418   }
3419 
3420   // try to steal a job from the random co-worker or the next co-workers
3421   for (size_t i = 0; i < threads; ++i)
3422   {
3423     // around we go
3424     if (iworker == workers.end())
3425       iworker = workers.begin();
3426 
3427     // if co-worker isn't this worker (no self-stealing!)
3428     if (&*iworker != worker)
3429     {
3430       Job job;
3431 
3432       // if co-worker has at least --min-steal jobs then steal one for this worker
3433       if (iworker->steal_job(job))
3434       {
3435         worker->move_job(job);
3436 
3437         return true;
3438       }
3439     }
3440 
3441     // try next co-worker
3442     ++iworker;
3443   }
3444 
3445   // couldn't steal any job
3446   return false;
3447 }
3448 
3449 // execute worker thread
execute()3450 void GrepWorker::execute()
3451 {
3452   Job job;
3453 
3454   while (!out.eof && !out.cancelled())
3455   {
3456     // wait for next job
3457     next_job(job);
3458 
3459     // worker should stop?
3460     if (job.none())
3461       break;
3462 
3463     // start synchronizing output for this job slot in ORDERED mode (--sort)
3464     out.begin(job.slot);
3465 
3466     // search the file for this job
3467     search(job.pathname.c_str());
3468 
3469     // end output in ORDERED mode (--sort) for this job slot
3470     out.end();
3471 
3472     // if only one job is left to do, try stealing another job from a co-worker
3473     if (todo <= 1)
3474       master->steal(this);
3475   }
3476 }
3477 
// table of RE/flex file encodings for option --encoding (may be specified in any case)
// each entry maps an encoding name to a reflex::Input::file_encoding constant, several names are aliases of
// the same constant, and the table ends with a { NULL, 0 } sentinel entry
const Encoding encoding_table[] = {
  { "binary",      reflex::Input::file_encoding::plain      },
  // ASCII is handled as UTF-8
  { "ASCII",       reflex::Input::file_encoding::utf8       },
  { "UTF-8",       reflex::Input::file_encoding::utf8       },
  // UTF-16 and UTF-32 without an explicit byte order map to big endian
  { "UTF-16",      reflex::Input::file_encoding::utf16be    },
  { "UTF-16BE",    reflex::Input::file_encoding::utf16be    },
  { "UTF-16LE",    reflex::Input::file_encoding::utf16le    },
  { "UTF-32",      reflex::Input::file_encoding::utf32be    },
  { "UTF-32BE",    reflex::Input::file_encoding::utf32be    },
  { "UTF-32LE",    reflex::Input::file_encoding::utf32le    },
  { "LATIN1",      reflex::Input::file_encoding::latin      },
  { "ISO-8859-1",  reflex::Input::file_encoding::latin      },
  { "ISO-8859-2",  reflex::Input::file_encoding::iso8859_2  },
  { "ISO-8859-3",  reflex::Input::file_encoding::iso8859_3  },
  { "ISO-8859-4",  reflex::Input::file_encoding::iso8859_4  },
  { "ISO-8859-5",  reflex::Input::file_encoding::iso8859_5  },
  { "ISO-8859-6",  reflex::Input::file_encoding::iso8859_6  },
  { "ISO-8859-7",  reflex::Input::file_encoding::iso8859_7  },
  { "ISO-8859-8",  reflex::Input::file_encoding::iso8859_8  },
  { "ISO-8859-9",  reflex::Input::file_encoding::iso8859_9  },
  { "ISO-8859-10", reflex::Input::file_encoding::iso8859_10 },
  { "ISO-8859-11", reflex::Input::file_encoding::iso8859_11 },
  { "ISO-8859-13", reflex::Input::file_encoding::iso8859_13 },
  { "ISO-8859-14", reflex::Input::file_encoding::iso8859_14 },
  { "ISO-8859-15", reflex::Input::file_encoding::iso8859_15 },
  { "ISO-8859-16", reflex::Input::file_encoding::iso8859_16 },
  { "MAC",         reflex::Input::file_encoding::macroman   },
  { "MACROMAN",    reflex::Input::file_encoding::macroman   },
  { "EBCDIC",      reflex::Input::file_encoding::ebcdic     },
  { "CP437",       reflex::Input::file_encoding::cp437      },
  { "CP850",       reflex::Input::file_encoding::cp850      },
  { "CP858",       reflex::Input::file_encoding::cp858      },
  { "CP1250",      reflex::Input::file_encoding::cp1250     },
  { "CP1251",      reflex::Input::file_encoding::cp1251     },
  { "CP1252",      reflex::Input::file_encoding::cp1252     },
  { "CP1253",      reflex::Input::file_encoding::cp1253     },
  { "CP1254",      reflex::Input::file_encoding::cp1254     },
  { "CP1255",      reflex::Input::file_encoding::cp1255     },
  { "CP1256",      reflex::Input::file_encoding::cp1256     },
  { "CP1257",      reflex::Input::file_encoding::cp1257     },
  { "CP1258",      reflex::Input::file_encoding::cp1258     },
  { "KOI8-R",      reflex::Input::file_encoding::koi8_r     },
  { "KOI8-U",      reflex::Input::file_encoding::koi8_u     },
  { "KOI8-RU",     reflex::Input::file_encoding::koi8_ru    },
  // sentinel entry that ends the table
  { NULL, 0 }
};
3525 
3526 // table of file types for option -t, --file-type
3527 const Type type_table[] = {
3528   { "actionscript", "as,mxml", NULL,                                                  NULL },
3529   { "ada",          "ada,adb,ads", NULL,                                              NULL },
3530   { "asm",          "asm,s,S", NULL,                                                  NULL },
3531   { "asp",          "asp", NULL,                                                      NULL },
3532   { "aspx",         "master,ascx,asmx,aspx,svc", NULL,                                NULL },
3533   { "autoconf",     "ac,in", NULL,                                                    NULL },
3534   { "automake",     "am,in", NULL,                                                    NULL },
3535   { "awk",          "awk", NULL,                                                      NULL },
3536   { "Awk",          "awk", NULL,                                                      "#!\\h*/.*\\Wg?awk(\\W.*)?\\n" },
3537   { "basic",        "bas,BAS,cls,frm,ctl,vb,resx", NULL,                              NULL },
3538   { "batch",        "bat,BAT,cmd,CMD", NULL,                                          NULL },
3539   { "bison",        "y,yy,yxx", NULL,                                                 NULL },
3540   { "c",            "c,h,H,hdl,xs", NULL,                                             NULL },
3541   { "c++",          "cpp,CPP,cc,cxx,CXX,h,hh,H,hpp,hxx,Hxx,HXX", NULL,                NULL },
3542   { "clojure",      "clj", NULL,                                                      NULL },
3543   { "csharp",       "cs", NULL,                                                       NULL },
3544   { "css",          "css", NULL,                                                      NULL },
3545   { "csv",          "csv", NULL,                                                      NULL },
3546   { "dart",         "dart", NULL,                                                     NULL },
3547   { "Dart",         "dart", NULL,                                                     "#!\\h*/.*\\Wdart(\\W.*)?\\n" },
3548   { "delphi",       "pas,int,dfm,nfm,dof,dpk,dproj,groupproj,bdsgroup,bdsproj", NULL, NULL },
3549   { "elisp",        "el", NULL,                                                       NULL },
3550   { "elixir",       "ex,exs", NULL,                                                   NULL },
3551   { "erlang",       "erl,hrl", NULL,                                                  NULL },
3552   { "fortran",      "for,ftn,fpp,f,F,f77,F77,f90,F90,f95,F95,f03,F03", NULL,          NULL },
3553   { "gif",          "gif", NULL,                                                      NULL },
3554   { "Gif",          "gif", NULL,                                                      "GIF87a|GIF89a" },
3555   { "go",           "go", NULL,                                                       NULL },
3556   { "groovy",       "groovy,gtmpl,gpp,grunit,gradle", NULL,                           NULL },
3557   { "gsp",          "gsp", NULL,                                                      NULL },
3558   { "haskell",      "hs,lhs", NULL,                                                   NULL },
3559   { "html",         "htm,html,xhtml", NULL,                                           NULL },
3560   { "jade",         "jade", NULL,                                                     NULL },
3561   { "java",         "java,properties", NULL,                                          NULL },
3562   { "jpeg",         "jpg,jpeg", NULL,                                                 NULL },
3563   { "Jpeg",         "jpg,jpeg", NULL,                                                 "\\xff\\xd8\\xff[\\xdb\\xe0\\xe1\\xee]" },
3564   { "js",           "js", NULL,                                                       NULL },
3565   { "json",         "json", NULL,                                                     NULL },
3566   { "jsp",          "jsp,jspx,jthm,jhtml", NULL,                                      NULL },
3567   { "julia",        "jl", NULL,                                                       NULL },
3568   { "kotlin",       "kt,kts", NULL,                                                   NULL },
3569   { "less",         "less", NULL,                                                     NULL },
3570   { "lex",          "l,ll,lxx", NULL,                                                 NULL },
3571   { "lisp",         "lisp,lsp", NULL,                                                 NULL },
3572   { "lua",          "lua", NULL,                                                      NULL },
3573   { "m4",           "m4", NULL,                                                       NULL },
3574   { "make",         "mk,mak", "makefile,Makefile,Makefile.Debug,Makefile.Release",    NULL },
3575   { "markdown",     "md", NULL,                                                       NULL },
3576   { "matlab",       "m", NULL,                                                        NULL },
3577   { "node",         "js", NULL,                                                       NULL },
3578   { "Node",         "js", NULL,                                                       "#!\\h*/.*\\Wnode(\\W.*)?\\n" },
3579   { "objc",         "m,h", NULL,                                                      NULL },
3580   { "objc++",       "mm,h", NULL,                                                     NULL },
3581   { "ocaml",        "ml,mli,mll,mly", NULL,                                           NULL },
3582   { "parrot",       "pir,pasm,pmc,ops,pod,pg,tg", NULL,                               NULL },
3583   { "pascal",       "pas,pp", NULL,                                                   NULL },
3584   { "pdf",          "pdf", NULL,                                                      NULL },
3585   { "Pdf",          "pdf", NULL,                                                      "\\x25\\x50\\x44\\x46\\x2d" },
3586   { "perl",         "pl,PL,pm,pod,t,psgi", NULL,                                      NULL },
3587   { "Perl",         "pl,PL,pm,pod,t,psgi", NULL,                                      "#!\\h*/.*\\Wperl(\\W.*)?\\n" },
3588   { "php",          "php,php3,php4,phtml", NULL,                                      NULL },
3589   { "Php",          "php,php3,php4,phtml", NULL,                                      "#!\\h*/.*\\Wphp(\\W.*)?\\n" },
3590   { "png",          "png", NULL,                                                      NULL },
3591   { "Png",          "png", NULL,                                                      "\\x89png\\x0d\\x0a\\x1a\\x0a" },
3592   { "prolog",       "pl,pro", NULL,                                                   NULL },
3593   { "python",       "py", NULL,                                                       NULL },
3594   { "Python",       "py", NULL,                                                       "#!\\h*/.*\\Wpython[23]?(\\W.*)?\\n" },
3595   { "r",            "R", NULL,                                                        NULL },
3596   { "rpm",          "rpm", NULL,                                                      NULL },
3597   { "Rpm",          "rpm", NULL,                                                      "\\xed\\xab\\xee\\xdb" },
3598   { "rst",          "rst", NULL,                                                      NULL },
3599   { "rtf",          "rtf", NULL,                                                      NULL },
3600   { "Rtf",          "rtf", NULL,                                                      "\\{\\rtf1" },
3601   { "ruby",         "rb,rhtml,rjs,rxml,erb,rake,spec", "Rakefile",                    NULL },
3602   { "Ruby",         "rb,rhtml,rjs,rxml,erb,rake,spec", "Rakefile",                    "#!\\h*/.*\\Wruby(\\W.*)?\\n" },
3603   { "rust",         "rs", NULL,                                                       NULL },
3604   { "scala",        "scala", NULL,                                                    NULL },
3605   { "scheme",       "scm,ss", NULL,                                                   NULL },
3606   { "shell",        "sh,bash,dash,csh,tcsh,ksh,zsh,fish", NULL,                       NULL },
3607   { "Shell",        "sh,bash,dash,csh,tcsh,ksh,zsh,fish", NULL,                       "#!\\h*/.*\\W(ba|da|t?c|k|z|fi)?sh(\\W.*)?\\n" },
3608   { "smalltalk",    "st", NULL,                                                       NULL },
3609   { "sql",          "sql,ctl", NULL,                                                  NULL },
3610   { "svg",          "svg", NULL,                                                      NULL },
3611   { "swift",        "swift", NULL,                                                    NULL },
3612   { "tcl",          "tcl,itcl,itk", NULL,                                             NULL },
3613   { "tex",          "tex,cls,sty,bib", NULL,                                          NULL },
3614   { "text",         "text,txt,TXT,md,rst", NULL,                                      NULL },
3615   { "tiff",         "tif,tiff", NULL,                                                 NULL },
3616   { "Tiff",         "tif,tiff", NULL,                                                 "\\x49\\x49\\x2a\\x00|\\x4d\\x4d\\x00\\x2a" },
3617   { "tt",           "tt,tt2,ttml", NULL,                                              NULL },
3618   { "typescript",   "ts,tsx", NULL,                                                   NULL },
3619   { "verilog",      "v,vh,sv", NULL,                                                  NULL },
3620   { "vhdl",         "vhd,vhdl", NULL,                                                 NULL },
3621   { "vim",          "vim", NULL,                                                      NULL },
3622   { "xml",          "xml,xsd,xsl,xslt,wsdl,rss,svg,ent,plist", NULL,                  NULL },
3623   { "Xml",          "xml,xsd,xsl,xslt,wsdl,rss,svg,ent,plist", NULL,                  "<\\?xml " },
3624   { "yacc",         "y", NULL,                                                        NULL },
3625   { "yaml",         "yaml,yml", NULL,                                                 NULL },
3626   { NULL,           NULL, NULL,                                                       NULL }
3627 };
3628 
3629 #ifdef OS_WIN
3630 // ugrep main() for Windows to support wide string arguments and globbing
wmain(int argc,const wchar_t ** wargv)3631 int wmain(int argc, const wchar_t **wargv)
3632 #else
3633 // ugrep main()
3634 int main(int argc, const char **argv)
3635 #endif
3636 {
3637 
3638 #ifdef OS_WIN
3639 
3640   // store UTF-8 arguments for the duration of main() and convert Unicode command line arguments wargv[] to UTF-8 arguments argv[]
3641   const char **argv = new const char *[argc];
3642   for (int i = 0; i < argc; ++i)
3643   {
3644     arg_strings.emplace_back(utf8_encode(wargv[i]));
3645     argv[i] = arg_strings.back().c_str();
3646   }
3647 
3648   // handle CTRL-C
3649   SetConsoleCtrlHandler(&sigint, TRUE);
3650 
3651 #else
3652 
3653   // ignore SIGPIPE
3654   signal(SIGPIPE, SIG_IGN);
3655 
3656   // reset color on SIGINT and SIGTERM
3657   signal(SIGINT, sigint);
3658   signal(SIGTERM, sigint);
3659 
3660 #endif
3661 
3662   try
3663   {
3664     init(argc, argv);
3665   }
3666 
3667   catch (std::exception& error)
3668   {
3669     abort("error: ", error.what());
3670   }
3671 
3672   if (flag_query > 0)
3673   {
3674     if (!flag_no_messages && warnings > 0)
3675       abort("option -Q: warnings are present, use -s to ignore");
3676 
3677     Query::query();
3678   }
3679   else
3680   {
3681     if (!flag_no_messages && flag_pager != NULL && warnings > 0)
3682       abort("option --pager: warnings are present, use -s to ignore");
3683 
3684     try
3685     {
3686       ugrep();
3687     }
3688 
3689     catch (reflex::regex_error& error)
3690     {
3691       abort("error: ", error.what());
3692     }
3693 
3694     catch (std::exception& error)
3695     {
3696       abort("error: ", error.what());
3697     }
3698   }
3699 
3700 #ifdef OS_WIN
3701 
3702   delete[] argv;
3703 
3704 #endif
3705 
3706   return warnings == 0 && Stats::found_any_file() ? EXIT_OK : EXIT_FAIL;
3707 }
3708 
set_depth(const char * arg)3709 static void set_depth(const char *arg)
3710 {
3711   if (flag_max_depth > 0)
3712   {
3713     if (flag_min_depth == 0)
3714       flag_min_depth = flag_max_depth;
3715     flag_max_depth = strtopos(arg, "invalid argument --");
3716     if (flag_min_depth > flag_max_depth)
3717       usage("invalid argument -", arg);
3718   }
3719   else
3720   {
3721     strtopos2(arg, flag_min_depth, flag_max_depth, "invalid argument --", true);
3722   }
3723 }
3724 
3725 // load config file specified or the default .ugrep, located in the working directory or home directory
load_config(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args)3726 static void load_config(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args)
3727 {
3728   // warn about invalid options but do not exit
3729   flag_usage_warnings = true;
3730 
3731   // the default config file is .ugrep when FILE is not specified
3732   if (flag_config == NULL || *flag_config == '\0')
3733     flag_config_file.assign(".ugrep");
3734   else
3735     flag_config_file.assign(flag_config);
3736 
3737   FILE *file = NULL;
3738 
3739   if (fopen_smart(&file, flag_config_file.c_str(), "r") != 0)
3740   {
3741     if (home_dir != NULL)
3742     {
3743       // check the home directory for the configuration file
3744       if (flag_config == NULL || *flag_config == '\0')
3745         flag_config_file.assign(home_dir).append(PATHSEPSTR).append(".ugrep");
3746       else
3747         flag_config_file.assign(home_dir).append(PATHSEPSTR).append(flag_config);
3748       if (fopen_smart(&file, flag_config_file.c_str(), "r") != 0)
3749         file = NULL;
3750     }
3751   }
3752 
3753   if (file != NULL)
3754   {
3755     reflex::BufferedInput input(file);
3756 
3757     std::string line;
3758     size_t lineno = 1;
3759     bool errors = false;
3760 
3761     while (true)
3762     {
3763       // read the next line
3764       if (getline(input, line))
3765         break;
3766 
3767       trim(line);
3768 
3769       // skip empty lines and comments
3770       if (!line.empty() && line.front() != '#')
3771       {
3772         // construct an option argument to parse as argv[]
3773         line.insert(0, "--");
3774         const char *arg = flag_config_options.insert(line).first->c_str();
3775         const char *args[2] = { NULL, arg };
3776 
3777         warnings = 0;
3778 
3779         options(pattern_args, 2, args);
3780 
3781         if (warnings > 0)
3782         {
3783           std::cerr << "ugrep: error in " << flag_config_file << " at line " << lineno << "\n\n";
3784 
3785           errors = true;
3786         }
3787       }
3788 
3789       ++lineno;
3790     }
3791 
3792     if (ferror(file))
3793       error("error while reading", flag_config_file.c_str());
3794 
3795     if (file != stdin)
3796       fclose(file);
3797 
3798     if (errors)
3799     {
3800       std::cerr << "Try 'ugrep --help [WHAT]' for more information\n";
3801 
3802       exit(EXIT_ERROR);
3803     }
3804   }
3805   else if (flag_config != NULL && *flag_config != '\0')
3806   {
3807     error("option --config: cannot read", flag_config_file.c_str());
3808   }
3809 
3810   flag_usage_warnings = false;
3811 }
3812 
3813 // save a configuration file
save_config()3814 static void save_config()
3815 {
3816   FILE *file = NULL;
3817 
3818   if (fopen_smart(&file, flag_save_config, "w") != 0)
3819   {
3820     usage("cannot save configuration file ", flag_save_config);
3821 
3822     return;
3823   }
3824 
3825   if (strcmp(flag_save_config, ".ugrep") == 0)
3826     fprintf(file, "# default .ugrep configuration file used by ug and ugrep --config.\n");
3827   else if (strcmp(flag_save_config, "-") == 0)
3828     fprintf(file, "# ugrep configuration.\n");
3829   else
3830     fprintf(file, "# configuration used with ugrep --config=%s or ---%s.\n", flag_save_config, flag_save_config);
3831 
3832   fprintf(file, "\
3833 #\n\
3834 # A long option is defined per line with an optional `=' and its argument,\n\
3835 # when applicable. Empty lines and lines starting with a `#' are ignored.\n\
3836 #\n\
3837 # Try `ugrep --help [WHAT]' for more information.\n\n");
3838 
3839   fprintf(file, "### TERMINAL DISPLAY ###\n\n");
3840 
3841   fprintf(file, "# Custom color scheme overrides default GREP_COLORS parameters\ncolors=%s\n", flag_colors != NULL ? flag_colors : "");
3842   fprintf(file, "\
3843 # The argument is a colon-separated list of one or more parameters `sl='\n\
3844 # (selected line), `cx=' (context line), `mt=' (matched text), `ms=' (match\n\
3845 # selected), `mc=' (match context), `fn=' (file name), `ln=' (line number),\n\
3846 # `cn=' (column number), `bn=' (byte offset), `se=' (separator).  Parameter\n\
3847 # values are ANSI SGR color codes or `k' (black), `r' (red), `g' (green), `y'\n\
3848 # (yellow), `b' (blue), `m' (magenta), `c' (cyan), `w' (white).  Upper case\n\
3849 # specifies background colors.  A `+' qualifies a color as bright.  A\n\
3850 # foreground and a background color may be combined with font properties `n'\n\
3851 # (normal), `f' (faint), `h' (highlight), `i' (invert), `u' (underline).\n\n");
3852   fprintf(file, "# Enable/disable color\n%s\n\n", flag_color != NULL ? "color" : "no-color");
3853   fprintf(file, "# Enable/disable query UI confirmation prompts, default: confirm\n%s\n\n", flag_confirm ? "confirm" : "no-confirm");
3854   fprintf(file, "# Enable/disable query UI file viewing with CTRL-Y, default: view\n");
3855   if (flag_view != NULL && *flag_view == '\0')
3856     fprintf(file, "view\n\n");
3857   else if (flag_view != NULL)
3858     fprintf(file, "view=%s\n\n", flag_view);
3859   else
3860     fprintf(file, "no-view\n\n");
3861   fprintf(file, "# Enable/disable or specify a pager for terminal output, default: no-pager\n");
3862   if (flag_pager != NULL)
3863     fprintf(file, "pager=%s\n\n", flag_pager);
3864   else
3865     fprintf(file, "no-pager\n\n");
3866   fprintf(file, "# Enable/disable pretty output to the terminal, default: no-pretty\n%s\n\n", flag_pretty ? "pretty" : "no-pretty");
3867   fprintf(file, "# Enable/disable headings for terminal output, default: no-heading\n%s\n\n", flag_heading.is_undefined() ? "# no-heading" : flag_heading ? "heading" : "no-heading");
3868 
3869   if (flag_break.is_defined())
3870     fprintf(file, "# Enable/disable break for terminal output\n%s\n\n", flag_break ? "break" : "no-break");
3871 
3872   if (flag_line_number.is_defined() && flag_line_number != flag_pretty)
3873     fprintf(file, "# Enable/disable line numbers\n%s\n\n", flag_line_number ? "line-number" : "no-line-number");
3874 
3875   if (flag_column_number.is_defined())
3876     fprintf(file, "# Enable/disable column numbers\n%s\n\n", flag_column_number ? "column-number" : "no-column-number");
3877 
3878   if (flag_byte_offset.is_defined())
3879     fprintf(file, "# Enable/disable byte offsets\n%s\n\n", flag_byte_offset ? "byte-offset" : "no-byte-offset");
3880 
3881   if (flag_initial_tab.is_defined() && flag_line_number != flag_pretty)
3882     fprintf(file, "# Enable/disable initial tab\n%s\n\n", flag_initial_tab ? "initial-tab" : "no-initial-tab");
3883 
3884   if (strcmp(flag_binary_files, "hex") == 0)
3885     fprintf(file, "# Hex output\nhex\n\n");
3886   else if (strcmp(flag_binary_files, "with-hex") == 0)
3887     fprintf(file, "# Output with hex for binary matches\nwith-hex\n\n");
3888   if (flag_hexdump != NULL)
3889     fprintf(file, "# Hex dump (columns, no space breaks, no character column, no hex spacing)\nhexdump=%s\n\n", flag_hexdump);
3890 
3891   if (flag_any_line)
3892   {
3893     fprintf(file, "# Display any line as context\nany-line\n\n");
3894   }
3895   else if (flag_after_context > 0 && flag_before_context == flag_after_context)
3896   {
3897     fprintf(file, "# Display context lines\ncontext=%zu\n\n", flag_after_context);
3898   }
3899   else
3900   {
3901     if (flag_after_context > 0)
3902       fprintf(file, "# Display lines after context\nafter-context=%zu\n\n", flag_after_context);
3903     if (flag_before_context > 0)
3904       fprintf(file, "# Display lines before context\nbefore-context=%zu\n\n", flag_before_context);
3905   }
3906   if (flag_group_separator == NULL)
3907     fprintf(file, "# Disable group separator for contexts\nno-group-separator\n\n");
3908   else if (strcmp(flag_group_separator, "--") != 0)
3909     fprintf(file, "# Group separator for contexts\ngroup-separator=%s\n\n", flag_group_separator);
3910 
3911   fprintf(file, "### SEARCH PATTERNS ###\n\n");
3912 
3913   fprintf(file, "# Enable/disable case-insensitive search, default: no-ignore-case\n%s\n\n", flag_ignore_case.is_undefined() ? "# no-ignore-case" : flag_ignore_case ? "ignore-case" : "no-ignore-case");
3914   fprintf(file, "# Enable/disable smart case, default: no-smart-case\n%s\n\n", flag_smart_case.is_undefined() ? "# no-smart-case" : flag_smart_case ? "smart-case" : "no-smart-case");
3915   fprintf(file, "# Enable/disable empty pattern matches, default: no-empty\n%s\n\n", flag_empty.is_undefined() ? "# no-empty" : flag_empty ? "empty" : "no-empty");
3916 
3917   fprintf(file, "### SEARCH TARGETS ###\n\n");
3918 
3919   fprintf(file, "# Enable/disable searching hidden files and directories, default: no-hidden\n%s\n\n", flag_hidden ? "hidden" : "no-hidden");
3920   fprintf(file, "# Enable/disable binary files, default: no-ignore-binary\n%s\n\n", strcmp(flag_binary_files, "without-match") == 0 ? "ignore-binary" : "no-ignore-binary");
3921   fprintf(file, "# Enable/disable decompression and archive search, default: no-decompress\n%s\n\n", flag_decompress ? "decompress" : "no-decompress");
3922   if (flag_ignore_files.empty())
3923   {
3924     fprintf(file, "# Enable/disable ignore files, default: no-ignore-files\nno-ignore-files\n\n");
3925   }
3926   else
3927   {
3928     fprintf(file, "# Enable/disable ignore files, default: no-ignore-files\n");
3929     for (const auto& ignore : flag_ignore_files)
3930       fprintf(file, "ignore-files=%s\n", ignore.c_str());
3931     fprintf(file, "\n");
3932   }
3933   if (flag_filter != NULL)
3934   {
3935     fprintf(file, "# Filtering\nfilter=%s\n\n", flag_filter);
3936     if (!flag_filter_magic_label.empty())
3937     {
3938       fprintf(file, "# Filter by file signature magic bytes\n");
3939       for (const auto& label : flag_filter_magic_label)
3940         fprintf(file, "filter-magic-label=%s\n", label.c_str());
3941       fprintf(file, "# Warning: filter-magic-label significantly reduces performance!\n\n");
3942     }
3943   }
3944 
3945   fprintf(file, "### OUTPUT ###\n\n");
3946 
3947   fprintf(file, "# Enable/disable sorted output, default: no-sort\n");
3948   if (flag_sort != NULL)
3949     fprintf(file, "sort=%s\n\n", flag_sort);
3950   else
3951     fprintf(file, "# no-sort\n\n");
3952 
3953   if (ferror(file))
3954     error("cannot save", flag_save_config);
3955 
3956   if (file != stdout)
3957     fclose(file);
3958 }
3959 
3960 // parse the command-line options
options(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,int argc,const char ** argv)3961 void options(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int argc, const char **argv)
3962 {
3963   bool options = true;
3964 
3965   for (int i = 1; i < argc; ++i)
3966   {
3967     const char *arg = argv[i];
3968 
3969     if ((*arg == '-'
3970 #ifdef OS_WIN
3971          || *arg == '/'
3972 #endif
3973         ) && arg[1] != '\0' && options)
3974     {
3975       bool is_grouped = true;
3976 
3977       // parse a ugrep command-line option
3978       while (is_grouped && *++arg != '\0')
3979       {
3980         switch (*arg)
3981         {
3982           case '-':
3983             is_grouped = false;
3984             if (*++arg == '\0')
3985             {
3986               options = false;
3987               continue;
3988             }
3989 
3990             switch (*arg)
3991             {
3992               case '-':
3993                 break;
3994 
3995               case 'a':
3996                 if (strncmp(arg, "after-context=", 14) == 0)
3997                   flag_after_context = strtonum(arg + 14, "invalid argument --after-context=");
3998                 else if (strcmp(arg, "and") == 0)
3999                   option_and(pattern_args, i, argc, argv);
4000                 else if (strncmp(arg, "and=", 4) == 0)
4001                   option_and(pattern_args, arg + 4);
4002                 else if (strcmp(arg, "andnot") == 0)
4003                   option_andnot(pattern_args, i, argc, argv);
4004                 else if (strncmp(arg, "andnot=", 7) == 0)
4005                   option_andnot(pattern_args, arg + 7);
4006                 else if (strcmp(arg, "any-line") == 0)
4007                   flag_any_line = true;
4008                 else if (strcmp(arg, "after-context") == 0)
4009                   usage("missing argument for --", arg);
4010                 else
4011                   usage("invalid option --", arg, "--after-context, --and, --andnot or --any-line");
4012                 break;
4013 
4014               case 'b':
4015                 if (strcmp(arg, "basic-regexp") == 0)
4016                   flag_basic_regexp = true;
4017                 else if (strncmp(arg, "before-context=", 15) == 0)
4018                   flag_before_context = strtonum(arg + 15, "invalid argument --before-context=");
4019                 else if (strcmp(arg, "binary") == 0)
4020                   flag_binary = true;
4021                 else if (strncmp(arg, "binary-files=", 13) == 0)
4022                   flag_binary_files = arg + 13;
4023                 else if (strcmp(arg, "bool") == 0)
4024                   flag_bool = true;
4025                 else if (strcmp(arg, "break") == 0)
4026                   flag_break = true;
4027                 else if (strcmp(arg, "byte-offset") == 0)
4028                   flag_byte_offset = true;
4029                 else if (strcmp(arg, "before-context") == 0 || strcmp(arg, "binary-files") == 0)
4030                   usage("missing argument for --", arg);
4031                 else
4032                   usage("invalid option --", arg, "--basic-regexp, --before-context, --binary, --binary-files, --bool, --break or --byte-offset");
4033                 break;
4034 
4035               case 'c':
4036                 if (strcmp(arg, "color") == 0 || strcmp(arg, "colour") == 0)
4037                   flag_color = "auto";
4038                 else if (strncmp(arg, "color=", 6) == 0)
4039                   flag_color = arg + 6;
4040                 else if (strncmp(arg, "colour=", 7) == 0)
4041                   flag_color = arg + 7;
4042                 else if (strncmp(arg, "colors=", 7) == 0)
4043                   flag_colors = arg + 7;
4044                 else if (strncmp(arg, "colours=", 8) == 0)
4045                   flag_colors = arg + 8;
4046                 else if (strcmp(arg, "column-number") == 0)
4047                   flag_column_number = true;
4048                 else if (strcmp(arg, "config") == 0 || strncmp(arg, "config=", 7) == 0)
4049                   ; // --config is pre-parsed before other options are parsed
4050                 else if (strcmp(arg, "confirm") == 0)
4051                   flag_confirm = true;
4052                 else if (strncmp(arg, "context=", 8) == 0)
4053                   flag_after_context = flag_before_context = strtonum(arg + 8, "invalid argument --context=");
4054                 else if (strcmp(arg, "count") == 0)
4055                   flag_count = true;
4056                 else if (strcmp(arg, "cpp") == 0)
4057                   flag_cpp = true;
4058                 else if (strcmp(arg, "csv") == 0)
4059                   flag_csv = true;
4060                 else if (strcmp(arg, "colors") == 0 || strcmp(arg, "colours") == 0)
4061                   usage("missing argument for --", arg);
4062                 else
4063                   usage("invalid option --", arg, "--color, --colors, --column-number, --config, --confirm, --context, --count, --cpp or --csv");
4064                 break;
4065 
4066               case 'd':
4067                 if (strcmp(arg, "decompress") == 0)
4068                   flag_decompress = true;
4069                 else if (strncmp(arg, "depth=", 6) == 0)
4070                   strtopos2(arg + 6, flag_min_depth, flag_max_depth, "invalid argument --depth=", true);
4071                 else if (strcmp(arg, "dereference") == 0)
4072                   flag_dereference = true;
4073                 else if (strcmp(arg, "dereference-recursive") == 0)
4074                   flag_directories = "dereference-recurse";
4075                 else if (strncmp(arg, "devices=", 8) == 0)
4076                   flag_devices = arg + 8;
4077                 else if (strncmp(arg, "directories=", 12) == 0)
4078                   flag_directories = arg + 12;
4079                 else if (strcmp(arg, "dotall") == 0)
4080                   flag_dotall = true;
4081                 else if (strcmp(arg, "depth") == 0)
4082                   usage("missing argument for --", arg);
4083                 else
4084                   usage("invalid option --", arg, "--decompress, --depth, --dereference, --dereference-recursive, --devices, --directories or --dotall");
4085                 break;
4086 
4087               case 'e':
4088                 if (strcmp(arg, "empty") == 0)
4089                   flag_empty = true;
4090                 else if (strncmp(arg, "encoding=", 9) == 0)
4091                   flag_encoding = arg + 9;
4092                 else if (strncmp(arg, "exclude=", 8) == 0)
4093                   flag_exclude.emplace_back(arg + 8);
4094                 else if (strncmp(arg, "exclude-dir=", 12) == 0)
4095                   flag_exclude_dir.emplace_back(arg + 12);
4096                 else if (strncmp(arg, "exclude-from=", 13) == 0)
4097                   flag_exclude_from.emplace_back(arg + 13);
4098                 else if (strncmp(arg, "exclude-fs=", 11) == 0)
4099                   flag_exclude_fs.emplace_back(arg + 11);
4100                 else if (strcmp(arg, "extended-regexp") == 0)
4101                   flag_basic_regexp = false;
4102                 else if (strcmp(arg, "encoding") == 0 ||
4103                     strcmp(arg, "exclude") == 0 ||
4104                     strcmp(arg, "exclude-dir") == 0 ||
4105                     strcmp(arg, "exclude-from") == 0 ||
4106                     strcmp(arg, "exclude-fs") == 0)
4107                   usage("missing argument for --", arg);
4108                 else
4109                   usage("invalid option --", arg, "--empty, --encoding, --exclude, --exclude-dir, --exclude-from, --exclude-fs or --extended-regexp");
4110                 break;
4111 
4112               case 'f':
4113                 if (strncmp(arg, "file=", 5) == 0)
4114                   flag_file.emplace_back(arg + 5);
4115                 else if (strncmp(arg, "file-extension=", 15) == 0)
4116                   flag_file_extension.emplace_back(arg + 15);
4117                 else if (strncmp(arg, "file-magic=", 11) == 0)
4118                   flag_file_magic.emplace_back(arg + 11);
4119                 else if (strncmp(arg, "file-type=", 10) == 0)
4120                   flag_file_type.emplace_back(arg + 10);
4121                 else if (strcmp(arg, "files") == 0)
4122                   flag_files = true;
4123                 else if (strcmp(arg, "files-with-matches") == 0)
4124                   flag_files_with_matches = true;
4125                 else if (strcmp(arg, "files-without-match") == 0)
4126                   flag_files_without_match = true;
4127                 else if (strcmp(arg, "fixed-strings") == 0)
4128                   flag_fixed_strings = true;
4129                 else if (strncmp(arg, "filter=", 7) == 0)
4130                   flag_filter = arg + 7;
4131                 else if (strncmp(arg, "filter-magic-label=", 19) == 0)
4132                   flag_filter_magic_label.emplace_back(arg + 19);
4133                 else if (strncmp(arg, "format=", 7) == 0)
4134                   flag_format = arg + 7;
4135                 else if (strncmp(arg, "format-begin=", 13) == 0)
4136                   flag_format_begin = arg + 13;
4137                 else if (strncmp(arg, "format-close=", 13) == 0)
4138                   flag_format_close = arg + 13;
4139                 else if (strncmp(arg, "format-end=", 11) == 0)
4140                   flag_format_end = arg + 11;
4141                 else if (strncmp(arg, "format-open=", 12) == 0)
4142                   flag_format_open = arg + 12;
4143                 else if (strcmp(arg, "fuzzy") == 0)
4144                   flag_fuzzy = 1;
4145                 else if (strncmp(arg, "fuzzy=", 6) == 0)
4146                   flag_fuzzy = strtofuzzy(arg + 6, "invalid argument --fuzzy=");
4147                 else if (strcmp(arg, "free-space") == 0)
4148                   flag_free_space = true;
4149                 else if (strcmp(arg, "file") == 0 ||
4150                     strcmp(arg, "file-extension") == 0 ||
4151                     strcmp(arg, "file-magic") == 0 ||
4152                     strcmp(arg, "file-type") == 0 ||
4153                     strcmp(arg, "filter") == 0 ||
4154                     strcmp(arg, "filter-magic-label") == 0 ||
4155                     strcmp(arg, "format") == 0 ||
4156                     strcmp(arg, "format-begin") == 0 ||
4157                     strcmp(arg, "format-close") == 0 ||
4158                     strcmp(arg, "format-end") == 0 ||
4159                     strcmp(arg, "format-open") == 0)
4160                   usage("missing argument for --", arg);
4161                 else
4162                   usage("invalid option --", arg, "--file, --file-extension, --file-magic, --file-type, --files, --files-with-matches, --files-without-match, --fixed-strings, --filter, --filter-magic-label, --format, --format-begin, --format-close, --format-end, --format-open, --fuzzy or --free-space");
4163                 break;
4164 
4165               case 'g':
4166                 if (strncmp(arg, "glob=", 5) == 0)
4167                   flag_glob.emplace_back(arg + 5);
4168                 else if (strncmp(arg, "group-separator=", 16) == 0)
4169                   flag_group_separator = arg + 16;
4170                 else if (strcmp(arg, "group-separator") == 0)
4171                   flag_group_separator = "--";
4172                 else if (strcmp(arg, "glob") == 0)
4173                   usage("missing argument for --", arg);
4174                 else
4175                   usage("invalid option --", arg, "--glob or --group-separator");
4176                 break;
4177 
4178               case 'h':
4179                 if (strcmp(arg, "heading") == 0)
4180                   flag_heading = true;
4181                 else if (strncmp(arg, "help", 4) == 0)
4182                   help(arg[4] != '\0' ? arg + 4 : ++i < argc ? argv[i] : NULL);
4183                 else if (strcmp(arg, "hex") == 0)
4184                   flag_binary_files = "hex";
4185                 else if (strcmp(arg, "hexdump") == 0)
4186                   flag_hexdump = "2";
4187                 else if (strncmp(arg, "hexdump=", 8) == 0)
4188                   flag_hexdump = arg + 8;
4189                 else if (strcmp(arg, "hidden") == 0)
4190                   flag_hidden = true;
4191                 else if (strcmp(arg, "hyperlink") == 0)
4192                   flag_colors = "hl";
4193                 else
4194                   usage("invalid option --", arg, "--heading, --help, --hex, --hexdump, --hidden or --hyperlink");
4195                 break;
4196 
4197               case 'i':
4198                 if (strcmp(arg, "ignore-binary") == 0)
4199                   flag_binary_files = "without-match";
4200                 else if (strcmp(arg, "ignore-case") == 0)
4201                   flag_ignore_case = true;
4202                 else if (strcmp(arg, "ignore-files") == 0)
4203                   flag_ignore_files.emplace_back(DEFAULT_IGNORE_FILE);
4204                 else if (strncmp(arg, "ignore-files=", 13) == 0)
4205                   flag_ignore_files.emplace_back(arg + 13);
4206                 else if (strncmp(arg, "include=", 8) == 0)
4207                   flag_include.emplace_back(arg + 8);
4208                 else if (strncmp(arg, "include-dir=", 12) == 0)
4209                   flag_include_dir.emplace_back(arg + 12);
4210                 else if (strncmp(arg, "include-from=", 13) == 0)
4211                   flag_include_from.emplace_back(arg + 13);
4212                 else if (strncmp(arg, "include-fs=", 11) == 0)
4213                   flag_include_fs.emplace_back(arg + 11);
4214                 else if (strcmp(arg, "initial-tab") == 0)
4215                   flag_initial_tab = true;
4216                 else if (strcmp(arg, "invert-match") == 0)
4217                   flag_invert_match = true;
4218                 else if (strcmp(arg, "include") == 0 ||
4219                     strcmp(arg, "include-dir") == 0 ||
4220                     strcmp(arg, "include-from") == 0 ||
4221                     strcmp(arg, "include-fs") == 0)
4222                   usage("missing argument for --", arg);
4223                 else
4224                   usage("invalid option --", arg, "--ignore-case, --ignore-files, --include, --include-dir, --include-from, --include-fs, --initial-tab or --invert-match");
4225                 break;
4226 
4227               case 'j':
4228                 if (strncmp(arg, "jobs=", 4) == 0)
4229                   flag_jobs = strtonum(arg + 4, "invalid argument --jobs=");
4230                 else if (strcmp(arg, "json") == 0)
4231                   flag_json = true;
4232                 else if (strcmp(arg, "jobs") == 0)
4233                   usage("missing argument for --", arg);
4234                 else
4235                   usage("invalid option --", arg, "--jobs or --json");
4236                 break;
4237 
4238               case 'l':
4239                 if (strncmp(arg, "label=", 6) == 0)
4240                   flag_label = arg + 6;
4241                 else if (strcmp(arg, "line-buffered") == 0)
4242                   flag_line_buffered = true;
4243                 else if (strcmp(arg, "line-number") == 0)
4244                   flag_line_number = true;
4245                 else if (strcmp(arg, "line-regexp") == 0)
4246                   flag_line_regexp = true;
4247                 else if (strcmp(arg, "lines") == 0)
4248                   flag_files = false;
4249                 else
4250                   usage("invalid option --", arg, "--label, --line-buffered, --line-number, --line-regexp or --lines");
4251                 break;
4252 
4253               case 'm':
4254                 if (strcmp(arg, "match") == 0)
4255                   flag_match = true;
4256                 else if (strncmp(arg, "max-count=", 10) == 0)
4257                   flag_max_count = strtopos(arg + 10, "invalid argument --max-count=");
4258                 else if (strncmp(arg, "max-files=", 10) == 0)
4259                   flag_max_files = strtopos(arg + 10, "invalid argument --max-files=");
4260                 else if (strncmp(arg, "min-steal=", 10) == 0)
4261                   flag_min_steal = strtopos(arg + 10, "invalid argument --min-steal=");
4262                 else if (strcmp(arg, "mmap") == 0)
4263                   flag_max_mmap = MAX_MMAP_SIZE;
4264                 else if (strncmp(arg, "mmap=", 5) == 0)
4265                   flag_max_mmap = strtopos(arg + 5, "invalid argument --mmap=");
4266                 else if (strcmp(arg, "messages") == 0)
4267                   flag_no_messages = false;
4268                 else if (strcmp(arg, "max-count") == 0 || strcmp(arg, "max-files") == 0)
4269                   usage("missing argument for --", arg);
4270                 else
4271                   usage("invalid option --", arg, "--match, --max-count, --max-files, --mmap or --messages");
4272                 break;
4273 
4274               case 'n':
4275                 if (strncmp(arg, "neg-regexp=", 11) == 0)
4276                   option_regexp(pattern_args, arg + 1, true);
4277                 else if (strcmp(arg, "not") == 0)
4278                   option_not(pattern_args, i, argc, argv);
4279                 else if (strncmp(arg, "not=", 4) == 0)
4280                   option_not(pattern_args, arg + 4);
4281                 else if (strcmp(arg, "no-any-line") == 0)
4282                   flag_any_line = false;
4283                 else if (strcmp(arg, "no-binary") == 0)
4284                   flag_binary = false;
4285                 else if (strcmp(arg, "no-bool") == 0)
4286                   flag_bool = false;
4287                 else if (strcmp(arg, "no-break") == 0)
4288                   flag_break = false;
4289                 else if (strcmp(arg, "no-byte-offset") == 0)
4290                   flag_byte_offset = false;
4291                 else if (strcmp(arg, "no-color") == 0 || strcmp(arg, "no-colour") == 0)
4292                   flag_color = "never";
4293                 else if (strcmp(arg, "no-column-number") == 0)
4294                   flag_column_number = false;
4295                 else if (strcmp(arg, "no-confirm") == 0)
4296                   flag_confirm = false;
4297                 else if (strcmp(arg, "no-decompress") == 0)
4298                   flag_decompress = false;
4299                 else if (strcmp(arg, "no-dereference") == 0)
4300                   flag_no_dereference = true;
4301                 else if (strcmp(arg, "no-dotall") == 0)
4302                   flag_dotall = false;
4303                 else if (strcmp(arg, "no-empty") == 0)
4304                   flag_empty = false;
4305                 else if (strcmp(arg, "no-filename") == 0)
4306                   flag_no_filename = true;
4307                 else if (strcmp(arg, "no-group-separator") == 0)
4308                   flag_group_separator = NULL;
4309                 else if (strcmp(arg, "no-heading") == 0)
4310                   flag_heading = false;
4311                 else if (strcmp(arg, "no-hidden") == 0)
4312                   flag_hidden = false;
4313                 else if (strcmp(arg, "no-ignore-binary") == 0)
4314                   flag_binary_files = "binary";
4315                 else if (strcmp(arg, "no-ignore-case") == 0)
4316                   flag_ignore_case = false;
4317                 else if (strcmp(arg, "no-ignore-files") == 0)
4318                   flag_ignore_files.clear();
4319                 else if (strcmp(arg, "no-initial-tab") == 0)
4320                   flag_initial_tab = false;
4321                 else if (strcmp(arg, "no-invert-match") == 0)
4322                   flag_invert_match = false;
4323                 else if (strcmp(arg, "no-line-number") == 0)
4324                   flag_line_number = false;
4325                 else if (strcmp(arg, "no-only-line-number") == 0)
4326                   flag_only_line_number = false;
4327                 else if (strcmp(arg, "no-only-matching") == 0)
4328                   flag_only_matching = false;
4329                 else if (strcmp(arg, "no-messages") == 0)
4330                   flag_no_messages = true;
4331                 else if (strcmp(arg, "no-mmap") == 0)
4332                   flag_max_mmap = 0;
4333                 else if (strcmp(arg, "no-pager") == 0)
4334                   flag_pager = NULL;
4335                 else if (strcmp(arg, "no-pretty") == 0)
4336                   flag_pretty = false;
4337                 else if (strcmp(arg, "no-smart-case") == 0)
4338                   flag_smart_case = false;
4339                 else if (strcmp(arg, "no-sort") == 0)
4340                   flag_sort = NULL;
4341                 else if (strcmp(arg, "no-stats") == 0)
4342                   flag_stats = NULL;
4343                 else if (strcmp(arg, "no-ungroup") == 0)
4344                   flag_ungroup = false;
4345                 else if (strcmp(arg, "no-view") == 0)
4346                   flag_view = NULL;
4347                 else if (strcmp(arg, "null") == 0)
4348                   flag_null = true;
4349                 else if (strcmp(arg, "neg-regexp") == 0)
4350                   usage("missing argument for --", arg);
4351                 else
4352                   usage("invalid option --", arg, "--neg-regexp, --not, --no-any-line, --no-binary, --no-bool, --no-break, --no-byte-offset, --no-color, --no-confirm, --no-decompress, --no-dereference, --no-dotall, --no-empty, --no-filename, --no-group-separator, --no-heading, --no-hidden, --no-ignore-binary, --no-ignore-case, --no-ignore-files --no-initial-tab, --no-invert-match, --no-line-number, --no-only-line-number, --no-only-matching, --no-messages, --no-mmap, --no-pager, --no-pretty, --no-smart-case, --no-sort, --no-stats, --no-ungroup, --no-view or --null");
4353                 break;
4354 
4355               case 'o':
4356                 if (strcmp(arg, "only-line-number") == 0)
4357                   flag_only_line_number = true;
4358                 else if (strcmp(arg, "only-matching") == 0)
4359                   flag_only_matching = true;
4360                 else
4361                   usage("invalid option --", arg, "--only-line-number or --only-matching");
4362                 break;
4363 
4364               case 'p':
4365                 if (strcmp(arg, "pager") == 0)
4366                   flag_pager = DEFAULT_PAGER_COMMAND;
4367                 else if (strncmp(arg, "pager=", 6) == 0)
4368                   flag_pager = arg + 6;
4369                 else if (strcmp(arg, "perl-regexp") == 0)
4370                   flag_perl_regexp = true;
4371                 else if (strcmp(arg, "pretty") == 0)
4372                   flag_pretty = true;
4373                 else
4374                   usage("invalid option --", arg, "--pager, --perl-regexp or --pretty");
4375                 break;
4376 
4377               case 'q':
4378                 if (strcmp(arg, "query") == 0)
4379                   flag_query = DEFAULT_QUERY_DELAY;
4380                 else if (strncmp(arg, "query=", 6) == 0)
4381                   flag_query = strtopos(arg + 6, "invalid argument --query=");
4382                 else if (strcmp(arg, "quiet") == 0)
4383                   flag_quiet = flag_no_messages = true;
4384                 else
4385                   usage("invalid option --", arg, "--query or --quiet");
4386                 break;
4387 
4388               case 'r':
4389                 if (strncmp(arg, "range=", 6) == 0)
4390                   strtopos2(arg + 6, flag_min_line, flag_max_line, "invalid argument --range=");
4391                 else if (strcmp(arg, "recursive") == 0)
4392                   flag_directories = "recurse";
4393                 else if (strncmp(arg, "regexp=", 7) == 0)
4394                   option_regexp(pattern_args, arg + 7);
4395                 else if (strcmp(arg, "range") == 0)
4396                   usage("missing argument for --", arg);
4397                 else
4398                   usage("invalid option --", arg, "--range, --recursive or --regexp");
4399                 break;
4400 
4401               case 's':
4402                 if (strcmp(arg, "save-config") == 0)
4403                   flag_save_config = ".ugrep";
4404                 else if (strncmp(arg, "save-config=", 12) == 0)
4405                   flag_save_config = arg + 12;
4406                 else if (strcmp(arg, "separator") == 0)
4407                   flag_separator = ":";
4408                 else if (strncmp(arg, "separator=", 10) == 0)
4409                   flag_separator = arg + 10;
4410                 else if (strcmp(arg, "silent") == 0)
4411                   flag_quiet = flag_no_messages = true;
4412                 else if (strcmp(arg, "smart-case") == 0)
4413                   flag_smart_case = true;
4414                 else if (strcmp(arg, "sort") == 0)
4415                   flag_sort = "name";
4416                 else if (strncmp(arg, "sort=", 5) == 0)
4417                   flag_sort = arg + 5;
4418                 else if (strcmp(arg, "stats") == 0)
4419                   flag_stats = "";
4420                 else if (strncmp(arg, "stats=", 6) == 0)
4421                   flag_stats = arg + 6;
4422                 else
4423                   usage("invalid option --", arg, "--save-config, --separator, --silent, --smart-case, --sort or --stats");
4424                 break;
4425 
4426               case 't':
4427                 if (strcmp(arg, "tabs") == 0)
4428                   flag_tabs = DEFAULT_TABS;
4429                 else if (strncmp(arg, "tabs=", 5) == 0)
4430                   flag_tabs = strtopos(arg + 5, "invalid argument --tabs=");
4431                 else if (strcmp(arg, "tag") == 0)
4432                   flag_tag = DEFAULT_TAG;
4433                 else if (strncmp(arg, "tag=", 4) == 0)
4434                   flag_tag = arg + 4;
4435                 else if (strcmp(arg, "text") == 0)
4436                   flag_binary_files = "text";
4437                 else
4438                   usage("invalid option --", arg, "--tabs, --tag or --text");
4439                 break;
4440 
4441               case 'u':
4442                 if (strcmp(arg, "ungroup") == 0)
4443                   flag_ungroup = true;
4444                 else
4445                   usage("invalid option --", arg, "--ungroup");
4446                 break;
4447 
4448               case 'v':
4449                 if (strcmp(arg, "version") == 0)
4450                   version();
4451                 else if (strncmp(arg, "view=", 5) == 0)
4452                   flag_view = arg + 5;
4453                 else if (strcmp(arg, "view") == 0)
4454                   flag_view = "";
4455                 else
4456                   usage("invalid option --", arg, "--view or --version");
4457                 break;
4458 
4459               case 'w':
4460                 if (strcmp(arg, "with-filename") == 0)
4461                   flag_with_filename = true;
4462                 else if (strcmp(arg, "with-hex") == 0)
4463                   flag_binary_files = "with-hex";
4464                 else if (strcmp(arg, "word-regexp") == 0)
4465                   flag_word_regexp = true;
4466                 else
4467                   usage("invalid option --", arg, "--with-filename, --with-hex or --word-regexp");
4468                 break;
4469 
4470               case 'x':
4471                 if (strcmp(arg, "xml") == 0)
4472                   flag_xml = true;
4473                 else
4474                   usage("invalid option --", arg, "--xml");
4475                 break;
4476 
4477               default:
4478                 if (isdigit(*arg))
4479                   set_depth(arg);
4480                 else
4481                   usage("invalid option --", arg);
4482             }
4483             break;
4484 
4485           case 'A':
4486             ++arg;
4487             if (*arg)
4488               flag_after_context = strtonum(&arg[*arg == '='], "invalid argument -A=");
4489             else if (++i < argc)
4490               flag_after_context = strtonum(argv[i], "invalid argument -A=");
4491             else
4492               usage("missing NUM argument for option -A");
4493             is_grouped = false;
4494             break;
4495 
4496           case 'a':
4497             flag_binary_files = "text";
4498             break;
4499 
4500           case 'B':
4501             ++arg;
4502             if (*arg)
4503               flag_before_context = strtonum(&arg[*arg == '='], "invalid argument -B=");
4504             else if (++i < argc)
4505               flag_before_context = strtonum(argv[i], "invalid argument -B=");
4506             else
4507               usage("missing NUM argument for option -B");
4508             is_grouped = false;
4509             break;
4510 
4511           case 'b':
4512             flag_byte_offset = true;
4513             break;
4514 
4515           case 'C':
4516             ++arg;
4517             if (*arg)
4518               flag_after_context = flag_before_context = strtonum(&arg[*arg == '='], "invalid argument -C=");
4519             else if (++i < argc)
4520               flag_after_context = flag_before_context = strtonum(argv[i], "invalid argument -C=");
4521             else
4522               usage("missing NUM argument for option -C");
4523             is_grouped = false;
4524             break;
4525 
4526           case 'c':
4527             flag_count = true;
4528             break;
4529 
4530           case 'D':
4531             ++arg;
4532             if (*arg)
4533               flag_devices = &arg[*arg == '='];
4534             else if (++i < argc)
4535               flag_devices = argv[i];
4536             else
4537               usage("missing ACTION argument for option -D");
4538             is_grouped = false;
4539             break;
4540 
4541           case 'd':
4542             ++arg;
4543             if (*arg)
4544               flag_directories = &arg[*arg == '='];
4545             else if (++i < argc)
4546               flag_directories = argv[i];
4547             else
4548               usage("missing ACTION argument for option -d");
4549             is_grouped = false;
4550             break;
4551 
4552           case 'E':
4553             flag_basic_regexp = false;
4554             break;
4555 
4556           case 'e':
4557             ++arg;
4558             if (*arg)
4559               option_regexp(pattern_args, &arg[*arg == '=']);
4560             else if (++i < argc)
4561               option_regexp(pattern_args, argv[i]);
4562             else
4563               usage("missing PATTERN argument for option -e");
4564             is_grouped = false;
4565             break;
4566 
4567           case 'F':
4568             flag_fixed_strings = true;
4569             break;
4570 
4571           case 'f':
4572             ++arg;
4573             if (*arg)
4574               flag_file.emplace_back(&arg[*arg == '=']);
4575             else if (++i < argc)
4576               flag_file.emplace_back(argv[i]);
4577             else
4578               usage("missing FILE argument for option -f");
4579             is_grouped = false;
4580             break;
4581 
4582           case 'G':
4583             flag_basic_regexp = true;
4584             break;
4585 
4586           case 'g':
4587             ++arg;
4588             if (*arg)
4589               flag_glob.emplace_back(&arg[*arg == '=']);
4590             else if (++i < argc)
4591               flag_glob.emplace_back(argv[i]);
4592             else
4593               usage("missing GLOB argument for option -g");
4594             is_grouped = false;
4595             break;
4596 
4597           case 'H':
4598             flag_with_filename = true;
4599             break;
4600 
4601           case 'h':
4602             flag_no_filename = true;
4603             break;
4604 
4605           case 'I':
4606             flag_binary_files = "without-match";
4607             break;
4608 
4609           case 'i':
4610             flag_ignore_case = true;
4611             break;
4612 
4613           case 'J':
4614             ++arg;
4615             if (*arg)
4616               flag_jobs = strtonum(&arg[*arg == '='], "invalid argument -J=");
4617             else if (++i < argc)
4618               flag_jobs = strtonum(argv[i], "invalid argument -J=");
4619             else
4620               usage("missing NUM argument for option -J");
4621             is_grouped = false;
4622             break;
4623 
4624           case 'j':
4625             flag_smart_case = true;
4626             break;
4627 
4628           case 'K':
4629             ++arg;
4630             if (*arg)
4631               strtopos2(&arg[*arg == '='], flag_min_line, flag_max_line, "invalid argument -K=");
4632             else if (++i < argc)
4633               strtopos2(argv[i], flag_min_line, flag_max_line, "invalid argument -K=");
4634             else
4635               usage("missing NUM argument for option -K");
4636             is_grouped = false;
4637             break;
4638 
4639           case 'k':
4640             flag_column_number = true;
4641             break;
4642 
4643           case 'L':
4644             flag_files_without_match = true;
4645             break;
4646 
4647           case 'l':
4648             flag_files_with_matches = true;
4649             break;
4650 
4651           case 'M':
4652             ++arg;
4653             if (*arg)
4654               flag_file_magic.emplace_back(&arg[*arg == '=']);
4655             else if (++i < argc)
4656               flag_file_magic.emplace_back(argv[i]);
4657             else
4658               usage("missing MAGIC argument for option -M");
4659             is_grouped = false;
4660             break;
4661 
4662           case 'm':
4663             ++arg;
4664             if (*arg)
4665               flag_max_count = strtopos(&arg[*arg == '='], "invalid argument -m=");
4666             else if (++i < argc)
4667               flag_max_count = strtopos(argv[i], "invalid argument -m=");
4668             else
4669               usage("missing NUM argument for option -m");
4670             is_grouped = false;
4671             break;
4672 
4673           case 'N':
4674             ++arg;
4675             if (*arg)
4676               option_regexp(pattern_args, &arg[*arg == '='], true);
4677             else if (++i < argc)
4678               option_regexp(pattern_args, argv[i], true);
4679             else
4680               usage("missing PATTERN argument for option -N");
4681             is_grouped = false;
4682             break;
4683 
4684           case 'n':
4685             flag_line_number = true;
4686             break;
4687 
4688           case 'O':
4689             ++arg;
4690             if (*arg)
4691               flag_file_extension.emplace_back(&arg[*arg == '=']);
4692             else if (++i < argc)
4693               flag_file_extension.emplace_back(argv[i]);
4694             else
4695               usage("missing EXTENSIONS argument for option -O");
4696             is_grouped = false;
4697             break;
4698 
4699           case 'o':
4700             flag_only_matching = true;
4701             break;
4702 
4703           case 'P':
4704             flag_perl_regexp = true;
4705             break;
4706 
4707           case 'p':
4708             flag_no_dereference = true;
4709             break;
4710 
4711           case 'Q':
4712             ++arg;
4713             if (*arg == '=' || isdigit(*arg))
4714             {
4715               flag_query = strtopos(&arg[*arg == '='], "invalid argument -Q=");
4716               is_grouped = false;
4717             }
4718             else
4719             {
4720               flag_query = DEFAULT_QUERY_DELAY;
4721               --arg;
4722             }
4723             break;
4724 
4725           case 'q':
4726             flag_quiet = true;
4727             break;
4728 
4729           case 'R':
4730             flag_directories = "dereference-recurse";
4731             break;
4732 
4733           case 'r':
4734             flag_directories = "recurse";
4735             break;
4736 
4737           case 'S':
4738             flag_dereference = true;
4739             break;
4740 
4741           case 's':
4742             flag_no_messages = true;
4743             break;
4744 
4745           case 'T':
4746             flag_initial_tab = true;
4747             break;
4748 
4749           case 't':
4750             ++arg;
4751             if (*arg)
4752               flag_file_type.emplace_back(&arg[*arg == '=']);
4753             else if (++i < argc)
4754               flag_file_type.emplace_back(argv[i]);
4755             else
4756               usage("missing TYPES argument for option -t");
4757             is_grouped = false;
4758             break;
4759 
4760           case 'U':
4761             flag_binary = true;
4762             break;
4763 
4764           case 'u':
4765             flag_ungroup = true;
4766             break;
4767 
4768           case 'V':
4769             version();
4770             break;
4771 
4772           case 'v':
4773             flag_invert_match = true;
4774             break;
4775 
4776           case 'W':
4777             flag_binary_files = "with-hex";
4778             break;
4779 
4780           case 'w':
4781             flag_word_regexp = true;
4782             break;
4783 
4784           case 'X':
4785             flag_binary_files = "hex";
4786             break;
4787 
4788           case 'x':
4789             flag_line_regexp = true;
4790             break;
4791 
4792           case 'Y':
4793             flag_empty = true;
4794             break;
4795 
4796           case 'y':
4797             flag_any_line = true;
4798             break;
4799 
4800           case 'Z':
4801             ++arg;
4802             if (*arg == '=' || isdigit(*arg) || strchr("+-~", *arg) != NULL)
4803             {
4804               flag_fuzzy = strtofuzzy(&arg[*arg == '='], "invalid argument -Z=");
4805               is_grouped = false;
4806             }
4807             else
4808             {
4809               flag_fuzzy = 1;
4810               --arg;
4811             }
4812             break;
4813 
4814 
4815           case 'z':
4816             flag_decompress = true;
4817             break;
4818 
4819           case '0':
4820             flag_null = true;
4821             break;
4822 
4823           case '1':
4824           case '2':
4825           case '3':
4826           case '4':
4827           case '5':
4828           case '6':
4829           case '7':
4830           case '8':
4831           case '9':
4832             if (flag_min_depth == 0 && flag_max_depth > 0)
4833               flag_min_depth = flag_max_depth;
4834             flag_max_depth = *arg - '0';
4835             if (flag_min_depth > flag_max_depth)
4836               usage("invalid argument -", arg);
4837             break;
4838 
4839           case '?':
4840             help(arg[1] != '\0' ? arg + 1 : ++i < argc ? argv[i] : NULL);
4841             break;
4842 
4843           case '%':
4844             flag_bool = true;
4845             break;
4846 
4847           case '+':
4848             flag_heading = true;
4849             break;
4850 
4851           case '.':
4852             flag_hidden = true;
4853             break;
4854 
4855           default:
4856             usage("invalid option -", arg);
4857         }
4858 
4859         if (!is_grouped)
4860           break;
4861       }
4862     }
4863     else if (strcmp(arg, "-") == 0)
4864     {
4865       // read standard input
4866       flag_stdin = true;
4867     }
4868     else if (arg_pattern == NULL && !flag_match && !flag_not && pattern_args.empty() && flag_file.empty())
4869     {
4870       // no regex pattern specified yet, so assume it is PATTERN
4871       arg_pattern = arg;
4872     }
4873     else
4874     {
4875       // otherwise add the file argument to the list of FILE files
4876       arg_files.emplace_back(arg);
4877     }
4878   }
4879 
4880   if (flag_not)
4881     usage("missing PATTERN for --not");
4882 }
4883 
4884 // parse -e PATTERN and -N PATTERN
option_regexp(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,const char * arg,bool is_neg)4885 void option_regexp(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg, bool is_neg)
4886 {
4887   if (flag_query)
4888   {
4889     // -Q: pass -e PATTERN and -N PATTERN patterns to the query engine
4890     if (is_neg)
4891     {
4892       std::string neg_arg(arg);
4893       neg_arg.insert(0, "(?^").append(")");
4894       flag_regexp.emplace_back(neg_arg);
4895     }
4896     else
4897     {
4898       flag_regexp.emplace_back(arg);
4899     }
4900   }
4901   else
4902   {
4903     pattern_args.emplace_back((flag_not ? CNF::PATTERN::NOT : CNF::PATTERN::NA) | (is_neg ? CNF::PATTERN::NEG : CNF::PATTERN::NA), arg);
4904   }
4905 }
4906 
4907 // parse --and [PATTERN]
option_and(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,int & i,int argc,const char ** argv)4908 void option_and(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv)
4909 {
4910   if (flag_not)
4911     usage("missing PATTERN for --not");
4912 
4913   if (flag_query)
4914     usage("option -Q does not support --and");
4915 
4916   pattern_args.emplace_back(CNF::PATTERN::TERM, "");
4917 
4918   if (i + 1 < argc && *argv[i + 1] != '-')
4919     pattern_args.emplace_back((flag_not ? CNF::PATTERN::NOT : CNF::PATTERN::NA), argv[++i]);
4920 }
4921 
4922 // parse --and=PATTERN
option_and(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,const char * arg)4923 void option_and(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg)
4924 {
4925   if (flag_not)
4926     usage("missing PATTERN for --not");
4927 
4928   if (flag_query)
4929     usage("option -Q does not support --and");
4930 
4931   pattern_args.emplace_back(CNF::PATTERN::TERM, "");
4932   pattern_args.emplace_back((flag_not ? CNF::PATTERN::NOT : CNF::PATTERN::NA), arg);
4933 }
4934 
4935 // parse --andnot [PATTERN]
option_andnot(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,int & i,int argc,const char ** argv)4936 void option_andnot(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv)
4937 {
4938   if (flag_not)
4939     usage("missing PATTERN for --not");
4940 
4941   if (flag_query)
4942     usage("option -Q does not support --andnot");
4943 
4944   pattern_args.emplace_back(CNF::PATTERN::TERM, "");
4945 
4946   flag_not = true;
4947 
4948   if (i + 1 < argc && *argv[i + 1] != '-')
4949   {
4950     pattern_args.emplace_back(CNF::PATTERN::NOT, argv[++i]);
4951     flag_not = false;
4952   }
4953 }
4954 
4955 // parse --andnot=PATTERN
option_andnot(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,const char * arg)4956 void option_andnot(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg)
4957 {
4958   if (flag_not)
4959     usage("missing PATTERN for --not");
4960 
4961   if (flag_query)
4962     usage("option -Q does not support --andnot");
4963 
4964   pattern_args.emplace_back(CNF::PATTERN::TERM, "");
4965   pattern_args.emplace_back(CNF::PATTERN::NOT, arg);
4966 }
4967 
4968 // parse --not [PATTERN]
option_not(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,int & i,int argc,const char ** argv)4969 void option_not(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv)
4970 {
4971   if (flag_query)
4972     usage("option -Q does not support --not");
4973 
4974   flag_not = !flag_not;
4975 
4976   if (i + 1 < argc && *argv[i + 1] != '-')
4977   {
4978     pattern_args.emplace_back((flag_not ? CNF::PATTERN::NOT : CNF::PATTERN::NA), argv[++i]);
4979     flag_not = false;
4980   }
4981 }
4982 
4983 // parse --not=PATTERN
option_not(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,const char * arg)4984 void option_not(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg)
4985 {
4986   if (flag_query)
4987     usage("option -Q does not support --not");
4988 
4989   flag_not = !flag_not;
4990 
4991   pattern_args.emplace_back((flag_not ? CNF::PATTERN::NOT : CNF::PATTERN::NA), arg);
4992   flag_not = false;
4993 }
4994 
4995 // parse the command-line options and initialize
init(int argc,const char ** argv)4996 void init(int argc, const char **argv)
4997 {
4998   // get home directory path to expand ~ in options with file arguments, using fopen_smart()
4999 
5000 #ifdef OS_WIN
5001   home_dir = getenv("USERPROFILE");
5002 #else
5003   home_dir = getenv("HOME");
5004 #endif
5005 
5006   // --config=FILE or ---FILE: load configuration file first before parsing any other options
5007 
5008   for (int i = 1; i < argc; ++i)
5009   {
5010     if (strcmp(argv[i], "--") == 0)
5011       break;
5012 
5013     if (strncmp(argv[i], "--config", 8) == 0)
5014     {
5015       if (flag_config != NULL)
5016         std::cerr << "ugrep: warning: multiple configurations specified, ignoring extra " << argv[i] << '\n';
5017       else if (argv[i][8] == '\0')
5018         flag_config = "";
5019       else if (argv[i][8] == '=')
5020         flag_config = argv[i] + 9;
5021     }
5022     else if (strncmp(argv[i], "---", 3) == 0)
5023     {
5024       if (flag_config != NULL)
5025         std::cerr << "ugrep: warning: multiple configurations specified, ignoring extra " << argv[i] << '\n';
5026       else
5027         flag_config = argv[i] + 3;
5028     }
5029   }
5030 
5031   // collect regex pattern arguments -e PATTERN, -N PATTERN, --and PATTERN, --andnot PATTERN
5032   std::list<std::pair<CNF::PATTERN,const char*>> pattern_args;
5033 
5034   if (flag_config != NULL)
5035     load_config(pattern_args);
5036 
5037   // apply the appropriate options when the program is named grep, egrep, fgrep, zgrep, zegrep, zfgrep
5038 
5039   const char *program = strrchr(argv[0], PATHSEPCHR);
5040 
5041   if (program == NULL)
5042     program = argv[0];
5043   else
5044     ++program;
5045 
5046   if (strcmp(program, "ug") == 0)
5047   {
5048     // the 'ug' command is equivalent to 'ugrep --config' to load custom configuration files, when no --config=FILE is specified
5049     if (flag_config == NULL)
5050       load_config(pattern_args);
5051   }
5052   else if (strcmp(program, "grep") == 0)
5053   {
5054     // the 'grep' command is equivalent to 'ugrep -GY.'
5055     flag_basic_regexp = true;
5056     flag_hidden = true;
5057     flag_empty = true;
5058   }
5059   else if (strcmp(program, "egrep") == 0)
5060   {
5061     // the 'egrep' command is equivalent to 'ugrep -Y.'
5062     flag_hidden = true;
5063     flag_empty = true;
5064   }
5065   else if (strcmp(program, "fgrep") == 0)
5066   {
5067     // the 'fgrep' command is equivalent to 'ugrep -FY.'
5068     flag_fixed_strings = true;
5069     flag_hidden = true;
5070     flag_empty = true;
5071   }
5072   else if (strcmp(program, "zgrep") == 0)
5073   {
5074     // the 'zgrep' command is equivalent to 'ugrep -zGY.'
5075     flag_decompress = true;
5076     flag_basic_regexp = true;
5077     flag_hidden = true;
5078     flag_empty = true;
5079   }
5080   else if (strcmp(program, "zegrep") == 0)
5081   {
5082     // the 'zegrep' command is equivalent to 'ugrep -zY.'
5083     flag_decompress = true;
5084     flag_hidden = true;
5085     flag_empty = true;
5086   }
5087   else if (strcmp(program, "zfgrep") == 0)
5088   {
5089     // the 'zfgrep' command is equivalent to 'ugrep -zFY.'
5090     flag_decompress = true;
5091     flag_fixed_strings = true;
5092     flag_hidden = true;
5093     flag_empty = true;
5094   }
5095 
5096   // parse ugrep command-line options and arguments
5097 
5098   options(pattern_args, argc, argv);
5099 
5100   if (warnings > 0)
5101   {
5102     std::cerr << "Usage: ugrep [OPTIONS] [PATTERN] [-f FILE] [-e PATTERN] [FILE ...]\n";
5103     std::cerr << "Try 'ugrep --help [WHAT]' for more information\n";
5104     exit(EXIT_ERROR);
5105   }
5106 
5107   // -t list: list table of types and exit
5108   if (flag_file_type.size() == 1 && flag_file_type[0] == "list")
5109   {
5110     std::cerr << std::setw(12) << "FILE TYPE" << "   -O EXTENSIONS, -g FILENAMES AND FILE SIGNATURE -M 'MAGIC BYTES'\n";
5111 
5112     for (int i = 0; type_table[i].type != NULL; ++i)
5113     {
5114       std::cerr << std::setw(12) << type_table[i].type << " = -O " << type_table[i].extensions << '\n';
5115       if (type_table[i].filenames)
5116         std::cerr << std::setw(18) << "-g " << type_table[i].filenames << "\n";
5117       if (type_table[i].magic)
5118         std::cerr << std::setw(19) << "-M '" << type_table[i].magic << "'\n";
5119     }
5120 
5121     exit(EXIT_ERROR);
5122   }
5123 
5124 #ifndef HAVE_LIBZ
5125   // -z: but we don't have libz
5126   if (flag_decompress)
5127     usage("option -z is not available in this build configuration of ugrep");
5128 #endif
5129 
5130   // -P disables -F, -G and -Z (P>F>G>E override)
5131   if (flag_perl_regexp)
5132   {
5133 #if defined(HAVE_PCRE2) || defined(HAVE_BOOST_REGEX)
5134     flag_fixed_strings = false;
5135     flag_basic_regexp = false;
5136     if (flag_fuzzy > 0)
5137       usage("options -P and -Z are not compatible");
5138 #else
5139     usage("option -P is not available in this build configuration of ugrep");
5140 #endif
5141   }
5142 
5143   // -F disables -G (P>F>G>E override)
5144   if (flag_fixed_strings)
5145     flag_basic_regexp = false;
5146 
5147   // populate the CNF with the collected regex pattern args, each arg points to a persistent command line argv[]
5148   for (const auto &arg : pattern_args)
5149   {
5150     if (arg.first == CNF::PATTERN::TERM)
5151       bcnf.new_term();
5152     else
5153       bcnf.new_pattern(arg.first, arg.second); // relies on options --bool, -F, -G, -w, -x, and -f
5154   }
5155 
5156   // --query: override --pager
5157   if (flag_query > 0)
5158     flag_pager = NULL;
5159 
5160   // check TTY info and set colors (warnings and errors may occur from here on)
5161   terminal();
5162 
5163   // --save-config and --save-config=FILE
5164   if (flag_save_config != NULL)
5165   {
5166     save_config();
5167 
5168     exit(EXIT_ERROR);
5169   }
5170 
5171 #ifdef OS_WIN
5172   // save_config() and help() assume text mode, so switch to
5173   // binary after we're no longer going to call them.
5174   (void)_setmode(fileno(stdout), _O_BINARY);
5175 #endif
5176 
5177   // --encoding: parse ENCODING value
5178   if (flag_encoding != NULL)
5179   {
5180     int i, j;
5181 
5182     // scan the encoding_table[] for a matching encoding, case insensitive ASCII
5183     for (i = 0; encoding_table[i].format != NULL; ++i)
5184     {
5185       for (j = 0; flag_encoding[j] != '\0' && encoding_table[i].format[j] != '\0'; ++j)
5186         if (toupper(flag_encoding[j]) != toupper(encoding_table[i].format[j]))
5187           break;
5188 
5189       if (flag_encoding[j] == '\0' && encoding_table[i].format[j] == '\0')
5190         break;
5191     }
5192 
5193     if (encoding_table[i].format == NULL)
5194     {
5195       std::string msg = "invalid argument --encoding=ENCODING, valid arguments are";
5196 
5197       for (int i = 0; encoding_table[i].format != NULL; ++i)
5198         msg.append(" '").append(encoding_table[i].format).append("',");
5199       msg.pop_back();
5200 
5201       usage(msg.c_str());
5202     }
5203 
5204     // encoding is the file encoding used by all input files, if no BOM is present
5205     flag_encoding_type = encoding_table[i].encoding;
5206   }
5207 
5208   // --binary-files: normalize by assigning flags
5209   if (strcmp(flag_binary_files, "without-match") == 0)
5210     flag_binary_without_match = true;
5211   else if (strcmp(flag_binary_files, "text") == 0)
5212     flag_text = true;
5213   else if (strcmp(flag_binary_files, "hex") == 0)
5214     flag_hex = true;
5215   else if (strcmp(flag_binary_files, "with-hex") == 0)
5216     flag_with_hex = true;
5217   else if (strcmp(flag_binary_files, "binary") != 0)
5218     usage("invalid argument --binary-files=TYPE, valid arguments are 'binary', 'without-match', 'text', 'hex', and 'with-hex'");
5219 
5220   // --hex takes priority over --with-hex takes priority over -I takes priority over -a
5221   if (flag_hex)
5222     flag_with_hex = (flag_binary_without_match = flag_text = false);
5223   else if (flag_with_hex)
5224     flag_binary_without_match = (flag_text = false);
5225   else if (flag_binary_without_match)
5226     flag_text = false;
5227 
5228   // --hexdump: normalize by assigning flags
5229   if (flag_hexdump != NULL)
5230   {
5231     if (isdigit(*flag_hexdump))
5232     {
5233       flag_hex_columns = 8 * (*flag_hexdump - '0');
5234       if (flag_hex_columns == 0 || flag_hex_columns > MAX_HEX_COLUMNS)
5235         usage("invalid argument --hexdump=[1-8][a][b][c][h]");
5236     }
5237     if (strchr(flag_hexdump, 'a') != NULL)
5238       flag_hex_ast = true;
5239     if (strchr(flag_hexdump, 'b') != NULL)
5240       flag_hex_hbr = flag_hex_cbr = false;
5241     if (strchr(flag_hexdump, 'c') != NULL)
5242       flag_hex_chr = false;
5243     if (strchr(flag_hexdump, 'h') != NULL)
5244       flag_hex_hbr = false;
5245     if (!flag_with_hex)
5246       flag_hex = true;
5247   }
5248 
5249   // --tabs: value should be 1, 2, 4, or 8
5250   if (flag_tabs && flag_tabs != 1 && flag_tabs != 2 && flag_tabs != 4 && flag_tabs != 8)
5251     usage("invalid argument --tabs=NUM, valid arguments are 1, 2, 4, or 8");
5252 
5253   // --match: same as specifying an empty "" pattern argument
5254   if (flag_match)
5255     arg_pattern = "";
5256 
5257   // if no regex pattern is specified and no -e PATTERN and no -f FILE and not -Q, then exit with usage message
5258   if (arg_pattern == NULL && pattern_args.empty() && flag_file.empty() && flag_query == 0)
5259     usage("no PATTERN specified: specify an empty \"\" pattern to match all input");
5260 
5261   // regex PATTERN should be a FILE argument when -Q or -e PATTERN is specified
5262   if (!flag_match && arg_pattern != NULL && (flag_query > 0 || !pattern_args.empty()))
5263   {
5264     arg_files.insert(arg_files.begin(), arg_pattern);
5265     arg_pattern = NULL;
5266   }
5267 
5268 #ifdef OS_WIN
5269 
5270   // Windows shell does not expand wildcards in arguments, do that now (basename part only)
5271   if (!arg_files.empty())
5272   {
5273     std::vector<const char*> expanded_arg_files;
5274 
5275     for (const auto& arg_file : arg_files)
5276     {
5277       std::wstring filename = utf8_decode(arg_file);
5278       bool has_wildcard_char = false;
5279 
5280       size_t basename_pos;
5281       for (basename_pos = filename.size(); basename_pos > 0; --basename_pos)
5282       {
5283         wchar_t ch = filename[basename_pos - 1];
5284 
5285         if (ch == L'*' || ch == L'?')
5286           has_wildcard_char = true;
5287         else if (ch == L'\\' || ch == L'/' || ch == L':')
5288           break;
5289       }
5290 
5291       if (!has_wildcard_char)
5292       {
5293         // no wildcard chars, use argument as-is
5294         expanded_arg_files.push_back(arg_file);
5295         continue;
5296       }
5297 
5298       WIN32_FIND_DATAW find_data;
5299 
5300       HANDLE hFile = FindFirstFileExW(filename.c_str(), FindExInfoBasic, &find_data, FindExSearchNameMatch, NULL, 0);
5301       if (hFile == INVALID_HANDLE_VALUE)
5302       {
5303         // glob pattern didn't match any files, use argument as-is which will trigger a warning later
5304         expanded_arg_files.push_back(arg_file);
5305         continue;
5306       }
5307 
5308       bool glob_starts_with_dot = filename[basename_pos] == L'.';
5309 
5310       do
5311       {
5312         if (find_data.cFileName[0] == L'.')
5313         {
5314           // don't expand directories "." or ".."
5315           if (find_data.cFileName[1] == 0 ||
5316               (find_data.cFileName[1] == L'.' && find_data.cFileName[2] == 0))
5317             continue;
5318 
5319           // don't expand hidden files unless --hidden or the pattern started with '.'
5320           if (!flag_hidden && !glob_starts_with_dot)
5321             continue;
5322         }
5323 
5324         // replace glob pattern with matching filename converted to UTF-8, then add to expanded filename list
5325         filename.erase(basename_pos);
5326         filename += find_data.cFileName;
5327         arg_strings.emplace_back(utf8_encode(filename));
5328         expanded_arg_files.push_back(arg_strings.back().c_str());
5329       } while (FindNextFileW(hFile, &find_data));
5330 
5331       FindClose(hFile);
5332     }
5333 
5334     // replace the original filenames list with the expanded list
5335     arg_files.swap(expanded_arg_files);
5336   }
5337 
5338 #endif
5339 
5340   // -D: check ACTION value
5341   if (strcmp(flag_devices, "skip") == 0)
5342     flag_devices_action = Action::SKIP;
5343   else if (strcmp(flag_devices, "read") == 0)
5344     flag_devices_action = Action::READ;
5345   else
5346     usage("invalid argument -D ACTION, valid arguments are 'skip' and 'read'");
5347 
5348   // normalize -R (--dereference-recurse) option
5349   if (strcmp(flag_directories, "dereference-recurse") == 0)
5350   {
5351     flag_directories = "recurse";
5352     flag_dereference = true;
5353   }
5354 
5355   // -d: check ACTION value and set flags
5356   if (strcmp(flag_directories, "skip") == 0)
5357     flag_directories_action = Action::SKIP;
5358   else if (strcmp(flag_directories, "read") == 0)
5359     flag_directories_action = Action::READ;
5360   else if (strcmp(flag_directories, "recurse") == 0)
5361     flag_directories_action = Action::RECURSE;
5362   else
5363     usage("invalid argument -d ACTION, valid arguments are 'skip', 'read', 'recurse', and 'dereference-recurse'");
5364 
5365   // if no FILE specified and no -r or -R specified, when reading standard input from a TTY then enable -R
5366   if (!flag_stdin && arg_files.empty() && flag_directories_action != Action::RECURSE && isatty(STDIN_FILENO))
5367   {
5368     flag_directories_action = Action::RECURSE;
5369     flag_dereference = true;
5370   }
5371 
5372   // if no FILE specified then read standard input, unless recursive searches are specified
5373   if (arg_files.empty() && flag_min_depth == 0 && flag_max_depth == 0 && flag_directories_action != Action::RECURSE)
5374     flag_stdin = true;
5375 
5376   // check FILE arguments, warn about non-existing FILE
5377   auto file = arg_files.begin();
5378   while (file != arg_files.end())
5379   {
5380 #ifdef OS_WIN
5381 
5382     DWORD attr = GetFileAttributesW(utf8_decode(*file).c_str());
5383 
5384     if (attr == INVALID_FILE_ATTRIBUTES)
5385     {
5386       // FILE does not exist
5387       errno = ENOENT;
5388       warning(NULL, *file);
5389 
5390       file = arg_files.erase(file);
5391       if (arg_files.empty())
5392         exit(EXIT_ERROR);
5393     }
5394     else
5395     {
5396       // use threads to recurse into a directory
5397       if ((attr & FILE_ATTRIBUTE_DIRECTORY))
5398       {
5399         flag_all_threads = true;
5400 
5401         // remove trailing path separators, if any (*file points to argv[])
5402         trim_pathname_arg(*file);
5403       }
5404 
5405       ++file;
5406     }
5407 
5408 #else
5409 
5410     struct stat buf;
5411 
5412     if (stat(*file, &buf) != 0)
5413     {
5414       // FILE does not exist
5415       warning(NULL, *file);
5416 
5417       file = arg_files.erase(file);
5418       if (arg_files.empty())
5419         exit(EXIT_ERROR);
5420     }
5421     else
5422     {
5423       // use threads to recurse into a directory
5424       if (S_ISDIR(buf.st_mode))
5425       {
5426         flag_all_threads = true;
5427 
5428         // remove trailing path separators, if any (*file points to argv[])
5429         trim_pathname_arg(*file);
5430       }
5431 
5432       ++file;
5433     }
5434 
5435 #endif
5436   }
5437 
5438   // normalize --cpp, --csv, --json, --xml to their corresponding --format
5439   if (flag_cpp)
5440   {
5441     flag_format_begin = "const struct grep {\n  const char *file;\n  size_t line;\n  size_t column;\n  size_t offset;\n  const char *match;\n} matches[] = {\n";
5442     flag_format_open  = "  // %f\n";
5443     flag_format       = "  { %h, %n, %k, %b, %C },\n%u";
5444     flag_format_close = "  \n";
5445     flag_format_end   = "  { NULL, 0, 0, 0, NULL }\n};\n";
5446   }
5447   else if (flag_csv)
5448   {
5449     flag_format       = "%[,]$%H%N%K%B%V\n%u";
5450   }
5451   else if (flag_json)
5452   {
5453     flag_format_begin = "[";
5454     flag_format_open  = "%,\n  {\n    %[,\n    ]$%[\"file\": ]H\"matches\": [";
5455     flag_format       = "%,\n      { %[, ]$%[\"line\": ]N%[\"column\": ]K%[\"offset\": ]B\"match\": %J }%u";
5456     flag_format_close = "\n    ]\n  }";
5457     flag_format_end   = "\n]\n";
5458   }
5459   else if (flag_xml)
5460   {
5461     flag_format_begin = "<grep>\n";
5462     flag_format_open  = "  <file%[]$%[ name=]H>\n";
5463     flag_format       = "    <match%[\"]$%[ line=\"]N%[ column=\"]K%[ offset=\"]B>%X</match>\n%u";
5464     flag_format_close = "  </file>\n";
5465     flag_format_end   = "</grep>\n";
5466   }
5467 
5468 #ifdef HAVE_STATVFS
5469 
5470   // --exclude-fs: add file system ids to exclude
5471   for (const auto& mounts : flag_exclude_fs)
5472   {
5473     if (!mounts.empty())
5474     {
5475       struct statvfs buf;
5476       size_t from = 0;
5477 
5478       while (true)
5479       {
5480         size_t to = mounts.find(',', from);
5481         size_t size = (to == std::string::npos ? mounts.size() : to) - from;
5482 
5483         if (size > 0)
5484         {
5485           std::string mount(mounts.substr(from, size));
5486 
5487           if (statvfs(mount.c_str(), &buf) == 0)
5488             exclude_fs_ids.insert(static_cast<uint64_t>(buf.f_fsid));
5489           else
5490             warning("--exclude-fs", mount.c_str());
5491         }
5492 
5493         if (to == std::string::npos)
5494           break;
5495 
5496         from = to + 1;
5497       }
5498     }
5499   }
5500 
5501   // --include-fs: add file system ids to include
5502   for (const auto& mounts : flag_include_fs)
5503   {
5504     if (!mounts.empty())
5505     {
5506       struct statvfs buf;
5507       size_t from = 0;
5508 
5509       while (true)
5510       {
5511         size_t to = mounts.find(',', from);
5512         size_t size = (to == std::string::npos ? mounts.size() : to) - from;
5513 
5514         if (size > 0)
5515         {
5516           std::string mount(mounts.substr(from, size));
5517 
5518           if (statvfs(mount.c_str(), &buf) == 0)
5519             include_fs_ids.insert(static_cast<uint64_t>(buf.f_fsid));
5520           else
5521             warning("--include-fs", mount.c_str());
5522         }
5523 
5524         if (to == std::string::npos)
5525           break;
5526 
5527         from = to + 1;
5528       }
5529     }
5530   }
5531 
5532 #endif
5533 
5534   // --exclude-from: add globs to the exclude and exclude-dir lists
5535   for (const auto& from : flag_exclude_from)
5536   {
5537     if (!from.empty())
5538     {
5539       FILE *file = NULL;
5540 
5541       if (fopen_smart(&file, from.c_str(), "r") != 0)
5542         error("option --exclude-from: cannot read", from.c_str());
5543 
5544       split_globs(file, flag_exclude, flag_exclude_dir);
5545 
5546       if (file != stdin)
5547         fclose(file);
5548     }
5549   }
5550 
5551   // --include-from: add globs to the include and include-dir lists
5552   for (const auto& from : flag_include_from)
5553   {
5554     if (!from.empty())
5555     {
5556       FILE *file = NULL;
5557 
5558       if (fopen_smart(&file, from.c_str(), "r") != 0)
5559         error("option --include-from: cannot read", from.c_str());
5560 
5561       split_globs(file, flag_include, flag_include_dir);
5562 
5563       if (file != stdin)
5564         fclose(file);
5565     }
5566   }
5567 
5568   // -t: parse TYPES and access type table to add -O (--file-extension), -g (--glob) and -M (--file-magic) values
5569   for (const auto& types : flag_file_type)
5570   {
5571     size_t from = 0;
5572 
5573     while (true)
5574     {
5575       size_t to = types.find(',', from);
5576       size_t size = (to == std::string::npos ? types.size() : to) - from;
5577 
5578       if (size > 0)
5579       {
5580         bool negate = size > 1 && (types[from] == '!' || types[from] == '^');
5581 
5582         if (negate)
5583         {
5584           ++from;
5585           --size;
5586         }
5587 
5588         std::string type(types.substr(from, size));
5589 
5590         size_t i;
5591 
5592         // scan the type_table[] for a matching type
5593         for (i = 0; type_table[i].type != NULL; ++i)
5594           if (type == type_table[i].type)
5595             break;
5596 
5597         if (type_table[i].type == NULL)
5598         {
5599           std::string msg = "invalid argument -t TYPES, valid arguments are";
5600 
5601           for (int i = 0; type_table[i].type != NULL; ++i)
5602             msg.append(" '").append(type_table[i].type).append("',");
5603           msg.append(" and 'list' to show a detailed list of file types");
5604 
5605           usage(msg.c_str());
5606         }
5607 
5608         std::string temp(type_table[i].extensions);
5609 
5610         if (negate)
5611         {
5612           temp.insert(0, "!");
5613           size_t j = 0;
5614           while ((j = temp.find(',', j)) != std::string::npos)
5615             temp.insert(++j, "!");
5616         }
5617 
5618         flag_file_extension.emplace_back(temp);
5619 
5620         if (type_table[i].filenames != NULL)
5621         {
5622           temp.assign(type_table[i].filenames);
5623 
5624           if (negate)
5625           {
5626             temp.insert(0, "!");
5627             size_t j = 0;
5628             while ((j = temp.find(',', j)) != std::string::npos)
5629               temp.insert(++j, "!");
5630           }
5631 
5632           flag_glob.emplace_back(temp);
5633         }
5634 
5635         if (type_table[i].magic != NULL)
5636         {
5637           flag_file_magic.emplace_back(type_table[i].magic);
5638 
5639           if (negate)
5640             flag_file_magic.back().insert(0, "!");
5641         }
5642       }
5643 
5644       if (to == std::string::npos)
5645         break;
5646 
5647       from = to + 1;
5648     }
5649   }
5650 
5651   // -O: add filename extensions as globs
5652   for (const auto& extensions : flag_file_extension)
5653   {
5654     size_t from = 0;
5655     std::string glob;
5656 
5657     while (true)
5658     {
5659       size_t to = extensions.find(',', from);
5660       size_t size = (to == std::string::npos ? extensions.size() : to) - from;
5661 
5662       if (size > 0)
5663       {
5664         bool negate = size > 1 && (extensions[from] == '!' || extensions[from] == '^');
5665 
5666         if (negate)
5667         {
5668           ++from;
5669           --size;
5670         }
5671 
5672         flag_glob.emplace_back(glob.assign(negate ? "^*." : "*.").append(extensions.substr(from, size)));
5673       }
5674 
5675       if (to == std::string::npos)
5676         break;
5677 
5678       from = to + 1;
5679     }
5680   }
5681 
5682   // -M: file "magic bytes" regex string
5683   std::string magic_regex;
5684 
5685   // -M !MAGIC: combine to create a regex string
5686   for (const auto& magic : flag_file_magic)
5687   {
5688     if (magic.size() > 1 && (magic.front() == '!' || magic.front() == '^'))
5689     {
5690       if (!magic_regex.empty())
5691         magic_regex.push_back('|');
5692       magic_regex.append(magic.substr(1));
5693 
5694       // tally negative MAGIC patterns
5695       ++flag_min_magic;
5696     }
5697   }
5698 
5699   // -M MAGIC: append to regex string
5700   for (const auto& magic : flag_file_magic)
5701   {
5702     if (magic.size() <= 1 || (magic.front() != '!' && magic.front() != '^'))
5703     {
5704       if (!magic_regex.empty())
5705         magic_regex.push_back('|');
5706       magic_regex.append(magic);
5707 
5708       // we have positive MAGIC patterns, so scan() is a match when flag_min_magic or greater
5709       flag_not_magic = flag_min_magic;
5710     }
5711   }
5712 
5713   // -M: create a magic matcher for the MAGIC regex to match file with magic.scan()
5714   try
5715   {
5716     // construct magic_pattern DFA for -M !MAGIC and -M MAGIC
5717     if (!magic_regex.empty())
5718       magic_pattern.assign(magic_regex, "r");
5719     magic_matcher.pattern(magic_pattern);
5720   }
5721 
5722   catch (reflex::regex_error& error)
5723   {
5724     abort("option -M: ", error.what());
5725   }
5726 
5727   // --filter-magic-label: construct filter_magic_pattern and map "magic bytes" to labels
5728   magic_regex = "(";
5729 
5730   // --filter-magic-label: append pattern to magic_labels, parenthesized to ensure capture indexing
5731   for (auto& label : flag_filter_magic_label)
5732   {
5733     if (!label.empty())
5734     {
5735       size_t sep = label.find(':');
5736 
5737       if (sep != std::string::npos && sep > 0 && sep + 1 < label.size())
5738       {
5739         if (!label.empty() && magic_regex.size() > 1)
5740           magic_regex.append(")|(");
5741         magic_regex.append(label.substr(sep + 1));
5742 
5743         // truncate so we end up with a list of labels without patterns
5744         label.resize(sep);
5745       }
5746       else
5747       {
5748         abort("option --filter-magic-label: invalid LABEL:MAGIC argument ", label);
5749       }
5750     }
5751   }
5752 
5753   magic_regex.push_back(')');
5754 
5755   // --filter-magic-label: create a filter_magic_pattern
5756   try
5757   {
5758     // construct filter_magic_pattern DFA
5759     if (magic_regex.size() > 2)
5760       filter_magic_pattern.assign(magic_regex, "r");
5761   }
5762 
5763   catch (reflex::regex_error& error)
5764   {
5765     abort("option --filter-magic-label: ", error.what());
5766   }
5767 }
5768 
5769 // check TTY info and set colors
terminal()5770 void terminal()
5771 {
5772   if (flag_query > 0)
5773   {
5774     // -Q: disable --quiet
5775     flag_quiet = false;
5776   }
5777   else if (!flag_quiet)
5778   {
5779     // is output sent to a color TTY, to a pager, or to /dev/null?
5780 
5781     // check if standard output is a TTY
5782     tty_term = isatty(STDOUT_FILENO) != 0;
5783 
5784 #ifndef OS_WIN
5785 
5786     if (!tty_term)
5787     {
5788       output_stat_result = fstat(STDOUT_FILENO, &output_stat) == 0;
5789       output_stat_regular = output_stat_result && S_ISREG(output_stat.st_mode);
5790 
5791       // if output is sent to /dev/null, then enable -q (i.e. "cheat" like GNU grep!)
5792       struct stat dev_null_stat;
5793       if (output_stat_result &&
5794           S_ISCHR(output_stat.st_mode) &&
5795           stat("/dev/null", &dev_null_stat) == 0 &&
5796           output_stat.st_dev == dev_null_stat.st_dev &&
5797           output_stat.st_ino == dev_null_stat.st_ino)
5798       {
5799         flag_quiet = true;
5800       }
5801     }
5802 
5803 #endif
5804   }
5805 
5806   // whether to apply colors
5807   flag_apply_color = flag_tag != NULL ? "never" : flag_query > 0 ? "always" : flag_color;
5808 
5809   if (!flag_quiet)
5810   {
5811     if (tty_term || flag_query > 0)
5812     {
5813       if (flag_pretty)
5814       {
5815         // --pretty: if output is to a TTY then enable --color, --heading, -T, -n, and --sort
5816 
5817         // enable --color
5818         if (flag_apply_color == NULL)
5819           flag_apply_color = "auto";
5820 
5821         // enable --heading if not explicitly disabled (enables --break later)
5822         if (flag_heading.is_undefined())
5823           flag_heading = true;
5824 
5825         // enable -T if not explicitly disabled (initial tab)
5826         if (flag_initial_tab.is_undefined())
5827           flag_initial_tab = true;
5828 
5829         // enable -n if not explicitly disabled
5830         if (flag_line_number.is_undefined())
5831           flag_line_number = true;
5832 
5833         // enable --sort=name if no --sort specified
5834         if (flag_sort == NULL)
5835           flag_sort = "name";
5836       }
5837       else if (flag_apply_color != NULL)
5838       {
5839         // --colors: if output is to a TTY then enable --color and use the specified --colors
5840 
5841         // enable --color
5842         if (flag_apply_color == NULL)
5843           flag_apply_color = "auto";
5844       }
5845 
5846       if (flag_query > 0)
5847       {
5848         // --query: run the interactive query UI
5849 
5850         // enable --heading if not explicitly disabled (enables --break later)
5851         if (flag_heading.is_undefined())
5852           flag_heading = true;
5853 
5854         // enable --line-buffered to flush output immediately
5855         flag_line_buffered = true;
5856       }
5857       else if (flag_pager != NULL && *flag_pager != '\0')
5858       {
5859         // --pager: if output is to a TTY then page through the results
5860 
5861         // open a pipe to a forked pager
5862 #ifdef OS_WIN
5863         output = popen(flag_pager, "wb");
5864 #else
5865         output = popen(flag_pager, "w");
5866 #endif
5867         if (output == NULL)
5868           error("cannot open pipe to pager", flag_pager);
5869 
5870         // enable --heading if not explicitly disabled (enables --break later)
5871         if (flag_heading.is_undefined())
5872           flag_heading = true;
5873 
5874         // enable --line-buffered to flush output to the pager immediately
5875         flag_line_buffered = true;
5876       }
5877     }
5878 
5879     // --color: (re)set flag_apply_color depending on color_term and TTY output
5880     if (flag_apply_color != NULL)
5881     {
5882       color_term = flag_query > 0;
5883 
5884       if (strcmp(flag_apply_color, "never") == 0 || strcmp(flag_apply_color, "no") == 0 || strcmp(flag_apply_color, "none") == 0)
5885       {
5886         flag_apply_color = NULL;
5887       }
5888       else
5889       {
5890 #ifdef OS_WIN
5891 
5892         if (tty_term || flag_query > 0)
5893         {
5894 #ifdef ENABLE_VIRTUAL_TERMINAL_PROCESSING
5895           // assume we have a color terminal on Windows if isatty() is true
5896           HANDLE hConOut = GetStdHandle(STD_OUTPUT_HANDLE);
5897           if (hConOut != INVALID_HANDLE_VALUE)
5898           {
5899 #ifdef CP_UTF8
5900             // enable UTF-8 output
5901             SetConsoleOutputCP(CP_UTF8);
5902 #endif
5903             // try virtual terminal processing for ANSI SGR codes, enable colors when successful
5904             DWORD outMode;
5905             GetConsoleMode(hConOut, &outMode);
5906             outMode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING;
5907             color_term = SetConsoleMode(hConOut, outMode) != 0;
5908           }
5909 #endif
5910         }
5911 
5912 #else
5913 
5914         // check whether we have a color terminal
5915         if (tty_term)
5916         {
5917           const char *term;
5918           if (getenv("COLORTERM") != NULL ||
5919               ((term = getenv("TERM")) != NULL &&
5920                (strstr(term, "ansi") != NULL ||
5921                 strstr(term, "xterm") != NULL ||
5922                 strstr(term, "screen") != NULL ||
5923                 strstr(term, "color") != NULL)))
5924             color_term = true;
5925         }
5926 
5927 #endif
5928 
5929         if (strcmp(flag_apply_color, "auto") == 0 || strcmp(flag_apply_color, "tty") == 0 || strcmp(flag_apply_color, "if-tty") == 0)
5930         {
5931           if (!color_term)
5932             flag_apply_color = NULL;
5933         }
5934         else if (strcmp(flag_apply_color, "always") != 0 && strcmp(flag_apply_color, "yes") != 0 && strcmp(flag_apply_color, "force") != 0)
5935         {
5936           usage("invalid argument --color=WHEN, valid arguments are 'never', 'always', and 'auto'");
5937         }
5938 
5939         if (flag_apply_color != NULL)
5940         {
5941           // get GREP_COLOR and GREP_COLORS, when defined
5942           char *env_grep_color = NULL;
5943           dupenv_s(&env_grep_color, "GREP_COLOR");
5944           char *env_grep_colors = NULL;
5945           dupenv_s(&env_grep_colors, "GREP_COLORS");
5946           const char *grep_colors = env_grep_colors;
5947 
5948           // if GREP_COLOR is defined but not GREP_COLORS, use it to set mt= default value (overridden by GREP_COLORS mt=, ms=, mc=)
5949           if (env_grep_colors == NULL && env_grep_color != NULL)
5950             set_color(std::string("mt=").append(env_grep_color).c_str(), "mt=", color_mt);
5951           else if (grep_colors == NULL)
5952             grep_colors = DEFAULT_GREP_COLORS;
5953 
5954           // parse GREP_COLORS
5955           set_color(grep_colors, "sl=", color_sl); // selected line
5956           set_color(grep_colors, "cx=", color_cx); // context line
5957           set_color(grep_colors, "mt=", color_mt); // matched text in any line
5958           set_color(grep_colors, "ms=", color_ms); // matched text in selected line
5959           set_color(grep_colors, "mc=", color_mc); // matched text in a context line
5960           set_color(grep_colors, "fn=", color_fn); // file name
5961           set_color(grep_colors, "ln=", color_ln); // line number
5962           set_color(grep_colors, "cn=", color_cn); // column number
5963           set_color(grep_colors, "bn=", color_bn); // byte offset
5964           set_color(grep_colors, "se=", color_se); // separator
5965 
5966           // parse --colors to override GREP_COLORS
5967           set_color(flag_colors, "sl=", color_sl); // selected line
5968           set_color(flag_colors, "cx=", color_cx); // context line
5969           set_color(flag_colors, "mt=", color_mt); // matched text in any line
5970           set_color(flag_colors, "ms=", color_ms); // matched text in selected line
5971           set_color(flag_colors, "mc=", color_mc); // matched text in a context line
5972           set_color(flag_colors, "fn=", color_fn); // file name
5973           set_color(flag_colors, "ln=", color_ln); // line number
5974           set_color(flag_colors, "cn=", color_cn); // column number
5975           set_color(flag_colors, "bn=", color_bn); // byte offset
5976           set_color(flag_colors, "se=", color_se); // separator
5977 
5978           // -v: if rv in GREP_COLORS then swap the sl and cx colors (note that rv does not match color letters)
5979           if (flag_invert_match &&
5980               ((grep_colors != NULL && strstr(grep_colors, "rv") != NULL) ||
5981                (flag_colors != NULL && strstr(flag_colors, "rv") != NULL)))
5982           {
5983             char color_tmp[COLORLEN];
5984             copy_color(color_tmp, color_sl);
5985             copy_color(color_sl, color_cx);
5986             copy_color(color_cx, color_tmp);
5987           }
5988 
5989           // if ms= is not specified, use the mt= value
5990           if (*color_ms == '\0')
5991             copy_color(color_ms, color_mt);
5992 
5993           // if mc= is not specified, use the mt= value
5994           if (*color_mc == '\0')
5995             copy_color(color_mc, color_mt);
5996 
5997           // if OSC hyperlinks are OK (note that "hl" does not match color letters so strstr can be used)
5998           if ((grep_colors != NULL && strstr(grep_colors, "hl") != NULL) || (flag_colors != NULL && strstr(flag_colors, "hl") != NULL))
5999           {
6000             char *cwd = getcwd0();
6001             if (cwd != NULL)
6002             {
6003               char *path = cwd;
6004               if (*path == PATHSEPCHR)
6005                 ++path;
6006               color_wd.assign("file://localhost").append(PATHSEPSTR).append(path).push_back(PATHSEPCHR);
6007               free(cwd);
6008               color_hl = "\033]8;;";
6009               color_st = "\033\\";
6010             }
6011           }
6012 
6013           // if CSI erase line is OK (note that ne does not match color letters so strstr can be used)
6014           if ((grep_colors == NULL || strstr(grep_colors, "ne") == NULL) && (flag_colors == NULL || strstr(flag_colors, "ne") == NULL))
6015             color_del = "\033[K";
6016 
6017           color_off = "\033[m";
6018 
6019           copy_color(match_off, color_off);
6020 
6021           if (isatty(STDERR_FILENO))
6022           {
6023             color_high    = "\033[1m";
6024             color_error   = "\033[1;31m";
6025             color_warning = "\033[1;35m";
6026             color_message = "\033[1;36m";
6027           }
6028 
6029           if (env_grep_color != NULL)
6030             free(env_grep_color);
6031           if (env_grep_colors != NULL)
6032             free(env_grep_colors);
6033         }
6034       }
6035     }
6036   }
6037 }
6038 
6039 // search the specified files, directories, and/or standard input for pattern matches
ugrep()6040 void ugrep()
6041 {
6042   // reset warnings
6043   warnings = 0;
6044 
6045   // reset stats
6046   Stats::reset();
6047 
6048   // populate the combined all-include and all-exclude
6049   flag_all_include = flag_include;
6050   flag_all_include_dir = flag_include_dir;
6051   flag_all_exclude = flag_exclude;
6052   flag_all_exclude_dir = flag_exclude_dir;
6053 
6054   // -g, --glob: add globs to all-include/all-exclude
6055   for (const auto& globs : flag_glob)
6056   {
6057     size_t from = 0;
6058     std::string glob;
6059 
6060     while (true)
6061     {
6062       size_t to = globs.find(',', from);
6063       size_t size = (to == std::string::npos ? globs.size() : to) - from;
6064 
6065       if (size > 0)
6066       {
6067         bool negate = size > 1 && (globs[from] == '!' || globs[from] == '^');
6068 
6069         if (negate)
6070         {
6071           ++from;
6072           --size;
6073         }
6074 
6075         (negate ? flag_all_exclude : flag_all_include).emplace_back(globs.substr(from, size));
6076       }
6077 
6078       if (to == std::string::npos)
6079         break;
6080 
6081       from = to + 1;
6082     }
6083   }
6084 
6085   // all excluded files: normalize by moving directory globs (globs ending in a path separator /) to --exclude-dir
6086   auto i = flag_all_exclude.begin();
6087   while (i != flag_all_exclude.end())
6088   {
6089     if (i->empty())
6090     {
6091       i = flag_all_exclude.erase(i);
6092     }
6093     else if (i->back() == '/')
6094     {
6095       flag_all_exclude_dir.emplace_back(*i);
6096       i = flag_all_exclude.erase(i);
6097     }
6098     else
6099     {
6100       ++i;
6101     }
6102   }
6103 
6104   // all included files: normalize by moving directory globs (globs ending in a path separator /) to --include-dir
6105   i = flag_all_include.begin();
6106   while (i != flag_all_include.end())
6107   {
6108     if (i->empty())
6109     {
6110       i = flag_all_include.erase(i);
6111     }
6112     else
6113     {
6114       if (i->back() == '/')
6115       {
6116         flag_all_include_dir.emplace_back(*i);
6117         i = flag_all_include.erase(i);
6118       }
6119       else
6120       {
6121         // if an include file glob starts with a dot, then enable searching hidden files and directories
6122         if (i->front() == '.' || i->find(PATHSEPSTR ".") != std::string::npos)
6123           flag_hidden = true;
6124 
6125         ++i;
6126       }
6127     }
6128   }
6129 
6130   // if an include dir glob starts with a dot, then enable searching hidden files and directories
6131   if (!flag_hidden)
6132   {
6133     for (const auto& dir : flag_all_include_dir)
6134     {
6135       if (dir.front() == '.' || dir.find(PATHSEPSTR ".") != std::string::npos)
6136       {
6137         flag_hidden = true;
6138         break;
6139       }
6140     }
6141   }
6142 
6143 #ifdef HAVE_LIBZ
6144 #ifdef WITH_DECOMPRESSION_THREAD
6145   // -z with -M or -O/--include: add globs to search archive contents
6146   if (flag_decompress && (!flag_file_magic.empty() || !flag_all_include.empty()))
6147   {
6148     flag_all_include.emplace_back("*.cpio");
6149     flag_all_include.emplace_back("*.pax");
6150     flag_all_include.emplace_back("*.tar");
6151     flag_all_include.emplace_back("*.zip");
6152     flag_all_include.emplace_back("*.zipx");
6153     flag_all_include.emplace_back("*.ZIP");
6154 
6155     flag_all_include.emplace_back("*.cpio.gz");
6156     flag_all_include.emplace_back("*.pax.gz");
6157     flag_all_include.emplace_back("*.tar.gz");
6158     flag_all_include.emplace_back("*.taz");
6159     flag_all_include.emplace_back("*.tgz");
6160     flag_all_include.emplace_back("*.tpz");
6161 
6162     flag_all_include.emplace_back("*.cpio.Z");
6163     flag_all_include.emplace_back("*.pax.Z");
6164     flag_all_include.emplace_back("*.tar.Z");
6165 
6166     flag_all_include.emplace_back("*.cpio.zip");
6167     flag_all_include.emplace_back("*.pax.zip");
6168     flag_all_include.emplace_back("*.tar.zip");
6169 
6170 #ifdef HAVE_LIBBZ2
6171     flag_all_include.emplace_back("*.cpio.bz");
6172     flag_all_include.emplace_back("*.pax.bz");
6173     flag_all_include.emplace_back("*.tar.bz");
6174     flag_all_include.emplace_back("*.cpio.bz2");
6175     flag_all_include.emplace_back("*.pax.bz2");
6176     flag_all_include.emplace_back("*.tar.bz2");
6177     flag_all_include.emplace_back("*.cpio.bzip2");
6178     flag_all_include.emplace_back("*.pax.bzip2");
6179     flag_all_include.emplace_back("*.tar.bzip2");
6180     flag_all_include.emplace_back("*.tb2");
6181     flag_all_include.emplace_back("*.tbz");
6182     flag_all_include.emplace_back("*.tbz2");
6183     flag_all_include.emplace_back("*.tz2");
6184 #endif
6185 
6186 #ifdef HAVE_LIBLZMA
6187     flag_all_include.emplace_back("*.cpio.lzma");
6188     flag_all_include.emplace_back("*.pax.lzma");
6189     flag_all_include.emplace_back("*.tar.lzma");
6190     flag_all_include.emplace_back("*.cpio.xz");
6191     flag_all_include.emplace_back("*.pax.xz");
6192     flag_all_include.emplace_back("*.tar.xz");
6193     flag_all_include.emplace_back("*.tlz");
6194     flag_all_include.emplace_back("*.txz");
6195 #endif
6196 
6197 #ifdef HAVE_LIBLZ4
6198     flag_all_include.emplace_back("*.cpio.lz4");
6199     flag_all_include.emplace_back("*.pax.lz4");
6200     flag_all_include.emplace_back("*.tar.lz4");
6201 #endif
6202 
6203 #ifdef HAVE_LIBZSTD
6204     flag_all_include.emplace_back("*.cpio.zst");
6205     flag_all_include.emplace_back("*.pax.zst");
6206     flag_all_include.emplace_back("*.tar.zst");
6207     flag_all_include.emplace_back("*.cpio.zstd");
6208     flag_all_include.emplace_back("*.pax.zstd");
6209     flag_all_include.emplace_back("*.tar.zstd");
6210     flag_all_include.emplace_back("*.tzst");
6211 #endif
6212   }
6213 #endif
6214 #endif
6215 
6216   // all excluded-dirs: normalize by removing trailing path separators
6217   for (auto& i : flag_all_exclude_dir)
6218     while (i.size() > 1 && i.back() == '/')
6219       i.pop_back();
6220 
6221   // all included-dirs: normalize by removing trailing path separators
6222   for (auto& i : flag_all_include_dir)
6223     while (i.size() > 1 && i.back() == '/')
6224       i.pop_back();
6225 
6226   // --sort: check sort KEY and set flags
6227   if (flag_sort != NULL)
6228   {
6229     flag_sort_rev = *flag_sort == 'r';
6230 
6231     if (strcmp(flag_sort, "name") == 0 || strcmp(flag_sort, "rname") == 0)
6232       flag_sort_key = Sort::NAME;
6233     else if (strcmp(flag_sort, "best") == 0 || strcmp(flag_sort, "rbest") == 0)
6234       flag_sort_key = Sort::BEST;
6235     else if (strcmp(flag_sort, "size") == 0 || strcmp(flag_sort, "rsize") == 0)
6236       flag_sort_key = Sort::SIZE;
6237     else if (strcmp(flag_sort, "used") == 0 || strcmp(flag_sort, "rused") == 0)
6238       flag_sort_key = Sort::USED;
6239     else if (strcmp(flag_sort, "changed") == 0 || strcmp(flag_sort, "rchanged") == 0)
6240       flag_sort_key = Sort::CHANGED;
6241     else if (strcmp(flag_sort, "created") == 0 || strcmp(flag_sort, "rcreated") == 0)
6242       flag_sort_key = Sort::CREATED;
6243     else
6244       usage("invalid argument --sort=KEY, valid arguments are 'name', 'best', 'size', 'used', 'changed', 'created', 'rname', 'rbest', 'rsize', 'rused', 'rchanged', and 'rcreated'");
6245   }
6246 
6247   // add PATTERN to the CNF
6248   if (arg_pattern != NULL)
6249     bcnf.new_pattern(CNF::PATTERN::NA, arg_pattern);
6250 
6251   // the regex compiled from PATTERN, -e PATTERN, -N PATTERN, and -f FILE
6252   std::string regex;
6253 
6254   if (bcnf.defined())
6255   {
6256     // prune empty terms from the CNF that match anything
6257     bcnf.prune();
6258 
6259     // split the patterns at newlines, standard grep behavior
6260     bcnf.split();
6261 
6262     if (flag_file.empty())
6263     {
6264       // the CNF patterns to search, this matches more than necessary to support multiline matching and to highlight all matches in color
6265       regex.assign(bcnf.adjoin());
6266 
6267       // an empty pattern specified matches every line with ^.* (using ^ to prevent -o from making an extra empty match), including empty lines
6268       if (regex.empty())
6269       {
6270         regex = flag_hex ? ".*\\n?" : "^.*";
6271         flag_empty = true;
6272         flag_dotall = false;
6273       }
6274 
6275       // CNF is empty if all patterns are empty, i.e. match anything unless -f FILE specified
6276       if (bcnf.empty())
6277       {
6278         flag_match = true;
6279         flag_dotall = false;
6280       }
6281     }
6282     else
6283     {
6284       // -f FILE is combined with -e, --and, --andnot, --not
6285 
6286       if (bcnf.first_empty())
6287       {
6288         // an empty pattern specified with -e '' matches every line
6289         regex = flag_hex ? ".*\\n?" : "^.*";
6290         flag_empty = true;
6291       }
6292       else
6293       {
6294         // for efficiency, take only the first CNF OR-list terms to search in combination with -f FILE patterns
6295         regex.assign(bcnf.first());
6296       }
6297     }
6298   }
6299 
6300   // -v with --files is not permitted
6301   if (flag_files && flag_invert_match)
6302   {
6303     abort("-v is not permitted with --files, invert the Boolean query instead");
6304     flag_invert_match = false;
6305   }
6306 
6307   // -x or --match: enable -Y and disable --dotall and -w
6308   if (flag_line_regexp || flag_match)
6309   {
6310     flag_empty = true;
6311     flag_dotall = false;
6312     flag_word_regexp = false;
6313   }
6314 
6315   // -f: get patterns from file
6316   if (!flag_file.empty())
6317   {
6318     bool line_regexp = flag_line_regexp;
6319     bool word_regexp = flag_word_regexp;
6320 
6321     // -F: make newline-separated lines in regex literal with \Q and \E
6322     const char *Q = flag_fixed_strings ? "\\Q" : "";
6323     const char *E = flag_fixed_strings ? "\\E|" : flag_basic_regexp ? "\\|" : "|";
6324 
6325     // PATTERN or -e PATTERN: add an ending '|' (or BRE '\|') to the regex to concatenate sub-expressions
6326     if (!regex.empty())
6327     {
6328       // -F does not apply to patterns in -f FILE when PATTERN or -e PATTERN is specified
6329       Q = "";
6330       E = flag_basic_regexp ? "\\|" : "|";
6331 
6332       // -x and -w do not apply to patterns in -f FILE when PATTERN or -e PATTERN is specified
6333       line_regexp = false;
6334       word_regexp = false;
6335 
6336       regex.append(E);
6337     }
6338 
6339     // -f: read patterns from the specified file or files
6340     for (const auto& filename : flag_file)
6341     {
6342       FILE *file = NULL;
6343 
6344       if (fopen_smart(&file, filename.c_str(), "r") != 0)
6345         file = NULL;
6346 
6347       if (file == NULL)
6348       {
6349         // could not open, try GREP_PATH environment variable
6350         char *env_grep_path = NULL;
6351         dupenv_s(&env_grep_path, "GREP_PATH");
6352 
6353         if (env_grep_path != NULL)
6354         {
6355           if (fopen_smart(&file, std::string(env_grep_path).append(PATHSEPSTR).append(filename).c_str(), "r") != 0)
6356             file = NULL;
6357 
6358           free(env_grep_path);
6359         }
6360       }
6361 
6362 #ifdef GREP_PATH
6363       if (file == NULL)
6364       {
6365         if (fopen_smart(&file, std::string(GREP_PATH).append(PATHSEPSTR).append(filename).c_str(), "r") != 0)
6366           file = NULL;
6367       }
6368 #endif
6369 
6370       if (file == NULL)
6371         throw std::runtime_error(std::string("option -f: cannot read ").append(filename)); // to catch in query UI
6372 
6373       reflex::BufferedInput input(file);
6374       std::string line;
6375 
6376       while (true)
6377       {
6378         // read the next line
6379         if (getline(input, line))
6380           break;
6381 
6382         // add line to the regex if not empty
6383         if (!line.empty())
6384           regex.append(Q).append(line).append(E);
6385       }
6386 
6387       if (file != stdin)
6388         fclose(file);
6389     }
6390 
6391     // pop unused ending '|' (or BRE '\|') from the |-concatenated regexes in the regex string
6392     regex.pop_back();
6393     if (flag_basic_regexp)
6394       regex.pop_back();
6395 
6396     // -G requires \( \) instead of ( ) and -P requires (?<!\w) (?!\w) instead of \< and \>
6397     const char *xleft = flag_basic_regexp ? "^\\(" : "^(?:";
6398     const char *xright = flag_basic_regexp ? "\\)$" : ")$";
6399 #if defined(HAVE_PCRE2)
6400     const char *wleft = flag_basic_regexp ? "\\<\\(" : flag_perl_regexp ? "(?<!\\w)(?:" : "\\<(";
6401     const char *wright = flag_basic_regexp ? "\\)\\>" : flag_perl_regexp ? ")(?!\\w)" : ")\\>";
6402 #else // Boost.Regex
6403     const char *wleft = flag_basic_regexp ? "\\<\\(" : flag_perl_regexp ? "(?<![[:word:]])(?:" : "\\<(";
6404     const char *wright = flag_basic_regexp ? "\\)\\>" : flag_perl_regexp ? ")(?![[:word:]])" : ")\\>";
6405 #endif
6406 
6407     // -x or -w: if no PATTERN is specified, then apply -x or -w to -f FILE patterns
6408     if (line_regexp)
6409       regex.insert(0, xleft).append(xright); // make the regex line-anchored
6410     else if (word_regexp)
6411       regex.insert(0, wleft).append(wright); // make the regex word-anchored
6412   }
6413 
6414   // --match: adjust color highlighting to show matches as selected lines without color
6415   if (flag_match)
6416   {
6417     copy_color(match_ms, color_sl);
6418     copy_color(match_mc, color_cx);
6419     copy_color(match_off, color_off);
6420   }
6421   else
6422   {
6423     // --tag: output tagged matches instead of colors
6424     if (flag_tag != NULL)
6425     {
6426       const char *s1 = strchr(flag_tag, ',');
6427       const char *s2 = s1 != NULL ? strchr(s1 + 1, ',') : NULL;
6428 
6429       copy_color(match_ms, flag_tag);
6430 
6431       if (s1 == NULL)
6432       {
6433         copy_color(match_mc, flag_tag);
6434         copy_color(match_off, flag_tag);
6435       }
6436       else
6437       {
6438         copy_color(match_off, s1 + 1);
6439 
6440         if (s2 == NULL)
6441           copy_color(match_mc, match_ms);
6442         else
6443           copy_color(match_mc, s2 + 1);
6444       }
6445     }
6446     else
6447     {
6448       copy_color(match_ms, color_ms);
6449       copy_color(match_mc, color_mc);
6450       copy_color(match_off, color_off);
6451     }
6452   }
6453 
6454   // -j: case insensitive search if regex does not contain an upper case letter
6455   if (flag_smart_case)
6456   {
6457     flag_ignore_case = true;
6458 
6459     for (size_t i = 0; i < regex.size(); ++i)
6460     {
6461       if (regex[i] == '\\')
6462       {
6463         ++i;
6464       }
6465       else if (regex[i] == '{')
6466       {
6467         while (++i < regex.size() && regex[i] != '}')
6468           continue;
6469       }
6470       else if (isupper(regex[i]))
6471       {
6472         flag_ignore_case = false;
6473         break;
6474       }
6475     }
6476   }
6477 
6478   // -y: disable -A, -B, and -C
6479   if (flag_any_line)
6480     flag_after_context = flag_before_context = 0;
6481 
6482   // -A, -B, or -C: disable -o
6483   if (flag_after_context > 0 || flag_before_context > 0)
6484     flag_only_matching = false;
6485 
6486   // -v or -y: disable -o and -u
6487   if (flag_invert_match || flag_any_line)
6488     flag_only_matching = flag_ungroup = false;
6489 
6490   // --depth: if -R or -r is not specified then enable -R
6491   if ((flag_min_depth > 0 || flag_max_depth > 0) && flag_directories_action != Action::RECURSE)
6492   {
6493     flag_directories_action = Action::RECURSE;
6494     flag_dereference = true;
6495   }
6496 
6497   // -p (--no-dereference) and -S (--dereference): -p takes priority over -S and -R
6498   if (flag_no_dereference)
6499     flag_dereference = false;
6500 
6501   // display file name if more than one input file is specified or options -R, -r, and option -h --no-filename is not specified
6502   if (!flag_no_filename && (flag_all_threads || flag_directories_action == Action::RECURSE || arg_files.size() > 1 || (flag_stdin && !arg_files.empty())))
6503     flag_with_filename = true;
6504 
6505   // --only-line-number implies -n
6506   if (flag_only_line_number)
6507     flag_line_number = true;
6508 
6509   // if no display options -H, -n, -k, -b are set, enable --no-header to suppress headers for speed
6510   if (!flag_with_filename && !flag_line_number && !flag_column_number && !flag_byte_offset)
6511     flag_no_header = true;
6512 
6513   // -q: we only need to find one matching file and we're done
6514   if (flag_quiet)
6515   {
6516     flag_max_files = 1;
6517 
6518     // -q overrides -l and -L
6519     flag_files_with_matches = false;
6520     flag_files_without_match = false;
6521 
6522     // disable --format options
6523     flag_format_begin = NULL;
6524     flag_format_open = NULL;
6525     flag_format = NULL;
6526     flag_format_close = NULL;
6527     flag_format_end = NULL;
6528   }
6529 
6530   // -L: enable -l and flip -v i.e. -L=-lv and -l=-Lv
6531   if (flag_files_without_match)
6532   {
6533     flag_files_with_matches = true;
6534     flag_invert_match = !flag_invert_match;
6535   }
6536 
6537   // -l or -L: enable -H, disable -c
6538   if (flag_files_with_matches)
6539   {
6540     flag_with_filename = true;
6541     flag_count = false;
6542   }
6543 
6544   // --heading: enable --break when filenames are shown
6545   if (flag_heading && flag_with_filename)
6546     flag_break = true;
6547 
6548   // -J: when not set the default is the number of cores (or hardware threads), limited to MAX_JOBS
6549   if (flag_jobs == 0)
6550   {
6551     unsigned int cores = std::thread::hardware_concurrency();
6552     unsigned int concurrency = cores > 2 ? cores : 2;
6553     flag_jobs = std::min(concurrency, MAX_JOBS);
6554   }
6555 
6556   // --sort and --max-files: limit number of threads to --max-files to prevent unordered results, this is a special case
6557   if (flag_sort_key != Sort::NA && flag_max_files > 0)
6558     flag_jobs = std::min(flag_jobs, flag_max_files);
6559 
6560   // set the number of threads to the number of files or when recursing to the value of -J, --jobs
6561   if (flag_all_threads || flag_directories_action == Action::RECURSE)
6562     threads = flag_jobs;
6563   else
6564     threads = std::min(arg_files.size() + flag_stdin, flag_jobs);
6565 
6566   // inverted character classes and \s do not match newlines, e.g. [^x] matches anything except x and \n
6567   reflex::convert_flag_type convert_flags = reflex::convert_flag::notnewline;
6568 
6569   // not -U: convert regex to Unicode
6570   if (!flag_binary)
6571     convert_flags |= reflex::convert_flag::unicode;
6572 
6573   // -G: convert basic regex (BRE) to extended regex (ERE)
6574   if (flag_basic_regexp)
6575     convert_flags |= reflex::convert_flag::basic;
6576 
6577   // set reflex::Pattern options to enable multiline mode
6578   std::string pattern_options("(?m");
6579 
6580   // -i: case insensitive reflex::Pattern option, applies to ASCII only
6581   if (flag_ignore_case)
6582     pattern_options.push_back('i');
6583 
6584   // --dotall and not --match (or empty pattern): dot matches newline
6585   if (flag_dotall)
6586     pattern_options.push_back('s');
6587 
6588   // --free-space: convert_flags is needed to check free-space conformance by the converter
6589   if (flag_free_space)
6590   {
6591     convert_flags |= reflex::convert_flag::freespace;
6592     pattern_options.push_back('x');
6593   }
6594 
6595   // prepend the pattern options (?m...) to the regex
6596   pattern_options.push_back(')');
6597   regex.insert(0, pattern_options);
6598 
6599   // reflex::Matcher options
6600   std::string matcher_options;
6601 
6602   // -Y: permit empty pattern matches
6603   if (flag_empty)
6604     matcher_options.push_back('N');
6605 
6606   // -w: match whole words, i.e. make \< and \> match only left side and right side, respectively
6607   if (flag_word_regexp)
6608     matcher_options.push_back('W');
6609 
6610   // --tabs: set reflex::Matcher option T to NUM (1, 2, 4, or 8) tab size
6611   if (flag_tabs)
6612     matcher_options.append("T=").push_back(static_cast<char>(flag_tabs) + '0');
6613 
6614   // --format-begin
6615   if (flag_format_begin != NULL)
6616     format(flag_format_begin, 0);
6617 
6618   size_t nodes = 0;
6619   size_t edges = 0;
6620   size_t words = 0;
6621   size_t nodes_time = 0;
6622   size_t edges_time = 0;
6623   size_t words_time = 0;
6624 
6625   // -P: Perl matching with PCRE2 or Boost.Regex
6626   if (flag_perl_regexp)
6627   {
6628 #if defined(HAVE_PCRE2)
6629     // construct the PCRE2 JIT-optimized NFA-based Perl pattern matcher
6630     std::string pattern(flag_binary ? reflex::PCRE2Matcher::convert(regex, convert_flags) : reflex::PCRE2UTFMatcher::convert(regex, convert_flags));
6631     reflex::PCRE2Matcher matcher(pattern, reflex::Input(), matcher_options.c_str(), flag_binary ? (PCRE2_NEVER_UTF | PCRE2_NEVER_UCP) : (PCRE2_UTF | PCRE2_UCP));
6632     Grep::Matchers matchers;
6633 
6634     if (!bcnf.singleton_or_undefined())
6635     {
6636       std::string subregex;
6637 
6638       for (const auto& i : bcnf.lists())
6639       {
6640         matchers.emplace_back();
6641 
6642         auto& submatchers = matchers.back();
6643 
6644         for (const auto& j : i)
6645         {
6646           if (j)
6647           {
6648             subregex.assign(pattern_options).append(*j);
6649             submatchers.emplace_back(new reflex::PCRE2Matcher((flag_binary ? reflex::PCRE2Matcher::convert(subregex, convert_flags) : reflex::PCRE2UTFMatcher::convert(subregex, convert_flags)), reflex::Input(), matcher_options.c_str(), flag_binary ? (PCRE2_NEVER_UTF | PCRE2_NEVER_UCP) : (PCRE2_UTF | PCRE2_UCP)));
6650           }
6651           else
6652           {
6653             submatchers.emplace_back();
6654           }
6655         }
6656       }
6657     }
6658 
6659     if (threads > 1)
6660     {
6661       GrepMaster grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6662       grep.ugrep();
6663     }
6664     else
6665     {
6666       Grep grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6667       set_grep_handle(&grep);
6668       grep.ugrep();
6669       clear_grep_handle();
6670     }
6671 #elif defined(HAVE_BOOST_REGEX)
6672     std::string pattern;
6673     try
6674     {
6675       // construct the Boost.Regex NFA-based Perl pattern matcher
6676       pattern.assign(reflex::BoostPerlMatcher::convert(regex, convert_flags));
6677       reflex::BoostPerlMatcher matcher(pattern, reflex::Input(), matcher_options.c_str());
6678       Grep::Matchers matchers;
6679 
6680       if (!bcnf.singleton_or_undefined())
6681       {
6682         std::string subregex;
6683 
6684         for (const auto& i : bcnf.lists())
6685         {
6686           matchers.emplace_back();
6687 
6688           auto& submatchers = matchers.back();
6689 
6690           for (const auto& j : i)
6691           {
6692             if (j)
6693             {
6694               subregex.assign(pattern_options).append(*j);
6695               submatchers.emplace_back(new reflex::BoostPerlMatcher(reflex::BoostPerlMatcher::convert(subregex, convert_flags), reflex::Input(), matcher_options.c_str()));
6696             }
6697             else
6698             {
6699               submatchers.emplace_back();
6700             }
6701           }
6702         }
6703       }
6704 
6705       if (threads > 1)
6706       {
6707         GrepMaster grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6708         grep.ugrep();
6709       }
6710       else
6711       {
6712         Grep grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6713         set_grep_handle(&grep);
6714         grep.ugrep();
6715         clear_grep_handle();
6716       }
6717     }
6718 
6719     catch (boost::regex_error& error)
6720     {
6721       reflex::regex_error_type code;
6722 
6723       switch (error.code())
6724       {
6725         case boost::regex_constants::error_collate:
6726           code = reflex::regex_error::invalid_collating;
6727           break;
6728         case boost::regex_constants::error_ctype:
6729           code = reflex::regex_error::invalid_class;
6730           break;
6731         case boost::regex_constants::error_escape:
6732           code = reflex::regex_error::invalid_escape;
6733           break;
6734         case boost::regex_constants::error_backref:
6735           code = reflex::regex_error::invalid_backreference;
6736           break;
6737         case boost::regex_constants::error_brack:
6738           code = reflex::regex_error::invalid_class;
6739           break;
6740         case boost::regex_constants::error_paren:
6741           code = reflex::regex_error::mismatched_parens;
6742           break;
6743         case boost::regex_constants::error_brace:
6744           code = reflex::regex_error::mismatched_braces;
6745           break;
6746         case boost::regex_constants::error_badbrace:
6747           code = reflex::regex_error::invalid_repeat;
6748           break;
6749         case boost::regex_constants::error_range:
6750           code = reflex::regex_error::invalid_class_range;
6751           break;
6752         case boost::regex_constants::error_space:
6753           code = reflex::regex_error::exceeds_limits;
6754           break;
6755         case boost::regex_constants::error_badrepeat:
6756           code = reflex::regex_error::invalid_repeat;
6757           break;
6758         case boost::regex_constants::error_complexity:
6759           code = reflex::regex_error::exceeds_limits;
6760           break;
6761         case boost::regex_constants::error_stack:
6762           code = reflex::regex_error::exceeds_limits;
6763           break;
6764         default:
6765           code = reflex::regex_error::invalid_syntax;
6766       }
6767 
6768       throw reflex::regex_error(code, pattern, error.position() + 1);
6769     }
6770 #endif
6771   }
6772   else
6773   {
6774     // construct the RE/flex DFA-based pattern matcher and start matching files
6775     reflex::Pattern pattern(reflex::Matcher::convert(regex, convert_flags), "r");
6776     std::list<reflex::Pattern> patterns;
6777     Grep::Matchers matchers;
6778 
6779     if (flag_fuzzy > 0)
6780     {
6781       reflex::FuzzyMatcher matcher(pattern, static_cast<uint16_t>(flag_fuzzy), reflex::Input(), matcher_options.c_str());
6782 
6783       if (!bcnf.singleton_or_undefined())
6784       {
6785         std::string subregex;
6786 
6787         for (const auto& i : bcnf.lists())
6788         {
6789           matchers.emplace_back();
6790 
6791           auto& submatchers = matchers.back();
6792 
6793           for (const auto& j : i)
6794           {
6795             if (j)
6796             {
6797               subregex.assign(pattern_options).append(*j);
6798               patterns.emplace_back(reflex::FuzzyMatcher::convert(subregex, convert_flags), "r");
6799               submatchers.emplace_back(new reflex::FuzzyMatcher(patterns.back(), reflex::Input(), matcher_options.c_str()));
6800             }
6801             else
6802             {
6803               submatchers.emplace_back();
6804             }
6805           }
6806         }
6807       }
6808 
6809       if (threads > 1)
6810       {
6811         GrepMaster grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6812         grep.ugrep();
6813       }
6814       else
6815       {
6816         Grep grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6817         set_grep_handle(&grep);
6818         grep.ugrep();
6819         clear_grep_handle();
6820       }
6821     }
6822     else
6823     {
6824       reflex::Matcher matcher(pattern, reflex::Input(), matcher_options.c_str());
6825 
6826       if (!bcnf.singleton_or_undefined())
6827       {
6828         std::string subregex;
6829 
6830         for (const auto& i : bcnf.lists())
6831         {
6832           matchers.emplace_back();
6833 
6834           auto& submatchers = matchers.back();
6835 
6836           for (const auto& j : i)
6837           {
6838             if (j)
6839             {
6840               subregex.assign(pattern_options).append(*j);
6841               patterns.emplace_back(reflex::Matcher::convert(subregex, convert_flags), "r");
6842               submatchers.emplace_back(new reflex::Matcher(patterns.back(), reflex::Input(), matcher_options.c_str()));
6843             }
6844             else
6845             {
6846               submatchers.emplace_back();
6847             }
6848           }
6849         }
6850       }
6851 
6852       if (threads > 1)
6853       {
6854         GrepMaster grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6855         grep.ugrep();
6856       }
6857       else
6858       {
6859         Grep grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6860         set_grep_handle(&grep);
6861         grep.ugrep();
6862         clear_grep_handle();
6863       }
6864     }
6865 
6866     nodes = pattern.nodes();
6867     edges = pattern.edges();
6868     words = pattern.words();
6869     nodes_time = static_cast<size_t>(pattern.nodes_time());
6870     edges_time = static_cast<size_t>(pattern.parse_time() + pattern.edges_time());
6871     words_time = static_cast<size_t>(pattern.words_time());
6872   }
6873 
6874   // --format-end
6875   if (flag_format_end != NULL)
6876     format(flag_format_end, Stats::found_parts());
6877 
6878   // --stats: display stats when we're done
6879   if (flag_stats != NULL)
6880   {
6881     Stats::report(output);
6882 
6883     bcnf.report(output);
6884 
6885     if (strcmp(flag_stats, "vm") == 0 && words > 0)
6886       fprintf(output, "VM memory: %zu nodes (%zums), %zu edges (%zums), %zu opcode words (%zums)" NEWLINESTR, nodes, nodes_time, edges, edges_time, words, words_time);
6887   }
6888 
6889   // close the pipe to the forked pager
6890   if (flag_pager != NULL && output != NULL && output != stdout)
6891     pclose(output);
6892 }
6893 
6894 // cancel the search
cancel_ugrep()6895 void cancel_ugrep()
6896 {
6897   std::unique_lock<std::mutex> lock(grep_handle_mutex);
6898   if (grep_handle != NULL)
6899     grep_handle->cancel();
6900 }
6901 
6902 // set the handle to be able to use cancel_ugrep()
set_grep_handle(Grep * grep)6903 void set_grep_handle(Grep *grep)
6904 {
6905   std::unique_lock<std::mutex> lock(grep_handle_mutex);
6906   grep_handle = grep;
6907 }
6908 
6909 // reset the grep handle
clear_grep_handle()6910 void clear_grep_handle()
6911 {
6912   std::unique_lock<std::mutex> lock(grep_handle_mutex);
6913   grep_handle = NULL;
6914 }
6915 
6916 // search the specified files or standard input for pattern matches
ugrep()6917 void Grep::ugrep()
6918 {
6919   if (!flag_stdin && arg_files.empty())
6920   {
6921     recurse(1, ".");
6922   }
6923   else
6924   {
6925     // read each input file to find pattern matches
6926     if (flag_stdin)
6927     {
6928       Stats::score_file();
6929 
6930       // search standard input
6931       search(NULL);
6932     }
6933 
6934 #ifndef OS_WIN
6935     std::pair<std::set<ino_t>::iterator,bool> vino;
6936 #endif
6937 
6938     for (const auto pathname : arg_files)
6939     {
6940       // stop after finding max-files matching files
6941       if (flag_max_files > 0 && Stats::found_parts() >= flag_max_files)
6942         break;
6943 
6944       // stop when output is blocked or search cancelled
6945       if (out.eof || out.cancelled())
6946         break;
6947 
6948       // search file or directory, get the basename from the file argument first
6949       const char *basename = strrchr(pathname, PATHSEPCHR);
6950       if (basename != NULL)
6951         ++basename;
6952       else
6953         basename = pathname;
6954 
6955       ino_t inode = 0;
6956       uint64_t info;
6957 
6958       // search file, unless searchable directory into which we should recurse
6959       switch (select(1, pathname, basename, DIRENT_TYPE_UNKNOWN, inode, info, true))
6960       {
6961         case Type::DIRECTORY:
6962 #ifndef OS_WIN
6963           if (flag_dereference)
6964             vino = visited.insert(inode);
6965 #endif
6966 
6967           recurse(1, pathname);
6968 
6969 #ifndef OS_WIN
6970           if (flag_dereference)
6971             visited.erase(vino.first);
6972 #endif
6973           break;
6974 
6975         case Type::OTHER:
6976           search(pathname);
6977           break;
6978 
6979         case Type::SKIP:
6980           break;
6981       }
6982     }
6983   }
6984 }
6985 
6986 // search file or directory for pattern matches
select(size_t level,const char * pathname,const char * basename,int type,ino_t & inode,uint64_t & info,bool is_argument)6987 Grep::Type Grep::select(size_t level, const char *pathname, const char *basename, int type, ino_t& inode, uint64_t& info, bool is_argument)
6988 {
6989   if (*basename == '.' && !flag_hidden && !is_argument)
6990     return Type::SKIP;
6991 
6992 #ifdef OS_WIN
6993 
6994   DWORD attr = GetFileAttributesW(utf8_decode(pathname).c_str());
6995 
6996   if (attr == INVALID_FILE_ATTRIBUTES)
6997   {
6998     errno = ENOENT;
6999     warning("cannot read", pathname);
7000     return Type::SKIP;
7001   }
7002 
7003   if (!flag_hidden && !is_argument && ((attr & FILE_ATTRIBUTE_HIDDEN) || (attr & FILE_ATTRIBUTE_SYSTEM)))
7004     return Type::SKIP;
7005 
7006   if ((attr & FILE_ATTRIBUTE_DIRECTORY))
7007   {
7008     if (flag_directories_action == Action::READ)
7009     {
7010       // directories cannot be read actually, so grep produces a warning message (errno is not set)
7011       is_directory(pathname);
7012       return Type::SKIP;
7013     }
7014 
7015     if (is_argument || flag_directories_action == Action::RECURSE)
7016     {
7017       // --depth: recursion level exceeds max depth?
7018       if (flag_max_depth > 0 && level > flag_max_depth)
7019         return Type::SKIP;
7020 
7021       // hard maximum recursion depth reached?
7022       if (level > MAX_DEPTH)
7023       {
7024         if (!flag_no_messages)
7025           fprintf(stderr, "%sugrep: %s%s%s recursion depth hit hard limit of %d\n", color_off, color_high, pathname, color_off, MAX_DEPTH);
7026         return Type::SKIP;
7027       }
7028 
7029       // check for --exclude-dir and --include-dir constraints if pathname != "."
7030       if (strcmp(pathname, ".") != 0)
7031       {
7032         if (!flag_all_exclude_dir.empty())
7033         {
7034           // exclude directories whose pathname matches any one of the --exclude-dir globs unless negated with !
7035           bool ok = true;
7036           for (const auto& glob : flag_all_exclude_dir)
7037           {
7038             if (glob.front() == '!')
7039             {
7040               if (!ok && glob_match(pathname, basename, glob.c_str() + 1))
7041                 ok = true;
7042             }
7043             else if (ok && glob_match(pathname, basename, glob.c_str()))
7044             {
7045               ok = false;
7046             }
7047           }
7048           if (!ok)
7049             return Type::SKIP;
7050         }
7051 
7052         if (!flag_all_include_dir.empty())
7053         {
7054           // include directories whose pathname matches any one of the --include-dir globs unless negated with !
7055           bool ok = false;
7056           for (const auto& glob : flag_all_include_dir)
7057           {
7058             if (glob.front() == '!')
7059             {
7060               if (ok && glob_match(pathname, basename, glob.c_str() + 1))
7061                 ok = false;
7062             }
7063             else if (!ok && glob_match(pathname, basename, glob.c_str()))
7064             {
7065               ok = true;
7066             }
7067           }
7068           if (!ok)
7069             return Type::SKIP;
7070         }
7071       }
7072 
7073       return Type::DIRECTORY;
7074     }
7075   }
7076   else if ((attr & FILE_ATTRIBUTE_DEVICE) == 0 || flag_devices_action == Action::READ)
7077   {
7078     // --depth: recursion level not deep enough?
7079     if (flag_min_depth > 0 && level <= flag_min_depth)
7080       return Type::SKIP;
7081 
7082     if (!flag_all_exclude.empty())
7083     {
7084       // exclude files whose pathname matches any one of the --exclude globs unless negated with !
7085       bool ok = true;
7086       for (const auto& glob : flag_all_exclude)
7087       {
7088         if (glob.front() == '!')
7089         {
7090           if (!ok && glob_match(pathname, basename, glob.c_str() + 1))
7091             ok = true;
7092         }
7093         else if (ok && glob_match(pathname, basename, glob.c_str()))
7094         {
7095           ok = false;
7096         }
7097       }
7098       if (!ok)
7099         return Type::SKIP;
7100     }
7101 
7102     // check magic pattern against the file signature, when --file-magic=MAGIC is specified
7103     if (!flag_file_magic.empty())
7104     {
7105       FILE *file;
7106 
7107       if (fopenw_s(&file, pathname, "rb") != 0)
7108       {
7109         warning("cannot read", pathname);
7110         return Type::SKIP;
7111       }
7112 
7113 #ifdef HAVE_LIBZ
7114       if (flag_decompress)
7115       {
7116         zstreambuf streambuf(pathname, file);
7117         std::istream stream(&streambuf);
7118 
7119         // file has the magic bytes we're looking for: search the file
7120         size_t match = magic_matcher.input(&stream).scan();
7121         if (match == flag_not_magic || match >= flag_min_magic)
7122         {
7123           fclose(file);
7124 
7125           Stats::score_file();
7126 
7127           return Type::OTHER;
7128         }
7129       }
7130       else
7131 #endif
7132       {
7133         size_t match = magic_matcher.input(reflex::Input(file, flag_encoding_type)).scan();
7134         if (match == flag_not_magic || match >= flag_min_magic)
7135         {
7136           fclose(file);
7137 
7138           Stats::score_file();
7139 
7140           return Type::OTHER;
7141         }
7142       }
7143 
7144       fclose(file);
7145 
7146       if (flag_all_include.empty())
7147         return Type::SKIP;
7148     }
7149 
7150     if (!flag_all_include.empty())
7151     {
7152       // include files whose pathname matches any one of the --include globs unless negated with !
7153       bool ok = false;
7154       for (const auto& glob : flag_all_include)
7155       {
7156         if (glob.front() == '!')
7157         {
7158           if (ok && glob_match(pathname, basename, glob.c_str() + 1))
7159             ok = false;
7160         }
7161         else if (!ok && glob_match(pathname, basename, glob.c_str()))
7162         {
7163           ok = true;
7164         }
7165       }
7166       if (!ok)
7167         return Type::SKIP;
7168     }
7169 
7170     Stats::score_file();
7171 
7172     return Type::OTHER;
7173   }
7174 
7175 #else
7176 
7177   struct stat buf;
7178 
7179   // if dir entry is unknown, use lstat() to check if pathname is a symlink
7180   if (type != DIRENT_TYPE_UNKNOWN || lstat(pathname, &buf) == 0)
7181   {
7182     // symlinks are followed when specified on the command line (unless option -p) or with options -R, -S, --dereference
7183     if ((is_argument && !flag_no_dereference) || flag_dereference || (type != DIRENT_TYPE_UNKNOWN ? type != DIRENT_TYPE_LNK : !S_ISLNK(buf.st_mode)))
7184     {
7185       // if we got a symlink, use stat() to check if pathname is a directory or a regular file, we also stat when sorting by stat info
7186       if (((flag_sort_key == Sort::NA || flag_sort_key == Sort::NAME) && type != DIRENT_TYPE_UNKNOWN && type != DIRENT_TYPE_LNK) || stat(pathname, &buf) == 0)
7187       {
7188         // check if directory
7189         if (type == DIRENT_TYPE_DIR || ((type == DIRENT_TYPE_UNKNOWN || type == DIRENT_TYPE_LNK) && S_ISDIR(buf.st_mode)))
7190         {
7191           if (flag_directories_action == Action::READ)
7192           {
7193             // directories cannot be read actually, so grep produces a warning message (errno is not set)
7194             is_directory(pathname);
7195             return Type::SKIP;
7196           }
7197 
7198           if (is_argument || flag_directories_action == Action::RECURSE)
7199           {
7200             // --depth: recursion level exceeds max depth?
7201             if (flag_max_depth > 0 && level > flag_max_depth)
7202               return Type::SKIP;
7203 
7204             // hard maximum recursion depth reached?
7205             if (level > MAX_DEPTH)
7206             {
7207               if (!flag_no_messages)
7208                 fprintf(stderr, "%sugrep: %s%s%s recursion depth hit hard limit of %d\n", color_off, color_high, pathname, color_off, MAX_DEPTH);
7209               return Type::SKIP;
7210             }
7211 
7212             // check for --exclude-dir and --include-dir constraints if pathname != "."
7213             if (strcmp(pathname, ".") != 0)
7214             {
7215               if (!flag_all_exclude_dir.empty())
7216               {
7217                 // exclude directories whose pathname matches any one of the --exclude-dir globs unless negated with !
7218                 bool ok = true;
7219                 for (const auto& glob : flag_all_exclude_dir)
7220                 {
7221                   if (glob.front() == '!')
7222                   {
7223                     if (!ok && glob_match(pathname, basename, glob.c_str() + 1))
7224                       ok = true;
7225                   }
7226                   else if (ok && glob_match(pathname, basename, glob.c_str()))
7227                   {
7228                     ok = false;
7229                   }
7230                 }
7231                 if (!ok)
7232                   return Type::SKIP;
7233               }
7234 
7235               if (!flag_all_include_dir.empty())
7236               {
7237                 // include directories whose pathname matches any one of the --include-dir globs unless negated with !
7238                 bool ok = false;
7239                 for (const auto& glob : flag_all_include_dir)
7240                 {
7241                   if (glob.front() == '!')
7242                   {
7243                     if (ok && glob_match(pathname, basename, glob.c_str() + 1))
7244                       ok = false;
7245                   }
7246                   else if (!ok && glob_match(pathname, basename, glob.c_str()))
7247                   {
7248                     ok = true;
7249                   }
7250                 }
7251                 if (!ok)
7252                   return Type::SKIP;
7253               }
7254             }
7255 
7256             if (type != DIRENT_TYPE_DIR)
7257               inode = buf.st_ino;
7258 
7259             info = Entry::sort_info(buf);
7260 
7261             return Type::DIRECTORY;
7262           }
7263         }
7264         else if (type == DIRENT_TYPE_REG ? !is_output(inode) : (type == DIRENT_TYPE_UNKNOWN || type == DIRENT_TYPE_LNK) && S_ISREG(buf.st_mode) ? !is_output(buf.st_ino) : flag_devices_action == Action::READ)
7265         {
7266           // --depth: recursion level not deep enough?
7267           if (flag_min_depth > 0 && level <= flag_min_depth)
7268             return Type::SKIP;
7269 
7270           if (!flag_all_exclude.empty())
7271           {
7272             // exclude files whose pathname matches any one of the --exclude globs unless negated with !
7273             bool ok = true;
7274             for (const auto& glob : flag_all_exclude)
7275             {
7276               if (glob.front() == '!')
7277               {
7278                 if (!ok && glob_match(pathname, basename, glob.c_str() + 1))
7279                   ok = true;
7280               }
7281               else if (ok && glob_match(pathname, basename, glob.c_str()))
7282               {
7283                 ok = false;
7284               }
7285             }
7286             if (!ok)
7287               return Type::SKIP;
7288           }
7289 
7290           // check magic pattern against the file signature, when --file-magic=MAGIC is specified
7291           if (!flag_file_magic.empty())
7292           {
7293             FILE *file;
7294 
7295             if (fopenw_s(&file, pathname, "rb") != 0)
7296             {
7297               warning("cannot read", pathname);
7298               return Type::SKIP;
7299             }
7300 
7301 #ifdef HAVE_LIBZ
7302             if (flag_decompress)
7303             {
7304               zstreambuf streambuf(pathname, file);
7305               std::istream stream(&streambuf);
7306 
7307               // file has the magic bytes we're looking for: search the file
7308               size_t match = magic_matcher.input(&stream).scan();
7309               if (match == flag_not_magic || match >= flag_min_magic)
7310               {
7311                 fclose(file);
7312 
7313                 Stats::score_file();
7314 
7315                 info = Entry::sort_info(buf);
7316 
7317                 return Type::OTHER;
7318               }
7319             }
7320             else
7321 #endif
7322             {
7323               // if file has the magic bytes we're looking for: search the file
7324               size_t match = magic_matcher.input(reflex::Input(file, flag_encoding_type)).scan();
7325               if (match == flag_not_magic || match >= flag_min_magic)
7326               {
7327                 fclose(file);
7328 
7329                 Stats::score_file();
7330 
7331                 info = Entry::sort_info(buf);
7332 
7333                 return Type::OTHER;
7334               }
7335             }
7336 
7337             fclose(file);
7338 
7339             if (flag_all_include.empty())
7340               return Type::SKIP;
7341           }
7342 
7343           if (!flag_all_include.empty())
7344           {
7345             // include directories whose basename matches any one of the --include-dir globs if not negated with !
7346             bool ok = false;
7347             for (const auto& glob : flag_all_include)
7348             {
7349               if (glob.front() == '!')
7350               {
7351                 if (ok && glob_match(pathname, basename, glob.c_str() + 1))
7352                   ok = false;
7353               }
7354               else if (!ok && glob_match(pathname, basename, glob.c_str()))
7355               {
7356                 ok = true;
7357               }
7358             }
7359             if (!ok)
7360               return Type::SKIP;
7361           }
7362 
7363           Stats::score_file();
7364 
7365           info = Entry::sort_info(buf);
7366 
7367           return Type::OTHER;
7368         }
7369       }
7370     }
7371   }
7372   else
7373   {
7374     warning(NULL, pathname);
7375   }
7376 
7377 #endif
7378 
7379   return Type::SKIP;
7380 }
7381 
7382 // recurse over directory, searching for pattern matches in files and subdirectories
recurse(size_t level,const char * pathname)7383 void Grep::recurse(size_t level, const char *pathname)
7384 {
7385   // output is closed or cancelled?
7386   if (out.eof || out.cancelled())
7387     return;
7388 
7389 #ifdef OS_WIN
7390 
7391   WIN32_FIND_DATAW ffd;
7392 
7393   std::string glob;
7394 
7395   if (strcmp(pathname, ".") != 0)
7396     glob.assign(pathname).append("/*");
7397   else
7398     glob.assign("*");
7399 
7400   std::wstring wglob = utf8_decode(glob);
7401   HANDLE hFind = FindFirstFileW(wglob.c_str(), &ffd);
7402 
7403   if (hFind == INVALID_HANDLE_VALUE)
7404   {
7405     if (GetLastError() != ERROR_FILE_NOT_FOUND)
7406       warning("cannot open directory", pathname);
7407     return;
7408   }
7409 
7410 #else
7411 
7412 #ifdef HAVE_STATVFS
7413 
7414   if (!exclude_fs_ids.empty() || !include_fs_ids.empty())
7415   {
7416     struct statvfs buf;
7417 
7418     if (statvfs(pathname, &buf) == 0)
7419     {
7420       uint64_t id = static_cast<uint64_t>(buf.f_fsid);
7421 
7422       if (exclude_fs_ids.find(id) != exclude_fs_ids.end())
7423         return;
7424 
7425       if (!include_fs_ids.empty() && include_fs_ids.find(id) == include_fs_ids.end())
7426         return;
7427     }
7428   }
7429 
7430 #endif
7431 
7432   DIR *dir = opendir(pathname);
7433 
7434   if (dir == NULL)
7435   {
7436     warning("cannot open directory", pathname);
7437     return;
7438   }
7439 
7440 #endif
7441 
7442   // --ignore-files: check if one or more are present to read and extend the file and dir exclusions
7443   // std::vector<std::string> *save_exclude = NULL, *save_exclude_dir = NULL, *save_not_exclude = NULL, *save_not_exclude_dir = NULL;
7444   std::unique_ptr<std::vector<std::string>> save_all_exclude, save_all_exclude_dir;
7445   bool saved = false;
7446 
7447   if (!flag_ignore_files.empty())
7448   {
7449     std::string filename;
7450 
7451     for (const auto& i : flag_ignore_files)
7452     {
7453       filename.assign(pathname).append(PATHSEPSTR).append(i);
7454 
7455       FILE *file = NULL;
7456       if (fopenw_s(&file, filename.c_str(), "r") == 0)
7457       {
7458         if (!saved)
7459         {
7460           save_all_exclude = std::unique_ptr<std::vector<std::string>>(new std::vector<std::string>);
7461           save_all_exclude->swap(flag_all_exclude);
7462           save_all_exclude_dir = std::unique_ptr<std::vector<std::string>>(new std::vector<std::string>);
7463           save_all_exclude_dir->swap(flag_all_exclude_dir);
7464 
7465           saved = true;
7466         }
7467 
7468         Stats::ignore_file(filename);
7469         split_globs(file, flag_all_exclude, flag_all_exclude_dir);
7470         fclose(file);
7471       }
7472     }
7473   }
7474 
7475   Stats::score_dir();
7476 
7477   std::vector<Entry> content;
7478   std::vector<Entry> subdirs;
7479   std::string dirpathname;
7480 
7481 #ifdef OS_WIN
7482 
7483   std::string cFileName;
7484 
7485   do
7486   {
7487     cFileName.assign(utf8_encode(ffd.cFileName));
7488 
7489     // search directory entries that aren't . or .. or hidden when --no-hidden is enabled
7490     if (cFileName[0] != '.' || (flag_hidden && cFileName[1] != '\0' && cFileName[1] != '.'))
7491     {
7492       size_t len = strlen(pathname);
7493 
7494       if (len == 1 && pathname[0] == '.')
7495         dirpathname.assign(cFileName);
7496       else if (len > 0 && pathname[len - 1] == PATHSEPCHR)
7497         dirpathname.assign(pathname).append(cFileName);
7498       else
7499         dirpathname.assign(pathname).append(PATHSEPSTR).append(cFileName);
7500 
7501       ino_t inode = 0;
7502       uint64_t info = 0;
7503 
7504       // --sort: get file info
7505       if (flag_sort_key != Sort::NA && flag_sort_key != Sort::NAME)
7506       {
7507         if (flag_sort_key == Sort::SIZE)
7508         {
7509           info = static_cast<uint64_t>(ffd.nFileSizeLow) | (static_cast<uint64_t>(ffd.nFileSizeHigh) << 32);
7510         }
7511         else
7512         {
7513           struct _FILETIME& time = flag_sort_key == Sort::USED ? ffd.ftLastAccessTime : flag_sort_key == Sort::CHANGED ? ffd.ftLastWriteTime : ffd.ftCreationTime;
7514           info = static_cast<uint64_t>(time.dwLowDateTime) | (static_cast<uint64_t>(time.dwHighDateTime) << 32);
7515         }
7516       }
7517 
7518       // search dirpathname, unless searchable directory into which we should recurse
7519       switch (select(level + 1, dirpathname.c_str(), cFileName.c_str(), DIRENT_TYPE_UNKNOWN, inode, info))
7520       {
7521         case Type::DIRECTORY:
7522           subdirs.emplace_back(dirpathname, 0, info);
7523           break;
7524 
7525         case Type::OTHER:
7526           if (flag_sort_key == Sort::NA)
7527             search(dirpathname.c_str());
7528           else
7529             content.emplace_back(dirpathname, 0, info);
7530           break;
7531 
7532         case Type::SKIP:
7533           break;
7534       }
7535 
7536       // stop after finding max-files matching files
7537       if (flag_max_files > 0 && Stats::found_parts() >= flag_max_files)
7538         break;
7539 
7540       // stop when output is blocked or search cancelled
7541       if (out.eof || out.cancelled())
7542         break;
7543     }
7544   } while (FindNextFileW(hFind, &ffd) != 0);
7545 
7546   FindClose(hFind);
7547 
7548 #else
7549 
7550   struct dirent *dirent = NULL;
7551 
7552   while ((dirent = readdir(dir)) != NULL)
7553   {
7554     // search directory entries that aren't . or .. or hidden when --no-hidden is enabled
7555     if (dirent->d_name[0] != '.' || (flag_hidden && dirent->d_name[1] != '\0' && dirent->d_name[1] != '.'))
7556     {
7557       size_t len = strlen(pathname);
7558 
7559       if (len == 1 && pathname[0] == '.')
7560         dirpathname.assign(dirent->d_name);
7561       else if (len > 0 && pathname[len - 1] == PATHSEPCHR)
7562         dirpathname.assign(pathname).append(dirent->d_name);
7563       else
7564         dirpathname.assign(pathname).append(PATHSEPSTR).append(dirent->d_name);
7565 
7566       Type type;
7567       ino_t inode;
7568       uint64_t info;
7569 
7570       // search dirpathname, unless searchable directory into which we should recurse
7571 #if defined(HAVE_STRUCT_DIRENT_D_TYPE) && defined(HAVE_STRUCT_DIRENT_D_INO)
7572       inode = dirent->d_ino;
7573       type = select(level + 1, dirpathname.c_str(), dirent->d_name, dirent->d_type, inode, info);
7574 #else
7575       inode = 0;
7576       type = select(level + 1, dirpathname.c_str(), dirent->d_name, DIRENT_TYPE_UNKNOWN, inode, info);
7577 #endif
7578 
7579       switch (type)
7580       {
7581         case Type::DIRECTORY:
7582           subdirs.emplace_back(dirpathname, inode, info);
7583           break;
7584 
7585         case Type::OTHER:
7586           if (flag_sort_key == Sort::NA)
7587             search(dirpathname.c_str());
7588           else
7589             content.emplace_back(dirpathname, inode, info);
7590           break;
7591 
7592         case Type::SKIP:
7593           break;
7594       }
7595 
7596       // stop after finding max-files matching files
7597       if (flag_max_files > 0 && Stats::found_parts() >= flag_max_files)
7598         break;
7599 
7600       // stop when output is blocked or search cancelled
7601       if (out.eof || out.cancelled())
7602         break;
7603     }
7604   }
7605 
7606   closedir(dir);
7607 
7608 #endif
7609 
7610   // -Z and --sort=best: presearch the selected files to determine edit distance cost
7611   if (flag_fuzzy > 0 && flag_sort_key == Sort::BEST)
7612   {
7613     auto entry = content.begin();
7614     while (entry != content.end())
7615     {
7616       entry->cost = cost(entry->pathname.c_str());
7617 
7618       // if a file has no match, remove it
7619       if (entry->cost == 65535)
7620         entry = content.erase(entry);
7621       else
7622         ++entry;
7623     }
7624   }
7625 
7626   // --sort: sort the selected non-directory entries and search them
7627   if (flag_sort_key != Sort::NA)
7628   {
7629     if (flag_sort_key == Sort::NAME)
7630     {
7631       if (flag_sort_rev)
7632         std::sort(content.begin(), content.end(), Entry::rev_comp_by_path);
7633       else
7634         std::sort(content.begin(), content.end(), Entry::comp_by_path);
7635     }
7636     else if (flag_sort_key == Sort::BEST)
7637     {
7638       if (flag_sort_rev)
7639         std::sort(content.begin(), content.end(), Entry::rev_comp_by_best);
7640       else
7641         std::sort(content.begin(), content.end(), Entry::comp_by_best);
7642     }
7643     else
7644     {
7645       if (flag_sort_rev)
7646         std::sort(content.begin(), content.end(), Entry::rev_comp_by_info);
7647       else
7648         std::sort(content.begin(), content.end(), Entry::comp_by_info);
7649     }
7650 
7651     // search the select sorted non-directory entries
7652     for (const auto& entry : content)
7653     {
7654       search(entry.pathname.c_str());
7655 
7656       // stop after finding max-files matching files
7657       if (flag_max_files > 0 && Stats::found_parts() >= flag_max_files)
7658         break;
7659 
7660       // stop when output is blocked or search cancelled
7661       if (out.eof || out.cancelled())
7662         break;
7663     }
7664   }
7665 
7666   // --sort: sort the selected subdirectory entries
7667   if (flag_sort_key != Sort::NA)
7668   {
7669     if (flag_sort_key == Sort::NAME || flag_sort_key == Sort::BEST)
7670     {
7671       if (flag_sort_rev)
7672         std::sort(subdirs.begin(), subdirs.end(), Entry::rev_comp_by_path);
7673       else
7674         std::sort(subdirs.begin(), subdirs.end(), Entry::comp_by_path);
7675     }
7676     else
7677     {
7678       if (flag_sort_rev)
7679         std::sort(subdirs.begin(), subdirs.end(), Entry::rev_comp_by_info);
7680       else
7681         std::sort(subdirs.begin(), subdirs.end(), Entry::comp_by_info);
7682     }
7683   }
7684 
7685   // recurse into the selected subdirectories
7686   for (const auto& entry : subdirs)
7687   {
7688     // stop after finding max-files matching files
7689     if (flag_max_files > 0 && Stats::found_parts() >= flag_max_files)
7690       break;
7691 
7692     // stop when output is blocked or search cancelled
7693     if (out.eof || out.cancelled())
7694       break;
7695 
7696 #ifndef OS_WIN
7697     // -R: check if this directory was visited before
7698     std::pair<std::set<ino_t>::iterator,bool> vino;
7699 
7700     if (flag_dereference)
7701     {
7702       vino = visited.insert(entry.inode);
7703 
7704       // if visited before, then do not recurse on this directory again
7705       if (!vino.second)
7706         continue;
7707     }
7708 #endif
7709 
7710     recurse(level + 1, entry.pathname.c_str());
7711 
7712 #ifndef OS_WIN
7713     if (flag_dereference)
7714       visited.erase(vino.first);
7715 #endif
7716   }
7717 
7718   // --ignore-files: restore if changed
7719   if (saved)
7720   {
7721     save_all_exclude->swap(flag_all_exclude);
7722     save_all_exclude_dir->swap(flag_all_exclude_dir);
7723   }
7724 }
7725 
7726 // -Z and --sort=best: perform a presearch to determine edit distance cost, returns 65535 when no match is found
cost(const char * pathname)7727 uint16_t Grep::cost(const char *pathname)
7728 {
7729   // stop when output is blocked
7730   if (out.eof)
7731     return 0;
7732 
7733   try
7734   {
7735     // open (archive or compressed) file (pathname is NULL to read stdin), return on failure
7736     if (!open_file(pathname))
7737       return 0;
7738   }
7739 
7740   catch (...)
7741   {
7742     // this should never happen
7743     warning("exception while opening", pathname);
7744 
7745     return 0;
7746   }
7747 
7748   // -Z: matcher is a FuzzyMatcher
7749   reflex::FuzzyMatcher *fuzzy_matcher = dynamic_cast<reflex::FuzzyMatcher*>(matcher);
7750 
7751   uint16_t cost = 65535;
7752 
7753   // -z: loop over extracted archive parts, when applicable
7754   do
7755   {
7756     try
7757     {
7758       if (init_read())
7759       {
7760         while (fuzzy_matcher->find())
7761         {
7762           if (fuzzy_matcher->edits() < cost)
7763             cost = fuzzy_matcher->edits();
7764 
7765           // exact match?
7766           if (cost == 0)
7767             break;
7768         }
7769       }
7770     }
7771 
7772     catch (...)
7773     {
7774       // this should never happen
7775       warning("exception while searching", pathname);
7776     }
7777 
7778     // close file or -z: loop over next extracted archive parts, when applicable
7779   } while (close_file(pathname));
7780 
7781   return cost;
7782 }
7783 
7784 // search input and display pattern matches
search(const char * pathname)7785 void Grep::search(const char *pathname)
7786 {
7787   // stop when output is blocked
7788   if (out.eof)
7789     return;
7790 
7791   try
7792   {
7793     // open (archive or compressed) file (pathname is NULL to read stdin), return on failure
7794     if (!open_file(pathname))
7795       return;
7796   }
7797 
7798   catch (...)
7799   {
7800     // this should never happen
7801     warning("exception while opening", pathname);
7802 
7803     return;
7804   }
7805 
7806   // pathname is NULL when stdin is searched
7807   if (pathname == NULL)
7808     pathname = flag_label;
7809 
7810   bool colorize = flag_apply_color || flag_tag != NULL;
7811   bool matched = false;
7812 
7813   // -z: loop over extracted archive parts, when applicable
7814   do
7815   {
7816     try
7817     {
7818       size_t matches = 0;
7819 
7820       // --files: reset the matching[] bitmask used in cnf_matching() for each matcher in matchers
7821       if (flag_files && matchers != NULL)
7822       {
7823         // hold the output
7824         out.hold();
7825 
7826         // reset the bit corresponding to each matcher in matchers
7827         size_t n = matchers->size();
7828         matching.resize(0);
7829         matching.resize(n);
7830 
7831         // reset the bit corresponding to the OR NOT terms of each matcher in matchers
7832         notmatching.resize(n);
7833         size_t j = 0;
7834         for (auto& i : *matchers)
7835         {
7836           notmatching[j].resize(0);
7837           notmatching[j].resize(i.size() > 0 ? i.size() - 1 : 0);
7838           ++j;
7839         }
7840       }
7841 
7842       if (flag_quiet || flag_files_with_matches)
7843       {
7844         // option -q, -l, or -L
7845 
7846         if (!init_read())
7847           goto exit_search;
7848 
7849         // --format: whether to out.acquire() early before Stats::found_part()
7850         bool acquire = flag_format != NULL && (flag_format_open != NULL || flag_format_close != NULL);
7851 
7852         while (matcher->find())
7853         {
7854           // --range: max line exceeded?
7855           if (flag_max_line > 0 && matcher->lineno() > flag_max_line)
7856             break;
7857 
7858           if (matchers != NULL)
7859           {
7860             const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
7861             const char *bol = matcher->bol();
7862 
7863             // check CNF AND/OR/NOT matching
7864             if (!cnf_matching(bol, eol, acquire) || out.holding())
7865               continue;
7866           }
7867 
7868           matches = 1;
7869           break;
7870         }
7871 
7872         // --files: if we are still holding the output and CNF is finally satisfyable then a match was made
7873         if (flag_files && matchers != NULL)
7874         {
7875           if (!cnf_satisfied(acquire))
7876             goto exit_search;
7877 
7878           matches = 1;
7879         }
7880 
7881         // -v: invert
7882         if (flag_invert_match)
7883           matches = !matches;
7884 
7885         if (matches > 0)
7886         {
7887           // --format-open or format-close: we must acquire lock early before Stats::found_part()
7888           if (acquire)
7889             out.acquire();
7890 
7891           if (!flag_files || matchers == NULL)
7892           {
7893             // --max-files: max reached?
7894             if (!Stats::found_part())
7895               goto exit_search;
7896           }
7897 
7898           // -l or -L
7899           if (flag_files_with_matches)
7900           {
7901             if (flag_format != NULL)
7902             {
7903               if (flag_format_open != NULL)
7904                 out.format(flag_format_open, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
7905               out.format(flag_format, pathname, partname, 1, matcher, false, false);
7906               if (flag_format_close != NULL)
7907                 out.format(flag_format_close, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
7908             }
7909             else
7910             {
7911               out.str(color_fn);
7912               if (color_hl != NULL)
7913               {
7914                 out.str(color_hl);
7915                 out.uri(color_wd);
7916                 out.uri(pathname);
7917                 out.str(color_st);
7918               }
7919               out.str(pathname);
7920               if (color_hl != NULL)
7921               {
7922                 out.str(color_hl);
7923                 out.str(color_st);
7924               }
7925               if (!partname.empty())
7926               {
7927                 out.chr('{');
7928                 out.str(partname);
7929                 out.chr('}');
7930               }
7931               out.str(color_off);
7932 
7933               if (flag_null)
7934                 out.chr('\0');
7935               else
7936                 out.nl();
7937             }
7938           }
7939         }
7940       }
7941       else if (flag_count)
7942       {
7943         // option -c
7944 
7945         if (!init_read())
7946           goto exit_search;
7947 
7948         // --format: whether to out.acquire() early before Stats::found_part()
7949         bool acquire = flag_format != NULL && (flag_format_open != NULL || flag_format_close != NULL);
7950 
7951         if (flag_ungroup || flag_only_matching)
7952         {
7953           // -co or -cu: count the number of patterns matched in the file
7954 
7955           while (matcher->find())
7956           {
7957             // --range: max line exceeded?
7958             if (flag_max_line > 0 && matcher->lineno() > flag_max_line)
7959               break;
7960 
7961             if (matchers != NULL)
7962             {
7963               const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
7964               const char *bol = matcher->bol();
7965 
7966               // check CNF AND/OR/NOT matching, with --files acquire lock before Stats::found_part()
7967               if (!cnf_matching(bol, eol, acquire))
7968                 continue;
7969             }
7970 
7971             ++matches;
7972 
7973             // -m: max number of matches reached?
7974             if (flag_max_count > 0 && matches >= flag_max_count)
7975               break;
7976           }
7977         }
7978         else
7979         {
7980           // -c without -o/-u: count the number of matching lines
7981 
7982           size_t lineno = 0;
7983 
7984           while (matcher->find())
7985           {
7986             size_t current_lineno = matcher->lineno();
7987 
7988             if (lineno != current_lineno)
7989             {
7990               // --range: max line exceeded?
7991               if (flag_max_line > 0 && current_lineno > flag_max_line)
7992                 break;
7993 
7994               if (matchers != NULL)
7995               {
7996                 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
7997                 const char *bol = matcher->bol();
7998 
7999                 // check CNF AND/OR/NOT matching, with --files acquire lock before Stats::found_part()
8000                 if (!cnf_matching(bol, eol, acquire))
8001                   continue;
8002               }
8003 
8004               ++matches;
8005 
8006               // -m: max number of matches reached?
8007               if (flag_max_count > 0 && matches >= flag_max_count)
8008                 break;
8009 
8010               lineno = current_lineno;
8011             }
8012           }
8013 
8014           // -c with -v: count non-matching lines
8015           if (flag_invert_match)
8016           {
8017             matches = matcher->lineno() - matches;
8018             if (matches > 0)
8019               --matches;
8020           }
8021         }
8022 
8023         // --files: if we are still holding the output and CNF is not satisfyable then no global matches were made
8024         if (flag_files && matchers != NULL)
8025         {
8026           if (!cnf_satisfied(acquire))
8027             goto exit_search; // we cannot report 0 matches and ensure accurate output
8028         }
8029         else
8030         {
8031           // --format-open or --format-close: we must acquire lock early before Stats::found_part()
8032           if (acquire)
8033             out.acquire();
8034 
8035           // --max-files: max reached?
8036           // unfortunately, allowing 'acquire' below produces "x matching + y in archives"
8037           // but without this we cannot produce correct format-open and format-close outputs
8038           if (matches > 0 || acquire)
8039             if (!Stats::found_part())
8040               goto exit_search;
8041         }
8042 
8043         if (flag_format != NULL)
8044         {
8045           if (flag_format_open != NULL)
8046             out.format(flag_format_open, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
8047           out.format(flag_format, pathname, partname, matches, matcher, false, false);
8048           if (flag_format_close != NULL)
8049             out.format(flag_format_close, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
8050         }
8051         else
8052         {
8053           if (flag_with_filename || !partname.empty())
8054           {
8055             out.str(color_fn);
8056             if (color_hl != NULL)
8057             {
8058               out.str(color_hl);
8059               out.uri(color_wd);
8060               out.uri(pathname);
8061               out.str(color_st);
8062             }
8063             out.str(pathname);
8064             if (color_hl != NULL)
8065             {
8066               out.str(color_hl);
8067               out.str(color_st);
8068             }
8069             if (!partname.empty())
8070             {
8071               out.chr('{');
8072               out.str(partname);
8073               out.chr('}');
8074             }
8075             out.str(color_off);
8076 
8077             if (flag_null)
8078             {
8079               out.chr('\0');
8080             }
8081             else
8082             {
8083               out.str(color_se);
8084               out.str(flag_separator);
8085               out.str(color_off);
8086             }
8087           }
8088           out.num(matches);
8089           out.nl();
8090         }
8091       }
8092       else if (flag_format != NULL)
8093       {
8094         // option --format
8095 
8096         if (!init_read())
8097           goto exit_search;
8098 
8099         // whether to out.acquire() early before Stats::found_part()
8100         bool acquire = flag_format_open != NULL || flag_format_close != NULL;
8101 
8102         if (flag_invert_match)
8103         {
8104           // FormatInvertMatchHandler requires lineno to be set precisely, i.e. after skipping --range lines
8105           size_t lineno = flag_min_line > 0 ? flag_min_line - 1 : 0;
8106           bool binfile = false; // unused
8107           bool hex = false;     // unused
8108           bool binary = false;  // unused
8109           bool stop = false;
8110 
8111           // construct event handler functor with captured *this and some of the locals
8112           FormatInvertMatchGrepHandler invert_match_handler(*this, pathname, lineno, binfile, hex, binary, matches, stop);
8113 
8114           // register an event handler to display non-matching lines
8115           matcher->set_handler(&invert_match_handler);
8116 
8117           // to get the context from the invert_match handler explicitly
8118           reflex::AbstractMatcher::Context context;
8119 
8120           while (matcher->find())
8121           {
8122             size_t current_lineno = matcher->lineno();
8123 
8124             if (lineno != current_lineno)
8125             {
8126               if (matchers != NULL)
8127               {
8128                 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8129                 const char *bol = matcher->bol();
8130 
8131                 // check CNF AND/OR/NOT matching
8132                 if (!cnf_matching(bol, eol))
8133                   continue;
8134               }
8135 
8136               // get the lines before the matched line
8137               context = matcher->before();
8138 
8139               // display non-matching lines up to this line
8140               if (context.len > 0)
8141                 invert_match_handler(*matcher, context.buf, context.len, context.num);
8142 
8143               // --range: max line exceeded?
8144               if (flag_max_line > 0 && current_lineno > flag_max_line)
8145                 goto done_search;
8146 
8147               // --max-files: max reached?
8148               if (stop)
8149                 goto exit_search;
8150 
8151               // -m: max number of matches reached?
8152               if (flag_max_count > 0 && matches >= flag_max_count)
8153                 goto done_search;
8154 
8155               // output blocked?
8156               if (out.eof)
8157                 goto exit_search;
8158             }
8159 
8160             lineno = current_lineno + matcher->lines() - 1;
8161           }
8162 
8163           // get the remaining context
8164           context = matcher->after();
8165 
8166           if (context.len > 0)
8167             invert_match_handler(*matcher, context.buf, context.len, context.num);
8168         }
8169         else
8170         {
8171           while (matcher->find())
8172           {
8173             // --range: max line exceeded?
8174             if (flag_max_line > 0 && matcher->lineno() > flag_max_line)
8175               break;
8176 
8177             if (matchers != NULL)
8178             {
8179               const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8180               const char *bol = matcher->bol();
8181 
8182               // check CNF AND/OR/NOT matching
8183               if (!cnf_matching(bol, eol, acquire))
8184                 continue;
8185             }
8186 
8187             // output --format-open
8188             if (matches == 0)
8189             {
8190               if (flag_files && matchers != NULL)
8191               {
8192                 // --format-open: we must acquire lock early before Stats::found_part()
8193                 if (acquire && out.holding())
8194                 {
8195                   out.acquire();
8196 
8197                   // --max-files: max reached?
8198                   if (!Stats::found_part())
8199                     goto exit_search;
8200                 }
8201               }
8202               else
8203               {
8204                 // --format-open: we must acquire lock early before Stats::found_part()
8205                 if (acquire)
8206                   out.acquire();
8207 
8208                 // --max-files: max reached?
8209                 if (!Stats::found_part())
8210                   goto exit_search;
8211               }
8212 
8213               if (flag_format_open != NULL)
8214               {
8215                 out.format(flag_format_open, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
8216 
8217                 // --files: undo files count
8218                 if (flag_files && matchers != NULL && out.holding())
8219                   Stats::undo_found_part();
8220               }
8221             }
8222 
8223             ++matches;
8224 
8225             // output --format
8226             out.format(flag_format, pathname, partname, matches, matcher, matches > 1, matches > 1);
8227 
8228             // -m: max number of matches reached?
8229             if (flag_max_count > 0 && matches >= flag_max_count)
8230               break;
8231 
8232             out.check_flush();
8233           }
8234         }
8235 
8236         // --files: if we are still holding the output and CNF is not satisfyable then no global matches were made
8237         if (flag_files && matchers != NULL)
8238           if (!cnf_satisfied(true))
8239             goto exit_search;
8240 
8241         // output --format-close
8242         if (matches > 0 && flag_format_close != NULL)
8243           out.format(flag_format_close, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
8244       }
8245       else if (flag_only_line_number)
8246       {
8247         // option --only-line-number
8248 
8249         if (!init_read())
8250           goto exit_search;
8251 
8252         size_t lineno = 0;
8253         const char *separator = flag_separator;
8254 
8255         while (matcher->find())
8256         {
8257           size_t current_lineno = matcher->lineno();
8258 
8259           separator = lineno != current_lineno ? flag_separator : "+";
8260 
8261           if (lineno != current_lineno || flag_ungroup)
8262           {
8263             // --range: max line exceeded?
8264             if (flag_max_line > 0 && current_lineno > flag_max_line)
8265               break;
8266 
8267             if (matchers != NULL)
8268             {
8269               const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8270               const char *bol = matcher->bol();
8271 
8272               // check CNF AND/OR/NOT matching
8273               if (!cnf_matching(bol, eol))
8274                 continue;
8275             }
8276 
8277             if (matches == 0 && (!flag_files || matchers == NULL))
8278             {
8279               // --max-files: max reached?
8280               if (!Stats::found_part())
8281                 goto exit_search;
8282             }
8283 
8284             ++matches;
8285 
8286             out.header(pathname, partname, current_lineno, matcher, matcher->first(), separator, true);
8287 
8288             // -m: max number of matches reached?
8289             if (flag_max_count > 0 && matches >= flag_max_count)
8290               break;
8291 
8292             // output blocked?
8293             if (out.eof)
8294               goto exit_search;
8295 
8296             lineno = current_lineno;
8297           }
8298         }
8299       }
8300       else if (flag_only_matching)
8301       {
8302         // option -o
8303 
8304         if (!init_read())
8305           goto exit_search;
8306 
8307         size_t lineno = 0;
8308         bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
8309         bool hex = false;
8310         bool nl = false;
8311 
8312         while (matcher->find())
8313         {
8314           const char *begin = matcher->begin();
8315           size_t size = matcher->size();
8316           bool binary = flag_hex || (!flag_text && is_binary(begin, size));
8317 
8318           if (hex && !binary)
8319           {
8320             out.dump.done();
8321           }
8322           else if (!hex && binary && nl)
8323           {
8324             out.nl();
8325             nl = false;
8326           }
8327 
8328           size_t current_lineno = matcher->lineno();
8329 
8330           if (lineno != current_lineno || flag_ungroup)
8331           {
8332             if (nl)
8333             {
8334               out.nl();
8335               nl = false;
8336             }
8337 
8338             // --range: max line exceeded?
8339             if (flag_max_line > 0 && current_lineno > flag_max_line)
8340               break;
8341 
8342             // -m: max number of matches reached?
8343             if (flag_max_count > 0 && matches >= flag_max_count)
8344               break;
8345 
8346             // output blocked?
8347             if (out.eof)
8348               goto exit_search;
8349 
8350             if (matchers != NULL)
8351             {
8352               const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8353               const char *bol = matcher->bol();
8354 
8355               // check CNF AND/OR/NOT matching
8356               if (!cnf_matching(bol, eol))
8357                 continue;
8358             }
8359 
8360             if (matches == 0 && (!flag_files || matchers == NULL))
8361             {
8362               // --max-files: max reached?
8363               if (!Stats::found_part())
8364                 goto exit_search;
8365             }
8366 
8367             if (binfile || (binary && !flag_hex && !flag_with_hex))
8368             {
8369               if (flag_binary_without_match)
8370               {
8371                 matches = 0;
8372               }
8373               else
8374               {
8375                 out.binary_file_matches(pathname, partname);
8376                 matches = 1;
8377               }
8378 
8379               if (flag_files && matchers != NULL && out.holding())
8380                 continue;
8381 
8382               goto done_search;
8383             }
8384 
8385             ++matches;
8386 
8387             if (!flag_no_header)
8388             {
8389               const char *separator = lineno != current_lineno ? flag_separator : "+";
8390               out.header(pathname, partname, current_lineno, matcher, matcher->first(), separator, binary);
8391             }
8392 
8393             lineno = current_lineno;
8394           }
8395 
8396           hex = binary;
8397 
8398           if (binary)
8399           {
8400             if (flag_hex || flag_with_hex)
8401             {
8402               out.dump.next(matcher->first());
8403               out.dump.hex(Output::Dump::HEX_MATCH, matcher->first(), begin, size);
8404             }
8405             else
8406             {
8407               if (flag_binary_without_match)
8408               {
8409                 matches = 0;
8410               }
8411               else
8412               {
8413                 out.binary_file_matches(pathname, partname);
8414                 matches = 1;
8415               }
8416 
8417               if (flag_files && matchers != NULL && out.holding())
8418                 continue;
8419 
8420               goto done_search;
8421             }
8422 
8423             lineno += matcher->lines() - 1;
8424           }
8425           else
8426           {
8427             // echo multi-line matches line-by-line
8428 
8429             const char *from = begin;
8430             const char *to;
8431 
8432             while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
8433             {
8434               out.str(match_ms);
8435               out.str(from, to - from);
8436               out.str(match_off);
8437               out.chr('\n');
8438 
8439               out.header(pathname, partname, ++lineno, NULL, matcher->first() + (to - begin) + 1, "|", false);
8440 
8441               from = to + 1;
8442             }
8443 
8444             size -= from - begin;
8445 
8446             if (size > 0)
8447             {
8448               bool lf_only = from[size - 1] == '\n';
8449               size -= lf_only;
8450               if (size > 0)
8451               {
8452                 out.str(match_ms);
8453                 out.str(from, size);
8454                 out.str(match_off);
8455               }
8456               out.nl(lf_only);
8457             }
8458             else
8459             {
8460               nl = true;
8461             }
8462           }
8463         }
8464 
8465         if (nl)
8466           out.nl();
8467 
8468         if (hex)
8469           out.dump.done();
8470       }
8471       else if (flag_before_context == 0 && flag_after_context == 0 && !flag_any_line && !flag_invert_match)
8472       {
8473         // options -A, -B, -C, -y, -v are not specified
8474 
8475         if (!init_read())
8476           goto exit_search;
8477 
8478         size_t lineno = 0;
8479         bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
8480         bool hex = false;
8481         bool binary = false;
8482         const char *restline_data = NULL;
8483         size_t restline_size = 0;
8484         size_t restline_last = 0;
8485 
8486         while (matcher->find())
8487         {
8488           size_t current_lineno = matcher->lineno();
8489 
8490           if (lineno != current_lineno || flag_ungroup)
8491           {
8492             if (restline_data != NULL)
8493             {
8494               if (binary)
8495               {
8496                 out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, restline_size);
8497               }
8498               else
8499               {
8500                 bool lf_only = false;
8501                 if (restline_size > 0)
8502                 {
8503                   lf_only = restline_data[restline_size - 1] == '\n';
8504                   restline_size -= lf_only;
8505                   if (restline_size > 0)
8506                   {
8507                     out.str(color_sl);
8508                     out.str(restline_data, restline_size);
8509                     out.str(color_off);
8510                   }
8511                 }
8512                 out.nl(lf_only);
8513               }
8514 
8515               restline_data = NULL;
8516             }
8517 
8518             // --range: max line exceeded?
8519             if (flag_max_line > 0 && current_lineno > flag_max_line)
8520               break;
8521 
8522             // -m: max number of matches reached?
8523             if (flag_max_count > 0 && matches >= flag_max_count)
8524               break;
8525 
8526             // output blocked?
8527             if (out.eof)
8528               goto exit_search;
8529 
8530             const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8531             const char *bol = matcher->bol();
8532 
8533             // check CNF AND/OR/NOT matching
8534             if (matchers != NULL && !cnf_matching(bol, eol))
8535               continue;
8536 
8537             if (matches == 0 && (!flag_files || matchers == NULL))
8538             {
8539               // --max-files: max reached?
8540               if (!Stats::found_part())
8541                 goto exit_search;
8542             }
8543 
8544             binary = flag_hex || (!flag_text && is_binary(bol, eol - bol));
8545 
8546             if (binfile || (binary && !flag_hex && !flag_with_hex))
8547             {
8548               if (flag_binary_without_match)
8549               {
8550                 matches = 0;
8551               }
8552               else
8553               {
8554                 out.binary_file_matches(pathname, partname);
8555                 matches = 1;
8556               }
8557 
8558               if (flag_files && matchers != NULL && out.holding())
8559                 continue;
8560 
8561               goto done_search;
8562             }
8563 
8564             ++matches;
8565 
8566             size_t border = matcher->border();
8567             size_t first = matcher->first();
8568             const char *begin = matcher->begin();
8569             const char *end = matcher->end();
8570             size_t size = matcher->size();
8571 
8572             if (hex && !binary)
8573               out.dump.done();
8574 
8575             if (!flag_no_header)
8576             {
8577               const char *separator = lineno != current_lineno ? flag_separator : "+";
8578               out.header(pathname, partname, current_lineno, matcher, first, separator, binary);
8579             }
8580 
8581             hex = binary;
8582 
8583             lineno = current_lineno;
8584 
8585             if (binary)
8586             {
8587               out.dump.hex(Output::Dump::HEX_LINE, first - border, bol, border);
8588               out.dump.hex(Output::Dump::HEX_MATCH, first, begin, size);
8589 
8590               if (flag_ungroup)
8591               {
8592                 out.dump.hex(Output::Dump::HEX_LINE, matcher->last(), end, eol - end);
8593                 out.dump.done();
8594               }
8595               else
8596               {
8597                 restline.assign(end, eol - end);
8598                 restline_data = restline.c_str();
8599                 restline_size = restline.size();
8600                 restline_last = matcher->last();
8601               }
8602 
8603               lineno += matcher->lines() - 1;
8604             }
8605             else
8606             {
8607               out.str(color_sl);
8608               out.str(bol, border);
8609               out.str(color_off);
8610 
8611               // echo multi-line matches line-by-line
8612 
8613               const char *from = begin;
8614               const char *to;
8615 
8616               while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
8617               {
8618                 out.str(match_ms);
8619                 out.str(from, to - from);
8620                 out.str(match_off);
8621                 out.chr('\n');
8622 
8623                 out.header(pathname, partname, ++lineno, NULL, first + (to - begin) + 1, "|", false);
8624 
8625                 from = to + 1;
8626               }
8627 
8628               size -= from - begin;
8629               begin = from;
8630 
8631               out.str(match_ms);
8632               out.str(begin, size);
8633               out.str(match_off);
8634 
8635               if (flag_ungroup)
8636               {
8637                 if (eol > end)
8638                 {
8639                   bool lf_only = end[eol - end - 1] == '\n';
8640                   eol -= lf_only;
8641                   if (eol > end)
8642                   {
8643                     out.str(color_sl);
8644                     out.str(end, eol - end);
8645                     out.str(color_off);
8646                   }
8647                   out.nl(lf_only);
8648                 }
8649                 else if (matcher->hit_end())
8650                 {
8651                   out.nl();
8652                 }
8653                 else
8654                 {
8655                   out.check_flush();
8656                 }
8657               }
8658               else
8659               {
8660                 restline.assign(end, eol - end);
8661                 restline_data = restline.c_str();
8662                 restline_size = restline.size();
8663                 restline_last = matcher->last();
8664               }
8665             }
8666           }
8667           else
8668           {
8669             size_t size = matcher->size();
8670 
8671             if (size > 0)
8672             {
8673               size_t lines = matcher->lines();
8674 
8675               if (lines > 1 || colorize)
8676               {
8677                 size_t first = matcher->first();
8678                 size_t last = matcher->last();
8679                 const char *begin = matcher->begin();
8680 
8681                 if (binary)
8682                 {
8683                   out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, first - restline_last);
8684                   out.dump.hex(Output::Dump::HEX_MATCH, first, begin, size);
8685                 }
8686                 else
8687                 {
8688                   out.str(color_sl);
8689                   out.str(restline_data, first - restline_last);
8690                   out.str(color_off);
8691 
8692                   if (lines > 1)
8693                   {
8694                     // echo multi-line matches line-by-line
8695 
8696                     const char *from = begin;
8697                     const char *to;
8698                     size_t num = 1;
8699 
8700                     while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
8701                     {
8702                       out.str(match_ms);
8703                       out.str(from, to - from);
8704                       out.str(match_off);
8705                       out.chr('\n');
8706 
8707                       out.header(pathname, partname, lineno + num, NULL, first + (to - begin) + 1, "|", false);
8708 
8709                       from = to + 1;
8710                       ++num;
8711                     }
8712 
8713                     size -= from - begin;
8714                     begin = from;
8715                   }
8716 
8717                   out.str(match_ms);
8718                   out.str(begin, size);
8719                   out.str(match_off);
8720                 }
8721 
8722                 if (lines == 1)
8723                 {
8724                   restline_data += last - restline_last;
8725                   restline_size -= last - restline_last;
8726                   restline_last = last;
8727                 }
8728                 else
8729                 {
8730                   const char *eol = matcher->eol(true); // warning: call eol() before end()
8731                   const char *end = matcher->end();
8732 
8733                   binary = flag_hex || (!flag_text && is_binary(end, eol - end));
8734 
8735                   if (hex && !binary)
8736                     out.dump.done();
8737                   else if (!hex && binary)
8738                     out.nl();
8739 
8740                   if (hex != binary && !flag_no_header)
8741                     out.header(pathname, partname, lineno + lines - 1, matcher, last, flag_separator, binary);
8742 
8743                   hex = binary;
8744 
8745                   if (flag_ungroup)
8746                   {
8747                     if (binary)
8748                     {
8749                       out.dump.hex(Output::Dump::HEX_LINE, matcher->last(), end, eol - end);
8750                       out.dump.done();
8751                     }
8752                     else
8753                     {
8754                       if (eol > end)
8755                       {
8756                         bool lf_only = end[eol - end - 1] == '\n';
8757                         eol -= lf_only;
8758                         if (eol > end)
8759                         {
8760                           out.str(color_sl);
8761                           out.str(end, eol - end);
8762                           out.str(color_off);
8763                         }
8764                         out.nl(lf_only);
8765                       }
8766                       else if (matcher->hit_end())
8767                       {
8768                         out.nl();
8769                       }
8770                       else
8771                       {
8772                         out.check_flush();
8773                       }
8774                     }
8775                   }
8776                   else
8777                   {
8778                     restline.assign(end, eol - end);
8779                     restline_data = restline.c_str();
8780                     restline_size = restline.size();
8781                     restline_last = last;
8782                   }
8783 
8784                   lineno += lines - 1;
8785                 }
8786               }
8787             }
8788           }
8789         }
8790 
8791         if (restline_data != NULL)
8792         {
8793           if (binary)
8794           {
8795             out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, restline_size);
8796           }
8797           else
8798           {
8799             bool lf_only = false;
8800             if (restline_size > 0)
8801             {
8802               lf_only = restline_data[restline_size - 1] == '\n';
8803               restline_size -= lf_only;
8804               if (restline_size > 0)
8805               {
8806                 out.str(color_sl);
8807                 out.str(restline_data, restline_size);
8808                 out.str(color_off);
8809               }
8810             }
8811             out.nl(lf_only);
8812           }
8813 
8814           restline_data = NULL;
8815         }
8816 
8817         if (binary)
8818           out.dump.done();
8819       }
8820       else if (flag_before_context == 0 && flag_after_context == 0 && !flag_any_line)
8821       {
8822         // option -v without -A, -B, -C, -y
8823 
8824         if (!init_read())
8825           goto exit_search;
8826 
8827         // InvertMatchHandler requires lineno to be set precisely, i.e. after skipping --range lines
8828         size_t lineno = flag_min_line > 0 ? flag_min_line - 1 : 0;
8829         bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
8830         bool hex = false;
8831         bool binary = false;
8832         bool stop = false;
8833 
8834         // construct event handler functor with captured *this and some of the locals
8835         InvertMatchGrepHandler invert_match_handler(*this, pathname, lineno, binfile, hex, binary, matches, stop);
8836 
8837         // register an event handler to display non-matching lines
8838         matcher->set_handler(&invert_match_handler);
8839 
8840         // to get the context from the invert_match handler explicitly
8841         reflex::AbstractMatcher::Context context;
8842 
8843         while (matcher->find())
8844         {
8845           size_t current_lineno = matcher->lineno();
8846 
8847           if (lineno != current_lineno)
8848           {
8849             if (matchers != NULL)
8850             {
8851               const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8852               const char *bol = matcher->bol();
8853 
8854               // check CNF AND/OR/NOT matching
8855               if (!cnf_matching(bol, eol))
8856                 continue;
8857             }
8858 
8859             // get the lines before the matched line
8860             context = matcher->before();
8861 
8862             // display non-matching lines up to this line
8863             if (context.len > 0)
8864               invert_match_handler(*matcher, context.buf, context.len, context.num);
8865 
8866             if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
8867               break;
8868 
8869             if (binary)
8870               out.dump.done();
8871 
8872             // --range: max line exceeded?
8873             if (flag_max_line > 0 && current_lineno > flag_max_line)
8874               goto done_search;
8875 
8876             // --max-files: max reached?
8877             if (stop)
8878               goto exit_search;
8879 
8880             // -m: max number of matches reached?
8881             if (flag_max_count > 0 && matches >= flag_max_count)
8882               goto done_search;
8883 
8884             // output blocked?
8885             if (out.eof)
8886               goto exit_search;
8887           }
8888 
8889           lineno = current_lineno + matcher->lines() - 1;
8890         }
8891 
8892         // get the remaining context
8893         context = matcher->after();
8894 
8895         if (context.len > 0)
8896           invert_match_handler(*matcher, context.buf, context.len, context.num);
8897 
8898         if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
8899         {
8900           if (flag_binary_without_match)
8901             matches = 0;
8902           else
8903             out.binary_file_matches(pathname, partname);
8904         }
8905 
8906         if (binary)
8907           out.dump.done();
8908       }
8909       else if (flag_any_line)
8910       {
8911         // option -y
8912 
8913         if (!init_read())
8914           goto exit_search;
8915 
8916         // AnyLineGrepHandler requires lineno to be set precisely, i.e. after skipping --range lines
8917         size_t lineno = flag_min_line > 0 ? flag_min_line - 1 : 0;
8918         bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
8919         bool hex = false;
8920         bool binary = false;
8921         bool stop = false;
8922 
8923         // to display the rest of the matching line
8924         const char *restline_data = NULL;
8925         size_t restline_size = 0;
8926         size_t restline_last = 0;
8927 
8928         // construct event handler functor with captured *this and some of the locals
8929         AnyLineGrepHandler any_line_handler(*this, pathname, lineno, binfile, hex, binary, matches, stop, restline_data, restline_size, restline_last);
8930 
8931         // register an event handler functor to display non-matching lines
8932         matcher->set_handler(&any_line_handler);
8933 
8934         // to display colors with or without -v
8935         short v_hex_line = flag_invert_match ? Output::Dump::HEX_CONTEXT_LINE : Output::Dump::HEX_LINE;
8936         short v_hex_match = flag_invert_match ? Output::Dump::HEX_CONTEXT_MATCH : Output::Dump::HEX_MATCH;
8937         const char *v_color_sl = flag_invert_match ? color_cx : color_sl;
8938         const char *v_match_ms = flag_invert_match ? match_mc : match_ms;
8939 
8940         // to get the context from the any_line handler explicitly
8941         reflex::AbstractMatcher::Context context;
8942 
8943         while (matcher->find())
8944         {
8945           size_t current_lineno = matcher->lineno();
8946 
8947           if (lineno != current_lineno || flag_ungroup)
8948           {
8949             if (restline_data != NULL)
8950             {
8951               if (binary)
8952               {
8953                 out.dump.hex(v_hex_line, restline_last, restline_data, restline_size);
8954               }
8955               else
8956               {
8957                 bool lf_only = false;
8958                 if (restline_size > 0)
8959                 {
8960                   lf_only = restline_data[restline_size - 1] == '\n';
8961                   restline_size -= lf_only;
8962                   if (restline_size > 0)
8963                   {
8964                     out.str(v_color_sl);
8965                     out.str(restline_data, restline_size);
8966                     out.str(color_off);
8967                   }
8968                 }
8969                 out.nl(lf_only);
8970               }
8971 
8972               restline_data = NULL;
8973             }
8974 
8975             const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8976             const char *bol = matcher->bol();
8977 
8978             // check CNF AND/OR/NOT matching
8979             if (matchers != NULL && !cnf_matching(bol, eol))
8980               continue;
8981 
8982             // get the lines before the matched line
8983             context = matcher->before();
8984 
8985             if (context.len > 0)
8986             {
8987               any_line_handler(*matcher, context.buf, context.len, context.num);
8988 
8989               if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
8990               {
8991                 if (flag_binary_without_match)
8992                 {
8993                   matches = 0;
8994                 }
8995                 else
8996                 {
8997                   out.binary_file_matches(pathname, partname);
8998                   matches = 1;
8999                 }
9000 
9001                 if (flag_files && matchers != NULL && out.holding())
9002                   continue;
9003 
9004                 goto done_search;
9005               }
9006             }
9007 
9008             // --range: max line exceeded?
9009             if (flag_max_line > 0 && current_lineno > flag_max_line)
9010               break;
9011 
9012             // --max-files: max reached?
9013             if (stop)
9014               goto exit_search;
9015 
9016             if (!flag_invert_match)
9017             {
9018               if (matches == 0 && (!flag_files || matchers == NULL))
9019               {
9020                 // --max-files: max reached?
9021                 if (!Stats::found_part())
9022                   goto exit_search;
9023               }
9024 
9025               ++matches;
9026             }
9027 
9028             // -m: max number of matches reached?
9029             if (flag_max_count > 0 && matches >= flag_max_count)
9030               break;
9031 
9032             // output blocked?
9033             if (out.eof)
9034               goto exit_search;
9035 
9036             binary = flag_hex || (!flag_text && is_binary(bol, eol - bol));
9037 
9038             if (binfile || (binary && !flag_hex && !flag_with_hex))
9039             {
9040               if (flag_binary_without_match)
9041               {
9042                 matches = 0;
9043               }
9044               else if (flag_invert_match)
9045               {
9046                 lineno = current_lineno + matcher->lines() - 1;
9047                 continue;
9048               }
9049               else
9050               {
9051                 out.binary_file_matches(pathname, partname);
9052                 matches = 1;
9053               }
9054 
9055               if (flag_files && matchers != NULL && out.holding())
9056                 continue;
9057 
9058               goto done_search;
9059             }
9060 
9061             size_t border = matcher->border();
9062             size_t first = matcher->first();
9063             const char *begin = matcher->begin();
9064             const char *end = matcher->end();
9065             size_t size = matcher->size();
9066 
9067             if (hex && !binary)
9068               out.dump.done();
9069 
9070             if (!flag_no_header)
9071             {
9072               const char *separator = lineno != current_lineno ? flag_invert_match ? "-" : flag_separator : "+";
9073               out.header(pathname, partname, current_lineno, matcher, first, separator, binary);
9074             }
9075 
9076             hex = binary;
9077 
9078             lineno = current_lineno;
9079 
9080             if (binary)
9081             {
9082               out.dump.hex(v_hex_line, first - border, bol, border);
9083               out.dump.hex(v_hex_match, first, begin, size);
9084 
9085               if (flag_ungroup)
9086               {
9087                 out.dump.hex(v_hex_line, matcher->last(), end, eol - end);
9088                 out.dump.done();
9089               }
9090               else
9091               {
9092                 restline.assign(end, eol - end);
9093                 restline_data = restline.c_str();
9094                 restline_size = restline.size();
9095                 restline_last = matcher->last();
9096               }
9097 
9098               lineno += matcher->lines() - 1;
9099             }
9100             else
9101             {
9102               out.str(v_color_sl);
9103               out.str(bol, border);
9104               out.str(color_off);
9105 
9106               // echo multi-line matches line-by-line
9107 
9108               const char *from = begin;
9109               const char *to;
9110 
9111               while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
9112               {
9113                 out.str(v_match_ms);
9114                 out.str(from, to - from);
9115                 out.str(match_off);
9116                 out.chr('\n');
9117 
9118                 out.header(pathname, partname, ++lineno, NULL, first + (to - begin) + 1, "|", false);
9119 
9120                 from = to + 1;
9121               }
9122 
9123               size -= from - begin;
9124               begin = from;
9125 
9126               out.str(v_match_ms);
9127               out.str(begin, size);
9128               out.str(match_off);
9129 
9130               if (flag_ungroup)
9131               {
9132                 if (eol > end)
9133                 {
9134                   bool lf_only = end[eol - end - 1] == '\n';
9135                   eol -= end[eol - end - 1] == '\n';
9136                   if (eol > end)
9137                   {
9138                     out.str(v_color_sl);
9139                     out.str(end, eol - end);
9140                     out.str(color_off);
9141                   }
9142                   out.nl(lf_only);
9143                 }
9144                 else if (matcher->hit_end())
9145                 {
9146                   out.nl();
9147                 }
9148                 else
9149                 {
9150                   out.check_flush();
9151                 }
9152               }
9153               else
9154               {
9155                 restline.assign(end, eol - end);
9156                 restline_data = restline.c_str();
9157                 restline_size = restline.size();
9158                 restline_last = matcher->last();
9159               }
9160             }
9161           }
9162           else if (!binfile && (!binary || flag_hex || flag_with_hex))
9163           {
9164             size_t size = matcher->size();
9165 
9166             if (size > 0)
9167             {
9168               size_t lines = matcher->lines();
9169 
9170               if (lines > 1 || colorize)
9171               {
9172                 size_t first = matcher->first();
9173                 size_t last = matcher->last();
9174                 const char *begin = matcher->begin();
9175 
9176                 if (binary)
9177                 {
9178                   out.dump.hex(v_hex_line, restline_last, restline_data, first - restline_last);
9179                   out.dump.hex(v_hex_match, first, begin, size);
9180                 }
9181                 else
9182                 {
9183                   out.str(v_color_sl);
9184                   out.str(restline_data, first - restline_last);
9185                   out.str(color_off);
9186 
9187                   if (lines > 1)
9188                   {
9189                     // echo multi-line matches line-by-line
9190 
9191                     const char *from = begin;
9192                     const char *to;
9193                     size_t num = 1;
9194 
9195                     while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
9196                     {
9197                       out.str(v_match_ms);
9198                       out.str(from, to - from);
9199                       out.str(match_off);
9200                       out.chr('\n');
9201 
9202                       out.header(pathname, partname, lineno + num, NULL, first + (to - begin) + 1, "|", false);
9203 
9204                       from = to + 1;
9205                       ++num;
9206                     }
9207 
9208                     size -= from - begin;
9209                     begin = from;
9210                   }
9211 
9212                   out.str(v_match_ms);
9213                   out.str(begin, size);
9214                   out.str(match_off);
9215                 }
9216 
9217                 if (lines == 1)
9218                 {
9219                   restline_data += last - restline_last;
9220                   restline_size -= last - restline_last;
9221                   restline_last = last;
9222                 }
9223                 else
9224                 {
9225                   const char *eol = matcher->eol(true); // warning: call eol() before end()
9226                   const char *end = matcher->end();
9227 
9228                   binary = flag_hex || (!flag_text && is_binary(end, eol - end));
9229 
9230                   if (hex && !binary)
9231                     out.dump.done();
9232                   else if (!hex && binary)
9233                     out.nl();
9234 
9235                   if (hex != binary && !flag_no_header)
9236                     out.header(pathname, partname, lineno + lines - 1, matcher, last, flag_separator, binary);
9237 
9238                   hex = binary;
9239 
9240                   if (flag_ungroup)
9241                   {
9242                     if (binary)
9243                     {
9244                       out.dump.hex(v_hex_line, matcher->last(), end, eol - end);
9245                       out.dump.done();
9246                     }
9247                     else
9248                     {
9249                       if (eol > end)
9250                       {
9251                         bool lf_only = end[eol - end - 1] == '\n';
9252                         eol -= lf_only;
9253                         if (eol > end)
9254                         {
9255                           out.str(v_color_sl);
9256                           out.str(end, eol - end);
9257                           out.str(color_off);
9258                         }
9259                         out.nl(lf_only);
9260                       }
9261                       else if (matcher->hit_end())
9262                       {
9263                         out.nl();
9264                       }
9265                       else
9266                       {
9267                         out.check_flush();
9268                       }
9269                     }
9270                   }
9271                   else
9272                   {
9273                     restline.assign(end, eol - end);
9274                     restline_data = restline.c_str();
9275                     restline_size = restline.size();
9276                     restline_last = last;
9277                   }
9278 
9279                   lineno += lines - 1;
9280                 }
9281               }
9282             }
9283           }
9284         }
9285 
9286         if (restline_data != NULL)
9287         {
9288           if (binary)
9289           {
9290             out.dump.hex(v_hex_line, restline_last, restline_data, restline_size);
9291           }
9292           else
9293           {
9294             bool lf_only = false;
9295             if (restline_size > 0)
9296             {
9297               lf_only = restline_data[restline_size - 1] == '\n';
9298               restline_size -= lf_only;
9299               if (restline_size > 0)
9300               {
9301                 out.str(v_color_sl);
9302                 out.str(restline_data, restline_size);
9303                 out.str(color_off);
9304               }
9305             }
9306             out.nl(lf_only);
9307           }
9308 
9309           restline_data = NULL;
9310         }
9311 
9312         // get the remaining context
9313         context = matcher->after();
9314 
9315         if (context.len > 0)
9316           any_line_handler(*matcher, context.buf, context.len, context.num);
9317 
9318         if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
9319         {
9320           if (flag_binary_without_match)
9321             matches = 0;
9322           else
9323             out.binary_file_matches(pathname, partname);
9324         }
9325 
9326         if (binary)
9327           out.dump.done();
9328       }
9329       else if (!flag_invert_match)
9330       {
9331         // options -A, -B, -C without -v
9332 
9333         if (!init_read())
9334           goto exit_search;
9335 
9336         // ContextGrepHandler requires lineno to be set precisely, i.e. after skipping --range lines
9337         size_t lineno = flag_min_line > 0 ? flag_min_line - 1 : 0;
9338         bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
9339         bool hex = false;
9340         bool binary = false;
9341         bool stop = false;
9342 
9343         // to display the rest of the matching line
9344         const char *restline_data = NULL;
9345         size_t restline_size = 0;
9346         size_t restline_last = 0;
9347 
9348         // construct event handler functor with captured *this and some of the locals
9349         ContextGrepHandler context_handler(*this, pathname, lineno, binfile, hex, binary, matches, stop, restline_data, restline_size, restline_last);
9350 
9351         // register an event handler functor to display non-matching lines
9352         matcher->set_handler(&context_handler);
9353 
9354         // to get the context from the context handler explicitly
9355         reflex::AbstractMatcher::Context context;
9356 
9357         while (matcher->find())
9358         {
9359           size_t current_lineno = matcher->lineno();
9360 
9361           if (lineno != current_lineno || flag_ungroup)
9362           {
9363             if (restline_data != NULL)
9364             {
9365               if (binary)
9366               {
9367                 out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, restline_size);
9368               }
9369               else
9370               {
9371                 bool lf_only = false;
9372                 if (restline_size > 0)
9373                 {
9374                   lf_only = restline_data[restline_size - 1] == '\n';
9375                   restline_size -= lf_only;
9376                   if (restline_size > 0)
9377                   {
9378                     out.str(color_sl);
9379                     out.str(restline_data, restline_size);
9380                     out.str(color_off);
9381                   }
9382                 }
9383                 out.nl(lf_only);
9384               }
9385 
9386               restline_data = NULL;
9387             }
9388 
9389             const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
9390             const char *bol = matcher->bol();
9391 
9392             // check CNF AND/OR/NOT matching
9393             if (matchers != NULL && !cnf_matching(bol, eol))
9394               continue;
9395 
9396             // get the lines before the matched line
9397             context = matcher->before();
9398 
9399             if (context.len > 0)
9400               context_handler(*matcher, context.buf, context.len, context.num);
9401 
9402             if (binfile || (binary && !flag_hex && !flag_with_hex))
9403             {
9404               if (flag_binary_without_match)
9405               {
9406                 matches = 0;
9407               }
9408               else
9409               {
9410                 out.binary_file_matches(pathname, partname);
9411                 matches = 1;
9412               }
9413 
9414               if (flag_files && matchers != NULL && out.holding())
9415                 continue;
9416 
9417               goto done_search;
9418             }
9419 
9420             context_handler.output_before_context();
9421 
9422             // --range: max line exceeded?
9423             if (flag_max_line > 0 && current_lineno > flag_max_line)
9424               break;
9425 
9426             // --max-files: max reached?
9427             if (stop)
9428               goto exit_search;
9429 
9430             if (matches == 0 && (!flag_files || matchers == NULL))
9431             {
9432               // --max-files: max reached?
9433               if (!Stats::found_part())
9434                 goto exit_search;
9435             }
9436 
9437             ++matches;
9438 
9439             // -m: max number of matches reached?
9440             if (flag_max_count > 0 && matches >= flag_max_count)
9441               break;
9442 
9443             // output blocked?
9444             if (out.eof)
9445               goto exit_search;
9446 
9447             binary = flag_hex || (!flag_text && is_binary(bol, eol - bol));
9448 
9449             if (binfile || (binary && !flag_hex && !flag_with_hex))
9450             {
9451               if (flag_binary_without_match)
9452               {
9453                 matches = 0;
9454               }
9455               else
9456               {
9457                 out.binary_file_matches(pathname, partname);
9458                 matches = 1;
9459               }
9460 
9461               if (flag_files && matchers != NULL && out.holding())
9462                 continue;
9463 
9464               goto done_search;
9465             }
9466 
9467             size_t border = matcher->border();
9468             size_t first = matcher->first();
9469             const char *begin = matcher->begin();
9470             const char *end = matcher->end();
9471             size_t size = matcher->size();
9472 
9473             if (hex && !binary)
9474               out.dump.done();
9475 
9476             if (!flag_no_header)
9477             {
9478               const char *separator = lineno != current_lineno ? flag_invert_match ? "-" : flag_separator : "+";
9479               out.header(pathname, partname, current_lineno, matcher, first, separator, binary);
9480             }
9481 
9482             hex = binary;
9483 
9484             lineno = current_lineno;
9485 
9486             if (binary)
9487             {
9488               out.dump.hex(Output::Dump::HEX_LINE, first - border, bol, border);
9489               out.dump.hex(Output::Dump::HEX_MATCH, first, begin, size);
9490 
9491               if (flag_ungroup)
9492               {
9493                 out.dump.hex(Output::Dump::HEX_LINE, matcher->last(), end, eol - end);
9494                 out.dump.done();
9495               }
9496               else
9497               {
9498                 restline.assign(end, eol - end);
9499                 restline_data = restline.c_str();
9500                 restline_size = restline.size();
9501                 restline_last = matcher->last();
9502               }
9503 
9504               lineno += matcher->lines() - 1;
9505             }
9506             else
9507             {
9508               out.str(color_sl);
9509               out.str(bol, border);
9510               out.str(color_off);
9511 
9512               // echo multi-line matches line-by-line
9513 
9514               const char *from = begin;
9515               const char *to;
9516 
9517               while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
9518               {
9519                 out.str(match_ms);
9520                 out.str(from, to - from);
9521                 out.str(match_off);
9522                 out.chr('\n');
9523 
9524                 out.header(pathname, partname, ++lineno, NULL, first + (to - begin) + 1, "|", false);
9525 
9526                 from = to + 1;
9527               }
9528 
9529               size -= from - begin;
9530               begin = from;
9531 
9532               out.str(match_ms);
9533               out.str(begin, size);
9534               out.str(match_off);
9535 
9536               if (flag_ungroup)
9537               {
9538                 if (eol > end)
9539                 {
9540                   bool lf_only = end[eol - end - 1] == '\n';
9541                   eol -= lf_only;
9542                   if (eol > end)
9543                   {
9544                     out.str(color_sl);
9545                     out.str(end, eol - end);
9546                     out.str(color_off);
9547                   }
9548                   out.nl(lf_only);
9549                 }
9550                 else if (matcher->hit_end())
9551                 {
9552                   out.nl();
9553                 }
9554                 else
9555                 {
9556                   out.check_flush();
9557                 }
9558               }
9559               else
9560               {
9561                 restline.assign(end, eol - end);
9562                 restline_data = restline.c_str();
9563                 restline_size = restline.size();
9564                 restline_last = matcher->last();
9565               }
9566             }
9567           }
9568           else
9569           {
9570             size_t size = matcher->size();
9571 
9572             if (size > 0)
9573             {
9574               size_t lines = matcher->lines();
9575 
9576               if (lines > 1 || colorize)
9577               {
9578                 size_t first = matcher->first();
9579                 size_t last = matcher->last();
9580                 const char *begin = matcher->begin();
9581 
9582                 if (binary)
9583                 {
9584                   out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, first - restline_last);
9585                   out.dump.hex(Output::Dump::HEX_MATCH, first, begin, size);
9586                 }
9587                 else
9588                 {
9589                   out.str(color_sl);
9590                   out.str(restline_data, first - restline_last);
9591                   out.str(color_off);
9592 
9593                   if (lines > 1)
9594                   {
9595                     // echo multi-line matches line-by-line
9596 
9597                     const char *from = begin;
9598                     const char *to;
9599                     size_t num = 1;
9600 
9601                     while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
9602                     {
9603                       out.str(match_ms);
9604                       out.str(from, to - from);
9605                       out.str(match_off);
9606                       out.chr('\n');
9607 
9608                       out.header(pathname, partname, lineno + num, NULL, first + (to - begin) + 1, "|", false);
9609 
9610                       from = to + 1;
9611                       ++num;
9612                     }
9613 
9614                     size -= from - begin;
9615                     begin = from;
9616                   }
9617 
9618                   out.str(match_ms);
9619                   out.str(begin, size);
9620                   out.str(match_off);
9621                 }
9622 
9623                 if (lines == 1)
9624                 {
9625                   restline_data += last - restline_last;
9626                   restline_size -= last - restline_last;
9627                   restline_last = last;
9628                 }
9629                 else
9630                 {
9631                   const char *eol = matcher->eol(true); // warning: call eol() before end()
9632                   const char *end = matcher->end();
9633 
9634                   binary = flag_hex || (!flag_text && is_binary(end, eol - end));
9635 
9636                   if (hex && !binary)
9637                     out.dump.done();
9638                   else if (!hex && binary)
9639                     out.nl();
9640 
9641                   if (hex != binary && !flag_no_header)
9642                     out.header(pathname, partname, lineno + lines - 1, matcher, last, flag_separator, binary);
9643 
9644                   hex = binary;
9645 
9646                   if (flag_ungroup)
9647                   {
9648                     if (binary)
9649                     {
9650                       out.dump.hex(Output::Dump::HEX_LINE, matcher->last(), end, eol - end);
9651                       out.dump.done();
9652                     }
9653                     else
9654                     {
9655                       if (eol > end)
9656                       {
9657                         bool lf_only = end[eol - end - 1] == '\n';
9658                         eol -= lf_only;
9659                         if (eol > end)
9660                         {
9661                           out.str(color_sl);
9662                           out.str(end, eol - end);
9663                           out.str(color_off);
9664                         }
9665                         out.nl(lf_only);
9666                       }
9667                       else if (matcher->hit_end())
9668                       {
9669                         out.nl();
9670                       }
9671                       else
9672                       {
9673                         out.check_flush();
9674                       }
9675                     }
9676                   }
9677                   else
9678                   {
9679                     restline.assign(end, eol - end);
9680                     restline_data = restline.c_str();
9681                     restline_size = restline.size();
9682                     restline_last = last;
9683                   }
9684 
9685                   lineno += lines - 1;
9686                 }
9687               }
9688             }
9689           }
9690 
9691           context_handler.set_after_lineno(lineno + 1);
9692         }
9693 
9694         if (restline_data != NULL)
9695         {
9696           if (binary)
9697           {
9698             out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, restline_size);
9699           }
9700           else
9701           {
9702             bool lf_only = false;
9703             if (restline_size > 0)
9704             {
9705               lf_only = restline_data[restline_size - 1] == '\n';
9706               restline_size -= lf_only;
9707               if (restline_size > 0)
9708               {
9709                 out.str(color_sl);
9710                 out.str(restline_data, restline_size);
9711                 out.str(color_off);
9712               }
9713             }
9714             out.nl(lf_only);
9715           }
9716 
9717           restline_data = NULL;
9718         }
9719 
9720         // get the remaining context
9721         context = matcher->after();
9722 
9723         if (context.len > 0)
9724           context_handler(*matcher, context.buf, context.len, context.num);
9725 
9726         if (binfile || (binary && !flag_hex && !flag_with_hex))
9727         {
9728           if (flag_binary_without_match)
9729             matches = 0;
9730           else if (matches > 0)
9731             out.binary_file_matches(pathname, partname);
9732         }
9733 
9734         if (binary)
9735           out.dump.done();
9736       }
9737       else
9738       {
9739         // options -A, -B, -C with -v
9740 
9741         if (!init_read())
9742           goto exit_search;
9743 
9744         // InvertContextGrepHandler requires lineno to be set precisely, i.e. after skipping --range lines
9745         size_t lineno = flag_min_line > 0 ? flag_min_line - 1 : 0;
9746         size_t last_lineno = 0;
9747         size_t after = flag_after_context;
9748         bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
9749         bool hex = false;
9750         bool binary = false;
9751         bool stop = false;
9752 
9753         // to display the rest of the matching line
9754         const char *restline_data = NULL;
9755         size_t restline_size = 0;
9756         size_t restline_last = 0;
9757 
9758         // construct event handler functor with captured *this and some of the locals
9759         InvertContextGrepHandler invert_context_handler(*this, pathname, lineno, binfile, hex, binary, matches, stop, restline_data, restline_size, restline_last);
9760 
9761         // register an event handler functor to display non-matching lines
9762         matcher->set_handler(&invert_context_handler);
9763 
9764         // to get the context from the any_line handler explicitly
9765         reflex::AbstractMatcher::Context context;
9766 
9767         while (matcher->find())
9768         {
9769           size_t current_lineno = matcher->lineno();
9770           size_t lines = matcher->lines();
9771 
9772           if (last_lineno + 1 >= current_lineno)
9773             after += lines;
9774           else if (last_lineno != current_lineno)
9775             after = 0;
9776 
9777           if (last_lineno != current_lineno)
9778           {
9779             if (restline_data != NULL)
9780             {
9781               if (binary)
9782               {
9783                 out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, restline_last, restline_data, restline_size);
9784               }
9785               else
9786               {
9787                 bool lf_only = false;
9788                 if (restline_size > 0)
9789                 {
9790                   lf_only = restline_data[restline_size - 1] == '\n';
9791                   restline_size -= lf_only;
9792                   if (restline_size > 0)
9793                   {
9794                     out.str(color_cx);
9795                     out.str(restline_data, restline_size);
9796                     out.str(color_off);
9797                   }
9798                 }
9799                 out.nl(lf_only);
9800               }
9801 
9802               restline_data = NULL;
9803             }
9804 
9805             const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
9806             const char *bol = matcher->bol();
9807 
9808             // check CNF AND/OR/NOT matching
9809             if (matchers != NULL && !cnf_matching(bol, eol))
9810               continue;
9811 
9812             // get the lines before the matched line
9813             context = matcher->before();
9814 
9815             if (context.len > 0)
9816             {
9817               invert_context_handler(*matcher, context.buf, context.len, context.num);
9818 
9819               if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
9820               {
9821                 if (flag_binary_without_match)
9822                 {
9823                   matches = 0;
9824                 }
9825                 else
9826                 {
9827                   out.binary_file_matches(pathname, partname);
9828                   matches = 1;
9829                 }
9830 
9831                 goto done_search;
9832               }
9833             }
9834 
9835             lineno = current_lineno;
9836 
9837             // --range: max line exceeded?
9838             if (flag_max_line > 0 && lineno > flag_max_line)
9839               break;
9840 
9841             // --max-files: max reached?
9842             if (stop)
9843               goto exit_search;
9844 
9845             /* logically OK but dead code because -v
9846             if (matches == 0 && !flag_invert_match && (!flag_files || matchers == NULL))
9847             {
9848               // --max-files: max reached?
9849               if (!Stats::found_part())
9850                 goto exit_search;
9851             }
9852             */
9853 
9854             // -m: max number of matches reached?
9855             if (flag_max_count > 0 && matches >= flag_max_count)
9856               break;
9857 
9858             // output blocked?
9859             if (out.eof)
9860               goto exit_search;
9861 
9862             if (after < flag_after_context)
9863             {
9864               binary = flag_hex || (!flag_text && is_binary(bol, eol - bol));
9865 
9866               if (binfile || (binary && !flag_hex && !flag_with_hex))
9867               {
9868                 if (flag_binary_without_match)
9869                 {
9870                   matches = 0;
9871                 }
9872                 else // if (flag_invert_match) is true
9873                 {
9874                   lineno = last_lineno = current_lineno + matcher->lines() - 1;
9875                   continue;
9876                 }
9877                 /* logically OK but dead code because -v
9878                 else
9879                 {
9880                   out.binary_file_matches(pathname, partname);
9881                   matches = 1;
9882                 }
9883                 */
9884 
9885                 goto done_search;
9886               }
9887 
9888               size_t border = matcher->border();
9889               size_t first = matcher->first();
9890               const char *begin = matcher->begin();
9891               const char *end = matcher->end();
9892               size_t size = matcher->size();
9893 
9894               if (hex && !binary)
9895                 out.dump.done();
9896 
9897               if (!flag_no_header)
9898                 out.header(pathname, partname, lineno, matcher, first, "-", binary);
9899 
9900               hex = binary;
9901 
9902               if (binary)
9903               {
9904                 if (flag_hex || flag_with_hex)
9905                 {
9906                   out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, first - border, bol, border);
9907                   out.dump.hex(Output::Dump::HEX_CONTEXT_MATCH, first, begin, size);
9908 
9909                   restline.assign(end, eol - end);
9910                   restline_data = restline.c_str();
9911                   restline_size = restline.size();
9912                   restline_last = matcher->last();
9913                 }
9914               }
9915               else
9916               {
9917                 out.str(color_cx);
9918                 out.str(bol, border);
9919                 out.str(color_off);
9920 
9921                 if (lines > 1)
9922                 {
9923                   // echo multi-line matches line-by-line
9924 
9925                   const char *from = begin;
9926                   const char *to;
9927                   size_t num = 1;
9928 
9929                   while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
9930                   {
9931                     out.str(match_mc);
9932                     out.str(from, to - from);
9933                     out.str(match_off);
9934                     out.chr('\n');
9935 
9936                     out.header(pathname, partname, lineno + num, NULL, first + (to - begin) + 1, "-", false);
9937 
9938                     from = to + 1;
9939                     ++num;
9940                   }
9941 
9942                   size -= from - begin;
9943                   begin = from;
9944                 }
9945 
9946                 out.str(match_mc);
9947                 out.str(begin, size);
9948                 out.str(match_off);
9949 
9950                 restline.assign(end, eol - end);
9951                 restline_data = restline.c_str();
9952                 restline_size = restline.size();
9953                 restline_last = matcher->last();
9954               }
9955             }
9956             else if (flag_before_context > 0)
9957             {
9958               binary = flag_hex || (!flag_text && is_binary(bol, eol - bol));
9959 
9960               if (binfile || (binary && !flag_hex && !flag_with_hex))
9961               {
9962                 if (flag_binary_without_match)
9963                 {
9964                   matches = 0;
9965                 }
9966                 else // if (flag_invert_match) is true
9967                 {
9968                   lineno = last_lineno = current_lineno + matcher->lines() - 1;
9969                   continue;
9970                 }
9971                 /* logically OK but dead code because -v
9972                 else
9973                 {
9974                   out.binary_file_matches(pathname, partname);
9975                   matches = 1;
9976                 }
9977                 */
9978 
9979                 goto done_search;
9980               }
9981 
9982               if (hex && !binary)
9983                 out.dump.done();
9984               hex = binary;
9985 
9986               const char *begin = matcher->begin();
9987               size_t size = matcher->size();
9988               size_t offset = matcher->first();
9989 
9990               if (lines == 1)
9991               {
9992                 invert_context_handler.add_before_context_line(bol, eol, matcher->columno(), offset - (begin - bol));
9993                 invert_context_handler.add_before_context_match(begin - bol, size, offset);
9994               }
9995               else
9996               {
9997                 // add lines to the before context
9998 
9999                 const char *from = begin;
10000                 const char *to;
10001 
10002                 while ((to = static_cast<const char*>(memchr(from, '\n', eol - from))) != NULL)
10003                 {
10004                   if (from == begin)
10005                   {
10006                     invert_context_handler.add_before_context_line(bol, to + 1, matcher->columno(), offset - (begin - bol));
10007                     invert_context_handler.add_before_context_match(begin - bol, to - from + 1, offset);
10008                   }
10009                   else
10010                   {
10011                     invert_context_handler.add_before_context_line(from, to + 1, 1, offset);
10012                     invert_context_handler.add_before_context_match(0, to + 1 < from + size ? to - from + 1 : size, offset);
10013                   }
10014 
10015                   size -= to - from + 1;
10016                   offset += to - from + 1;
10017                   from = to + 1;
10018                 }
10019               }
10020             }
10021           }
10022           else if (after < flag_after_context)
10023           {
10024             size_t size = matcher->size();
10025 
10026             if (size > 0)
10027             {
10028               if (lines > 1 || colorize)
10029               {
10030                 size_t first = matcher->first();
10031                 size_t last = matcher->last();
10032                 const char *begin = matcher->begin();
10033 
10034                 if (binary)
10035                 {
10036                   out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, restline_last, restline_data, first - restline_last);
10037                   out.dump.hex(Output::Dump::HEX_CONTEXT_MATCH, first, begin, size);
10038                 }
10039                 else
10040                 {
10041                   out.str(color_cx);
10042                   out.str(restline_data, first - restline_last);
10043                   out.str(color_off);
10044 
10045                   if (lines > 1)
10046                   {
10047                     // echo multi-line matches line-by-line
10048 
10049                     const char *from = begin;
10050                     const char *to;
10051                     size_t num = 1;
10052 
10053                     while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
10054                     {
10055                       out.str(match_mc);
10056                       out.str(from, to - from);
10057                       out.str(match_off);
10058                       out.chr('\n');
10059 
10060                       out.header(pathname, partname, lineno + num, NULL, first + (to - begin) + 1, "-", false);
10061 
10062                       from = to + 1;
10063                       ++num;
10064                     }
10065 
10066                     size -= from - begin;
10067                     begin = from;
10068                   }
10069 
10070                   out.str(match_mc);
10071                   out.str(begin, size);
10072                   out.str(match_off);
10073                 }
10074 
10075                 if (lines == 1)
10076                 {
10077                   restline_data += last - restline_last;
10078                   restline_size -= last - restline_last;
10079                   restline_last = last;
10080                 }
10081                 else
10082                 {
10083                   const char *eol = matcher->eol(true); // warning: call eol() before end()
10084                   const char *end = matcher->end();
10085 
10086                   binary = flag_hex || (!flag_text && is_binary(end, eol - end));
10087 
10088                   if (hex && !binary)
10089                     out.dump.done();
10090                   else if (!hex && binary)
10091                     out.nl();
10092 
10093                   if (hex != binary && !flag_no_header)
10094                     out.header(pathname, partname, lineno + lines - 1, matcher, last, "-", binary);
10095 
10096                   hex = binary;
10097 
10098                   restline.assign(end, eol - end);
10099                   restline_data = restline.c_str();
10100                   restline_size = restline.size();
10101                   restline_last = last;
10102                 }
10103               }
10104             }
10105           }
10106           else
10107           {
10108             if (restline_data != NULL)
10109             {
10110               if (binary)
10111               {
10112                 out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, restline_last, restline_data, restline_size);
10113               }
10114               else
10115               {
10116                 bool lf_only = false;
10117                 if (restline_size > 0)
10118                 {
10119                   lf_only = restline_data[restline_size - 1] == '\n';
10120                   restline_size -= lf_only;
10121                   if (restline_size > 0)
10122                   {
10123                     out.str(color_cx);
10124                     out.str(restline_data, restline_size);
10125                     out.str(color_off);
10126                   }
10127                 }
10128                 out.nl(lf_only);
10129               }
10130 
10131               restline_data = NULL;
10132             }
10133 
10134             if (flag_before_context > 0)
10135             {
10136               const char *eol = matcher->eol(true); // warning: call eol() before bol()
10137               const char *bol = matcher->bol();
10138               const char *begin = matcher->begin();
10139               size_t size = matcher->size();
10140               size_t offset = matcher->first();
10141 
10142               if (lines == 1)
10143               {
10144                 invert_context_handler.add_before_context_match(begin - bol, size, offset);
10145               }
10146               else
10147               {
10148                 // add lines to the before context
10149 
10150                 const char *end = matcher->end();
10151 
10152                 binary = flag_hex || (!flag_text && is_binary(end, eol - end));
10153 
10154                 if (binfile || (binary && !flag_hex && !flag_with_hex))
10155                 {
10156                   if (flag_binary_without_match)
10157                   {
10158                     matches = 0;
10159                   }
10160                   else // if (flag_invert_match) is true
10161                   {
10162                     lineno = last_lineno = current_lineno + matcher->lines() - 1;
10163                     continue;
10164                   }
10165                   /* logically OK but dead code because -v
10166                   else
10167                   {
10168                     out.binary_file_matches(pathname, partname);
10169                     matches = 1;
10170                   }
10171                   */
10172 
10173                   goto done_search;
10174                 }
10175 
10176                 if (hex && !binary)
10177                   out.dump.done();
10178                 hex = binary;
10179 
10180                 const char *from = begin;
10181                 const char *to;
10182 
10183                 while ((to = static_cast<const char*>(memchr(from, '\n', eol - from))) != NULL)
10184                 {
10185                   if (from == begin)
10186                   {
10187                     invert_context_handler.add_before_context_match(begin - bol, to - from + 1, offset);
10188                   }
10189                   else
10190                   {
10191                     invert_context_handler.add_before_context_line(from, to + 1, 1, offset);
10192                     invert_context_handler.add_before_context_match(0, to + 1 < from + size ? to - from + 1 : size, offset);
10193                   }
10194 
10195                   size -= to - from + 1;
10196                   offset += to - from + 1;
10197                   from = to + 1;
10198                 }
10199               }
10200             }
10201           }
10202 
10203           lineno = last_lineno = current_lineno + lines - 1;
10204         }
10205 
10206         if (restline_data != NULL)
10207         {
10208           if (binary)
10209           {
10210             out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, restline_last, restline_data, restline_size);
10211           }
10212           else
10213           {
10214             bool lf_only = false;
10215             if (restline_size > 0)
10216             {
10217               lf_only = restline_data[restline_size - 1] == '\n';
10218               restline_size -= lf_only;
10219               if (restline_size > 0)
10220               {
10221                 out.str(color_cx);
10222                 out.str(restline_data, restline_size);
10223                 out.str(color_off);
10224               }
10225             }
10226             out.nl(lf_only);
10227           }
10228 
10229           restline_data = NULL;
10230         }
10231 
10232         // get the remaining context
10233         context = matcher->after();
10234 
10235         if (context.len > 0)
10236           invert_context_handler(*matcher, context.buf, context.len, context.num);
10237 
10238         if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
10239         {
10240           if (flag_binary_without_match)
10241             matches = 0;
10242           else
10243             out.binary_file_matches(pathname, partname);
10244         }
10245 
10246         if (binary)
10247           out.dump.done();
10248       }
10249 
10250 done_search:
10251 
10252       // --files: check if all CNF conditions are met globally to launch output or reset matches
10253       if (flag_files && matchers != NULL)
10254         if (!cnf_satisfied())
10255           matches = 0;
10256 
10257       // any matches in this file or archive?
10258       if (matches > 0)
10259         matched = true;
10260 
10261       // --break: add a line break when applicable
10262       if (flag_break && (matches > 0 || flag_any_line) && !flag_quiet && !flag_files_with_matches && !flag_count && flag_format == NULL)
10263         out.nl();
10264     }
10265 
10266     catch (EXIT_SEARCH&)
10267     {
10268       // --files: cnf_matching() rejected a file, no need to search this file any further
10269     }
10270 
10271     catch (...)
10272     {
10273       // this should never happen
10274       warning("exception while searching", pathname);
10275     }
10276 
10277 exit_search:
10278 
10279     // flush and release output to allow other workers to output results
10280     out.release();
10281 
10282     // close file or -z: loop over next extracted archive parts, when applicable
10283   } while (close_file(pathname));
10284 
10285   // this file or archive has a match
10286   if (matched)
10287     Stats::found_file();
10288 }
10289 
10290 // read globs from a file and split them into files or dirs to include or exclude
split_globs(FILE * file,std::vector<std::string> & files,std::vector<std::string> & dirs)10291 void split_globs(FILE *file, std::vector<std::string>& files, std::vector<std::string>& dirs)
10292 {
10293   // read globs from the specified file or files
10294   reflex::BufferedInput input(file);
10295   std::string line;
10296 
10297   while (true)
10298   {
10299     // read the next line
10300     if (getline(input, line))
10301       break;
10302 
10303     // trim white space from either end
10304     trim(line);
10305 
10306     // add glob to files or dirs using gitignore glob pattern rules
10307     if (!line.empty() && line.front() != '#')
10308     {
10309       if (line.front() != '!' || line.size() > 1)
10310       {
10311         if (line.back() == '/')
10312         {
10313           if (line.size() > 1)
10314             line.pop_back();
10315           dirs.emplace_back(line);
10316         }
10317         else
10318         {
10319           files.emplace_back(line);
10320         }
10321       }
10322     }
10323   }
10324 }
10325 
10326 // display format with option --format-begin and --format-end
format(const char * format,size_t matches)10327 void format(const char *format, size_t matches)
10328 {
10329   const char *sep = NULL;
10330   size_t len = 0;
10331   const char *s = format;
10332   while (*s != '\0')
10333   {
10334     const char *a = NULL;
10335     const char *t = s;
10336     while (*s != '\0' && *s != '%')
10337       ++s;
10338     fwrite(t, 1, s - t, output);
10339     if (*s == '\0' || *(s + 1) == '\0')
10340       break;
10341     ++s;
10342     if (*s == '[')
10343     {
10344       a = ++s;
10345       while (*s != '\0' && *s != ']')
10346         ++s;
10347       if (*s == '\0' || *(s + 1) == '\0')
10348         break;
10349       ++s;
10350     }
10351     int c = *s;
10352     switch (c)
10353     {
10354       case 'T':
10355         if (flag_initial_tab)
10356         {
10357           if (a)
10358             fwrite(a, 1, s - a - 1, output);
10359           fputc('\t', output);
10360         }
10361         break;
10362 
10363       case 'S':
10364         if (matches > 1)
10365         {
10366           if (a)
10367             fwrite(a, 1, s - a - 1, output);
10368           if (sep != NULL)
10369             fwrite(sep, 1, len, output);
10370           else
10371             fputs(flag_separator, output);
10372         }
10373         break;
10374 
10375       case '$':
10376         sep = a;
10377         len = s - a - 1;
10378         break;
10379 
10380       case 't':
10381         fputc('\t', output);
10382         break;
10383 
10384       case 's':
10385         if (sep != NULL)
10386           fwrite(sep, 1, len, output);
10387         else
10388           fputs(flag_separator, output);
10389         break;
10390 
10391       case '~':
10392 #ifdef OS_WIN
10393         fputc('\r', output);
10394 #endif
10395         fputc('\n', output);
10396         break;
10397 
10398       case 'm':
10399         fprintf(output, "%zu", matches);
10400         break;
10401 
10402       case '<':
10403         if (matches <= 1 && a)
10404           fwrite(a, 1, s - a - 1, output);
10405         break;
10406 
10407       case '>':
10408         if (matches > 1 && a)
10409           fwrite(a, 1, s - a - 1, output);
10410         break;
10411 
10412       case ',':
10413       case ':':
10414       case ';':
10415       case '|':
10416         if (matches > 1)
10417           fputc(c, output);
10418         break;
10419 
10420       default:
10421         fputc(c, output);
10422     }
10423     ++s;
10424   }
10425 }
10426 
// trim white space from either end of the line, modifies the line in place
void trim(std::string& line)
{
  size_t len = line.length();
  size_t pos;

  // find the first non-space character
  // note: isspace() takes an unsigned char value (or EOF), a plain char may be negative for non-ASCII bytes
  for (pos = 0; pos < len && isspace(static_cast<unsigned char>(line[pos])); ++pos)
    continue;

  // remove the leading white space
  if (pos > 0)
    line.erase(0, pos);

  len -= pos;

  // find the position just past the last non-space character
  for (pos = len; pos > 0 && isspace(static_cast<unsigned char>(line[pos - 1])); --pos)
    continue;

  // remove the trailing white space
  if (len > pos)
    line.erase(pos, len - pos);
}
10447 
10448 // trim path separators from an argv[] argument - important: modifies the argv[] string
trim_pathname_arg(const char * arg)10449 void trim_pathname_arg(const char *arg)
10450 {
10451   // remove trailing path separators after the drive prefix and path, if any - note: this truncates argv[] strings
10452   const char *path = strchr(arg, ':');
10453   if (path != NULL)
10454     ++path;
10455   else
10456     path = arg;
10457   size_t len = strlen(path);
10458   while (len > 1 && path[--len] == PATHSEPCHR)
10459     const_cast<char*>(path)[len] = '\0';
10460 }
10461 
10462 // convert GREP_COLORS and set the color substring to the ANSI SGR codes
set_color(const char * colors,const char * parameter,char color[COLORLEN])10463 void set_color(const char *colors, const char *parameter, char color[COLORLEN])
10464 {
10465   if (colors != NULL)
10466   {
10467     const char *s = strstr(colors, parameter);
10468 
10469     // check if substring parameter is present in colors
10470     if (s != NULL)
10471     {
10472       s += 3;
10473       char *t = color + 2;
10474 
10475 #ifdef WITH_EASY_GREP_COLORS
10476 
10477       // foreground colors: k=black, r=red, g=green, y=yellow b=blue, m=magenta, c=cyan, w=white
10478       // background colors: K=black, R=red, G=green, Y=yellow B=blue, M=magenta, C=cyan, W=white
10479       // bright colors: +k, +r, +g, +y, +b, +m, +c, +w, +K, +R, +G, +Y, +B, +M, +C, +W
10480       // modifiers: h=highlight, u=underline, i=invert, f=faint, n=normal, H=highlight off, U=underline off, I=invert off
10481       // semicolons are not required and abbreviations can be mixed with numeric ANSI SGR codes
10482 
10483       uint8_t offset = 30;
10484       bool sep = false;
10485 
10486       while (*s != '\0' && *s != ':' && t - color < COLORLEN - 6)
10487       {
10488         if (isdigit(*s))
10489         {
10490           if (sep)
10491             *t++ = ';';
10492           if (offset == 90)
10493           {
10494             *t++ = '1';
10495             *t++ = ';';
10496             offset = 30;
10497           }
10498           *t++ = *s++;
10499           while (isdigit(*s) && t - color < COLORLEN - 2)
10500             *t++ = *s++;
10501           sep = true;
10502           continue;
10503         }
10504 
10505         if (*s == '+')
10506         {
10507           offset = 90;
10508         }
10509         else if (*s == 'n')
10510         {
10511           if (sep)
10512             *t++ = ';';
10513           *t++ = '0';
10514           sep = true;
10515         }
10516         else if (*s == 'h')
10517         {
10518           if (sep)
10519             *t++ = ';';
10520           *t++ = '1';
10521           sep = true;
10522         }
10523         else if (*s == 'H')
10524         {
10525           if (sep)
10526             *t++ = ';';
10527           *t++ = '2';
10528           *t++ = '1';
10529           offset = 30;
10530           sep = true;
10531         }
10532         else if (*s == 'f')
10533         {
10534           if (sep)
10535             *t++ = ';';
10536           *t++ = '2';
10537           sep = true;
10538         }
10539         else if (*s == 'u')
10540         {
10541           if (sep)
10542             *t++ = ';';
10543           *t++ = '4';
10544           sep = true;
10545         }
10546         else if (*s == 'U')
10547         {
10548           if (sep)
10549             *t++ = ';';
10550           *t++ = '2';
10551           *t++ = '4';
10552           sep = true;
10553         }
10554         else if (*s == 'i')
10555         {
10556           if (sep)
10557             *t++ = ';';
10558           *t++ = '7';
10559           sep = true;
10560         }
10561         else if (*s == 'I')
10562         {
10563           if (sep)
10564             *t++ = ';';
10565           *t++ = '2';
10566           *t++ = '7';
10567           sep = true;
10568         }
10569         else if (*s == ',' || *s == ';' || isspace(*s))
10570         {
10571           if (sep)
10572             *t++ = ';';
10573           sep = false;
10574         }
10575         else
10576         {
10577           const char *c = "krgybmcw  KRGYBMCW";
10578           const char *k = strchr(c, *s);
10579 
10580           if (k != NULL)
10581           {
10582             if (sep)
10583               *t++ = ';';
10584             uint8_t n = offset + static_cast<uint8_t>(k - c);
10585             if (n >= 100)
10586             {
10587               *t++ = '1';
10588               n -= 100;
10589             }
10590             *t++ = '0' + n / 10;
10591             *t++ = '0' + n % 10;
10592             offset = 30;
10593             sep = true;
10594           }
10595         }
10596 
10597         ++s;
10598       }
10599 
10600 #else
10601 
10602       // traditional grep SGR parameters
10603       while ((*s == ';' || isdigit(*s)) && t - color < COLORLEN - 2)
10604         *t++ = *s++;
10605 
10606 #endif
10607 
10608       if (t > color + 2)
10609       {
10610         color[0] = '\033';
10611         color[1] = '[';
10612         *t++ = 'm';
10613         *t++ = '\0';
10614       }
10615       else
10616       {
10617         color[0] = '\0';
10618       }
10619     }
10620   }
10621 }
10622 
10623 // convert unsigned decimal to non-negative size_t, produce error when conversion fails
strtonum(const char * string,const char * message)10624 size_t strtonum(const char *string, const char *message)
10625 {
10626   char *rest = NULL;
10627   size_t size = static_cast<size_t>(strtoull(string, &rest, 10));
10628   if (rest == NULL || *rest != '\0')
10629     usage(message, string);
10630   return size;
10631 }
10632 
10633 // convert unsigned decimal to positive size_t, produce error when conversion fails or when the value is zero
strtopos(const char * string,const char * message)10634 size_t strtopos(const char *string, const char *message)
10635 {
10636   size_t size = strtonum(string, message);
10637   if (size == 0)
10638     usage(message, string);
10639   return size;
10640 }
10641 
10642 // convert one or two comma-separated unsigned decimals specifying a range to positive size_t, produce error when conversion fails or when the range is invalid
strtopos2(const char * string,size_t & pos1,size_t & pos2,const char * message,bool optional_first)10643 void strtopos2(const char *string, size_t& pos1, size_t& pos2, const char *message, bool optional_first)
10644 {
10645   char *rest = const_cast<char*>(string);
10646   if (*string != ',')
10647     pos1 = static_cast<size_t>(strtoull(string, &rest, 10));
10648   else
10649     pos1 = 0;
10650   if (*rest == ',')
10651     pos2 = static_cast<size_t>(strtoull(rest + 1, &rest, 10));
10652   else if (optional_first)
10653     pos2 = pos1, pos1 = 0;
10654   else
10655     pos2 = 0;
10656   if (rest == NULL || *rest != '\0' || (pos2 > 0 && pos1 > pos2))
10657     usage(message, string);
10658 }
10659 
10660 // convert unsigned decimal MAX fuzzy with optional prefix '+', '-', or '~' to positive size_t
strtofuzzy(const char * string,const char * message)10661 size_t strtofuzzy(const char *string, const char *message)
10662 {
10663   char *rest = NULL;
10664   size_t flags = 0;
10665   size_t max = 1;
10666   while (*string != '\0')
10667   {
10668     switch (*string)
10669     {
10670       case '+':
10671         flags |= reflex::FuzzyMatcher::INS;
10672         ++string;
10673         break;
10674       case '-':
10675         flags |= reflex::FuzzyMatcher::DEL;
10676         ++string;
10677         break;
10678       case '~':
10679         flags |= reflex::FuzzyMatcher::SUB;
10680         ++string;
10681         break;
10682       default:
10683         max = static_cast<size_t>(strtoull(string, &rest, 10));
10684         if (max == 0 || max > 255 || rest == NULL || *rest != '\0')
10685           usage(message, string);
10686         string = rest;
10687     }
10688   }
10689   return max | flags;
10690 }
10691 
10692 // display diagnostic message
usage(const char * message,const char * arg,const char * valid)10693 void usage(const char *message, const char *arg, const char *valid)
10694 {
10695   std::cerr << "ugrep: " << message << (arg != NULL ? arg : "");
10696   if (valid != NULL)
10697     std::cerr << ", did you mean " << valid << "?";
10698   std::cerr << std::endl;
10699   if (!flag_usage_warnings)
10700     exit(EXIT_ERROR);
10701   ++warnings;
10702 }
10703 
10704 // display usage/help information and exit
help(std::ostream & out)10705 void help(std::ostream& out)
10706 {
10707   out <<
10708     "Usage: ugrep [OPTIONS] [PATTERN] [-f FILE] [-e PATTERN] [FILE ...]\n\n\
10709     -A NUM, --after-context=NUM\n\
10710             Print NUM lines of trailing context after matching lines.  Places\n\
10711             a --group-separator between contiguous groups of matches.  See also\n\
10712             options -B, -C, and -y.\n\
10713     -a, --text\n\
10714             Process a binary file as if it were text.  This is equivalent to\n\
10715             the --binary-files=text option.  This option might output binary\n\
10716             garbage to the terminal, which can have problematic consequences if\n\
10717             the terminal driver interprets some of it as commands.\n\
10718     --and [[-e] PATTERN] ... -e PATTERN\n\
10719             Specify additional patterns to match.  Patterns must be specified\n\
10720             with -e.  Each -e PATTERN following this option is considered an\n\
10721             alternative pattern to match, i.e. each -e is interpreted as an OR\n\
10722             pattern.  For example, -e A -e B --and -e C -e D matches lines with\n\
10723             (`A' or `B') and (`C' or `D').  Note that multiple -e PATTERN are\n\
10724             alternations that bind more tightly together than --and.  Option\n\
10725             --stats displays the search patterns applied.  See also options\n\
10726             --not, --andnot, --bool, --files, and --lines.\n\
10727     --andnot [[-e] PATTERN] ...\n\
10728             Combines --and --not.  See also options --and, --not, and --bool.\n\
10729     -B NUM, --before-context=NUM\n\
10730             Print NUM lines of leading context before matching lines.  Places\n\
10731             a --group-separator between contiguous groups of matches.  See also\n\
10732             options -A, -C, and -y.\n\
10733     -b, --byte-offset\n\
10734             The offset in bytes of a matched line is displayed in front of the\n\
10735             respective matched line.  If -u is specified, displays the offset\n\
10736             for each pattern matched on the same line.  Byte offsets are exact\n\
10737             for ASCII, UTF-8, and raw binary input.  Otherwise, the byte offset\n\
10738             in the UTF-8 normalized input is displayed.\n\
10739     --binary-files=TYPE\n\
10740             Controls searching and reporting pattern matches in binary files.\n\
10741             TYPE can be `binary', `without-match`, `text`, `hex`, and\n\
10742             `with-hex'.  The default is `binary' to search binary files and to\n\
10743             report a match without displaying the match.  `without-match'\n\
10744             ignores binary matches.  `text' treats all binary files as text,\n\
10745             which might output binary garbage to the terminal, which can have\n\
10746             problematic consequences if the terminal driver interprets some of\n\
10747             it as commands.  `hex' reports all matches in hexadecimal.\n\
10748             `with-hex' only reports binary matches in hexadecimal, leaving text\n\
10749             matches alone.  A match is considered binary when matching a zero\n\
10750             byte or invalid UTF.  Short options are -a, -I, -U, -W, and -X.\n\
10751     --bool, -%\n\
10752             Specifies Boolean query patterns.  A Boolean query pattern is\n\
10753             composed of `AND', `OR', `NOT' operators and grouping with `(' `)'.\n\
10754             Spacing between subpatterns is the same as `AND', `|' is the same\n\
10755             as `OR', and a `-' is the same as `NOT'.  The `OR' operator binds\n\
10756             more tightly than `AND'.  For example, --bool 'A|B C|D' matches\n\
10757             lines with (`A' or `B') and (`C' or `D'), --bool 'A -B' matches\n\
10758             lines with `A' and not `B'.  Operators `AND', `OR', `NOT' require\n\
10759             proper spacing.  For example, --bool 'A OR B AND C OR D' matches\n\
10760             lines with (`A' or `B') and (`C' or `D'), --bool 'A AND NOT B'\n\
10761             matches lines with `A' without `B'.  Quoted subpatterns are matched\n\
10762             literally as strings.  For example, --bool 'A \"AND\"|\"OR\"' matches\n\
10763             lines with `A' and also either `AND' or `OR'.  Parenthesis are used\n\
10764             for grouping.  For example, --bool '(A B)|C' matches lines with `A'\n\
10765             and `B', or lines with `C'.  Note that all subpatterns in a Boolean\n\
10766             query pattern are regular expressions, unless option -F is used.\n\
10767             Options -E, -F, -G, -P, and -Z can be combined with --bool to match\n\
10768             subpatterns as strings or regular expressions (-E is the default.)\n\
10769             This option does not apply to -f FILE patterns.  Option --stats\n\
10770             displays the search patterns applied.  See also options --and,\n\
10771             --andnot, --not, --files, and --lines.\n\
10772     --break\n\
10773             Adds a line break between results from different files.\n\
10774     -C NUM, --context=NUM\n\
10775             Print NUM lines of leading and trailing context surrounding each\n\
10776             match.  Places a --group-separator between contiguous groups of\n\
10777             matches.  See also options -A, -B, and -y.\n\
10778     -c, --count\n\
10779             Only a count of selected lines is written to standard output.\n\
10780             If -o or -u is specified, counts the number of patterns matched.\n\
10781             If -v is specified, counts the number of non-matching lines.\n\
10782     --color[=WHEN], --colour[=WHEN]\n\
10783             Mark up the matching text with the expression stored in the\n\
10784             GREP_COLOR or GREP_COLORS environment variable.  WHEN can be\n\
10785             `never', `always', or `auto', where `auto' marks up matches only\n\
10786             when output on a terminal.  The default is `auto'.\n\
10787     --colors=COLORS, --colours=COLORS\n\
10788             Use COLORS to mark up text.  COLORS is a colon-separated list of\n\
10789             one or more parameters `sl=' (selected line), `cx=' (context line),\n\
10790             `mt=' (matched text), `ms=' (match selected), `mc=' (match\n\
10791             context), `fn=' (file name), `ln=' (line number), `cn=' (column\n\
10792             number), `bn=' (byte offset), `se=' (separator).  Parameter values\n\
10793             are ANSI SGR color codes or `k' (black), `r' (red), `g' (green),\n\
10794             `y' (yellow), `b' (blue), `m' (magenta), `c' (cyan), `w' (white).\n\
10795             Upper case specifies background colors.  A `+' qualifies a color as\n\
10796             bright.  A foreground and a background color may be combined with\n\
10797             font properties `n' (normal), `f' (faint), `h' (highlight), `i'\n\
10798             (invert), `u' (underline).  Parameter `hl' enables file name\n\
10799             hyperlinks.  Parameter `rv' reverses the `sl=' and `cx=' parameters\n\
10800             with option -v.  Selectively overrides GREP_COLORS.\n\
10801     --config[=FILE], ---[FILE]\n\
10802             Use configuration FILE.  The default FILE is `.ugrep'.  The working\n\
10803             directory is checked first for FILE, then the home directory.  The\n\
10804             options specified in the configuration FILE are parsed first,\n\
10805             followed by the remaining options specified on the command line.\n\
10806     --confirm\n\
10807             Confirm actions in -Q query mode.  The default is confirm.\n\
10808     --cpp\n\
10809             Output file matches in C++.  See also options --format and -u.\n\
10810     --csv\n\
10811             Output file matches in CSV.  If -H, -n, -k, or -b is specified,\n\
10812             additional values are output.  See also options --format and -u.\n\
10813     -D ACTION, --devices=ACTION\n\
10814             If an input file is a device, FIFO or socket, use ACTION to process\n\
10815             it.  By default, ACTION is `skip', which means that devices are\n\
10816             silently skipped.  If ACTION is `read', devices read just as if\n\
10817             they were ordinary files.\n\
10818     -d ACTION, --directories=ACTION\n\
10819             If an input file is a directory, use ACTION to process it.  By\n\
10820             default, ACTION is `skip', i.e., silently skip directories unless\n\
10821             specified on the command line.  If ACTION is `read', warn when\n\
10822             directories are read as input.  If ACTION is `recurse', read all\n\
10823             files under each directory, recursively, following symbolic links\n\
10824             only if they are on the command line.  This is equivalent to the -r\n\
10825             option.  If ACTION is `dereference-recurse', read all files under\n\
10826             each directory, recursively, following symbolic links.  This is\n\
10827             equivalent to the -R option.\n\
10828     --depth=[MIN,][MAX], -1, -2 ... -9, --10, --11 ...\n\
10829             Restrict recursive searches from MIN to MAX directory levels deep,\n\
10830             where -1 (--depth=1) searches the specified path without recursing\n\
10831             into subdirectories.  Note that -3 -5, -3-5, or -35 searches 3 to 5\n\
10832             levels deep.  Enables -R if -R or -r is not specified.\n\
10833     --dotall\n\
10834             Dot `.' in regular expressions matches anything, including newline.\n\
10835             Note that `.*' matches all input and should not be used.\n\
10836     -E, --extended-regexp\n\
10837             Interpret patterns as extended regular expressions (EREs). This is\n\
10838             the default.\n\
10839     -e PATTERN, --regexp=PATTERN\n\
10840             Specify a PATTERN used during the search of the input: an input\n\
10841             line is selected if it matches any of the specified patterns.\n\
10842             Note that longer patterns take precedence over shorter patterns.\n\
10843             This option is most useful when multiple -e options are used to\n\
10844             specify multiple patterns, when a pattern begins with a dash (`-'),\n\
10845             to specify a pattern after option -f or after the FILE arguments.\n\
10846     --encoding=ENCODING\n\
10847             The encoding format of the input, where ENCODING can be:";
10848   for (int i = 0; encoding_table[i].format != NULL; ++i)
10849     out << (i == 0 ? "" : ",") << (i % 4 ? " " : "\n            ") << "`" << encoding_table[i].format << "'";
10850   out << ".\n\
10851     --exclude=GLOB\n\
10852             Skip files whose name matches GLOB using wildcard matching, same as\n\
10853             -g ^GLOB.  GLOB can use **, *, ?, and [...] as wildcards, and \\ to\n\
10854             quote a wildcard or backslash character literally.  When GLOB\n\
10855             contains a `/', full pathnames are matched.  Otherwise basenames\n\
10856             are matched.  When GLOB ends with a `/', directories are excluded\n\
10857             as if --exclude-dir is specified.  Otherwise files are excluded.\n\
10858             Note that --exclude patterns take priority over --include patterns.\n\
10859             GLOB should be quoted to prevent shell globbing.  This option may\n\
10860             be repeated.\n\
10861     --exclude-dir=GLOB\n\
10862             Exclude directories whose name matches GLOB from recursive\n\
10863             searches, same as -g ^GLOB/.  GLOB can use **, *, ?, and [...] as\n\
10864             wildcards, and \\ to quote a wildcard or backslash character\n\
10865             literally.  When GLOB contains a `/', full pathnames are matched.\n\
10866             Otherwise basenames are matched.  Note that --exclude-dir patterns\n\
10867             take priority over --include-dir patterns.  GLOB should be quoted\n\
10868             to prevent shell globbing.  This option may be repeated.\n\
10869     --exclude-from=FILE\n\
10870             Read the globs from FILE and skip files and directories whose name\n\
10871             matches one or more globs.  A glob can use **, *, ?, and [...] as\n\
10872             wildcards, and \\ to quote a wildcard or backslash character\n\
10873             literally.  When a glob contains a `/', full pathnames are matched.\n\
10874             Otherwise basenames are matched.  When a glob ends with a `/',\n\
10875             directories are excluded as if --exclude-dir is specified.\n\
10876             Otherwise files are excluded.  A glob starting with a `!' overrides\n\
10877             previously-specified exclusions by including matching files.  Lines\n\
10878             starting with a `#' and empty lines in FILE are ignored.  When FILE\n\
10879             is a `-', standard input is read.  This option may be repeated.\n\
10880     --exclude-fs=MOUNTS\n\
10881             Exclude file systems specified by MOUNTS from recursive searches,\n\
10882             MOUNTS is a comma-separated list of mount points or pathnames of\n\
10883             directories on file systems.  Note that --exclude-fs mounts take\n\
10884             priority over --include-fs mounts.  This option may be repeated.\n"
10885 #ifndef HAVE_STATVFS
10886             "\
10887             This option is not available in this build configuration of ugrep.\n"
10888 #endif
10889             "\
10890     -F, --fixed-strings\n\
10891             Interpret pattern as a set of fixed strings, separated by newlines,\n\
10892             any of which is to be matched.  This makes ugrep behave as fgrep.\n\
10893             If a PATTERN is specified, or -e PATTERN or -N PATTERN, then this\n\
10894             option has no effect on -f FILE patterns to allow -f FILE patterns\n\
10895             to narrow or widen the scope of the PATTERN search.\n\
10896     -f FILE, --file=FILE\n\
10897             Read newline-separated patterns from FILE.  White space in patterns\n\
10898             is significant.  Empty lines in FILE are ignored.  If FILE does not\n\
10899             exist, the GREP_PATH environment variable is used as path to FILE.\n"
10900 #ifdef GREP_PATH
10901             "\
10902             If that fails, looks for FILE in " GREP_PATH ".\n"
10903 #endif
10904             "\
10905             When FILE is a `-', standard input is read.  Empty files contain no\n\
10906             patterns; thus nothing is matched.  This option may be repeated.\n"
10907 #ifndef OS_WIN
10908             "\
10909     --filter=COMMANDS\n\
10910             Filter files through the specified COMMANDS first before searching.\n\
10911             COMMANDS is a comma-separated list of `exts:command [option ...]',\n\
10912             where `exts' is a comma-separated list of filename extensions and\n\
10913             `command' is a filter utility.  The filter utility should read from\n\
10914             standard input and write to standard output.  Files matching one of\n\
10915             `exts' are filtered.  When `exts' is `*', files with non-matching\n\
10916             extensions are filtered.  One or more `option' separated by spacing\n\
10917             may be specified, which are passed verbatim to the command.  A `%'\n\
10918             as `option' expands into the pathname to search.  For example,\n\
10919             --filter='pdf:pdftotext % -' searches PDF files.  The `%' expands\n\
10920             into a `-' when searching standard input.  Option --label=.ext may\n\
10921             be used to specify extension `ext' when searching standard input.\n\
10922     --filter-magic-label=[+]LABEL:MAGIC\n\
10923             Associate LABEL with files whose signature \"magic bytes\" match the\n\
10924             MAGIC regex pattern.  Only files that have no filename extension\n\
10925             are labeled, unless +LABEL is specified.  When LABEL matches an\n\
10926             extension specified in --filter=COMMANDS, the corresponding command\n\
10927             is invoked.  This option may be repeated.\n"
10928 #endif
10929             "\
10930     --format=FORMAT\n\
10931             Output FORMAT-formatted matches.  For example --format='%f:%n:%O%~'\n\
10932             outputs matching lines `%O' with filename `%f` and line number `%n'\n\
10933             followed by a newline `%~'.  Context options -A, -B, -C, and -y are\n\
10934             ignored.  See `man ugrep' section FORMAT.\n\
10935     --free-space\n\
10936             Spacing (blanks and tabs) in regular expressions are ignored.\n\
10937     -G, --basic-regexp\n\
10938             Interpret pattern as a basic regular expression, i.e. make ugrep\n\
10939             behave as traditional grep.\n\
10940     -g GLOBS, --glob=GLOBS\n\
10941             Search only files whose name matches the specified comma-separated\n\
10942             list of GLOBS, same as --include='glob' for each `glob' in GLOBS.\n\
10943             When a `glob' is preceded by a `!' or a `^', skip files whose name\n\
10944             matches `glob', same as --exclude='glob'.  When `glob' contains a\n\
10945             `/', full pathnames are matched.  Otherwise basenames are matched.\n\
10946             When `glob' ends with a `/', directories are matched, same as\n\
10947             --include-dir='glob' and --exclude-dir='glob'.  A leading `/'\n\
10948             matches the working directory.  This option may be repeated and may\n\
10949             be combined with options -M, -O and -t to expand the recursive\n\
10950             search.\n\
10951     --group-separator[=SEP]\n\
10952             Use SEP as a group separator for context options -A, -B, and -C.\n\
10953             The default is a double hyphen (`--').\n\
10954     -H, --with-filename\n\
10955             Always print the filename with output lines.  This is the default\n\
10956             when there is more than one file to search.\n\
10957     -h, --no-filename\n\
10958             Never print filenames with output lines.  This is the default\n\
10959             when there is only one file (or only standard input) to search.\n\
10960     --heading, -+\n\
10961             Group matches per file.  Adds a heading and a line break between\n\
10962             results from different files.\n\
10963     --help [WHAT], -? [WHAT]\n\
10964             Display a help message, specifically on WHAT when specified.\n\
10965     --hexdump=[1-8][a][b][c][h]\n\
10966             Output matches in 1 to 8 columns of 8 hexadecimal octets.  The\n\
10967             default is 2 columns or 16 octets per line.  Option `a' outputs a\n\
10968             `*' for all hex lines that are identical to the previous hex line,\n\
10969             `b' removes all space breaks, `c' removes the character column and\n\
10970             `h' removes hex spacing.  Enables -X if -W or -X is not specified.\n\
10971     --hidden, -.\n\
10972             Search "
10973 #ifdef OS_WIN
10974             "Windows system and "
10975 #endif
10976             "hidden files and directories.\n\
10977     --hyperlink\n\
10978             Hyperlinks are enabled for file names when colors are enabled.\n\
10979             Same as --colors=hl.\n\
10980     -I, --ignore-binary\n\
10981             Ignore matches in binary files.  This option is equivalent to the\n\
10982             --binary-files=without-match option.\n\
10983     -i, --ignore-case\n\
10984             Perform case insensitive matching.  By default, ugrep is case\n\
10985             sensitive.  By default, this option applies to ASCII letters only.\n\
10986             Use options -P and -i for Unicode case insensitive matching.\n\
10987     --ignore-files[=FILE]\n\
10988             Ignore files and directories matching the globs in each FILE that\n\
10989             is encountered in recursive searches.  The default FILE is\n\
10990             `" DEFAULT_IGNORE_FILE "'.  Matching files and directories located in the\n\
10991             directory of a FILE's location and in directories below are ignored\n\
10992             by temporarily overriding the --exclude and --exclude-dir globs,\n\
10993             as if --exclude-from=FILE is locally enforced.  Globbing is the\n\
10994             same as --exclude-from=FILE and supports gitignore syntax, but\n\
10995             directories are not automatically excluded from searches (use a\n\
10996             glob ending with a `/' to identify directories to ignore, same as\n\
10997             git).  Files and directories explicitly specified as command line\n\
10998             arguments are never ignored.  This option may be repeated.\n\
10999     --include=GLOB\n\
11000             Search only files whose name matches GLOB using wildcard matching,\n\
11001             same as -g GLOB.  GLOB can use **, *, ?, and [...] as wildcards,\n\
11002             and \\ to quote a wildcard or backslash character literally.  When\n\
11003             GLOB contains a `/', full pathnames are matched.  Otherwise\n\
11004             basenames are matched.  When GLOB ends with a `/', directories are\n\
11005             included as if --include-dir is specified.  Otherwise files are\n\
11006             included.  Note that --exclude patterns take priority over\n\
11007             --include patterns.  GLOB should be quoted to prevent shell\n\
11008             globbing.  This option may be repeated.\n\
11009     --include-dir=GLOB\n\
11010             Only directories whose name matches GLOB are included in recursive\n\
11011             searches, same as -g GLOB/.  GLOB can use **, *, ?, and [...] as\n\
11012             wildcards, and \\ to quote a wildcard or backslash character\n\
11013             literally.  When GLOB contains a `/', full pathnames are matched.\n\
11014             Otherwise basenames are matched.  Note that --exclude-dir patterns\n\
11015             take priority over --include-dir patterns.  GLOB should be quoted\n\
11016             to prevent shell globbing.  This option may be repeated.\n\
11017     --include-from=FILE\n\
11018             Read the globs from FILE and search only files and directories\n\
11019             whose name matches one or more globs.  A glob can use **, *, ?, and\n\
11020             [...] as wildcards, and \\ to quote a wildcard or backslash\n\
11021             character literally.  When a glob contains a `/', full pathnames\n\
11022             are matched.  Otherwise basenames are matched.  When a glob ends\n\
11023             with a `/', directories are included as if --include-dir is\n\
11024             specified.  Otherwise files are included.  A glob starting with a\n\
11025             `!' overrides previously-specified inclusions by excluding matching\n\
11026             files.  Lines starting with a `#' and empty lines in FILE are\n\
11027             ignored.  When FILE is a `-', standard input is read.  This option\n\
11028             may be repeated.\n\
11029     --include-fs=MOUNTS\n\
11030             Only file systems specified by MOUNTS are included in recursive\n\
11031             searches.  MOUNTS is a comma-separated list of mount points or\n\
11032             pathnames of directories on file systems.  --include-fs=. restricts\n\
11033             recursive searches to the file system of the working directory\n\
11034             only.  Note that --exclude-fs mounts take priority over\n\
11035             --include-fs mounts.  This option may be repeated.\n"
11036 #ifndef HAVE_STATVFS
11037             "\
11038             This option is not available in this build configuration of ugrep.\n"
11039 #endif
11040             "\
11041     -J NUM, --jobs=NUM\n\
11042             Specifies the number of threads spawned to search files.  By\n\
11043             default an optimum number of threads is spawned to search files\n\
11044             simultaneously.  -J1 disables threading: files are searched in the\n\
11045             same order as specified.\n\
11046     -j, --smart-case\n\
11047             Perform case insensitive matching like option -i, unless a pattern\n\
11048             is specified with a literal ASCII upper case letter.\n\
11049     --json\n\
11050             Output file matches in JSON.  If -H, -n, -k, or -b is specified,\n\
11051             additional values are output.  See also options --format and -u.\n\
11052     -K FIRST[,LAST], --range=FIRST[,LAST]\n\
11053             Start searching at line FIRST, stop at line LAST when specified.\n\
11054     -k, --column-number\n\
11055             The column number of a matched pattern is displayed in front of the\n\
11056             respective matched line, starting at column 1.  Tabs are expanded\n\
11057             when columns are counted, see also option --tabs.\n\
11058     -L, --files-without-match\n\
11059             Only the names of files not containing selected lines are written\n\
11060             to standard output.  Pathnames are listed once per file searched.\n\
11061             If the standard input is searched, the string ``(standard input)''\n\
11062             is written.\n\
11063     -l, --files-with-matches\n\
11064             Only the names of files containing selected lines are written to\n\
11065             standard output.  ugrep will only search a file until a match has\n\
11066             been found, making searches potentially less expensive.  Pathnames\n\
11067             are listed once per file searched.  If the standard input is\n\
11068             searched, the string ``(standard input)'' is written.\n\
11069     --label=LABEL\n\
11070             Displays the LABEL value when input is read from standard input\n\
11071             where a file name would normally be printed in the output.\n\
11072             Associates a filename extension with standard input when LABEL has\n\
11073             a suffix.  The default value is `(standard input)'.\n\
11074     --line-buffered\n\
11075             Force output to be line buffered instead of block buffered.\n\
11076     --lines\n\
11077             Apply Boolean queries to match lines, the opposite of --files.\n\
11078             This is the default Boolean query mode to match specific lines.\n\
11079     -M MAGIC, --file-magic=MAGIC\n\
11080             Only files matching the signature pattern MAGIC are searched.  The\n\
11081             signature \"magic bytes\" at the start of a file are compared to\n\
11082             the MAGIC regex pattern.  When matching, the file will be searched.\n\
11083             When MAGIC is preceded by a `!' or a `^', skip files with matching\n\
11084             MAGIC signatures.  This option may be repeated and may be combined\n\
11085             with options -O and -t to expand the search.  Every file on the\n\
11086             search path is read, making searches potentially more expensive.\n\
11087     -m NUM, --max-count=NUM\n\
11088             Stop reading the input after NUM matches in each input file.\n\
11089     --match\n\
11090             Match all input.  Same as specifying an empty pattern to search.\n\
11091     --max-files=NUM\n\
11092             Restrict the number of files matched to NUM.  Note that --sort or\n\
11093             -J1 may be specified to produce replicable results.  If --sort is\n\
11094             specified, the number of threads spawned is limited to NUM.\n\
11095     --mmap[=MAX]\n\
11096             Use memory maps to search files.  By default, memory maps are used\n\
11097             under certain conditions to improve performance.  When MAX is\n\
11098             specified, use up to MAX mmap memory per thread.\n\
11099     -N PATTERN, --neg-regexp=PATTERN\n\
11100             Specify a negative PATTERN used during the search of the input:\n\
11101             an input line is selected only if it matches any of the specified\n\
11102             patterns unless a subpattern of PATTERN.  Same as -e (?^PATTERN).\n\
11103             Negative PATTERN matches are essentially removed before any other\n\
11104             patterns are matched.  Note that longer patterns take precedence\n\
11105             over shorter patterns.  This option may be repeated.\n\
11106     -n, --line-number\n\
11107             Each output line is preceded by its relative line number in the\n\
11108             file, starting at line 1.  The line number counter is reset for\n\
11109             each file processed.\n\
11110     --no-group-separator\n\
11111             Removes the group separator line from the output for context\n\
11112             options -A, -B, and -C.\n\
11113     --not [-e] PATTERN\n\
11114             Specifies that PATTERN should not match.  Note that -e A --not -e B\n\
11115             matches lines with `A' or lines without a `B'.  To match lines with\n\
11116             `A' that have no `B', specify -e A --andnot -e B.  Option --stats\n\
11117             displays the search patterns applied.  See also options --and,\n\
11118             --andnot, --bool, --files, and --lines.\n\
11119     -O EXTENSIONS, --file-extension=EXTENSIONS\n\
11120             Search only files whose filename extensions match the specified\n\
11121             comma-separated list of EXTENSIONS, same as --include='*.ext' for\n\
11122             each `ext' in EXTENSIONS.  When an `ext' is preceded by a `!' or a\n\
11123             `^', skip files whose filename extensions matches `ext', same as\n\
11124             --exclude='*.ext'.  This option may be repeated and may be combined\n\
11125             with options -g, -M and -t to expand the recursive search.\n\
11126     -o, --only-matching\n\
11127             Print only the matching part of lines.  When multiple lines match,\n\
11128             the line numbers with option -n are displayed using `|' as the\n\
11129             field separator for each additional line matched by the pattern.\n\
11130             If -u is specified, ungroups multiple matches on the same line.\n\
11131             This option cannot be combined with options -A, -B, -C, -v, and -y.\n\
11132     --only-line-number\n\
11133             The line number of the matching line in the file is output without\n\
11134             displaying the match.  The line number counter is reset for each\n\
11135             file processed.\n\
11136     --files\n\
11137             Apply Boolean queries to match files, the opposite of --lines.  A\n\
11138             file matches if all Boolean conditions are satisfied by the lines\n\
11139             matched in the file.  For example, --files -e A --and -e B -e C\n\
11140             --andnot -e D matches a file if some lines match `A' and some lines\n\
11141             match (`B' or `C') and no line in the file matches `D'.  May also\n\
11142             be specified as --files --bool 'A B|C -D'.  Option -v cannot be\n\
11143             specified with --files.  See also options --and, --andnot, --not,\n\
11144             --bool and --lines.\n\
11145     -P, --perl-regexp\n\
11146             Interpret PATTERN as a Perl regular expression"
11147 #if defined(HAVE_PCRE2)
11148             " using PCRE2.\n"
11149 #elif defined(HAVE_BOOST_REGEX)
11150             " using Boost.Regex.\n"
11151 #else
11152             ".\n\
11153             This option is not available in this build configuration of ugrep.\n"
11154 #endif
11155             "\
11156             Note that Perl pattern matching differs from the default grep POSIX\n\
11157             pattern matching.\n\
11158     -p, --no-dereference\n\
11159             If -R or -r is specified, no symbolic links are followed, even when\n\
11160             they are specified on the command line.\n\
11161     --pager[=COMMAND]\n\
11162             When output is sent to the terminal, uses COMMAND to page through\n\
11163             the output.  The default COMMAND is `" DEFAULT_PAGER_COMMAND "'.  Enables --heading\n\
11164             and --line-buffered.\n\
11165     --pretty\n\
11166             When output is sent to a terminal, enables --color, --heading, -n,\n\
11167             --sort and -T when not explicitly disabled or set.\n\
11168     -Q[DELAY], --query[=DELAY]\n\
11169             Query mode: user interface to perform interactive searches.  This\n\
11170             mode requires an ANSI capable terminal.  An optional DELAY argument\n\
11171             may be specified to reduce or increase the response time to execute\n\
11172             searches after the last key press, in increments of 100ms, where\n\
11173             the default is 5 (0.5s delay).  No whitespace may be given between\n\
11174             -Q and its argument DELAY.  Initial patterns may be specified with\n\
11175             -e PATTERN, i.e. a PATTERN argument requires option -e.  Press F1\n\
11176             or CTRL-Z to view the help screen.  Press F2 or CTRL-Y to invoke a\n\
11177             command to view or edit the file shown at the top of the screen.\n\
11178             The command can be specified with option --view, or defaults to\n\
11179             environment variable PAGER if defined, or EDITOR.  Press Tab and\n\
11180             Shift-Tab to navigate directories and to select a file to search.\n\
11181             Press Enter to select lines to output.  Press ALT-l for option -l\n\
11182             to list files, ALT-n for -n, etc.  Non-option commands include\n\
11183             ALT-] to increase fuzziness and ALT-} to increase context.  Enables\n\
11184             --heading.  See also options --confirm and --view.\n\
11185     -q, --quiet, --silent\n\
11186             Quiet mode: suppress all output.  ugrep will only search until a\n\
11187             match has been found.\n\
11188     -R, --dereference-recursive\n\
11189             Recursively read all files under each directory.  Follow all\n\
11190             symbolic links, unlike -r.  When -J1 is specified, files are\n\
11191             searched in the same order as specified.  Note that when no FILE\n\
11192             arguments are specified and input is read from a terminal,\n\
11193             recursive searches are performed as if -R is specified.\n\
11194     -r, --recursive\n\
11195             Recursively read all files under each directory, following symbolic\n\
11196             links only if they are on the command line.  When -J1 is specified,\n\
11197             files are searched in the same order as specified.\n\
11198     -S, --dereference\n\
11199             If -r is specified, all symbolic links are followed, like -R.  The\n\
11200             default is not to follow symbolic links.\n\
11201     -s, --no-messages\n\
11202             Silent mode: nonexistent and unreadable files are ignored, i.e.\n\
11203             their error messages are suppressed.\n\
11204     --save-config[=FILE]\n\
11205             Save configuration FILE.  By default `.ugrep' is saved.  If FILE is\n\
11206             a `-', write the configuration to standard output.\n\
11207     --separator[=SEP]\n\
11208             Use SEP as field separator between file name, line number, column\n\
11209             number, byte offset, and the matched line.  The default is a colon\n\
11210             (`:').\n\
11211     --sort[=KEY]\n\
11212             Displays matching files in the order specified by KEY in recursive\n\
11213             searches.  KEY can be `name' to sort by pathname (default), `best'\n\
11214             to sort by best match with option -Z (sort by best match requires\n\
11215             two passes over the input files), `size' to sort by file size,\n\
11216             `used' to sort by last access time, `changed' to sort by last\n\
11217             modification time, and `created' to sort by creation time.  Sorting\n\
11218             is reversed with `rname', `rbest', `rsize', `rused', `rchanged', or\n\
11219             `rcreated'.  Archive contents are not sorted.  Subdirectories are\n\
11220             sorted and displayed after matching files.  FILE arguments are\n\
11221             searched in the same order as specified.  Normally ugrep displays\n\
11222             matches in no particular order to improve performance.\n\
11223     --stats\n\
11224             Output statistics on the number of files and directories searched,\n\
11225             and the inclusion and exclusion constraints applied.\n\
11226     -T, --initial-tab\n\
11227             Add a tab space to separate the file name, line number, column\n\
11228             number, and byte offset with the matched line.\n\
11229     -t TYPES, --file-type=TYPES\n\
11230             Search only files associated with TYPES, a comma-separated list of\n\
11231             file types.  Each file type corresponds to a set of filename\n\
11232             extensions passed to option -O and filenames passed to option -g.\n\
11233             For capitalized file types, the search is expanded to include files\n\
11234             with matching file signature magic bytes, as if passed to option\n\
11235             -M.  When a type is preceded by a `!' or a `^', excludes files of\n\
11236             the specified type.  This option may be repeated.  The possible\n\
11237             file types can be (where -tlist displays a detailed list):";
11238   for (int i = 0; type_table[i].type != NULL; ++i)
11239     out << (i == 0 ? "" : ",") << (i % 7 ? " " : "\n            ") << "`" << type_table[i].type << "'";
11240   out << ".\n\
11241     --tabs[=NUM]\n\
11242             Set the tab size to NUM to expand tabs for option -k.  The value of\n\
11243             NUM may be 1, 2, 4, or 8.  The default tab size is 8.\n\
11244     --tag[=TAG[,END]]\n\
11245             Disables colors to mark up matches with TAG.  END marks the end of\n\
11246             a match if specified, otherwise TAG.  The default is `___'.\n\
11247     -U, --binary\n\
11248             Disables Unicode matching for binary file matching, forcing PATTERN\n\
11249             to match bytes, not Unicode characters.  For example, -U '\\xa3'\n\
11250             matches byte A3 (hex) instead of the Unicode code point U+00A3\n\
11251             represented by the UTF-8 sequence C2 A3.  See also option --dotall.\n\
11252     -u, --ungroup\n\
11253             Do not group multiple pattern matches on the same matched line.\n\
11254             Output the matched line again for each additional pattern match,\n\
11255             using `+' as the field separator.\n\
11256     -V, --version\n\
11257             Display version information and exit.\n\
11258     -v, --invert-match\n\
11259             Selected lines are those not matching any of the specified\n\
11260             patterns.\n\
11261     --view[=COMMAND]\n\
11262             Use COMMAND to view/edit a file in query mode when pressing CTRL-Y.\n\
11263     -W, --with-hex\n\
11264             Output binary matches in hexadecimal, leaving text matches alone.\n\
11265             This option is equivalent to the --binary-files=with-hex option.\n\
11266     -w, --word-regexp\n\
11267             The PATTERN is searched for as a word, such that the matching text\n\
11268             is preceded by a non-word character and is followed by a non-word\n\
11269             character.  Word characters are letters, digits, and the\n\
11270             underscore.  With option -P, word characters are Unicode letters,\n\
11271             digits, and underscore.  This option has no effect if -x is also\n\
11272             specified.  If a PATTERN is specified, or -e PATTERN or -N PATTERN,\n\
11273             then this option has no effect on -f FILE patterns to allow -f FILE\n\
11274             patterns to narrow or widen the scope of the PATTERN search.\n\
11275     -X, --hex\n\
11276             Output matches in hexadecimal.  This option is equivalent to the\n\
11277             --binary-files=hex option.  See also option --hexdump.\n\
11278     -x, --line-regexp\n\
11279             Select only those matches that exactly match the whole line, as if\n\
11280             the patterns are surrounded by ^ and $.  If a PATTERN is specified,\n\
11281             or -e PATTERN or -N PATTERN, then this option has no effect on\n\
11282             -f FILE patterns to allow -f FILE patterns to narrow or widen the\n\
11283             scope of the PATTERN search.\n\
11284     --xml\n\
11285             Output file matches in XML.  If -H, -n, -k, or -b is specified,\n\
11286             additional values are output.  See also options --format and -u.\n\
11287     -Y, --empty\n\
11288             Permits empty matches.  By default, empty matches are disabled,\n\
11289             unless a pattern begins with `^' or ends with `$'.  With this\n\
11290             option, empty-matching patterns such as x? and x*, match all input,\n\
11291             not only lines containing the character `x'.\n\
11292     -y, --any-line\n\
11293             Any matching or non-matching line is output.  Non-matching lines\n\
11294             are output with the `-' separator as context of the matching lines.\n\
11295             See also options -A, -B, and -C.\n\
11296     -Z[[+-~]MAX], --fuzzy[=[+-~]MAX]\n\
11297             Fuzzy mode: report approximate pattern matches within MAX errors.\n\
11298             By default, MAX is 1: one deletion, insertion or substitution is\n\
11299             allowed.  When `+' and/or `-' precede MAX, only insertions and/or\n\
11300             deletions are allowed, respectively.  When `~' precedes MAX,\n\
11301             substitution counts as one error.  For example, -Z+~3 allows up to\n\
11302             three insertions or substitutions, but no deletions.  The first\n\
11303             character of an approximate match always matches the begin of a\n\
11304             pattern.  Option --sort=best orders matching files by best match.\n\
11305             No whitespace may be given between -Z and its argument.\n\
11306     -z, --decompress\n\
11307             Decompress files to search, when compressed.  Archives (.cpio,\n\
11308             .pax, .tar and .zip) and compressed archives (e.g. .taz, .tgz,\n\
11309             .tpz, .tbz, .tbz2, .tb2, .tz2, .tlz, .txz, .tzst) are searched and\n\
11310             matching pathnames of files in archives are output in braces.  If\n\
11311             -g, -O, -M, or -t is specified, searches files within archives\n\
11312             whose name matches globs, matches file name extensions, matches\n\
11313             file signature magic bytes, or matches file types, respectively.\n"
11314 #ifndef HAVE_LIBZ
11315             "\
11316             This option is not available in this build configuration of ugrep.\n"
11317 #else
11318             "\
11319             Supported compression formats: gzip (.gz), compress (.Z), zip"
11320 #ifdef HAVE_LIBBZ2
11321             ",\n\
11322             bzip2 (requires suffix .bz, .bz2, .bzip2, .tbz, .tbz2, .tb2, .tz2)"
11323 #endif
11324 #ifdef HAVE_LIBLZMA
11325             ",\n\
11326             lzma and xz (requires suffix .lzma, .tlz, .xz, .txz)"
11327 #endif
11328 #ifdef HAVE_LIBLZ4
11329             ",\n\
11330             lz4 (requires suffix .lz4)"
11331 #endif
11332 #ifdef HAVE_LIBZSTD
11333             ",\n\
11334             zstd (requires suffix .zst, .zstd, .tzst)"
11335 #endif
11336             ".\n"
11337 #endif
11338             "\
11339     -0, --null\n\
11340             Prints a zero-byte (NUL) after the file name.  This option can be\n\
11341             used with commands such as `find -print0' and `xargs -0' to process\n\
11342             arbitrary file names.\n\
11343 \n\
11344     Long options may start with `--no-' to disable, when applicable.\n\
11345 \n\
11346     The ugrep utility exits with one of the following values:\n\
11347     0       One or more lines were selected.\n\
11348     1       No lines were selected.\n\
11349     >1      An error occurred.\n\
11350 \n\
11351     If -q or --quiet or --silent is used and a line is selected, the exit\n\
11352     status is 0 even if an error occurred.\n\n";
11353 }
11354 
11355 // display helpful information for WHAT, if specified, and exit
help(const char * what)11356 void help(const char *what)
11357 {
11358   if (what == NULL)
11359   {
11360     help(std::cout);
11361   }
11362   else
11363   {
11364     if (*what == '=')
11365       ++what;
11366 
11367     if (strncmp(what, "--no", 4) == 0)
11368       what += 4;
11369 
11370     if (*what == '\0')
11371     {
11372       help(std::cout);
11373     }
11374     else
11375     {
11376       std::stringstream text;
11377       help(text);
11378       const std::string& str = text.str();
11379 
11380       int found = 0;
11381 
11382       for (int pass = 0; pass < 2; ++pass)
11383       {
11384         size_t pos = 0;
11385 
11386         while (true)
11387         {
11388           size_t end = str.find("\n    -", pos + 1);
11389 
11390           if (end == std::string::npos)
11391             end = str.find("\n\n", pos + 1);
11392 
11393           if (end == std::string::npos)
11394             break;
11395 
11396           size_t nl = str.find('\n', pos + 1);
11397 
11398           // roughly find a case-independent match of WHAT
11399           for (size_t i = pos + 5; i < (pass == 0 ? nl : end); ++i)
11400           {
11401             size_t j = 0;
11402 
11403             for (j = 0; what[j] != '\0'; ++j)
11404               if (((what[j] ^ str.at(i + j)) & ~0x20) != 0)
11405                 break;
11406 
11407             if (what[j] == '\0')
11408             {
11409               if (pass == 0 ? i < nl: i > nl)
11410               {
11411                 if (found == 0 && pass == 0)
11412                   std::cout << "\nOptions and arguments:\n";
11413                 else if (found == 1 && pass == 1)
11414                   std::cout << "\n\nOther options:\n";
11415                 else if (found == 0)
11416                   std::cout << "\nNo matching option, other relevant options:\n";
11417 
11418                 std::cout << str.substr(pos, end - pos);
11419                 found = pass + 1;
11420               }
11421               break;
11422             }
11423           }
11424 
11425           pos = end;
11426         }
11427       }
11428 
11429       if (found == 0)
11430         std::cout << "ugrep --help: nothing appropriate for " << what;
11431 
11432       std::cout << "\n\n";
11433     }
11434   }
11435 
11436   exit(EXIT_ERROR);
11437 }
11438 
11439 // display version info
version()11440 void version()
11441 {
11442 #if defined(HAVE_PCRE2)
11443   uint32_t tmp = 0;
11444 #endif
11445   std::cout << "ugrep " UGREP_VERSION " " PLATFORM <<
11446 #if defined(HAVE_AVX512BW)
11447     (reflex::have_HW_AVX512BW() ? " +avx512" : (reflex::have_HW_AVX2() ? " +avx2" : reflex::have_HW_SSE2() ?  " +sse2" : " (no sse2!)")) <<
11448 #elif defined(HAVE_AVX2)
11449     (reflex::have_HW_AVX2() ? " +avx2" : reflex::have_HW_SSE2() ?  " +sse2" : " (no sse2!)") <<
11450 #elif defined(HAVE_SSE2)
11451     (reflex::have_HW_SSE2() ?  " +sse2" : " (no sse2!)") <<
11452 #elif defined(HAVE_NEON)
11453     " +neon" <<
11454 #endif
11455 #if defined(HAVE_PCRE2)
11456     (pcre2_config(PCRE2_CONFIG_JIT, &tmp) >= 0 && tmp != 0 ? " +pcre2_jit" : " +pcre2") <<
11457 #elif defined(HAVE_BOOST_REGEX)
11458     " +boost_regex" <<
11459 #endif
11460 #ifdef HAVE_LIBZ
11461     " +zlib" <<
11462 #endif
11463 #ifdef HAVE_LIBBZ2
11464     " +bzip2" <<
11465 #endif
11466 #ifdef HAVE_LIBLZMA
11467     " +lzma" <<
11468 #endif
11469 #ifdef HAVE_LIBLZ4
11470     " +lz4" <<
11471 #endif
11472 #ifdef HAVE_LIBZSTD
11473     " +zstd" <<
11474 #endif
11475     "\n"
11476     "License BSD-3-Clause: <https://opensource.org/licenses/BSD-3-Clause>\n"
11477     "Written by Robert van Engelen and others: <https://github.com/Genivia/ugrep>" << std::endl;
11478   exit(EXIT_OK);
11479 }
11480 
11481 // print to standard error: ... is a directory if -q is not specified
is_directory(const char * pathname)11482 void is_directory(const char *pathname)
11483 {
11484   if (!flag_no_messages)
11485     fprintf(stderr, "%sugrep: %s%s%s is a directory\n", color_off, color_high, pathname, color_off);
11486 }
11487 
#ifdef HAVE_LIBZ
// print to standard error: warning that PATHNAME cannot be decompressed with
// the optional MESSAGE as the reason, and count the warning, unless
// flag_no_messages is set to suppress messages
void cannot_decompress(const char *pathname, const char *message)
{
  if (flag_no_messages)
    return;

  // a NULL message is displayed as an empty reason
  const char *reason = message != NULL ? message : "";

  fprintf(stderr, "%sugrep: %swarning:%s %scannot decompress %s:%s %s%s%s\n", color_off, color_warning, color_off, color_high, pathname, color_off, color_message, reason, color_off);
  ++warnings;
}
#endif
11499 
11500 // print to standard error: warning message if -q is not specified, assumes errno is set, like perror()
warning(const char * message,const char * arg)11501 void warning(const char *message, const char *arg)
11502 {
11503   if (!flag_no_messages)
11504   {
11505     // use safe strerror_s() instead of strerror() when available
11506 #if defined(__STDC_LIB_EXT1__) || defined(OS_WIN)
11507     char errmsg[256];
11508     strerror_s(errmsg, sizeof(errmsg), errno);
11509 #else
11510     const char *errmsg = strerror(errno);
11511 #endif
11512     fprintf(stderr, "%sugrep: %swarning:%s %s%s%s%s:%s %s%s%s\n", color_off, color_warning, color_off, color_high, message ? message : "", message ? " " : "", arg ? arg : "", color_off, color_message, errmsg, color_off);
11513     ++warnings;
11514   }
11515 }
11516 
11517 // print to standard error: error message, assumes errno is set, like perror(), then exit
error(const char * message,const char * arg)11518 void error(const char *message, const char *arg)
11519 {
11520   // use safe strerror_s() instead of strerror() when available
11521 #if defined(__STDC_LIB_EXT1__) || defined(OS_WIN)
11522   char errmsg[256];
11523   strerror_s(errmsg, sizeof(errmsg), errno);
11524 #else
11525   const char *errmsg = strerror(errno);
11526 #endif
11527   fprintf(stderr, "%sugrep: %serror:%s %s%s%s%s:%s %s%s%s\n\n", color_off, color_error, color_off, color_high, message ? message : "", message ? " " : "", arg ? arg : "", color_off, color_message, errmsg, color_off);
11528   exit(EXIT_ERROR);
11529 }
11530 
11531 // print to standard error: abort message with exception details, then exit
abort(const char * message)11532 void abort(const char *message)
11533 {
11534   fprintf(stderr, "%sugrep: %s%s%s\n\n", color_off, color_error, message, color_off);
11535   exit(EXIT_ERROR);
11536 }
11537 
11538 // print to standard error: abort message with exception details, then exit
abort(const char * message,const std::string & what)11539 void abort(const char *message, const std::string& what)
11540 {
11541   fprintf(stderr, "%sugrep: %s%s%s%s%s%s\n\n", color_off, color_error, message ? message : "", color_off, color_high, what.c_str(), color_off);
11542   exit(EXIT_ERROR);
11543 }
11544