1 /******************************************************************************\
2 * Copyright (c) 2019, Robert van Engelen, Genivia Inc. All rights reserved. *
3 * *
4 * Redistribution and use in source and binary forms, with or without *
5 * modification, are permitted provided that the following conditions are met: *
6 * *
7 * (1) Redistributions of source code must retain the above copyright notice, *
8 * this list of conditions and the following disclaimer. *
9 * *
10 * (2) Redistributions in binary form must reproduce the above copyright *
11 * notice, this list of conditions and the following disclaimer in the *
12 * documentation and/or other materials provided with the distribution. *
13 * *
14 * (3) The name of the author may not be used to endorse or promote products *
15 * derived from this software without specific prior written permission. *
16 * *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED *
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF *
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO *
20 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, *
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; *
23 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, *
24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR *
25 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF *
26 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
27 \******************************************************************************/
28
29 /**
30 @file ugrep.cpp
31 @brief a pattern search utility written in C++11
32 @author Robert van Engelen - engelen@genivia.com
33 @copyright (c) 2019-2021, Robert van Engelen, Genivia Inc. All rights reserved.
34 @copyright (c) BSD-3 License - see LICENSE.txt
35
36 For download and installation instructions:
37
38 https://github.com/Genivia/ugrep
39
40 This program uses RE/flex:
41
42 https://github.com/Genivia/RE-flex
43
44 Optional libraries to support options -P and -z:
45
46 -P: PCRE2 or Boost.Regex
47 -z: zlib (.gz)
-z: libbz2 (.bz, .bz2, .bzip2)
49 -z: liblzma (.lzma, .xz)
50 -z: liblz4 (.lz4)
51 -z: libzstd (.zst, .zstd)
52
53 Build ugrep as follows:
54
55 $ ./configure --enable-colors
56 $ make -j
57
58 Git does not preserve time stamps so ./configure may fail, in that case do:
59
60 $ autoreconf -fi
61 $ ./configure --enable-colors
62 $ make -j
63
64 After this, you may want to test ugrep and install it (optional):
65
66 $ make test
67 $ sudo make install
68
69 */
70
71 #include "ugrep.hpp"
72 #include "glob.hpp"
73 #include "mmap.hpp"
74 #include "output.hpp"
75 #include "query.hpp"
76 #include "stats.hpp"
77 #include <reflex/matcher.h>
78 #include <reflex/fuzzymatcher.h>
79 #include <iomanip>
80 #include <cctype>
81 #include <limits>
82 #include <functional>
83 #include <list>
84 #include <deque>
85 #include <thread>
86 #include <memory>
87 #include <mutex>
88 #include <condition_variable>
89 #include <chrono>
90 #include <sstream>
91
92 #ifdef OS_WIN
93
94 // compiling for a windows OS, except Cygwin and MinGW
95
96 // optionally enable --color=auto by default
97 // #define WITH_COLOR
98
99 // optionally enable PCRE2 for -P
100 // #define HAVE_PCRE2
101
102 // optionally enable Boost.Regex for -P
103 // #define HAVE_BOOST_REGEX
104
105 // optionally enable zlib for -z
106 // #define HAVE_LIBZ
107
108 // optionally enable libbz2 for -z
109 // #define HAVE_LIBBZ2
110
111 // optionally enable liblzma for -z
112 // #define HAVE_LIBLZMA
113
114 // optionally enable liblz4 for -z
115 // #define HAVE_LIBLZ4
116
117 // optionally enable libzstd for -z
118 // #define HAVE_LIBZSTD
119
120 #include <stringapiset.h>
121 #include <direct.h>
122
123 #else
124
125 // not compiling for a windows OS
126
127 #include <signal.h>
128 #include <dirent.h>
129 #include <sys/select.h>
130 #include <sys/stat.h>
131 #include <unistd.h>
132
133 #endif
134
135 // use PCRE2 for option -P
136 #ifdef HAVE_PCRE2
137 # include <reflex/pcre2matcher.h>
138 #else
139 // use Boost.Regex for option -P
140 # ifdef HAVE_BOOST_REGEX
141 # include <reflex/boostmatcher.h>
142 # endif
143 #endif
144
145 // optional: specify an optimal decompression block size, default is 65536, must be larger than 1024 for tar extraction
146 // #define Z_BUF_LEN 16384
147 // #define Z_BUF_LEN 32768
148
149 // use zlib, libbz2, liblzma for option -z
150 #ifdef HAVE_LIBZ
151 # include "zstream.hpp"
152 #endif
153
154 // ugrep exit codes
155 #define EXIT_OK 0 // One or more lines were selected
156 #define EXIT_FAIL 1 // No lines were selected
157 #define EXIT_ERROR 2 // An error occurred
158
// limit the total number of threads spawned (i.e. limit spawn overhead), because grepping is practically IO bound
160 #ifndef MAX_JOBS
161 # define MAX_JOBS 16U
162 #endif
163
164 // limit the job queue size to wait to give the worker threads some slack
165 #ifndef MAX_JOB_QUEUE_SIZE
166 # define MAX_JOB_QUEUE_SIZE 65536
167 #endif
168
169 // a hard limit on the recursive search depth
170 #ifndef MAX_DEPTH
171 # define MAX_DEPTH 100
172 #endif
173
174 // --min-steal default, the minimum co-worker's queue size of pending jobs to steal a job from, smaller values result in higher job stealing rates, should not be less than 3
175 #ifndef MIN_STEAL
176 # define MIN_STEAL 3U
177 #endif
178
179 // use dirent d_type when available to improve performance
180 #ifdef HAVE_STRUCT_DIRENT_D_TYPE
181 # define DIRENT_TYPE_UNKNOWN DT_UNKNOWN
182 # define DIRENT_TYPE_LNK DT_LNK
183 # define DIRENT_TYPE_DIR DT_DIR
184 # define DIRENT_TYPE_REG DT_REG
185 #else
186 # define DIRENT_TYPE_UNKNOWN 0
187 # define DIRENT_TYPE_LNK 1
188 # define DIRENT_TYPE_DIR 1
189 # define DIRENT_TYPE_REG 1
190 #endif
191
192 // the -M MAGIC pattern DFA constructed before threads start, read-only afterwards
193 reflex::Pattern magic_pattern; // concurrent access is thread safe
194 reflex::Matcher magic_matcher; // concurrent access is not thread safe
195
196 // the --filter-magic-label pattern DFA
197 reflex::Pattern filter_magic_pattern; // concurrent access is thread safe
198
199 // TTY detected
200 bool tty_term = false;
201
202 // color term detected
203 bool color_term = false;
204
205 #ifdef OS_WIN
206
207 // CTRL-C handler
sigint(DWORD signal)208 BOOL WINAPI sigint(DWORD signal)
209 {
210 if (signal == CTRL_C_EVENT || signal == CTRL_BREAK_EVENT)
211 {
212 // be nice, reset colors on interrupt when sending output to a color TTY
213 if (color_term)
214 color_term = write(1, "\033[m", 3) > 0; // appease -Wunused-result
215 }
216
217 // return FALSE to invoke the next handler (when applicable) or just exit
218 return FALSE;
219 }
220
221 #else
222
223 // SIGINT and SIGTERM handler
sigint(int sig)224 static void sigint(int sig)
225 {
226 // reset to the default handler
227 signal(sig, SIG_DFL);
228
229 // be nice, reset colors on interrupt when sending output to a color TTY
230 if (color_term)
231 color_term = write(1, "\033[m", 3) > 0; // appease -Wunused-result
232
233 // signal again
234 kill(getpid(), sig);
235 }
236
237 #endif
238
239 // full home directory path
240 const char *home_dir = NULL;
241
242 // ANSI SGR substrings extracted from GREP_COLORS
243 char color_sl[COLORLEN]; // selected line
244 char color_cx[COLORLEN]; // context line
245 char color_mt[COLORLEN]; // matched text in any matched line
246 char color_ms[COLORLEN]; // matched text in a selected line
247 char color_mc[COLORLEN]; // matched text in a context line
248 char color_fn[COLORLEN]; // file name
249 char color_ln[COLORLEN]; // line number
250 char color_cn[COLORLEN]; // column number
251 char color_bn[COLORLEN]; // byte offset
252 char color_se[COLORLEN]; // separator
253
254 char match_ms[COLORLEN]; // --match or --tag: matched text in a selected line
255 char match_mc[COLORLEN]; // --match or --tag: matched text in a context line
256 char match_off[COLORLEN]; // --match or --tag: off
257
258 std::string color_wd; // hyperlink working directory path
259
260 const char *color_hl = NULL; // hyperlink
261 const char *color_st = NULL; // ST
262
263 const char *color_del = ""; // erase line after the cursor
264 const char *color_off = ""; // disable colors
265
266 const char *color_high = ""; // stderr highlighted text
267 const char *color_error = ""; // stderr error text
268 const char *color_warning = ""; // stderr warning text
269 const char *color_message = ""; // stderr error or warning message text
270
271 // number of concurrent threads for workers
272 size_t threads;
273
274 // number of warnings given
275 std::atomic_size_t warnings;
276
277 // redirectable source is standard input by default or a pipe
278 FILE *source = stdin;
279
280 // redirectable output destination is standard output by default or a pipe
281 FILE *output = stdout;
282
283 // Grep object handle, to cancel the search with cancel_ugrep()
284 struct Grep *grep_handle = NULL;
285
286 std::mutex grep_handle_mutex;
287
288 // set/clear the handle to be able to use cancel_ugrep()
289 void set_grep_handle(struct Grep*);
290 void clear_grep_handle();
291
292 #ifndef OS_WIN
293
294 // output file stat is available when stat() result is true
295 bool output_stat_result = false;
296 bool output_stat_regular = false;
297 struct stat output_stat;
298
299 // container of inodes to detect directory cycles when symlinks are traversed with --dereference
300 std::set<ino_t> visited;
301
302 #ifdef HAVE_STATVFS
303 // containers of file system ids to exclude from recursive searches or include in recursive searches
304 std::set<uint64_t> exclude_fs_ids, include_fs_ids;
305 #endif
306
307 #endif
308
309 // ugrep command-line options
310 bool flag_all_threads = false;
311 bool flag_any_line = false;
312 bool flag_basic_regexp = false;
313 bool flag_bool = false;
314 bool flag_confirm = DEFAULT_CONFIRM;
315 bool flag_count = false;
316 bool flag_cpp = false;
317 bool flag_csv = false;
318 bool flag_decompress = false;
319 bool flag_dereference = false;
320 bool flag_files = false;
321 bool flag_files_with_matches = false;
322 bool flag_files_without_match = false;
323 bool flag_fixed_strings = false;
324 bool flag_hex_ast = false;
325 bool flag_hex_cbr = true;
326 bool flag_hex_chr = true;
327 bool flag_hex_hbr = true;
328 bool flag_hidden = DEFAULT_HIDDEN;
329 bool flag_invert_match = false;
330 bool flag_json = false;
331 bool flag_line_buffered = false;
332 bool flag_line_regexp = false;
333 bool flag_match = false;
334 bool flag_no_dereference = false;
335 bool flag_no_header = false;
336 bool flag_no_messages = false;
337 bool flag_not = false;
338 bool flag_null = false;
339 bool flag_only_line_number = false;
340 bool flag_only_matching = false;
341 bool flag_perl_regexp = false;
342 bool flag_pretty = DEFAULT_PRETTY;
343 bool flag_quiet = false;
344 bool flag_sort_rev = false;
345 bool flag_stdin = false;
346 bool flag_usage_warnings = false;
347 bool flag_word_regexp = false;
348 bool flag_xml = false;
349 bool flag_hex = false;
350 bool flag_with_hex = false;
351 bool flag_no_filename = false;
352 bool flag_with_filename = false;
353 Flag flag_binary;
354 Flag flag_binary_without_match;
355 Flag flag_break;
356 Flag flag_byte_offset;
357 Flag flag_column_number;
358 Flag flag_empty;
359 Flag flag_dotall;
360 Flag flag_free_space;
361 Flag flag_heading;
362 Flag flag_ignore_case;
363 Flag flag_initial_tab;
364 Flag flag_line_number;
365 Flag flag_smart_case;
366 Flag flag_text;
367 Flag flag_ungroup;
368 Sort flag_sort_key = Sort::NA;
369 Action flag_devices_action = Action::SKIP;
370 Action flag_directories_action = Action::SKIP;
371 size_t flag_after_context = 0;
372 size_t flag_before_context = 0;
373 size_t flag_fuzzy = 0;
374 size_t flag_hex_columns = 16;
375 size_t flag_jobs = 0;
376 size_t flag_max_count = 0;
377 size_t flag_max_depth = 0;
378 size_t flag_max_files = 0;
379 size_t flag_max_line = 0;
380 size_t flag_max_mmap = DEFAULT_MAX_MMAP_SIZE;
381 size_t flag_min_depth = 0;
382 size_t flag_min_line = 0;
383 size_t flag_min_magic = 1;
384 size_t flag_min_steal = MIN_STEAL;
385 size_t flag_not_magic = 0;
386 size_t flag_query = 0;
387 size_t flag_tabs = DEFAULT_TABS;
388 const char *flag_apply_color = NULL;
389 const char *flag_binary_files = "binary";
390 const char *flag_color = DEFAULT_COLOR;
391 const char *flag_colors = NULL;
392 const char *flag_config = NULL;
393 const char *flag_devices = "skip";
394 const char *flag_directories = "skip";
395 const char *flag_encoding = NULL;
396 const char *flag_filter = NULL;
397 const char *flag_format = NULL;
398 const char *flag_format_begin = NULL;
399 const char *flag_format_close = NULL;
400 const char *flag_format_end = NULL;
401 const char *flag_format_open = NULL;
402 const char *flag_group_separator = "--";
403 const char *flag_hexdump = NULL;
404 const char *flag_label = "(standard input)";
405 const char *flag_pager = DEFAULT_PAGER;
406 const char *flag_view = "";
407 const char *flag_save_config = NULL;
408 const char *flag_separator = ":";
409 const char *flag_sort = NULL;
410 const char *flag_stats = NULL;
411 const char *flag_tag = NULL;
412 std::string flag_config_file;
413 std::set<std::string> flag_config_options;
414 std::vector<std::string> flag_regexp;
415 std::vector<std::string> flag_file;
416 std::vector<std::string> flag_file_type;
417 std::vector<std::string> flag_file_extension;
418 std::vector<std::string> flag_file_magic;
419 std::vector<std::string> flag_filter_magic_label;
420 std::vector<std::string> flag_glob;
421 std::vector<std::string> flag_ignore_files;
422 std::vector<std::string> flag_include;
423 std::vector<std::string> flag_include_dir;
424 std::vector<std::string> flag_include_from;
425 std::vector<std::string> flag_include_fs;
426 std::vector<std::string> flag_exclude;
427 std::vector<std::string> flag_exclude_dir;
428 std::vector<std::string> flag_exclude_from;
429 std::vector<std::string> flag_exclude_fs;
430 std::vector<std::string> flag_all_include;
431 std::vector<std::string> flag_all_include_dir;
432 std::vector<std::string> flag_all_exclude;
433 std::vector<std::string> flag_all_exclude_dir;
434 reflex::Input::file_encoding_type flag_encoding_type = reflex::Input::file_encoding::plain;
435
436 // the CNF of Boolean search queries and patterns
437 CNF bcnf;
438
439 // ugrep command-line arguments pointing to argv[]
440 const char *arg_pattern = NULL;
441 std::vector<const char*> arg_files;
442
443 #ifdef OS_WIN
444 // store UTF-8 arguments decoded from wargv[] in strings to re-populate argv[] with pointers
445 std::list<std::string> arg_strings;
446 #endif
447
448 // function protos
449 void options(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int argc, const char **argv);
450 void option_regexp(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg, bool is_neg = false);
451 void option_and(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv);
452 void option_and(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg);
453 void option_andnot(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv);
454 void option_andnot(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg);
455 void option_not(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv);
456 void option_not(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg);
457 void init(int argc, const char **argv);
458 void set_color(const char *colors, const char *parameter, char color[COLORLEN]);
459 void trim(std::string& line);
460 void trim_pathname_arg(const char *arg);
461 bool is_output(ino_t inode);
462 size_t strtonum(const char *string, const char *message);
463 size_t strtopos(const char *string, const char *message);
464 void strtopos2(const char *string, size_t& pos1, size_t& pos2, const char *message, bool optional_first = false);
465 size_t strtofuzzy(const char *string, const char *message);
466 void split_globs(FILE *file, std::vector<std::string>& files, std::vector<std::string>& dirs);
467 void format(const char *format, size_t matches);
468 void usage(const char *message, const char *arg = NULL, const char *valid = NULL);
469 void help(std::ostream& out);
470 void help(const char *what = NULL);
471 void version();
472 void is_directory(const char *pathname);
473 void cannot_decompress(const char *pathname, const char *message);
474
475 // open a file where - means stdin/stdout and an initial ~ expands to home directory
fopen_smart(FILE ** file,const char * filename,const char * mode)476 int fopen_smart(FILE **file, const char *filename, const char *mode)
477 {
478 if (filename == NULL || *filename == '\0')
479 return errno = ENOENT;
480
481 if (strcmp(filename, "-") == 0)
482 {
483 *file = strchr(mode, 'w') == NULL ? stdin : stdout;
484 return 0;
485 }
486
487 if (*filename == '~')
488 return fopenw_s(file, std::string(home_dir).append(filename + 1).c_str(), mode);
489
490 return fopenw_s(file, filename, mode);
491 }
492
493 // read a line from buffered input, returns true when eof
getline(reflex::BufferedInput & input,std::string & line)494 inline bool getline(reflex::BufferedInput& input, std::string& line)
495 {
496 int ch;
497
498 line.erase();
499 while ((ch = input.get()) != EOF)
500 {
501 if (ch == '\n')
502 break;
503 line.push_back(ch);
504 }
505 if (!line.empty() && line.back() == '\r')
506 line.pop_back();
507 return ch == EOF && line.empty();
508 }
509
// skip over one line in mmap memory: advances here past the next \n (or to the end when there
// is no \n) and reduces left accordingly, returns true when eof, i.e. when nothing is left
inline bool getline(const char*& here, size_t& left)
{
  if (left == 0)
    return true;

  // length of the line including its \n, or all that is left when the last line has no \n
  const void *newline = memchr(here, '\n', left);
  const size_t length = newline == NULL ? left : static_cast<size_t>(static_cast<const char*>(newline) - here) + 1;

  here += length;
  left -= length;

  return false;
}
528
529 // read a line from mmap memory or from buffered input or from unbuffered input, returns true when eof
getline(const char * & here,size_t & left,reflex::BufferedInput & buffered_input,reflex::Input & input,std::string & line)530 inline bool getline(const char*& here, size_t& left, reflex::BufferedInput& buffered_input, reflex::Input& input, std::string& line)
531 {
532 if (here != NULL)
533 {
534 // read line from mmap memory
535 if (left == 0)
536 return true;
537 const char *s = static_cast<const char*>(memchr(here, '\n', left));
538 if (s == NULL)
539 s = here + left;
540 else
541 ++s;
542 line.assign(here, s - here);
543 left -= s - here;
544 here = s;
545 return false;
546 }
547
548 int ch;
549
550 line.erase();
551
552 if (buffered_input.assigned())
553 {
554 // read line from buffered input
555 while ((ch = buffered_input.get()) != EOF)
556 {
557 line.push_back(ch);
558 if (ch == '\n')
559 break;
560 }
561 return ch == EOF && line.empty();
562 }
563
564 // read line from unbuffered input
565 while ((ch = input.get()) != EOF)
566 {
567 line.push_back(ch);
568 if (ch == '\n')
569 break;
570 }
571
572 return ch == EOF && line.empty();
573 }
574
// return true if s[0..n-1] contains a NUL or is non-displayable invalid UTF-8
inline bool is_binary(const char *s, size_t n)
{
  // a single byte is binary when it is a NUL or a stray continuation byte 10xxxxxx
  if (n == 1)
  {
    const char c = *s;
    return c == '\0' || (c & 0xc0) == 0x80;
  }

  if (memchr(s, '\0', n) != NULL)
    return true;

  const char *const end = s + n;
  const char *p = s;

  while (p < end)
  {
    // pass over bytes up to the next lead byte 11xxxxxx, a continuation byte met here is stray
    while (p < end && (*p & 0xc0) != 0xc0)
    {
      if ((*p & 0xc0) == 0x80)
        return true;
      ++p;
    }

    if (p >= end)
      return false;

    // a lead byte must be followed by at least one continuation byte
    ++p;
    if (p >= end || (*p & 0xc0) != 0x80)
      return true;

    // accept at most three more continuation bytes as part of this sequence
    ++p;
    for (int more = 0; more < 3 && p < end && (*p & 0xc0) == 0x80; ++more)
      ++p;
  }

  return false;
}
608
609 // check if a file's inode is the current output file
is_output(ino_t inode)610 inline bool is_output(ino_t inode)
611 {
612 #ifdef OS_WIN
613 return false; // TODO check that two FILE* on Windows are the same, is this possible?
614 #else
615 return output_stat_regular && inode == output_stat.st_ino;
616 #endif
617 }
618
619 // specify a line of input for the matcher to read, matcher must not use text() or rest() to keep the line contents unmodified
read_line(reflex::AbstractMatcher * matcher,const char * line,size_t size)620 inline void read_line(reflex::AbstractMatcher *matcher, const char *line, size_t size)
621 {
622 // safe cast: buffer() is read-only if no matcher.text() and matcher.rest() are used, size + 1 to include final \0
623 matcher->buffer(const_cast<char*>(line), size + 1);
624 }
625
626 // specify a line of input for the matcher to read, matcher must not use text() or rest() to keep the line contents unmodified
read_line(reflex::AbstractMatcher * matcher,const std::string & line)627 inline void read_line(reflex::AbstractMatcher *matcher, const std::string& line)
628 {
629 // safe cast: buffer() is read-only if no matcher.text() and matcher.rest() are used, size + 1 to include final \0
630 matcher->buffer(const_cast<char*>(line.c_str()), line.size() + 1);
631 }
632
633 // copy color buffers
copy_color(char to[COLORLEN],const char from[COLORLEN])634 inline void copy_color(char to[COLORLEN], const char from[COLORLEN])
635 {
636 size_t len = std::min(strlen(from), static_cast<size_t>(COLORLEN - 1));
637
638 memcpy(to, from, len);
639 to[len] = '\0';
640
641 char *comma = strchr(to, ',');
642 if (comma != NULL)
643 *comma = '\0';
644 }
645
646 // grep manages output, matcher, input, and decompression
647 struct Grep {
648
649 // CNF of AND/OR/NOT matchers
650 typedef std::list<std::list<std::unique_ptr<reflex::AbstractMatcher>>> Matchers;
651
652 // exit search exception
653 struct EXIT_SEARCH : public std::exception { };
654
655 // entry type
656 enum class Type { SKIP, DIRECTORY, OTHER };
657
658 // entry data extracted from directory contents
659 struct Entry {
EntryGrep::Entry660 Entry(std::string& pathname, ino_t inode, uint64_t info)
661 :
662 pathname(std::move(pathname)),
663 inode(inode),
664 info(info),
665 cost(0)
666 { }
667
668 std::string pathname;
669 ino_t inode;
670 uint64_t info;
671 uint16_t cost;
672
673 #ifndef OS_WIN
674 // get sortable info from stat buf
sort_infoGrep::Entry675 static uint64_t sort_info(const struct stat& buf)
676 {
677 #if defined(HAVE_STAT_ST_ATIM) && defined(HAVE_STAT_ST_MTIM) && defined(HAVE_STAT_ST_CTIM)
678 // tv_sec may be 64 bit, but value is small enough to multiply by 1000000 to fit in 64 bits
679 return static_cast<uint64_t>(flag_sort_key == Sort::SIZE ? buf.st_size : flag_sort_key == Sort::USED ? static_cast<uint64_t>(buf.st_atim.tv_sec) * 1000000 + buf.st_atim.tv_nsec / 1000 : flag_sort_key == Sort::CHANGED ? static_cast<uint64_t>(buf.st_mtim.tv_sec) * 1000000 + buf.st_mtim.tv_nsec / 1000 : flag_sort_key == Sort::CREATED ? static_cast<uint64_t>(buf.st_ctim.tv_sec) * 1000000 + buf.st_ctim.tv_nsec / 1000 : 0);
680 #elif defined(HAVE_STAT_ST_ATIMESPEC) && defined(HAVE_STAT_ST_MTIMESPEC) && defined(HAVE_STAT_ST_CTIMESPEC)
681 // tv_sec may be 64 bit, but value is small enough to multiply by 1000000 to fit in 64 bits
682 return static_cast<uint64_t>(flag_sort_key == Sort::SIZE ? buf.st_size : flag_sort_key == Sort::USED ? static_cast<uint64_t>(buf.st_atimespec.tv_sec) * 1000000 + buf.st_atimespec.tv_nsec / 1000 : flag_sort_key == Sort::CHANGED ? static_cast<uint64_t>(buf.st_mtimespec.tv_sec) * 1000000 + buf.st_mtimespec.tv_nsec / 1000 : flag_sort_key == Sort::CREATED ? static_cast<uint64_t>(buf.st_ctimespec.tv_sec) * 1000000 + buf.st_ctimespec.tv_nsec / 1000 : 0);
683 #else
684 return static_cast<uint64_t>(flag_sort_key == Sort::SIZE ? buf.st_size : flag_sort_key == Sort::USED ? buf.st_atime : flag_sort_key == Sort::CHANGED ? buf.st_mtime : flag_sort_key == Sort::CREATED ? buf.st_ctime : 0);
685 #endif
686 }
687 #endif
688
689 // compare two entries by pathname
comp_by_pathGrep::Entry690 static bool comp_by_path(const Entry& a, const Entry& b)
691 {
692 return a.pathname < b.pathname;
693 }
694
695 // compare two entries by size or time (atime, mtime, or ctime), if equal compare by pathname
comp_by_infoGrep::Entry696 static bool comp_by_info(const Entry& a, const Entry& b)
697 {
698 return a.info < b.info || (a.info == b.info && a.pathname < b.pathname);
699 }
700
701 // compare two entries by edit distance cost
comp_by_bestGrep::Entry702 static bool comp_by_best(const Entry& a, const Entry& b)
703 {
704 return a.cost < b.cost || (a.cost == b.cost && a.pathname < b.pathname);
705 }
706
707 // reverse compare two entries by pathname
rev_comp_by_pathGrep::Entry708 static bool rev_comp_by_path(const Entry& a, const Entry& b)
709 {
710 return a.pathname > b.pathname;
711 }
712
713 // reverse compare two entries by size or time (atime, mtime, or ctime), if equal reverse compare by pathname
rev_comp_by_infoGrep::Entry714 static bool rev_comp_by_info(const Entry& a, const Entry& b)
715 {
716 return a.info > b.info || (a.info == b.info && a.pathname > b.pathname);
717 }
718
719 // reverse compare two entries by edit distance cost
rev_comp_by_bestGrep::Entry720 static bool rev_comp_by_best(const Entry& a, const Entry& b)
721 {
722 return a.cost > b.cost || (a.cost == b.cost && a.pathname > b.pathname);
723 }
724 };
725
726 #ifndef OS_WIN
727 // extend the reflex::Input::Handler to handle stdin from a TTY or a slow pipe
728 struct StdInHandler : public reflex::Input::Handler {
729
StdInHandlerGrep::StdInHandler730 StdInHandler(Grep *grep)
731 :
732 grep(grep)
733 { }
734
735 Grep *grep;
736
operator ()Grep::StdInHandler737 int operator()()
738 {
739 grep->out.flush();
740
741 while (true)
742 {
743 struct timeval tv;
744 fd_set rfds, efds;
745 FD_ZERO(&rfds);
746 FD_ZERO(&efds);
747 FD_SET(0, &rfds);
748 FD_SET(0, &efds);
749 tv.tv_sec = 1;
750 tv.tv_usec = 0;
751 int r = ::select(1, &rfds, NULL, &efds, &tv);
752 if (r < 0 && errno != EINTR)
753 return 0;
754 if (r > 0 && FD_ISSET(0, &efds))
755 return 0;
756 if (r > 0)
757 return 1;
758 }
759 }
760 };
761 #endif
762
763 // extend the reflex::AbstractMatcher::Handler with a grep object reference and references to some of the grep::search locals
764 struct GrepHandler : public reflex::AbstractMatcher::Handler {
765
GrepHandlerGrep::GrepHandler766 GrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop)
767 :
768 grep(grep),
769 pathname(pathname),
770 lineno(lineno),
771 binfile(binfile),
772 hex(hex),
773 binary(binary),
774 matches(matches),
775 stop(stop)
776 { }
777
778 Grep& grep; // grep object
779 const char*& pathname; // grep::search argument pathname
780 size_t& lineno; // grep::search lineno local variable
781 bool& binfile; // grep::search binfile local variable
782 bool& hex; // grep::search hex local variable
783 bool& binary; // grep::search binary local variable
784 size_t& matches; // grep::search matches local variable
785 bool& stop; // grep::search stop local variable
786
787 // get the start of the before context, if present
begin_beforeGrep::GrepHandler788 void begin_before(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num, const char*& ptr, size_t& size, size_t& offset)
789 {
790 ptr = NULL;
791 size = 0;
792 offset = 0;
793
794 if (len == 0)
795 return;
796
797 size_t current = matcher.lineno();
798 size_t between = current - lineno;
799
800 if (between > 1)
801 {
802 const char *s = buf + len;
803 const char *e = s;
804
805 if (buf[len - 1] != '\n')
806 --between;
807
808 while (--s >= buf)
809 {
810 if (*s == '\n')
811 {
812 if (--between == 0)
813 break;;
814 e = s + 1;
815 }
816 }
817
818 ptr = ++s;
819 size = e - s;
820 offset = s - buf + num;
821
822 ++lineno;
823 }
824 }
825
826 // advance to the next before context, if present
next_beforeGrep::GrepHandler827 void next_before(const char *buf, size_t len, size_t num, const char*& ptr, size_t& size, size_t& offset)
828 {
829 if (ptr == NULL)
830 return;
831
832 const char *s = ptr + size;
833 const char *e = buf + len;
834
835 if (s >= e)
836 {
837 ptr = NULL;
838 }
839 else
840 {
841 e = static_cast<const char*>(memchr(s, '\n', e - s));
842
843 if (e == NULL)
844 e = buf + len;
845 else
846 ++e;
847
848 ptr = s;
849 size = e - s;
850 offset = s - buf + num;
851
852 ++lineno;
853 }
854 }
855 };
856
857 // extend event GrepHandler to output invert match lines for -v
858 struct InvertMatchGrepHandler : public GrepHandler {
859
InvertMatchGrepHandlerGrep::InvertMatchGrepHandler860 InvertMatchGrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop)
861 :
862 GrepHandler(grep, pathname, lineno, binfile, hex, binary, matches, stop)
863 { }
864
865 // functor invoked by the reflex::AbstractMatcher when the buffer contents are shifted out, also called explicitly in grep::search
operator ()Grep::InvertMatchGrepHandler866 virtual void operator()(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num) override
867 {
868 const char *ptr;
869 size_t size;
870 size_t offset;
871
872 begin_before(matcher, buf, len, num, ptr, size, offset);
873
874 while (ptr != NULL)
875 {
876 // --range: max line exceeded?
877 if (flag_max_line > 0 && lineno > flag_max_line)
878 break;
879
880 // --max-files: max reached?
881 if (matches == 0 && !Stats::found_part())
882 {
883 stop = true;
884 break;
885 }
886
887 // -m: max number of matches reached?
888 if (flag_max_count > 0 && matches >= flag_max_count)
889 break;
890
891 // output blocked?
892 if (grep.out.eof)
893 break;
894
895 ++matches;
896
897 if (flag_with_hex)
898 binary = false;
899
900 binary = binary || flag_hex || (!flag_text && is_binary(ptr, size));
901
902 if (binfile || (binary && !flag_hex && !flag_with_hex))
903 break;
904
905 if (hex && !binary)
906 grep.out.dump.done();
907
908 if (!flag_no_header)
909 grep.out.header(pathname, grep.partname, lineno, NULL, offset, flag_separator, binary);
910
911 hex = binary;
912
913 if (binary)
914 {
915 grep.out.dump.hex(Output::Dump::HEX_LINE, offset, ptr, size);
916 }
917 else
918 {
919 bool lf_only = false;
920 if (size > 0)
921 {
922 lf_only = ptr[size - 1] == '\n';
923 size_t sizen = size - lf_only;
924 if (sizen > 0)
925 {
926 grep.out.str(color_sl);
927 grep.out.str(ptr, sizen);
928 grep.out.str(color_off);
929 }
930 }
931 grep.out.nl(lf_only);
932 }
933
934 next_before(buf, len, num, ptr, size, offset);
935 }
936 }
937 };
938
939 // extend event GrepHandler to output formatted invert match lines for --format -v
940 struct FormatInvertMatchGrepHandler : public GrepHandler {
941
FormatInvertMatchGrepHandlerGrep::FormatInvertMatchGrepHandler942 FormatInvertMatchGrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop)
943 :
944 GrepHandler(grep, pathname, lineno, binfile, hex, binary, matches, stop)
945 { }
946
947 // functor invoked by the reflex::AbstractMatcher when the buffer contents are shifted out, also called explicitly in grep::search
operator ()Grep::FormatInvertMatchGrepHandler948 virtual void operator()(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num) override
949 {
950 const char *ptr;
951 size_t size;
952 size_t offset;
953
954 begin_before(matcher, buf, len, num, ptr, size, offset);
955
956 while (ptr != NULL)
957 {
958 // --range: max line exceeded?
959 if (flag_max_line > 0 && lineno > flag_max_line)
960 break;
961
962 // output --format-open
963 if (matches == 0)
964 {
965 // --format-open or --format-close: we must acquire lock early before Stats::found_part()
966 if (flag_format_open != NULL || flag_format_close != NULL)
967 grep.out.acquire();
968
969 // --max-files: max reached?
970 if (!Stats::found_part())
971 {
972 stop = true;
973 break;
974 }
975
976 if (flag_format_open != NULL)
977 grep.out.format(flag_format_open, pathname, grep.partname, Stats::found_parts(), &matcher, false, Stats::found_parts() > 1);
978 }
979
980 // -m: max number of matches reached?
981 if (flag_max_count > 0 && matches >= flag_max_count)
982 break;
983
984 // output blocked?
985 if (grep.out.eof)
986 break;
987
988 ++matches;
989
990 // output --format
991 grep.out.format_invert(flag_format, pathname, grep.partname, matches, lineno, offset, ptr, size - (size > 0 && ptr[size - 1] == '\n'), matches > 1);
992
993 next_before(buf, len, num, ptr, size, offset);
994 }
995 }
996 };
997
998 // extend event GrepHandler to output any context lines for -y
999 struct AnyLineGrepHandler : public GrepHandler {
1000
AnyLineGrepHandlerGrep::AnyLineGrepHandler1001 AnyLineGrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop, const char*& rest_line_data, size_t& rest_line_size, size_t& rest_line_last)
1002 :
1003 GrepHandler(grep, pathname, lineno, binfile, hex, binary, matches, stop),
1004 rest_line_data(rest_line_data),
1005 rest_line_size(rest_line_size),
1006 rest_line_last(rest_line_last)
1007 { }
1008
1009 // functor invoked by the reflex::AbstractMatcher when the buffer contents are shifted out, also called explicitly in grep::search
operator ()Grep::AnyLineGrepHandler1010 virtual void operator()(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num) override
1011 {
1012 const char *ptr;
1013 size_t size;
1014 size_t offset;
1015
1016 begin_before(matcher, buf, len, num, ptr, size, offset);
1017
1018 // display the rest of the matching line before the context lines
1019 if (rest_line_data != NULL && (lineno != matcher.lineno() || flag_ungroup))
1020 {
1021 if (binary)
1022 {
1023 grep.out.dump.hex(flag_invert_match ? Output::Dump::HEX_CONTEXT_LINE : Output::Dump::HEX_LINE, rest_line_last, rest_line_data, rest_line_size);
1024 grep.out.dump.done();
1025 }
1026 else
1027 {
1028 bool lf_only = false;
1029 if (rest_line_size > 0)
1030 {
1031 lf_only = rest_line_data[rest_line_size - 1] == '\n';
1032 rest_line_size -= lf_only;
1033 if (rest_line_size > 0)
1034 {
1035 grep.out.str(flag_invert_match ? color_cx : color_sl);
1036 grep.out.str(rest_line_data, rest_line_size);
1037 grep.out.str(color_off);
1038 }
1039 }
1040 grep.out.nl(lf_only);
1041 }
1042
1043 rest_line_data = NULL;
1044 }
1045
1046 // context colors with or without -v
1047 short v_hex_context_line = flag_invert_match ? Output::Dump::HEX_LINE : Output::Dump::HEX_CONTEXT_LINE;
1048 const char *v_color_cx = flag_invert_match ? color_sl : color_cx;
1049 const char *separator = flag_invert_match ? flag_separator : "-";
1050
1051 while (ptr != NULL)
1052 {
1053 // --range: max line exceeded?
1054 if (flag_max_line > 0 && lineno > flag_max_line)
1055 break;
1056
1057 if (matches == 0 && flag_invert_match)
1058 {
1059 // --max-files: max reached?
1060 if (!Stats::found_part())
1061 {
1062 stop = true;
1063 break;
1064 }
1065 }
1066
1067 // -m: max number of matches reached?
1068 if (flag_invert_match && flag_max_count > 0 && matches >= flag_max_count)
1069 {
1070 stop = true;
1071 break;
1072 }
1073
1074 // output blocked?
1075 if (grep.out.eof)
1076 break;
1077
1078 if (flag_with_hex)
1079 binary = false;
1080
1081 if (flag_invert_match)
1082 ++matches;
1083
1084 binary = binary || flag_hex || (!flag_text && is_binary(ptr, size));
1085
1086 if (binfile || (binary && !flag_hex && !flag_with_hex))
1087 break;
1088
1089 if (hex && !binary)
1090 grep.out.dump.done();
1091
1092 if (!flag_no_header)
1093 grep.out.header(pathname, grep.partname, lineno, NULL, offset, separator, binary);
1094
1095 hex = binary;
1096
1097 if (binary)
1098 {
1099 grep.out.dump.hex(v_hex_context_line, offset, ptr, size);
1100 }
1101 else
1102 {
1103 bool lf_only = false;
1104 if (size > 0)
1105 {
1106 lf_only = ptr[size - 1] == '\n';
1107 size_t sizen = size - lf_only;
1108 if (sizen > 0)
1109 {
1110 grep.out.str(v_color_cx);
1111 grep.out.str(ptr, sizen);
1112 grep.out.str(color_off);
1113 }
1114 }
1115 grep.out.nl(lf_only);
1116 }
1117
1118 next_before(buf, len, num, ptr, size, offset);
1119 }
1120 }
1121
1122 const char*& rest_line_data;
1123 size_t& rest_line_size;
1124 size_t& rest_line_last;
1125
1126 };
1127
// extend event AnyLineGrepHandler to output specific context lines for -A, -B, and -C
//
// Lines passed to this handler are either output right away as "after" context (while fewer than
// flag_after_context lines were output since set_after_lineno()), or saved in a rotating buffer of at most
// flag_before_context lines that output_before_context() outputs later.
struct ContextGrepHandler : public AnyLineGrepHandler {

  // context state to track context lines before and after a match
  struct ContextState {

    // start with an empty before context and an after context that counts as complete
    // (after_length == flag_after_context), the before_* buffers hold flag_before_context entries
    ContextState()
      :
        before_index(0),
        before_length(0),
        after_lineno(0),
        after_length(flag_after_context)
    {
      before_binary.resize(flag_before_context);
      before_offset.resize(flag_before_context);
      before_line.resize(flag_before_context);
    }

    size_t                   before_index;  // before context rotation index
    size_t                   before_length; // accumulated length of the before context
    std::vector<bool>        before_binary; // before context binary line
    std::vector<size_t>      before_offset; // before context offset of line
    std::vector<std::string> before_line;   // before context line data
    size_t                   after_lineno;  // after context line number
    size_t                   after_length;  // accumulated length of the after context

  };

  ContextGrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop, const char*& rest_line_data, size_t& rest_line_size, size_t& rest_line_last)
    :
      AnyLineGrepHandler(grep, pathname, lineno, binfile, hex, binary, matches, stop, rest_line_data, rest_line_size, rest_line_last)
  { }

  // display the before context: an optional group separator, then the saved before lines oldest first,
  // after which the saved before context is emptied
  void output_before_context()
  {
    // the group separator indicates lines skipped, like GNU grep
    // (lines were skipped when the after context output so far ends before the first saved before line)
    if (state.after_lineno > 0 && state.after_lineno + state.after_length < grep.matcher->lineno() - state.before_length)
    {
      // close a pending hex dump before the separator
      if (hex)
        grep.out.dump.done();

      if (flag_group_separator != NULL)
      {
        grep.out.str(color_se);
        grep.out.str(flag_group_separator);
        grep.out.str(color_off);
        grep.out.nl();
      }
    }

    // output the before context
    if (state.before_length > 0)
    {
      // the first line number of the before context
      size_t before_lineno = grep.matcher->lineno() - state.before_length;

      for (size_t i = 0; i < state.before_length; ++i)
      {
        // rotate through the buffer starting at before_index
        size_t j = (state.before_index + i) % state.before_length;

        // close the hex dump when switching from hex to text output
        if (hex && !state.before_binary[j])
          grep.out.dump.done();

        if (!flag_no_header)
          grep.out.header(pathname, grep.partname, before_lineno + i, NULL, state.before_offset[j], "-", state.before_binary[j]);

        hex = state.before_binary[j];

        const char *ptr = state.before_line[j].c_str();
        size_t size = state.before_line[j].size();

        if (hex)
        {
          grep.out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, state.before_offset[j], ptr, size);
        }
        else
        {
          // output the line without its trailing newline, nl() is told whether one was present
          bool lf_only = false;
          if (size > 0)
          {
            lf_only = ptr[size - 1] == '\n';
            size -= lf_only;
            if (size > 0)
            {
              grep.out.str(color_cx);
              grep.out.str(ptr, size);
              grep.out.str(color_off);
            }
          }
          grep.out.nl(lf_only);
        }
      }
    }

    // reset the before context state
    state.before_index = 0;
    state.before_length = 0;
  }

  // set the after context
  void set_after_lineno(size_t lineno)
  {
    // set the after context state with the first after context line number
    state.after_length = 0;
    state.after_lineno = lineno;
  }

  // functor invoked by the reflex::AbstractMatcher when the buffer contents are shifted out, also called explicitly in grep::search
  virtual void operator()(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num) override
  {
    const char *ptr;
    size_t size;
    size_t offset;

    // get the first line to process, ptr is NULL when there is none
    begin_before(matcher, buf, len, num, ptr, size, offset);

    // display the rest of the matching line before the context lines
    if (rest_line_data != NULL && (lineno != matcher.lineno() || flag_ungroup))
    {
      if (binary)
      {
        grep.out.dump.hex(flag_invert_match ? Output::Dump::HEX_CONTEXT_LINE : Output::Dump::HEX_LINE, rest_line_last, rest_line_data, rest_line_size);
      }
      else
      {
        // output the rest line without its trailing newline, nl() is told whether one was present
        bool lf_only = false;
        if (rest_line_size > 0)
        {
          lf_only = rest_line_data[rest_line_size - 1] == '\n';
          rest_line_size -= lf_only;
          if (rest_line_size > 0)
          {
            grep.out.str(flag_invert_match ? color_cx : color_sl);
            grep.out.str(rest_line_data, rest_line_size);
            grep.out.str(color_off);
          }
        }
        grep.out.nl(lf_only);
      }

      // the rest line is output only once
      rest_line_data = NULL;
    }

    while (ptr != NULL)
    {
      // --range: max line exceeded?
      if (flag_max_line > 0 && lineno > flag_max_line)
        break;

      if (matches == 0 && flag_invert_match)
      {
        // --max-files: max reached?
        if (!Stats::found_part())
        {
          stop = true;
          break;
        }
      }

      // -m: max number of matches reached?
      if (flag_invert_match && flag_max_count > 0 && matches >= flag_max_count)
      {
        stop = true;
        break;
      }

      // output blocked?
      if (grep.out.eof)
        break;

      if (flag_invert_match)
        ++matches;

      // --with-hex: decide binary per line
      if (flag_with_hex)
        binary = false;

      binary = binary || flag_hex || (!flag_text && is_binary(ptr, size));

      // stop at a binary file or at binary data that is not output in hex
      if (binfile || (binary && !flag_hex && !flag_with_hex))
        break;

      if (state.after_lineno > 0 && state.after_length < flag_after_context)
      {
        // after context still incomplete: output this line as an after context line
        ++state.after_length;

        // close the hex dump when switching from hex to text output
        if (hex && !binary)
          grep.out.dump.done();

        if (!flag_no_header)
          grep.out.header(pathname, grep.partname, lineno, NULL, offset, "-", binary);

        hex = binary;

        if (binary)
        {
          grep.out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, offset, ptr, size);
        }
        else
        {
          // output the line without its trailing newline, nl() is told whether one was present
          bool lf_only = false;
          if (size > 0)
          {
            lf_only = ptr[size - 1] == '\n';
            size_t sizen = size - lf_only;
            if (sizen > 0)
            {
              grep.out.str(color_cx);
              grep.out.str(ptr, sizen);
              grep.out.str(color_off);
            }
          }
          grep.out.nl(lf_only);
        }
      }
      else if (flag_before_context > 0)
      {
        // save the line in the rotating before context buffer, overwriting the oldest entry when full
        if (state.before_length < flag_before_context)
          ++state.before_length;
        state.before_index %= state.before_length;
        state.before_binary[state.before_index] = binary;
        state.before_offset[state.before_index] = offset;
        state.before_line[state.before_index].assign(ptr, size);
        ++state.before_index;
      }
      else
      {
        // no after context to output and no before context to save
        break;
      }

      next_before(buf, len, num, ptr, size, offset);
    }
  }

  ContextState state;

};
1365
// extend event AnyLineGrepHandler to output specific context lines for -A, -B, and -C with -v
//
// Lines passed to this handler are output as selected lines. The lines saved with add_before_context_line()
// and their match fragments saved with add_before_context_match() are kept in a rotating buffer of at most
// flag_before_context lines and are output as context by output_before_context().
struct InvertContextGrepHandler : public AnyLineGrepHandler {

  // a match fragment on a saved before context line
  struct InvertContextMatch {

    InvertContextMatch(size_t pos, size_t size, size_t offset)
      :
        pos(pos),
        size(size),
        offset(offset)
    { }

    size_t pos;    // position on the line
    size_t size;   // size of the match
    size_t offset; // offset of the match, passed to the header and hex dump output

  };

  typedef std::vector<InvertContextMatch> InvertContextMatches;

  // context state to track matching lines before non-matching lines
  struct InvertContextState {

    // start with an empty before context, the before_* buffers hold flag_before_context entries
    InvertContextState()
      :
        before_index(0),
        before_length(0),
        after_lineno(0)
    {
      before_binary.resize(flag_before_context);
      before_columno.resize(flag_before_context);
      before_offset.resize(flag_before_context);
      before_line.resize(flag_before_context);
      before_match.resize(flag_before_context);
    }

    size_t                            before_index;   // before context rotation index
    size_t                            before_length;  // accumulated length of the before context
    std::vector<bool>                 before_binary;  // before context binary line
    std::vector<size_t>               before_columno; // before context column number of first match
    std::vector<size_t>               before_offset;  // before context offset of first match
    std::vector<std::string>          before_line;    // before context line data
    std::vector<InvertContextMatches> before_match;   // before context matches per line
    size_t                            after_lineno;   // the after context line number

  };

  InvertContextGrepHandler(Grep& grep, const char*& pathname, size_t& lineno, bool& binfile, bool& hex, bool& binary, size_t& matches, bool& stop, const char*& rest_line_data, size_t& rest_line_size, size_t& rest_line_last)
    :
      AnyLineGrepHandler(grep, pathname, lineno, binfile, hex, binary, matches, stop, rest_line_data, rest_line_size, rest_line_last)
  { }

  // display the before context: an optional group separator, then the saved lines oldest first with their
  // match fragments highlighted, after which the saved before context is emptied and after_lineno is set to lineno
  void output_before_context()
  {
    // the group separator indicates lines skipped, like GNU grep
    if (state.after_lineno > 0 && state.after_lineno + flag_after_context + flag_before_context < lineno && flag_group_separator != NULL)
    {
      // close a pending hex dump before the separator
      if (hex)
        grep.out.dump.done();

      grep.out.str(color_se);
      grep.out.str(flag_group_separator);
      grep.out.str(color_off);
      grep.out.nl();
    }

    // output the before context
    if (state.before_length > 0)
    {
      // the first line number of the before context
      size_t before_lineno = lineno - state.before_length;

      for (size_t i = 0; i < state.before_length; ++i)
      {
        // rotate through the buffer starting at before_index
        size_t j = (state.before_index + i) % state.before_length;
        // the header shows the offset of the first match on the line when there is one, else the saved line offset
        size_t offset = state.before_match[j].empty() ? state.before_offset[j] : state.before_match[j].front().offset;

        // close the hex dump when switching from hex to text output
        if (hex && !state.before_binary[j])
          grep.out.dump.done();

        if (!flag_no_header)
          grep.out.header(pathname, grep.partname, before_lineno + i, NULL, offset, "-", state.before_binary[j]);

        hex = state.before_binary[j];

        const char *ptr = state.before_line[j].c_str();
        size_t size = state.before_line[j].size();
        size_t pos = 0; // position on the line up to which the line was output

        // output each match fragment preceded by the text between it and the previous fragment
        for (auto& match : state.before_match[j])
        {
          if (hex)
          {
            grep.out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, match.offset - (match.pos - pos), ptr + pos, match.pos - pos);
            grep.out.dump.hex(Output::Dump::HEX_CONTEXT_MATCH, match.offset, ptr + match.pos, match.size);
          }
          else
          {
            if (match.pos > pos)
            {
              grep.out.str(color_cx);
              grep.out.str(ptr + pos, match.pos - pos);
              grep.out.str(color_off);
            }

            if (match.size > 0)
            {
              // do not output a newline at the end of the match
              size_t sizen = match.size - (ptr[match.pos + match.size - 1] == '\n');
              if (sizen > 0)
              {
                grep.out.str(match_mc);
                grep.out.str(ptr + match.pos, sizen);
                grep.out.str(match_off);
              }
            }
          }

          pos = match.pos + match.size;
        }

        // output the rest of the line after the last match fragment
        if (hex)
        {
          grep.out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, state.before_offset[j] + pos, ptr + pos, size - pos);
        }
        else
        {
          // output the rest without its trailing newline, nl() is told whether one was present
          bool lf_only = false;
          if (size > pos)
          {
            lf_only = ptr[size - 1] == '\n';
            size -= lf_only;
            if (size > pos)
            {
              grep.out.str(color_cx);
              grep.out.str(ptr + pos, size - pos);
              grep.out.str(color_off);
            }
          }
          grep.out.nl(lf_only);
        }
      }
    }

    // reset the context state
    state.before_index = 0;
    state.before_length = 0;
    state.after_lineno = lineno;
  }

  // add line with the first match to the before context, overwriting the oldest saved line when the buffer is full
  void add_before_context_line(const char *bol, const char *eol, size_t columno, size_t offset)
  {
    if (state.before_length < flag_before_context)
      ++state.before_length;
    state.before_index %= state.before_length;
    state.before_binary[state.before_index] = binary;
    state.before_columno[state.before_index] = columno;
    state.before_offset[state.before_index] = offset;
    state.before_line[state.before_index].assign(bol, eol - bol);
    state.before_match[state.before_index].clear();
    ++state.before_index;
  }

  // add match fragment to the before context
  void add_before_context_match(size_t pos, size_t size, size_t offset)
  {
    // only add a match if we have a before line, i.e. not an after line with a multiline match
    if (state.before_length > 0)
    {
      // index of the line saved last, i.e. the entry just before before_index in the rotation
      size_t index = (state.before_index + state.before_length - 1) % state.before_length;
      state.before_match[index].emplace_back(pos, size, offset);
    }
  }

  // set the after context
  void set_after_lineno(size_t lineno)
  {
    state.after_lineno = lineno;
  }

  // functor invoked by the reflex::AbstractMatcher when the buffer contents are shifted out, also called explicitly in grep::search
  virtual void operator()(reflex::AbstractMatcher& matcher, const char *buf, size_t len, size_t num) override
  {
    const char *ptr;
    size_t size;
    size_t offset;

    // get the first line to process, ptr is NULL when there is none
    begin_before(matcher, buf, len, num, ptr, size, offset);

    // display the rest of the "after" matching line
    if (rest_line_data != NULL && (lineno != matcher.lineno() || flag_ungroup))
    {
      if (binary)
      {
        grep.out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, rest_line_last, rest_line_data, rest_line_size);
      }
      else
      {
        // output the rest line without its trailing newline, nl() is told whether one was present
        bool lf_only = false;
        if (rest_line_size > 0)
        {
          lf_only = rest_line_data[rest_line_size - 1] == '\n';
          rest_line_size -= lf_only;
          if (rest_line_size > 0)
          {
            grep.out.str(color_cx);
            grep.out.str(rest_line_data, rest_line_size);
            grep.out.str(color_off);
          }
        }
        grep.out.nl(lf_only);
      }

      // the rest line is output only once
      rest_line_data = NULL;
    }

    // the saved before context goes out ahead of the lines output below
    if (ptr != NULL)
      output_before_context();

    while (ptr != NULL)
    {
      // the after context starts at the line following this line
      state.after_lineno = lineno + 1;

      // --range: max line exceeded?
      if (flag_max_line > 0 && lineno > flag_max_line)
        break;

      if (matches == 0)
      {
        // --max-files: max reached?
        if (!Stats::found_part())
        {
          stop = true;
          break;
        }
      }

      // -m: max number of matches reached?
      if (flag_invert_match && flag_max_count > 0 && matches >= flag_max_count)
      {
        stop = true;
        break;
      }

      // output blocked?
      if (grep.out.eof)
        break;

      ++matches;

      // --with-hex: decide binary per line
      if (flag_with_hex)
        binary = false;

      binary = binary || flag_hex || (!flag_text && is_binary(ptr, size));

      // stop at a binary file or at binary data that is not output in hex
      if (binfile || (binary && !flag_hex && !flag_with_hex))
        break;

      // close the hex dump when switching from hex to text output
      if (hex && !binary)
        grep.out.dump.done();

      if (!flag_no_header)
        grep.out.header(pathname, grep.partname, lineno, NULL, offset, flag_separator, binary);

      hex = binary;

      if (binary)
      {
        grep.out.dump.hex(Output::Dump::HEX_LINE, offset, ptr, size);
      }
      else
      {
        // output the line without its trailing newline, nl() is told whether one was present
        bool lf_only = false;
        if (size > 0)
        {
          lf_only = ptr[size - 1] == '\n';
          size_t sizen = size - lf_only;
          if (sizen > 0)
          {
            grep.out.str(color_sl);
            grep.out.str(ptr, sizen);
            grep.out.str(color_off);
          }
        }
        grep.out.nl(lf_only);
      }

      next_before(buf, len, num, ptr, size, offset);
    }
  }

  InvertContextState state;

};
1661
GrepGrep1662 Grep(FILE *file, reflex::AbstractMatcher *matcher, Matchers *matchers)
1663 :
1664 out(file),
1665 matcher(matcher),
1666 matchers(matchers),
1667 file(NULL)
1668 #ifndef OS_WIN
1669 , stdin_handler(this)
1670 #endif
1671 #ifdef HAVE_LIBZ
1672 , zstream(NULL),
1673 stream(NULL)
1674 #ifdef WITH_DECOMPRESSION_THREAD
1675 , thread_end(false),
1676 extracting(false),
1677 waiting(false)
1678 #endif
1679 #endif
1680 {
1681 restline.reserve(256); // pre-reserve a "rest line" of input to display matches to limit heap allocs
1682 }
1683
~GrepGrep1684 virtual ~Grep()
1685 {
1686 #ifdef HAVE_LIBZ
1687
1688 #ifdef WITH_DECOMPRESSION_THREAD
1689 if (thread.joinable())
1690 {
1691 thread_end = true;
1692
1693 std::unique_lock<std::mutex> lock(pipe_mutex);
1694 if (waiting)
1695 pipe_zstrm.notify_one();
1696 lock.unlock();
1697
1698 thread.join();
1699 }
1700 #endif
1701
1702 if (stream != NULL)
1703 {
1704 delete stream;
1705 stream = NULL;
1706 }
1707
1708 if (zstream != NULL)
1709 {
1710 delete zstream;
1711 zstream = NULL;
1712 }
1713 #endif
1714 }
1715
// cancel all active searches, implemented by cancelling this Grep's output
void cancel()
{
  // global cancellation is forced by cancelling the shared output
  out.cancel();
}
1722
1723 // search the specified files or standard input for pattern matches
1724 virtual void ugrep();
1725
1726 // search file or directory for pattern matches
1727 Type select(size_t level, const char *pathname, const char *basename, int type, ino_t& inode, uint64_t& info, bool is_argument = false);
1728
1729 // recurse a directory
1730 virtual void recurse(size_t level, const char *pathname);
1731
1732 // -Z and --sort=best: perform a presearch to determine edit distance cost, return cost of pathname file, 65535 when no match is found
1733 uint16_t cost(const char *pathname);
1734
1735 // search a file
1736 virtual void search(const char *pathname);
1737
1738 // check CNF AND/OR/NOT conditions are met for the line(s) spanning bol to eol
cnf_matchingGrep1739 bool cnf_matching(const char *bol, const char *eol, bool acquire = false)
1740 {
1741 if (flag_files)
1742 {
1743 if (out.holding())
1744 {
1745 size_t k = 0; // iterate over matching[] bitmask
1746 bool all = true; // all terms matched
1747
1748 // for each AND term check if the AND term was matched before or has a match this time
1749 for (const auto& i : *matchers)
1750 {
1751 // an OR term hasn't matched before
1752 if (!matching[k])
1753 {
1754 auto j = i.begin();
1755 auto e = i.end();
1756
1757 if (j != e)
1758 {
1759 // check OR terms
1760 if (*j && (*j)->buffer(const_cast<char*>(bol), eol - bol + 1).find() != 0)
1761 {
1762 matching[k] = true;
1763 ++j;
1764 }
1765 else
1766 {
1767 // check OR NOT terms
1768 size_t l = 0; // iterate over notmaching[k] bitmask
1769 bool none = true; // all not-terms matched
1770
1771 while (++j != e)
1772 {
1773 if (*j && !notmatching[k][l])
1774 {
1775 if ((*j)->buffer(const_cast<char*>(bol), eol - bol + 1).find() != 0)
1776 notmatching[k][l] = true;
1777 else
1778 all = none = false;
1779 }
1780
1781 ++l;
1782 }
1783
1784 if (none)
1785 {
1786 // when all not-terms matched and we don't have a positive alternative then stop searching this file
1787 if (!*i.begin())
1788 throw EXIT_SEARCH();
1789
1790 all = false;
1791 }
1792 }
1793 }
1794 }
1795 ++k;
1796 }
1797
1798 // if all terms matched globally per file then remove the hold to launch output
1799 if (all)
1800 {
1801 if (acquire)
1802 out.acquire();
1803
1804 // --max-files: max reached?
1805 if (!Stats::found_part())
1806 throw EXIT_SEARCH();
1807
1808 out.launch();
1809 }
1810 }
1811 }
1812 else
1813 {
1814 // for each AND term check if the line has a match
1815 for (const auto& i : *matchers)
1816 {
1817 auto j = i.begin();
1818 auto e = i.end();
1819
1820 if (j != e)
1821 {
1822 // check OR terms
1823 if (*j && (*j)->buffer(const_cast<char*>(bol), eol - bol + 1).find() != 0)
1824 continue;
1825
1826 // check OR NOT terms
1827 while (++j != e)
1828 if (*j && (*j)->buffer(const_cast<char*>(bol), eol - bol + 1).find() == 0)
1829 break;
1830
1831 if (j == e)
1832 return false;
1833 }
1834 }
1835 }
1836
1837 return true;
1838 }
1839
1840 // if CNF AND/OR/NOT conditions are met globally then launch output after searching a file with --files
cnf_satisfiedGrep1841 bool cnf_satisfied(bool acquire = false)
1842 {
1843 if (out.holding())
1844 {
1845 size_t k = 0; // iterate over matching[] bitmask
1846
1847 // for each AND term check if the term was matched before
1848 for (const auto& i : *matchers)
1849 {
1850 // an OR term hasn't matched
1851 if (!matching[k])
1852 {
1853 // return if there are no OR NOT terms to check
1854 if (i.size() <= 1)
1855 return false;
1856
1857 auto j = i.begin();
1858 auto e = i.end();
1859
1860 // check if not all of the OR NOT terms matched
1861 if (j != e)
1862 {
1863 size_t l = 0; // iterate over notmaching[k] bitmask
1864 while (++j != e)
1865 {
1866 if (*j && !notmatching[k][l])
1867 break;
1868 ++l;
1869 }
1870 // return if all OR NOT terms matched
1871 if (j == e)
1872 return false;
1873 }
1874 }
1875 ++k;
1876 }
1877
1878 if (acquire)
1879 out.acquire();
1880
1881 // --max-files: max reached?
1882 if (!Stats::found_part())
1883 throw EXIT_SEARCH();
1884
1885 out.launch();
1886 }
1887
1888 return true;
1889 }
1890
// open a file for (binary) reading and assign input, decompress the file when -z, --decompress specified, may throw bad_alloc
// - a NULL pathname selects `source` as the file to read, labelled with flag_label
// - the file is passed through filter() first, which may replace it with a pipe (--filter)
// - returns false when there is nothing to read, the file cannot be opened, filtering fails, or the
//   decompression pipe or thread cannot be created; returns true with `input` assigned otherwise
bool open_file(const char *pathname)
{
  if (pathname == NULL)
  {
    // no pathname: read from source, if any
    if (source == NULL)
      return false;

    pathname = flag_label;
    file = source;

#ifdef OS_WIN
    _setmode(fileno(source), _O_BINARY);
#endif
  }
  else if (fopenw_s(&file, pathname, "rb") != 0)
  {
    warning("cannot read", pathname);

    return false;
  }

  // --filter: fork process to filter file, when applicable
  if (!filter(file, pathname))
    return false;

#ifdef HAVE_LIBZ
  if (flag_decompress)
  {
#ifdef WITH_DECOMPRESSION_THREAD

    // mark both pipe ends as not open
    pipe_fd[0] = -1;
    pipe_fd[1] = -1;

    FILE *pipe_in = NULL;

    // open pipe between worker and decompression thread, then start decompression thread
    if (pipe(pipe_fd) == 0 && (pipe_in = fdopen(pipe_fd[0], "rb")) != NULL)
    {
      // create or open a new zstreambuf to (re)start the decompression thread
      if (zstream == NULL)
        zstream = new zstreambuf(pathname, file);
      else
        zstream->open(pathname, file);

      if (thread.joinable())
      {
        // the thread already exists: notify it via the condition variable
        pipe_zstrm.notify_one();
      }
      else
      {
        try
        {
          // reset the thread state flags before starting the thread
          thread_end = false;
          extracting = false;
          waiting = false;

          thread = std::thread(&Grep::decompress, this);
        }

        catch (std::system_error&)
        {
          // the thread could not be started: close both pipe ends (pipe_in wraps the reading end)
          fclose(pipe_in);
          close(pipe_fd[1]);
          pipe_fd[0] = -1;
          pipe_fd[1] = -1;

          warning("cannot create thread to decompress", pathname);

          return false;
        }
      }
    }
    else
    {
      // the pipe was created but fdopen() failed: close both pipe ends
      if (pipe_fd[0] != -1)
      {
        close(pipe_fd[0]);
        close(pipe_fd[1]);
        pipe_fd[0] = -1;
        pipe_fd[1] = -1;
      }

      warning("cannot create pipe to decompress", pathname);

      return false;
    }

    // read the input from the reading end of the pipe
    input = reflex::Input(pipe_in, flag_encoding_type);

#else

    // create or open a new zstreambuf
    if (zstream == NULL)
      zstream = new zstreambuf(pathname, file);
    else
      zstream->open(pathname, file);

    // replace the previous stream, if any, with a new stream on the zstreambuf
    if (stream != NULL)
      delete stream;

    stream = new std::istream(zstream);

    input = stream;

#endif
  }
  else
#endif
  {
    // no decompression: read the input from the file directly
    input = reflex::Input(file, flag_encoding_type);
  }

  return true;
}
2006
2007 // return true on success, create a pipe to replace file input if filtering files in a forked process
filterGrep2008 bool filter(FILE *& in, const char *pathname)
2009 {
2010 #ifndef OS_WIN
2011
2012 // --filter
2013 if (flag_filter != NULL && in != NULL)
2014 {
2015 const char *basename = strrchr(pathname, PATHSEPCHR);
2016 if (basename == NULL)
2017 basename = pathname;
2018 else
2019 ++basename;
2020
2021 // get the basenames's extension suffix
2022 const char *suffix = strrchr(basename, '.');
2023
2024 // don't consider . at the front of basename, otherwise skip .
2025 if (suffix == basename)
2026 suffix = NULL;
2027 else if (suffix != NULL)
2028 ++suffix;
2029
2030 // --filter-magic-label: if the file is seekable, then check for a magic pattern match
2031 if (!flag_filter_magic_label.empty() && fseek(in, 0, SEEK_CUR) == 0)
2032 {
2033 bool is_plus = false;
2034
2035 // --filter-magic-label: check for overriding +
2036 if (suffix != NULL)
2037 {
2038 for (const auto& i : flag_filter_magic_label)
2039 {
2040 if (i.front() == '+')
2041 {
2042 is_plus = true;
2043
2044 break;
2045 }
2046 }
2047 }
2048
2049 // --filter-magic-label: if the basename has no suffix or a +LABEL + then check magic bytes
2050 if (suffix == NULL || is_plus)
2051 {
2052 // create a matcher to match the magic pattern
2053 size_t match = reflex::Matcher(filter_magic_pattern, in).scan();
2054
2055 // rewind the input after scan
2056 rewind(in);
2057
2058 if (match > 0 && match <= flag_filter_magic_label.size())
2059 {
2060 suffix = flag_filter_magic_label[match - 1].c_str();
2061
2062 if (*suffix == '+')
2063 ++suffix;
2064 }
2065 }
2066 }
2067
2068 // basenames without a suffix get "*" as a suffix
2069 if (suffix == NULL || *suffix == '\0')
2070 suffix = "*";
2071
2072 size_t sep = strlen(suffix);
2073
2074 const char *command = flag_filter;
2075 const char *default_command = NULL;
2076
2077 // find the command corresponding to the suffix
2078 while (true)
2079 {
2080 while (isspace(*command))
2081 ++command;
2082
2083 if (*command == '*')
2084 default_command = strchr(command, ':');
2085
2086 if (strncmp(suffix, command, sep) == 0 && (command[sep] == ':' || command[sep] == ',' || isspace(command[sep])))
2087 {
2088 command = strchr(command, ':');
2089 break;
2090 }
2091
2092 command = strchr(command, ',');
2093 if (command == NULL)
2094 break;
2095
2096 ++command;
2097 }
2098
2099 // if no matching command, use the *:command if specified
2100 if (command == NULL)
2101 command = default_command;
2102
2103 // suffix has a command to execute
2104 if (command != NULL)
2105 {
2106 // skip over the ':'
2107 ++command;
2108
2109 int fd[2];
2110
2111 if (pipe(fd) == 0)
2112 {
2113 int pid;
2114
2115 if ((pid = fork()) == 0)
2116 {
2117 // child process
2118
2119 // close the reading end of the pipe
2120 close(fd[0]);
2121
2122 // dup the input file to stdin unless reading stdin
2123 if (in != stdin)
2124 {
2125 dup2(fileno(in), STDIN_FILENO);
2126 fclose(in);
2127 }
2128
2129 // dup the writing end of the pipe to stdout
2130 dup2(fd[1], STDOUT_FILENO);
2131 close(fd[1]);
2132
2133 // populate argv[] with the command and its arguments, thereby destroying flag_filter
2134 std::vector<const char*> args;
2135
2136 char *arg = const_cast<char*>(command);
2137
2138 while (*arg != '\0' && *arg != ',')
2139 {
2140 while (isspace(*arg))
2141 ++arg;
2142
2143 char *p = arg;
2144
2145 while (*p != '\0' && *p != ',' && !isspace(*p))
2146 ++p;
2147
2148 if (p > arg)
2149 {
2150 if (p - arg == 1 && *arg == '%')
2151 args.push_back(in == stdin ? "-" : pathname);
2152 else
2153 args.push_back(arg);
2154 }
2155
2156 if (*p == '\0')
2157 break;
2158
2159 if (*p == ',')
2160 {
2161 *p = '\0';
2162 break;
2163 }
2164
2165 *p = '\0';
2166
2167 arg = p + 1;
2168 }
2169
2170 // silently bail out if there is no command
2171 if (args.empty())
2172 exit(EXIT_SUCCESS);
2173
2174 // add sentinel
2175 args.push_back(NULL);
2176
2177 // get argv[] array data
2178 char * const *argv = const_cast<char * const *>(args.data());
2179
2180 // execute
2181 execvp(argv[0], argv);
2182
2183 error("--filter: cannot exec", argv[0]);
2184 }
2185
2186 // close the writing end of the pipe
2187 close(fd[1]);
2188
2189 // close the file and use the reading end of the pipe
2190 if (in != stdin)
2191 fclose(in);
2192 in = fdopen(fd[0], "r");
2193 }
2194 else
2195 {
2196 if (in != stdin)
2197 fclose(in);
2198 in = NULL;
2199
2200 warning("--filter: cannot create pipe", flag_filter);
2201
2202 return false;
2203 }
2204 }
2205 }
2206
2207 #endif
2208
2209 return true;
2210 }
2211
2212 #ifdef HAVE_LIBZ
2213 #ifdef WITH_DECOMPRESSION_THREAD
2214
2215 // decompression thread
// Body of the decompression thread, runs until thread_end is set. Each iteration of the outer loop decompresses
// one zstream and writes the data to the write end of the pipe (pipe_fd[1]). Tar and cpio content is handed to
// filter_tar() and filter_cpio(). Zip entries are extracted one by one by the inner loop, where each selected
// entry after the first waits on pipe_ready for a new pipe created in close_file(). After a stream is done, the
// thread closes its end of the pipe and waits on pipe_zstrm before processing the next zstream.
void decompress()
{
  while (!thread_end)
  {
    // use the zstreambuf internal buffer to hold decompressed data
    unsigned char *buf;
    size_t maxlen;
    zstream->get_buffer(buf, maxlen);

    // to hold the path (prefix + name) extracted from the zip file
    std::string path;

    // reset flags
    extracting = false;
    waiting = false;

    // extract the parts of a zip file, one by one, if zip file detected
    while (!thread_end)
    {
      // a regular file, may be reset when unzipping a directory
      bool is_regular = true;

      // zipinfo is NULL when the stream is not (or no longer) positioned at a zip entry
      const zstreambuf::ZipInfo *zipinfo = zstream->zipinfo();

      if (zipinfo != NULL)
      {
        // extracting a zip file
        extracting = true;

        if (!zipinfo->name.empty() && zipinfo->name.back() == '/')
        {
          // skip zip directories
          is_regular = false;
        }
        else
        {
          path.assign(zipinfo->name);

          // produce headers with zip file pathnames for each archived part (Grep::partname)
          if (!flag_no_filename)
            flag_no_header = false;
        }
      }

      // decompress a block of data into the buffer
      std::streamsize len = zstream->decompress(buf, maxlen);

      // a negative length ends the processing of this stream
      if (len < 0)
        break;

      bool is_selected = true;

      // tar and cpio archives are extracted entirely by their filters, which return true when they handled the data
      if (!filter_tar(*zstream, path, buf, maxlen, len) && !filter_cpio(*zstream, path, buf, maxlen, len))
      {
        // not a tar/cpio file, decompress the data into pipe, if not unzipping or if zipped file meets selection criteria
        is_selected = is_regular && (zipinfo == NULL || select_matching(path.c_str(), buf, static_cast<size_t>(len), true));

        if (is_selected)
        {
          // if pipe is closed, then reopen it
          if (pipe_fd[1] == -1)
          {
            // signal close and wait until the main grep thread created a new pipe in close_file()
            std::unique_lock<std::mutex> lock(pipe_mutex);
            pipe_close.notify_one();
            waiting = true;
            pipe_ready.wait(lock);
            waiting = false;
            lock.unlock();

            // failed to create a pipe in close_file()
            if (pipe_fd[1] == -1)
              break;
          }

          // assign the Grep::partname (synchronized on pipe_mutex and pipe), before sending to the (new) pipe
          partname.swap(path);
        }

        // push decompressed data into pipe, a part that is not selected is still decompressed block by block but
        // nothing is written, which advances the stream to the end of that part
        while (len > 0)
        {
          // write buffer data to the pipe, if the pipe is broken then the receiver is waiting for this thread to join
          if (is_selected && write(pipe_fd[1], buf, static_cast<size_t>(len)) < len)
            break;

          // decompress the next block of data into the buffer
          len = zstream->decompress(buf, maxlen);
        }
      }

      // break if not unzipping or if no more files to unzip
      if (zstream->zipinfo() == NULL)
        break;

      // extracting a zip file
      extracting = true;

      // after unzipping the selected zip file, close our end of the pipe and loop for the next file
      if (is_selected && pipe_fd[1] != -1)
      {
        close(pipe_fd[1]);
        pipe_fd[1] = -1;
      }
    }

    // nothing more to extract from this stream
    extracting = false;

    if (pipe_fd[1] != -1)
    {
      // close our end of the pipe
      close(pipe_fd[1]);
      pipe_fd[1] = -1;
    }

    if (!thread_end)
    {
      // wait until a new zstream is ready
      // NOTE(review): the waits on pipe_ready and pipe_zstrm have no predicate, so a spurious wakeup is not
      // filtered out here - confirm that the notifying side makes this safe
      std::unique_lock<std::mutex> lock(pipe_mutex);
      pipe_close.notify_one();
      waiting = true;
      pipe_zstrm.wait(lock);
      waiting = false;
      lock.unlock();
    }
  }
}
2342
2343 // if tar file, extract regular file contents and push into pipes one by one, return true when done
filter_tarGrep2344 bool filter_tar(zstreambuf& zstream, const std::string& partprefix, unsigned char *buf, size_t maxlen, std::streamsize len)
2345 {
2346 const int BLOCKSIZE = 512;
2347
2348 if (len > BLOCKSIZE)
2349 {
2350 // v7 and ustar formats
2351 const char ustar_magic[8] = { 'u', 's', 't', 'a', 'r', 0, '0', '0' };
2352
2353 // gnu and oldgnu formats
2354 const char gnutar_magic[8] = { 'u', 's', 't', 'a', 'r', ' ', ' ', 0 };
2355
2356 // is this a tar archive?
2357 if (*buf != '\0' && (memcmp(buf + 257, ustar_magic, 8) == 0 || memcmp(buf + 257, gnutar_magic, 8) == 0))
2358 {
2359 // produce headers with tar file pathnames for each archived part (Grep::partname)
2360 if (!flag_no_filename)
2361 flag_no_header = false;
2362
2363 // inform the main grep thread we are extracting an archive
2364 extracting = true;
2365
2366 // to hold the path (prefix + name) extracted from the header
2367 std::string path;
2368
2369 // to hold long path extracted from the previous header block that is marked with typeflag 'x' or 'L'
2370 std::string long_path;
2371
2372 while (true)
2373 {
2374 // extract tar header fields (name and prefix strings are not \0-terminated!!)
2375 const char *name = reinterpret_cast<const char*>(buf);
2376 const char *prefix = reinterpret_cast<const char*>(buf + 345);
2377 size_t size = strtoul(reinterpret_cast<const char*>(buf + 124), NULL, 8);
2378 int padding = (BLOCKSIZE - size % BLOCKSIZE) % BLOCKSIZE;
2379 unsigned char typeflag = buf[156];
2380
2381 // header types
2382 bool is_regular = typeflag == '0' || typeflag == '\0';
2383 bool is_xhd = typeflag == 'x';
2384 bool is_extended = typeflag == 'L';
2385
2386 // assign the (long) tar pathname
2387 path.clear();
2388 if (long_path.empty())
2389 {
2390 if (*prefix != '\0')
2391 {
2392 if (prefix[154] == '\0')
2393 path.assign(prefix);
2394 else
2395 path.assign(prefix, 155);
2396 path.push_back('/');
2397 }
2398 if (name[99] == '\0')
2399 path.append(name);
2400 else
2401 path.append(name, 100);
2402 }
2403 else
2404 {
2405 path.swap(long_path);
2406 }
2407
2408 // remove header to advance to the body
2409 len -= BLOCKSIZE;
2410 memmove(buf, buf + BLOCKSIZE, static_cast<size_t>(len));
2411
2412 // check if archived file meets selection criteria
2413 size_t minlen = std::min(static_cast<size_t>(len), size);
2414 bool is_selected = select_matching(path.c_str(), buf, minlen, is_regular);
2415
2416 // if extended headers are present
2417 if (is_xhd)
2418 {
2419 // typeflag 'x': extract the long path from the pax extended header block in the body
2420 const char *b = reinterpret_cast<const char*>(buf);
2421 const char *e = b + minlen;
2422 const char *t = "path=";
2423 const char *s = std::search(b, e, t, t + 5);
2424 if (s != NULL)
2425 {
2426 e = static_cast<const char*>(memchr(s, '\n', e - s));
2427 if (e != NULL)
2428 long_path.assign(s + 5, e - s - 5);
2429 }
2430 }
2431 else if (is_extended)
2432 {
2433 // typeflag 'L': get long name from the body
2434 long_path.assign(reinterpret_cast<const char*>(buf), minlen);
2435 }
2436
2437 // if the pipe is closed, then get a new pipe to search the next part in the archive
2438 if (is_selected && pipe_fd[1] == -1)
2439 {
2440 // signal close and wait until the main grep thread created a new pipe in close_file()
2441 std::unique_lock<std::mutex> lock(pipe_mutex);
2442 pipe_close.notify_one();
2443 waiting = true;
2444 pipe_ready.wait(lock);
2445 waiting = false;
2446 lock.unlock();
2447
2448 // failed to create a pipe in close_file()
2449 if (pipe_fd[1] == -1)
2450 break;
2451 }
2452
2453 // assign the Grep::partname (synchronized on pipe_mutex and pipe), before sending to the (new) pipe
2454 if (is_selected)
2455 {
2456 if (!partprefix.empty())
2457 partname.assign(partprefix).append(":").append(path);
2458 else
2459 partname.swap(path);
2460 }
2461
2462 // it is ok to push the body into the pipe for the main thread to search
2463 bool ok = is_selected;
2464
2465 while (len > 0)
2466 {
2467 size_t len_out = std::min(static_cast<size_t>(len), size);
2468
2469 if (ok)
2470 {
2471 // write decompressed data to the pipe, if the pipe is broken then stop pushing more data into this pipe
2472 if (write(pipe_fd[1], buf, len_out) < static_cast<ssize_t>(len_out))
2473 ok = false;
2474 }
2475
2476 size -= len_out;
2477
2478 // reached the end of the tar body?
2479 if (size == 0)
2480 {
2481 len -= len_out;
2482 memmove(buf, buf + len_out, static_cast<size_t>(len));
2483
2484 break;
2485 }
2486
2487 // decompress the next block of data into the buffer
2488 len = zstream.decompress(buf, maxlen);
2489 }
2490
2491 // error?
2492 if (len < 0)
2493 break;
2494
2495 // fill the rest of the buffer with decompressed data
2496 if (static_cast<size_t>(len) < maxlen)
2497 {
2498 std::streamsize len_in = zstream.decompress(buf + len, maxlen - static_cast<size_t>(len));
2499
2500 // error?
2501 if (len_in < 0)
2502 break;
2503
2504 len += len_in;
2505 }
2506
2507 // skip padding
2508 if (len > padding)
2509 {
2510 len -= padding;
2511 memmove(buf, buf + padding, static_cast<size_t>(len));
2512 }
2513
2514 // rest of the file is too short, something is wrong
2515 if (len <= BLOCKSIZE)
2516 break;
2517
2518 // no more parts to extract?
2519 if (*buf == '\0' || (memcmp(buf + 257, ustar_magic, 8) != 0 && memcmp(buf + 257, gnutar_magic, 8) != 0))
2520 break;
2521
2522 // get a new pipe to search the next part in the archive, if the previous part was a regular file
2523 if (is_selected)
2524 {
2525 // close our end of the pipe
2526 close(pipe_fd[1]);
2527 pipe_fd[1] = -1;
2528 }
2529 }
2530
2531 // done extracting the tar file
2532 return true;
2533 }
2534 }
2535
2536 // not a tar file
2537 return false;
2538 }
2539
2540 // if cpio file, extract regular file contents and push into pipes one by one, return true when done
filter_cpioGrep2541 bool filter_cpio(zstreambuf& zstream, const std::string& partprefix, unsigned char *buf, size_t maxlen, std::streamsize len)
2542 {
2543 const int HEADERSIZE = 110;
2544
2545 if (len > HEADERSIZE)
2546 {
2547 // cpio odc format
2548 const char odc_magic[6] = { '0', '7', '0', '7', '0', '7' };
2549
2550 // cpio newc format
2551 const char newc_magic[6] = { '0', '7', '0', '7', '0', '1' };
2552
2553 // cpio newc+crc format
2554 const char newc_crc_magic[6] = { '0', '7', '0', '7', '0', '2' };
2555
2556 // is this a cpio archive?
2557 if (memcmp(buf, odc_magic, 6) == 0 || memcmp(buf, newc_magic, 6) == 0 || memcmp(buf, newc_crc_magic, 6) == 0)
2558 {
2559 // produce headers with cpio file pathnames for each archived part (Grep::partname)
2560 if (!flag_no_filename)
2561 flag_no_header = false;
2562
2563 // inform the main grep thread we are extracting an archive
2564 extracting = true;
2565
2566 // to hold the path (prefix + name) extracted from the header
2567 std::string path;
2568
2569 // need a new pipe, close current pipe first to create a new pipe
2570 bool in_progress = false;
2571
2572 while (true)
2573 {
2574 // true if odc format, false if newc format
2575 bool is_odc = buf[5] == '7';
2576
2577 // odc header length is 76, newc header length is 110
2578 int header_len = is_odc ? 76 : 110;
2579
2580 char tmp[16];
2581 char *rest;
2582
2583 // get the namesize
2584 size_t namesize;
2585 if (is_odc)
2586 {
2587 memcpy(tmp, buf + 59, 6);
2588 tmp[6] = '\0';
2589 namesize = strtoul(tmp, &rest, 8);
2590 }
2591 else
2592 {
2593 memcpy(tmp, buf + 94, 8);
2594 tmp[8] = '\0';
2595 namesize = strtoul(tmp, &rest, 16);
2596 }
2597
2598 // if not a valid mode value, then something is wrong
2599 if (rest == NULL || *rest != '\0')
2600 {
2601 // data was read, stop reading more
2602 if (in_progress)
2603 break;
2604
2605 // assume this is not a cpio file and return false
2606 return false;
2607 }
2608
2609 // pathnames with trailing \0 cannot be empty or too large
2610 if (namesize <= 1 || namesize >= 65536)
2611 break;
2612
2613 // get the filesize
2614 size_t filesize;
2615 if (is_odc)
2616 {
2617 memcpy(tmp, buf + 65, 11);
2618 tmp[11] = '\0';
2619 filesize = strtoul(tmp, &rest, 8);
2620 }
2621 else
2622 {
2623 memcpy(tmp, buf + 54, 8);
2624 tmp[8] = '\0';
2625 filesize = strtoul(tmp, &rest, 16);
2626 }
2627
2628 // if not a valid mode value, then something is wrong
2629 if (rest == NULL || *rest != '\0')
2630 {
2631 // data was read, stop reading more
2632 if (in_progress)
2633 break;
2634
2635 // assume this is not a cpio file and return false
2636 return false;
2637 }
2638
2639 // true if this is a regular file when (mode & 0170000) == 0100000
2640 bool is_regular;
2641 if (is_odc)
2642 {
2643 memcpy(tmp, buf + 18, 6);
2644 tmp[6] = '\0';
2645 is_regular = (strtoul(tmp, &rest, 8) & 0170000) == 0100000;
2646 }
2647 else
2648 {
2649 memcpy(tmp, buf + 14, 8);
2650 tmp[8] = '\0';
2651 is_regular = (strtoul(tmp, &rest, 16) & 0170000) == 0100000;
2652 }
2653
2654 // if not a valid mode value, then something is wrong
2655 if (rest == NULL || *rest != '\0')
2656 {
2657 // data was read, stop reading more
2658 if (in_progress)
2659 break;
2660
2661 // assume this is not a cpio file and return false
2662 return false;
2663 }
2664
2665 // remove header to advance to the body
2666 len -= header_len;
2667 memmove(buf, buf + header_len, static_cast<size_t>(len));
2668
2669 // assign the cpio pathname
2670 path.clear();
2671
2672 size_t size = namesize;
2673
2674 while (len > 0)
2675 {
2676 size_t n = std::min(static_cast<size_t>(len), size);
2677 char *b = reinterpret_cast<char*>(buf);
2678
2679 path.append(b, n);
2680 size -= n;
2681
2682 if (size == 0)
2683 {
2684 // remove pathname to advance to the body
2685 len -= n;
2686 memmove(buf, buf + n, static_cast<size_t>(len));
2687
2688 break;
2689 }
2690
2691 // decompress the next block of data into the buffer
2692 len = zstream.decompress(buf, maxlen);
2693 }
2694
2695 // error?
2696 if (len < 0)
2697 break;
2698
2699 // remove trailing \0
2700 if (path.back() == '\0')
2701 path.pop_back();
2702
2703 // reached the end of the cpio archive?
2704 if (path == "TRAILER!!!")
2705 break;
2706
2707 // fill the rest of the buffer with decompressed data
2708 if (static_cast<size_t>(len) < maxlen)
2709 {
2710 std::streamsize len_in = zstream.decompress(buf + len, maxlen - static_cast<size_t>(len));
2711
2712 // error?
2713 if (len_in < 0)
2714 break;
2715
2716 len += len_in;
2717 }
2718
2719 // skip newc format \0 padding after the pathname
2720 if (!is_odc && len > 3)
2721 {
2722 size_t n = 4 - (110 + namesize) % 4;
2723 len -= n;
2724 memmove(buf, buf + n, static_cast<size_t>(len));
2725 }
2726
2727 // check if archived file meets selection criteria
2728 size_t minlen = std::min(static_cast<size_t>(len), filesize);
2729 bool is_selected = select_matching(path.c_str(), buf, minlen, is_regular);
2730
2731 // if the pipe is closed, then get a new pipe to search the next part in the archive
2732 if (is_selected && pipe_fd[1] == -1)
2733 {
2734 // signal close and wait until the main grep thread created a new pipe in close_file()
2735 std::unique_lock<std::mutex> lock(pipe_mutex);
2736 pipe_close.notify_one();
2737 waiting = true;
2738 pipe_ready.wait(lock);
2739 waiting = false;
2740 lock.unlock();
2741
2742 // failed to create a pipe in close_file()
2743 if (pipe_fd[1] == -1)
2744 break;
2745 }
2746
2747 // assign the Grep::partname (synchronized on pipe_mutex and pipe), before sending to the (new) pipe
2748 if (is_selected)
2749 {
2750 if (!partprefix.empty())
2751 partname.assign(partprefix).append(":").append(path);
2752 else
2753 partname.swap(path);
2754 }
2755
2756 // it is ok to push the body into the pipe for the main thread to search
2757 bool ok = is_selected;
2758
2759 size = filesize;
2760
2761 while (len > 0)
2762 {
2763 size_t len_out = std::min(static_cast<size_t>(len), size);
2764
2765 if (ok)
2766 {
2767 // write decompressed data to the pipe, if the pipe is broken then stop pushing more data into this pipe
2768 if (write(pipe_fd[1], buf, len_out) < static_cast<ssize_t>(len_out))
2769 ok = false;
2770 }
2771
2772 size -= len_out;
2773
2774 // reached the end of the cpio body?
2775 if (size == 0)
2776 {
2777 len -= len_out;
2778 memmove(buf, buf + len_out, static_cast<size_t>(len));
2779
2780 break;
2781 }
2782
2783 // decompress the next block of data into the buffer
2784 len = zstream.decompress(buf, maxlen);
2785 }
2786
2787 // error?
2788 if (len < 0)
2789 break;
2790
2791 if (static_cast<size_t>(len) < maxlen)
2792 {
2793 // fill the rest of the buffer with decompressed data
2794 std::streamsize len_in = zstream.decompress(buf + len, maxlen - static_cast<size_t>(len));
2795
2796 // error?
2797 if (len_in < 0)
2798 break;
2799
2800 len += len_in;
2801 }
2802
2803 // skip newc format \0 padding
2804 if (!is_odc && len > 2)
2805 {
2806 size_t n = (4 - filesize % 4) % 4;
2807 len -= n;
2808 memmove(buf, buf + n, static_cast<size_t>(len));
2809 }
2810
2811 // rest of the file is too short, something is wrong
2812 if (len <= HEADERSIZE)
2813 break;
2814
2815 // quit if this is not valid cpio header magic
2816 if (memcmp(buf, odc_magic, 6) != 0 && memcmp(buf, newc_magic, 6) != 0 && memcmp(buf, newc_crc_magic, 6) != 0)
2817 break;
2818
2819 // get a new pipe to search the next part in the archive, if the previous part was a regular file
2820 if (is_selected)
2821 {
2822 // close our end of the pipe
2823 close(pipe_fd[1]);
2824 pipe_fd[1] = -1;
2825
2826 in_progress = true;
2827 }
2828 }
2829
2830 // done extracting the cpio file
2831 return true;
2832 }
2833 }
2834
2835 // not a cpio file
2836 return false;
2837 }
2838
2839 // true if path matches search constraints or buf contains magic bytes
select_matchingGrep2840 bool select_matching(const char *path, const unsigned char *buf, size_t len, bool is_regular)
2841 {
2842 bool is_selected = is_regular;
2843
2844 if (is_selected)
2845 {
2846 const char *basename = strrchr(path, '/');
2847 if (basename == NULL)
2848 basename = path;
2849 else
2850 ++basename;
2851
2852 if (*basename == '.' && !flag_hidden)
2853 return false;
2854
2855 // -O, -t, and -g (--include and --exclude): check if pathname or basename matches globs, is_selected = false if not
2856 if (!flag_all_exclude.empty() || !flag_all_include.empty())
2857 {
2858 // exclude files whose basename matches any one of the --exclude globs
2859 for (const auto& glob : flag_all_exclude)
2860 if (!(is_selected = !glob_match(path, basename, glob.c_str())))
2861 break;
2862
2863 // include only if not excluded
2864 if (is_selected)
2865 {
2866 // include files whose basename matches any one of the --include globs
2867 for (const auto& glob : flag_all_include)
2868 if ((is_selected = glob_match(path, basename, glob.c_str())))
2869 break;
2870 }
2871 }
2872
2873 // -M: check magic bytes, requires sufficiently large len of buf[] to match patterns, which is fine when Z_BUF_LEN is large e.g. 64K
2874 if (buf != NULL && !flag_file_magic.empty() && (flag_all_include.empty() || !is_selected))
2875 {
2876 // create a matcher to match the magic pattern, we cannot use magic_matcher because it is not thread safe
2877 reflex::Matcher magic(magic_pattern);
2878 magic.buffer(const_cast<char*>(reinterpret_cast<const char*>(buf)), len + 1);
2879 size_t match = magic.scan();
2880 is_selected = match == flag_not_magic || match >= flag_min_magic;
2881 }
2882 }
2883
2884 return is_selected;
2885 }
2886
2887 #endif
2888 #endif
2889
2890 // close the file and clear input, return true if next file is extracted from an archive to search
// Close the current input and clear the matcher input.
//   pathname  the pathname of the file being closed, only used in the warning when no new pipe can be created
// When decompressing with the decompression thread and that thread is extracting an archive, a new pipe is
// created for the next archived part and true is returned so that the caller searches that part next.
// Returns false otherwise, after the stream, (optionally drained) stdin and the file are released.
bool close_file(const char *pathname)
{
  (void)pathname; // appease -Wunused-parameter

#ifdef HAVE_LIBZ

#ifdef WITH_DECOMPRESSION_THREAD

  if (flag_decompress && pipe_fd[0] != -1)
  {
    // close the FILE* and its underlying pipe created with pipe() and fdopen()
    if (input.file() != NULL)
    {
      fclose(input.file());
      input = static_cast<FILE*>(NULL);
    }

    // our end of the pipe is now closed
    pipe_fd[0] = -1;

    // if extracting and the decompression filter thread is not yet waiting, then wait until the other end closed the pipe
    std::unique_lock<std::mutex> lock(pipe_mutex);
    if (!waiting)
      pipe_close.wait(lock);
    lock.unlock();

    // extract the next file from the archive when applicable, e.g. zip format
    if (extracting)
    {
      // output is not blocked or cancelled
      if (!out.eof && !out.cancelled())
      {
        FILE *pipe_in = NULL;

        // open pipe between worker and decompression thread, then start decompression thread
        if (pipe(pipe_fd) == 0 && (pipe_in = fdopen(pipe_fd[0], "rb")) != NULL)
        {
          // notify the decompression filter thread of the new pipe
          pipe_ready.notify_one();

          input = reflex::Input(pipe_in, flag_encoding_type);

          // loop back in search() to start searching the next file in the archive
          return true;
        }

        // failed to create a new pipe
        warning("cannot open decompression pipe while reading", pathname);

        // pipe() succeeded but fdopen() failed: close both ends of the new pipe
        if (pipe_fd[0] != -1)
        {
          close(pipe_fd[0]);
          close(pipe_fd[1]);
        }
      }

      // mark the pipe as closed, which tells the waiting decompression thread that no new pipe was created
      pipe_fd[0] = -1;
      pipe_fd[1] = -1;

      // notify the decompression thread filter_tar/filter_cpio
      pipe_ready.notify_one();
    }
  }

#endif

  if (stream != NULL)
  {
    delete stream;
    stream = NULL;
  }

#endif

#ifdef WITH_STDIN_DRAIN
  // drain stdin until eof
  if (file == stdin && !feof(stdin))
  {
    // when seeking to the end of stdin is not possible, read and discard the remaining input
    if (fseek(stdin, 0, SEEK_END) != 0)
    {
      char buf[16384];
      while (true)
      {
        size_t r = fread(buf, 1, sizeof(buf), stdin);

        // a full buffer was read, keep reading
        if (r == sizeof(buf))
          continue;
        if (feof(stdin))
          break;

        // NOTE(review): r is a size_t so r >= 0 is always true and the else branch below is never taken
        if (r >= 0)
        {
          // a short read on blocking stdin ends the draining
          if (!(fcntl(0, F_GETFL) & O_NONBLOCK))
            break;

          // stdin is nonblocking: wait up to one second for more input or an exceptional condition on stdin
          struct timeval tv;
          fd_set rfds, efds;
          FD_ZERO(&rfds);
          FD_ZERO(&efds);
          FD_SET(0, &rfds);
          FD_SET(0, &efds);
          tv.tv_sec = 1;
          tv.tv_usec = 0;
          int r = ::select(1, &rfds, NULL, &efds, &tv);
          if (r < 0 && errno != EINTR)
            break;
          if (r > 0 && FD_ISSET(0, &efds))
            break;
        }
        else if (errno != EINTR)
        {
          break;
        }
      }
    }
  }
#endif

  // close the file
  if (file != NULL && file != stdin && file != source)
  {
    fclose(file);
    file = NULL;
  }

  input.clear();

  return false;
}
3017
3018 // specify input to read for matcher, when input is a regular file then try mmap for zero copy overhead
init_readGrep3019 bool init_read()
3020 {
3021 const char *base;
3022 size_t size;
3023
3024 // attempt to mmap the input file
3025 if (mmap.file(input, base, size))
3026 {
3027 // matcher reads directly from protected mmap memory (cast is safe: base[0..size] is not modified!)
3028 matcher->buffer(const_cast<char*>(base), size + 1);
3029 }
3030 else
3031 {
3032 matcher->input(input);
3033
3034 #if !defined(HAVE_PCRE2) && defined(HAVE_BOOST_REGEX)
3035 // buffer all input to work around Boost.Regex partial matching bug, but this may throw std::bad_alloc if the file is too large
3036 if (flag_perl_regexp)
3037 matcher->buffer();
3038 #endif
3039
3040 #ifndef OS_WIN
3041 if (input == stdin)
3042 {
3043 struct stat buf;
3044 bool interactive = fstat(0, &buf) == 0 && (S_ISCHR(buf.st_mode) || S_ISFIFO(buf.st_mode));
3045
3046 // if input is a TTY or pipe, then make stdin nonblocking and register a stdin handler to continue reading and to flush results to output
3047 if (interactive)
3048 {
3049 fcntl(0, F_SETFL, fcntl(0, F_GETFL) | O_NONBLOCK);
3050 matcher->in.set_handler(&stdin_handler);
3051 }
3052 }
3053 #endif
3054 }
3055
3056 // -I: do not match binary
3057 if (flag_binary_without_match && init_is_binary())
3058 return false;
3059
3060 // --range=NUM1[,NUM2]: start searching at line NUM1
3061 for (size_t i = flag_min_line; i > 1; --i)
3062 if (!matcher->skip('\n'))
3063 break;
3064
3065 return true;
3066 }
3067
3068 // after opening a file with init_read, check if it is binary
init_is_binaryGrep3069 bool init_is_binary()
3070 {
3071 // limit checking to first buffer filled with input up to 16K, which should suffice, to improve performance
3072 size_t avail = matcher->avail();
3073 return is_binary(matcher->begin(), avail < 16384 ? avail : 16384);
3074 }
3075
const char *filename; // the name of the file being searched
std::string partname; // the name of an extracted file from an archive
std::string restline; // a buffer to store the rest of a line to search
Output out; // asynchronous output
reflex::AbstractMatcher *matcher; // the pattern matcher we're using, never NULL
Matchers *matchers; // the CNF of AND/OR/NOT matchers or NULL
std::vector<bool> matching; // bitmap to keep track of globally matching CNF terms
std::vector<std::vector<bool>> notmatching; // bitmap to keep track of globally matching OR NOT CNF terms
MMap mmap; // mmap state
reflex::Input input; // input to the matcher
FILE *file; // the current input file
#ifndef OS_WIN
StdInHandler stdin_handler; // a handler to handle non-blocking stdin from a TTY or a slow pipe
#endif
#ifdef HAVE_LIBZ
zstreambuf *zstream; // the decompressed stream from the current input file
std::istream *stream; // input stream layered on the decompressed stream
#ifdef WITH_DECOMPRESSION_THREAD
std::thread thread; // decompression thread
std::atomic_bool thread_end; // true if decompression thread should terminate
int pipe_fd[2]; // decompressed stream pipe, [0] is the read end, [1] is the write end used by the decompression thread, -1 when closed
std::mutex pipe_mutex; // mutex to extract files in thread, used with the three condition variables below
std::condition_variable pipe_zstrm; // cv the decompression thread waits on in decompress() until a new zstream is ready
std::condition_variable pipe_ready; // cv the decompression thread waits on until close_file() created a new pipe or failed to create one
std::condition_variable pipe_close; // cv close_file() waits on, notified by the decompression thread right before it waits on pipe_ready or pipe_zstrm
// NOTE(review): volatile by itself does not synchronize threads in C++, the two flags below are shared with
// the decompression thread - confirm that the accesses are safe or consider std::atomic_bool
volatile bool extracting; // true if extracting files from TAR or ZIP archive
volatile bool waiting; // true if decompression thread is waiting
#endif
#endif
3105
3106 };
3107
3108 // a job in the job queue
3109 struct Job {
3110
3111 // sentinel job NONE
3112 static const size_t NONE = UNDEFINED_SIZE;
3113
JobJob3114 Job()
3115 :
3116 pathname(),
3117 slot(NONE)
3118 { }
3119
JobJob3120 Job(const char *pathname, size_t slot)
3121 :
3122 pathname(pathname),
3123 slot(slot)
3124 { }
3125
noneJob3126 bool none()
3127 {
3128 return slot == NONE;
3129 }
3130
3131 std::string pathname;
3132 size_t slot;
3133 };
3134
3135 struct GrepWorker;
3136
3137 // master submits jobs to workers and implements operations to support lock-free job stealing
3138 struct GrepMaster : public Grep {
3139
GrepMasterGrepMaster3140 GrepMaster(FILE *file, reflex::AbstractMatcher *matcher, Matchers *matchers)
3141 :
3142 Grep(file, matcher, matchers),
3143 sync(flag_sort_key == Sort::NA ? Output::Sync::Mode::UNORDERED : Output::Sync::Mode::ORDERED)
3144 {
3145 // master and workers synchronize their output
3146 out.sync_on(&sync);
3147
3148 // set global handle to be able to call cancel_ugrep()
3149 set_grep_handle(this);
3150
3151 start_workers();
3152
3153 iworker = workers.begin();
3154 }
3155
~GrepMasterGrepMaster3156 virtual ~GrepMaster()
3157 {
3158 stop_workers();
3159 clear_grep_handle();
3160 }
3161
3162 // clone the pattern matcher - the caller is responsible to deallocate the returned matcher
matcher_cloneGrepMaster3163 reflex::AbstractMatcher *matcher_clone() const
3164 {
3165 return matcher->clone();
3166 }
3167
3168 // clone the CNF of AND/OR/NOT matchers - the caller is responsible to deallocate the returned list of matchers if not NULL
matchers_cloneGrepMaster3169 Matchers *matchers_clone() const
3170 {
3171 if (matchers == NULL)
3172 return NULL;
3173
3174 auto *new_matchers = new Matchers;
3175
3176 for (const auto& i : *matchers)
3177 {
3178 new_matchers->emplace_back();
3179
3180 auto& last = new_matchers->back();
3181
3182 for (const auto& j : i)
3183 {
3184 if (j)
3185 last.emplace_back(j->clone());
3186 else
3187 last.emplace_back();
3188 }
3189 }
3190
3191 return new_matchers;
3192 }
3193
3194 // search a file by submitting it as a job to a worker
searchGrepMaster3195 void search(const char *pathname) override
3196 {
3197 submit(pathname);
3198 }
3199
3200 // start worker threads
3201 void start_workers();
3202
3203 // stop all workers
3204 void stop_workers();
3205
3206 // submit a job with a pathname to a worker, workers are visited round-robin
3207 void submit(const char *pathname);
3208
3209 // lock-free job stealing on behalf of a worker from a co-worker with at least --min-steal jobs still to do
3210 bool steal(GrepWorker *worker);
3211
3212 std::list<GrepWorker> workers; // workers running threads
3213 std::list<GrepWorker>::iterator iworker; // the next worker to submit a job to
3214 Output::Sync sync; // sync output of workers
3215
3216 };
3217
3218 // worker runs a thread to execute jobs submitted by the master
3219 struct GrepWorker : public Grep {
3220
GrepWorkerGrepWorker3221 GrepWorker(FILE *file, GrepMaster *master)
3222 :
3223 Grep(file, master->matcher_clone(), master->matchers_clone()),
3224 master(master),
3225 todo(0)
3226 {
3227 // all workers synchronize their output on the master's sync object
3228 out.sync_on(&master->sync);
3229
3230 // run worker thread executing jobs assigned to its queue
3231 thread = std::thread(&GrepWorker::execute, this);
3232 }
3233
~GrepWorkerGrepWorker3234 virtual ~GrepWorker()
3235 {
3236 // delete the cloned matcher
3237 delete matcher;
3238
3239 // delete the cloned matchers, if any
3240 if (matchers != NULL)
3241 delete matchers;
3242 }
3243
3244 // worker thread execution
3245 void execute();
3246
3247 // submit Job::NONE sentinel to this worker
submit_jobGrepWorker3248 void submit_job()
3249 {
3250 while (todo >= MAX_JOB_QUEUE_SIZE && !out.eof && !out.cancelled())
3251 std::this_thread::sleep_for(std::chrono::milliseconds(100)); // give the worker threads some slack
3252
3253 std::unique_lock<std::mutex> lock(queue_mutex);
3254
3255 jobs.emplace_back();
3256 ++todo;
3257
3258 queue_work.notify_one();
3259 }
3260
3261 // submit a job to this worker
submit_jobGrepWorker3262 void submit_job(const char *pathname, size_t slot)
3263 {
3264 while (todo >= MAX_JOB_QUEUE_SIZE && !out.eof && !out.cancelled())
3265 std::this_thread::sleep_for(std::chrono::milliseconds(100)); // give the worker threads some slack
3266
3267 std::unique_lock<std::mutex> lock(queue_mutex);
3268
3269 jobs.emplace_back(pathname, slot);
3270 ++todo;
3271
3272 queue_work.notify_one();
3273 }
3274
3275 // move a stolen job to this worker, maintaining job slot order
move_jobGrepWorker3276 void move_job(Job& job)
3277 {
3278 std::unique_lock<std::mutex> lock(queue_mutex);
3279
3280 bool inserted = false;
3281
3282 // insert job in the queue to maintain job order
3283 for (auto j = jobs.begin(); j != jobs.end(); ++j)
3284 {
3285 if (j->slot > job.slot)
3286 {
3287 jobs.insert(j, std::move(job));
3288 inserted = true;
3289 break;
3290 }
3291 }
3292
3293 if (!inserted)
3294 jobs.emplace_back(std::move(job));
3295
3296 ++todo;
3297
3298 queue_work.notify_one();
3299 }
3300
3301 // receive a job for this worker, wait until one arrives
next_jobGrepWorker3302 void next_job(Job& job)
3303 {
3304 std::unique_lock<std::mutex> lock(queue_mutex);
3305
3306 while (jobs.empty())
3307 queue_work.wait(lock);
3308
3309 job = jobs.front();
3310
3311 jobs.pop_front();
3312 --todo;
3313
3314 // if we popped a Job::NONE sentinel but the queue has some jobs, then move the sentinel to the back of the queue
3315 if (job.none() && !jobs.empty())
3316 {
3317 jobs.emplace_back();
3318 job = jobs.front();
3319 jobs.pop_front();
3320 }
3321 }
3322
3323 // steal a job from this worker, if at least --min-steal jobs to do, returns true if successful
steal_jobGrepWorker3324 bool steal_job(Job& job)
3325 {
3326 // not enough jobs in the queue to steal from
3327 if (todo < flag_min_steal)
3328 return false;
3329
3330 std::unique_lock<std::mutex> lock(queue_mutex);
3331
3332 if (jobs.empty())
3333 return false;
3334
3335 job = jobs.front();
3336
3337 // we cannot steal a Job::NONE sentinel
3338 if (job.none())
3339 return false;
3340
3341 jobs.pop_front();
3342 --todo;
3343
3344 return true;
3345 }
3346
3347 // submit Job::NONE sentinel to stop this worker
stopGrepWorker3348 void stop()
3349 {
3350 submit_job();
3351 }
3352
3353 std::thread thread; // thread of this worker, spawns GrepWorker::execute()
3354 GrepMaster *master; // the master of this worker
3355 std::mutex queue_mutex; // job queue mutex
3356 std::condition_variable queue_work; // cv to control the job queue
3357 std::deque<Job> jobs; // queue of pending jobs submitted to this worker
3358 std::atomic_size_t todo; // number of jobs in the queue, atomic for lock-free job stealing
3359
3360 };
3361
3362 // start worker threads
start_workers()3363 void GrepMaster::start_workers()
3364 {
3365 size_t num;
3366
3367 // create worker threads
3368 try
3369 {
3370 for (num = 0; num < threads; ++num)
3371 workers.emplace(workers.end(), out.file, this);
3372 }
3373
3374 // if sufficient resources are not available then reduce the number of threads to the number of active workers created
3375 catch (std::system_error& error)
3376 {
3377 if (error.code() != std::errc::resource_unavailable_try_again)
3378 throw;
3379
3380 threads = num;
3381 }
3382 }
3383
3384 // stop all workers
stop_workers()3385 void GrepMaster::stop_workers()
3386 {
3387 // submit Job::NONE sentinel to workers
3388 for (auto& worker : workers)
3389 worker.stop();
3390
3391 // wait for workers to join
3392 for (auto& worker : workers)
3393 worker.thread.join();
3394 }
3395
3396 // submit a job with a pathname to a worker, workers are visited round-robin
submit(const char * pathname)3397 void GrepMaster::submit(const char *pathname)
3398 {
3399 iworker->submit_job(pathname, sync.next++);
3400
3401 // around we go
3402 ++iworker;
3403 if (iworker == workers.end())
3404 iworker = workers.begin();
3405 }
3406
3407 // lock-free job stealing on behalf of a worker from a co-worker with at least --min-steal jobs still to do
steal(GrepWorker * worker)3408 bool GrepMaster::steal(GrepWorker *worker)
3409 {
3410 // pick a random co-worker using thread-safe std::chrono::high_resolution_clock as a simple RNG
3411 size_t n = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now().time_since_epoch()).count() % threads;
3412 auto iworker = workers.begin();
3413
3414 while (n > 0)
3415 {
3416 ++iworker;
3417 --n;
3418 }
3419
3420 // try to steal a job from the random co-worker or the next co-workers
3421 for (size_t i = 0; i < threads; ++i)
3422 {
3423 // around we go
3424 if (iworker == workers.end())
3425 iworker = workers.begin();
3426
3427 // if co-worker isn't this worker (no self-stealing!)
3428 if (&*iworker != worker)
3429 {
3430 Job job;
3431
3432 // if co-worker has at least --min-steal jobs then steal one for this worker
3433 if (iworker->steal_job(job))
3434 {
3435 worker->move_job(job);
3436
3437 return true;
3438 }
3439 }
3440
3441 // try next co-worker
3442 ++iworker;
3443 }
3444
3445 // couldn't steal any job
3446 return false;
3447 }
3448
3449 // execute worker thread
execute()3450 void GrepWorker::execute()
3451 {
3452 Job job;
3453
3454 while (!out.eof && !out.cancelled())
3455 {
3456 // wait for next job
3457 next_job(job);
3458
3459 // worker should stop?
3460 if (job.none())
3461 break;
3462
3463 // start synchronizing output for this job slot in ORDERED mode (--sort)
3464 out.begin(job.slot);
3465
3466 // search the file for this job
3467 search(job.pathname.c_str());
3468
3469 // end output in ORDERED mode (--sort) for this job slot
3470 out.end();
3471
3472 // if only one job is left to do, try stealing another job from a co-worker
3473 if (todo <= 1)
3474 master->steal(this);
3475 }
3476 }
3477
3478 // table of RE/flex file encodings for option --encoding (may be specified in any case)
3479 const Encoding encoding_table[] = {
3480 { "binary", reflex::Input::file_encoding::plain },
3481 { "ASCII", reflex::Input::file_encoding::utf8 },
3482 { "UTF-8", reflex::Input::file_encoding::utf8 },
3483 { "UTF-16", reflex::Input::file_encoding::utf16be },
3484 { "UTF-16BE", reflex::Input::file_encoding::utf16be },
3485 { "UTF-16LE", reflex::Input::file_encoding::utf16le },
3486 { "UTF-32", reflex::Input::file_encoding::utf32be },
3487 { "UTF-32BE", reflex::Input::file_encoding::utf32be },
3488 { "UTF-32LE", reflex::Input::file_encoding::utf32le },
3489 { "LATIN1", reflex::Input::file_encoding::latin },
3490 { "ISO-8859-1", reflex::Input::file_encoding::latin },
3491 { "ISO-8859-2", reflex::Input::file_encoding::iso8859_2 },
3492 { "ISO-8859-3", reflex::Input::file_encoding::iso8859_3 },
3493 { "ISO-8859-4", reflex::Input::file_encoding::iso8859_4 },
3494 { "ISO-8859-5", reflex::Input::file_encoding::iso8859_5 },
3495 { "ISO-8859-6", reflex::Input::file_encoding::iso8859_6 },
3496 { "ISO-8859-7", reflex::Input::file_encoding::iso8859_7 },
3497 { "ISO-8859-8", reflex::Input::file_encoding::iso8859_8 },
3498 { "ISO-8859-9", reflex::Input::file_encoding::iso8859_9 },
3499 { "ISO-8859-10", reflex::Input::file_encoding::iso8859_10 },
3500 { "ISO-8859-11", reflex::Input::file_encoding::iso8859_11 },
3501 { "ISO-8859-13", reflex::Input::file_encoding::iso8859_13 },
3502 { "ISO-8859-14", reflex::Input::file_encoding::iso8859_14 },
3503 { "ISO-8859-15", reflex::Input::file_encoding::iso8859_15 },
3504 { "ISO-8859-16", reflex::Input::file_encoding::iso8859_16 },
3505 { "MAC", reflex::Input::file_encoding::macroman },
3506 { "MACROMAN", reflex::Input::file_encoding::macroman },
3507 { "EBCDIC", reflex::Input::file_encoding::ebcdic },
3508 { "CP437", reflex::Input::file_encoding::cp437 },
3509 { "CP850", reflex::Input::file_encoding::cp850 },
3510 { "CP858", reflex::Input::file_encoding::cp858 },
3511 { "CP1250", reflex::Input::file_encoding::cp1250 },
3512 { "CP1251", reflex::Input::file_encoding::cp1251 },
3513 { "CP1252", reflex::Input::file_encoding::cp1252 },
3514 { "CP1253", reflex::Input::file_encoding::cp1253 },
3515 { "CP1254", reflex::Input::file_encoding::cp1254 },
3516 { "CP1255", reflex::Input::file_encoding::cp1255 },
3517 { "CP1256", reflex::Input::file_encoding::cp1256 },
3518 { "CP1257", reflex::Input::file_encoding::cp1257 },
3519 { "CP1258", reflex::Input::file_encoding::cp1258 },
3520 { "KOI8-R", reflex::Input::file_encoding::koi8_r },
3521 { "KOI8-U", reflex::Input::file_encoding::koi8_u },
3522 { "KOI8-RU", reflex::Input::file_encoding::koi8_ru },
3523 { NULL, 0 }
3524 };
3525
3526 // table of file types for option -t, --file-type
3527 const Type type_table[] = {
3528 { "actionscript", "as,mxml", NULL, NULL },
3529 { "ada", "ada,adb,ads", NULL, NULL },
3530 { "asm", "asm,s,S", NULL, NULL },
3531 { "asp", "asp", NULL, NULL },
3532 { "aspx", "master,ascx,asmx,aspx,svc", NULL, NULL },
3533 { "autoconf", "ac,in", NULL, NULL },
3534 { "automake", "am,in", NULL, NULL },
3535 { "awk", "awk", NULL, NULL },
3536 { "Awk", "awk", NULL, "#!\\h*/.*\\Wg?awk(\\W.*)?\\n" },
3537 { "basic", "bas,BAS,cls,frm,ctl,vb,resx", NULL, NULL },
3538 { "batch", "bat,BAT,cmd,CMD", NULL, NULL },
3539 { "bison", "y,yy,yxx", NULL, NULL },
3540 { "c", "c,h,H,hdl,xs", NULL, NULL },
3541 { "c++", "cpp,CPP,cc,cxx,CXX,h,hh,H,hpp,hxx,Hxx,HXX", NULL, NULL },
3542 { "clojure", "clj", NULL, NULL },
3543 { "csharp", "cs", NULL, NULL },
3544 { "css", "css", NULL, NULL },
3545 { "csv", "csv", NULL, NULL },
3546 { "dart", "dart", NULL, NULL },
3547 { "Dart", "dart", NULL, "#!\\h*/.*\\Wdart(\\W.*)?\\n" },
3548 { "delphi", "pas,int,dfm,nfm,dof,dpk,dproj,groupproj,bdsgroup,bdsproj", NULL, NULL },
3549 { "elisp", "el", NULL, NULL },
3550 { "elixir", "ex,exs", NULL, NULL },
3551 { "erlang", "erl,hrl", NULL, NULL },
3552 { "fortran", "for,ftn,fpp,f,F,f77,F77,f90,F90,f95,F95,f03,F03", NULL, NULL },
3553 { "gif", "gif", NULL, NULL },
3554 { "Gif", "gif", NULL, "GIF87a|GIF89a" },
3555 { "go", "go", NULL, NULL },
3556 { "groovy", "groovy,gtmpl,gpp,grunit,gradle", NULL, NULL },
3557 { "gsp", "gsp", NULL, NULL },
3558 { "haskell", "hs,lhs", NULL, NULL },
3559 { "html", "htm,html,xhtml", NULL, NULL },
3560 { "jade", "jade", NULL, NULL },
3561 { "java", "java,properties", NULL, NULL },
3562 { "jpeg", "jpg,jpeg", NULL, NULL },
3563 { "Jpeg", "jpg,jpeg", NULL, "\\xff\\xd8\\xff[\\xdb\\xe0\\xe1\\xee]" },
3564 { "js", "js", NULL, NULL },
3565 { "json", "json", NULL, NULL },
3566 { "jsp", "jsp,jspx,jthm,jhtml", NULL, NULL },
3567 { "julia", "jl", NULL, NULL },
3568 { "kotlin", "kt,kts", NULL, NULL },
3569 { "less", "less", NULL, NULL },
3570 { "lex", "l,ll,lxx", NULL, NULL },
3571 { "lisp", "lisp,lsp", NULL, NULL },
3572 { "lua", "lua", NULL, NULL },
3573 { "m4", "m4", NULL, NULL },
3574 { "make", "mk,mak", "makefile,Makefile,Makefile.Debug,Makefile.Release", NULL },
3575 { "markdown", "md", NULL, NULL },
3576 { "matlab", "m", NULL, NULL },
3577 { "node", "js", NULL, NULL },
3578 { "Node", "js", NULL, "#!\\h*/.*\\Wnode(\\W.*)?\\n" },
3579 { "objc", "m,h", NULL, NULL },
3580 { "objc++", "mm,h", NULL, NULL },
3581 { "ocaml", "ml,mli,mll,mly", NULL, NULL },
3582 { "parrot", "pir,pasm,pmc,ops,pod,pg,tg", NULL, NULL },
3583 { "pascal", "pas,pp", NULL, NULL },
3584 { "pdf", "pdf", NULL, NULL },
3585 { "Pdf", "pdf", NULL, "\\x25\\x50\\x44\\x46\\x2d" },
3586 { "perl", "pl,PL,pm,pod,t,psgi", NULL, NULL },
3587 { "Perl", "pl,PL,pm,pod,t,psgi", NULL, "#!\\h*/.*\\Wperl(\\W.*)?\\n" },
3588 { "php", "php,php3,php4,phtml", NULL, NULL },
3589 { "Php", "php,php3,php4,phtml", NULL, "#!\\h*/.*\\Wphp(\\W.*)?\\n" },
3590 { "png", "png", NULL, NULL },
3591 { "Png", "png", NULL, "\\x89png\\x0d\\x0a\\x1a\\x0a" },
3592 { "prolog", "pl,pro", NULL, NULL },
3593 { "python", "py", NULL, NULL },
3594 { "Python", "py", NULL, "#!\\h*/.*\\Wpython[23]?(\\W.*)?\\n" },
3595 { "r", "R", NULL, NULL },
3596 { "rpm", "rpm", NULL, NULL },
3597 { "Rpm", "rpm", NULL, "\\xed\\xab\\xee\\xdb" },
3598 { "rst", "rst", NULL, NULL },
3599 { "rtf", "rtf", NULL, NULL },
3600 { "Rtf", "rtf", NULL, "\\{\\rtf1" },
3601 { "ruby", "rb,rhtml,rjs,rxml,erb,rake,spec", "Rakefile", NULL },
3602 { "Ruby", "rb,rhtml,rjs,rxml,erb,rake,spec", "Rakefile", "#!\\h*/.*\\Wruby(\\W.*)?\\n" },
3603 { "rust", "rs", NULL, NULL },
3604 { "scala", "scala", NULL, NULL },
3605 { "scheme", "scm,ss", NULL, NULL },
3606 { "shell", "sh,bash,dash,csh,tcsh,ksh,zsh,fish", NULL, NULL },
3607 { "Shell", "sh,bash,dash,csh,tcsh,ksh,zsh,fish", NULL, "#!\\h*/.*\\W(ba|da|t?c|k|z|fi)?sh(\\W.*)?\\n" },
3608 { "smalltalk", "st", NULL, NULL },
3609 { "sql", "sql,ctl", NULL, NULL },
3610 { "svg", "svg", NULL, NULL },
3611 { "swift", "swift", NULL, NULL },
3612 { "tcl", "tcl,itcl,itk", NULL, NULL },
3613 { "tex", "tex,cls,sty,bib", NULL, NULL },
3614 { "text", "text,txt,TXT,md,rst", NULL, NULL },
3615 { "tiff", "tif,tiff", NULL, NULL },
3616 { "Tiff", "tif,tiff", NULL, "\\x49\\x49\\x2a\\x00|\\x4d\\x4d\\x00\\x2a" },
3617 { "tt", "tt,tt2,ttml", NULL, NULL },
3618 { "typescript", "ts,tsx", NULL, NULL },
3619 { "verilog", "v,vh,sv", NULL, NULL },
3620 { "vhdl", "vhd,vhdl", NULL, NULL },
3621 { "vim", "vim", NULL, NULL },
3622 { "xml", "xml,xsd,xsl,xslt,wsdl,rss,svg,ent,plist", NULL, NULL },
3623 { "Xml", "xml,xsd,xsl,xslt,wsdl,rss,svg,ent,plist", NULL, "<\\?xml " },
3624 { "yacc", "y", NULL, NULL },
3625 { "yaml", "yaml,yml", NULL, NULL },
3626 { NULL, NULL, NULL, NULL }
3627 };
3628
3629 #ifdef OS_WIN
3630 // ugrep main() for Windows to support wide string arguments and globbing
wmain(int argc,const wchar_t ** wargv)3631 int wmain(int argc, const wchar_t **wargv)
3632 #else
3633 // ugrep main()
3634 int main(int argc, const char **argv)
3635 #endif
3636 {
3637
3638 #ifdef OS_WIN
3639
3640 // store UTF-8 arguments for the duration of main() and convert Unicode command line arguments wargv[] to UTF-8 arguments argv[]
3641 const char **argv = new const char *[argc];
3642 for (int i = 0; i < argc; ++i)
3643 {
3644 arg_strings.emplace_back(utf8_encode(wargv[i]));
3645 argv[i] = arg_strings.back().c_str();
3646 }
3647
3648 // handle CTRL-C
3649 SetConsoleCtrlHandler(&sigint, TRUE);
3650
3651 #else
3652
3653 // ignore SIGPIPE
3654 signal(SIGPIPE, SIG_IGN);
3655
3656 // reset color on SIGINT and SIGTERM
3657 signal(SIGINT, sigint);
3658 signal(SIGTERM, sigint);
3659
3660 #endif
3661
3662 try
3663 {
3664 init(argc, argv);
3665 }
3666
3667 catch (std::exception& error)
3668 {
3669 abort("error: ", error.what());
3670 }
3671
3672 if (flag_query > 0)
3673 {
3674 if (!flag_no_messages && warnings > 0)
3675 abort("option -Q: warnings are present, use -s to ignore");
3676
3677 Query::query();
3678 }
3679 else
3680 {
3681 if (!flag_no_messages && flag_pager != NULL && warnings > 0)
3682 abort("option --pager: warnings are present, use -s to ignore");
3683
3684 try
3685 {
3686 ugrep();
3687 }
3688
3689 catch (reflex::regex_error& error)
3690 {
3691 abort("error: ", error.what());
3692 }
3693
3694 catch (std::exception& error)
3695 {
3696 abort("error: ", error.what());
3697 }
3698 }
3699
3700 #ifdef OS_WIN
3701
3702 delete[] argv;
3703
3704 #endif
3705
3706 return warnings == 0 && Stats::found_any_file() ? EXIT_OK : EXIT_FAIL;
3707 }
3708
set_depth(const char * arg)3709 static void set_depth(const char *arg)
3710 {
3711 if (flag_max_depth > 0)
3712 {
3713 if (flag_min_depth == 0)
3714 flag_min_depth = flag_max_depth;
3715 flag_max_depth = strtopos(arg, "invalid argument --");
3716 if (flag_min_depth > flag_max_depth)
3717 usage("invalid argument -", arg);
3718 }
3719 else
3720 {
3721 strtopos2(arg, flag_min_depth, flag_max_depth, "invalid argument --", true);
3722 }
3723 }
3724
3725 // load config file specified or the default .ugrep, located in the working directory or home directory
load_config(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args)3726 static void load_config(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args)
3727 {
3728 // warn about invalid options but do not exit
3729 flag_usage_warnings = true;
3730
3731 // the default config file is .ugrep when FILE is not specified
3732 if (flag_config == NULL || *flag_config == '\0')
3733 flag_config_file.assign(".ugrep");
3734 else
3735 flag_config_file.assign(flag_config);
3736
3737 FILE *file = NULL;
3738
3739 if (fopen_smart(&file, flag_config_file.c_str(), "r") != 0)
3740 {
3741 if (home_dir != NULL)
3742 {
3743 // check the home directory for the configuration file
3744 if (flag_config == NULL || *flag_config == '\0')
3745 flag_config_file.assign(home_dir).append(PATHSEPSTR).append(".ugrep");
3746 else
3747 flag_config_file.assign(home_dir).append(PATHSEPSTR).append(flag_config);
3748 if (fopen_smart(&file, flag_config_file.c_str(), "r") != 0)
3749 file = NULL;
3750 }
3751 }
3752
3753 if (file != NULL)
3754 {
3755 reflex::BufferedInput input(file);
3756
3757 std::string line;
3758 size_t lineno = 1;
3759 bool errors = false;
3760
3761 while (true)
3762 {
3763 // read the next line
3764 if (getline(input, line))
3765 break;
3766
3767 trim(line);
3768
3769 // skip empty lines and comments
3770 if (!line.empty() && line.front() != '#')
3771 {
3772 // construct an option argument to parse as argv[]
3773 line.insert(0, "--");
3774 const char *arg = flag_config_options.insert(line).first->c_str();
3775 const char *args[2] = { NULL, arg };
3776
3777 warnings = 0;
3778
3779 options(pattern_args, 2, args);
3780
3781 if (warnings > 0)
3782 {
3783 std::cerr << "ugrep: error in " << flag_config_file << " at line " << lineno << "\n\n";
3784
3785 errors = true;
3786 }
3787 }
3788
3789 ++lineno;
3790 }
3791
3792 if (ferror(file))
3793 error("error while reading", flag_config_file.c_str());
3794
3795 if (file != stdin)
3796 fclose(file);
3797
3798 if (errors)
3799 {
3800 std::cerr << "Try 'ugrep --help [WHAT]' for more information\n";
3801
3802 exit(EXIT_ERROR);
3803 }
3804 }
3805 else if (flag_config != NULL && *flag_config != '\0')
3806 {
3807 error("option --config: cannot read", flag_config_file.c_str());
3808 }
3809
3810 flag_usage_warnings = false;
3811 }
3812
3813 // save a configuration file
save_config()3814 static void save_config()
3815 {
3816 FILE *file = NULL;
3817
3818 if (fopen_smart(&file, flag_save_config, "w") != 0)
3819 {
3820 usage("cannot save configuration file ", flag_save_config);
3821
3822 return;
3823 }
3824
3825 if (strcmp(flag_save_config, ".ugrep") == 0)
3826 fprintf(file, "# default .ugrep configuration file used by ug and ugrep --config.\n");
3827 else if (strcmp(flag_save_config, "-") == 0)
3828 fprintf(file, "# ugrep configuration.\n");
3829 else
3830 fprintf(file, "# configuration used with ugrep --config=%s or ---%s.\n", flag_save_config, flag_save_config);
3831
3832 fprintf(file, "\
3833 #\n\
3834 # A long option is defined per line with an optional `=' and its argument,\n\
3835 # when applicable. Empty lines and lines starting with a `#' are ignored.\n\
3836 #\n\
3837 # Try `ugrep --help [WHAT]' for more information.\n\n");
3838
3839 fprintf(file, "### TERMINAL DISPLAY ###\n\n");
3840
3841 fprintf(file, "# Custom color scheme overrides default GREP_COLORS parameters\ncolors=%s\n", flag_colors != NULL ? flag_colors : "");
3842 fprintf(file, "\
3843 # The argument is a colon-separated list of one or more parameters `sl='\n\
3844 # (selected line), `cx=' (context line), `mt=' (matched text), `ms=' (match\n\
3845 # selected), `mc=' (match context), `fn=' (file name), `ln=' (line number),\n\
3846 # `cn=' (column number), `bn=' (byte offset), `se=' (separator). Parameter\n\
3847 # values are ANSI SGR color codes or `k' (black), `r' (red), `g' (green), `y'\n\
3848 # (yellow), `b' (blue), `m' (magenta), `c' (cyan), `w' (white). Upper case\n\
3849 # specifies background colors. A `+' qualifies a color as bright. A\n\
3850 # foreground and a background color may be combined with font properties `n'\n\
3851 # (normal), `f' (faint), `h' (highlight), `i' (invert), `u' (underline).\n\n");
3852 fprintf(file, "# Enable/disable color\n%s\n\n", flag_color != NULL ? "color" : "no-color");
3853 fprintf(file, "# Enable/disable query UI confirmation prompts, default: confirm\n%s\n\n", flag_confirm ? "confirm" : "no-confirm");
3854 fprintf(file, "# Enable/disable query UI file viewing with CTRL-Y, default: view\n");
3855 if (flag_view != NULL && *flag_view == '\0')
3856 fprintf(file, "view\n\n");
3857 else if (flag_view != NULL)
3858 fprintf(file, "view=%s\n\n", flag_view);
3859 else
3860 fprintf(file, "no-view\n\n");
3861 fprintf(file, "# Enable/disable or specify a pager for terminal output, default: no-pager\n");
3862 if (flag_pager != NULL)
3863 fprintf(file, "pager=%s\n\n", flag_pager);
3864 else
3865 fprintf(file, "no-pager\n\n");
3866 fprintf(file, "# Enable/disable pretty output to the terminal, default: no-pretty\n%s\n\n", flag_pretty ? "pretty" : "no-pretty");
3867 fprintf(file, "# Enable/disable headings for terminal output, default: no-heading\n%s\n\n", flag_heading.is_undefined() ? "# no-heading" : flag_heading ? "heading" : "no-heading");
3868
3869 if (flag_break.is_defined())
3870 fprintf(file, "# Enable/disable break for terminal output\n%s\n\n", flag_break ? "break" : "no-break");
3871
3872 if (flag_line_number.is_defined() && flag_line_number != flag_pretty)
3873 fprintf(file, "# Enable/disable line numbers\n%s\n\n", flag_line_number ? "line-number" : "no-line-number");
3874
3875 if (flag_column_number.is_defined())
3876 fprintf(file, "# Enable/disable column numbers\n%s\n\n", flag_column_number ? "column-number" : "no-column-number");
3877
3878 if (flag_byte_offset.is_defined())
3879 fprintf(file, "# Enable/disable byte offsets\n%s\n\n", flag_byte_offset ? "byte-offset" : "no-byte-offset");
3880
3881 if (flag_initial_tab.is_defined() && flag_line_number != flag_pretty)
3882 fprintf(file, "# Enable/disable initial tab\n%s\n\n", flag_initial_tab ? "initial-tab" : "no-initial-tab");
3883
3884 if (strcmp(flag_binary_files, "hex") == 0)
3885 fprintf(file, "# Hex output\nhex\n\n");
3886 else if (strcmp(flag_binary_files, "with-hex") == 0)
3887 fprintf(file, "# Output with hex for binary matches\nwith-hex\n\n");
3888 if (flag_hexdump != NULL)
3889 fprintf(file, "# Hex dump (columns, no space breaks, no character column, no hex spacing)\nhexdump=%s\n\n", flag_hexdump);
3890
3891 if (flag_any_line)
3892 {
3893 fprintf(file, "# Display any line as context\nany-line\n\n");
3894 }
3895 else if (flag_after_context > 0 && flag_before_context == flag_after_context)
3896 {
3897 fprintf(file, "# Display context lines\ncontext=%zu\n\n", flag_after_context);
3898 }
3899 else
3900 {
3901 if (flag_after_context > 0)
3902 fprintf(file, "# Display lines after context\nafter-context=%zu\n\n", flag_after_context);
3903 if (flag_before_context > 0)
3904 fprintf(file, "# Display lines before context\nbefore-context=%zu\n\n", flag_before_context);
3905 }
3906 if (flag_group_separator == NULL)
3907 fprintf(file, "# Disable group separator for contexts\nno-group-separator\n\n");
3908 else if (strcmp(flag_group_separator, "--") != 0)
3909 fprintf(file, "# Group separator for contexts\ngroup-separator=%s\n\n", flag_group_separator);
3910
3911 fprintf(file, "### SEARCH PATTERNS ###\n\n");
3912
3913 fprintf(file, "# Enable/disable case-insensitive search, default: no-ignore-case\n%s\n\n", flag_ignore_case.is_undefined() ? "# no-ignore-case" : flag_ignore_case ? "ignore-case" : "no-ignore-case");
3914 fprintf(file, "# Enable/disable smart case, default: no-smart-case\n%s\n\n", flag_smart_case.is_undefined() ? "# no-smart-case" : flag_smart_case ? "smart-case" : "no-smart-case");
3915 fprintf(file, "# Enable/disable empty pattern matches, default: no-empty\n%s\n\n", flag_empty.is_undefined() ? "# no-empty" : flag_empty ? "empty" : "no-empty");
3916
3917 fprintf(file, "### SEARCH TARGETS ###\n\n");
3918
3919 fprintf(file, "# Enable/disable searching hidden files and directories, default: no-hidden\n%s\n\n", flag_hidden ? "hidden" : "no-hidden");
3920 fprintf(file, "# Enable/disable binary files, default: no-ignore-binary\n%s\n\n", strcmp(flag_binary_files, "without-match") == 0 ? "ignore-binary" : "no-ignore-binary");
3921 fprintf(file, "# Enable/disable decompression and archive search, default: no-decompress\n%s\n\n", flag_decompress ? "decompress" : "no-decompress");
3922 if (flag_ignore_files.empty())
3923 {
3924 fprintf(file, "# Enable/disable ignore files, default: no-ignore-files\nno-ignore-files\n\n");
3925 }
3926 else
3927 {
3928 fprintf(file, "# Enable/disable ignore files, default: no-ignore-files\n");
3929 for (const auto& ignore : flag_ignore_files)
3930 fprintf(file, "ignore-files=%s\n", ignore.c_str());
3931 fprintf(file, "\n");
3932 }
3933 if (flag_filter != NULL)
3934 {
3935 fprintf(file, "# Filtering\nfilter=%s\n\n", flag_filter);
3936 if (!flag_filter_magic_label.empty())
3937 {
3938 fprintf(file, "# Filter by file signature magic bytes\n");
3939 for (const auto& label : flag_filter_magic_label)
3940 fprintf(file, "filter-magic-label=%s\n", label.c_str());
3941 fprintf(file, "# Warning: filter-magic-label significantly reduces performance!\n\n");
3942 }
3943 }
3944
3945 fprintf(file, "### OUTPUT ###\n\n");
3946
3947 fprintf(file, "# Enable/disable sorted output, default: no-sort\n");
3948 if (flag_sort != NULL)
3949 fprintf(file, "sort=%s\n\n", flag_sort);
3950 else
3951 fprintf(file, "# no-sort\n\n");
3952
3953 if (ferror(file))
3954 error("cannot save", flag_save_config);
3955
3956 if (file != stdout)
3957 fclose(file);
3958 }
3959
3960 // parse the command-line options
options(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,int argc,const char ** argv)3961 void options(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int argc, const char **argv)
3962 {
3963 bool options = true;
3964
3965 for (int i = 1; i < argc; ++i)
3966 {
3967 const char *arg = argv[i];
3968
3969 if ((*arg == '-'
3970 #ifdef OS_WIN
3971 || *arg == '/'
3972 #endif
3973 ) && arg[1] != '\0' && options)
3974 {
3975 bool is_grouped = true;
3976
3977 // parse a ugrep command-line option
3978 while (is_grouped && *++arg != '\0')
3979 {
3980 switch (*arg)
3981 {
3982 case '-':
3983 is_grouped = false;
3984 if (*++arg == '\0')
3985 {
3986 options = false;
3987 continue;
3988 }
3989
3990 switch (*arg)
3991 {
3992 case '-':
3993 break;
3994
3995 case 'a':
3996 if (strncmp(arg, "after-context=", 14) == 0)
3997 flag_after_context = strtonum(arg + 14, "invalid argument --after-context=");
3998 else if (strcmp(arg, "and") == 0)
3999 option_and(pattern_args, i, argc, argv);
4000 else if (strncmp(arg, "and=", 4) == 0)
4001 option_and(pattern_args, arg + 4);
4002 else if (strcmp(arg, "andnot") == 0)
4003 option_andnot(pattern_args, i, argc, argv);
4004 else if (strncmp(arg, "andnot=", 7) == 0)
4005 option_andnot(pattern_args, arg + 7);
4006 else if (strcmp(arg, "any-line") == 0)
4007 flag_any_line = true;
4008 else if (strcmp(arg, "after-context") == 0)
4009 usage("missing argument for --", arg);
4010 else
4011 usage("invalid option --", arg, "--after-context, --and, --andnot or --any-line");
4012 break;
4013
4014 case 'b':
4015 if (strcmp(arg, "basic-regexp") == 0)
4016 flag_basic_regexp = true;
4017 else if (strncmp(arg, "before-context=", 15) == 0)
4018 flag_before_context = strtonum(arg + 15, "invalid argument --before-context=");
4019 else if (strcmp(arg, "binary") == 0)
4020 flag_binary = true;
4021 else if (strncmp(arg, "binary-files=", 13) == 0)
4022 flag_binary_files = arg + 13;
4023 else if (strcmp(arg, "bool") == 0)
4024 flag_bool = true;
4025 else if (strcmp(arg, "break") == 0)
4026 flag_break = true;
4027 else if (strcmp(arg, "byte-offset") == 0)
4028 flag_byte_offset = true;
4029 else if (strcmp(arg, "before-context") == 0 || strcmp(arg, "binary-files") == 0)
4030 usage("missing argument for --", arg);
4031 else
4032 usage("invalid option --", arg, "--basic-regexp, --before-context, --binary, --binary-files, --bool, --break or --byte-offset");
4033 break;
4034
4035 case 'c':
4036 if (strcmp(arg, "color") == 0 || strcmp(arg, "colour") == 0)
4037 flag_color = "auto";
4038 else if (strncmp(arg, "color=", 6) == 0)
4039 flag_color = arg + 6;
4040 else if (strncmp(arg, "colour=", 7) == 0)
4041 flag_color = arg + 7;
4042 else if (strncmp(arg, "colors=", 7) == 0)
4043 flag_colors = arg + 7;
4044 else if (strncmp(arg, "colours=", 8) == 0)
4045 flag_colors = arg + 8;
4046 else if (strcmp(arg, "column-number") == 0)
4047 flag_column_number = true;
4048 else if (strcmp(arg, "config") == 0 || strncmp(arg, "config=", 7) == 0)
4049 ; // --config is pre-parsed before other options are parsed
4050 else if (strcmp(arg, "confirm") == 0)
4051 flag_confirm = true;
4052 else if (strncmp(arg, "context=", 8) == 0)
4053 flag_after_context = flag_before_context = strtonum(arg + 8, "invalid argument --context=");
4054 else if (strcmp(arg, "count") == 0)
4055 flag_count = true;
4056 else if (strcmp(arg, "cpp") == 0)
4057 flag_cpp = true;
4058 else if (strcmp(arg, "csv") == 0)
4059 flag_csv = true;
4060 else if (strcmp(arg, "colors") == 0 || strcmp(arg, "colours") == 0)
4061 usage("missing argument for --", arg);
4062 else
4063 usage("invalid option --", arg, "--color, --colors, --column-number, --config, --confirm, --context, --count, --cpp or --csv");
4064 break;
4065
4066 case 'd':
4067 if (strcmp(arg, "decompress") == 0)
4068 flag_decompress = true;
4069 else if (strncmp(arg, "depth=", 6) == 0)
4070 strtopos2(arg + 6, flag_min_depth, flag_max_depth, "invalid argument --depth=", true);
4071 else if (strcmp(arg, "dereference") == 0)
4072 flag_dereference = true;
4073 else if (strcmp(arg, "dereference-recursive") == 0)
4074 flag_directories = "dereference-recurse";
4075 else if (strncmp(arg, "devices=", 8) == 0)
4076 flag_devices = arg + 8;
4077 else if (strncmp(arg, "directories=", 12) == 0)
4078 flag_directories = arg + 12;
4079 else if (strcmp(arg, "dotall") == 0)
4080 flag_dotall = true;
4081 else if (strcmp(arg, "depth") == 0)
4082 usage("missing argument for --", arg);
4083 else
4084 usage("invalid option --", arg, "--decompress, --depth, --dereference, --dereference-recursive, --devices, --directories or --dotall");
4085 break;
4086
4087 case 'e':
4088 if (strcmp(arg, "empty") == 0)
4089 flag_empty = true;
4090 else if (strncmp(arg, "encoding=", 9) == 0)
4091 flag_encoding = arg + 9;
4092 else if (strncmp(arg, "exclude=", 8) == 0)
4093 flag_exclude.emplace_back(arg + 8);
4094 else if (strncmp(arg, "exclude-dir=", 12) == 0)
4095 flag_exclude_dir.emplace_back(arg + 12);
4096 else if (strncmp(arg, "exclude-from=", 13) == 0)
4097 flag_exclude_from.emplace_back(arg + 13);
4098 else if (strncmp(arg, "exclude-fs=", 11) == 0)
4099 flag_exclude_fs.emplace_back(arg + 11);
4100 else if (strcmp(arg, "extended-regexp") == 0)
4101 flag_basic_regexp = false;
4102 else if (strcmp(arg, "encoding") == 0 ||
4103 strcmp(arg, "exclude") == 0 ||
4104 strcmp(arg, "exclude-dir") == 0 ||
4105 strcmp(arg, "exclude-from") == 0 ||
4106 strcmp(arg, "exclude-fs") == 0)
4107 usage("missing argument for --", arg);
4108 else
4109 usage("invalid option --", arg, "--empty, --encoding, --exclude, --exclude-dir, --exclude-from, --exclude-fs or --extended-regexp");
4110 break;
4111
4112 case 'f':
4113 if (strncmp(arg, "file=", 5) == 0)
4114 flag_file.emplace_back(arg + 5);
4115 else if (strncmp(arg, "file-extension=", 15) == 0)
4116 flag_file_extension.emplace_back(arg + 15);
4117 else if (strncmp(arg, "file-magic=", 11) == 0)
4118 flag_file_magic.emplace_back(arg + 11);
4119 else if (strncmp(arg, "file-type=", 10) == 0)
4120 flag_file_type.emplace_back(arg + 10);
4121 else if (strcmp(arg, "files") == 0)
4122 flag_files = true;
4123 else if (strcmp(arg, "files-with-matches") == 0)
4124 flag_files_with_matches = true;
4125 else if (strcmp(arg, "files-without-match") == 0)
4126 flag_files_without_match = true;
4127 else if (strcmp(arg, "fixed-strings") == 0)
4128 flag_fixed_strings = true;
4129 else if (strncmp(arg, "filter=", 7) == 0)
4130 flag_filter = arg + 7;
4131 else if (strncmp(arg, "filter-magic-label=", 19) == 0)
4132 flag_filter_magic_label.emplace_back(arg + 19);
4133 else if (strncmp(arg, "format=", 7) == 0)
4134 flag_format = arg + 7;
4135 else if (strncmp(arg, "format-begin=", 13) == 0)
4136 flag_format_begin = arg + 13;
4137 else if (strncmp(arg, "format-close=", 13) == 0)
4138 flag_format_close = arg + 13;
4139 else if (strncmp(arg, "format-end=", 11) == 0)
4140 flag_format_end = arg + 11;
4141 else if (strncmp(arg, "format-open=", 12) == 0)
4142 flag_format_open = arg + 12;
4143 else if (strcmp(arg, "fuzzy") == 0)
4144 flag_fuzzy = 1;
4145 else if (strncmp(arg, "fuzzy=", 6) == 0)
4146 flag_fuzzy = strtofuzzy(arg + 6, "invalid argument --fuzzy=");
4147 else if (strcmp(arg, "free-space") == 0)
4148 flag_free_space = true;
4149 else if (strcmp(arg, "file") == 0 ||
4150 strcmp(arg, "file-extension") == 0 ||
4151 strcmp(arg, "file-magic") == 0 ||
4152 strcmp(arg, "file-type") == 0 ||
4153 strcmp(arg, "filter") == 0 ||
4154 strcmp(arg, "filter-magic-label") == 0 ||
4155 strcmp(arg, "format") == 0 ||
4156 strcmp(arg, "format-begin") == 0 ||
4157 strcmp(arg, "format-close") == 0 ||
4158 strcmp(arg, "format-end") == 0 ||
4159 strcmp(arg, "format-open") == 0)
4160 usage("missing argument for --", arg);
4161 else
4162 usage("invalid option --", arg, "--file, --file-extension, --file-magic, --file-type, --files, --files-with-matches, --files-without-match, --fixed-strings, --filter, --filter-magic-label, --format, --format-begin, --format-close, --format-end, --format-open, --fuzzy or --free-space");
4163 break;
4164
4165 case 'g':
4166 if (strncmp(arg, "glob=", 5) == 0)
4167 flag_glob.emplace_back(arg + 5);
4168 else if (strncmp(arg, "group-separator=", 16) == 0)
4169 flag_group_separator = arg + 16;
4170 else if (strcmp(arg, "group-separator") == 0)
4171 flag_group_separator = "--";
4172 else if (strcmp(arg, "glob") == 0)
4173 usage("missing argument for --", arg);
4174 else
4175 usage("invalid option --", arg, "--glob or --group-separator");
4176 break;
4177
4178 case 'h':
4179 if (strcmp(arg, "heading") == 0)
4180 flag_heading = true;
4181 else if (strncmp(arg, "help", 4) == 0)
4182 help(arg[4] != '\0' ? arg + 4 : ++i < argc ? argv[i] : NULL);
4183 else if (strcmp(arg, "hex") == 0)
4184 flag_binary_files = "hex";
4185 else if (strcmp(arg, "hexdump") == 0)
4186 flag_hexdump = "2";
4187 else if (strncmp(arg, "hexdump=", 8) == 0)
4188 flag_hexdump = arg + 8;
4189 else if (strcmp(arg, "hidden") == 0)
4190 flag_hidden = true;
4191 else if (strcmp(arg, "hyperlink") == 0)
4192 flag_colors = "hl";
4193 else
4194 usage("invalid option --", arg, "--heading, --help, --hex, --hexdump, --hidden or --hyperlink");
4195 break;
4196
4197 case 'i':
4198 if (strcmp(arg, "ignore-binary") == 0)
4199 flag_binary_files = "without-match";
4200 else if (strcmp(arg, "ignore-case") == 0)
4201 flag_ignore_case = true;
4202 else if (strcmp(arg, "ignore-files") == 0)
4203 flag_ignore_files.emplace_back(DEFAULT_IGNORE_FILE);
4204 else if (strncmp(arg, "ignore-files=", 13) == 0)
4205 flag_ignore_files.emplace_back(arg + 13);
4206 else if (strncmp(arg, "include=", 8) == 0)
4207 flag_include.emplace_back(arg + 8);
4208 else if (strncmp(arg, "include-dir=", 12) == 0)
4209 flag_include_dir.emplace_back(arg + 12);
4210 else if (strncmp(arg, "include-from=", 13) == 0)
4211 flag_include_from.emplace_back(arg + 13);
4212 else if (strncmp(arg, "include-fs=", 11) == 0)
4213 flag_include_fs.emplace_back(arg + 11);
4214 else if (strcmp(arg, "initial-tab") == 0)
4215 flag_initial_tab = true;
4216 else if (strcmp(arg, "invert-match") == 0)
4217 flag_invert_match = true;
4218 else if (strcmp(arg, "include") == 0 ||
4219 strcmp(arg, "include-dir") == 0 ||
4220 strcmp(arg, "include-from") == 0 ||
4221 strcmp(arg, "include-fs") == 0)
4222 usage("missing argument for --", arg);
4223 else
4224 usage("invalid option --", arg, "--ignore-case, --ignore-files, --include, --include-dir, --include-from, --include-fs, --initial-tab or --invert-match");
4225 break;
4226
4227 case 'j':
4228 if (strncmp(arg, "jobs=", 4) == 0)
4229 flag_jobs = strtonum(arg + 4, "invalid argument --jobs=");
4230 else if (strcmp(arg, "json") == 0)
4231 flag_json = true;
4232 else if (strcmp(arg, "jobs") == 0)
4233 usage("missing argument for --", arg);
4234 else
4235 usage("invalid option --", arg, "--jobs or --json");
4236 break;
4237
4238 case 'l':
4239 if (strncmp(arg, "label=", 6) == 0)
4240 flag_label = arg + 6;
4241 else if (strcmp(arg, "line-buffered") == 0)
4242 flag_line_buffered = true;
4243 else if (strcmp(arg, "line-number") == 0)
4244 flag_line_number = true;
4245 else if (strcmp(arg, "line-regexp") == 0)
4246 flag_line_regexp = true;
4247 else if (strcmp(arg, "lines") == 0)
4248 flag_files = false;
4249 else
4250 usage("invalid option --", arg, "--label, --line-buffered, --line-number, --line-regexp or --lines");
4251 break;
4252
4253 case 'm':
4254 if (strcmp(arg, "match") == 0)
4255 flag_match = true;
4256 else if (strncmp(arg, "max-count=", 10) == 0)
4257 flag_max_count = strtopos(arg + 10, "invalid argument --max-count=");
4258 else if (strncmp(arg, "max-files=", 10) == 0)
4259 flag_max_files = strtopos(arg + 10, "invalid argument --max-files=");
4260 else if (strncmp(arg, "min-steal=", 10) == 0)
4261 flag_min_steal = strtopos(arg + 10, "invalid argument --min-steal=");
4262 else if (strcmp(arg, "mmap") == 0)
4263 flag_max_mmap = MAX_MMAP_SIZE;
4264 else if (strncmp(arg, "mmap=", 5) == 0)
4265 flag_max_mmap = strtopos(arg + 5, "invalid argument --mmap=");
4266 else if (strcmp(arg, "messages") == 0)
4267 flag_no_messages = false;
4268 else if (strcmp(arg, "max-count") == 0 || strcmp(arg, "max-files") == 0)
4269 usage("missing argument for --", arg);
4270 else
4271 usage("invalid option --", arg, "--match, --max-count, --max-files, --mmap or --messages");
4272 break;
4273
4274 case 'n':
4275 if (strncmp(arg, "neg-regexp=", 11) == 0)
4276 option_regexp(pattern_args, arg + 1, true);
4277 else if (strcmp(arg, "not") == 0)
4278 option_not(pattern_args, i, argc, argv);
4279 else if (strncmp(arg, "not=", 4) == 0)
4280 option_not(pattern_args, arg + 4);
4281 else if (strcmp(arg, "no-any-line") == 0)
4282 flag_any_line = false;
4283 else if (strcmp(arg, "no-binary") == 0)
4284 flag_binary = false;
4285 else if (strcmp(arg, "no-bool") == 0)
4286 flag_bool = false;
4287 else if (strcmp(arg, "no-break") == 0)
4288 flag_break = false;
4289 else if (strcmp(arg, "no-byte-offset") == 0)
4290 flag_byte_offset = false;
4291 else if (strcmp(arg, "no-color") == 0 || strcmp(arg, "no-colour") == 0)
4292 flag_color = "never";
4293 else if (strcmp(arg, "no-column-number") == 0)
4294 flag_column_number = false;
4295 else if (strcmp(arg, "no-confirm") == 0)
4296 flag_confirm = false;
4297 else if (strcmp(arg, "no-decompress") == 0)
4298 flag_decompress = false;
4299 else if (strcmp(arg, "no-dereference") == 0)
4300 flag_no_dereference = true;
4301 else if (strcmp(arg, "no-dotall") == 0)
4302 flag_dotall = false;
4303 else if (strcmp(arg, "no-empty") == 0)
4304 flag_empty = false;
4305 else if (strcmp(arg, "no-filename") == 0)
4306 flag_no_filename = true;
4307 else if (strcmp(arg, "no-group-separator") == 0)
4308 flag_group_separator = NULL;
4309 else if (strcmp(arg, "no-heading") == 0)
4310 flag_heading = false;
4311 else if (strcmp(arg, "no-hidden") == 0)
4312 flag_hidden = false;
4313 else if (strcmp(arg, "no-ignore-binary") == 0)
4314 flag_binary_files = "binary";
4315 else if (strcmp(arg, "no-ignore-case") == 0)
4316 flag_ignore_case = false;
4317 else if (strcmp(arg, "no-ignore-files") == 0)
4318 flag_ignore_files.clear();
4319 else if (strcmp(arg, "no-initial-tab") == 0)
4320 flag_initial_tab = false;
4321 else if (strcmp(arg, "no-invert-match") == 0)
4322 flag_invert_match = false;
4323 else if (strcmp(arg, "no-line-number") == 0)
4324 flag_line_number = false;
4325 else if (strcmp(arg, "no-only-line-number") == 0)
4326 flag_only_line_number = false;
4327 else if (strcmp(arg, "no-only-matching") == 0)
4328 flag_only_matching = false;
4329 else if (strcmp(arg, "no-messages") == 0)
4330 flag_no_messages = true;
4331 else if (strcmp(arg, "no-mmap") == 0)
4332 flag_max_mmap = 0;
4333 else if (strcmp(arg, "no-pager") == 0)
4334 flag_pager = NULL;
4335 else if (strcmp(arg, "no-pretty") == 0)
4336 flag_pretty = false;
4337 else if (strcmp(arg, "no-smart-case") == 0)
4338 flag_smart_case = false;
4339 else if (strcmp(arg, "no-sort") == 0)
4340 flag_sort = NULL;
4341 else if (strcmp(arg, "no-stats") == 0)
4342 flag_stats = NULL;
4343 else if (strcmp(arg, "no-ungroup") == 0)
4344 flag_ungroup = false;
4345 else if (strcmp(arg, "no-view") == 0)
4346 flag_view = NULL;
4347 else if (strcmp(arg, "null") == 0)
4348 flag_null = true;
4349 else if (strcmp(arg, "neg-regexp") == 0)
4350 usage("missing argument for --", arg);
4351 else
4352 usage("invalid option --", arg, "--neg-regexp, --not, --no-any-line, --no-binary, --no-bool, --no-break, --no-byte-offset, --no-color, --no-confirm, --no-decompress, --no-dereference, --no-dotall, --no-empty, --no-filename, --no-group-separator, --no-heading, --no-hidden, --no-ignore-binary, --no-ignore-case, --no-ignore-files --no-initial-tab, --no-invert-match, --no-line-number, --no-only-line-number, --no-only-matching, --no-messages, --no-mmap, --no-pager, --no-pretty, --no-smart-case, --no-sort, --no-stats, --no-ungroup, --no-view or --null");
4353 break;
4354
4355 case 'o':
4356 if (strcmp(arg, "only-line-number") == 0)
4357 flag_only_line_number = true;
4358 else if (strcmp(arg, "only-matching") == 0)
4359 flag_only_matching = true;
4360 else
4361 usage("invalid option --", arg, "--only-line-number or --only-matching");
4362 break;
4363
4364 case 'p':
4365 if (strcmp(arg, "pager") == 0)
4366 flag_pager = DEFAULT_PAGER_COMMAND;
4367 else if (strncmp(arg, "pager=", 6) == 0)
4368 flag_pager = arg + 6;
4369 else if (strcmp(arg, "perl-regexp") == 0)
4370 flag_perl_regexp = true;
4371 else if (strcmp(arg, "pretty") == 0)
4372 flag_pretty = true;
4373 else
4374 usage("invalid option --", arg, "--pager, --perl-regexp or --pretty");
4375 break;
4376
4377 case 'q':
4378 if (strcmp(arg, "query") == 0)
4379 flag_query = DEFAULT_QUERY_DELAY;
4380 else if (strncmp(arg, "query=", 6) == 0)
4381 flag_query = strtopos(arg + 6, "invalid argument --query=");
4382 else if (strcmp(arg, "quiet") == 0)
4383 flag_quiet = flag_no_messages = true;
4384 else
4385 usage("invalid option --", arg, "--query or --quiet");
4386 break;
4387
4388 case 'r':
4389 if (strncmp(arg, "range=", 6) == 0)
4390 strtopos2(arg + 6, flag_min_line, flag_max_line, "invalid argument --range=");
4391 else if (strcmp(arg, "recursive") == 0)
4392 flag_directories = "recurse";
4393 else if (strncmp(arg, "regexp=", 7) == 0)
4394 option_regexp(pattern_args, arg + 7);
4395 else if (strcmp(arg, "range") == 0)
4396 usage("missing argument for --", arg);
4397 else
4398 usage("invalid option --", arg, "--range, --recursive or --regexp");
4399 break;
4400
4401 case 's':
4402 if (strcmp(arg, "save-config") == 0)
4403 flag_save_config = ".ugrep";
4404 else if (strncmp(arg, "save-config=", 12) == 0)
4405 flag_save_config = arg + 12;
4406 else if (strcmp(arg, "separator") == 0)
4407 flag_separator = ":";
4408 else if (strncmp(arg, "separator=", 10) == 0)
4409 flag_separator = arg + 10;
4410 else if (strcmp(arg, "silent") == 0)
4411 flag_quiet = flag_no_messages = true;
4412 else if (strcmp(arg, "smart-case") == 0)
4413 flag_smart_case = true;
4414 else if (strcmp(arg, "sort") == 0)
4415 flag_sort = "name";
4416 else if (strncmp(arg, "sort=", 5) == 0)
4417 flag_sort = arg + 5;
4418 else if (strcmp(arg, "stats") == 0)
4419 flag_stats = "";
4420 else if (strncmp(arg, "stats=", 6) == 0)
4421 flag_stats = arg + 6;
4422 else
4423 usage("invalid option --", arg, "--save-config, --separator, --silent, --smart-case, --sort or --stats");
4424 break;
4425
4426 case 't':
4427 if (strcmp(arg, "tabs") == 0)
4428 flag_tabs = DEFAULT_TABS;
4429 else if (strncmp(arg, "tabs=", 5) == 0)
4430 flag_tabs = strtopos(arg + 5, "invalid argument --tabs=");
4431 else if (strcmp(arg, "tag") == 0)
4432 flag_tag = DEFAULT_TAG;
4433 else if (strncmp(arg, "tag=", 4) == 0)
4434 flag_tag = arg + 4;
4435 else if (strcmp(arg, "text") == 0)
4436 flag_binary_files = "text";
4437 else
4438 usage("invalid option --", arg, "--tabs, --tag or --text");
4439 break;
4440
4441 case 'u':
4442 if (strcmp(arg, "ungroup") == 0)
4443 flag_ungroup = true;
4444 else
4445 usage("invalid option --", arg, "--ungroup");
4446 break;
4447
4448 case 'v':
4449 if (strcmp(arg, "version") == 0)
4450 version();
4451 else if (strncmp(arg, "view=", 5) == 0)
4452 flag_view = arg + 5;
4453 else if (strcmp(arg, "view") == 0)
4454 flag_view = "";
4455 else
4456 usage("invalid option --", arg, "--view or --version");
4457 break;
4458
4459 case 'w':
4460 if (strcmp(arg, "with-filename") == 0)
4461 flag_with_filename = true;
4462 else if (strcmp(arg, "with-hex") == 0)
4463 flag_binary_files = "with-hex";
4464 else if (strcmp(arg, "word-regexp") == 0)
4465 flag_word_regexp = true;
4466 else
4467 usage("invalid option --", arg, "--with-filename, --with-hex or --word-regexp");
4468 break;
4469
4470 case 'x':
4471 if (strcmp(arg, "xml") == 0)
4472 flag_xml = true;
4473 else
4474 usage("invalid option --", arg, "--xml");
4475 break;
4476
4477 default:
4478 if (isdigit(*arg))
4479 set_depth(arg);
4480 else
4481 usage("invalid option --", arg);
4482 }
4483 break;
4484
4485 case 'A':
4486 ++arg;
4487 if (*arg)
4488 flag_after_context = strtonum(&arg[*arg == '='], "invalid argument -A=");
4489 else if (++i < argc)
4490 flag_after_context = strtonum(argv[i], "invalid argument -A=");
4491 else
4492 usage("missing NUM argument for option -A");
4493 is_grouped = false;
4494 break;
4495
4496 case 'a':
4497 flag_binary_files = "text";
4498 break;
4499
4500 case 'B':
4501 ++arg;
4502 if (*arg)
4503 flag_before_context = strtonum(&arg[*arg == '='], "invalid argument -B=");
4504 else if (++i < argc)
4505 flag_before_context = strtonum(argv[i], "invalid argument -B=");
4506 else
4507 usage("missing NUM argument for option -B");
4508 is_grouped = false;
4509 break;
4510
4511 case 'b':
4512 flag_byte_offset = true;
4513 break;
4514
4515 case 'C':
4516 ++arg;
4517 if (*arg)
4518 flag_after_context = flag_before_context = strtonum(&arg[*arg == '='], "invalid argument -C=");
4519 else if (++i < argc)
4520 flag_after_context = flag_before_context = strtonum(argv[i], "invalid argument -C=");
4521 else
4522 usage("missing NUM argument for option -C");
4523 is_grouped = false;
4524 break;
4525
4526 case 'c':
4527 flag_count = true;
4528 break;
4529
4530 case 'D':
4531 ++arg;
4532 if (*arg)
4533 flag_devices = &arg[*arg == '='];
4534 else if (++i < argc)
4535 flag_devices = argv[i];
4536 else
4537 usage("missing ACTION argument for option -D");
4538 is_grouped = false;
4539 break;
4540
4541 case 'd':
4542 ++arg;
4543 if (*arg)
4544 flag_directories = &arg[*arg == '='];
4545 else if (++i < argc)
4546 flag_directories = argv[i];
4547 else
4548 usage("missing ACTION argument for option -d");
4549 is_grouped = false;
4550 break;
4551
4552 case 'E':
4553 flag_basic_regexp = false;
4554 break;
4555
4556 case 'e':
4557 ++arg;
4558 if (*arg)
4559 option_regexp(pattern_args, &arg[*arg == '=']);
4560 else if (++i < argc)
4561 option_regexp(pattern_args, argv[i]);
4562 else
4563 usage("missing PATTERN argument for option -e");
4564 is_grouped = false;
4565 break;
4566
4567 case 'F':
4568 flag_fixed_strings = true;
4569 break;
4570
4571 case 'f':
4572 ++arg;
4573 if (*arg)
4574 flag_file.emplace_back(&arg[*arg == '=']);
4575 else if (++i < argc)
4576 flag_file.emplace_back(argv[i]);
4577 else
4578 usage("missing FILE argument for option -f");
4579 is_grouped = false;
4580 break;
4581
4582 case 'G':
4583 flag_basic_regexp = true;
4584 break;
4585
4586 case 'g':
4587 ++arg;
4588 if (*arg)
4589 flag_glob.emplace_back(&arg[*arg == '=']);
4590 else if (++i < argc)
4591 flag_glob.emplace_back(argv[i]);
4592 else
4593 usage("missing GLOB argument for option -g");
4594 is_grouped = false;
4595 break;
4596
4597 case 'H':
4598 flag_with_filename = true;
4599 break;
4600
4601 case 'h':
4602 flag_no_filename = true;
4603 break;
4604
4605 case 'I':
4606 flag_binary_files = "without-match";
4607 break;
4608
4609 case 'i':
4610 flag_ignore_case = true;
4611 break;
4612
4613 case 'J':
4614 ++arg;
4615 if (*arg)
4616 flag_jobs = strtonum(&arg[*arg == '='], "invalid argument -J=");
4617 else if (++i < argc)
4618 flag_jobs = strtonum(argv[i], "invalid argument -J=");
4619 else
4620 usage("missing NUM argument for option -J");
4621 is_grouped = false;
4622 break;
4623
4624 case 'j':
4625 flag_smart_case = true;
4626 break;
4627
4628 case 'K':
4629 ++arg;
4630 if (*arg)
4631 strtopos2(&arg[*arg == '='], flag_min_line, flag_max_line, "invalid argument -K=");
4632 else if (++i < argc)
4633 strtopos2(argv[i], flag_min_line, flag_max_line, "invalid argument -K=");
4634 else
4635 usage("missing NUM argument for option -K");
4636 is_grouped = false;
4637 break;
4638
4639 case 'k':
4640 flag_column_number = true;
4641 break;
4642
4643 case 'L':
4644 flag_files_without_match = true;
4645 break;
4646
4647 case 'l':
4648 flag_files_with_matches = true;
4649 break;
4650
4651 case 'M':
4652 ++arg;
4653 if (*arg)
4654 flag_file_magic.emplace_back(&arg[*arg == '=']);
4655 else if (++i < argc)
4656 flag_file_magic.emplace_back(argv[i]);
4657 else
4658 usage("missing MAGIC argument for option -M");
4659 is_grouped = false;
4660 break;
4661
4662 case 'm':
4663 ++arg;
4664 if (*arg)
4665 flag_max_count = strtopos(&arg[*arg == '='], "invalid argument -m=");
4666 else if (++i < argc)
4667 flag_max_count = strtopos(argv[i], "invalid argument -m=");
4668 else
4669 usage("missing NUM argument for option -m");
4670 is_grouped = false;
4671 break;
4672
4673 case 'N':
4674 ++arg;
4675 if (*arg)
4676 option_regexp(pattern_args, &arg[*arg == '='], true);
4677 else if (++i < argc)
4678 option_regexp(pattern_args, argv[i], true);
4679 else
4680 usage("missing PATTERN argument for option -N");
4681 is_grouped = false;
4682 break;
4683
4684 case 'n':
4685 flag_line_number = true;
4686 break;
4687
4688 case 'O':
4689 ++arg;
4690 if (*arg)
4691 flag_file_extension.emplace_back(&arg[*arg == '=']);
4692 else if (++i < argc)
4693 flag_file_extension.emplace_back(argv[i]);
4694 else
4695 usage("missing EXTENSIONS argument for option -O");
4696 is_grouped = false;
4697 break;
4698
4699 case 'o':
4700 flag_only_matching = true;
4701 break;
4702
4703 case 'P':
4704 flag_perl_regexp = true;
4705 break;
4706
4707 case 'p':
4708 flag_no_dereference = true;
4709 break;
4710
4711 case 'Q':
4712 ++arg;
4713 if (*arg == '=' || isdigit(*arg))
4714 {
4715 flag_query = strtopos(&arg[*arg == '='], "invalid argument -Q=");
4716 is_grouped = false;
4717 }
4718 else
4719 {
4720 flag_query = DEFAULT_QUERY_DELAY;
4721 --arg;
4722 }
4723 break;
4724
4725 case 'q':
4726 flag_quiet = true;
4727 break;
4728
4729 case 'R':
4730 flag_directories = "dereference-recurse";
4731 break;
4732
4733 case 'r':
4734 flag_directories = "recurse";
4735 break;
4736
4737 case 'S':
4738 flag_dereference = true;
4739 break;
4740
4741 case 's':
4742 flag_no_messages = true;
4743 break;
4744
4745 case 'T':
4746 flag_initial_tab = true;
4747 break;
4748
4749 case 't':
4750 ++arg;
4751 if (*arg)
4752 flag_file_type.emplace_back(&arg[*arg == '=']);
4753 else if (++i < argc)
4754 flag_file_type.emplace_back(argv[i]);
4755 else
4756 usage("missing TYPES argument for option -t");
4757 is_grouped = false;
4758 break;
4759
4760 case 'U':
4761 flag_binary = true;
4762 break;
4763
4764 case 'u':
4765 flag_ungroup = true;
4766 break;
4767
4768 case 'V':
4769 version();
4770 break;
4771
4772 case 'v':
4773 flag_invert_match = true;
4774 break;
4775
4776 case 'W':
4777 flag_binary_files = "with-hex";
4778 break;
4779
4780 case 'w':
4781 flag_word_regexp = true;
4782 break;
4783
4784 case 'X':
4785 flag_binary_files = "hex";
4786 break;
4787
4788 case 'x':
4789 flag_line_regexp = true;
4790 break;
4791
4792 case 'Y':
4793 flag_empty = true;
4794 break;
4795
4796 case 'y':
4797 flag_any_line = true;
4798 break;
4799
4800 case 'Z':
4801 ++arg;
4802 if (*arg == '=' || isdigit(*arg) || strchr("+-~", *arg) != NULL)
4803 {
4804 flag_fuzzy = strtofuzzy(&arg[*arg == '='], "invalid argument -Z=");
4805 is_grouped = false;
4806 }
4807 else
4808 {
4809 flag_fuzzy = 1;
4810 --arg;
4811 }
4812 break;
4813
4814
4815 case 'z':
4816 flag_decompress = true;
4817 break;
4818
4819 case '0':
4820 flag_null = true;
4821 break;
4822
4823 case '1':
4824 case '2':
4825 case '3':
4826 case '4':
4827 case '5':
4828 case '6':
4829 case '7':
4830 case '8':
4831 case '9':
4832 if (flag_min_depth == 0 && flag_max_depth > 0)
4833 flag_min_depth = flag_max_depth;
4834 flag_max_depth = *arg - '0';
4835 if (flag_min_depth > flag_max_depth)
4836 usage("invalid argument -", arg);
4837 break;
4838
4839 case '?':
4840 help(arg[1] != '\0' ? arg + 1 : ++i < argc ? argv[i] : NULL);
4841 break;
4842
4843 case '%':
4844 flag_bool = true;
4845 break;
4846
4847 case '+':
4848 flag_heading = true;
4849 break;
4850
4851 case '.':
4852 flag_hidden = true;
4853 break;
4854
4855 default:
4856 usage("invalid option -", arg);
4857 }
4858
4859 if (!is_grouped)
4860 break;
4861 }
4862 }
4863 else if (strcmp(arg, "-") == 0)
4864 {
4865 // read standard input
4866 flag_stdin = true;
4867 }
4868 else if (arg_pattern == NULL && !flag_match && !flag_not && pattern_args.empty() && flag_file.empty())
4869 {
4870 // no regex pattern specified yet, so assume it is PATTERN
4871 arg_pattern = arg;
4872 }
4873 else
4874 {
4875 // otherwise add the file argument to the list of FILE files
4876 arg_files.emplace_back(arg);
4877 }
4878 }
4879
4880 if (flag_not)
4881 usage("missing PATTERN for --not");
4882 }
4883
4884 // parse -e PATTERN and -N PATTERN
option_regexp(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,const char * arg,bool is_neg)4885 void option_regexp(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg, bool is_neg)
4886 {
4887 if (flag_query)
4888 {
4889 // -Q: pass -e PATTERN and -N PATTERN patterns to the query engine
4890 if (is_neg)
4891 {
4892 std::string neg_arg(arg);
4893 neg_arg.insert(0, "(?^").append(")");
4894 flag_regexp.emplace_back(neg_arg);
4895 }
4896 else
4897 {
4898 flag_regexp.emplace_back(arg);
4899 }
4900 }
4901 else
4902 {
4903 pattern_args.emplace_back((flag_not ? CNF::PATTERN::NOT : CNF::PATTERN::NA) | (is_neg ? CNF::PATTERN::NEG : CNF::PATTERN::NA), arg);
4904 }
4905 }
4906
4907 // parse --and [PATTERN]
option_and(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,int & i,int argc,const char ** argv)4908 void option_and(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv)
4909 {
4910 if (flag_not)
4911 usage("missing PATTERN for --not");
4912
4913 if (flag_query)
4914 usage("option -Q does not support --and");
4915
4916 pattern_args.emplace_back(CNF::PATTERN::TERM, "");
4917
4918 if (i + 1 < argc && *argv[i + 1] != '-')
4919 pattern_args.emplace_back((flag_not ? CNF::PATTERN::NOT : CNF::PATTERN::NA), argv[++i]);
4920 }
4921
4922 // parse --and=PATTERN
option_and(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,const char * arg)4923 void option_and(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg)
4924 {
4925 if (flag_not)
4926 usage("missing PATTERN for --not");
4927
4928 if (flag_query)
4929 usage("option -Q does not support --and");
4930
4931 pattern_args.emplace_back(CNF::PATTERN::TERM, "");
4932 pattern_args.emplace_back((flag_not ? CNF::PATTERN::NOT : CNF::PATTERN::NA), arg);
4933 }
4934
4935 // parse --andnot [PATTERN]
option_andnot(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,int & i,int argc,const char ** argv)4936 void option_andnot(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv)
4937 {
4938 if (flag_not)
4939 usage("missing PATTERN for --not");
4940
4941 if (flag_query)
4942 usage("option -Q does not support --andnot");
4943
4944 pattern_args.emplace_back(CNF::PATTERN::TERM, "");
4945
4946 flag_not = true;
4947
4948 if (i + 1 < argc && *argv[i + 1] != '-')
4949 {
4950 pattern_args.emplace_back(CNF::PATTERN::NOT, argv[++i]);
4951 flag_not = false;
4952 }
4953 }
4954
4955 // parse --andnot=PATTERN
option_andnot(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,const char * arg)4956 void option_andnot(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg)
4957 {
4958 if (flag_not)
4959 usage("missing PATTERN for --not");
4960
4961 if (flag_query)
4962 usage("option -Q does not support --andnot");
4963
4964 pattern_args.emplace_back(CNF::PATTERN::TERM, "");
4965 pattern_args.emplace_back(CNF::PATTERN::NOT, arg);
4966 }
4967
4968 // parse --not [PATTERN]
option_not(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,int & i,int argc,const char ** argv)4969 void option_not(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, int& i, int argc, const char **argv)
4970 {
4971 if (flag_query)
4972 usage("option -Q does not support --not");
4973
4974 flag_not = !flag_not;
4975
4976 if (i + 1 < argc && *argv[i + 1] != '-')
4977 {
4978 pattern_args.emplace_back((flag_not ? CNF::PATTERN::NOT : CNF::PATTERN::NA), argv[++i]);
4979 flag_not = false;
4980 }
4981 }
4982
4983 // parse --not=PATTERN
option_not(std::list<std::pair<CNF::PATTERN,const char * >> & pattern_args,const char * arg)4984 void option_not(std::list<std::pair<CNF::PATTERN,const char*>>& pattern_args, const char *arg)
4985 {
4986 if (flag_query)
4987 usage("option -Q does not support --not");
4988
4989 flag_not = !flag_not;
4990
4991 pattern_args.emplace_back((flag_not ? CNF::PATTERN::NOT : CNF::PATTERN::NA), arg);
4992 flag_not = false;
4993 }
4994
4995 // parse the command-line options and initialize
init(int argc,const char ** argv)4996 void init(int argc, const char **argv)
4997 {
4998 // get home directory path to expand ~ in options with file arguments, using fopen_smart()
4999
5000 #ifdef OS_WIN
5001 home_dir = getenv("USERPROFILE");
5002 #else
5003 home_dir = getenv("HOME");
5004 #endif
5005
5006 // --config=FILE or ---FILE: load configuration file first before parsing any other options
5007
5008 for (int i = 1; i < argc; ++i)
5009 {
5010 if (strcmp(argv[i], "--") == 0)
5011 break;
5012
5013 if (strncmp(argv[i], "--config", 8) == 0)
5014 {
5015 if (flag_config != NULL)
5016 std::cerr << "ugrep: warning: multiple configurations specified, ignoring extra " << argv[i] << '\n';
5017 else if (argv[i][8] == '\0')
5018 flag_config = "";
5019 else if (argv[i][8] == '=')
5020 flag_config = argv[i] + 9;
5021 }
5022 else if (strncmp(argv[i], "---", 3) == 0)
5023 {
5024 if (flag_config != NULL)
5025 std::cerr << "ugrep: warning: multiple configurations specified, ignoring extra " << argv[i] << '\n';
5026 else
5027 flag_config = argv[i] + 3;
5028 }
5029 }
5030
5031 // collect regex pattern arguments -e PATTERN, -N PATTERN, --and PATTERN, --andnot PATTERN
5032 std::list<std::pair<CNF::PATTERN,const char*>> pattern_args;
5033
5034 if (flag_config != NULL)
5035 load_config(pattern_args);
5036
5037 // apply the appropriate options when the program is named grep, egrep, fgrep, zgrep, zegrep, zfgrep
5038
5039 const char *program = strrchr(argv[0], PATHSEPCHR);
5040
5041 if (program == NULL)
5042 program = argv[0];
5043 else
5044 ++program;
5045
5046 if (strcmp(program, "ug") == 0)
5047 {
5048 // the 'ug' command is equivalent to 'ugrep --config' to load custom configuration files, when no --config=FILE is specified
5049 if (flag_config == NULL)
5050 load_config(pattern_args);
5051 }
5052 else if (strcmp(program, "grep") == 0)
5053 {
5054 // the 'grep' command is equivalent to 'ugrep -GY.'
5055 flag_basic_regexp = true;
5056 flag_hidden = true;
5057 flag_empty = true;
5058 }
5059 else if (strcmp(program, "egrep") == 0)
5060 {
5061 // the 'egrep' command is equivalent to 'ugrep -Y.'
5062 flag_hidden = true;
5063 flag_empty = true;
5064 }
5065 else if (strcmp(program, "fgrep") == 0)
5066 {
5067 // the 'fgrep' command is equivalent to 'ugrep -FY.'
5068 flag_fixed_strings = true;
5069 flag_hidden = true;
5070 flag_empty = true;
5071 }
5072 else if (strcmp(program, "zgrep") == 0)
5073 {
5074 // the 'zgrep' command is equivalent to 'ugrep -zGY.'
5075 flag_decompress = true;
5076 flag_basic_regexp = true;
5077 flag_hidden = true;
5078 flag_empty = true;
5079 }
5080 else if (strcmp(program, "zegrep") == 0)
5081 {
5082 // the 'zegrep' command is equivalent to 'ugrep -zY.'
5083 flag_decompress = true;
5084 flag_hidden = true;
5085 flag_empty = true;
5086 }
5087 else if (strcmp(program, "zfgrep") == 0)
5088 {
5089 // the 'zfgrep' command is equivalent to 'ugrep -zFY.'
5090 flag_decompress = true;
5091 flag_fixed_strings = true;
5092 flag_hidden = true;
5093 flag_empty = true;
5094 }
5095
5096 // parse ugrep command-line options and arguments
5097
5098 options(pattern_args, argc, argv);
5099
5100 if (warnings > 0)
5101 {
5102 std::cerr << "Usage: ugrep [OPTIONS] [PATTERN] [-f FILE] [-e PATTERN] [FILE ...]\n";
5103 std::cerr << "Try 'ugrep --help [WHAT]' for more information\n";
5104 exit(EXIT_ERROR);
5105 }
5106
5107 // -t list: list table of types and exit
5108 if (flag_file_type.size() == 1 && flag_file_type[0] == "list")
5109 {
5110 std::cerr << std::setw(12) << "FILE TYPE" << " -O EXTENSIONS, -g FILENAMES AND FILE SIGNATURE -M 'MAGIC BYTES'\n";
5111
5112 for (int i = 0; type_table[i].type != NULL; ++i)
5113 {
5114 std::cerr << std::setw(12) << type_table[i].type << " = -O " << type_table[i].extensions << '\n';
5115 if (type_table[i].filenames)
5116 std::cerr << std::setw(18) << "-g " << type_table[i].filenames << "\n";
5117 if (type_table[i].magic)
5118 std::cerr << std::setw(19) << "-M '" << type_table[i].magic << "'\n";
5119 }
5120
5121 exit(EXIT_ERROR);
5122 }
5123
5124 #ifndef HAVE_LIBZ
5125 // -z: but we don't have libz
5126 if (flag_decompress)
5127 usage("option -z is not available in this build configuration of ugrep");
5128 #endif
5129
5130 // -P disables -F, -G and -Z (P>F>G>E override)
5131 if (flag_perl_regexp)
5132 {
5133 #if defined(HAVE_PCRE2) || defined(HAVE_BOOST_REGEX)
5134 flag_fixed_strings = false;
5135 flag_basic_regexp = false;
5136 if (flag_fuzzy > 0)
5137 usage("options -P and -Z are not compatible");
5138 #else
5139 usage("option -P is not available in this build configuration of ugrep");
5140 #endif
5141 }
5142
5143 // -F disables -G (P>F>G>E override)
5144 if (flag_fixed_strings)
5145 flag_basic_regexp = false;
5146
5147 // populate the CNF with the collected regex pattern args, each arg points to a persistent command line argv[]
5148 for (const auto &arg : pattern_args)
5149 {
5150 if (arg.first == CNF::PATTERN::TERM)
5151 bcnf.new_term();
5152 else
5153 bcnf.new_pattern(arg.first, arg.second); // relies on options --bool, -F, -G, -w, -x, and -f
5154 }
5155
5156 // --query: override --pager
5157 if (flag_query > 0)
5158 flag_pager = NULL;
5159
5160 // check TTY info and set colors (warnings and errors may occur from here on)
5161 terminal();
5162
5163 // --save-config and --save-config=FILE
5164 if (flag_save_config != NULL)
5165 {
5166 save_config();
5167
5168 exit(EXIT_ERROR);
5169 }
5170
5171 #ifdef OS_WIN
5172 // save_config() and help() assume text mode, so switch to
5173 // binary after we're no longer going to call them.
5174 (void)_setmode(fileno(stdout), _O_BINARY);
5175 #endif
5176
5177 // --encoding: parse ENCODING value
5178 if (flag_encoding != NULL)
5179 {
5180 int i, j;
5181
5182 // scan the encoding_table[] for a matching encoding, case insensitive ASCII
5183 for (i = 0; encoding_table[i].format != NULL; ++i)
5184 {
5185 for (j = 0; flag_encoding[j] != '\0' && encoding_table[i].format[j] != '\0'; ++j)
5186 if (toupper(flag_encoding[j]) != toupper(encoding_table[i].format[j]))
5187 break;
5188
5189 if (flag_encoding[j] == '\0' && encoding_table[i].format[j] == '\0')
5190 break;
5191 }
5192
5193 if (encoding_table[i].format == NULL)
5194 {
5195 std::string msg = "invalid argument --encoding=ENCODING, valid arguments are";
5196
5197 for (int i = 0; encoding_table[i].format != NULL; ++i)
5198 msg.append(" '").append(encoding_table[i].format).append("',");
5199 msg.pop_back();
5200
5201 usage(msg.c_str());
5202 }
5203
5204 // encoding is the file encoding used by all input files, if no BOM is present
5205 flag_encoding_type = encoding_table[i].encoding;
5206 }
5207
5208 // --binary-files: normalize by assigning flags
5209 if (strcmp(flag_binary_files, "without-match") == 0)
5210 flag_binary_without_match = true;
5211 else if (strcmp(flag_binary_files, "text") == 0)
5212 flag_text = true;
5213 else if (strcmp(flag_binary_files, "hex") == 0)
5214 flag_hex = true;
5215 else if (strcmp(flag_binary_files, "with-hex") == 0)
5216 flag_with_hex = true;
5217 else if (strcmp(flag_binary_files, "binary") != 0)
5218 usage("invalid argument --binary-files=TYPE, valid arguments are 'binary', 'without-match', 'text', 'hex', and 'with-hex'");
5219
5220 // --hex takes priority over --with-hex takes priority over -I takes priority over -a
5221 if (flag_hex)
5222 flag_with_hex = (flag_binary_without_match = flag_text = false);
5223 else if (flag_with_hex)
5224 flag_binary_without_match = (flag_text = false);
5225 else if (flag_binary_without_match)
5226 flag_text = false;
5227
5228 // --hexdump: normalize by assigning flags
5229 if (flag_hexdump != NULL)
5230 {
5231 if (isdigit(*flag_hexdump))
5232 {
5233 flag_hex_columns = 8 * (*flag_hexdump - '0');
5234 if (flag_hex_columns == 0 || flag_hex_columns > MAX_HEX_COLUMNS)
5235 usage("invalid argument --hexdump=[1-8][a][b][c][h]");
5236 }
5237 if (strchr(flag_hexdump, 'a') != NULL)
5238 flag_hex_ast = true;
5239 if (strchr(flag_hexdump, 'b') != NULL)
5240 flag_hex_hbr = flag_hex_cbr = false;
5241 if (strchr(flag_hexdump, 'c') != NULL)
5242 flag_hex_chr = false;
5243 if (strchr(flag_hexdump, 'h') != NULL)
5244 flag_hex_hbr = false;
5245 if (!flag_with_hex)
5246 flag_hex = true;
5247 }
5248
5249 // --tabs: value should be 1, 2, 4, or 8
5250 if (flag_tabs && flag_tabs != 1 && flag_tabs != 2 && flag_tabs != 4 && flag_tabs != 8)
5251 usage("invalid argument --tabs=NUM, valid arguments are 1, 2, 4, or 8");
5252
5253 // --match: same as specifying an empty "" pattern argument
5254 if (flag_match)
5255 arg_pattern = "";
5256
5257 // if no regex pattern is specified and no -e PATTERN and no -f FILE and not -Q, then exit with usage message
5258 if (arg_pattern == NULL && pattern_args.empty() && flag_file.empty() && flag_query == 0)
5259 usage("no PATTERN specified: specify an empty \"\" pattern to match all input");
5260
5261 // regex PATTERN should be a FILE argument when -Q or -e PATTERN is specified
5262 if (!flag_match && arg_pattern != NULL && (flag_query > 0 || !pattern_args.empty()))
5263 {
5264 arg_files.insert(arg_files.begin(), arg_pattern);
5265 arg_pattern = NULL;
5266 }
5267
5268 #ifdef OS_WIN
5269
5270 // Windows shell does not expand wildcards in arguments, do that now (basename part only)
5271 if (!arg_files.empty())
5272 {
5273 std::vector<const char*> expanded_arg_files;
5274
5275 for (const auto& arg_file : arg_files)
5276 {
5277 std::wstring filename = utf8_decode(arg_file);
5278 bool has_wildcard_char = false;
5279
5280 size_t basename_pos;
5281 for (basename_pos = filename.size(); basename_pos > 0; --basename_pos)
5282 {
5283 wchar_t ch = filename[basename_pos - 1];
5284
5285 if (ch == L'*' || ch == L'?')
5286 has_wildcard_char = true;
5287 else if (ch == L'\\' || ch == L'/' || ch == L':')
5288 break;
5289 }
5290
5291 if (!has_wildcard_char)
5292 {
5293 // no wildcard chars, use argument as-is
5294 expanded_arg_files.push_back(arg_file);
5295 continue;
5296 }
5297
5298 WIN32_FIND_DATAW find_data;
5299
5300 HANDLE hFile = FindFirstFileExW(filename.c_str(), FindExInfoBasic, &find_data, FindExSearchNameMatch, NULL, 0);
5301 if (hFile == INVALID_HANDLE_VALUE)
5302 {
5303 // glob pattern didn't match any files, use argument as-is which will trigger a warning later
5304 expanded_arg_files.push_back(arg_file);
5305 continue;
5306 }
5307
5308 bool glob_starts_with_dot = filename[basename_pos] == L'.';
5309
5310 do
5311 {
5312 if (find_data.cFileName[0] == L'.')
5313 {
5314 // don't expand directories "." or ".."
5315 if (find_data.cFileName[1] == 0 ||
5316 (find_data.cFileName[1] == L'.' && find_data.cFileName[2] == 0))
5317 continue;
5318
5319 // don't expand hidden files unless --hidden or the pattern started with '.'
5320 if (!flag_hidden && !glob_starts_with_dot)
5321 continue;
5322 }
5323
5324 // replace glob pattern with matching filename converted to UTF-8, then add to expanded filename list
5325 filename.erase(basename_pos);
5326 filename += find_data.cFileName;
5327 arg_strings.emplace_back(utf8_encode(filename));
5328 expanded_arg_files.push_back(arg_strings.back().c_str());
5329 } while (FindNextFileW(hFile, &find_data));
5330
5331 FindClose(hFile);
5332 }
5333
5334 // replace the original filenames list with the expanded list
5335 arg_files.swap(expanded_arg_files);
5336 }
5337
5338 #endif
5339
5340 // -D: check ACTION value
5341 if (strcmp(flag_devices, "skip") == 0)
5342 flag_devices_action = Action::SKIP;
5343 else if (strcmp(flag_devices, "read") == 0)
5344 flag_devices_action = Action::READ;
5345 else
5346 usage("invalid argument -D ACTION, valid arguments are 'skip' and 'read'");
5347
5348 // normalize -R (--dereference-recurse) option
5349 if (strcmp(flag_directories, "dereference-recurse") == 0)
5350 {
5351 flag_directories = "recurse";
5352 flag_dereference = true;
5353 }
5354
5355 // -d: check ACTION value and set flags
5356 if (strcmp(flag_directories, "skip") == 0)
5357 flag_directories_action = Action::SKIP;
5358 else if (strcmp(flag_directories, "read") == 0)
5359 flag_directories_action = Action::READ;
5360 else if (strcmp(flag_directories, "recurse") == 0)
5361 flag_directories_action = Action::RECURSE;
5362 else
5363 usage("invalid argument -d ACTION, valid arguments are 'skip', 'read', 'recurse', and 'dereference-recurse'");
5364
5365 // if no FILE specified and no -r or -R specified, when reading standard input from a TTY then enable -R
5366 if (!flag_stdin && arg_files.empty() && flag_directories_action != Action::RECURSE && isatty(STDIN_FILENO))
5367 {
5368 flag_directories_action = Action::RECURSE;
5369 flag_dereference = true;
5370 }
5371
5372 // if no FILE specified then read standard input, unless recursive searches are specified
5373 if (arg_files.empty() && flag_min_depth == 0 && flag_max_depth == 0 && flag_directories_action != Action::RECURSE)
5374 flag_stdin = true;
5375
5376 // check FILE arguments, warn about non-existing FILE
5377 auto file = arg_files.begin();
5378 while (file != arg_files.end())
5379 {
5380 #ifdef OS_WIN
5381
5382 DWORD attr = GetFileAttributesW(utf8_decode(*file).c_str());
5383
5384 if (attr == INVALID_FILE_ATTRIBUTES)
5385 {
5386 // FILE does not exist
5387 errno = ENOENT;
5388 warning(NULL, *file);
5389
5390 file = arg_files.erase(file);
5391 if (arg_files.empty())
5392 exit(EXIT_ERROR);
5393 }
5394 else
5395 {
5396 // use threads to recurse into a directory
5397 if ((attr & FILE_ATTRIBUTE_DIRECTORY))
5398 {
5399 flag_all_threads = true;
5400
5401 // remove trailing path separators, if any (*file points to argv[])
5402 trim_pathname_arg(*file);
5403 }
5404
5405 ++file;
5406 }
5407
5408 #else
5409
5410 struct stat buf;
5411
5412 if (stat(*file, &buf) != 0)
5413 {
5414 // FILE does not exist
5415 warning(NULL, *file);
5416
5417 file = arg_files.erase(file);
5418 if (arg_files.empty())
5419 exit(EXIT_ERROR);
5420 }
5421 else
5422 {
5423 // use threads to recurse into a directory
5424 if (S_ISDIR(buf.st_mode))
5425 {
5426 flag_all_threads = true;
5427
5428 // remove trailing path separators, if any (*file points to argv[])
5429 trim_pathname_arg(*file);
5430 }
5431
5432 ++file;
5433 }
5434
5435 #endif
5436 }
5437
5438 // normalize --cpp, --csv, --json, --xml to their corresponding --format
5439 if (flag_cpp)
5440 {
5441 flag_format_begin = "const struct grep {\n const char *file;\n size_t line;\n size_t column;\n size_t offset;\n const char *match;\n} matches[] = {\n";
5442 flag_format_open = " // %f\n";
5443 flag_format = " { %h, %n, %k, %b, %C },\n%u";
5444 flag_format_close = " \n";
5445 flag_format_end = " { NULL, 0, 0, 0, NULL }\n};\n";
5446 }
5447 else if (flag_csv)
5448 {
5449 flag_format = "%[,]$%H%N%K%B%V\n%u";
5450 }
5451 else if (flag_json)
5452 {
5453 flag_format_begin = "[";
5454 flag_format_open = "%,\n {\n %[,\n ]$%[\"file\": ]H\"matches\": [";
5455 flag_format = "%,\n { %[, ]$%[\"line\": ]N%[\"column\": ]K%[\"offset\": ]B\"match\": %J }%u";
5456 flag_format_close = "\n ]\n }";
5457 flag_format_end = "\n]\n";
5458 }
5459 else if (flag_xml)
5460 {
5461 flag_format_begin = "<grep>\n";
5462 flag_format_open = " <file%[]$%[ name=]H>\n";
5463 flag_format = " <match%[\"]$%[ line=\"]N%[ column=\"]K%[ offset=\"]B>%X</match>\n%u";
5464 flag_format_close = " </file>\n";
5465 flag_format_end = "</grep>\n";
5466 }
5467
5468 #ifdef HAVE_STATVFS
5469
5470 // --exclude-fs: add file system ids to exclude
5471 for (const auto& mounts : flag_exclude_fs)
5472 {
5473 if (!mounts.empty())
5474 {
5475 struct statvfs buf;
5476 size_t from = 0;
5477
5478 while (true)
5479 {
5480 size_t to = mounts.find(',', from);
5481 size_t size = (to == std::string::npos ? mounts.size() : to) - from;
5482
5483 if (size > 0)
5484 {
5485 std::string mount(mounts.substr(from, size));
5486
5487 if (statvfs(mount.c_str(), &buf) == 0)
5488 exclude_fs_ids.insert(static_cast<uint64_t>(buf.f_fsid));
5489 else
5490 warning("--exclude-fs", mount.c_str());
5491 }
5492
5493 if (to == std::string::npos)
5494 break;
5495
5496 from = to + 1;
5497 }
5498 }
5499 }
5500
5501 // --include-fs: add file system ids to include
5502 for (const auto& mounts : flag_include_fs)
5503 {
5504 if (!mounts.empty())
5505 {
5506 struct statvfs buf;
5507 size_t from = 0;
5508
5509 while (true)
5510 {
5511 size_t to = mounts.find(',', from);
5512 size_t size = (to == std::string::npos ? mounts.size() : to) - from;
5513
5514 if (size > 0)
5515 {
5516 std::string mount(mounts.substr(from, size));
5517
5518 if (statvfs(mount.c_str(), &buf) == 0)
5519 include_fs_ids.insert(static_cast<uint64_t>(buf.f_fsid));
5520 else
5521 warning("--include-fs", mount.c_str());
5522 }
5523
5524 if (to == std::string::npos)
5525 break;
5526
5527 from = to + 1;
5528 }
5529 }
5530 }
5531
5532 #endif
5533
5534 // --exclude-from: add globs to the exclude and exclude-dir lists
5535 for (const auto& from : flag_exclude_from)
5536 {
5537 if (!from.empty())
5538 {
5539 FILE *file = NULL;
5540
5541 if (fopen_smart(&file, from.c_str(), "r") != 0)
5542 error("option --exclude-from: cannot read", from.c_str());
5543
5544 split_globs(file, flag_exclude, flag_exclude_dir);
5545
5546 if (file != stdin)
5547 fclose(file);
5548 }
5549 }
5550
5551 // --include-from: add globs to the include and include-dir lists
5552 for (const auto& from : flag_include_from)
5553 {
5554 if (!from.empty())
5555 {
5556 FILE *file = NULL;
5557
5558 if (fopen_smart(&file, from.c_str(), "r") != 0)
5559 error("option --include-from: cannot read", from.c_str());
5560
5561 split_globs(file, flag_include, flag_include_dir);
5562
5563 if (file != stdin)
5564 fclose(file);
5565 }
5566 }
5567
5568 // -t: parse TYPES and access type table to add -O (--file-extension), -g (--glob) and -M (--file-magic) values
5569 for (const auto& types : flag_file_type)
5570 {
5571 size_t from = 0;
5572
5573 while (true)
5574 {
5575 size_t to = types.find(',', from);
5576 size_t size = (to == std::string::npos ? types.size() : to) - from;
5577
5578 if (size > 0)
5579 {
5580 bool negate = size > 1 && (types[from] == '!' || types[from] == '^');
5581
5582 if (negate)
5583 {
5584 ++from;
5585 --size;
5586 }
5587
5588 std::string type(types.substr(from, size));
5589
5590 size_t i;
5591
5592 // scan the type_table[] for a matching type
5593 for (i = 0; type_table[i].type != NULL; ++i)
5594 if (type == type_table[i].type)
5595 break;
5596
5597 if (type_table[i].type == NULL)
5598 {
5599 std::string msg = "invalid argument -t TYPES, valid arguments are";
5600
5601 for (int i = 0; type_table[i].type != NULL; ++i)
5602 msg.append(" '").append(type_table[i].type).append("',");
5603 msg.append(" and 'list' to show a detailed list of file types");
5604
5605 usage(msg.c_str());
5606 }
5607
5608 std::string temp(type_table[i].extensions);
5609
5610 if (negate)
5611 {
5612 temp.insert(0, "!");
5613 size_t j = 0;
5614 while ((j = temp.find(',', j)) != std::string::npos)
5615 temp.insert(++j, "!");
5616 }
5617
5618 flag_file_extension.emplace_back(temp);
5619
5620 if (type_table[i].filenames != NULL)
5621 {
5622 temp.assign(type_table[i].filenames);
5623
5624 if (negate)
5625 {
5626 temp.insert(0, "!");
5627 size_t j = 0;
5628 while ((j = temp.find(',', j)) != std::string::npos)
5629 temp.insert(++j, "!");
5630 }
5631
5632 flag_glob.emplace_back(temp);
5633 }
5634
5635 if (type_table[i].magic != NULL)
5636 {
5637 flag_file_magic.emplace_back(type_table[i].magic);
5638
5639 if (negate)
5640 flag_file_magic.back().insert(0, "!");
5641 }
5642 }
5643
5644 if (to == std::string::npos)
5645 break;
5646
5647 from = to + 1;
5648 }
5649 }
5650
5651 // -O: add filename extensions as globs
5652 for (const auto& extensions : flag_file_extension)
5653 {
5654 size_t from = 0;
5655 std::string glob;
5656
5657 while (true)
5658 {
5659 size_t to = extensions.find(',', from);
5660 size_t size = (to == std::string::npos ? extensions.size() : to) - from;
5661
5662 if (size > 0)
5663 {
5664 bool negate = size > 1 && (extensions[from] == '!' || extensions[from] == '^');
5665
5666 if (negate)
5667 {
5668 ++from;
5669 --size;
5670 }
5671
5672 flag_glob.emplace_back(glob.assign(negate ? "^*." : "*.").append(extensions.substr(from, size)));
5673 }
5674
5675 if (to == std::string::npos)
5676 break;
5677
5678 from = to + 1;
5679 }
5680 }
5681
5682 // -M: file "magic bytes" regex string
5683 std::string magic_regex;
5684
5685 // -M !MAGIC: combine to create a regex string
5686 for (const auto& magic : flag_file_magic)
5687 {
5688 if (magic.size() > 1 && (magic.front() == '!' || magic.front() == '^'))
5689 {
5690 if (!magic_regex.empty())
5691 magic_regex.push_back('|');
5692 magic_regex.append(magic.substr(1));
5693
5694 // tally negative MAGIC patterns
5695 ++flag_min_magic;
5696 }
5697 }
5698
5699 // -M MAGIC: append to regex string
5700 for (const auto& magic : flag_file_magic)
5701 {
5702 if (magic.size() <= 1 || (magic.front() != '!' && magic.front() != '^'))
5703 {
5704 if (!magic_regex.empty())
5705 magic_regex.push_back('|');
5706 magic_regex.append(magic);
5707
5708 // we have positive MAGIC patterns, so scan() is a match when flag_min_magic or greater
5709 flag_not_magic = flag_min_magic;
5710 }
5711 }
5712
5713 // -M: create a magic matcher for the MAGIC regex to match file with magic.scan()
5714 try
5715 {
5716 // construct magic_pattern DFA for -M !MAGIC and -M MAGIC
5717 if (!magic_regex.empty())
5718 magic_pattern.assign(magic_regex, "r");
5719 magic_matcher.pattern(magic_pattern);
5720 }
5721
5722 catch (reflex::regex_error& error)
5723 {
5724 abort("option -M: ", error.what());
5725 }
5726
5727 // --filter-magic-label: construct filter_magic_pattern and map "magic bytes" to labels
5728 magic_regex = "(";
5729
5730 // --filter-magic-label: append pattern to magic_labels, parenthesized to ensure capture indexing
5731 for (auto& label : flag_filter_magic_label)
5732 {
5733 if (!label.empty())
5734 {
5735 size_t sep = label.find(':');
5736
5737 if (sep != std::string::npos && sep > 0 && sep + 1 < label.size())
5738 {
5739 if (!label.empty() && magic_regex.size() > 1)
5740 magic_regex.append(")|(");
5741 magic_regex.append(label.substr(sep + 1));
5742
5743 // truncate so we end up with a list of labels without patterns
5744 label.resize(sep);
5745 }
5746 else
5747 {
5748 abort("option --filter-magic-label: invalid LABEL:MAGIC argument ", label);
5749 }
5750 }
5751 }
5752
5753 magic_regex.push_back(')');
5754
5755 // --filter-magic-label: create a filter_magic_pattern
5756 try
5757 {
5758 // construct filter_magic_pattern DFA
5759 if (magic_regex.size() > 2)
5760 filter_magic_pattern.assign(magic_regex, "r");
5761 }
5762
5763 catch (reflex::regex_error& error)
5764 {
5765 abort("option --filter-magic-label: ", error.what());
5766 }
5767 }
5768
5769 // check TTY info and set colors
terminal()5770 void terminal()
5771 {
5772 if (flag_query > 0)
5773 {
5774 // -Q: disable --quiet
5775 flag_quiet = false;
5776 }
5777 else if (!flag_quiet)
5778 {
5779 // is output sent to a color TTY, to a pager, or to /dev/null?
5780
5781 // check if standard output is a TTY
5782 tty_term = isatty(STDOUT_FILENO) != 0;
5783
5784 #ifndef OS_WIN
5785
5786 if (!tty_term)
5787 {
5788 output_stat_result = fstat(STDOUT_FILENO, &output_stat) == 0;
5789 output_stat_regular = output_stat_result && S_ISREG(output_stat.st_mode);
5790
5791 // if output is sent to /dev/null, then enable -q (i.e. "cheat" like GNU grep!)
5792 struct stat dev_null_stat;
5793 if (output_stat_result &&
5794 S_ISCHR(output_stat.st_mode) &&
5795 stat("/dev/null", &dev_null_stat) == 0 &&
5796 output_stat.st_dev == dev_null_stat.st_dev &&
5797 output_stat.st_ino == dev_null_stat.st_ino)
5798 {
5799 flag_quiet = true;
5800 }
5801 }
5802
5803 #endif
5804 }
5805
5806 // whether to apply colors
5807 flag_apply_color = flag_tag != NULL ? "never" : flag_query > 0 ? "always" : flag_color;
5808
5809 if (!flag_quiet)
5810 {
5811 if (tty_term || flag_query > 0)
5812 {
5813 if (flag_pretty)
5814 {
5815 // --pretty: if output is to a TTY then enable --color, --heading, -T, -n, and --sort
5816
5817 // enable --color
5818 if (flag_apply_color == NULL)
5819 flag_apply_color = "auto";
5820
5821 // enable --heading if not explicitly disabled (enables --break later)
5822 if (flag_heading.is_undefined())
5823 flag_heading = true;
5824
5825 // enable -T if not explicitly disabled (initial tab)
5826 if (flag_initial_tab.is_undefined())
5827 flag_initial_tab = true;
5828
5829 // enable -n if not explicitly disabled
5830 if (flag_line_number.is_undefined())
5831 flag_line_number = true;
5832
5833 // enable --sort=name if no --sort specified
5834 if (flag_sort == NULL)
5835 flag_sort = "name";
5836 }
5837 else if (flag_apply_color != NULL)
5838 {
5839 // --colors: if output is to a TTY then enable --color and use the specified --colors
5840
5841 // enable --color
5842 if (flag_apply_color == NULL)
5843 flag_apply_color = "auto";
5844 }
5845
5846 if (flag_query > 0)
5847 {
5848 // --query: run the interactive query UI
5849
5850 // enable --heading if not explicitly disabled (enables --break later)
5851 if (flag_heading.is_undefined())
5852 flag_heading = true;
5853
5854 // enable --line-buffered to flush output immediately
5855 flag_line_buffered = true;
5856 }
5857 else if (flag_pager != NULL && *flag_pager != '\0')
5858 {
5859 // --pager: if output is to a TTY then page through the results
5860
5861 // open a pipe to a forked pager
5862 #ifdef OS_WIN
5863 output = popen(flag_pager, "wb");
5864 #else
5865 output = popen(flag_pager, "w");
5866 #endif
5867 if (output == NULL)
5868 error("cannot open pipe to pager", flag_pager);
5869
5870 // enable --heading if not explicitly disabled (enables --break later)
5871 if (flag_heading.is_undefined())
5872 flag_heading = true;
5873
5874 // enable --line-buffered to flush output to the pager immediately
5875 flag_line_buffered = true;
5876 }
5877 }
5878
5879 // --color: (re)set flag_apply_color depending on color_term and TTY output
5880 if (flag_apply_color != NULL)
5881 {
5882 color_term = flag_query > 0;
5883
5884 if (strcmp(flag_apply_color, "never") == 0 || strcmp(flag_apply_color, "no") == 0 || strcmp(flag_apply_color, "none") == 0)
5885 {
5886 flag_apply_color = NULL;
5887 }
5888 else
5889 {
5890 #ifdef OS_WIN
5891
5892 if (tty_term || flag_query > 0)
5893 {
5894 #ifdef ENABLE_VIRTUAL_TERMINAL_PROCESSING
5895 // assume we have a color terminal on Windows if isatty() is true
5896 HANDLE hConOut = GetStdHandle(STD_OUTPUT_HANDLE);
5897 if (hConOut != INVALID_HANDLE_VALUE)
5898 {
5899 #ifdef CP_UTF8
5900 // enable UTF-8 output
5901 SetConsoleOutputCP(CP_UTF8);
5902 #endif
5903 // try virtual terminal processing for ANSI SGR codes, enable colors when successful
5904 DWORD outMode;
5905 GetConsoleMode(hConOut, &outMode);
5906 outMode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING;
5907 color_term = SetConsoleMode(hConOut, outMode) != 0;
5908 }
5909 #endif
5910 }
5911
5912 #else
5913
5914 // check whether we have a color terminal
5915 if (tty_term)
5916 {
5917 const char *term;
5918 if (getenv("COLORTERM") != NULL ||
5919 ((term = getenv("TERM")) != NULL &&
5920 (strstr(term, "ansi") != NULL ||
5921 strstr(term, "xterm") != NULL ||
5922 strstr(term, "screen") != NULL ||
5923 strstr(term, "color") != NULL)))
5924 color_term = true;
5925 }
5926
5927 #endif
5928
5929 if (strcmp(flag_apply_color, "auto") == 0 || strcmp(flag_apply_color, "tty") == 0 || strcmp(flag_apply_color, "if-tty") == 0)
5930 {
5931 if (!color_term)
5932 flag_apply_color = NULL;
5933 }
5934 else if (strcmp(flag_apply_color, "always") != 0 && strcmp(flag_apply_color, "yes") != 0 && strcmp(flag_apply_color, "force") != 0)
5935 {
5936 usage("invalid argument --color=WHEN, valid arguments are 'never', 'always', and 'auto'");
5937 }
5938
5939 if (flag_apply_color != NULL)
5940 {
5941 // get GREP_COLOR and GREP_COLORS, when defined
5942 char *env_grep_color = NULL;
5943 dupenv_s(&env_grep_color, "GREP_COLOR");
5944 char *env_grep_colors = NULL;
5945 dupenv_s(&env_grep_colors, "GREP_COLORS");
5946 const char *grep_colors = env_grep_colors;
5947
5948 // if GREP_COLOR is defined but not GREP_COLORS, use it to set mt= default value (overridden by GREP_COLORS mt=, ms=, mc=)
5949 if (env_grep_colors == NULL && env_grep_color != NULL)
5950 set_color(std::string("mt=").append(env_grep_color).c_str(), "mt=", color_mt);
5951 else if (grep_colors == NULL)
5952 grep_colors = DEFAULT_GREP_COLORS;
5953
5954 // parse GREP_COLORS
5955 set_color(grep_colors, "sl=", color_sl); // selected line
5956 set_color(grep_colors, "cx=", color_cx); // context line
5957 set_color(grep_colors, "mt=", color_mt); // matched text in any line
5958 set_color(grep_colors, "ms=", color_ms); // matched text in selected line
5959 set_color(grep_colors, "mc=", color_mc); // matched text in a context line
5960 set_color(grep_colors, "fn=", color_fn); // file name
5961 set_color(grep_colors, "ln=", color_ln); // line number
5962 set_color(grep_colors, "cn=", color_cn); // column number
5963 set_color(grep_colors, "bn=", color_bn); // byte offset
5964 set_color(grep_colors, "se=", color_se); // separator
5965
5966 // parse --colors to override GREP_COLORS
5967 set_color(flag_colors, "sl=", color_sl); // selected line
5968 set_color(flag_colors, "cx=", color_cx); // context line
5969 set_color(flag_colors, "mt=", color_mt); // matched text in any line
5970 set_color(flag_colors, "ms=", color_ms); // matched text in selected line
5971 set_color(flag_colors, "mc=", color_mc); // matched text in a context line
5972 set_color(flag_colors, "fn=", color_fn); // file name
5973 set_color(flag_colors, "ln=", color_ln); // line number
5974 set_color(flag_colors, "cn=", color_cn); // column number
5975 set_color(flag_colors, "bn=", color_bn); // byte offset
5976 set_color(flag_colors, "se=", color_se); // separator
5977
5978 // -v: if rv in GREP_COLORS then swap the sl and cx colors (note that rv does not match color letters)
5979 if (flag_invert_match &&
5980 ((grep_colors != NULL && strstr(grep_colors, "rv") != NULL) ||
5981 (flag_colors != NULL && strstr(flag_colors, "rv") != NULL)))
5982 {
5983 char color_tmp[COLORLEN];
5984 copy_color(color_tmp, color_sl);
5985 copy_color(color_sl, color_cx);
5986 copy_color(color_cx, color_tmp);
5987 }
5988
5989 // if ms= is not specified, use the mt= value
5990 if (*color_ms == '\0')
5991 copy_color(color_ms, color_mt);
5992
5993 // if mc= is not specified, use the mt= value
5994 if (*color_mc == '\0')
5995 copy_color(color_mc, color_mt);
5996
5997 // if OSC hyperlinks are OK (note that "hl" does not match color letters so strstr can be used)
5998 if ((grep_colors != NULL && strstr(grep_colors, "hl") != NULL) || (flag_colors != NULL && strstr(flag_colors, "hl") != NULL))
5999 {
6000 char *cwd = getcwd0();
6001 if (cwd != NULL)
6002 {
6003 char *path = cwd;
6004 if (*path == PATHSEPCHR)
6005 ++path;
6006 color_wd.assign("file://localhost").append(PATHSEPSTR).append(path).push_back(PATHSEPCHR);
6007 free(cwd);
6008 color_hl = "\033]8;;";
6009 color_st = "\033\\";
6010 }
6011 }
6012
6013 // if CSI erase line is OK (note that ne does not match color letters so strstr can be used)
6014 if ((grep_colors == NULL || strstr(grep_colors, "ne") == NULL) && (flag_colors == NULL || strstr(flag_colors, "ne") == NULL))
6015 color_del = "\033[K";
6016
6017 color_off = "\033[m";
6018
6019 copy_color(match_off, color_off);
6020
6021 if (isatty(STDERR_FILENO))
6022 {
6023 color_high = "\033[1m";
6024 color_error = "\033[1;31m";
6025 color_warning = "\033[1;35m";
6026 color_message = "\033[1;36m";
6027 }
6028
6029 if (env_grep_color != NULL)
6030 free(env_grep_color);
6031 if (env_grep_colors != NULL)
6032 free(env_grep_colors);
6033 }
6034 }
6035 }
6036 }
6037 }
6038
6039 // search the specified files, directories, and/or standard input for pattern matches
ugrep()6040 void ugrep()
6041 {
6042 // reset warnings
6043 warnings = 0;
6044
6045 // reset stats
6046 Stats::reset();
6047
6048 // populate the combined all-include and all-exclude
6049 flag_all_include = flag_include;
6050 flag_all_include_dir = flag_include_dir;
6051 flag_all_exclude = flag_exclude;
6052 flag_all_exclude_dir = flag_exclude_dir;
6053
6054 // -g, --glob: add globs to all-include/all-exclude
6055 for (const auto& globs : flag_glob)
6056 {
6057 size_t from = 0;
6058 std::string glob;
6059
6060 while (true)
6061 {
6062 size_t to = globs.find(',', from);
6063 size_t size = (to == std::string::npos ? globs.size() : to) - from;
6064
6065 if (size > 0)
6066 {
6067 bool negate = size > 1 && (globs[from] == '!' || globs[from] == '^');
6068
6069 if (negate)
6070 {
6071 ++from;
6072 --size;
6073 }
6074
6075 (negate ? flag_all_exclude : flag_all_include).emplace_back(globs.substr(from, size));
6076 }
6077
6078 if (to == std::string::npos)
6079 break;
6080
6081 from = to + 1;
6082 }
6083 }
6084
6085 // all excluded files: normalize by moving directory globs (globs ending in a path separator /) to --exclude-dir
6086 auto i = flag_all_exclude.begin();
6087 while (i != flag_all_exclude.end())
6088 {
6089 if (i->empty())
6090 {
6091 i = flag_all_exclude.erase(i);
6092 }
6093 else if (i->back() == '/')
6094 {
6095 flag_all_exclude_dir.emplace_back(*i);
6096 i = flag_all_exclude.erase(i);
6097 }
6098 else
6099 {
6100 ++i;
6101 }
6102 }
6103
6104 // all included files: normalize by moving directory globs (globs ending in a path separator /) to --include-dir
6105 i = flag_all_include.begin();
6106 while (i != flag_all_include.end())
6107 {
6108 if (i->empty())
6109 {
6110 i = flag_all_include.erase(i);
6111 }
6112 else
6113 {
6114 if (i->back() == '/')
6115 {
6116 flag_all_include_dir.emplace_back(*i);
6117 i = flag_all_include.erase(i);
6118 }
6119 else
6120 {
6121 // if an include file glob starts with a dot, then enable searching hidden files and directories
6122 if (i->front() == '.' || i->find(PATHSEPSTR ".") != std::string::npos)
6123 flag_hidden = true;
6124
6125 ++i;
6126 }
6127 }
6128 }
6129
6130 // if an include dir glob starts with a dot, then enable searching hidden files and directories
6131 if (!flag_hidden)
6132 {
6133 for (const auto& dir : flag_all_include_dir)
6134 {
6135 if (dir.front() == '.' || dir.find(PATHSEPSTR ".") != std::string::npos)
6136 {
6137 flag_hidden = true;
6138 break;
6139 }
6140 }
6141 }
6142
6143 #ifdef HAVE_LIBZ
6144 #ifdef WITH_DECOMPRESSION_THREAD
6145 // -z with -M or -O/--include: add globs to search archive contents
6146 if (flag_decompress && (!flag_file_magic.empty() || !flag_all_include.empty()))
6147 {
6148 flag_all_include.emplace_back("*.cpio");
6149 flag_all_include.emplace_back("*.pax");
6150 flag_all_include.emplace_back("*.tar");
6151 flag_all_include.emplace_back("*.zip");
6152 flag_all_include.emplace_back("*.zipx");
6153 flag_all_include.emplace_back("*.ZIP");
6154
6155 flag_all_include.emplace_back("*.cpio.gz");
6156 flag_all_include.emplace_back("*.pax.gz");
6157 flag_all_include.emplace_back("*.tar.gz");
6158 flag_all_include.emplace_back("*.taz");
6159 flag_all_include.emplace_back("*.tgz");
6160 flag_all_include.emplace_back("*.tpz");
6161
6162 flag_all_include.emplace_back("*.cpio.Z");
6163 flag_all_include.emplace_back("*.pax.Z");
6164 flag_all_include.emplace_back("*.tar.Z");
6165
6166 flag_all_include.emplace_back("*.cpio.zip");
6167 flag_all_include.emplace_back("*.pax.zip");
6168 flag_all_include.emplace_back("*.tar.zip");
6169
6170 #ifdef HAVE_LIBBZ2
6171 flag_all_include.emplace_back("*.cpio.bz");
6172 flag_all_include.emplace_back("*.pax.bz");
6173 flag_all_include.emplace_back("*.tar.bz");
6174 flag_all_include.emplace_back("*.cpio.bz2");
6175 flag_all_include.emplace_back("*.pax.bz2");
6176 flag_all_include.emplace_back("*.tar.bz2");
6177 flag_all_include.emplace_back("*.cpio.bzip2");
6178 flag_all_include.emplace_back("*.pax.bzip2");
6179 flag_all_include.emplace_back("*.tar.bzip2");
6180 flag_all_include.emplace_back("*.tb2");
6181 flag_all_include.emplace_back("*.tbz");
6182 flag_all_include.emplace_back("*.tbz2");
6183 flag_all_include.emplace_back("*.tz2");
6184 #endif
6185
6186 #ifdef HAVE_LIBLZMA
6187 flag_all_include.emplace_back("*.cpio.lzma");
6188 flag_all_include.emplace_back("*.pax.lzma");
6189 flag_all_include.emplace_back("*.tar.lzma");
6190 flag_all_include.emplace_back("*.cpio.xz");
6191 flag_all_include.emplace_back("*.pax.xz");
6192 flag_all_include.emplace_back("*.tar.xz");
6193 flag_all_include.emplace_back("*.tlz");
6194 flag_all_include.emplace_back("*.txz");
6195 #endif
6196
6197 #ifdef HAVE_LIBLZ4
6198 flag_all_include.emplace_back("*.cpio.lz4");
6199 flag_all_include.emplace_back("*.pax.lz4");
6200 flag_all_include.emplace_back("*.tar.lz4");
6201 #endif
6202
6203 #ifdef HAVE_LIBZSTD
6204 flag_all_include.emplace_back("*.cpio.zst");
6205 flag_all_include.emplace_back("*.pax.zst");
6206 flag_all_include.emplace_back("*.tar.zst");
6207 flag_all_include.emplace_back("*.cpio.zstd");
6208 flag_all_include.emplace_back("*.pax.zstd");
6209 flag_all_include.emplace_back("*.tar.zstd");
6210 flag_all_include.emplace_back("*.tzst");
6211 #endif
6212 }
6213 #endif
6214 #endif
6215
6216 // all excluded-dirs: normalize by removing trailing path separators
6217 for (auto& i : flag_all_exclude_dir)
6218 while (i.size() > 1 && i.back() == '/')
6219 i.pop_back();
6220
6221 // all included-dirs: normalize by removing trailing path separators
6222 for (auto& i : flag_all_include_dir)
6223 while (i.size() > 1 && i.back() == '/')
6224 i.pop_back();
6225
6226 // --sort: check sort KEY and set flags
6227 if (flag_sort != NULL)
6228 {
6229 flag_sort_rev = *flag_sort == 'r';
6230
6231 if (strcmp(flag_sort, "name") == 0 || strcmp(flag_sort, "rname") == 0)
6232 flag_sort_key = Sort::NAME;
6233 else if (strcmp(flag_sort, "best") == 0 || strcmp(flag_sort, "rbest") == 0)
6234 flag_sort_key = Sort::BEST;
6235 else if (strcmp(flag_sort, "size") == 0 || strcmp(flag_sort, "rsize") == 0)
6236 flag_sort_key = Sort::SIZE;
6237 else if (strcmp(flag_sort, "used") == 0 || strcmp(flag_sort, "rused") == 0)
6238 flag_sort_key = Sort::USED;
6239 else if (strcmp(flag_sort, "changed") == 0 || strcmp(flag_sort, "rchanged") == 0)
6240 flag_sort_key = Sort::CHANGED;
6241 else if (strcmp(flag_sort, "created") == 0 || strcmp(flag_sort, "rcreated") == 0)
6242 flag_sort_key = Sort::CREATED;
6243 else
6244 usage("invalid argument --sort=KEY, valid arguments are 'name', 'best', 'size', 'used', 'changed', 'created', 'rname', 'rbest', 'rsize', 'rused', 'rchanged', and 'rcreated'");
6245 }
6246
6247 // add PATTERN to the CNF
6248 if (arg_pattern != NULL)
6249 bcnf.new_pattern(CNF::PATTERN::NA, arg_pattern);
6250
6251 // the regex compiled from PATTERN, -e PATTERN, -N PATTERN, and -f FILE
6252 std::string regex;
6253
6254 if (bcnf.defined())
6255 {
6256 // prune empty terms from the CNF that match anything
6257 bcnf.prune();
6258
6259 // split the patterns at newlines, standard grep behavior
6260 bcnf.split();
6261
6262 if (flag_file.empty())
6263 {
6264 // the CNF patterns to search, this matches more than necessary to support multiline matching and to highlight all matches in color
6265 regex.assign(bcnf.adjoin());
6266
6267 // an empty pattern specified matches every line with ^.* (using ^ to prevent -o from making an extra empty match), including empty lines
6268 if (regex.empty())
6269 {
6270 regex = flag_hex ? ".*\\n?" : "^.*";
6271 flag_empty = true;
6272 flag_dotall = false;
6273 }
6274
6275 // CNF is empty if all patterns are empty, i.e. match anything unless -f FILE specified
6276 if (bcnf.empty())
6277 {
6278 flag_match = true;
6279 flag_dotall = false;
6280 }
6281 }
6282 else
6283 {
6284 // -f FILE is combined with -e, --and, --andnot, --not
6285
6286 if (bcnf.first_empty())
6287 {
6288 // an empty pattern specified with -e '' matches every line
6289 regex = flag_hex ? ".*\\n?" : "^.*";
6290 flag_empty = true;
6291 }
6292 else
6293 {
6294 // for efficiency, take only the first CNF OR-list terms to search in combination with -f FILE patterns
6295 regex.assign(bcnf.first());
6296 }
6297 }
6298 }
6299
6300 // -v with --files is not permitted
6301 if (flag_files && flag_invert_match)
6302 {
6303 abort("-v is not permitted with --files, invert the Boolean query instead");
6304 flag_invert_match = false;
6305 }
6306
6307 // -x or --match: enable -Y and disable --dotall and -w
6308 if (flag_line_regexp || flag_match)
6309 {
6310 flag_empty = true;
6311 flag_dotall = false;
6312 flag_word_regexp = false;
6313 }
6314
6315 // -f: get patterns from file
6316 if (!flag_file.empty())
6317 {
6318 bool line_regexp = flag_line_regexp;
6319 bool word_regexp = flag_word_regexp;
6320
6321 // -F: make newline-separated lines in regex literal with \Q and \E
6322 const char *Q = flag_fixed_strings ? "\\Q" : "";
6323 const char *E = flag_fixed_strings ? "\\E|" : flag_basic_regexp ? "\\|" : "|";
6324
6325 // PATTERN or -e PATTERN: add an ending '|' (or BRE '\|') to the regex to concatenate sub-expressions
6326 if (!regex.empty())
6327 {
6328 // -F does not apply to patterns in -f FILE when PATTERN or -e PATTERN is specified
6329 Q = "";
6330 E = flag_basic_regexp ? "\\|" : "|";
6331
6332 // -x and -w do not apply to patterns in -f FILE when PATTERN or -e PATTERN is specified
6333 line_regexp = false;
6334 word_regexp = false;
6335
6336 regex.append(E);
6337 }
6338
6339 // -f: read patterns from the specified file or files
6340 for (const auto& filename : flag_file)
6341 {
6342 FILE *file = NULL;
6343
6344 if (fopen_smart(&file, filename.c_str(), "r") != 0)
6345 file = NULL;
6346
6347 if (file == NULL)
6348 {
6349 // could not open, try GREP_PATH environment variable
6350 char *env_grep_path = NULL;
6351 dupenv_s(&env_grep_path, "GREP_PATH");
6352
6353 if (env_grep_path != NULL)
6354 {
6355 if (fopen_smart(&file, std::string(env_grep_path).append(PATHSEPSTR).append(filename).c_str(), "r") != 0)
6356 file = NULL;
6357
6358 free(env_grep_path);
6359 }
6360 }
6361
6362 #ifdef GREP_PATH
6363 if (file == NULL)
6364 {
6365 if (fopen_smart(&file, std::string(GREP_PATH).append(PATHSEPSTR).append(filename).c_str(), "r") != 0)
6366 file = NULL;
6367 }
6368 #endif
6369
6370 if (file == NULL)
6371 throw std::runtime_error(std::string("option -f: cannot read ").append(filename)); // to catch in query UI
6372
6373 reflex::BufferedInput input(file);
6374 std::string line;
6375
6376 while (true)
6377 {
6378 // read the next line
6379 if (getline(input, line))
6380 break;
6381
6382 // add line to the regex if not empty
6383 if (!line.empty())
6384 regex.append(Q).append(line).append(E);
6385 }
6386
6387 if (file != stdin)
6388 fclose(file);
6389 }
6390
6391 // pop unused ending '|' (or BRE '\|') from the |-concatenated regexes in the regex string
6392 regex.pop_back();
6393 if (flag_basic_regexp)
6394 regex.pop_back();
6395
6396 // -G requires \( \) instead of ( ) and -P requires (?<!\w) (?!\w) instead of \< and \>
6397 const char *xleft = flag_basic_regexp ? "^\\(" : "^(?:";
6398 const char *xright = flag_basic_regexp ? "\\)$" : ")$";
6399 #if defined(HAVE_PCRE2)
6400 const char *wleft = flag_basic_regexp ? "\\<\\(" : flag_perl_regexp ? "(?<!\\w)(?:" : "\\<(";
6401 const char *wright = flag_basic_regexp ? "\\)\\>" : flag_perl_regexp ? ")(?!\\w)" : ")\\>";
6402 #else // Boost.Regex
6403 const char *wleft = flag_basic_regexp ? "\\<\\(" : flag_perl_regexp ? "(?<![[:word:]])(?:" : "\\<(";
6404 const char *wright = flag_basic_regexp ? "\\)\\>" : flag_perl_regexp ? ")(?![[:word:]])" : ")\\>";
6405 #endif
6406
6407 // -x or -w: if no PATTERN is specified, then apply -x or -w to -f FILE patterns
6408 if (line_regexp)
6409 regex.insert(0, xleft).append(xright); // make the regex line-anchored
6410 else if (word_regexp)
6411 regex.insert(0, wleft).append(wright); // make the regex word-anchored
6412 }
6413
6414 // --match: adjust color highlighting to show matches as selected lines without color
6415 if (flag_match)
6416 {
6417 copy_color(match_ms, color_sl);
6418 copy_color(match_mc, color_cx);
6419 copy_color(match_off, color_off);
6420 }
6421 else
6422 {
6423 // --tag: output tagged matches instead of colors
6424 if (flag_tag != NULL)
6425 {
6426 const char *s1 = strchr(flag_tag, ',');
6427 const char *s2 = s1 != NULL ? strchr(s1 + 1, ',') : NULL;
6428
6429 copy_color(match_ms, flag_tag);
6430
6431 if (s1 == NULL)
6432 {
6433 copy_color(match_mc, flag_tag);
6434 copy_color(match_off, flag_tag);
6435 }
6436 else
6437 {
6438 copy_color(match_off, s1 + 1);
6439
6440 if (s2 == NULL)
6441 copy_color(match_mc, match_ms);
6442 else
6443 copy_color(match_mc, s2 + 1);
6444 }
6445 }
6446 else
6447 {
6448 copy_color(match_ms, color_ms);
6449 copy_color(match_mc, color_mc);
6450 copy_color(match_off, color_off);
6451 }
6452 }
6453
6454 // -j: case insensitive search if regex does not contain an upper case letter
6455 if (flag_smart_case)
6456 {
6457 flag_ignore_case = true;
6458
6459 for (size_t i = 0; i < regex.size(); ++i)
6460 {
6461 if (regex[i] == '\\')
6462 {
6463 ++i;
6464 }
6465 else if (regex[i] == '{')
6466 {
6467 while (++i < regex.size() && regex[i] != '}')
6468 continue;
6469 }
6470 else if (isupper(regex[i]))
6471 {
6472 flag_ignore_case = false;
6473 break;
6474 }
6475 }
6476 }
6477
6478 // -y: disable -A, -B, and -C
6479 if (flag_any_line)
6480 flag_after_context = flag_before_context = 0;
6481
6482 // -A, -B, or -C: disable -o
6483 if (flag_after_context > 0 || flag_before_context > 0)
6484 flag_only_matching = false;
6485
6486 // -v or -y: disable -o and -u
6487 if (flag_invert_match || flag_any_line)
6488 flag_only_matching = flag_ungroup = false;
6489
6490 // --depth: if -R or -r is not specified then enable -R
6491 if ((flag_min_depth > 0 || flag_max_depth > 0) && flag_directories_action != Action::RECURSE)
6492 {
6493 flag_directories_action = Action::RECURSE;
6494 flag_dereference = true;
6495 }
6496
6497 // -p (--no-dereference) and -S (--dereference): -p takes priority over -S and -R
6498 if (flag_no_dereference)
6499 flag_dereference = false;
6500
6501 // display file name if more than one input file is specified or options -R, -r, and option -h --no-filename is not specified
6502 if (!flag_no_filename && (flag_all_threads || flag_directories_action == Action::RECURSE || arg_files.size() > 1 || (flag_stdin && !arg_files.empty())))
6503 flag_with_filename = true;
6504
6505 // --only-line-number implies -n
6506 if (flag_only_line_number)
6507 flag_line_number = true;
6508
6509 // if no display options -H, -n, -k, -b are set, enable --no-header to suppress headers for speed
6510 if (!flag_with_filename && !flag_line_number && !flag_column_number && !flag_byte_offset)
6511 flag_no_header = true;
6512
6513 // -q: we only need to find one matching file and we're done
6514 if (flag_quiet)
6515 {
6516 flag_max_files = 1;
6517
6518 // -q overrides -l and -L
6519 flag_files_with_matches = false;
6520 flag_files_without_match = false;
6521
6522 // disable --format options
6523 flag_format_begin = NULL;
6524 flag_format_open = NULL;
6525 flag_format = NULL;
6526 flag_format_close = NULL;
6527 flag_format_end = NULL;
6528 }
6529
6530 // -L: enable -l and flip -v i.e. -L=-lv and -l=-Lv
6531 if (flag_files_without_match)
6532 {
6533 flag_files_with_matches = true;
6534 flag_invert_match = !flag_invert_match;
6535 }
6536
6537 // -l or -L: enable -H, disable -c
6538 if (flag_files_with_matches)
6539 {
6540 flag_with_filename = true;
6541 flag_count = false;
6542 }
6543
6544 // --heading: enable --break when filenames are shown
6545 if (flag_heading && flag_with_filename)
6546 flag_break = true;
6547
6548 // -J: when not set the default is the number of cores (or hardware threads), limited to MAX_JOBS
6549 if (flag_jobs == 0)
6550 {
6551 unsigned int cores = std::thread::hardware_concurrency();
6552 unsigned int concurrency = cores > 2 ? cores : 2;
6553 flag_jobs = std::min(concurrency, MAX_JOBS);
6554 }
6555
6556 // --sort and --max-files: limit number of threads to --max-files to prevent unordered results, this is a special case
6557 if (flag_sort_key != Sort::NA && flag_max_files > 0)
6558 flag_jobs = std::min(flag_jobs, flag_max_files);
6559
6560 // set the number of threads to the number of files or when recursing to the value of -J, --jobs
6561 if (flag_all_threads || flag_directories_action == Action::RECURSE)
6562 threads = flag_jobs;
6563 else
6564 threads = std::min(arg_files.size() + flag_stdin, flag_jobs);
6565
6566 // inverted character classes and \s do not match newlines, e.g. [^x] matches anything except x and \n
6567 reflex::convert_flag_type convert_flags = reflex::convert_flag::notnewline;
6568
6569 // not -U: convert regex to Unicode
6570 if (!flag_binary)
6571 convert_flags |= reflex::convert_flag::unicode;
6572
6573 // -G: convert basic regex (BRE) to extended regex (ERE)
6574 if (flag_basic_regexp)
6575 convert_flags |= reflex::convert_flag::basic;
6576
6577 // set reflex::Pattern options to enable multiline mode
6578 std::string pattern_options("(?m");
6579
6580 // -i: case insensitive reflex::Pattern option, applies to ASCII only
6581 if (flag_ignore_case)
6582 pattern_options.push_back('i');
6583
6584 // --dotall and not --match (or empty pattern): dot matches newline
6585 if (flag_dotall)
6586 pattern_options.push_back('s');
6587
6588 // --free-space: convert_flags is needed to check free-space conformance by the converter
6589 if (flag_free_space)
6590 {
6591 convert_flags |= reflex::convert_flag::freespace;
6592 pattern_options.push_back('x');
6593 }
6594
6595 // prepend the pattern options (?m...) to the regex
6596 pattern_options.push_back(')');
6597 regex.insert(0, pattern_options);
6598
6599 // reflex::Matcher options
6600 std::string matcher_options;
6601
6602 // -Y: permit empty pattern matches
6603 if (flag_empty)
6604 matcher_options.push_back('N');
6605
6606 // -w: match whole words, i.e. make \< and \> match only left side and right side, respectively
6607 if (flag_word_regexp)
6608 matcher_options.push_back('W');
6609
6610 // --tabs: set reflex::Matcher option T to NUM (1, 2, 4, or 8) tab size
6611 if (flag_tabs)
6612 matcher_options.append("T=").push_back(static_cast<char>(flag_tabs) + '0');
6613
6614 // --format-begin
6615 if (flag_format_begin != NULL)
6616 format(flag_format_begin, 0);
6617
6618 size_t nodes = 0;
6619 size_t edges = 0;
6620 size_t words = 0;
6621 size_t nodes_time = 0;
6622 size_t edges_time = 0;
6623 size_t words_time = 0;
6624
6625 // -P: Perl matching with PCRE2 or Boost.Regex
6626 if (flag_perl_regexp)
6627 {
6628 #if defined(HAVE_PCRE2)
6629 // construct the PCRE2 JIT-optimized NFA-based Perl pattern matcher
6630 std::string pattern(flag_binary ? reflex::PCRE2Matcher::convert(regex, convert_flags) : reflex::PCRE2UTFMatcher::convert(regex, convert_flags));
6631 reflex::PCRE2Matcher matcher(pattern, reflex::Input(), matcher_options.c_str(), flag_binary ? (PCRE2_NEVER_UTF | PCRE2_NEVER_UCP) : (PCRE2_UTF | PCRE2_UCP));
6632 Grep::Matchers matchers;
6633
6634 if (!bcnf.singleton_or_undefined())
6635 {
6636 std::string subregex;
6637
6638 for (const auto& i : bcnf.lists())
6639 {
6640 matchers.emplace_back();
6641
6642 auto& submatchers = matchers.back();
6643
6644 for (const auto& j : i)
6645 {
6646 if (j)
6647 {
6648 subregex.assign(pattern_options).append(*j);
6649 submatchers.emplace_back(new reflex::PCRE2Matcher((flag_binary ? reflex::PCRE2Matcher::convert(subregex, convert_flags) : reflex::PCRE2UTFMatcher::convert(subregex, convert_flags)), reflex::Input(), matcher_options.c_str(), flag_binary ? (PCRE2_NEVER_UTF | PCRE2_NEVER_UCP) : (PCRE2_UTF | PCRE2_UCP)));
6650 }
6651 else
6652 {
6653 submatchers.emplace_back();
6654 }
6655 }
6656 }
6657 }
6658
6659 if (threads > 1)
6660 {
6661 GrepMaster grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6662 grep.ugrep();
6663 }
6664 else
6665 {
6666 Grep grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6667 set_grep_handle(&grep);
6668 grep.ugrep();
6669 clear_grep_handle();
6670 }
6671 #elif defined(HAVE_BOOST_REGEX)
6672 std::string pattern;
6673 try
6674 {
6675 // construct the Boost.Regex NFA-based Perl pattern matcher
6676 pattern.assign(reflex::BoostPerlMatcher::convert(regex, convert_flags));
6677 reflex::BoostPerlMatcher matcher(pattern, reflex::Input(), matcher_options.c_str());
6678 Grep::Matchers matchers;
6679
6680 if (!bcnf.singleton_or_undefined())
6681 {
6682 std::string subregex;
6683
6684 for (const auto& i : bcnf.lists())
6685 {
6686 matchers.emplace_back();
6687
6688 auto& submatchers = matchers.back();
6689
6690 for (const auto& j : i)
6691 {
6692 if (j)
6693 {
6694 subregex.assign(pattern_options).append(*j);
6695 submatchers.emplace_back(new reflex::BoostPerlMatcher(reflex::BoostPerlMatcher::convert(subregex, convert_flags), reflex::Input(), matcher_options.c_str()));
6696 }
6697 else
6698 {
6699 submatchers.emplace_back();
6700 }
6701 }
6702 }
6703 }
6704
6705 if (threads > 1)
6706 {
6707 GrepMaster grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6708 grep.ugrep();
6709 }
6710 else
6711 {
6712 Grep grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6713 set_grep_handle(&grep);
6714 grep.ugrep();
6715 clear_grep_handle();
6716 }
6717 }
6718
6719 catch (boost::regex_error& error)
6720 {
6721 reflex::regex_error_type code;
6722
6723 switch (error.code())
6724 {
6725 case boost::regex_constants::error_collate:
6726 code = reflex::regex_error::invalid_collating;
6727 break;
6728 case boost::regex_constants::error_ctype:
6729 code = reflex::regex_error::invalid_class;
6730 break;
6731 case boost::regex_constants::error_escape:
6732 code = reflex::regex_error::invalid_escape;
6733 break;
6734 case boost::regex_constants::error_backref:
6735 code = reflex::regex_error::invalid_backreference;
6736 break;
6737 case boost::regex_constants::error_brack:
6738 code = reflex::regex_error::invalid_class;
6739 break;
6740 case boost::regex_constants::error_paren:
6741 code = reflex::regex_error::mismatched_parens;
6742 break;
6743 case boost::regex_constants::error_brace:
6744 code = reflex::regex_error::mismatched_braces;
6745 break;
6746 case boost::regex_constants::error_badbrace:
6747 code = reflex::regex_error::invalid_repeat;
6748 break;
6749 case boost::regex_constants::error_range:
6750 code = reflex::regex_error::invalid_class_range;
6751 break;
6752 case boost::regex_constants::error_space:
6753 code = reflex::regex_error::exceeds_limits;
6754 break;
6755 case boost::regex_constants::error_badrepeat:
6756 code = reflex::regex_error::invalid_repeat;
6757 break;
6758 case boost::regex_constants::error_complexity:
6759 code = reflex::regex_error::exceeds_limits;
6760 break;
6761 case boost::regex_constants::error_stack:
6762 code = reflex::regex_error::exceeds_limits;
6763 break;
6764 default:
6765 code = reflex::regex_error::invalid_syntax;
6766 }
6767
6768 throw reflex::regex_error(code, pattern, error.position() + 1);
6769 }
6770 #endif
6771 }
6772 else
6773 {
6774 // construct the RE/flex DFA-based pattern matcher and start matching files
6775 reflex::Pattern pattern(reflex::Matcher::convert(regex, convert_flags), "r");
6776 std::list<reflex::Pattern> patterns;
6777 Grep::Matchers matchers;
6778
6779 if (flag_fuzzy > 0)
6780 {
6781 reflex::FuzzyMatcher matcher(pattern, static_cast<uint16_t>(flag_fuzzy), reflex::Input(), matcher_options.c_str());
6782
6783 if (!bcnf.singleton_or_undefined())
6784 {
6785 std::string subregex;
6786
6787 for (const auto& i : bcnf.lists())
6788 {
6789 matchers.emplace_back();
6790
6791 auto& submatchers = matchers.back();
6792
6793 for (const auto& j : i)
6794 {
6795 if (j)
6796 {
6797 subregex.assign(pattern_options).append(*j);
6798 patterns.emplace_back(reflex::FuzzyMatcher::convert(subregex, convert_flags), "r");
6799 submatchers.emplace_back(new reflex::FuzzyMatcher(patterns.back(), reflex::Input(), matcher_options.c_str()));
6800 }
6801 else
6802 {
6803 submatchers.emplace_back();
6804 }
6805 }
6806 }
6807 }
6808
6809 if (threads > 1)
6810 {
6811 GrepMaster grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6812 grep.ugrep();
6813 }
6814 else
6815 {
6816 Grep grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6817 set_grep_handle(&grep);
6818 grep.ugrep();
6819 clear_grep_handle();
6820 }
6821 }
6822 else
6823 {
6824 reflex::Matcher matcher(pattern, reflex::Input(), matcher_options.c_str());
6825
6826 if (!bcnf.singleton_or_undefined())
6827 {
6828 std::string subregex;
6829
6830 for (const auto& i : bcnf.lists())
6831 {
6832 matchers.emplace_back();
6833
6834 auto& submatchers = matchers.back();
6835
6836 for (const auto& j : i)
6837 {
6838 if (j)
6839 {
6840 subregex.assign(pattern_options).append(*j);
6841 patterns.emplace_back(reflex::Matcher::convert(subregex, convert_flags), "r");
6842 submatchers.emplace_back(new reflex::Matcher(patterns.back(), reflex::Input(), matcher_options.c_str()));
6843 }
6844 else
6845 {
6846 submatchers.emplace_back();
6847 }
6848 }
6849 }
6850 }
6851
6852 if (threads > 1)
6853 {
6854 GrepMaster grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6855 grep.ugrep();
6856 }
6857 else
6858 {
6859 Grep grep(output, &matcher, bcnf.singleton_or_undefined() ? NULL : &matchers);
6860 set_grep_handle(&grep);
6861 grep.ugrep();
6862 clear_grep_handle();
6863 }
6864 }
6865
6866 nodes = pattern.nodes();
6867 edges = pattern.edges();
6868 words = pattern.words();
6869 nodes_time = static_cast<size_t>(pattern.nodes_time());
6870 edges_time = static_cast<size_t>(pattern.parse_time() + pattern.edges_time());
6871 words_time = static_cast<size_t>(pattern.words_time());
6872 }
6873
6874 // --format-end
6875 if (flag_format_end != NULL)
6876 format(flag_format_end, Stats::found_parts());
6877
6878 // --stats: display stats when we're done
6879 if (flag_stats != NULL)
6880 {
6881 Stats::report(output);
6882
6883 bcnf.report(output);
6884
6885 if (strcmp(flag_stats, "vm") == 0 && words > 0)
6886 fprintf(output, "VM memory: %zu nodes (%zums), %zu edges (%zums), %zu opcode words (%zums)" NEWLINESTR, nodes, nodes_time, edges, edges_time, words, words_time);
6887 }
6888
6889 // close the pipe to the forked pager
6890 if (flag_pager != NULL && output != NULL && output != stdout)
6891 pclose(output);
6892 }
6893
6894 // cancel the search
cancel_ugrep()6895 void cancel_ugrep()
6896 {
6897 std::unique_lock<std::mutex> lock(grep_handle_mutex);
6898 if (grep_handle != NULL)
6899 grep_handle->cancel();
6900 }
6901
6902 // set the handle to be able to use cancel_ugrep()
set_grep_handle(Grep * grep)6903 void set_grep_handle(Grep *grep)
6904 {
6905 std::unique_lock<std::mutex> lock(grep_handle_mutex);
6906 grep_handle = grep;
6907 }
6908
6909 // reset the grep handle
clear_grep_handle()6910 void clear_grep_handle()
6911 {
6912 std::unique_lock<std::mutex> lock(grep_handle_mutex);
6913 grep_handle = NULL;
6914 }
6915
6916 // search the specified files or standard input for pattern matches
ugrep()6917 void Grep::ugrep()
6918 {
6919 if (!flag_stdin && arg_files.empty())
6920 {
6921 recurse(1, ".");
6922 }
6923 else
6924 {
6925 // read each input file to find pattern matches
6926 if (flag_stdin)
6927 {
6928 Stats::score_file();
6929
6930 // search standard input
6931 search(NULL);
6932 }
6933
6934 #ifndef OS_WIN
6935 std::pair<std::set<ino_t>::iterator,bool> vino;
6936 #endif
6937
6938 for (const auto pathname : arg_files)
6939 {
6940 // stop after finding max-files matching files
6941 if (flag_max_files > 0 && Stats::found_parts() >= flag_max_files)
6942 break;
6943
6944 // stop when output is blocked or search cancelled
6945 if (out.eof || out.cancelled())
6946 break;
6947
6948 // search file or directory, get the basename from the file argument first
6949 const char *basename = strrchr(pathname, PATHSEPCHR);
6950 if (basename != NULL)
6951 ++basename;
6952 else
6953 basename = pathname;
6954
6955 ino_t inode = 0;
6956 uint64_t info;
6957
6958 // search file, unless searchable directory into which we should recurse
6959 switch (select(1, pathname, basename, DIRENT_TYPE_UNKNOWN, inode, info, true))
6960 {
6961 case Type::DIRECTORY:
6962 #ifndef OS_WIN
6963 if (flag_dereference)
6964 vino = visited.insert(inode);
6965 #endif
6966
6967 recurse(1, pathname);
6968
6969 #ifndef OS_WIN
6970 if (flag_dereference)
6971 visited.erase(vino.first);
6972 #endif
6973 break;
6974
6975 case Type::OTHER:
6976 search(pathname);
6977 break;
6978
6979 case Type::SKIP:
6980 break;
6981 }
6982 }
6983 }
6984 }
6985
6986 // search file or directory for pattern matches
select(size_t level,const char * pathname,const char * basename,int type,ino_t & inode,uint64_t & info,bool is_argument)6987 Grep::Type Grep::select(size_t level, const char *pathname, const char *basename, int type, ino_t& inode, uint64_t& info, bool is_argument)
6988 {
6989 if (*basename == '.' && !flag_hidden && !is_argument)
6990 return Type::SKIP;
6991
6992 #ifdef OS_WIN
6993
6994 DWORD attr = GetFileAttributesW(utf8_decode(pathname).c_str());
6995
6996 if (attr == INVALID_FILE_ATTRIBUTES)
6997 {
6998 errno = ENOENT;
6999 warning("cannot read", pathname);
7000 return Type::SKIP;
7001 }
7002
7003 if (!flag_hidden && !is_argument && ((attr & FILE_ATTRIBUTE_HIDDEN) || (attr & FILE_ATTRIBUTE_SYSTEM)))
7004 return Type::SKIP;
7005
7006 if ((attr & FILE_ATTRIBUTE_DIRECTORY))
7007 {
7008 if (flag_directories_action == Action::READ)
7009 {
7010 // directories cannot be read actually, so grep produces a warning message (errno is not set)
7011 is_directory(pathname);
7012 return Type::SKIP;
7013 }
7014
7015 if (is_argument || flag_directories_action == Action::RECURSE)
7016 {
7017 // --depth: recursion level exceeds max depth?
7018 if (flag_max_depth > 0 && level > flag_max_depth)
7019 return Type::SKIP;
7020
7021 // hard maximum recursion depth reached?
7022 if (level > MAX_DEPTH)
7023 {
7024 if (!flag_no_messages)
7025 fprintf(stderr, "%sugrep: %s%s%s recursion depth hit hard limit of %d\n", color_off, color_high, pathname, color_off, MAX_DEPTH);
7026 return Type::SKIP;
7027 }
7028
7029 // check for --exclude-dir and --include-dir constraints if pathname != "."
7030 if (strcmp(pathname, ".") != 0)
7031 {
7032 if (!flag_all_exclude_dir.empty())
7033 {
7034 // exclude directories whose pathname matches any one of the --exclude-dir globs unless negated with !
7035 bool ok = true;
7036 for (const auto& glob : flag_all_exclude_dir)
7037 {
7038 if (glob.front() == '!')
7039 {
7040 if (!ok && glob_match(pathname, basename, glob.c_str() + 1))
7041 ok = true;
7042 }
7043 else if (ok && glob_match(pathname, basename, glob.c_str()))
7044 {
7045 ok = false;
7046 }
7047 }
7048 if (!ok)
7049 return Type::SKIP;
7050 }
7051
7052 if (!flag_all_include_dir.empty())
7053 {
7054 // include directories whose pathname matches any one of the --include-dir globs unless negated with !
7055 bool ok = false;
7056 for (const auto& glob : flag_all_include_dir)
7057 {
7058 if (glob.front() == '!')
7059 {
7060 if (ok && glob_match(pathname, basename, glob.c_str() + 1))
7061 ok = false;
7062 }
7063 else if (!ok && glob_match(pathname, basename, glob.c_str()))
7064 {
7065 ok = true;
7066 }
7067 }
7068 if (!ok)
7069 return Type::SKIP;
7070 }
7071 }
7072
7073 return Type::DIRECTORY;
7074 }
7075 }
7076 else if ((attr & FILE_ATTRIBUTE_DEVICE) == 0 || flag_devices_action == Action::READ)
7077 {
7078 // --depth: recursion level not deep enough?
7079 if (flag_min_depth > 0 && level <= flag_min_depth)
7080 return Type::SKIP;
7081
7082 if (!flag_all_exclude.empty())
7083 {
7084 // exclude files whose pathname matches any one of the --exclude globs unless negated with !
7085 bool ok = true;
7086 for (const auto& glob : flag_all_exclude)
7087 {
7088 if (glob.front() == '!')
7089 {
7090 if (!ok && glob_match(pathname, basename, glob.c_str() + 1))
7091 ok = true;
7092 }
7093 else if (ok && glob_match(pathname, basename, glob.c_str()))
7094 {
7095 ok = false;
7096 }
7097 }
7098 if (!ok)
7099 return Type::SKIP;
7100 }
7101
7102 // check magic pattern against the file signature, when --file-magic=MAGIC is specified
7103 if (!flag_file_magic.empty())
7104 {
7105 FILE *file;
7106
7107 if (fopenw_s(&file, pathname, "rb") != 0)
7108 {
7109 warning("cannot read", pathname);
7110 return Type::SKIP;
7111 }
7112
7113 #ifdef HAVE_LIBZ
7114 if (flag_decompress)
7115 {
7116 zstreambuf streambuf(pathname, file);
7117 std::istream stream(&streambuf);
7118
7119 // file has the magic bytes we're looking for: search the file
7120 size_t match = magic_matcher.input(&stream).scan();
7121 if (match == flag_not_magic || match >= flag_min_magic)
7122 {
7123 fclose(file);
7124
7125 Stats::score_file();
7126
7127 return Type::OTHER;
7128 }
7129 }
7130 else
7131 #endif
7132 {
7133 size_t match = magic_matcher.input(reflex::Input(file, flag_encoding_type)).scan();
7134 if (match == flag_not_magic || match >= flag_min_magic)
7135 {
7136 fclose(file);
7137
7138 Stats::score_file();
7139
7140 return Type::OTHER;
7141 }
7142 }
7143
7144 fclose(file);
7145
7146 if (flag_all_include.empty())
7147 return Type::SKIP;
7148 }
7149
7150 if (!flag_all_include.empty())
7151 {
7152 // include files whose pathname matches any one of the --include globs unless negated with !
7153 bool ok = false;
7154 for (const auto& glob : flag_all_include)
7155 {
7156 if (glob.front() == '!')
7157 {
7158 if (ok && glob_match(pathname, basename, glob.c_str() + 1))
7159 ok = false;
7160 }
7161 else if (!ok && glob_match(pathname, basename, glob.c_str()))
7162 {
7163 ok = true;
7164 }
7165 }
7166 if (!ok)
7167 return Type::SKIP;
7168 }
7169
7170 Stats::score_file();
7171
7172 return Type::OTHER;
7173 }
7174
7175 #else
7176
7177 struct stat buf;
7178
7179 // if dir entry is unknown, use lstat() to check if pathname is a symlink
7180 if (type != DIRENT_TYPE_UNKNOWN || lstat(pathname, &buf) == 0)
7181 {
7182 // symlinks are followed when specified on the command line (unless option -p) or with options -R, -S, --dereference
7183 if ((is_argument && !flag_no_dereference) || flag_dereference || (type != DIRENT_TYPE_UNKNOWN ? type != DIRENT_TYPE_LNK : !S_ISLNK(buf.st_mode)))
7184 {
7185 // if we got a symlink, use stat() to check if pathname is a directory or a regular file, we also stat when sorting by stat info
7186 if (((flag_sort_key == Sort::NA || flag_sort_key == Sort::NAME) && type != DIRENT_TYPE_UNKNOWN && type != DIRENT_TYPE_LNK) || stat(pathname, &buf) == 0)
7187 {
7188 // check if directory
7189 if (type == DIRENT_TYPE_DIR || ((type == DIRENT_TYPE_UNKNOWN || type == DIRENT_TYPE_LNK) && S_ISDIR(buf.st_mode)))
7190 {
7191 if (flag_directories_action == Action::READ)
7192 {
7193 // directories cannot be read actually, so grep produces a warning message (errno is not set)
7194 is_directory(pathname);
7195 return Type::SKIP;
7196 }
7197
7198 if (is_argument || flag_directories_action == Action::RECURSE)
7199 {
7200 // --depth: recursion level exceeds max depth?
7201 if (flag_max_depth > 0 && level > flag_max_depth)
7202 return Type::SKIP;
7203
7204 // hard maximum recursion depth reached?
7205 if (level > MAX_DEPTH)
7206 {
7207 if (!flag_no_messages)
7208 fprintf(stderr, "%sugrep: %s%s%s recursion depth hit hard limit of %d\n", color_off, color_high, pathname, color_off, MAX_DEPTH);
7209 return Type::SKIP;
7210 }
7211
7212 // check for --exclude-dir and --include-dir constraints if pathname != "."
7213 if (strcmp(pathname, ".") != 0)
7214 {
7215 if (!flag_all_exclude_dir.empty())
7216 {
7217 // exclude directories whose pathname matches any one of the --exclude-dir globs unless negated with !
7218 bool ok = true;
7219 for (const auto& glob : flag_all_exclude_dir)
7220 {
7221 if (glob.front() == '!')
7222 {
7223 if (!ok && glob_match(pathname, basename, glob.c_str() + 1))
7224 ok = true;
7225 }
7226 else if (ok && glob_match(pathname, basename, glob.c_str()))
7227 {
7228 ok = false;
7229 }
7230 }
7231 if (!ok)
7232 return Type::SKIP;
7233 }
7234
7235 if (!flag_all_include_dir.empty())
7236 {
7237 // include directories whose pathname matches any one of the --include-dir globs unless negated with !
7238 bool ok = false;
7239 for (const auto& glob : flag_all_include_dir)
7240 {
7241 if (glob.front() == '!')
7242 {
7243 if (ok && glob_match(pathname, basename, glob.c_str() + 1))
7244 ok = false;
7245 }
7246 else if (!ok && glob_match(pathname, basename, glob.c_str()))
7247 {
7248 ok = true;
7249 }
7250 }
7251 if (!ok)
7252 return Type::SKIP;
7253 }
7254 }
7255
7256 if (type != DIRENT_TYPE_DIR)
7257 inode = buf.st_ino;
7258
7259 info = Entry::sort_info(buf);
7260
7261 return Type::DIRECTORY;
7262 }
7263 }
7264 else if (type == DIRENT_TYPE_REG ? !is_output(inode) : (type == DIRENT_TYPE_UNKNOWN || type == DIRENT_TYPE_LNK) && S_ISREG(buf.st_mode) ? !is_output(buf.st_ino) : flag_devices_action == Action::READ)
7265 {
7266 // --depth: recursion level not deep enough?
7267 if (flag_min_depth > 0 && level <= flag_min_depth)
7268 return Type::SKIP;
7269
7270 if (!flag_all_exclude.empty())
7271 {
7272 // exclude files whose pathname matches any one of the --exclude globs unless negated with !
7273 bool ok = true;
7274 for (const auto& glob : flag_all_exclude)
7275 {
7276 if (glob.front() == '!')
7277 {
7278 if (!ok && glob_match(pathname, basename, glob.c_str() + 1))
7279 ok = true;
7280 }
7281 else if (ok && glob_match(pathname, basename, glob.c_str()))
7282 {
7283 ok = false;
7284 }
7285 }
7286 if (!ok)
7287 return Type::SKIP;
7288 }
7289
7290 // check magic pattern against the file signature, when --file-magic=MAGIC is specified
7291 if (!flag_file_magic.empty())
7292 {
7293 FILE *file;
7294
7295 if (fopenw_s(&file, pathname, "rb") != 0)
7296 {
7297 warning("cannot read", pathname);
7298 return Type::SKIP;
7299 }
7300
7301 #ifdef HAVE_LIBZ
7302 if (flag_decompress)
7303 {
7304 zstreambuf streambuf(pathname, file);
7305 std::istream stream(&streambuf);
7306
7307 // file has the magic bytes we're looking for: search the file
7308 size_t match = magic_matcher.input(&stream).scan();
7309 if (match == flag_not_magic || match >= flag_min_magic)
7310 {
7311 fclose(file);
7312
7313 Stats::score_file();
7314
7315 info = Entry::sort_info(buf);
7316
7317 return Type::OTHER;
7318 }
7319 }
7320 else
7321 #endif
7322 {
7323 // if file has the magic bytes we're looking for: search the file
7324 size_t match = magic_matcher.input(reflex::Input(file, flag_encoding_type)).scan();
7325 if (match == flag_not_magic || match >= flag_min_magic)
7326 {
7327 fclose(file);
7328
7329 Stats::score_file();
7330
7331 info = Entry::sort_info(buf);
7332
7333 return Type::OTHER;
7334 }
7335 }
7336
7337 fclose(file);
7338
7339 if (flag_all_include.empty())
7340 return Type::SKIP;
7341 }
7342
7343 if (!flag_all_include.empty())
7344 {
7345 // include directories whose basename matches any one of the --include-dir globs if not negated with !
7346 bool ok = false;
7347 for (const auto& glob : flag_all_include)
7348 {
7349 if (glob.front() == '!')
7350 {
7351 if (ok && glob_match(pathname, basename, glob.c_str() + 1))
7352 ok = false;
7353 }
7354 else if (!ok && glob_match(pathname, basename, glob.c_str()))
7355 {
7356 ok = true;
7357 }
7358 }
7359 if (!ok)
7360 return Type::SKIP;
7361 }
7362
7363 Stats::score_file();
7364
7365 info = Entry::sort_info(buf);
7366
7367 return Type::OTHER;
7368 }
7369 }
7370 }
7371 }
7372 else
7373 {
7374 warning(NULL, pathname);
7375 }
7376
7377 #endif
7378
7379 return Type::SKIP;
7380 }
7381
7382 // recurse over directory, searching for pattern matches in files and subdirectories
recurse(size_t level,const char * pathname)7383 void Grep::recurse(size_t level, const char *pathname)
7384 {
7385 // output is closed or cancelled?
7386 if (out.eof || out.cancelled())
7387 return;
7388
7389 #ifdef OS_WIN
7390
7391 WIN32_FIND_DATAW ffd;
7392
7393 std::string glob;
7394
7395 if (strcmp(pathname, ".") != 0)
7396 glob.assign(pathname).append("/*");
7397 else
7398 glob.assign("*");
7399
7400 std::wstring wglob = utf8_decode(glob);
7401 HANDLE hFind = FindFirstFileW(wglob.c_str(), &ffd);
7402
7403 if (hFind == INVALID_HANDLE_VALUE)
7404 {
7405 if (GetLastError() != ERROR_FILE_NOT_FOUND)
7406 warning("cannot open directory", pathname);
7407 return;
7408 }
7409
7410 #else
7411
7412 #ifdef HAVE_STATVFS
7413
7414 if (!exclude_fs_ids.empty() || !include_fs_ids.empty())
7415 {
7416 struct statvfs buf;
7417
7418 if (statvfs(pathname, &buf) == 0)
7419 {
7420 uint64_t id = static_cast<uint64_t>(buf.f_fsid);
7421
7422 if (exclude_fs_ids.find(id) != exclude_fs_ids.end())
7423 return;
7424
7425 if (!include_fs_ids.empty() && include_fs_ids.find(id) == include_fs_ids.end())
7426 return;
7427 }
7428 }
7429
7430 #endif
7431
7432 DIR *dir = opendir(pathname);
7433
7434 if (dir == NULL)
7435 {
7436 warning("cannot open directory", pathname);
7437 return;
7438 }
7439
7440 #endif
7441
7442 // --ignore-files: check if one or more are present to read and extend the file and dir exclusions
7443 // std::vector<std::string> *save_exclude = NULL, *save_exclude_dir = NULL, *save_not_exclude = NULL, *save_not_exclude_dir = NULL;
7444 std::unique_ptr<std::vector<std::string>> save_all_exclude, save_all_exclude_dir;
7445 bool saved = false;
7446
7447 if (!flag_ignore_files.empty())
7448 {
7449 std::string filename;
7450
7451 for (const auto& i : flag_ignore_files)
7452 {
7453 filename.assign(pathname).append(PATHSEPSTR).append(i);
7454
7455 FILE *file = NULL;
7456 if (fopenw_s(&file, filename.c_str(), "r") == 0)
7457 {
7458 if (!saved)
7459 {
7460 save_all_exclude = std::unique_ptr<std::vector<std::string>>(new std::vector<std::string>);
7461 save_all_exclude->swap(flag_all_exclude);
7462 save_all_exclude_dir = std::unique_ptr<std::vector<std::string>>(new std::vector<std::string>);
7463 save_all_exclude_dir->swap(flag_all_exclude_dir);
7464
7465 saved = true;
7466 }
7467
7468 Stats::ignore_file(filename);
7469 split_globs(file, flag_all_exclude, flag_all_exclude_dir);
7470 fclose(file);
7471 }
7472 }
7473 }
7474
7475 Stats::score_dir();
7476
7477 std::vector<Entry> content;
7478 std::vector<Entry> subdirs;
7479 std::string dirpathname;
7480
7481 #ifdef OS_WIN
7482
7483 std::string cFileName;
7484
7485 do
7486 {
7487 cFileName.assign(utf8_encode(ffd.cFileName));
7488
7489 // search directory entries that aren't . or .. or hidden when --no-hidden is enabled
7490 if (cFileName[0] != '.' || (flag_hidden && cFileName[1] != '\0' && cFileName[1] != '.'))
7491 {
7492 size_t len = strlen(pathname);
7493
7494 if (len == 1 && pathname[0] == '.')
7495 dirpathname.assign(cFileName);
7496 else if (len > 0 && pathname[len - 1] == PATHSEPCHR)
7497 dirpathname.assign(pathname).append(cFileName);
7498 else
7499 dirpathname.assign(pathname).append(PATHSEPSTR).append(cFileName);
7500
7501 ino_t inode = 0;
7502 uint64_t info = 0;
7503
7504 // --sort: get file info
7505 if (flag_sort_key != Sort::NA && flag_sort_key != Sort::NAME)
7506 {
7507 if (flag_sort_key == Sort::SIZE)
7508 {
7509 info = static_cast<uint64_t>(ffd.nFileSizeLow) | (static_cast<uint64_t>(ffd.nFileSizeHigh) << 32);
7510 }
7511 else
7512 {
7513 struct _FILETIME& time = flag_sort_key == Sort::USED ? ffd.ftLastAccessTime : flag_sort_key == Sort::CHANGED ? ffd.ftLastWriteTime : ffd.ftCreationTime;
7514 info = static_cast<uint64_t>(time.dwLowDateTime) | (static_cast<uint64_t>(time.dwHighDateTime) << 32);
7515 }
7516 }
7517
7518 // search dirpathname, unless searchable directory into which we should recurse
7519 switch (select(level + 1, dirpathname.c_str(), cFileName.c_str(), DIRENT_TYPE_UNKNOWN, inode, info))
7520 {
7521 case Type::DIRECTORY:
7522 subdirs.emplace_back(dirpathname, 0, info);
7523 break;
7524
7525 case Type::OTHER:
7526 if (flag_sort_key == Sort::NA)
7527 search(dirpathname.c_str());
7528 else
7529 content.emplace_back(dirpathname, 0, info);
7530 break;
7531
7532 case Type::SKIP:
7533 break;
7534 }
7535
7536 // stop after finding max-files matching files
7537 if (flag_max_files > 0 && Stats::found_parts() >= flag_max_files)
7538 break;
7539
7540 // stop when output is blocked or search cancelled
7541 if (out.eof || out.cancelled())
7542 break;
7543 }
7544 } while (FindNextFileW(hFind, &ffd) != 0);
7545
7546 FindClose(hFind);
7547
7548 #else
7549
7550 struct dirent *dirent = NULL;
7551
7552 while ((dirent = readdir(dir)) != NULL)
7553 {
7554 // search directory entries that aren't . or .. or hidden when --no-hidden is enabled
7555 if (dirent->d_name[0] != '.' || (flag_hidden && dirent->d_name[1] != '\0' && dirent->d_name[1] != '.'))
7556 {
7557 size_t len = strlen(pathname);
7558
7559 if (len == 1 && pathname[0] == '.')
7560 dirpathname.assign(dirent->d_name);
7561 else if (len > 0 && pathname[len - 1] == PATHSEPCHR)
7562 dirpathname.assign(pathname).append(dirent->d_name);
7563 else
7564 dirpathname.assign(pathname).append(PATHSEPSTR).append(dirent->d_name);
7565
7566 Type type;
7567 ino_t inode;
7568 uint64_t info;
7569
7570 // search dirpathname, unless searchable directory into which we should recurse
7571 #if defined(HAVE_STRUCT_DIRENT_D_TYPE) && defined(HAVE_STRUCT_DIRENT_D_INO)
7572 inode = dirent->d_ino;
7573 type = select(level + 1, dirpathname.c_str(), dirent->d_name, dirent->d_type, inode, info);
7574 #else
7575 inode = 0;
7576 type = select(level + 1, dirpathname.c_str(), dirent->d_name, DIRENT_TYPE_UNKNOWN, inode, info);
7577 #endif
7578
7579 switch (type)
7580 {
7581 case Type::DIRECTORY:
7582 subdirs.emplace_back(dirpathname, inode, info);
7583 break;
7584
7585 case Type::OTHER:
7586 if (flag_sort_key == Sort::NA)
7587 search(dirpathname.c_str());
7588 else
7589 content.emplace_back(dirpathname, inode, info);
7590 break;
7591
7592 case Type::SKIP:
7593 break;
7594 }
7595
7596 // stop after finding max-files matching files
7597 if (flag_max_files > 0 && Stats::found_parts() >= flag_max_files)
7598 break;
7599
7600 // stop when output is blocked or search cancelled
7601 if (out.eof || out.cancelled())
7602 break;
7603 }
7604 }
7605
7606 closedir(dir);
7607
7608 #endif
7609
7610 // -Z and --sort=best: presearch the selected files to determine edit distance cost
7611 if (flag_fuzzy > 0 && flag_sort_key == Sort::BEST)
7612 {
7613 auto entry = content.begin();
7614 while (entry != content.end())
7615 {
7616 entry->cost = cost(entry->pathname.c_str());
7617
7618 // if a file has no match, remove it
7619 if (entry->cost == 65535)
7620 entry = content.erase(entry);
7621 else
7622 ++entry;
7623 }
7624 }
7625
7626 // --sort: sort the selected non-directory entries and search them
7627 if (flag_sort_key != Sort::NA)
7628 {
7629 if (flag_sort_key == Sort::NAME)
7630 {
7631 if (flag_sort_rev)
7632 std::sort(content.begin(), content.end(), Entry::rev_comp_by_path);
7633 else
7634 std::sort(content.begin(), content.end(), Entry::comp_by_path);
7635 }
7636 else if (flag_sort_key == Sort::BEST)
7637 {
7638 if (flag_sort_rev)
7639 std::sort(content.begin(), content.end(), Entry::rev_comp_by_best);
7640 else
7641 std::sort(content.begin(), content.end(), Entry::comp_by_best);
7642 }
7643 else
7644 {
7645 if (flag_sort_rev)
7646 std::sort(content.begin(), content.end(), Entry::rev_comp_by_info);
7647 else
7648 std::sort(content.begin(), content.end(), Entry::comp_by_info);
7649 }
7650
7651 // search the select sorted non-directory entries
7652 for (const auto& entry : content)
7653 {
7654 search(entry.pathname.c_str());
7655
7656 // stop after finding max-files matching files
7657 if (flag_max_files > 0 && Stats::found_parts() >= flag_max_files)
7658 break;
7659
7660 // stop when output is blocked or search cancelled
7661 if (out.eof || out.cancelled())
7662 break;
7663 }
7664 }
7665
7666 // --sort: sort the selected subdirectory entries
7667 if (flag_sort_key != Sort::NA)
7668 {
7669 if (flag_sort_key == Sort::NAME || flag_sort_key == Sort::BEST)
7670 {
7671 if (flag_sort_rev)
7672 std::sort(subdirs.begin(), subdirs.end(), Entry::rev_comp_by_path);
7673 else
7674 std::sort(subdirs.begin(), subdirs.end(), Entry::comp_by_path);
7675 }
7676 else
7677 {
7678 if (flag_sort_rev)
7679 std::sort(subdirs.begin(), subdirs.end(), Entry::rev_comp_by_info);
7680 else
7681 std::sort(subdirs.begin(), subdirs.end(), Entry::comp_by_info);
7682 }
7683 }
7684
7685 // recurse into the selected subdirectories
7686 for (const auto& entry : subdirs)
7687 {
7688 // stop after finding max-files matching files
7689 if (flag_max_files > 0 && Stats::found_parts() >= flag_max_files)
7690 break;
7691
7692 // stop when output is blocked or search cancelled
7693 if (out.eof || out.cancelled())
7694 break;
7695
7696 #ifndef OS_WIN
7697 // -R: check if this directory was visited before
7698 std::pair<std::set<ino_t>::iterator,bool> vino;
7699
7700 if (flag_dereference)
7701 {
7702 vino = visited.insert(entry.inode);
7703
7704 // if visited before, then do not recurse on this directory again
7705 if (!vino.second)
7706 continue;
7707 }
7708 #endif
7709
7710 recurse(level + 1, entry.pathname.c_str());
7711
7712 #ifndef OS_WIN
7713 if (flag_dereference)
7714 visited.erase(vino.first);
7715 #endif
7716 }
7717
7718 // --ignore-files: restore if changed
7719 if (saved)
7720 {
7721 save_all_exclude->swap(flag_all_exclude);
7722 save_all_exclude_dir->swap(flag_all_exclude_dir);
7723 }
7724 }
7725
7726 // -Z and --sort=best: perform a presearch to determine edit distance cost, returns 65535 when no match is found
cost(const char * pathname)7727 uint16_t Grep::cost(const char *pathname)
7728 {
7729 // stop when output is blocked
7730 if (out.eof)
7731 return 0;
7732
7733 try
7734 {
7735 // open (archive or compressed) file (pathname is NULL to read stdin), return on failure
7736 if (!open_file(pathname))
7737 return 0;
7738 }
7739
7740 catch (...)
7741 {
7742 // this should never happen
7743 warning("exception while opening", pathname);
7744
7745 return 0;
7746 }
7747
7748 // -Z: matcher is a FuzzyMatcher
7749 reflex::FuzzyMatcher *fuzzy_matcher = dynamic_cast<reflex::FuzzyMatcher*>(matcher);
7750
7751 uint16_t cost = 65535;
7752
7753 // -z: loop over extracted archive parts, when applicable
7754 do
7755 {
7756 try
7757 {
7758 if (init_read())
7759 {
7760 while (fuzzy_matcher->find())
7761 {
7762 if (fuzzy_matcher->edits() < cost)
7763 cost = fuzzy_matcher->edits();
7764
7765 // exact match?
7766 if (cost == 0)
7767 break;
7768 }
7769 }
7770 }
7771
7772 catch (...)
7773 {
7774 // this should never happen
7775 warning("exception while searching", pathname);
7776 }
7777
7778 // close file or -z: loop over next extracted archive parts, when applicable
7779 } while (close_file(pathname));
7780
7781 return cost;
7782 }
7783
7784 // search input and display pattern matches
search(const char * pathname)7785 void Grep::search(const char *pathname)
7786 {
7787 // stop when output is blocked
7788 if (out.eof)
7789 return;
7790
7791 try
7792 {
7793 // open (archive or compressed) file (pathname is NULL to read stdin), return on failure
7794 if (!open_file(pathname))
7795 return;
7796 }
7797
7798 catch (...)
7799 {
7800 // this should never happen
7801 warning("exception while opening", pathname);
7802
7803 return;
7804 }
7805
7806 // pathname is NULL when stdin is searched
7807 if (pathname == NULL)
7808 pathname = flag_label;
7809
7810 bool colorize = flag_apply_color || flag_tag != NULL;
7811 bool matched = false;
7812
7813 // -z: loop over extracted archive parts, when applicable
7814 do
7815 {
7816 try
7817 {
7818 size_t matches = 0;
7819
7820 // --files: reset the matching[] bitmask used in cnf_matching() for each matcher in matchers
7821 if (flag_files && matchers != NULL)
7822 {
7823 // hold the output
7824 out.hold();
7825
7826 // reset the bit corresponding to each matcher in matchers
7827 size_t n = matchers->size();
7828 matching.resize(0);
7829 matching.resize(n);
7830
7831 // reset the bit corresponding to the OR NOT terms of each matcher in matchers
7832 notmatching.resize(n);
7833 size_t j = 0;
7834 for (auto& i : *matchers)
7835 {
7836 notmatching[j].resize(0);
7837 notmatching[j].resize(i.size() > 0 ? i.size() - 1 : 0);
7838 ++j;
7839 }
7840 }
7841
7842 if (flag_quiet || flag_files_with_matches)
7843 {
7844 // option -q, -l, or -L
7845
7846 if (!init_read())
7847 goto exit_search;
7848
7849 // --format: whether to out.acquire() early before Stats::found_part()
7850 bool acquire = flag_format != NULL && (flag_format_open != NULL || flag_format_close != NULL);
7851
7852 while (matcher->find())
7853 {
7854 // --range: max line exceeded?
7855 if (flag_max_line > 0 && matcher->lineno() > flag_max_line)
7856 break;
7857
7858 if (matchers != NULL)
7859 {
7860 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
7861 const char *bol = matcher->bol();
7862
7863 // check CNF AND/OR/NOT matching
7864 if (!cnf_matching(bol, eol, acquire) || out.holding())
7865 continue;
7866 }
7867
7868 matches = 1;
7869 break;
7870 }
7871
7872 // --files: if we are still holding the output and CNF is finally satisfyable then a match was made
7873 if (flag_files && matchers != NULL)
7874 {
7875 if (!cnf_satisfied(acquire))
7876 goto exit_search;
7877
7878 matches = 1;
7879 }
7880
7881 // -v: invert
7882 if (flag_invert_match)
7883 matches = !matches;
7884
7885 if (matches > 0)
7886 {
7887 // --format-open or format-close: we must acquire lock early before Stats::found_part()
7888 if (acquire)
7889 out.acquire();
7890
7891 if (!flag_files || matchers == NULL)
7892 {
7893 // --max-files: max reached?
7894 if (!Stats::found_part())
7895 goto exit_search;
7896 }
7897
7898 // -l or -L
7899 if (flag_files_with_matches)
7900 {
7901 if (flag_format != NULL)
7902 {
7903 if (flag_format_open != NULL)
7904 out.format(flag_format_open, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
7905 out.format(flag_format, pathname, partname, 1, matcher, false, false);
7906 if (flag_format_close != NULL)
7907 out.format(flag_format_close, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
7908 }
7909 else
7910 {
7911 out.str(color_fn);
7912 if (color_hl != NULL)
7913 {
7914 out.str(color_hl);
7915 out.uri(color_wd);
7916 out.uri(pathname);
7917 out.str(color_st);
7918 }
7919 out.str(pathname);
7920 if (color_hl != NULL)
7921 {
7922 out.str(color_hl);
7923 out.str(color_st);
7924 }
7925 if (!partname.empty())
7926 {
7927 out.chr('{');
7928 out.str(partname);
7929 out.chr('}');
7930 }
7931 out.str(color_off);
7932
7933 if (flag_null)
7934 out.chr('\0');
7935 else
7936 out.nl();
7937 }
7938 }
7939 }
7940 }
7941 else if (flag_count)
7942 {
7943 // option -c
7944
7945 if (!init_read())
7946 goto exit_search;
7947
7948 // --format: whether to out.acquire() early before Stats::found_part()
7949 bool acquire = flag_format != NULL && (flag_format_open != NULL || flag_format_close != NULL);
7950
7951 if (flag_ungroup || flag_only_matching)
7952 {
7953 // -co or -cu: count the number of patterns matched in the file
7954
7955 while (matcher->find())
7956 {
7957 // --range: max line exceeded?
7958 if (flag_max_line > 0 && matcher->lineno() > flag_max_line)
7959 break;
7960
7961 if (matchers != NULL)
7962 {
7963 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
7964 const char *bol = matcher->bol();
7965
7966 // check CNF AND/OR/NOT matching, with --files acquire lock before Stats::found_part()
7967 if (!cnf_matching(bol, eol, acquire))
7968 continue;
7969 }
7970
7971 ++matches;
7972
7973 // -m: max number of matches reached?
7974 if (flag_max_count > 0 && matches >= flag_max_count)
7975 break;
7976 }
7977 }
7978 else
7979 {
7980 // -c without -o/-u: count the number of matching lines
7981
7982 size_t lineno = 0;
7983
7984 while (matcher->find())
7985 {
7986 size_t current_lineno = matcher->lineno();
7987
7988 if (lineno != current_lineno)
7989 {
7990 // --range: max line exceeded?
7991 if (flag_max_line > 0 && current_lineno > flag_max_line)
7992 break;
7993
7994 if (matchers != NULL)
7995 {
7996 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
7997 const char *bol = matcher->bol();
7998
7999 // check CNF AND/OR/NOT matching, with --files acquire lock before Stats::found_part()
8000 if (!cnf_matching(bol, eol, acquire))
8001 continue;
8002 }
8003
8004 ++matches;
8005
8006 // -m: max number of matches reached?
8007 if (flag_max_count > 0 && matches >= flag_max_count)
8008 break;
8009
8010 lineno = current_lineno;
8011 }
8012 }
8013
8014 // -c with -v: count non-matching lines
8015 if (flag_invert_match)
8016 {
8017 matches = matcher->lineno() - matches;
8018 if (matches > 0)
8019 --matches;
8020 }
8021 }
8022
8023 // --files: if we are still holding the output and CNF is not satisfyable then no global matches were made
8024 if (flag_files && matchers != NULL)
8025 {
8026 if (!cnf_satisfied(acquire))
8027 goto exit_search; // we cannot report 0 matches and ensure accurate output
8028 }
8029 else
8030 {
8031 // --format-open or --format-close: we must acquire lock early before Stats::found_part()
8032 if (acquire)
8033 out.acquire();
8034
8035 // --max-files: max reached?
8036 // unfortunately, allowing 'acquire' below produces "x matching + y in archives"
8037 // but without this we cannot produce correct format-open and format-close outputs
8038 if (matches > 0 || acquire)
8039 if (!Stats::found_part())
8040 goto exit_search;
8041 }
8042
8043 if (flag_format != NULL)
8044 {
8045 if (flag_format_open != NULL)
8046 out.format(flag_format_open, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
8047 out.format(flag_format, pathname, partname, matches, matcher, false, false);
8048 if (flag_format_close != NULL)
8049 out.format(flag_format_close, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
8050 }
8051 else
8052 {
8053 if (flag_with_filename || !partname.empty())
8054 {
8055 out.str(color_fn);
8056 if (color_hl != NULL)
8057 {
8058 out.str(color_hl);
8059 out.uri(color_wd);
8060 out.uri(pathname);
8061 out.str(color_st);
8062 }
8063 out.str(pathname);
8064 if (color_hl != NULL)
8065 {
8066 out.str(color_hl);
8067 out.str(color_st);
8068 }
8069 if (!partname.empty())
8070 {
8071 out.chr('{');
8072 out.str(partname);
8073 out.chr('}');
8074 }
8075 out.str(color_off);
8076
8077 if (flag_null)
8078 {
8079 out.chr('\0');
8080 }
8081 else
8082 {
8083 out.str(color_se);
8084 out.str(flag_separator);
8085 out.str(color_off);
8086 }
8087 }
8088 out.num(matches);
8089 out.nl();
8090 }
8091 }
8092 else if (flag_format != NULL)
8093 {
8094 // option --format
8095
8096 if (!init_read())
8097 goto exit_search;
8098
8099 // whether to out.acquire() early before Stats::found_part()
8100 bool acquire = flag_format_open != NULL || flag_format_close != NULL;
8101
8102 if (flag_invert_match)
8103 {
8104 // FormatInvertMatchHandler requires lineno to be set precisely, i.e. after skipping --range lines
8105 size_t lineno = flag_min_line > 0 ? flag_min_line - 1 : 0;
8106 bool binfile = false; // unused
8107 bool hex = false; // unused
8108 bool binary = false; // unused
8109 bool stop = false;
8110
8111 // construct event handler functor with captured *this and some of the locals
8112 FormatInvertMatchGrepHandler invert_match_handler(*this, pathname, lineno, binfile, hex, binary, matches, stop);
8113
8114 // register an event handler to display non-matching lines
8115 matcher->set_handler(&invert_match_handler);
8116
8117 // to get the context from the invert_match handler explicitly
8118 reflex::AbstractMatcher::Context context;
8119
8120 while (matcher->find())
8121 {
8122 size_t current_lineno = matcher->lineno();
8123
8124 if (lineno != current_lineno)
8125 {
8126 if (matchers != NULL)
8127 {
8128 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8129 const char *bol = matcher->bol();
8130
8131 // check CNF AND/OR/NOT matching
8132 if (!cnf_matching(bol, eol))
8133 continue;
8134 }
8135
8136 // get the lines before the matched line
8137 context = matcher->before();
8138
8139 // display non-matching lines up to this line
8140 if (context.len > 0)
8141 invert_match_handler(*matcher, context.buf, context.len, context.num);
8142
8143 // --range: max line exceeded?
8144 if (flag_max_line > 0 && current_lineno > flag_max_line)
8145 goto done_search;
8146
8147 // --max-files: max reached?
8148 if (stop)
8149 goto exit_search;
8150
8151 // -m: max number of matches reached?
8152 if (flag_max_count > 0 && matches >= flag_max_count)
8153 goto done_search;
8154
8155 // output blocked?
8156 if (out.eof)
8157 goto exit_search;
8158 }
8159
8160 lineno = current_lineno + matcher->lines() - 1;
8161 }
8162
8163 // get the remaining context
8164 context = matcher->after();
8165
8166 if (context.len > 0)
8167 invert_match_handler(*matcher, context.buf, context.len, context.num);
8168 }
8169 else
8170 {
8171 while (matcher->find())
8172 {
8173 // --range: max line exceeded?
8174 if (flag_max_line > 0 && matcher->lineno() > flag_max_line)
8175 break;
8176
8177 if (matchers != NULL)
8178 {
8179 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8180 const char *bol = matcher->bol();
8181
8182 // check CNF AND/OR/NOT matching
8183 if (!cnf_matching(bol, eol, acquire))
8184 continue;
8185 }
8186
8187 // output --format-open
8188 if (matches == 0)
8189 {
8190 if (flag_files && matchers != NULL)
8191 {
8192 // --format-open: we must acquire lock early before Stats::found_part()
8193 if (acquire && out.holding())
8194 {
8195 out.acquire();
8196
8197 // --max-files: max reached?
8198 if (!Stats::found_part())
8199 goto exit_search;
8200 }
8201 }
8202 else
8203 {
8204 // --format-open: we must acquire lock early before Stats::found_part()
8205 if (acquire)
8206 out.acquire();
8207
8208 // --max-files: max reached?
8209 if (!Stats::found_part())
8210 goto exit_search;
8211 }
8212
8213 if (flag_format_open != NULL)
8214 {
8215 out.format(flag_format_open, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
8216
8217 // --files: undo files count
8218 if (flag_files && matchers != NULL && out.holding())
8219 Stats::undo_found_part();
8220 }
8221 }
8222
8223 ++matches;
8224
8225 // output --format
8226 out.format(flag_format, pathname, partname, matches, matcher, matches > 1, matches > 1);
8227
8228 // -m: max number of matches reached?
8229 if (flag_max_count > 0 && matches >= flag_max_count)
8230 break;
8231
8232 out.check_flush();
8233 }
8234 }
8235
8236 // --files: if we are still holding the output and CNF is not satisfyable then no global matches were made
8237 if (flag_files && matchers != NULL)
8238 if (!cnf_satisfied(true))
8239 goto exit_search;
8240
8241 // output --format-close
8242 if (matches > 0 && flag_format_close != NULL)
8243 out.format(flag_format_close, pathname, partname, Stats::found_parts(), matcher, false, Stats::found_parts() > 1);
8244 }
8245 else if (flag_only_line_number)
8246 {
8247 // option --only-line-number
8248
8249 if (!init_read())
8250 goto exit_search;
8251
8252 size_t lineno = 0;
8253 const char *separator = flag_separator;
8254
8255 while (matcher->find())
8256 {
8257 size_t current_lineno = matcher->lineno();
8258
8259 separator = lineno != current_lineno ? flag_separator : "+";
8260
8261 if (lineno != current_lineno || flag_ungroup)
8262 {
8263 // --range: max line exceeded?
8264 if (flag_max_line > 0 && current_lineno > flag_max_line)
8265 break;
8266
8267 if (matchers != NULL)
8268 {
8269 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8270 const char *bol = matcher->bol();
8271
8272 // check CNF AND/OR/NOT matching
8273 if (!cnf_matching(bol, eol))
8274 continue;
8275 }
8276
8277 if (matches == 0 && (!flag_files || matchers == NULL))
8278 {
8279 // --max-files: max reached?
8280 if (!Stats::found_part())
8281 goto exit_search;
8282 }
8283
8284 ++matches;
8285
8286 out.header(pathname, partname, current_lineno, matcher, matcher->first(), separator, true);
8287
8288 // -m: max number of matches reached?
8289 if (flag_max_count > 0 && matches >= flag_max_count)
8290 break;
8291
8292 // output blocked?
8293 if (out.eof)
8294 goto exit_search;
8295
8296 lineno = current_lineno;
8297 }
8298 }
8299 }
8300 else if (flag_only_matching)
8301 {
8302 // option -o
8303
8304 if (!init_read())
8305 goto exit_search;
8306
8307 size_t lineno = 0;
8308 bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
8309 bool hex = false;
8310 bool nl = false;
8311
8312 while (matcher->find())
8313 {
8314 const char *begin = matcher->begin();
8315 size_t size = matcher->size();
8316 bool binary = flag_hex || (!flag_text && is_binary(begin, size));
8317
8318 if (hex && !binary)
8319 {
8320 out.dump.done();
8321 }
8322 else if (!hex && binary && nl)
8323 {
8324 out.nl();
8325 nl = false;
8326 }
8327
8328 size_t current_lineno = matcher->lineno();
8329
8330 if (lineno != current_lineno || flag_ungroup)
8331 {
8332 if (nl)
8333 {
8334 out.nl();
8335 nl = false;
8336 }
8337
8338 // --range: max line exceeded?
8339 if (flag_max_line > 0 && current_lineno > flag_max_line)
8340 break;
8341
8342 // -m: max number of matches reached?
8343 if (flag_max_count > 0 && matches >= flag_max_count)
8344 break;
8345
8346 // output blocked?
8347 if (out.eof)
8348 goto exit_search;
8349
8350 if (matchers != NULL)
8351 {
8352 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8353 const char *bol = matcher->bol();
8354
8355 // check CNF AND/OR/NOT matching
8356 if (!cnf_matching(bol, eol))
8357 continue;
8358 }
8359
8360 if (matches == 0 && (!flag_files || matchers == NULL))
8361 {
8362 // --max-files: max reached?
8363 if (!Stats::found_part())
8364 goto exit_search;
8365 }
8366
8367 if (binfile || (binary && !flag_hex && !flag_with_hex))
8368 {
8369 if (flag_binary_without_match)
8370 {
8371 matches = 0;
8372 }
8373 else
8374 {
8375 out.binary_file_matches(pathname, partname);
8376 matches = 1;
8377 }
8378
8379 if (flag_files && matchers != NULL && out.holding())
8380 continue;
8381
8382 goto done_search;
8383 }
8384
8385 ++matches;
8386
8387 if (!flag_no_header)
8388 {
8389 const char *separator = lineno != current_lineno ? flag_separator : "+";
8390 out.header(pathname, partname, current_lineno, matcher, matcher->first(), separator, binary);
8391 }
8392
8393 lineno = current_lineno;
8394 }
8395
8396 hex = binary;
8397
8398 if (binary)
8399 {
8400 if (flag_hex || flag_with_hex)
8401 {
8402 out.dump.next(matcher->first());
8403 out.dump.hex(Output::Dump::HEX_MATCH, matcher->first(), begin, size);
8404 }
8405 else
8406 {
8407 if (flag_binary_without_match)
8408 {
8409 matches = 0;
8410 }
8411 else
8412 {
8413 out.binary_file_matches(pathname, partname);
8414 matches = 1;
8415 }
8416
8417 if (flag_files && matchers != NULL && out.holding())
8418 continue;
8419
8420 goto done_search;
8421 }
8422
8423 lineno += matcher->lines() - 1;
8424 }
8425 else
8426 {
8427 // echo multi-line matches line-by-line
8428
8429 const char *from = begin;
8430 const char *to;
8431
8432 while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
8433 {
8434 out.str(match_ms);
8435 out.str(from, to - from);
8436 out.str(match_off);
8437 out.chr('\n');
8438
8439 out.header(pathname, partname, ++lineno, NULL, matcher->first() + (to - begin) + 1, "|", false);
8440
8441 from = to + 1;
8442 }
8443
8444 size -= from - begin;
8445
8446 if (size > 0)
8447 {
8448 bool lf_only = from[size - 1] == '\n';
8449 size -= lf_only;
8450 if (size > 0)
8451 {
8452 out.str(match_ms);
8453 out.str(from, size);
8454 out.str(match_off);
8455 }
8456 out.nl(lf_only);
8457 }
8458 else
8459 {
8460 nl = true;
8461 }
8462 }
8463 }
8464
8465 if (nl)
8466 out.nl();
8467
8468 if (hex)
8469 out.dump.done();
8470 }
8471 else if (flag_before_context == 0 && flag_after_context == 0 && !flag_any_line && !flag_invert_match)
8472 {
8473 // options -A, -B, -C, -y, -v are not specified
8474
8475 if (!init_read())
8476 goto exit_search;
8477
8478 size_t lineno = 0;
8479 bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
8480 bool hex = false;
8481 bool binary = false;
8482 const char *restline_data = NULL;
8483 size_t restline_size = 0;
8484 size_t restline_last = 0;
8485
8486 while (matcher->find())
8487 {
8488 size_t current_lineno = matcher->lineno();
8489
8490 if (lineno != current_lineno || flag_ungroup)
8491 {
8492 if (restline_data != NULL)
8493 {
8494 if (binary)
8495 {
8496 out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, restline_size);
8497 }
8498 else
8499 {
8500 bool lf_only = false;
8501 if (restline_size > 0)
8502 {
8503 lf_only = restline_data[restline_size - 1] == '\n';
8504 restline_size -= lf_only;
8505 if (restline_size > 0)
8506 {
8507 out.str(color_sl);
8508 out.str(restline_data, restline_size);
8509 out.str(color_off);
8510 }
8511 }
8512 out.nl(lf_only);
8513 }
8514
8515 restline_data = NULL;
8516 }
8517
8518 // --range: max line exceeded?
8519 if (flag_max_line > 0 && current_lineno > flag_max_line)
8520 break;
8521
8522 // -m: max number of matches reached?
8523 if (flag_max_count > 0 && matches >= flag_max_count)
8524 break;
8525
8526 // output blocked?
8527 if (out.eof)
8528 goto exit_search;
8529
8530 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8531 const char *bol = matcher->bol();
8532
8533 // check CNF AND/OR/NOT matching
8534 if (matchers != NULL && !cnf_matching(bol, eol))
8535 continue;
8536
8537 if (matches == 0 && (!flag_files || matchers == NULL))
8538 {
8539 // --max-files: max reached?
8540 if (!Stats::found_part())
8541 goto exit_search;
8542 }
8543
8544 binary = flag_hex || (!flag_text && is_binary(bol, eol - bol));
8545
8546 if (binfile || (binary && !flag_hex && !flag_with_hex))
8547 {
8548 if (flag_binary_without_match)
8549 {
8550 matches = 0;
8551 }
8552 else
8553 {
8554 out.binary_file_matches(pathname, partname);
8555 matches = 1;
8556 }
8557
8558 if (flag_files && matchers != NULL && out.holding())
8559 continue;
8560
8561 goto done_search;
8562 }
8563
8564 ++matches;
8565
8566 size_t border = matcher->border();
8567 size_t first = matcher->first();
8568 const char *begin = matcher->begin();
8569 const char *end = matcher->end();
8570 size_t size = matcher->size();
8571
8572 if (hex && !binary)
8573 out.dump.done();
8574
8575 if (!flag_no_header)
8576 {
8577 const char *separator = lineno != current_lineno ? flag_separator : "+";
8578 out.header(pathname, partname, current_lineno, matcher, first, separator, binary);
8579 }
8580
8581 hex = binary;
8582
8583 lineno = current_lineno;
8584
8585 if (binary)
8586 {
8587 out.dump.hex(Output::Dump::HEX_LINE, first - border, bol, border);
8588 out.dump.hex(Output::Dump::HEX_MATCH, first, begin, size);
8589
8590 if (flag_ungroup)
8591 {
8592 out.dump.hex(Output::Dump::HEX_LINE, matcher->last(), end, eol - end);
8593 out.dump.done();
8594 }
8595 else
8596 {
8597 restline.assign(end, eol - end);
8598 restline_data = restline.c_str();
8599 restline_size = restline.size();
8600 restline_last = matcher->last();
8601 }
8602
8603 lineno += matcher->lines() - 1;
8604 }
8605 else
8606 {
8607 out.str(color_sl);
8608 out.str(bol, border);
8609 out.str(color_off);
8610
8611 // echo multi-line matches line-by-line
8612
8613 const char *from = begin;
8614 const char *to;
8615
8616 while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
8617 {
8618 out.str(match_ms);
8619 out.str(from, to - from);
8620 out.str(match_off);
8621 out.chr('\n');
8622
8623 out.header(pathname, partname, ++lineno, NULL, first + (to - begin) + 1, "|", false);
8624
8625 from = to + 1;
8626 }
8627
8628 size -= from - begin;
8629 begin = from;
8630
8631 out.str(match_ms);
8632 out.str(begin, size);
8633 out.str(match_off);
8634
8635 if (flag_ungroup)
8636 {
8637 if (eol > end)
8638 {
8639 bool lf_only = end[eol - end - 1] == '\n';
8640 eol -= lf_only;
8641 if (eol > end)
8642 {
8643 out.str(color_sl);
8644 out.str(end, eol - end);
8645 out.str(color_off);
8646 }
8647 out.nl(lf_only);
8648 }
8649 else if (matcher->hit_end())
8650 {
8651 out.nl();
8652 }
8653 else
8654 {
8655 out.check_flush();
8656 }
8657 }
8658 else
8659 {
8660 restline.assign(end, eol - end);
8661 restline_data = restline.c_str();
8662 restline_size = restline.size();
8663 restline_last = matcher->last();
8664 }
8665 }
8666 }
8667 else
8668 {
8669 size_t size = matcher->size();
8670
8671 if (size > 0)
8672 {
8673 size_t lines = matcher->lines();
8674
8675 if (lines > 1 || colorize)
8676 {
8677 size_t first = matcher->first();
8678 size_t last = matcher->last();
8679 const char *begin = matcher->begin();
8680
8681 if (binary)
8682 {
8683 out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, first - restline_last);
8684 out.dump.hex(Output::Dump::HEX_MATCH, first, begin, size);
8685 }
8686 else
8687 {
8688 out.str(color_sl);
8689 out.str(restline_data, first - restline_last);
8690 out.str(color_off);
8691
8692 if (lines > 1)
8693 {
8694 // echo multi-line matches line-by-line
8695
8696 const char *from = begin;
8697 const char *to;
8698 size_t num = 1;
8699
8700 while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
8701 {
8702 out.str(match_ms);
8703 out.str(from, to - from);
8704 out.str(match_off);
8705 out.chr('\n');
8706
8707 out.header(pathname, partname, lineno + num, NULL, first + (to - begin) + 1, "|", false);
8708
8709 from = to + 1;
8710 ++num;
8711 }
8712
8713 size -= from - begin;
8714 begin = from;
8715 }
8716
8717 out.str(match_ms);
8718 out.str(begin, size);
8719 out.str(match_off);
8720 }
8721
8722 if (lines == 1)
8723 {
8724 restline_data += last - restline_last;
8725 restline_size -= last - restline_last;
8726 restline_last = last;
8727 }
8728 else
8729 {
8730 const char *eol = matcher->eol(true); // warning: call eol() before end()
8731 const char *end = matcher->end();
8732
8733 binary = flag_hex || (!flag_text && is_binary(end, eol - end));
8734
8735 if (hex && !binary)
8736 out.dump.done();
8737 else if (!hex && binary)
8738 out.nl();
8739
8740 if (hex != binary && !flag_no_header)
8741 out.header(pathname, partname, lineno + lines - 1, matcher, last, flag_separator, binary);
8742
8743 hex = binary;
8744
8745 if (flag_ungroup)
8746 {
8747 if (binary)
8748 {
8749 out.dump.hex(Output::Dump::HEX_LINE, matcher->last(), end, eol - end);
8750 out.dump.done();
8751 }
8752 else
8753 {
8754 if (eol > end)
8755 {
8756 bool lf_only = end[eol - end - 1] == '\n';
8757 eol -= lf_only;
8758 if (eol > end)
8759 {
8760 out.str(color_sl);
8761 out.str(end, eol - end);
8762 out.str(color_off);
8763 }
8764 out.nl(lf_only);
8765 }
8766 else if (matcher->hit_end())
8767 {
8768 out.nl();
8769 }
8770 else
8771 {
8772 out.check_flush();
8773 }
8774 }
8775 }
8776 else
8777 {
8778 restline.assign(end, eol - end);
8779 restline_data = restline.c_str();
8780 restline_size = restline.size();
8781 restline_last = last;
8782 }
8783
8784 lineno += lines - 1;
8785 }
8786 }
8787 }
8788 }
8789 }
8790
8791 if (restline_data != NULL)
8792 {
8793 if (binary)
8794 {
8795 out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, restline_size);
8796 }
8797 else
8798 {
8799 bool lf_only = false;
8800 if (restline_size > 0)
8801 {
8802 lf_only = restline_data[restline_size - 1] == '\n';
8803 restline_size -= lf_only;
8804 if (restline_size > 0)
8805 {
8806 out.str(color_sl);
8807 out.str(restline_data, restline_size);
8808 out.str(color_off);
8809 }
8810 }
8811 out.nl(lf_only);
8812 }
8813
8814 restline_data = NULL;
8815 }
8816
8817 if (binary)
8818 out.dump.done();
8819 }
8820 else if (flag_before_context == 0 && flag_after_context == 0 && !flag_any_line)
8821 {
8822 // option -v without -A, -B, -C, -y
8823
8824 if (!init_read())
8825 goto exit_search;
8826
8827 // InvertMatchHandler requires lineno to be set precisely, i.e. after skipping --range lines
8828 size_t lineno = flag_min_line > 0 ? flag_min_line - 1 : 0;
8829 bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
8830 bool hex = false;
8831 bool binary = false;
8832 bool stop = false;
8833
8834 // construct event handler functor with captured *this and some of the locals
8835 InvertMatchGrepHandler invert_match_handler(*this, pathname, lineno, binfile, hex, binary, matches, stop);
8836
8837 // register an event handler to display non-matching lines
8838 matcher->set_handler(&invert_match_handler);
8839
8840 // to get the context from the invert_match handler explicitly
8841 reflex::AbstractMatcher::Context context;
8842
8843 while (matcher->find())
8844 {
8845 size_t current_lineno = matcher->lineno();
8846
8847 if (lineno != current_lineno)
8848 {
8849 if (matchers != NULL)
8850 {
8851 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8852 const char *bol = matcher->bol();
8853
8854 // check CNF AND/OR/NOT matching
8855 if (!cnf_matching(bol, eol))
8856 continue;
8857 }
8858
8859 // get the lines before the matched line
8860 context = matcher->before();
8861
8862 // display non-matching lines up to this line
8863 if (context.len > 0)
8864 invert_match_handler(*matcher, context.buf, context.len, context.num);
8865
8866 if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
8867 break;
8868
8869 if (binary)
8870 out.dump.done();
8871
8872 // --range: max line exceeded?
8873 if (flag_max_line > 0 && current_lineno > flag_max_line)
8874 goto done_search;
8875
8876 // --max-files: max reached?
8877 if (stop)
8878 goto exit_search;
8879
8880 // -m: max number of matches reached?
8881 if (flag_max_count > 0 && matches >= flag_max_count)
8882 goto done_search;
8883
8884 // output blocked?
8885 if (out.eof)
8886 goto exit_search;
8887 }
8888
8889 lineno = current_lineno + matcher->lines() - 1;
8890 }
8891
8892 // get the remaining context
8893 context = matcher->after();
8894
8895 if (context.len > 0)
8896 invert_match_handler(*matcher, context.buf, context.len, context.num);
8897
8898 if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
8899 {
8900 if (flag_binary_without_match)
8901 matches = 0;
8902 else
8903 out.binary_file_matches(pathname, partname);
8904 }
8905
8906 if (binary)
8907 out.dump.done();
8908 }
8909 else if (flag_any_line)
8910 {
8911 // option -y
8912
8913 if (!init_read())
8914 goto exit_search;
8915
8916 // AnyLineGrepHandler requires lineno to be set precisely, i.e. after skipping --range lines
8917 size_t lineno = flag_min_line > 0 ? flag_min_line - 1 : 0;
8918 bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
8919 bool hex = false;
8920 bool binary = false;
8921 bool stop = false;
8922
8923 // to display the rest of the matching line
8924 const char *restline_data = NULL;
8925 size_t restline_size = 0;
8926 size_t restline_last = 0;
8927
8928 // construct event handler functor with captured *this and some of the locals
8929 AnyLineGrepHandler any_line_handler(*this, pathname, lineno, binfile, hex, binary, matches, stop, restline_data, restline_size, restline_last);
8930
8931 // register an event handler functor to display non-matching lines
8932 matcher->set_handler(&any_line_handler);
8933
8934 // to display colors with or without -v
8935 short v_hex_line = flag_invert_match ? Output::Dump::HEX_CONTEXT_LINE : Output::Dump::HEX_LINE;
8936 short v_hex_match = flag_invert_match ? Output::Dump::HEX_CONTEXT_MATCH : Output::Dump::HEX_MATCH;
8937 const char *v_color_sl = flag_invert_match ? color_cx : color_sl;
8938 const char *v_match_ms = flag_invert_match ? match_mc : match_ms;
8939
8940 // to get the context from the any_line handler explicitly
8941 reflex::AbstractMatcher::Context context;
8942
8943 while (matcher->find())
8944 {
8945 size_t current_lineno = matcher->lineno();
8946
8947 if (lineno != current_lineno || flag_ungroup)
8948 {
8949 if (restline_data != NULL)
8950 {
8951 if (binary)
8952 {
8953 out.dump.hex(v_hex_line, restline_last, restline_data, restline_size);
8954 }
8955 else
8956 {
8957 bool lf_only = false;
8958 if (restline_size > 0)
8959 {
8960 lf_only = restline_data[restline_size - 1] == '\n';
8961 restline_size -= lf_only;
8962 if (restline_size > 0)
8963 {
8964 out.str(v_color_sl);
8965 out.str(restline_data, restline_size);
8966 out.str(color_off);
8967 }
8968 }
8969 out.nl(lf_only);
8970 }
8971
8972 restline_data = NULL;
8973 }
8974
8975 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
8976 const char *bol = matcher->bol();
8977
8978 // check CNF AND/OR/NOT matching
8979 if (matchers != NULL && !cnf_matching(bol, eol))
8980 continue;
8981
8982 // get the lines before the matched line
8983 context = matcher->before();
8984
8985 if (context.len > 0)
8986 {
8987 any_line_handler(*matcher, context.buf, context.len, context.num);
8988
8989 if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
8990 {
8991 if (flag_binary_without_match)
8992 {
8993 matches = 0;
8994 }
8995 else
8996 {
8997 out.binary_file_matches(pathname, partname);
8998 matches = 1;
8999 }
9000
9001 if (flag_files && matchers != NULL && out.holding())
9002 continue;
9003
9004 goto done_search;
9005 }
9006 }
9007
9008 // --range: max line exceeded?
9009 if (flag_max_line > 0 && current_lineno > flag_max_line)
9010 break;
9011
9012 // --max-files: max reached?
9013 if (stop)
9014 goto exit_search;
9015
9016 if (!flag_invert_match)
9017 {
9018 if (matches == 0 && (!flag_files || matchers == NULL))
9019 {
9020 // --max-files: max reached?
9021 if (!Stats::found_part())
9022 goto exit_search;
9023 }
9024
9025 ++matches;
9026 }
9027
9028 // -m: max number of matches reached?
9029 if (flag_max_count > 0 && matches >= flag_max_count)
9030 break;
9031
9032 // output blocked?
9033 if (out.eof)
9034 goto exit_search;
9035
9036 binary = flag_hex || (!flag_text && is_binary(bol, eol - bol));
9037
9038 if (binfile || (binary && !flag_hex && !flag_with_hex))
9039 {
9040 if (flag_binary_without_match)
9041 {
9042 matches = 0;
9043 }
9044 else if (flag_invert_match)
9045 {
9046 lineno = current_lineno + matcher->lines() - 1;
9047 continue;
9048 }
9049 else
9050 {
9051 out.binary_file_matches(pathname, partname);
9052 matches = 1;
9053 }
9054
9055 if (flag_files && matchers != NULL && out.holding())
9056 continue;
9057
9058 goto done_search;
9059 }
9060
9061 size_t border = matcher->border();
9062 size_t first = matcher->first();
9063 const char *begin = matcher->begin();
9064 const char *end = matcher->end();
9065 size_t size = matcher->size();
9066
9067 if (hex && !binary)
9068 out.dump.done();
9069
9070 if (!flag_no_header)
9071 {
9072 const char *separator = lineno != current_lineno ? flag_invert_match ? "-" : flag_separator : "+";
9073 out.header(pathname, partname, current_lineno, matcher, first, separator, binary);
9074 }
9075
9076 hex = binary;
9077
9078 lineno = current_lineno;
9079
9080 if (binary)
9081 {
9082 out.dump.hex(v_hex_line, first - border, bol, border);
9083 out.dump.hex(v_hex_match, first, begin, size);
9084
9085 if (flag_ungroup)
9086 {
9087 out.dump.hex(v_hex_line, matcher->last(), end, eol - end);
9088 out.dump.done();
9089 }
9090 else
9091 {
9092 restline.assign(end, eol - end);
9093 restline_data = restline.c_str();
9094 restline_size = restline.size();
9095 restline_last = matcher->last();
9096 }
9097
9098 lineno += matcher->lines() - 1;
9099 }
9100 else
9101 {
9102 out.str(v_color_sl);
9103 out.str(bol, border);
9104 out.str(color_off);
9105
9106 // echo multi-line matches line-by-line
9107
9108 const char *from = begin;
9109 const char *to;
9110
9111 while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
9112 {
9113 out.str(v_match_ms);
9114 out.str(from, to - from);
9115 out.str(match_off);
9116 out.chr('\n');
9117
9118 out.header(pathname, partname, ++lineno, NULL, first + (to - begin) + 1, "|", false);
9119
9120 from = to + 1;
9121 }
9122
9123 size -= from - begin;
9124 begin = from;
9125
9126 out.str(v_match_ms);
9127 out.str(begin, size);
9128 out.str(match_off);
9129
9130 if (flag_ungroup)
9131 {
9132 if (eol > end)
9133 {
9134 bool lf_only = end[eol - end - 1] == '\n';
9135 eol -= end[eol - end - 1] == '\n';
9136 if (eol > end)
9137 {
9138 out.str(v_color_sl);
9139 out.str(end, eol - end);
9140 out.str(color_off);
9141 }
9142 out.nl(lf_only);
9143 }
9144 else if (matcher->hit_end())
9145 {
9146 out.nl();
9147 }
9148 else
9149 {
9150 out.check_flush();
9151 }
9152 }
9153 else
9154 {
9155 restline.assign(end, eol - end);
9156 restline_data = restline.c_str();
9157 restline_size = restline.size();
9158 restline_last = matcher->last();
9159 }
9160 }
9161 }
9162 else if (!binfile && (!binary || flag_hex || flag_with_hex))
9163 {
9164 size_t size = matcher->size();
9165
9166 if (size > 0)
9167 {
9168 size_t lines = matcher->lines();
9169
9170 if (lines > 1 || colorize)
9171 {
9172 size_t first = matcher->first();
9173 size_t last = matcher->last();
9174 const char *begin = matcher->begin();
9175
9176 if (binary)
9177 {
9178 out.dump.hex(v_hex_line, restline_last, restline_data, first - restline_last);
9179 out.dump.hex(v_hex_match, first, begin, size);
9180 }
9181 else
9182 {
9183 out.str(v_color_sl);
9184 out.str(restline_data, first - restline_last);
9185 out.str(color_off);
9186
9187 if (lines > 1)
9188 {
9189 // echo multi-line matches line-by-line
9190
9191 const char *from = begin;
9192 const char *to;
9193 size_t num = 1;
9194
9195 while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
9196 {
9197 out.str(v_match_ms);
9198 out.str(from, to - from);
9199 out.str(match_off);
9200 out.chr('\n');
9201
9202 out.header(pathname, partname, lineno + num, NULL, first + (to - begin) + 1, "|", false);
9203
9204 from = to + 1;
9205 ++num;
9206 }
9207
9208 size -= from - begin;
9209 begin = from;
9210 }
9211
9212 out.str(v_match_ms);
9213 out.str(begin, size);
9214 out.str(match_off);
9215 }
9216
9217 if (lines == 1)
9218 {
9219 restline_data += last - restline_last;
9220 restline_size -= last - restline_last;
9221 restline_last = last;
9222 }
9223 else
9224 {
9225 const char *eol = matcher->eol(true); // warning: call eol() before end()
9226 const char *end = matcher->end();
9227
9228 binary = flag_hex || (!flag_text && is_binary(end, eol - end));
9229
9230 if (hex && !binary)
9231 out.dump.done();
9232 else if (!hex && binary)
9233 out.nl();
9234
9235 if (hex != binary && !flag_no_header)
9236 out.header(pathname, partname, lineno + lines - 1, matcher, last, flag_separator, binary);
9237
9238 hex = binary;
9239
9240 if (flag_ungroup)
9241 {
9242 if (binary)
9243 {
9244 out.dump.hex(v_hex_line, matcher->last(), end, eol - end);
9245 out.dump.done();
9246 }
9247 else
9248 {
9249 if (eol > end)
9250 {
9251 bool lf_only = end[eol - end - 1] == '\n';
9252 eol -= lf_only;
9253 if (eol > end)
9254 {
9255 out.str(v_color_sl);
9256 out.str(end, eol - end);
9257 out.str(color_off);
9258 }
9259 out.nl(lf_only);
9260 }
9261 else if (matcher->hit_end())
9262 {
9263 out.nl();
9264 }
9265 else
9266 {
9267 out.check_flush();
9268 }
9269 }
9270 }
9271 else
9272 {
9273 restline.assign(end, eol - end);
9274 restline_data = restline.c_str();
9275 restline_size = restline.size();
9276 restline_last = last;
9277 }
9278
9279 lineno += lines - 1;
9280 }
9281 }
9282 }
9283 }
9284 }
9285
9286 if (restline_data != NULL)
9287 {
9288 if (binary)
9289 {
9290 out.dump.hex(v_hex_line, restline_last, restline_data, restline_size);
9291 }
9292 else
9293 {
9294 bool lf_only = false;
9295 if (restline_size > 0)
9296 {
9297 lf_only = restline_data[restline_size - 1] == '\n';
9298 restline_size -= lf_only;
9299 if (restline_size > 0)
9300 {
9301 out.str(v_color_sl);
9302 out.str(restline_data, restline_size);
9303 out.str(color_off);
9304 }
9305 }
9306 out.nl(lf_only);
9307 }
9308
9309 restline_data = NULL;
9310 }
9311
9312 // get the remaining context
9313 context = matcher->after();
9314
9315 if (context.len > 0)
9316 any_line_handler(*matcher, context.buf, context.len, context.num);
9317
9318 if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
9319 {
9320 if (flag_binary_without_match)
9321 matches = 0;
9322 else
9323 out.binary_file_matches(pathname, partname);
9324 }
9325
9326 if (binary)
9327 out.dump.done();
9328 }
9329 else if (!flag_invert_match)
9330 {
9331 // options -A, -B, -C without -v
9332
9333 if (!init_read())
9334 goto exit_search;
9335
9336 // ContextGrepHandler requires lineno to be set precisely, i.e. after skipping --range lines
9337 size_t lineno = flag_min_line > 0 ? flag_min_line - 1 : 0;
9338 bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
9339 bool hex = false;
9340 bool binary = false;
9341 bool stop = false;
9342
9343 // to display the rest of the matching line
9344 const char *restline_data = NULL;
9345 size_t restline_size = 0;
9346 size_t restline_last = 0;
9347
9348 // construct event handler functor with captured *this and some of the locals
9349 ContextGrepHandler context_handler(*this, pathname, lineno, binfile, hex, binary, matches, stop, restline_data, restline_size, restline_last);
9350
9351 // register an event handler functor to display non-matching lines
9352 matcher->set_handler(&context_handler);
9353
9354 // to get the context for the context handler explicitly
9355 reflex::AbstractMatcher::Context context;
9356
9357 while (matcher->find())
9358 {
9359 size_t current_lineno = matcher->lineno();
9360
9361 if (lineno != current_lineno || flag_ungroup)
9362 {
9363 if (restline_data != NULL)
9364 {
9365 if (binary)
9366 {
9367 out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, restline_size);
9368 }
9369 else
9370 {
9371 bool lf_only = false;
9372 if (restline_size > 0)
9373 {
9374 lf_only = restline_data[restline_size - 1] == '\n';
9375 restline_size -= lf_only;
9376 if (restline_size > 0)
9377 {
9378 out.str(color_sl);
9379 out.str(restline_data, restline_size);
9380 out.str(color_off);
9381 }
9382 }
9383 out.nl(lf_only);
9384 }
9385
9386 restline_data = NULL;
9387 }
9388
9389 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
9390 const char *bol = matcher->bol();
9391
9392 // check CNF AND/OR/NOT matching
9393 if (matchers != NULL && !cnf_matching(bol, eol))
9394 continue;
9395
9396 // get the lines before the matched line
9397 context = matcher->before();
9398
9399 if (context.len > 0)
9400 context_handler(*matcher, context.buf, context.len, context.num);
9401
9402 if (binfile || (binary && !flag_hex && !flag_with_hex))
9403 {
9404 if (flag_binary_without_match)
9405 {
9406 matches = 0;
9407 }
9408 else
9409 {
9410 out.binary_file_matches(pathname, partname);
9411 matches = 1;
9412 }
9413
9414 if (flag_files && matchers != NULL && out.holding())
9415 continue;
9416
9417 goto done_search;
9418 }
9419
9420 context_handler.output_before_context();
9421
9422 // --range: max line exceeded?
9423 if (flag_max_line > 0 && current_lineno > flag_max_line)
9424 break;
9425
9426 // --max-files: max reached?
9427 if (stop)
9428 goto exit_search;
9429
9430 if (matches == 0 && (!flag_files || matchers == NULL))
9431 {
9432 // --max-files: max reached?
9433 if (!Stats::found_part())
9434 goto exit_search;
9435 }
9436
9437 ++matches;
9438
9439 // -m: max number of matches reached?
9440 if (flag_max_count > 0 && matches >= flag_max_count)
9441 break;
9442
9443 // output blocked?
9444 if (out.eof)
9445 goto exit_search;
9446
9447 binary = flag_hex || (!flag_text && is_binary(bol, eol - bol));
9448
9449 if (binfile || (binary && !flag_hex && !flag_with_hex))
9450 {
9451 if (flag_binary_without_match)
9452 {
9453 matches = 0;
9454 }
9455 else
9456 {
9457 out.binary_file_matches(pathname, partname);
9458 matches = 1;
9459 }
9460
9461 if (flag_files && matchers != NULL && out.holding())
9462 continue;
9463
9464 goto done_search;
9465 }
9466
9467 size_t border = matcher->border();
9468 size_t first = matcher->first();
9469 const char *begin = matcher->begin();
9470 const char *end = matcher->end();
9471 size_t size = matcher->size();
9472
9473 if (hex && !binary)
9474 out.dump.done();
9475
9476 if (!flag_no_header)
9477 {
9478 const char *separator = lineno != current_lineno ? flag_invert_match ? "-" : flag_separator : "+";
9479 out.header(pathname, partname, current_lineno, matcher, first, separator, binary);
9480 }
9481
9482 hex = binary;
9483
9484 lineno = current_lineno;
9485
9486 if (binary)
9487 {
9488 out.dump.hex(Output::Dump::HEX_LINE, first - border, bol, border);
9489 out.dump.hex(Output::Dump::HEX_MATCH, first, begin, size);
9490
9491 if (flag_ungroup)
9492 {
9493 out.dump.hex(Output::Dump::HEX_LINE, matcher->last(), end, eol - end);
9494 out.dump.done();
9495 }
9496 else
9497 {
9498 restline.assign(end, eol - end);
9499 restline_data = restline.c_str();
9500 restline_size = restline.size();
9501 restline_last = matcher->last();
9502 }
9503
9504 lineno += matcher->lines() - 1;
9505 }
9506 else
9507 {
9508 out.str(color_sl);
9509 out.str(bol, border);
9510 out.str(color_off);
9511
9512 // echo multi-line matches line-by-line
9513
9514 const char *from = begin;
9515 const char *to;
9516
9517 while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
9518 {
9519 out.str(match_ms);
9520 out.str(from, to - from);
9521 out.str(match_off);
9522 out.chr('\n');
9523
9524 out.header(pathname, partname, ++lineno, NULL, first + (to - begin) + 1, "|", false);
9525
9526 from = to + 1;
9527 }
9528
9529 size -= from - begin;
9530 begin = from;
9531
9532 out.str(match_ms);
9533 out.str(begin, size);
9534 out.str(match_off);
9535
9536 if (flag_ungroup)
9537 {
9538 if (eol > end)
9539 {
9540 bool lf_only = end[eol - end - 1] == '\n';
9541 eol -= lf_only;
9542 if (eol > end)
9543 {
9544 out.str(color_sl);
9545 out.str(end, eol - end);
9546 out.str(color_off);
9547 }
9548 out.nl(lf_only);
9549 }
9550 else if (matcher->hit_end())
9551 {
9552 out.nl();
9553 }
9554 else
9555 {
9556 out.check_flush();
9557 }
9558 }
9559 else
9560 {
9561 restline.assign(end, eol - end);
9562 restline_data = restline.c_str();
9563 restline_size = restline.size();
9564 restline_last = matcher->last();
9565 }
9566 }
9567 }
9568 else
9569 {
9570 size_t size = matcher->size();
9571
9572 if (size > 0)
9573 {
9574 size_t lines = matcher->lines();
9575
9576 if (lines > 1 || colorize)
9577 {
9578 size_t first = matcher->first();
9579 size_t last = matcher->last();
9580 const char *begin = matcher->begin();
9581
9582 if (binary)
9583 {
9584 out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, first - restline_last);
9585 out.dump.hex(Output::Dump::HEX_MATCH, first, begin, size);
9586 }
9587 else
9588 {
9589 out.str(color_sl);
9590 out.str(restline_data, first - restline_last);
9591 out.str(color_off);
9592
9593 if (lines > 1)
9594 {
9595 // echo multi-line matches line-by-line
9596
9597 const char *from = begin;
9598 const char *to;
9599 size_t num = 1;
9600
9601 while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
9602 {
9603 out.str(match_ms);
9604 out.str(from, to - from);
9605 out.str(match_off);
9606 out.chr('\n');
9607
9608 out.header(pathname, partname, lineno + num, NULL, first + (to - begin) + 1, "|", false);
9609
9610 from = to + 1;
9611 ++num;
9612 }
9613
9614 size -= from - begin;
9615 begin = from;
9616 }
9617
9618 out.str(match_ms);
9619 out.str(begin, size);
9620 out.str(match_off);
9621 }
9622
9623 if (lines == 1)
9624 {
9625 restline_data += last - restline_last;
9626 restline_size -= last - restline_last;
9627 restline_last = last;
9628 }
9629 else
9630 {
9631 const char *eol = matcher->eol(true); // warning: call eol() before end()
9632 const char *end = matcher->end();
9633
9634 binary = flag_hex || (!flag_text && is_binary(end, eol - end));
9635
9636 if (hex && !binary)
9637 out.dump.done();
9638 else if (!hex && binary)
9639 out.nl();
9640
9641 if (hex != binary && !flag_no_header)
9642 out.header(pathname, partname, lineno + lines - 1, matcher, last, flag_separator, binary);
9643
9644 hex = binary;
9645
9646 if (flag_ungroup)
9647 {
9648 if (binary)
9649 {
9650 out.dump.hex(Output::Dump::HEX_LINE, matcher->last(), end, eol - end);
9651 out.dump.done();
9652 }
9653 else
9654 {
9655 if (eol > end)
9656 {
9657 bool lf_only = end[eol - end - 1] == '\n';
9658 eol -= lf_only;
9659 if (eol > end)
9660 {
9661 out.str(color_sl);
9662 out.str(end, eol - end);
9663 out.str(color_off);
9664 }
9665 out.nl(lf_only);
9666 }
9667 else if (matcher->hit_end())
9668 {
9669 out.nl();
9670 }
9671 else
9672 {
9673 out.check_flush();
9674 }
9675 }
9676 }
9677 else
9678 {
9679 restline.assign(end, eol - end);
9680 restline_data = restline.c_str();
9681 restline_size = restline.size();
9682 restline_last = last;
9683 }
9684
9685 lineno += lines - 1;
9686 }
9687 }
9688 }
9689 }
9690
9691 context_handler.set_after_lineno(lineno + 1);
9692 }
9693
9694 if (restline_data != NULL)
9695 {
9696 if (binary)
9697 {
9698 out.dump.hex(Output::Dump::HEX_LINE, restline_last, restline_data, restline_size);
9699 }
9700 else
9701 {
9702 bool lf_only = false;
9703 if (restline_size > 0)
9704 {
9705 lf_only = restline_data[restline_size - 1] == '\n';
9706 restline_size -= lf_only;
9707 if (restline_size > 0)
9708 {
9709 out.str(color_sl);
9710 out.str(restline_data, restline_size);
9711 out.str(color_off);
9712 }
9713 }
9714 out.nl(lf_only);
9715 }
9716
9717 restline_data = NULL;
9718 }
9719
9720 // get the remaining context
9721 context = matcher->after();
9722
9723 if (context.len > 0)
9724 context_handler(*matcher, context.buf, context.len, context.num);
9725
9726 if (binfile || (binary && !flag_hex && !flag_with_hex))
9727 {
9728 if (flag_binary_without_match)
9729 matches = 0;
9730 else if (matches > 0)
9731 out.binary_file_matches(pathname, partname);
9732 }
9733
9734 if (binary)
9735 out.dump.done();
9736 }
9737 else
9738 {
9739 // options -A, -B, -C with -v
9740
9741 if (!init_read())
9742 goto exit_search;
9743
9744 // InvertContextGrepHandler requires lineno to be set precisely, i.e. after skipping --range lines
9745 size_t lineno = flag_min_line > 0 ? flag_min_line - 1 : 0;
9746 size_t last_lineno = 0;
9747 size_t after = flag_after_context;
9748 bool binfile = !flag_text && !flag_hex && !flag_with_hex && init_is_binary();
9749 bool hex = false;
9750 bool binary = false;
9751 bool stop = false;
9752
9753 // to display the rest of the matching line
9754 const char *restline_data = NULL;
9755 size_t restline_size = 0;
9756 size_t restline_last = 0;
9757
9758 // construct event handler functor with captured *this and some of the locals
9759 InvertContextGrepHandler invert_context_handler(*this, pathname, lineno, binfile, hex, binary, matches, stop, restline_data, restline_size, restline_last);
9760
9761 // register an event handler functor to display non-matching lines
9762 matcher->set_handler(&invert_context_handler);
9763
9764 // to get the context for the invert_context handler explicitly
9765 reflex::AbstractMatcher::Context context;
9766
9767 while (matcher->find())
9768 {
9769 size_t current_lineno = matcher->lineno();
9770 size_t lines = matcher->lines();
9771
9772 if (last_lineno + 1 >= current_lineno)
9773 after += lines;
9774 else if (last_lineno != current_lineno)
9775 after = 0;
9776
9777 if (last_lineno != current_lineno)
9778 {
9779 if (restline_data != NULL)
9780 {
9781 if (binary)
9782 {
9783 out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, restline_last, restline_data, restline_size);
9784 }
9785 else
9786 {
9787 bool lf_only = false;
9788 if (restline_size > 0)
9789 {
9790 lf_only = restline_data[restline_size - 1] == '\n';
9791 restline_size -= lf_only;
9792 if (restline_size > 0)
9793 {
9794 out.str(color_cx);
9795 out.str(restline_data, restline_size);
9796 out.str(color_off);
9797 }
9798 }
9799 out.nl(lf_only);
9800 }
9801
9802 restline_data = NULL;
9803 }
9804
9805 const char *eol = matcher->eol(true); // warning: call eol() before bol() and end()
9806 const char *bol = matcher->bol();
9807
9808 // check CNF AND/OR/NOT matching
9809 if (matchers != NULL && !cnf_matching(bol, eol))
9810 continue;
9811
9812 // get the lines before the matched line
9813 context = matcher->before();
9814
9815 if (context.len > 0)
9816 {
9817 invert_context_handler(*matcher, context.buf, context.len, context.num);
9818
9819 if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
9820 {
9821 if (flag_binary_without_match)
9822 {
9823 matches = 0;
9824 }
9825 else
9826 {
9827 out.binary_file_matches(pathname, partname);
9828 matches = 1;
9829 }
9830
9831 goto done_search;
9832 }
9833 }
9834
9835 lineno = current_lineno;
9836
9837 // --range: max line exceeded?
9838 if (flag_max_line > 0 && lineno > flag_max_line)
9839 break;
9840
9841 // --max-files: max reached?
9842 if (stop)
9843 goto exit_search;
9844
9845 /* logically OK but dead code because -v
9846 if (matches == 0 && !flag_invert_match && (!flag_files || matchers == NULL))
9847 {
9848 // --max-files: max reached?
9849 if (!Stats::found_part())
9850 goto exit_search;
9851 }
9852 */
9853
9854 // -m: max number of matches reached?
9855 if (flag_max_count > 0 && matches >= flag_max_count)
9856 break;
9857
9858 // output blocked?
9859 if (out.eof)
9860 goto exit_search;
9861
9862 if (after < flag_after_context)
9863 {
9864 binary = flag_hex || (!flag_text && is_binary(bol, eol - bol));
9865
9866 if (binfile || (binary && !flag_hex && !flag_with_hex))
9867 {
9868 if (flag_binary_without_match)
9869 {
9870 matches = 0;
9871 }
9872 else // if (flag_invert_match) is true
9873 {
9874 lineno = last_lineno = current_lineno + matcher->lines() - 1;
9875 continue;
9876 }
9877 /* logically OK but dead code because -v
9878 else
9879 {
9880 out.binary_file_matches(pathname, partname);
9881 matches = 1;
9882 }
9883 */
9884
9885 goto done_search;
9886 }
9887
9888 size_t border = matcher->border();
9889 size_t first = matcher->first();
9890 const char *begin = matcher->begin();
9891 const char *end = matcher->end();
9892 size_t size = matcher->size();
9893
9894 if (hex && !binary)
9895 out.dump.done();
9896
9897 if (!flag_no_header)
9898 out.header(pathname, partname, lineno, matcher, first, "-", binary);
9899
9900 hex = binary;
9901
9902 if (binary)
9903 {
9904 if (flag_hex || flag_with_hex)
9905 {
9906 out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, first - border, bol, border);
9907 out.dump.hex(Output::Dump::HEX_CONTEXT_MATCH, first, begin, size);
9908
9909 restline.assign(end, eol - end);
9910 restline_data = restline.c_str();
9911 restline_size = restline.size();
9912 restline_last = matcher->last();
9913 }
9914 }
9915 else
9916 {
9917 out.str(color_cx);
9918 out.str(bol, border);
9919 out.str(color_off);
9920
9921 if (lines > 1)
9922 {
9923 // echo multi-line matches line-by-line
9924
9925 const char *from = begin;
9926 const char *to;
9927 size_t num = 1;
9928
9929 while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
9930 {
9931 out.str(match_mc);
9932 out.str(from, to - from);
9933 out.str(match_off);
9934 out.chr('\n');
9935
9936 out.header(pathname, partname, lineno + num, NULL, first + (to - begin) + 1, "-", false);
9937
9938 from = to + 1;
9939 ++num;
9940 }
9941
9942 size -= from - begin;
9943 begin = from;
9944 }
9945
9946 out.str(match_mc);
9947 out.str(begin, size);
9948 out.str(match_off);
9949
9950 restline.assign(end, eol - end);
9951 restline_data = restline.c_str();
9952 restline_size = restline.size();
9953 restline_last = matcher->last();
9954 }
9955 }
9956 else if (flag_before_context > 0)
9957 {
9958 binary = flag_hex || (!flag_text && is_binary(bol, eol - bol));
9959
9960 if (binfile || (binary && !flag_hex && !flag_with_hex))
9961 {
9962 if (flag_binary_without_match)
9963 {
9964 matches = 0;
9965 }
9966 else // if (flag_invert_match) is true
9967 {
9968 lineno = last_lineno = current_lineno + matcher->lines() - 1;
9969 continue;
9970 }
9971 /* logically OK but dead code because -v
9972 else
9973 {
9974 out.binary_file_matches(pathname, partname);
9975 matches = 1;
9976 }
9977 */
9978
9979 goto done_search;
9980 }
9981
9982 if (hex && !binary)
9983 out.dump.done();
9984 hex = binary;
9985
9986 const char *begin = matcher->begin();
9987 size_t size = matcher->size();
9988 size_t offset = matcher->first();
9989
9990 if (lines == 1)
9991 {
9992 invert_context_handler.add_before_context_line(bol, eol, matcher->columno(), offset - (begin - bol));
9993 invert_context_handler.add_before_context_match(begin - bol, size, offset);
9994 }
9995 else
9996 {
9997 // add lines to the before context
9998
9999 const char *from = begin;
10000 const char *to;
10001
10002 while ((to = static_cast<const char*>(memchr(from, '\n', eol - from))) != NULL)
10003 {
10004 if (from == begin)
10005 {
10006 invert_context_handler.add_before_context_line(bol, to + 1, matcher->columno(), offset - (begin - bol));
10007 invert_context_handler.add_before_context_match(begin - bol, to - from + 1, offset);
10008 }
10009 else
10010 {
10011 invert_context_handler.add_before_context_line(from, to + 1, 1, offset);
10012 invert_context_handler.add_before_context_match(0, to + 1 < from + size ? to - from + 1 : size, offset);
10013 }
10014
10015 size -= to - from + 1;
10016 offset += to - from + 1;
10017 from = to + 1;
10018 }
10019 }
10020 }
10021 }
10022 else if (after < flag_after_context)
10023 {
10024 size_t size = matcher->size();
10025
10026 if (size > 0)
10027 {
10028 if (lines > 1 || colorize)
10029 {
10030 size_t first = matcher->first();
10031 size_t last = matcher->last();
10032 const char *begin = matcher->begin();
10033
10034 if (binary)
10035 {
10036 out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, restline_last, restline_data, first - restline_last);
10037 out.dump.hex(Output::Dump::HEX_CONTEXT_MATCH, first, begin, size);
10038 }
10039 else
10040 {
10041 out.str(color_cx);
10042 out.str(restline_data, first - restline_last);
10043 out.str(color_off);
10044
10045 if (lines > 1)
10046 {
10047 // echo multi-line matches line-by-line
10048
10049 const char *from = begin;
10050 const char *to;
10051 size_t num = 1;
10052
10053 while ((to = static_cast<const char*>(memchr(from, '\n', size - (from - begin)))) != NULL)
10054 {
10055 out.str(match_mc);
10056 out.str(from, to - from);
10057 out.str(match_off);
10058 out.chr('\n');
10059
10060 out.header(pathname, partname, lineno + num, NULL, first + (to - begin) + 1, "-", false);
10061
10062 from = to + 1;
10063 ++num;
10064 }
10065
10066 size -= from - begin;
10067 begin = from;
10068 }
10069
10070 out.str(match_mc);
10071 out.str(begin, size);
10072 out.str(match_off);
10073 }
10074
10075 if (lines == 1)
10076 {
10077 restline_data += last - restline_last;
10078 restline_size -= last - restline_last;
10079 restline_last = last;
10080 }
10081 else
10082 {
10083 const char *eol = matcher->eol(true); // warning: call eol() before end()
10084 const char *end = matcher->end();
10085
10086 binary = flag_hex || (!flag_text && is_binary(end, eol - end));
10087
10088 if (hex && !binary)
10089 out.dump.done();
10090 else if (!hex && binary)
10091 out.nl();
10092
10093 if (hex != binary && !flag_no_header)
10094 out.header(pathname, partname, lineno + lines - 1, matcher, last, "-", binary);
10095
10096 hex = binary;
10097
10098 restline.assign(end, eol - end);
10099 restline_data = restline.c_str();
10100 restline_size = restline.size();
10101 restline_last = last;
10102 }
10103 }
10104 }
10105 }
10106 else
10107 {
10108 if (restline_data != NULL)
10109 {
10110 if (binary)
10111 {
10112 out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, restline_last, restline_data, restline_size);
10113 }
10114 else
10115 {
10116 bool lf_only = false;
10117 if (restline_size > 0)
10118 {
10119 lf_only = restline_data[restline_size - 1] == '\n';
10120 restline_size -= lf_only;
10121 if (restline_size > 0)
10122 {
10123 out.str(color_cx);
10124 out.str(restline_data, restline_size);
10125 out.str(color_off);
10126 }
10127 }
10128 out.nl(lf_only);
10129 }
10130
10131 restline_data = NULL;
10132 }
10133
10134 if (flag_before_context > 0)
10135 {
10136 const char *eol = matcher->eol(true); // warning: call eol() before bol()
10137 const char *bol = matcher->bol();
10138 const char *begin = matcher->begin();
10139 size_t size = matcher->size();
10140 size_t offset = matcher->first();
10141
10142 if (lines == 1)
10143 {
10144 invert_context_handler.add_before_context_match(begin - bol, size, offset);
10145 }
10146 else
10147 {
10148 // add lines to the before context
10149
10150 const char *end = matcher->end();
10151
10152 binary = flag_hex || (!flag_text && is_binary(end, eol - end));
10153
10154 if (binfile || (binary && !flag_hex && !flag_with_hex))
10155 {
10156 if (flag_binary_without_match)
10157 {
10158 matches = 0;
10159 }
10160 else // if (flag_invert_match) is true
10161 {
10162 lineno = last_lineno = current_lineno + matcher->lines() - 1;
10163 continue;
10164 }
10165 /* logically OK but dead code because -v
10166 else
10167 {
10168 out.binary_file_matches(pathname, partname);
10169 matches = 1;
10170 }
10171 */
10172
10173 goto done_search;
10174 }
10175
10176 if (hex && !binary)
10177 out.dump.done();
10178 hex = binary;
10179
10180 const char *from = begin;
10181 const char *to;
10182
10183 while ((to = static_cast<const char*>(memchr(from, '\n', eol - from))) != NULL)
10184 {
10185 if (from == begin)
10186 {
10187 invert_context_handler.add_before_context_match(begin - bol, to - from + 1, offset);
10188 }
10189 else
10190 {
10191 invert_context_handler.add_before_context_line(from, to + 1, 1, offset);
10192 invert_context_handler.add_before_context_match(0, to + 1 < from + size ? to - from + 1 : size, offset);
10193 }
10194
10195 size -= to - from + 1;
10196 offset += to - from + 1;
10197 from = to + 1;
10198 }
10199 }
10200 }
10201 }
10202
10203 lineno = last_lineno = current_lineno + lines - 1;
10204 }
10205
10206 if (restline_data != NULL)
10207 {
10208 if (binary)
10209 {
10210 out.dump.hex(Output::Dump::HEX_CONTEXT_LINE, restline_last, restline_data, restline_size);
10211 }
10212 else
10213 {
10214 bool lf_only = false;
10215 if (restline_size > 0)
10216 {
10217 lf_only = restline_data[restline_size - 1] == '\n';
10218 restline_size -= lf_only;
10219 if (restline_size > 0)
10220 {
10221 out.str(color_cx);
10222 out.str(restline_data, restline_size);
10223 out.str(color_off);
10224 }
10225 }
10226 out.nl(lf_only);
10227 }
10228
10229 restline_data = NULL;
10230 }
10231
10232 // get the remaining context
10233 context = matcher->after();
10234
10235 if (context.len > 0)
10236 invert_context_handler(*matcher, context.buf, context.len, context.num);
10237
10238 if (matches > 0 && (binfile || (binary && !flag_hex && !flag_with_hex)))
10239 {
10240 if (flag_binary_without_match)
10241 matches = 0;
10242 else
10243 out.binary_file_matches(pathname, partname);
10244 }
10245
10246 if (binary)
10247 out.dump.done();
10248 }
10249
10250 done_search:
10251
10252 // --files: check if all CNF conditions are met globally to launch output or reset matches
10253 if (flag_files && matchers != NULL)
10254 if (!cnf_satisfied())
10255 matches = 0;
10256
10257 // any matches in this file or archive?
10258 if (matches > 0)
10259 matched = true;
10260
10261 // --break: add a line break when applicable
10262 if (flag_break && (matches > 0 || flag_any_line) && !flag_quiet && !flag_files_with_matches && !flag_count && flag_format == NULL)
10263 out.nl();
10264 }
10265
10266 catch (EXIT_SEARCH&)
10267 {
10268 // --files: cnf_matching() rejected a file, no need to search this file any further
10269 }
10270
10271 catch (...)
10272 {
10273 // this should never happen
10274 warning("exception while searching", pathname);
10275 }
10276
10277 exit_search:
10278
10279 // flush and release output to allow other workers to output results
10280 out.release();
10281
10282 // close file or -z: loop over next extracted archive parts, when applicable
10283 } while (close_file(pathname));
10284
10285 // this file or archive has a match
10286 if (matched)
10287 Stats::found_file();
10288 }
10289
10290 // read globs from a file and split them into files or dirs to include or exclude
split_globs(FILE * file,std::vector<std::string> & files,std::vector<std::string> & dirs)10291 void split_globs(FILE *file, std::vector<std::string>& files, std::vector<std::string>& dirs)
10292 {
10293 // read globs from the specified file or files
10294 reflex::BufferedInput input(file);
10295 std::string line;
10296
10297 while (true)
10298 {
10299 // read the next line
10300 if (getline(input, line))
10301 break;
10302
10303 // trim white space from either end
10304 trim(line);
10305
10306 // add glob to files or dirs using gitignore glob pattern rules
10307 if (!line.empty() && line.front() != '#')
10308 {
10309 if (line.front() != '!' || line.size() > 1)
10310 {
10311 if (line.back() == '/')
10312 {
10313 if (line.size() > 1)
10314 line.pop_back();
10315 dirs.emplace_back(line);
10316 }
10317 else
10318 {
10319 files.emplace_back(line);
10320 }
10321 }
10322 }
10323 }
10324 }
10325
10326 // display format with option --format-begin and --format-end
format(const char * format,size_t matches)10327 void format(const char *format, size_t matches)
10328 {
10329 const char *sep = NULL;
10330 size_t len = 0;
10331 const char *s = format;
10332 while (*s != '\0')
10333 {
10334 const char *a = NULL;
10335 const char *t = s;
10336 while (*s != '\0' && *s != '%')
10337 ++s;
10338 fwrite(t, 1, s - t, output);
10339 if (*s == '\0' || *(s + 1) == '\0')
10340 break;
10341 ++s;
10342 if (*s == '[')
10343 {
10344 a = ++s;
10345 while (*s != '\0' && *s != ']')
10346 ++s;
10347 if (*s == '\0' || *(s + 1) == '\0')
10348 break;
10349 ++s;
10350 }
10351 int c = *s;
10352 switch (c)
10353 {
10354 case 'T':
10355 if (flag_initial_tab)
10356 {
10357 if (a)
10358 fwrite(a, 1, s - a - 1, output);
10359 fputc('\t', output);
10360 }
10361 break;
10362
10363 case 'S':
10364 if (matches > 1)
10365 {
10366 if (a)
10367 fwrite(a, 1, s - a - 1, output);
10368 if (sep != NULL)
10369 fwrite(sep, 1, len, output);
10370 else
10371 fputs(flag_separator, output);
10372 }
10373 break;
10374
10375 case '$':
10376 sep = a;
10377 len = s - a - 1;
10378 break;
10379
10380 case 't':
10381 fputc('\t', output);
10382 break;
10383
10384 case 's':
10385 if (sep != NULL)
10386 fwrite(sep, 1, len, output);
10387 else
10388 fputs(flag_separator, output);
10389 break;
10390
10391 case '~':
10392 #ifdef OS_WIN
10393 fputc('\r', output);
10394 #endif
10395 fputc('\n', output);
10396 break;
10397
10398 case 'm':
10399 fprintf(output, "%zu", matches);
10400 break;
10401
10402 case '<':
10403 if (matches <= 1 && a)
10404 fwrite(a, 1, s - a - 1, output);
10405 break;
10406
10407 case '>':
10408 if (matches > 1 && a)
10409 fwrite(a, 1, s - a - 1, output);
10410 break;
10411
10412 case ',':
10413 case ':':
10414 case ';':
10415 case '|':
10416 if (matches > 1)
10417 fputc(c, output);
10418 break;
10419
10420 default:
10421 fputc(c, output);
10422 }
10423 ++s;
10424 }
10425 }
10426
// trim white space from either end of the line
//
// line: the string to trim in place, white space inside the line is kept
void trim(std::string& line)
{
  size_t len = line.length();
  size_t pos = 0;

  // isspace() requires an unsigned char value, a plain char may be negative for non-ASCII bytes such as UTF-8
  while (pos < len && isspace(static_cast<unsigned char>(line[pos])))
    ++pos;

  // drop the leading white space
  if (pos > 0)
    line.erase(0, pos);

  len -= pos;

  // find the end of the text that precedes the trailing white space
  pos = len;
  while (pos > 0 && isspace(static_cast<unsigned char>(line[pos - 1])))
    --pos;

  // drop the trailing white space
  if (pos < len)
    line.erase(pos, len - pos);
}
10447
10448 // trim path separators from an argv[] argument - important: modifies the argv[] string
trim_pathname_arg(const char * arg)10449 void trim_pathname_arg(const char *arg)
10450 {
10451 // remove trailing path separators after the drive prefix and path, if any - note: this truncates argv[] strings
10452 const char *path = strchr(arg, ':');
10453 if (path != NULL)
10454 ++path;
10455 else
10456 path = arg;
10457 size_t len = strlen(path);
10458 while (len > 1 && path[--len] == PATHSEPCHR)
10459 const_cast<char*>(path)[len] = '\0';
10460 }
10461
10462 // convert GREP_COLORS and set the color substring to the ANSI SGR codes
set_color(const char * colors,const char * parameter,char color[COLORLEN])10463 void set_color(const char *colors, const char *parameter, char color[COLORLEN])
10464 {
10465 if (colors != NULL)
10466 {
10467 const char *s = strstr(colors, parameter);
10468
10469 // check if substring parameter is present in colors
10470 if (s != NULL)
10471 {
10472 s += 3;
10473 char *t = color + 2;
10474
10475 #ifdef WITH_EASY_GREP_COLORS
10476
10477 // foreground colors: k=black, r=red, g=green, y=yellow b=blue, m=magenta, c=cyan, w=white
10478 // background colors: K=black, R=red, G=green, Y=yellow B=blue, M=magenta, C=cyan, W=white
10479 // bright colors: +k, +r, +g, +y, +b, +m, +c, +w, +K, +R, +G, +Y, +B, +M, +C, +W
10480 // modifiers: h=highlight, u=underline, i=invert, f=faint, n=normal, H=highlight off, U=underline off, I=invert off
10481 // semicolons are not required and abbreviations can be mixed with numeric ANSI SGR codes
10482
10483 uint8_t offset = 30;
10484 bool sep = false;
10485
10486 while (*s != '\0' && *s != ':' && t - color < COLORLEN - 6)
10487 {
10488 if (isdigit(*s))
10489 {
10490 if (sep)
10491 *t++ = ';';
10492 if (offset == 90)
10493 {
10494 *t++ = '1';
10495 *t++ = ';';
10496 offset = 30;
10497 }
10498 *t++ = *s++;
10499 while (isdigit(*s) && t - color < COLORLEN - 2)
10500 *t++ = *s++;
10501 sep = true;
10502 continue;
10503 }
10504
10505 if (*s == '+')
10506 {
10507 offset = 90;
10508 }
10509 else if (*s == 'n')
10510 {
10511 if (sep)
10512 *t++ = ';';
10513 *t++ = '0';
10514 sep = true;
10515 }
10516 else if (*s == 'h')
10517 {
10518 if (sep)
10519 *t++ = ';';
10520 *t++ = '1';
10521 sep = true;
10522 }
10523 else if (*s == 'H')
10524 {
10525 if (sep)
10526 *t++ = ';';
10527 *t++ = '2';
10528 *t++ = '1';
10529 offset = 30;
10530 sep = true;
10531 }
10532 else if (*s == 'f')
10533 {
10534 if (sep)
10535 *t++ = ';';
10536 *t++ = '2';
10537 sep = true;
10538 }
10539 else if (*s == 'u')
10540 {
10541 if (sep)
10542 *t++ = ';';
10543 *t++ = '4';
10544 sep = true;
10545 }
10546 else if (*s == 'U')
10547 {
10548 if (sep)
10549 *t++ = ';';
10550 *t++ = '2';
10551 *t++ = '4';
10552 sep = true;
10553 }
10554 else if (*s == 'i')
10555 {
10556 if (sep)
10557 *t++ = ';';
10558 *t++ = '7';
10559 sep = true;
10560 }
10561 else if (*s == 'I')
10562 {
10563 if (sep)
10564 *t++ = ';';
10565 *t++ = '2';
10566 *t++ = '7';
10567 sep = true;
10568 }
10569 else if (*s == ',' || *s == ';' || isspace(*s))
10570 {
10571 if (sep)
10572 *t++ = ';';
10573 sep = false;
10574 }
10575 else
10576 {
10577 const char *c = "krgybmcw KRGYBMCW";
10578 const char *k = strchr(c, *s);
10579
10580 if (k != NULL)
10581 {
10582 if (sep)
10583 *t++ = ';';
10584 uint8_t n = offset + static_cast<uint8_t>(k - c);
10585 if (n >= 100)
10586 {
10587 *t++ = '1';
10588 n -= 100;
10589 }
10590 *t++ = '0' + n / 10;
10591 *t++ = '0' + n % 10;
10592 offset = 30;
10593 sep = true;
10594 }
10595 }
10596
10597 ++s;
10598 }
10599
10600 #else
10601
10602 // traditional grep SGR parameters
10603 while ((*s == ';' || isdigit(*s)) && t - color < COLORLEN - 2)
10604 *t++ = *s++;
10605
10606 #endif
10607
10608 if (t > color + 2)
10609 {
10610 color[0] = '\033';
10611 color[1] = '[';
10612 *t++ = 'm';
10613 *t++ = '\0';
10614 }
10615 else
10616 {
10617 color[0] = '\0';
10618 }
10619 }
10620 }
10621 }
10622
10623 // convert unsigned decimal to non-negative size_t, produce error when conversion fails
strtonum(const char * string,const char * message)10624 size_t strtonum(const char *string, const char *message)
10625 {
10626 char *rest = NULL;
10627 size_t size = static_cast<size_t>(strtoull(string, &rest, 10));
10628 if (rest == NULL || *rest != '\0')
10629 usage(message, string);
10630 return size;
10631 }
10632
10633 // convert unsigned decimal to positive size_t, produce error when conversion fails or when the value is zero
strtopos(const char * string,const char * message)10634 size_t strtopos(const char *string, const char *message)
10635 {
10636 size_t size = strtonum(string, message);
10637 if (size == 0)
10638 usage(message, string);
10639 return size;
10640 }
10641
10642 // convert one or two comma-separated unsigned decimals specifying a range to positive size_t, produce error when conversion fails or when the range is invalid
strtopos2(const char * string,size_t & pos1,size_t & pos2,const char * message,bool optional_first)10643 void strtopos2(const char *string, size_t& pos1, size_t& pos2, const char *message, bool optional_first)
10644 {
10645 char *rest = const_cast<char*>(string);
10646 if (*string != ',')
10647 pos1 = static_cast<size_t>(strtoull(string, &rest, 10));
10648 else
10649 pos1 = 0;
10650 if (*rest == ',')
10651 pos2 = static_cast<size_t>(strtoull(rest + 1, &rest, 10));
10652 else if (optional_first)
10653 pos2 = pos1, pos1 = 0;
10654 else
10655 pos2 = 0;
10656 if (rest == NULL || *rest != '\0' || (pos2 > 0 && pos1 > pos2))
10657 usage(message, string);
10658 }
10659
10660 // convert unsigned decimal MAX fuzzy with optional prefix '+', '-', or '~' to positive size_t
strtofuzzy(const char * string,const char * message)10661 size_t strtofuzzy(const char *string, const char *message)
10662 {
10663 char *rest = NULL;
10664 size_t flags = 0;
10665 size_t max = 1;
10666 while (*string != '\0')
10667 {
10668 switch (*string)
10669 {
10670 case '+':
10671 flags |= reflex::FuzzyMatcher::INS;
10672 ++string;
10673 break;
10674 case '-':
10675 flags |= reflex::FuzzyMatcher::DEL;
10676 ++string;
10677 break;
10678 case '~':
10679 flags |= reflex::FuzzyMatcher::SUB;
10680 ++string;
10681 break;
10682 default:
10683 max = static_cast<size_t>(strtoull(string, &rest, 10));
10684 if (max == 0 || max > 255 || rest == NULL || *rest != '\0')
10685 usage(message, string);
10686 string = rest;
10687 }
10688 }
10689 return max | flags;
10690 }
10691
10692 // display diagnostic message
usage(const char * message,const char * arg,const char * valid)10693 void usage(const char *message, const char *arg, const char *valid)
10694 {
10695 std::cerr << "ugrep: " << message << (arg != NULL ? arg : "");
10696 if (valid != NULL)
10697 std::cerr << ", did you mean " << valid << "?";
10698 std::cerr << std::endl;
10699 if (!flag_usage_warnings)
10700 exit(EXIT_ERROR);
10701 ++warnings;
10702 }
10703
10704 // display usage/help information and exit
help(std::ostream & out)10705 void help(std::ostream& out)
10706 {
10707 out <<
10708 "Usage: ugrep [OPTIONS] [PATTERN] [-f FILE] [-e PATTERN] [FILE ...]\n\n\
10709 -A NUM, --after-context=NUM\n\
10710 Print NUM lines of trailing context after matching lines. Places\n\
10711 a --group-separator between contiguous groups of matches. See also\n\
10712 options -B, -C, and -y.\n\
10713 -a, --text\n\
10714 Process a binary file as if it were text. This is equivalent to\n\
10715 the --binary-files=text option. This option might output binary\n\
10716 garbage to the terminal, which can have problematic consequences if\n\
10717 the terminal driver interprets some of it as commands.\n\
10718 --and [[-e] PATTERN] ... -e PATTERN\n\
10719 Specify additional patterns to match. Patterns must be specified\n\
10720 with -e. Each -e PATTERN following this option is considered an\n\
10721 alternative pattern to match, i.e. each -e is interpreted as an OR\n\
10722 pattern. For example, -e A -e B --and -e C -e D matches lines with\n\
10723 (`A' or `B') and (`C' or `D'). Note that multiple -e PATTERN are\n\
10724 alternations that bind more tightly together than --and. Option\n\
10725 --stats displays the search patterns applied. See also options\n\
10726 --not, --andnot, --bool, --files, and --lines.\n\
10727 --andnot [[-e] PATTERN] ...\n\
10728 Combines --and --not. See also options --and, --not, and --bool.\n\
10729 -B NUM, --before-context=NUM\n\
10730 Print NUM lines of leading context before matching lines. Places\n\
10731 a --group-separator between contiguous groups of matches. See also\n\
10732 options -A, -C, and -y.\n\
10733 -b, --byte-offset\n\
10734 The offset in bytes of a matched line is displayed in front of the\n\
10735 respective matched line. If -u is specified, displays the offset\n\
10736 for each pattern matched on the same line. Byte offsets are exact\n\
10737 for ASCII, UTF-8, and raw binary input. Otherwise, the byte offset\n\
10738 in the UTF-8 normalized input is displayed.\n\
10739 --binary-files=TYPE\n\
10740 Controls searching and reporting pattern matches in binary files.\n\
10741 TYPE can be `binary', `without-match`, `text`, `hex`, and\n\
10742 `with-hex'. The default is `binary' to search binary files and to\n\
10743 report a match without displaying the match. `without-match'\n\
10744 ignores binary matches. `text' treats all binary files as text,\n\
10745 which might output binary garbage to the terminal, which can have\n\
10746 problematic consequences if the terminal driver interprets some of\n\
10747 it as commands. `hex' reports all matches in hexadecimal.\n\
10748 `with-hex' only reports binary matches in hexadecimal, leaving text\n\
10749 matches alone. A match is considered binary when matching a zero\n\
10750 byte or invalid UTF. Short options are -a, -I, -U, -W, and -X.\n\
10751 --bool, -%\n\
10752 Specifies Boolean query patterns. A Boolean query pattern is\n\
10753 composed of `AND', `OR', `NOT' operators and grouping with `(' `)'.\n\
10754 Spacing between subpatterns is the same as `AND', `|' is the same\n\
10755 as `OR', and a `-' is the same as `NOT'. The `OR' operator binds\n\
10756 more tightly than `AND'. For example, --bool 'A|B C|D' matches\n\
10757 lines with (`A' or `B') and (`C' or `D'), --bool 'A -B' matches\n\
10758 lines with `A' and not `B'. Operators `AND', `OR', `NOT' require\n\
10759 proper spacing. For example, --bool 'A OR B AND C OR D' matches\n\
10760 lines with (`A' or `B') and (`C' or `D'), --bool 'A AND NOT B'\n\
10761 matches lines with `A' without `B'. Quoted subpatterns are matched\n\
10762 literally as strings. For example, --bool 'A \"AND\"|\"OR\"' matches\n\
10763 lines with `A' and also either `AND' or `OR'. Parenthesis are used\n\
10764 for grouping. For example, --bool '(A B)|C' matches lines with `A'\n\
10765 and `B', or lines with `C'. Note that all subpatterns in a Boolean\n\
10766 query pattern are regular expressions, unless option -F is used.\n\
10767 Options -E, -F, -G, -P, and -Z can be combined with --bool to match\n\
10768 subpatterns as strings or regular expressions (-E is the default.)\n\
10769 This option does not apply to -f FILE patterns. Option --stats\n\
10770 displays the search patterns applied. See also options --and,\n\
10771 --andnot, --not, --files, and --lines.\n\
10772 --break\n\
10773 Adds a line break between results from different files.\n\
10774 -C NUM, --context=NUM\n\
10775 Print NUM lines of leading and trailing context surrounding each\n\
10776 match. Places a --group-separator between contiguous groups of\n\
10777 matches. See also options -A, -B, and -y.\n\
10778 -c, --count\n\
10779 Only a count of selected lines is written to standard output.\n\
10780 If -o or -u is specified, counts the number of patterns matched.\n\
10781 If -v is specified, counts the number of non-matching lines.\n\
10782 --color[=WHEN], --colour[=WHEN]\n\
10783 Mark up the matching text with the expression stored in the\n\
10784 GREP_COLOR or GREP_COLORS environment variable. WHEN can be\n\
10785 `never', `always', or `auto', where `auto' marks up matches only\n\
10786 when output on a terminal. The default is `auto'.\n\
10787 --colors=COLORS, --colours=COLORS\n\
10788 Use COLORS to mark up text. COLORS is a colon-separated list of\n\
10789 one or more parameters `sl=' (selected line), `cx=' (context line),\n\
10790 `mt=' (matched text), `ms=' (match selected), `mc=' (match\n\
10791 context), `fn=' (file name), `ln=' (line number), `cn=' (column\n\
10792 number), `bn=' (byte offset), `se=' (separator). Parameter values\n\
10793 are ANSI SGR color codes or `k' (black), `r' (red), `g' (green),\n\
10794 `y' (yellow), `b' (blue), `m' (magenta), `c' (cyan), `w' (white).\n\
10795 Upper case specifies background colors. A `+' qualifies a color as\n\
10796 bright. A foreground and a background color may be combined with\n\
10797 font properties `n' (normal), `f' (faint), `h' (highlight), `i'\n\
10798 (invert), `u' (underline). Parameter `hl' enables file name\n\
10799 hyperlinks. Parameter `rv' reverses the `sl=' and `cx=' parameters\n\
10800 with option -v. Selectively overrides GREP_COLORS.\n\
10801 --config[=FILE], ---[FILE]\n\
10802 Use configuration FILE. The default FILE is `.ugrep'. The working\n\
10803 directory is checked first for FILE, then the home directory. The\n\
10804 options specified in the configuration FILE are parsed first,\n\
10805 followed by the remaining options specified on the command line.\n\
10806 --confirm\n\
10807 Confirm actions in -Q query mode. The default is confirm.\n\
10808 --cpp\n\
10809 Output file matches in C++. See also options --format and -u.\n\
10810 --csv\n\
10811 Output file matches in CSV. If -H, -n, -k, or -b is specified,\n\
10812 additional values are output. See also options --format and -u.\n\
10813 -D ACTION, --devices=ACTION\n\
10814 If an input file is a device, FIFO or socket, use ACTION to process\n\
10815 it. By default, ACTION is `skip', which means that devices are\n\
10816 silently skipped. If ACTION is `read', devices read just as if\n\
10817 they were ordinary files.\n\
10818 -d ACTION, --directories=ACTION\n\
10819 If an input file is a directory, use ACTION to process it. By\n\
10820 default, ACTION is `skip', i.e., silently skip directories unless\n\
10821 specified on the command line. If ACTION is `read', warn when\n\
10822 directories are read as input. If ACTION is `recurse', read all\n\
10823 files under each directory, recursively, following symbolic links\n\
10824 only if they are on the command line. This is equivalent to the -r\n\
10825 option. If ACTION is `dereference-recurse', read all files under\n\
10826 each directory, recursively, following symbolic links. This is\n\
10827 equivalent to the -R option.\n\
10828 --depth=[MIN,][MAX], -1, -2 ... -9, --10, --11 ...\n\
10829 Restrict recursive searches from MIN to MAX directory levels deep,\n\
10830 where -1 (--depth=1) searches the specified path without recursing\n\
10831 into subdirectories. Note that -3 -5, -3-5, or -35 searches 3 to 5\n\
10832 levels deep. Enables -R if -R or -r is not specified.\n\
10833 --dotall\n\
10834 Dot `.' in regular expressions matches anything, including newline.\n\
10835 Note that `.*' matches all input and should not be used.\n\
10836 -E, --extended-regexp\n\
10837 Interpret patterns as extended regular expressions (EREs). This is\n\
10838 the default.\n\
10839 -e PATTERN, --regexp=PATTERN\n\
10840 Specify a PATTERN used during the search of the input: an input\n\
10841 line is selected if it matches any of the specified patterns.\n\
10842 Note that longer patterns take precedence over shorter patterns.\n\
10843 This option is most useful when multiple -e options are used to\n\
10844 specify multiple patterns, when a pattern begins with a dash (`-'),\n\
10845 to specify a pattern after option -f or after the FILE arguments.\n\
10846 --encoding=ENCODING\n\
10847 The encoding format of the input, where ENCODING can be:";
10848 for (int i = 0; encoding_table[i].format != NULL; ++i)
10849 out << (i == 0 ? "" : ",") << (i % 4 ? " " : "\n ") << "`" << encoding_table[i].format << "'";
10850 out << ".\n\
10851 --exclude=GLOB\n\
10852 Skip files whose name matches GLOB using wildcard matching, same as\n\
10853 -g ^GLOB. GLOB can use **, *, ?, and [...] as wildcards, and \\ to\n\
10854 quote a wildcard or backslash character literally. When GLOB\n\
10855 contains a `/', full pathnames are matched. Otherwise basenames\n\
10856 are matched. When GLOB ends with a `/', directories are excluded\n\
10857 as if --exclude-dir is specified. Otherwise files are excluded.\n\
10858 Note that --exclude patterns take priority over --include patterns.\n\
10859 GLOB should be quoted to prevent shell globbing. This option may\n\
10860 be repeated.\n\
10861 --exclude-dir=GLOB\n\
10862 Exclude directories whose name matches GLOB from recursive\n\
10863 searches, same as -g ^GLOB/. GLOB can use **, *, ?, and [...] as\n\
10864 wildcards, and \\ to quote a wildcard or backslash character\n\
10865 literally. When GLOB contains a `/', full pathnames are matched.\n\
10866 Otherwise basenames are matched. Note that --exclude-dir patterns\n\
10867 take priority over --include-dir patterns. GLOB should be quoted\n\
10868 to prevent shell globbing. This option may be repeated.\n\
10869 --exclude-from=FILE\n\
10870 Read the globs from FILE and skip files and directories whose name\n\
10871 matches one or more globs. A glob can use **, *, ?, and [...] as\n\
10872 wildcards, and \\ to quote a wildcard or backslash character\n\
10873 literally. When a glob contains a `/', full pathnames are matched.\n\
10874 Otherwise basenames are matched. When a glob ends with a `/',\n\
10875 directories are excluded as if --exclude-dir is specified.\n\
10876 Otherwise files are excluded. A glob starting with a `!' overrides\n\
10877 previously-specified exclusions by including matching files. Lines\n\
10878 starting with a `#' and empty lines in FILE are ignored. When FILE\n\
10879 is a `-', standard input is read. This option may be repeated.\n\
10880 --exclude-fs=MOUNTS\n\
10881 Exclude file systems specified by MOUNTS from recursive searches,\n\
10882 MOUNTS is a comma-separated list of mount points or pathnames of\n\
10883 directories on file systems. Note that --exclude-fs mounts take\n\
10884 priority over --include-fs mounts. This option may be repeated.\n"
10885 #ifndef HAVE_STATVFS
10886 "\
10887 This option is not available in this build configuration of ugrep.\n"
10888 #endif
10889 "\
10890 -F, --fixed-strings\n\
10891 Interpret pattern as a set of fixed strings, separated by newlines,\n\
10892 any of which is to be matched. This makes ugrep behave as fgrep.\n\
10893 If a PATTERN is specified, or -e PATTERN or -N PATTERN, then this\n\
10894 option has no effect on -f FILE patterns to allow -f FILE patterns\n\
10895 to narrow or widen the scope of the PATTERN search.\n\
10896 -f FILE, --file=FILE\n\
10897 Read newline-separated patterns from FILE. White space in patterns\n\
10898 is significant. Empty lines in FILE are ignored. If FILE does not\n\
10899 exist, the GREP_PATH environment variable is used as path to FILE.\n"
10900 #ifdef GREP_PATH
10901 "\
10902 If that fails, looks for FILE in " GREP_PATH ".\n"
10903 #endif
10904 "\
10905 When FILE is a `-', standard input is read. Empty files contain no\n\
10906 patterns; thus nothing is matched. This option may be repeated.\n"
10907 #ifndef OS_WIN
10908 "\
10909 --filter=COMMANDS\n\
10910 Filter files through the specified COMMANDS first before searching.\n\
10911 COMMANDS is a comma-separated list of `exts:command [option ...]',\n\
10912 where `exts' is a comma-separated list of filename extensions and\n\
10913 `command' is a filter utility. The filter utility should read from\n\
10914 standard input and write to standard output. Files matching one of\n\
10915 `exts' are filtered. When `exts' is `*', files with non-matching\n\
10916 extensions are filtered. One or more `option' separated by spacing\n\
10917 may be specified, which are passed verbatim to the command. A `%'\n\
10918 as `option' expands into the pathname to search. For example,\n\
10919 --filter='pdf:pdftotext % -' searches PDF files. The `%' expands\n\
10920 into a `-' when searching standard input. Option --label=.ext may\n\
10921 be used to specify extension `ext' when searching standard input.\n\
10922 --filter-magic-label=[+]LABEL:MAGIC\n\
10923 Associate LABEL with files whose signature \"magic bytes\" match the\n\
10924 MAGIC regex pattern. Only files that have no filename extension\n\
10925 are labeled, unless +LABEL is specified. When LABEL matches an\n\
10926 extension specified in --filter=COMMANDS, the corresponding command\n\
10927 is invoked. This option may be repeated.\n"
10928 #endif
10929 "\
10930 --format=FORMAT\n\
10931 Output FORMAT-formatted matches. For example --format='%f:%n:%O%~'\n\
10932 outputs matching lines `%O' with filename `%f` and line number `%n'\n\
10933 followed by a newline `%~'. Context options -A, -B, -C, and -y are\n\
10934 ignored. See `man ugrep' section FORMAT.\n\
10935 --free-space\n\
10936 Spacing (blanks and tabs) in regular expressions are ignored.\n\
10937 -G, --basic-regexp\n\
10938 Interpret pattern as a basic regular expression, i.e. make ugrep\n\
10939 behave as traditional grep.\n\
10940 -g GLOBS, --glob=GLOBS\n\
10941 Search only files whose name matches the specified comma-separated\n\
10942 list of GLOBS, same as --include='glob' for each `glob' in GLOBS.\n\
10943 When a `glob' is preceded by a `!' or a `^', skip files whose name\n\
10944 matches `glob', same as --exclude='glob'. When `glob' contains a\n\
10945 `/', full pathnames are matched. Otherwise basenames are matched.\n\
10946 When `glob' ends with a `/', directories are matched, same as\n\
10947 --include-dir='glob' and --exclude-dir='glob'. A leading `/'\n\
10948 matches the working directory. This option may be repeated and may\n\
10949 be combined with options -M, -O and -t to expand the recursive\n\
10950 search.\n\
10951 --group-separator[=SEP]\n\
10952 Use SEP as a group separator for context options -A, -B, and -C.\n\
10953 The default is a double hyphen (`--').\n\
10954 -H, --with-filename\n\
10955 Always print the filename with output lines. This is the default\n\
10956 when there is more than one file to search.\n\
10957 -h, --no-filename\n\
10958 Never print filenames with output lines. This is the default\n\
10959 when there is only one file (or only standard input) to search.\n\
10960 --heading, -+\n\
10961 Group matches per file. Adds a heading and a line break between\n\
10962 results from different files.\n\
10963 --help [WHAT], -? [WHAT]\n\
10964 Display a help message, specifically on WHAT when specified.\n\
10965 --hexdump=[1-8][a][b][c][h]\n\
10966 Output matches in 1 to 8 columns of 8 hexadecimal octets. The\n\
10967 default is 2 columns or 16 octets per line. Option `a' outputs a\n\
10968 `*' for all hex lines that are identical to the previous hex line,\n\
10969 `b' removes all space breaks, `c' removes the character column and\n\
10970 `h' removes hex spacing. Enables -X if -W or -X is not specified.\n\
10971 --hidden, -.\n\
10972 Search "
10973 #ifdef OS_WIN
10974 "Windows system and "
10975 #endif
10976 "hidden files and directories.\n\
10977 --hyperlink\n\
10978 Hyperlinks are enabled for file names when colors are enabled.\n\
10979 Same as --colors=hl.\n\
10980 -I, --ignore-binary\n\
10981 Ignore matches in binary files. This option is equivalent to the\n\
10982 --binary-files=without-match option.\n\
10983 -i, --ignore-case\n\
10984 Perform case insensitive matching. By default, ugrep is case\n\
10985 sensitive. By default, this option applies to ASCII letters only.\n\
10986 Use options -P and -i for Unicode case insensitive matching.\n\
10987 --ignore-files[=FILE]\n\
10988 Ignore files and directories matching the globs in each FILE that\n\
10989 is encountered in recursive searches. The default FILE is\n\
10990 `" DEFAULT_IGNORE_FILE "'. Matching files and directories located in the\n\
10991 directory of a FILE's location and in directories below are ignored\n\
10992 by temporarily overriding the --exclude and --exclude-dir globs,\n\
10993 as if --exclude-from=FILE is locally enforced. Globbing is the\n\
10994 same as --exclude-from=FILE and supports gitignore syntax, but\n\
10995 directories are not automatically excluded from searches (use a\n\
10996 glob ending with a `/' to identify directories to ignore, same as\n\
10997 git). Files and directories explicitly specified as command line\n\
10998 arguments are never ignored. This option may be repeated.\n\
10999 --include=GLOB\n\
11000 Search only files whose name matches GLOB using wildcard matching,\n\
11001 same as -g GLOB. GLOB can use **, *, ?, and [...] as wildcards,\n\
11002 and \\ to quote a wildcard or backslash character literally. When\n\
11003 GLOB contains a `/', full pathnames are matched. Otherwise\n\
11004 basenames are matched. When GLOB ends with a `/', directories are\n\
11005 included as if --include-dir is specified. Otherwise files are\n\
11006 included. Note that --exclude patterns take priority over\n\
11007 --include patterns. GLOB should be quoted to prevent shell\n\
11008 globbing. This option may be repeated.\n\
11009 --include-dir=GLOB\n\
11010 Only directories whose name matches GLOB are included in recursive\n\
11011 searches, same as -g GLOB/. GLOB can use **, *, ?, and [...] as\n\
11012 wildcards, and \\ to quote a wildcard or backslash character\n\
11013 literally. When GLOB contains a `/', full pathnames are matched.\n\
11014 Otherwise basenames are matched. Note that --exclude-dir patterns\n\
11015 take priority over --include-dir patterns. GLOB should be quoted\n\
11016 to prevent shell globbing. This option may be repeated.\n\
11017 --include-from=FILE\n\
11018 Read the globs from FILE and search only files and directories\n\
11019 whose name matches one or more globs. A glob can use **, *, ?, and\n\
11020 [...] as wildcards, and \\ to quote a wildcard or backslash\n\
11021 character literally. When a glob contains a `/', full pathnames\n\
11022 are matched. Otherwise basenames are matched. When a glob ends\n\
11023 with a `/', directories are included as if --include-dir is\n\
11024 specified. Otherwise files are included. A glob starting with a\n\
11025 `!' overrides previously-specified inclusions by excluding matching\n\
11026 files. Lines starting with a `#' and empty lines in FILE are\n\
11027 ignored. When FILE is a `-', standard input is read. This option\n\
11028 may be repeated.\n\
11029 --include-fs=MOUNTS\n\
11030 Only file systems specified by MOUNTS are included in recursive\n\
11031 searches. MOUNTS is a comma-separated list of mount points or\n\
11032 pathnames of directories on file systems. --include-fs=. restricts\n\
11033 recursive searches to the file system of the working directory\n\
11034 only. Note that --exclude-fs mounts take priority over\n\
11035 --include-fs mounts. This option may be repeated.\n"
11036 #ifndef HAVE_STATVFS
11037 "\
11038 This option is not available in this build configuration of ugrep.\n"
11039 #endif
11040 "\
11041 -J NUM, --jobs=NUM\n\
11042 Specifies the number of threads spawned to search files. By\n\
11043 default an optimum number of threads is spawned to search files\n\
11044 simultaneously. -J1 disables threading: files are searched in the\n\
11045 same order as specified.\n\
11046 -j, --smart-case\n\
11047 Perform case insensitive matching like option -i, unless a pattern\n\
11048 is specified with a literal ASCII upper case letter.\n\
11049 --json\n\
11050 Output file matches in JSON. If -H, -n, -k, or -b is specified,\n\
11051 additional values are output. See also options --format and -u.\n\
11052 -K FIRST[,LAST], --range=FIRST[,LAST]\n\
11053 Start searching at line FIRST, stop at line LAST when specified.\n\
11054 -k, --column-number\n\
11055 The column number of a matched pattern is displayed in front of the\n\
11056 respective matched line, starting at column 1. Tabs are expanded\n\
11057 when columns are counted, see also option --tabs.\n\
11058 -L, --files-without-match\n\
11059 Only the names of files not containing selected lines are written\n\
11060 to standard output. Pathnames are listed once per file searched.\n\
11061 If the standard input is searched, the string ``(standard input)''\n\
11062 is written.\n\
11063 -l, --files-with-matches\n\
11064 Only the names of files containing selected lines are written to\n\
11065 standard output. ugrep will only search a file until a match has\n\
11066 been found, making searches potentially less expensive. Pathnames\n\
11067 are listed once per file searched. If the standard input is\n\
11068 searched, the string ``(standard input)'' is written.\n\
11069 --label=LABEL\n\
11070 Displays the LABEL value when input is read from standard input\n\
11071 where a file name would normally be printed in the output.\n\
11072 Associates a filename extension with standard input when LABEL has\n\
11073 a suffix. The default value is `(standard input)'.\n\
11074 --line-buffered\n\
11075 Force output to be line buffered instead of block buffered.\n\
11076 --lines\n\
11077 Apply Boolean queries to match lines, the opposite of --files.\n\
11078 This is the default Boolean query mode to match specific lines.\n\
11079 -M MAGIC, --file-magic=MAGIC\n\
11080 Only files matching the signature pattern MAGIC are searched. The\n\
11081 signature \"magic bytes\" at the start of a file are compared to\n\
11082 the MAGIC regex pattern. When matching, the file will be searched.\n\
11083 When MAGIC is preceded by a `!' or a `^', skip files with matching\n\
11084 MAGIC signatures. This option may be repeated and may be combined\n\
11085 with options -O and -t to expand the search. Every file on the\n\
11086 search path is read, making searches potentially more expensive.\n\
11087 -m NUM, --max-count=NUM\n\
11088 Stop reading the input after NUM matches in each input file.\n\
11089 --match\n\
11090 Match all input. Same as specifying an empty pattern to search.\n\
11091 --max-files=NUM\n\
11092 Restrict the number of files matched to NUM. Note that --sort or\n\
11093 -J1 may be specified to produce replicable results. If --sort is\n\
11094 specified, the number of threads spawned is limited to NUM.\n\
11095 --mmap[=MAX]\n\
11096 Use memory maps to search files. By default, memory maps are used\n\
11097 under certain conditions to improve performance. When MAX is\n\
11098 specified, use up to MAX mmap memory per thread.\n\
11099 -N PATTERN, --neg-regexp=PATTERN\n\
11100 Specify a negative PATTERN used during the search of the input:\n\
11101 an input line is selected only if it matches any of the specified\n\
11102 patterns unless a subpattern of PATTERN. Same as -e (?^PATTERN).\n\
11103 Negative PATTERN matches are essentially removed before any other\n\
11104 patterns are matched. Note that longer patterns take precedence\n\
11105 over shorter patterns. This option may be repeated.\n\
11106 -n, --line-number\n\
11107 Each output line is preceded by its relative line number in the\n\
11108 file, starting at line 1. The line number counter is reset for\n\
11109 each file processed.\n\
11110 --no-group-separator\n\
11111 Removes the group separator line from the output for context\n\
11112 options -A, -B, and -C.\n\
11113 --not [-e] PATTERN\n\
11114 Specifies that PATTERN should not match. Note that -e A --not -e B\n\
11115 matches lines with `A' or lines without a `B'. To match lines with\n\
11116 `A' that have no `B', specify -e A --andnot -e B. Option --stats\n\
11117 displays the search patterns applied. See also options --and,\n\
11118 --andnot, --bool, --files, and --lines.\n\
11119 -O EXTENSIONS, --file-extension=EXTENSIONS\n\
11120 Search only files whose filename extensions match the specified\n\
11121 comma-separated list of EXTENSIONS, same as --include='*.ext' for\n\
11122 each `ext' in EXTENSIONS. When an `ext' is preceded by a `!' or a\n\
11123 `^', skip files whose filename extensions matches `ext', same as\n\
11124 --exclude='*.ext'. This option may be repeated and may be combined\n\
11125 with options -g, -M and -t to expand the recursive search.\n\
11126 -o, --only-matching\n\
11127 Print only the matching part of lines. When multiple lines match,\n\
11128 the line numbers with option -n are displayed using `|' as the\n\
11129 field separator for each additional line matched by the pattern.\n\
11130 If -u is specified, ungroups multiple matches on the same line.\n\
11131 This option cannot be combined with options -A, -B, -C, -v, and -y.\n\
11132 --only-line-number\n\
11133 The line number of the matching line in the file is output without\n\
11134 displaying the match. The line number counter is reset for each\n\
11135 file processed.\n\
11136 --files\n\
11137 Apply Boolean queries to match files, the opposite of --lines. A\n\
11138 file matches if all Boolean conditions are satisfied by the lines\n\
11139 matched in the file. For example, --files -e A --and -e B -e C\n\
11140 --andnot -e D matches a file if some lines match `A' and some lines\n\
11141 match (`B' or `C') and no line in the file matches `D'. May also\n\
11142 be specified as --files --bool 'A B|C -D'. Option -v cannot be\n\
11143 specified with --files. See also options --and, --andnot, --not,\n\
11144 --bool and --lines.\n\
11145 -P, --perl-regexp\n\
11146 Interpret PATTERN as a Perl regular expression"
11147 #if defined(HAVE_PCRE2)
11148 " using PCRE2.\n"
11149 #elif defined(HAVE_BOOST_REGEX)
11150 " using Boost.Regex.\n"
11151 #else
11152 ".\n\
11153 This option is not available in this build configuration of ugrep.\n"
11154 #endif
11155 "\
11156 Note that Perl pattern matching differs from the default grep POSIX\n\
11157 pattern matching.\n\
11158 -p, --no-dereference\n\
11159 If -R or -r is specified, no symbolic links are followed, even when\n\
11160 they are specified on the command line.\n\
11161 --pager[=COMMAND]\n\
11162 When output is sent to the terminal, uses COMMAND to page through\n\
11163 the output. The default COMMAND is `" DEFAULT_PAGER_COMMAND "'. Enables --heading\n\
11164 and --line-buffered.\n\
11165 --pretty\n\
11166 When output is sent to a terminal, enables --color, --heading, -n,\n\
11167 --sort and -T when not explicitly disabled or set.\n\
11168 -Q[DELAY], --query[=DELAY]\n\
11169 Query mode: user interface to perform interactive searches. This\n\
11170 mode requires an ANSI capable terminal. An optional DELAY argument\n\
11171 may be specified to reduce or increase the response time to execute\n\
11172 searches after the last key press, in increments of 100ms, where\n\
11173 the default is 5 (0.5s delay). No whitespace may be given between\n\
11174 -Q and its argument DELAY. Initial patterns may be specified with\n\
11175 -e PATTERN, i.e. a PATTERN argument requires option -e. Press F1\n\
11176 or CTRL-Z to view the help screen. Press F2 or CTRL-Y to invoke a\n\
11177 command to view or edit the file shown at the top of the screen.\n\
11178 The command can be specified with option --view, or defaults to\n\
11179 environment variable PAGER if defined, or EDITOR. Press Tab and\n\
11180 Shift-Tab to navigate directories and to select a file to search.\n\
11181 Press Enter to select lines to output. Press ALT-l for option -l\n\
11182 to list files, ALT-n for -n, etc. Non-option commands include\n\
11183 ALT-] to increase fuzziness and ALT-} to increase context. Enables\n\
11184 --heading. See also options --confirm and --view.\n\
11185 -q, --quiet, --silent\n\
11186 Quiet mode: suppress all output. ugrep will only search until a\n\
11187 match has been found.\n\
11188 -R, --dereference-recursive\n\
11189 Recursively read all files under each directory. Follow all\n\
11190 symbolic links, unlike -r. When -J1 is specified, files are\n\
11191 searched in the same order as specified. Note that when no FILE\n\
11192 arguments are specified and input is read from a terminal,\n\
11193 recursive searches are performed as if -R is specified.\n\
11194 -r, --recursive\n\
11195 Recursively read all files under each directory, following symbolic\n\
11196 links only if they are on the command line. When -J1 is specified,\n\
11197 files are searched in the same order as specified.\n\
11198 -S, --dereference\n\
11199 If -r is specified, all symbolic links are followed, like -R. The\n\
11200 default is not to follow symbolic links.\n\
11201 -s, --no-messages\n\
11202 Silent mode: nonexistent and unreadable files are ignored, i.e.\n\
11203 their error messages are suppressed.\n\
11204 --save-config[=FILE]\n\
11205 Save configuration FILE. By default `.ugrep' is saved. If FILE is\n\
11206 a `-', write the configuration to standard output.\n\
11207 --separator[=SEP]\n\
11208 Use SEP as field separator between file name, line number, column\n\
11209 number, byte offset, and the matched line. The default is a colon\n\
11210 (`:').\n\
11211 --sort[=KEY]\n\
11212 Displays matching files in the order specified by KEY in recursive\n\
11213 searches. KEY can be `name' to sort by pathname (default), `best'\n\
11214 to sort by best match with option -Z (sort by best match requires\n\
11215 two passes over the input files), `size' to sort by file size,\n\
11216 `used' to sort by last access time, `changed' to sort by last\n\
11217 modification time, and `created' to sort by creation time. Sorting\n\
11218 is reversed with `rname', `rbest', `rsize', `rused', `rchanged', or\n\
11219 `rcreated'. Archive contents are not sorted. Subdirectories are\n\
11220 sorted and displayed after matching files. FILE arguments are\n\
11221 searched in the same order as specified. Normally ugrep displays\n\
11222 matches in no particular order to improve performance.\n\
11223 --stats\n\
11224 Output statistics on the number of files and directories searched,\n\
11225 and the inclusion and exclusion constraints applied.\n\
11226 -T, --initial-tab\n\
11227 Add a tab space to separate the file name, line number, column\n\
11228 number, and byte offset with the matched line.\n\
11229 -t TYPES, --file-type=TYPES\n\
11230 Search only files associated with TYPES, a comma-separated list of\n\
11231 file types. Each file type corresponds to a set of filename\n\
11232 extensions passed to option -O and filenames passed to option -g.\n\
11233 For capitalized file types, the search is expanded to include files\n\
11234 with matching file signature magic bytes, as if passed to option\n\
11235 -M. When a type is preceded by a `!' or a `^', excludes files of\n\
11236 the specified type. This option may be repeated. The possible\n\
11237 file types can be (where -tlist displays a detailed list):";
11238 for (int i = 0; type_table[i].type != NULL; ++i)
11239 out << (i == 0 ? "" : ",") << (i % 7 ? " " : "\n ") << "`" << type_table[i].type << "'";
11240 out << ".\n\
11241 --tabs[=NUM]\n\
11242 Set the tab size to NUM to expand tabs for option -k. The value of\n\
11243 NUM may be 1, 2, 4, or 8. The default tab size is 8.\n\
11244 --tag[=TAG[,END]]\n\
11245 Disables colors to mark up matches with TAG. END marks the end of\n\
11246 a match if specified, otherwise TAG. The default is `___'.\n\
11247 -U, --binary\n\
11248 Disables Unicode matching for binary file matching, forcing PATTERN\n\
11249 to match bytes, not Unicode characters. For example, -U '\\xa3'\n\
11250 matches byte A3 (hex) instead of the Unicode code point U+00A3\n\
11251 represented by the UTF-8 sequence C2 A3. See also option --dotall.\n\
11252 -u, --ungroup\n\
11253 Do not group multiple pattern matches on the same matched line.\n\
11254 Output the matched line again for each additional pattern match,\n\
11255 using `+' as the field separator.\n\
11256 -V, --version\n\
11257 Display version information and exit.\n\
11258 -v, --invert-match\n\
11259 Selected lines are those not matching any of the specified\n\
11260 patterns.\n\
11261 --view[=COMMAND]\n\
11262 Use COMMAND to view/edit a file in query mode when pressing CTRL-Y.\n\
11263 -W, --with-hex\n\
11264 Output binary matches in hexadecimal, leaving text matches alone.\n\
11265 This option is equivalent to the --binary-files=with-hex option.\n\
11266 -w, --word-regexp\n\
11267 The PATTERN is searched for as a word, such that the matching text\n\
11268 is preceded by a non-word character and is followed by a non-word\n\
11269 character. Word characters are letters, digits, and the\n\
11270 underscore. With option -P, word characters are Unicode letters,\n\
11271 digits, and underscore. This option has no effect if -x is also\n\
11272 specified. If a PATTERN is specified, or -e PATTERN or -N PATTERN,\n\
11273 then this option has no effect on -f FILE patterns to allow -f FILE\n\
11274 patterns to narrow or widen the scope of the PATTERN search.\n\
11275 -X, --hex\n\
11276 Output matches in hexadecimal. This option is equivalent to the\n\
11277 --binary-files=hex option. See also option --hexdump.\n\
11278 -x, --line-regexp\n\
11279 Select only those matches that exactly match the whole line, as if\n\
11280 the patterns are surrounded by ^ and $. If a PATTERN is specified,\n\
11281 or -e PATTERN or -N PATTERN, then this option has no effect on\n\
11282 -f FILE patterns to allow -f FILE patterns to narrow or widen the\n\
11283 scope of the PATTERN search.\n\
11284 --xml\n\
11285 Output file matches in XML. If -H, -n, -k, or -b is specified,\n\
11286 additional values are output. See also options --format and -u.\n\
11287 -Y, --empty\n\
11288 Permits empty matches. By default, empty matches are disabled,\n\
11289 unless a pattern begins with `^' or ends with `$'. With this\n\
11290 option, empty-matching patterns such as x? and x*, match all input,\n\
11291 not only lines containing the character `x'.\n\
11292 -y, --any-line\n\
11293 Any matching or non-matching line is output. Non-matching lines\n\
11294 are output with the `-' separator as context of the matching lines.\n\
11295 See also options -A, -B, and -C.\n\
11296 -Z[[+-~]MAX], --fuzzy[=[+-~]MAX]\n\
11297 Fuzzy mode: report approximate pattern matches within MAX errors.\n\
11298 By default, MAX is 1: one deletion, insertion or substitution is\n\
11299 allowed. When `+' and/or `-' precede MAX, only insertions and/or\n\
11300 deletions are allowed, respectively. When `~' precedes MAX,\n\
11301 substitution counts as one error. For example, -Z+~3 allows up to\n\
11302 three insertions or substitutions, but no deletions. The first\n\
11303 character of an approximate match always matches the begin of a\n\
11304 pattern. Option --sort=best orders matching files by best match.\n\
11305 No whitespace may be given between -Z and its argument.\n\
11306 -z, --decompress\n\
11307 Decompress files to search, when compressed. Archives (.cpio,\n\
11308 .pax, .tar and .zip) and compressed archives (e.g. .taz, .tgz,\n\
11309 .tpz, .tbz, .tbz2, .tb2, .tz2, .tlz, .txz, .tzst) are searched and\n\
11310 matching pathnames of files in archives are output in braces. If\n\
11311 -g, -O, -M, or -t is specified, searches files within archives\n\
11312 whose name matches globs, matches file name extensions, matches\n\
11313 file signature magic bytes, or matches file types, respectively.\n"
11314 #ifndef HAVE_LIBZ
11315 "\
11316 This option is not available in this build configuration of ugrep.\n"
11317 #else
11318 "\
11319 Supported compression formats: gzip (.gz), compress (.Z), zip"
11320 #ifdef HAVE_LIBBZ2
11321 ",\n\
11322 bzip2 (requires suffix .bz, .bz2, .bzip2, .tbz, .tbz2, .tb2, .tz2)"
11323 #endif
11324 #ifdef HAVE_LIBLZMA
11325 ",\n\
11326 lzma and xz (requires suffix .lzma, .tlz, .xz, .txz)"
11327 #endif
11328 #ifdef HAVE_LIBLZ4
11329 ",\n\
11330 lz4 (requires suffix .lz4)"
11331 #endif
11332 #ifdef HAVE_LIBZSTD
11333 ",\n\
11334 zstd (requires suffix .zst, .zstd, .tzst)"
11335 #endif
11336 ".\n"
11337 #endif
11338 "\
11339 -0, --null\n\
11340 Prints a zero-byte (NUL) after the file name. This option can be\n\
11341 used with commands such as `find -print0' and `xargs -0' to process\n\
11342 arbitrary file names.\n\
11343 \n\
11344 Long options may start with `--no-' to disable, when applicable.\n\
11345 \n\
11346 The ugrep utility exits with one of the following values:\n\
11347 0 One or more lines were selected.\n\
11348 1 No lines were selected.\n\
11349 >1 An error occurred.\n\
11350 \n\
11351 If -q or --quiet or --silent is used and a line is selected, the exit\n\
11352 status is 0 even if an error occurred.\n\n";
11353 }
11354
11355 // display helpful information for WHAT, if specified, and exit
help(const char * what)11356 void help(const char *what)
11357 {
11358 if (what == NULL)
11359 {
11360 help(std::cout);
11361 }
11362 else
11363 {
11364 if (*what == '=')
11365 ++what;
11366
11367 if (strncmp(what, "--no", 4) == 0)
11368 what += 4;
11369
11370 if (*what == '\0')
11371 {
11372 help(std::cout);
11373 }
11374 else
11375 {
11376 std::stringstream text;
11377 help(text);
11378 const std::string& str = text.str();
11379
11380 int found = 0;
11381
11382 for (int pass = 0; pass < 2; ++pass)
11383 {
11384 size_t pos = 0;
11385
11386 while (true)
11387 {
11388 size_t end = str.find("\n -", pos + 1);
11389
11390 if (end == std::string::npos)
11391 end = str.find("\n\n", pos + 1);
11392
11393 if (end == std::string::npos)
11394 break;
11395
11396 size_t nl = str.find('\n', pos + 1);
11397
11398 // roughly find a case-independent match of WHAT
11399 for (size_t i = pos + 5; i < (pass == 0 ? nl : end); ++i)
11400 {
11401 size_t j = 0;
11402
11403 for (j = 0; what[j] != '\0'; ++j)
11404 if (((what[j] ^ str.at(i + j)) & ~0x20) != 0)
11405 break;
11406
11407 if (what[j] == '\0')
11408 {
11409 if (pass == 0 ? i < nl: i > nl)
11410 {
11411 if (found == 0 && pass == 0)
11412 std::cout << "\nOptions and arguments:\n";
11413 else if (found == 1 && pass == 1)
11414 std::cout << "\n\nOther options:\n";
11415 else if (found == 0)
11416 std::cout << "\nNo matching option, other relevant options:\n";
11417
11418 std::cout << str.substr(pos, end - pos);
11419 found = pass + 1;
11420 }
11421 break;
11422 }
11423 }
11424
11425 pos = end;
11426 }
11427 }
11428
11429 if (found == 0)
11430 std::cout << "ugrep --help: nothing appropriate for " << what;
11431
11432 std::cout << "\n\n";
11433 }
11434 }
11435
11436 exit(EXIT_ERROR);
11437 }
11438
11439 // display version info
version()11440 void version()
11441 {
11442 #if defined(HAVE_PCRE2)
11443 uint32_t tmp = 0;
11444 #endif
11445 std::cout << "ugrep " UGREP_VERSION " " PLATFORM <<
11446 #if defined(HAVE_AVX512BW)
11447 (reflex::have_HW_AVX512BW() ? " +avx512" : (reflex::have_HW_AVX2() ? " +avx2" : reflex::have_HW_SSE2() ? " +sse2" : " (no sse2!)")) <<
11448 #elif defined(HAVE_AVX2)
11449 (reflex::have_HW_AVX2() ? " +avx2" : reflex::have_HW_SSE2() ? " +sse2" : " (no sse2!)") <<
11450 #elif defined(HAVE_SSE2)
11451 (reflex::have_HW_SSE2() ? " +sse2" : " (no sse2!)") <<
11452 #elif defined(HAVE_NEON)
11453 " +neon" <<
11454 #endif
11455 #if defined(HAVE_PCRE2)
11456 (pcre2_config(PCRE2_CONFIG_JIT, &tmp) >= 0 && tmp != 0 ? " +pcre2_jit" : " +pcre2") <<
11457 #elif defined(HAVE_BOOST_REGEX)
11458 " +boost_regex" <<
11459 #endif
11460 #ifdef HAVE_LIBZ
11461 " +zlib" <<
11462 #endif
11463 #ifdef HAVE_LIBBZ2
11464 " +bzip2" <<
11465 #endif
11466 #ifdef HAVE_LIBLZMA
11467 " +lzma" <<
11468 #endif
11469 #ifdef HAVE_LIBLZ4
11470 " +lz4" <<
11471 #endif
11472 #ifdef HAVE_LIBZSTD
11473 " +zstd" <<
11474 #endif
11475 "\n"
11476 "License BSD-3-Clause: <https://opensource.org/licenses/BSD-3-Clause>\n"
11477 "Written by Robert van Engelen and others: <https://github.com/Genivia/ugrep>" << std::endl;
11478 exit(EXIT_OK);
11479 }
11480
11481 // print to standard error: ... is a directory if -q is not specified
is_directory(const char * pathname)11482 void is_directory(const char *pathname)
11483 {
11484 if (!flag_no_messages)
11485 fprintf(stderr, "%sugrep: %s%s%s is a directory\n", color_off, color_high, pathname, color_off);
11486 }
11487
11488 #ifdef HAVE_LIBZ
11489 // print to standard error: cannot decompress message if -q is not specified
cannot_decompress(const char * pathname,const char * message)11490 void cannot_decompress(const char *pathname, const char *message)
11491 {
11492 if (!flag_no_messages)
11493 {
11494 fprintf(stderr, "%sugrep: %swarning:%s %scannot decompress %s:%s %s%s%s\n", color_off, color_warning, color_off, color_high, pathname, color_off, color_message, message ? message : "", color_off);
11495 ++warnings;
11496 }
11497 }
11498 #endif
11499
11500 // print to standard error: warning message if -q is not specified, assumes errno is set, like perror()
warning(const char * message,const char * arg)11501 void warning(const char *message, const char *arg)
11502 {
11503 if (!flag_no_messages)
11504 {
11505 // use safe strerror_s() instead of strerror() when available
11506 #if defined(__STDC_LIB_EXT1__) || defined(OS_WIN)
11507 char errmsg[256];
11508 strerror_s(errmsg, sizeof(errmsg), errno);
11509 #else
11510 const char *errmsg = strerror(errno);
11511 #endif
11512 fprintf(stderr, "%sugrep: %swarning:%s %s%s%s%s:%s %s%s%s\n", color_off, color_warning, color_off, color_high, message ? message : "", message ? " " : "", arg ? arg : "", color_off, color_message, errmsg, color_off);
11513 ++warnings;
11514 }
11515 }
11516
11517 // print to standard error: error message, assumes errno is set, like perror(), then exit
error(const char * message,const char * arg)11518 void error(const char *message, const char *arg)
11519 {
11520 // use safe strerror_s() instead of strerror() when available
11521 #if defined(__STDC_LIB_EXT1__) || defined(OS_WIN)
11522 char errmsg[256];
11523 strerror_s(errmsg, sizeof(errmsg), errno);
11524 #else
11525 const char *errmsg = strerror(errno);
11526 #endif
11527 fprintf(stderr, "%sugrep: %serror:%s %s%s%s%s:%s %s%s%s\n\n", color_off, color_error, color_off, color_high, message ? message : "", message ? " " : "", arg ? arg : "", color_off, color_message, errmsg, color_off);
11528 exit(EXIT_ERROR);
11529 }
11530
11531 // print to standard error: abort message with exception details, then exit
abort(const char * message)11532 void abort(const char *message)
11533 {
11534 fprintf(stderr, "%sugrep: %s%s%s\n\n", color_off, color_error, message, color_off);
11535 exit(EXIT_ERROR);
11536 }
11537
11538 // print to standard error: abort message with exception details, then exit
abort(const char * message,const std::string & what)11539 void abort(const char *message, const std::string& what)
11540 {
11541 fprintf(stderr, "%sugrep: %s%s%s%s%s%s\n\n", color_off, color_error, message ? message : "", color_off, color_high, what.c_str(), color_off);
11542 exit(EXIT_ERROR);
11543 }
11544