1 /**
2  * Copyright (c) 2007-2012, Timothy Stack
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * * Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * * Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  * * Neither the name of Timothy Stack nor the names of its contributors
15  * may be used to endorse or promote products derived from this software
16  * without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * @file grep_proc.hh
30  */
31 
32 #ifndef grep_proc_hh
33 #define grep_proc_hh
34 
35 #include "config.h"
36 
37 #include <stdio.h>
38 #include <sys/types.h>
39 #include <unistd.h>
40 #include <poll.h>
41 
42 #ifdef HAVE_PCRE_H
43 #include <pcre.h>
44 #elif HAVE_PCRE_PCRE_H
45 #include <pcre/pcre.h>
46 #endif
47 
48 #include <deque>
49 #include <string>
50 #include <vector>
51 #include <exception>
52 
53 #include "pcrepp/pcrepp.hh"
54 #include "auto_fd.hh"
55 #include "auto_mem.hh"
56 #include "base/lnav_log.hh"
57 #include "strong_int.hh"
58 #include "line_buffer.hh"
59 
template<typename LineType>
class grep_proc;

/**
 * Data source for lines to be searched using a grep_proc.
 *
 * Implementations must provide grep_value_for_line(); the remaining hooks
 * have defaults that walk forward through the lines one at a time.
 */
template<typename LineType>
class grep_proc_source {
public:
    virtual ~grep_proc_source() = default;

    /**
     * Record the grep_proc associated with this source.
     *
     * @param proc The grep_proc to store in gps_proc.
     */
    virtual void register_proc(grep_proc<LineType> *proc) {
        this->gps_proc = proc;
    }

    /**
     * Get the value for a particular line in the source.
     *
     * @param line The line to retrieve.
     * @param value_out The destination for the line value.
     * @return Implementation-defined.  NOTE(review): presumably true when
     * value_out was filled in -- confirm against the callers.
     */
    virtual bool grep_value_for_line(LineType line, std::string &value_out) = 0;

    /**
     * Choose the first line to examine for a request.
     *
     * @param start The requested start line, or -1.
     * @param highest The line to use in place of a start value of -1.
     * @return highest when start is -1, otherwise start.
     */
    virtual LineType grep_initial_line(LineType start, LineType highest) {
        if (start == -1) {
            return highest;
        }
        return start;
    }

    /**
     * Advance 'line' to the next line to examine.  The default moves
     * forward by exactly one line.
     *
     * @param line The current line, updated in place.
     */
    virtual void grep_next_line(LineType &line) {
        line = line + LineType(1);
    }

    /**
     * The grep_proc set through register_proc().  Initialized to nullptr so
     * that a source that was never registered does not hold an indeterminate
     * pointer.
     */
    grep_proc<LineType> *gps_proc{nullptr};
};
96 
/**
 * Delegate interface for control messages coming from a grep_proc.
 */
class grep_proc_control {
public:
    virtual ~grep_proc_control() = default;

    /**
     * Hook for reporting an error; the default implementation ignores it.
     *
     * @param msg The error encountered while attempting the grep.
     */
    virtual void grep_error(const std::string &msg)
    {
    }
};
108 
109 /**
110  * Sink for matches produced by a grep_proc instance.
111  */
112 template<typename LineType>
113 class grep_proc_sink {
114 public:
115     virtual ~grep_proc_sink() = default;
116 
117     /** Called at the start of a new grep run. */
grep_begin(grep_proc<LineType> & gp,LineType start,LineType stop)118     virtual void grep_begin(grep_proc<LineType> &gp, LineType start, LineType stop) { };
119 
120     /** Called periodically between grep_begin and grep_end. */
grep_end_batch(grep_proc<LineType> & gp)121     virtual void grep_end_batch(grep_proc<LineType> &gp) { };
122 
123     /** Called at the end of a grep run. */
grep_end(grep_proc<LineType> & gp)124     virtual void grep_end(grep_proc<LineType> &gp) { };
125 
126     /**
127      * Called when a match is found on 'line' and between [start, end).
128      *
129      * @param line The line number that matched.
130      * @param start The offset within the line where the match begins.
131      * @param end The offset of the character after the last character in the
132      * match.
133      */
134     virtual void grep_match(grep_proc<LineType> &gp,
135                             LineType line,
136                             int start,
137                             int end) = 0;
138 
139     /**
140      * Called for each captured substring in the line.
141      *
142      * @param line The line number that matched.
143      * @param start The offset within the line where the capture begins.
144      * @param end The offset of the character after the last character in the
145      * capture.
146      * @param capture The captured substring itself.
147      */
grep_capture(grep_proc<LineType> & gp,LineType line,int start,int end,char * capture)148     virtual void grep_capture(grep_proc<LineType> &gp,
149                               LineType line,
150                               int start,
151                               int end,
152                               char *capture) { };
153 
grep_match_end(grep_proc<LineType> & gp,LineType line)154     virtual void grep_match_end(grep_proc<LineType> &gp, LineType line) { };
155 };
156 
157 /**
158  * "Grep" that runs in a separate process so it doesn't stall user-interaction.
159  * This class manages the child process and any interactions between the parent
160  * and child.  The source data to be matched comes from the grep_proc_source
161  * delegate and the results are sent to the grep_proc_sink delegate in the
162  * parent process.
163  *
164  * Note: The "grep" executable is not actually used, instead we use the pcre(3)
165  * library directly.
166  */
167 template<typename LineType>
168 class grep_proc {
169 public:
170     class error
171         : public std::exception {
172 public:
error(int err)173         error(int err)
174             : e_err(err) { };
175 
176         int e_err;
177     };
178 
179     /**
180      * Construct a grep_proc object.  You must call the start() method
181      * to fork off the child process and begin processing.
182      *
183      * @param code The pcre code to run over the lines of input.
184      * @param gps The source of the data to match.
185      */
186     grep_proc(pcre *code, grep_proc_source<LineType> &gps);
187 
188     virtual ~grep_proc();
189 
190     /** @param gpd The sink to send resuls to. */
set_sink(grep_proc_sink<LineType> * gpd)191     void set_sink(grep_proc_sink<LineType> *gpd)
192     {
193         this->gp_sink = gpd;
194     };
195 
196     grep_proc &invalidate();
197 
198     /** @param gpd The sink to send results to. */
set_control(grep_proc_control * gpc)199     void set_control(grep_proc_control *gpc)
200     {
201         this->gp_control = gpc;
202     };
203 
204     /** @return The sink to send results to. */
get_sink()205     grep_proc_sink<LineType> *get_sink() { return this->gp_sink; };
206 
207     /**
208      * Queue a request to search the input between the given line numbers.
209      *
210      * @param start The line number to start the search at.
211      * @param stop The line number to stop the search at (exclusive) or -1 to
212      * read until the end-of-file.
213      */
queue_request(LineType start=LineType (0),LineType stop=LineType (-1))214     grep_proc &queue_request(LineType start = LineType(0),
215                              LineType stop = LineType(-1))
216     {
217         require(start != -1 || stop == -1);
218         require(stop == -1 || start < stop);
219 
220         this->gp_queue.emplace_back(start, stop);
221         if (this->gp_sink) {
222             this->gp_sink->grep_begin(*this, start, stop);
223         }
224 
225         return *this;
226     };
227 
228     /**
229      * Start the search requests that have been queued up with queue_request.
230      */
231     void start();
232 
update_poll_set(std::vector<struct pollfd> & pollfds)233     void update_poll_set(std::vector<struct pollfd> &pollfds)
234     {
235         if (this->gp_line_buffer.get_fd() != -1) {
236             pollfds.push_back((struct pollfd) {
237                     this->gp_line_buffer.get_fd(),
238                     POLLIN,
239                     0
240             });
241         }
242         if (this->gp_err_pipe != -1) {
243             pollfds.push_back((struct pollfd) {
244                     this->gp_err_pipe,
245                     POLLIN,
246                     0
247             });
248         }
249     };
250 
251     /**
252      * Check the fd_set to see if there is any new data to be processed.
253      *
254      * @param ready_rfds The set of ready-to-read file descriptors.
255      */
256     void check_poll_set(const std::vector<struct pollfd> &pollfds);
257 
258     /** Check the invariants for this object. */
invariant()259     bool invariant()
260     {
261         if (this->gp_child_started) {
262             require(this->gp_child > 0);
263             require(this->gp_line_buffer.get_fd() != -1);
264         }
265         else {
266             /* require(this->gp_child == -1); XXX doesnt work with static destr */
267             require(this->gp_line_buffer.get_fd() == -1);
268         }
269 
270         return true;
271     };
272 
273 protected:
274 
275     /**
276      * Dispatch a line received from the child.
277      */
278     void dispatch_line(char *line);
279 
280     /**
281      * Free any resources used by the object and make sure the child has been
282      * terminated.
283      */
284     void cleanup();
285 
286     void child_loop();
287 
child_init()288     virtual void child_init() { };
289 
child_batch()290     virtual void child_batch() { fflush(stdout); };
291 
child_term()292     virtual void child_term() { fflush(stdout); };
293 
294     virtual void handle_match(int line,
295                               std::string &line_value,
296                               int off,
297                               int *matches,
298                               int count);
299 
300     pcrepp             gp_pcre;
301     grep_proc_source<LineType> &gp_source;        /*< The data source delegate. */
302 
303     auto_fd     gp_err_pipe;             /*< Standard error from the child. */
304     line_buffer gp_line_buffer;          /*< Standard out from the child. */
305     file_range  gp_pipe_range;
306 
307     pid_t gp_child{-1};                     /*<
308                                          * The child's pid or zero in the
309                                          * child.
310                                          */
311     bool     gp_child_started{false};          /*< True if the child was start()'d. */
312     size_t gp_child_queue_size{0};
313 
314     /** The queue of search requests. */
315     std::deque<std::pair<LineType, LineType> > gp_queue;
316     LineType gp_last_line{0};           /*<
317                                          * The last line number received from
318                                          * the child.  For multiple matches,
319                                          * the line number is only sent once.
320                                          */
321     LineType gp_highest_line;        /*< The highest numbered line processed
322                                          * by the grep child process.  This
323                                          * value is used when the start line
324                                          * for a queued request is -1.
325                                          */
326     grep_proc_sink<LineType> *gp_sink{nullptr};         /*< The sink delegate. */
327     grep_proc_control *gp_control{nullptr};      /*< The control delegate. */
328 };
329 #endif
330