/**
 * Copyright (c) 2007-2012, Timothy Stack
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * * Neither the name of Timothy Stack nor the names of its contributors
 *   may be used to endorse or promote products derived from this software
 *   without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * 29 * @file grep_proc.hh 30 */ 31 32 #ifndef grep_proc_hh 33 #define grep_proc_hh 34 35 #include "config.h" 36 37 #include <stdio.h> 38 #include <sys/types.h> 39 #include <unistd.h> 40 #include <poll.h> 41 42 #ifdef HAVE_PCRE_H 43 #include <pcre.h> 44 #elif HAVE_PCRE_PCRE_H 45 #include <pcre/pcre.h> 46 #endif 47 48 #include <deque> 49 #include <string> 50 #include <vector> 51 #include <exception> 52 53 #include "pcrepp/pcrepp.hh" 54 #include "auto_fd.hh" 55 #include "auto_mem.hh" 56 #include "base/lnav_log.hh" 57 #include "strong_int.hh" 58 #include "line_buffer.hh" 59 60 template<typename LineType> 61 class grep_proc; 62 63 /** 64 * Data source for lines to be searched using a grep_proc. 65 */ 66 template<typename LineType> 67 class grep_proc_source { 68 public: 69 virtual ~grep_proc_source() = default; 70 register_proc(grep_proc<LineType> * proc)71 virtual void register_proc(grep_proc<LineType> *proc) { 72 this->gps_proc = proc; 73 } 74 75 /** 76 * Get the value for a particular line in the source. 77 * 78 * @param line The line to retrieve. 79 * @param value_out The destination for the line value. 80 */ 81 virtual bool grep_value_for_line(LineType line, std::string &value_out) = 0; 82 grep_initial_line(LineType start,LineType highest)83 virtual LineType grep_initial_line(LineType start, LineType highest) { 84 if (start == -1) { 85 return highest; 86 } 87 return start; 88 }; 89 grep_next_line(LineType & line)90 virtual void grep_next_line(LineType &line) { 91 line = line + LineType(1); 92 }; 93 94 grep_proc<LineType> *gps_proc; 95 }; 96 97 /** 98 * Delegate interface for control messages from the grep_proc. 99 */ 100 class grep_proc_control { 101 public: 102 103 virtual ~grep_proc_control() = default; 104 105 /** @param msg The error encountered while attempting the grep. */ grep_error(const std::string & msg)106 virtual void grep_error(const std::string& msg) { }; 107 }; 108 109 /** 110 * Sink for matches produced by a grep_proc instance. 
111 */ 112 template<typename LineType> 113 class grep_proc_sink { 114 public: 115 virtual ~grep_proc_sink() = default; 116 117 /** Called at the start of a new grep run. */ grep_begin(grep_proc<LineType> & gp,LineType start,LineType stop)118 virtual void grep_begin(grep_proc<LineType> &gp, LineType start, LineType stop) { }; 119 120 /** Called periodically between grep_begin and grep_end. */ grep_end_batch(grep_proc<LineType> & gp)121 virtual void grep_end_batch(grep_proc<LineType> &gp) { }; 122 123 /** Called at the end of a grep run. */ grep_end(grep_proc<LineType> & gp)124 virtual void grep_end(grep_proc<LineType> &gp) { }; 125 126 /** 127 * Called when a match is found on 'line' and between [start, end). 128 * 129 * @param line The line number that matched. 130 * @param start The offset within the line where the match begins. 131 * @param end The offset of the character after the last character in the 132 * match. 133 */ 134 virtual void grep_match(grep_proc<LineType> &gp, 135 LineType line, 136 int start, 137 int end) = 0; 138 139 /** 140 * Called for each captured substring in the line. 141 * 142 * @param line The line number that matched. 143 * @param start The offset within the line where the capture begins. 144 * @param end The offset of the character after the last character in the 145 * capture. 146 * @param capture The captured substring itself. 147 */ grep_capture(grep_proc<LineType> & gp,LineType line,int start,int end,char * capture)148 virtual void grep_capture(grep_proc<LineType> &gp, 149 LineType line, 150 int start, 151 int end, 152 char *capture) { }; 153 grep_match_end(grep_proc<LineType> & gp,LineType line)154 virtual void grep_match_end(grep_proc<LineType> &gp, LineType line) { }; 155 }; 156 157 /** 158 * "Grep" that runs in a separate process so it doesn't stall user-interaction. 159 * This class manages the child process and any interactions between the parent 160 * and child. 
The source data to be matched comes from the grep_proc_source 161 * delegate and the results are sent to the grep_proc_sink delegate in the 162 * parent process. 163 * 164 * Note: The "grep" executable is not actually used, instead we use the pcre(3) 165 * library directly. 166 */ 167 template<typename LineType> 168 class grep_proc { 169 public: 170 class error 171 : public std::exception { 172 public: error(int err)173 error(int err) 174 : e_err(err) { }; 175 176 int e_err; 177 }; 178 179 /** 180 * Construct a grep_proc object. You must call the start() method 181 * to fork off the child process and begin processing. 182 * 183 * @param code The pcre code to run over the lines of input. 184 * @param gps The source of the data to match. 185 */ 186 grep_proc(pcre *code, grep_proc_source<LineType> &gps); 187 188 virtual ~grep_proc(); 189 190 /** @param gpd The sink to send resuls to. */ set_sink(grep_proc_sink<LineType> * gpd)191 void set_sink(grep_proc_sink<LineType> *gpd) 192 { 193 this->gp_sink = gpd; 194 }; 195 196 grep_proc &invalidate(); 197 198 /** @param gpd The sink to send results to. */ set_control(grep_proc_control * gpc)199 void set_control(grep_proc_control *gpc) 200 { 201 this->gp_control = gpc; 202 }; 203 204 /** @return The sink to send results to. */ get_sink()205 grep_proc_sink<LineType> *get_sink() { return this->gp_sink; }; 206 207 /** 208 * Queue a request to search the input between the given line numbers. 209 * 210 * @param start The line number to start the search at. 211 * @param stop The line number to stop the search at (exclusive) or -1 to 212 * read until the end-of-file. 
213 */ queue_request(LineType start=LineType (0),LineType stop=LineType (-1))214 grep_proc &queue_request(LineType start = LineType(0), 215 LineType stop = LineType(-1)) 216 { 217 require(start != -1 || stop == -1); 218 require(stop == -1 || start < stop); 219 220 this->gp_queue.emplace_back(start, stop); 221 if (this->gp_sink) { 222 this->gp_sink->grep_begin(*this, start, stop); 223 } 224 225 return *this; 226 }; 227 228 /** 229 * Start the search requests that have been queued up with queue_request. 230 */ 231 void start(); 232 update_poll_set(std::vector<struct pollfd> & pollfds)233 void update_poll_set(std::vector<struct pollfd> &pollfds) 234 { 235 if (this->gp_line_buffer.get_fd() != -1) { 236 pollfds.push_back((struct pollfd) { 237 this->gp_line_buffer.get_fd(), 238 POLLIN, 239 0 240 }); 241 } 242 if (this->gp_err_pipe != -1) { 243 pollfds.push_back((struct pollfd) { 244 this->gp_err_pipe, 245 POLLIN, 246 0 247 }); 248 } 249 }; 250 251 /** 252 * Check the fd_set to see if there is any new data to be processed. 253 * 254 * @param ready_rfds The set of ready-to-read file descriptors. 255 */ 256 void check_poll_set(const std::vector<struct pollfd> &pollfds); 257 258 /** Check the invariants for this object. */ invariant()259 bool invariant() 260 { 261 if (this->gp_child_started) { 262 require(this->gp_child > 0); 263 require(this->gp_line_buffer.get_fd() != -1); 264 } 265 else { 266 /* require(this->gp_child == -1); XXX doesnt work with static destr */ 267 require(this->gp_line_buffer.get_fd() == -1); 268 } 269 270 return true; 271 }; 272 273 protected: 274 275 /** 276 * Dispatch a line received from the child. 277 */ 278 void dispatch_line(char *line); 279 280 /** 281 * Free any resources used by the object and make sure the child has been 282 * terminated. 
283 */ 284 void cleanup(); 285 286 void child_loop(); 287 child_init()288 virtual void child_init() { }; 289 child_batch()290 virtual void child_batch() { fflush(stdout); }; 291 child_term()292 virtual void child_term() { fflush(stdout); }; 293 294 virtual void handle_match(int line, 295 std::string &line_value, 296 int off, 297 int *matches, 298 int count); 299 300 pcrepp gp_pcre; 301 grep_proc_source<LineType> &gp_source; /*< The data source delegate. */ 302 303 auto_fd gp_err_pipe; /*< Standard error from the child. */ 304 line_buffer gp_line_buffer; /*< Standard out from the child. */ 305 file_range gp_pipe_range; 306 307 pid_t gp_child{-1}; /*< 308 * The child's pid or zero in the 309 * child. 310 */ 311 bool gp_child_started{false}; /*< True if the child was start()'d. */ 312 size_t gp_child_queue_size{0}; 313 314 /** The queue of search requests. */ 315 std::deque<std::pair<LineType, LineType> > gp_queue; 316 LineType gp_last_line{0}; /*< 317 * The last line number received from 318 * the child. For multiple matches, 319 * the line number is only sent once. 320 */ 321 LineType gp_highest_line; /*< The highest numbered line processed 322 * by the grep child process. This 323 * value is used when the start line 324 * for a queued request is -1. 325 */ 326 grep_proc_sink<LineType> *gp_sink{nullptr}; /*< The sink delegate. */ 327 grep_proc_control *gp_control{nullptr}; /*< The control delegate. */ 328 }; 329 #endif 330