1 /** 2 * Copyright (c) 2007-2012, Timothy Stack 3 * 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * * Redistributions of source code must retain the above copyright notice, this 10 * list of conditions and the following disclaimer. 11 * * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * * Neither the name of Timothy Stack nor the names of its contributors 15 * may be used to endorse or promote products derived from this software 16 * without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY 19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY 22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * 29 * @file logfile.hh 30 */ 31 32 #ifndef logfile_hh 33 #define logfile_hh 34 35 #include <stdio.h> 36 #include <stdint.h> 37 #include <sys/stat.h> 38 #include <sys/types.h> 39 #include <sys/resource.h> 40 41 #include <string> 42 #include <vector> 43 #include <utility> 44 45 #include "base/lnav_log.hh" 46 #include "base/result.h" 47 #include "byte_array.hh" 48 #include "line_buffer.hh" 49 #include "unique_path.hh" 50 #include "text_format.hh" 51 #include "shared_buffer.hh" 52 #include "ghc/filesystem.hpp" 53 #include "logfile_fwd.hh" 54 #include "log_format_fwd.hh" 55 #include "safe/safe.h" 56 57 /** 58 * Observer interface for logfile indexing progress. 59 * 60 * @see logfile 61 */ 62 class logfile_observer { 63 public: 64 virtual ~logfile_observer() = default; 65 66 enum class indexing_result { 67 CONTINUE, 68 BREAK, 69 }; 70 71 /** 72 * @param lf The logfile object that is doing the indexing. 73 * @param off The current offset in the file being processed. 74 * @param total The total size of the file. 75 * @return false 76 */ 77 virtual indexing_result logfile_indexing(const std::shared_ptr<logfile>& lf, 78 file_off_t off, 79 file_size_t total) = 0; 80 }; 81 82 struct logfile_activity { 83 int64_t la_polls{0}; 84 int64_t la_reads{0}; 85 struct rusage la_initial_index_rusage{}; 86 }; 87 88 /** 89 * Container for the lines in a log file and some metadata. 90 */ 91 class logfile : 92 public unique_path_source, 93 public std::enable_shared_from_this<logfile> { 94 public: 95 typedef std::vector<logline>::iterator iterator; 96 typedef std::vector<logline>::const_iterator const_iterator; 97 98 /** 99 * Construct a logfile with the given arguments. 100 * 101 * @param filename The name of the log file. 102 * @param fd The file descriptor for accessing the file or -1 if the 103 * constructor should open the file specified by 'filename'. The 104 * descriptor needs to be seekable. 105 */ 106 static Result<std::shared_ptr<logfile>, std::string> open( 107 std::string filename, logfile_open_options &loo); 108 109 ~logfile() override; 110 get_activity() const111 const logfile_activity &get_activity() const { 112 return this->lf_activity; 113 }; 114 get_actual_path() const115 nonstd::optional<ghc::filesystem::path> get_actual_path() const { 116 return this->lf_actual_path; 117 } 118 119 /** @return The filename as given in the constructor. */ get_filename() const120 const std::string &get_filename() const { return this->lf_filename; }; 121 122 /** @return The filename as given in the constructor, excluding the path prefix. */ get_basename() const123 const std::string &get_basename() const { return this->lf_basename; }; 124 get_fd() const125 int get_fd() const { return this->lf_line_buffer.get_fd(); }; 126 127 /** @param filename The new filename for this log file. */ set_filename(const std::string & filename)128 void set_filename(const std::string &filename) 129 { 130 if (this->lf_filename != filename) { 131 this->lf_filename = filename; 132 ghc::filesystem::path p(filename); 133 this->lf_basename = p.filename(); 134 } 135 }; 136 get_content_id() const137 const std::string &get_content_id() const { return this->lf_content_id; }; 138 139 /** @return The inode for this log file. */ get_stat() const140 const struct stat &get_stat() const { return this->lf_stat; }; 141 get_longest_line_length() const142 size_t get_longest_line_length() const { 143 return this->lf_longest_line; 144 } 145 is_compressed() const146 bool is_compressed() const { 147 return this->lf_line_buffer.is_compressed(); 148 }; 149 is_valid_filename() const150 bool is_valid_filename() const { 151 return this->lf_valid_filename; 152 }; 153 get_index_size() const154 file_off_t get_index_size() const { 155 return this->lf_index_size; 156 } 157 158 /** 159 * @return The detected format, rebuild_index() must be called before this 160 * will return a value other than NULL. 161 */ get_format() const162 std::shared_ptr<log_format> get_format() const { return this->lf_format; }; 163 164 intern_string_t get_format_name() const; 165 get_text_format() const166 text_format_t get_text_format() const { 167 return this->lf_text_format; 168 } 169 170 /** 171 * @return The last modified time of the file when the file was last 172 * indexed. 173 */ get_modified_time() const174 time_t get_modified_time() const { return this->lf_index_time; }; 175 get_time_offset_line() const176 int get_time_offset_line() const { 177 return this->lf_time_offset_line; 178 }; 179 get_time_offset() const180 const struct timeval &get_time_offset() const { 181 return this->lf_time_offset; 182 }; 183 adjust_content_time(int line,const struct timeval & tv,bool abs_offset=true)184 void adjust_content_time(int line, 185 const struct timeval &tv, 186 bool abs_offset=true) { 187 struct timeval old_time = this->lf_time_offset; 188 189 this->lf_time_offset_line = line; 190 if (abs_offset) { 191 this->lf_time_offset = tv; 192 } 193 else { 194 timeradd(&old_time, &tv, &this->lf_time_offset); 195 } 196 for (auto &iter : *this) { 197 struct timeval curr, diff, new_time; 198 199 curr = iter.get_timeval(); 200 timersub(&curr, &old_time, &diff); 201 timeradd(&diff, &this->lf_time_offset, &new_time); 202 iter.set_time(new_time); 203 } 204 this->lf_sort_needed = true; 205 }; 206 clear_time_offset()207 void clear_time_offset() { 208 struct timeval tv = { 0, 0 }; 209 210 this->adjust_content_time(-1, tv); 211 }; 212 213 void mark_as_duplicate(const std::string& name); 214 get_open_options() const215 const logfile_open_options& get_open_options() const { 216 return this->lf_options; 217 } 218 219 void reset_state(); 220 is_time_adjusted() const221 bool is_time_adjusted() const { 222 return (this->lf_time_offset.tv_sec != 0 || 223 this->lf_time_offset.tv_usec != 0); 224 } 225 begin()226 iterator begin() { return this->lf_index.begin(); } 227 begin() const228 const_iterator begin() const { return this->lf_index.begin(); } 229 cbegin() const230 const_iterator cbegin() const { return this->lf_index.begin(); } 231 end()232 iterator end() { return this->lf_index.end(); } 233 end() const234 const_iterator end() const { return this->lf_index.end(); } 235 cend() const236 const_iterator cend() const { return this->lf_index.end(); } 237 238 /** @return The number of lines in the index. */ size() const239 size_t size() const { return this->lf_index.size(); } 240 241 nonstd::optional<const_iterator> find_from_time(const struct timeval& tv) const; 242 operator [](int index)243 logline &operator[](int index) { return this->lf_index[index]; }; 244 front()245 logline &front() { 246 return this->lf_index.front(); 247 } 248 back()249 logline &back() { 250 return this->lf_index.back(); 251 }; 252 253 /** @return True if this log file still exists. */ 254 bool exists() const; 255 close()256 void close() { 257 this->lf_is_closed = true; 258 }; 259 is_closed() const260 bool is_closed() const { 261 return this->lf_is_closed; 262 }; 263 original_line_time(iterator ll)264 struct timeval original_line_time(iterator ll) { 265 if (this->is_time_adjusted()) { 266 struct timeval line_time = ll->get_timeval(); 267 struct timeval retval; 268 269 timersub(&line_time, &this->lf_time_offset, &retval); 270 return retval; 271 } 272 273 return ll->get_timeval(); 274 }; 275 276 Result<shared_buffer_ref, std::string> read_line(iterator ll); 277 line_base(iterator ll)278 iterator line_base(iterator ll) { 279 auto retval = ll; 280 281 while (retval != this->begin() && retval->get_sub_offset() != 0) { 282 --retval; 283 } 284 285 return retval; 286 }; 287 message_start(iterator ll)288 iterator message_start(iterator ll) { 289 auto retval = ll; 290 291 while (retval != this->begin() && 292 (retval->get_sub_offset() != 0 || !retval->is_message())) { 293 --retval; 294 } 295 296 return retval; 297 } 298 299 size_t line_length(const_iterator ll, bool include_continues = true); 300 get_file_range(const_iterator ll,bool include_continues=true)301 file_range get_file_range(const_iterator ll, bool include_continues = true) { 302 return {ll->get_offset(), 303 (file_ssize_t) this->line_length(ll, include_continues)}; 304 } 305 306 void read_full_message(const_iterator ll, shared_buffer_ref &msg_out, int max_lines=50); 307 308 Result<shared_buffer_ref, std::string> read_raw_message(const_iterator ll); 309 310 enum class rebuild_result_t { 311 INVALID, 312 NO_NEW_LINES, 313 NEW_LINES, 314 NEW_ORDER, 315 }; 316 317 /** 318 * Index any new data in the log file. 319 * 320 * @param lo The observer object that will be called regularly during 321 * indexing. 322 * @return True if any new lines were indexed. 323 */ 324 rebuild_result_t rebuild_index(nonstd::optional<ui_clock::time_point> deadline = nonstd::nullopt); 325 326 void reobserve_from(iterator iter); 327 set_logfile_observer(logfile_observer * lo)328 void set_logfile_observer(logfile_observer *lo) { 329 this->lf_logfile_observer = lo; 330 }; 331 332 void set_logline_observer(logline_observer *llo); 333 get_logline_observer() const334 logline_observer *get_logline_observer() const { 335 return this->lf_logline_observer; 336 }; 337 operator <(const logfile & rhs) const338 bool operator<(const logfile &rhs) const 339 { 340 bool retval; 341 342 if (this->lf_index.empty()) { 343 retval = true; 344 } 345 else if (rhs.lf_index.empty()) { 346 retval = false; 347 } 348 else { 349 retval = this->lf_index[0].get_time() < rhs.lf_index[0].get_time(); 350 } 351 352 return retval; 353 }; 354 is_indexing() const355 bool is_indexing() const { 356 return this->lf_indexing; 357 } 358 359 /** Check the invariants for this object. */ invariant()360 bool invariant() 361 { 362 require(!this->lf_filename.empty()); 363 364 return true; 365 } 366 367 ghc::filesystem::path get_path() const override; 368 369 enum class note_type { 370 indexing_disabled, 371 duplicate, 372 not_utf, 373 }; 374 375 using note_map = std::map<note_type, std::string>; 376 using safe_notes = safe::Safe<note_map>; 377 get_notes() const378 note_map get_notes() const { 379 return *this->lf_notes.readAccess(); 380 } 381 382 protected: 383 /** 384 * Process a line from the file. 385 * 386 * @param offset The offset of the line in the file. 387 * @param prefix The contents of the line. 388 * @param len The length of the 'prefix' string. 389 */ 390 bool process_prefix(shared_buffer_ref &sbr, const line_info &li); 391 392 void set_format_base_time(log_format *lf); 393 394 private: 395 logfile(std::string filename, logfile_open_options &loo); 396 397 std::string lf_filename; 398 logfile_open_options lf_options; 399 logfile_activity lf_activity; 400 bool lf_named_file{true}; 401 bool lf_valid_filename{true}; 402 nonstd::optional<ghc::filesystem::path> lf_actual_path; 403 std::string lf_basename; 404 std::string lf_content_id; 405 struct stat lf_stat{}; 406 std::shared_ptr<log_format> lf_format; 407 std::vector<logline> lf_index; 408 time_t lf_index_time{0}; 409 file_off_t lf_index_size{0}; 410 bool lf_sort_needed{false}; 411 line_buffer lf_line_buffer; 412 int lf_time_offset_line{0}; 413 struct timeval lf_time_offset{0, 0}; 414 bool lf_is_closed{false}; 415 bool lf_indexing{true}; 416 bool lf_partial_line{false}; 417 logline_observer *lf_logline_observer{nullptr}; 418 logfile_observer *lf_logfile_observer{nullptr}; 419 size_t lf_longest_line{0}; 420 text_format_t lf_text_format{text_format_t::TF_UNKNOWN}; 421 uint32_t lf_out_of_time_order_count{0}; 422 safe_notes lf_notes; 423 424 nonstd::optional<std::pair<file_off_t, size_t>> lf_next_line_cache; 425 }; 426 427 class logline_observer { 428 public: 429 virtual ~logline_observer() = default; 430 431 virtual void logline_restart(const logfile &lf, file_size_t rollback_size) = 0; 432 433 virtual void logline_new_lines( 434 const logfile &lf, 435 logfile::const_iterator ll_begin, 436 logfile::const_iterator ll_end, 437 shared_buffer_ref &sbr) = 0; 438 439 virtual void logline_eof(const logfile &lf) = 0; 440 }; 441 442 #endif 443