1 /**
2  * Copyright (c) 2007-2012, Timothy Stack
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * * Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * * Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  * * Neither the name of Timothy Stack nor the names of its contributors
15  * may be used to endorse or promote products derived from this software
16  * without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * @file logfile.hh
30  */
31 
32 #ifndef logfile_hh
33 #define logfile_hh
34 
35 #include <stdio.h>
36 #include <stdint.h>
37 #include <sys/stat.h>
38 #include <sys/types.h>
39 #include <sys/resource.h>
40 
41 #include <string>
42 #include <vector>
43 #include <utility>
44 
45 #include "base/lnav_log.hh"
46 #include "base/result.h"
47 #include "byte_array.hh"
48 #include "line_buffer.hh"
49 #include "unique_path.hh"
50 #include "text_format.hh"
51 #include "shared_buffer.hh"
52 #include "ghc/filesystem.hpp"
53 #include "logfile_fwd.hh"
54 #include "log_format_fwd.hh"
55 #include "safe/safe.h"
56 
57 /**
58  * Observer interface for logfile indexing progress.
59  *
60  * @see logfile
61  */
62 class logfile_observer {
63 public:
64     virtual ~logfile_observer() = default;
65 
66     enum class indexing_result {
67         CONTINUE,
68         BREAK,
69     };
70 
71     /**
72      * @param lf The logfile object that is doing the indexing.
73      * @param off The current offset in the file being processed.
74      * @param total The total size of the file.
75      * @return false
76      */
77     virtual indexing_result logfile_indexing(const std::shared_ptr<logfile>& lf,
78                                              file_off_t off,
79                                              file_size_t total) = 0;
80 };
81 
/**
 * Activity counters kept for a logfile.  All fields start out zeroed.
 */
struct logfile_activity {
    /** Poll counter; NOTE(review): incremented outside this header. */
    int64_t la_polls{0};
    /** Read counter; NOTE(review): incremented outside this header. */
    int64_t la_reads{0};
    /**
     * Resource usage snapshot; presumably the cost of the initial indexing
     * pass, as the name suggests -- confirm where it is filled in.
     */
    struct rusage la_initial_index_rusage{};
};
87 
88 /**
89  * Container for the lines in a log file and some metadata.
90  */
91 class logfile :
92     public unique_path_source,
93     public std::enable_shared_from_this<logfile> {
94 public:
95     typedef std::vector<logline>::iterator       iterator;
96     typedef std::vector<logline>::const_iterator const_iterator;
97 
98     /**
99      * Construct a logfile with the given arguments.
100      *
101      * @param filename The name of the log file.
102      * @param fd The file descriptor for accessing the file or -1 if the
103      * constructor should open the file specified by 'filename'.  The
104      * descriptor needs to be seekable.
105      */
106     static Result<std::shared_ptr<logfile>, std::string> open(
107         std::string filename, logfile_open_options &loo);
108 
109     ~logfile() override;
110 
get_activity() const111     const logfile_activity &get_activity() const {
112         return this->lf_activity;
113     };
114 
get_actual_path() const115     nonstd::optional<ghc::filesystem::path> get_actual_path() const {
116         return this->lf_actual_path;
117     }
118 
119     /** @return The filename as given in the constructor. */
get_filename() const120     const std::string &get_filename() const { return this->lf_filename; };
121 
122     /** @return The filename as given in the constructor, excluding the path prefix. */
get_basename() const123     const std::string &get_basename() const { return this->lf_basename; };
124 
get_fd() const125     int get_fd() const { return this->lf_line_buffer.get_fd(); };
126 
127     /** @param filename The new filename for this log file. */
set_filename(const std::string & filename)128     void set_filename(const std::string &filename)
129     {
130         if (this->lf_filename != filename) {
131             this->lf_filename = filename;
132             ghc::filesystem::path p(filename);
133             this->lf_basename = p.filename();
134         }
135     };
136 
get_content_id() const137     const std::string &get_content_id() const { return this->lf_content_id; };
138 
139     /** @return The inode for this log file. */
get_stat() const140     const struct stat &get_stat() const { return this->lf_stat; };
141 
get_longest_line_length() const142     size_t get_longest_line_length() const {
143         return this->lf_longest_line;
144     }
145 
is_compressed() const146     bool is_compressed() const {
147         return this->lf_line_buffer.is_compressed();
148     };
149 
is_valid_filename() const150     bool is_valid_filename() const {
151         return this->lf_valid_filename;
152     };
153 
get_index_size() const154     file_off_t get_index_size() const {
155         return this->lf_index_size;
156     }
157 
158     /**
159      * @return The detected format, rebuild_index() must be called before this
160      * will return a value other than NULL.
161      */
get_format() const162     std::shared_ptr<log_format> get_format() const { return this->lf_format; };
163 
164     intern_string_t get_format_name() const;
165 
get_text_format() const166     text_format_t get_text_format() const {
167         return this->lf_text_format;
168     }
169 
170     /**
171      * @return The last modified time of the file when the file was last
172      * indexed.
173      */
get_modified_time() const174     time_t get_modified_time() const { return this->lf_index_time; };
175 
get_time_offset_line() const176     int get_time_offset_line() const {
177         return this->lf_time_offset_line;
178     };
179 
get_time_offset() const180     const struct timeval &get_time_offset() const {
181         return this->lf_time_offset;
182     };
183 
adjust_content_time(int line,const struct timeval & tv,bool abs_offset=true)184     void adjust_content_time(int line,
185                              const struct timeval &tv,
186                              bool abs_offset=true) {
187         struct timeval old_time = this->lf_time_offset;
188 
189         this->lf_time_offset_line = line;
190         if (abs_offset) {
191             this->lf_time_offset = tv;
192         }
193         else {
194             timeradd(&old_time, &tv, &this->lf_time_offset);
195         }
196         for (auto &iter : *this) {
197             struct timeval curr, diff, new_time;
198 
199             curr = iter.get_timeval();
200             timersub(&curr, &old_time, &diff);
201             timeradd(&diff, &this->lf_time_offset, &new_time);
202             iter.set_time(new_time);
203         }
204         this->lf_sort_needed = true;
205     };
206 
clear_time_offset()207     void clear_time_offset() {
208         struct timeval tv = { 0, 0 };
209 
210         this->adjust_content_time(-1, tv);
211     };
212 
213     void mark_as_duplicate(const std::string& name);
214 
get_open_options() const215     const logfile_open_options& get_open_options() const {
216         return this->lf_options;
217     }
218 
219     void reset_state();
220 
is_time_adjusted() const221     bool is_time_adjusted() const {
222         return (this->lf_time_offset.tv_sec != 0 ||
223                 this->lf_time_offset.tv_usec != 0);
224     }
225 
begin()226     iterator begin() { return this->lf_index.begin(); }
227 
begin() const228     const_iterator begin() const { return this->lf_index.begin(); }
229 
cbegin() const230     const_iterator cbegin() const { return this->lf_index.begin(); }
231 
end()232     iterator end() { return this->lf_index.end(); }
233 
end() const234     const_iterator end() const { return this->lf_index.end(); }
235 
cend() const236     const_iterator cend() const { return this->lf_index.end(); }
237 
238     /** @return The number of lines in the index. */
size() const239     size_t size() const { return this->lf_index.size(); }
240 
241     nonstd::optional<const_iterator> find_from_time(const struct timeval& tv) const;
242 
operator [](int index)243     logline &operator[](int index) { return this->lf_index[index]; };
244 
front()245     logline &front() {
246         return this->lf_index.front();
247     }
248 
back()249     logline &back() {
250         return this->lf_index.back();
251     };
252 
253     /** @return True if this log file still exists. */
254     bool exists() const;
255 
close()256     void close() {
257         this->lf_is_closed = true;
258     };
259 
is_closed() const260     bool is_closed() const {
261         return this->lf_is_closed;
262     };
263 
original_line_time(iterator ll)264     struct timeval original_line_time(iterator ll) {
265         if (this->is_time_adjusted()) {
266             struct timeval line_time = ll->get_timeval();
267             struct timeval retval;
268 
269             timersub(&line_time, &this->lf_time_offset, &retval);
270             return retval;
271         }
272 
273         return ll->get_timeval();
274     };
275 
276     Result<shared_buffer_ref, std::string> read_line(iterator ll);
277 
line_base(iterator ll)278     iterator line_base(iterator ll) {
279         auto retval = ll;
280 
281         while (retval != this->begin() && retval->get_sub_offset() != 0) {
282             --retval;
283         }
284 
285         return retval;
286     };
287 
message_start(iterator ll)288     iterator message_start(iterator ll) {
289         auto retval = ll;
290 
291         while (retval != this->begin() &&
292                 (retval->get_sub_offset() != 0 || !retval->is_message())) {
293             --retval;
294         }
295 
296         return retval;
297     }
298 
299     size_t line_length(const_iterator ll, bool include_continues = true);
300 
get_file_range(const_iterator ll,bool include_continues=true)301     file_range get_file_range(const_iterator ll, bool include_continues = true) {
302         return {ll->get_offset(),
303                 (file_ssize_t) this->line_length(ll, include_continues)};
304     }
305 
306     void read_full_message(const_iterator ll, shared_buffer_ref &msg_out, int max_lines=50);
307 
308     Result<shared_buffer_ref, std::string> read_raw_message(const_iterator ll);
309 
310     enum class rebuild_result_t {
311         INVALID,
312         NO_NEW_LINES,
313         NEW_LINES,
314         NEW_ORDER,
315     };
316 
317     /**
318      * Index any new data in the log file.
319      *
320      * @param lo The observer object that will be called regularly during
321      * indexing.
322      * @return True if any new lines were indexed.
323      */
324     rebuild_result_t rebuild_index(nonstd::optional<ui_clock::time_point> deadline = nonstd::nullopt);
325 
326     void reobserve_from(iterator iter);
327 
set_logfile_observer(logfile_observer * lo)328     void set_logfile_observer(logfile_observer *lo) {
329         this->lf_logfile_observer = lo;
330     };
331 
332     void set_logline_observer(logline_observer *llo);
333 
get_logline_observer() const334     logline_observer *get_logline_observer() const {
335         return this->lf_logline_observer;
336     };
337 
operator <(const logfile & rhs) const338     bool operator<(const logfile &rhs) const
339     {
340         bool retval;
341 
342         if (this->lf_index.empty()) {
343             retval = true;
344         }
345         else if (rhs.lf_index.empty()) {
346             retval = false;
347         }
348         else {
349             retval = this->lf_index[0].get_time() < rhs.lf_index[0].get_time();
350         }
351 
352         return retval;
353     };
354 
is_indexing() const355     bool is_indexing() const {
356         return this->lf_indexing;
357     }
358 
359     /** Check the invariants for this object. */
invariant()360     bool invariant()
361     {
362         require(!this->lf_filename.empty());
363 
364         return true;
365     }
366 
367     ghc::filesystem::path get_path() const override;
368 
369     enum class note_type {
370         indexing_disabled,
371         duplicate,
372         not_utf,
373     };
374 
375     using note_map = std::map<note_type, std::string>;
376     using safe_notes = safe::Safe<note_map>;
377 
get_notes() const378     note_map get_notes() const {
379         return *this->lf_notes.readAccess();
380     }
381 
382 protected:
383     /**
384      * Process a line from the file.
385      *
386      * @param offset The offset of the line in the file.
387      * @param prefix The contents of the line.
388      * @param len The length of the 'prefix' string.
389      */
390     bool process_prefix(shared_buffer_ref &sbr, const line_info &li);
391 
392     void set_format_base_time(log_format *lf);
393 
394 private:
395     logfile(std::string filename, logfile_open_options &loo);
396 
397     std::string lf_filename;
398     logfile_open_options lf_options;
399     logfile_activity lf_activity;
400     bool        lf_named_file{true};
401     bool        lf_valid_filename{true};
402     nonstd::optional<ghc::filesystem::path> lf_actual_path;
403     std::string lf_basename;
404     std::string lf_content_id;
405     struct stat lf_stat{};
406     std::shared_ptr<log_format> lf_format;
407     std::vector<logline>      lf_index;
408     time_t      lf_index_time{0};
409     file_off_t  lf_index_size{0};
410     bool lf_sort_needed{false};
411     line_buffer lf_line_buffer;
412     int lf_time_offset_line{0};
413     struct timeval lf_time_offset{0, 0};
414     bool lf_is_closed{false};
415     bool lf_indexing{true};
416     bool lf_partial_line{false};
417     logline_observer *lf_logline_observer{nullptr};
418     logfile_observer *lf_logfile_observer{nullptr};
419     size_t lf_longest_line{0};
420     text_format_t lf_text_format{text_format_t::TF_UNKNOWN};
421     uint32_t lf_out_of_time_order_count{0};
422     safe_notes lf_notes;
423 
424     nonstd::optional<std::pair<file_off_t, size_t>> lf_next_line_cache;
425 };
426 
427 class logline_observer {
428 public:
429     virtual ~logline_observer() = default;
430 
431     virtual void logline_restart(const logfile &lf, file_size_t rollback_size) = 0;
432 
433     virtual void logline_new_lines(
434         const logfile &lf,
435         logfile::const_iterator ll_begin,
436         logfile::const_iterator ll_end,
437         shared_buffer_ref &sbr) = 0;
438 
439     virtual void logline_eof(const logfile &lf) = 0;
440 };
441 
442 #endif
443