1 /**
2  * Copyright (c) 2007-2012, Timothy Stack
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * * Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * * Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  * * Neither the name of Timothy Stack nor the names of its contributors
15  * may be used to endorse or promote products derived from this software
16  * without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * @file log_format.hh
30  */
31 
32 #ifndef log_format_hh
33 #define log_format_hh
34 
35 #include <time.h>
36 #include <sys/time.h>
37 #include <stdint.h>
38 #define __STDC_FORMAT_MACROS
39 #include <inttypes.h>
40 #include <sys/types.h>
41 
42 #include <memory>
43 #include <set>
44 #include <list>
45 #include <string>
46 #include <utility>
47 #include <vector>
48 #include <limits>
49 #include <memory>
50 #include <sstream>
51 
52 #include "optional.hpp"
53 #include "pcrepp/pcrepp.hh"
54 #include "base/lnav_log.hh"
55 #include "base/date_time_scanner.hh"
56 #include "byte_array.hh"
57 #include "base/intern_string.hh"
58 #include "shared_buffer.hh"
59 #include "highlighter.hh"
60 #include "log_level.hh"
61 #include "line_buffer.hh"
62 #include "log_format_fwd.hh"
63 
64 struct sqlite3;
65 class logfile;
66 class log_vtab_manager;
67 struct exec_context;
68 
/**
 * The arithmetic operation that a scaling_factor applies to a value.
 */
enum class scale_op_t {
    /** Leave the value untouched. */
    SO_IDENTITY,
    /** Multiply the value by the factor. */
    SO_MULTIPLY,
    /** Divide the value by the factor. */
    SO_DIVIDE
};

/**
 * An operation and a factor that together rescale a numeric value in place.
 * A default-constructed instance is the identity transform.
 */
struct scaling_factor {
    scaling_factor() : sf_op(scale_op_t::SO_IDENTITY), sf_value(1) { };

    /**
     * Apply this scaling to a value.
     *
     * For integer types, a factor that is a whole number within the range of
     * T is applied with exact integer arithmetic.  Any other factor (e.g. 0.5
     * or 1.5) is applied in double precision and the result is truncated back
     * to T.  Truncating the factor first would turn a fractional factor into
     * a multiply/divide by zero or one, and an integer divide by zero is
     * undefined behavior.  The value is left unchanged when the result cannot
     * be stored in T, which includes an integer division by zero.
     * Floating-point types always use the factor converted to T.
     *
     * @param val The value to scale in place.  T must be convertible to and
     *   from double.
     */
    template<typename T>
    void scale(T &val) const {
        switch (this->sf_op) {
        case scale_op_t::SO_IDENTITY:
            break;
        case scale_op_t::SO_DIVIDE:
            this->apply(val, true);
            break;
        case scale_op_t::SO_MULTIPLY:
            this->apply(val, false);
            break;
        }
    }

    scale_op_t sf_op;
    double sf_value;

private:
    /**
     * @return True if the given double can be converted to T without
     *   invoking undefined behavior, i.e. it is not NaN and lies within the
     *   range of T.
     */
    template<typename T>
    static bool fits_in(double x) {
        const double lo = static_cast<double>(std::numeric_limits<T>::lowest());
        const double hi = static_cast<double>(std::numeric_limits<T>::max());
        // When T has more value bits than a double's mantissa (e.g. int64_t),
        // max() rounds up during the conversion above, so the upper bound has
        // to be exclusive.
        const bool hi_is_exact = std::numeric_limits<T>::digits
                                 <= std::numeric_limits<double>::digits;

        // Written so that a NaN fails the test.
        return x >= lo && (hi_is_exact ? x <= hi : x < hi);
    }

    /**
     * @return True if the factor should be applied using T's own arithmetic:
     *   always for non-integer types, and for integer types only when the
     *   factor is a whole number that T can hold.
     */
    template<typename T>
    static bool factor_is_exact(double factor) {
        if (!std::numeric_limits<T>::is_integer) {
            return true;
        }

        return fits_in<T>(factor)
               && static_cast<double>(static_cast<T>(factor)) == factor;
    }

    /**
     * Multiply or divide the value by sf_value.
     *
     * @param val The value to update in place.
     * @param divide True to divide by the factor, false to multiply.
     */
    template<typename T>
    void apply(T &val, bool divide) const {
        if (factor_is_exact<T>(this->sf_value)) {
            const T factor = static_cast<T>(this->sf_value);

            if (!divide) {
                val = val * factor;
            } else if (!std::numeric_limits<T>::is_integer || factor != T()) {
                // Integer division by zero is skipped; floating-point
                // division by zero keeps its usual inf/NaN result.
                val = val / factor;
            }
            return;
        }

        const double wide = static_cast<double>(val);
        const double scaled = divide ? wide / this->sf_value
                                     : wide * this->sf_value;

        if (fits_in<T>(scaled)) {
            val = static_cast<T>(scaled);
        }
    }
};
95 
/**
 * The kinds of data that a logline_value can hold.
 *
 * The enumerators are numbered consecutively, with VALUE_UNKNOWN pinned to -1
 * so that the remaining kinds start at zero.
 */
enum class value_kind_t : int {
    /** Placeholder that sorts before every other kind. */
    VALUE_UNKNOWN = -1,
    /** No value. */
    VALUE_NULL,
    VALUE_TEXT,
    VALUE_INTEGER,
    VALUE_FLOAT,
    VALUE_BOOLEAN,
    VALUE_JSON,
    VALUE_STRUCT,
    VALUE_QUOTED,
    VALUE_W3C_QUOTED,
    VALUE_TIMESTAMP,
    VALUE_XML,

    /** Number of kinds counted from VALUE_NULL; must remain last. */
    VALUE__MAX
};
112 
113 struct logline_value_meta {
logline_value_metalogline_value_meta114     logline_value_meta(
115         intern_string_t name,
116         value_kind_t kind,
117         int col = -1,
118         const nonstd::optional<log_format *>& format = nonstd::nullopt)
119         : lvm_name(name), lvm_kind(kind), lvm_column(col), lvm_format(format)
120     {};
121 
is_hiddenlogline_value_meta122     bool is_hidden() const {
123         return this->lvm_hidden || this->lvm_user_hidden;
124     }
125 
with_struct_namelogline_value_meta126     logline_value_meta& with_struct_name(intern_string_t name) {
127         this->lvm_struct_name = name;
128         return *this;
129     }
130 
131     intern_string_t lvm_name;
132     value_kind_t lvm_kind;
133     int lvm_column{-1};
134     bool lvm_identifier{false};
135     bool lvm_hidden{false};
136     bool lvm_user_hidden{false};
137     bool lvm_from_module{false};
138     intern_string_t lvm_struct_name;
139     nonstd::optional<log_format *> lvm_format;
140 };
141 
142 class logline_value {
143 public:
144 
logline_value(logline_value_meta lvm)145     logline_value(logline_value_meta lvm)
146         : lv_meta(std::move(lvm)) {
147         this->lv_meta.lvm_kind = value_kind_t::VALUE_NULL;
148     };
logline_value(logline_value_meta lvm,bool b)149     logline_value(logline_value_meta lvm, bool b)
150         : lv_meta(std::move(lvm)),
151           lv_value((int64_t)(b ? 1 : 0)) {
152         this->lv_meta.lvm_kind = value_kind_t::VALUE_BOOLEAN;
153     }
logline_value(logline_value_meta lvm,int64_t i)154     logline_value(logline_value_meta lvm, int64_t i)
155         : lv_meta(std::move(lvm)), lv_value(i) {
156         this->lv_meta.lvm_kind = value_kind_t::VALUE_INTEGER;
157     };
logline_value(logline_value_meta lvm,double i)158     logline_value(logline_value_meta lvm, double i)
159         : lv_meta(std::move(lvm)), lv_value(i) {
160         this->lv_meta.lvm_kind = value_kind_t::VALUE_FLOAT;
161     };
logline_value(logline_value_meta lvm,shared_buffer_ref & sbr)162     logline_value(logline_value_meta lvm, shared_buffer_ref &sbr)
163         : lv_meta(std::move(lvm)), lv_sbr(sbr) {
164     };
logline_value(logline_value_meta lvm,const intern_string_t val)165     logline_value(logline_value_meta lvm, const intern_string_t val)
166             : lv_meta(std::move(lvm)), lv_intern_string(val) {
167 
168     };
169     logline_value(logline_value_meta lvm, shared_buffer_ref &sbr,
170                   struct line_range origin);
171 
apply_scaling(const scaling_factor * sf)172     void apply_scaling(const scaling_factor *sf) {
173         if (sf != nullptr) {
174             switch (this->lv_meta.lvm_kind) {
175                 case value_kind_t::VALUE_INTEGER:
176                     sf->scale(this->lv_value.i);
177                     break;
178                 case value_kind_t::VALUE_FLOAT:
179                     sf->scale(this->lv_value.d);
180                     break;
181                 default:
182                     break;
183             }
184         }
185     }
186 
187     std::string to_string() const;
188 
text_value() const189     const char *text_value() const {
190         if (this->lv_sbr.empty()) {
191             if (this->lv_intern_string.empty()) {
192                 return "";
193             }
194             return this->lv_intern_string.get();
195         }
196         return this->lv_sbr.get_data();
197     };
198 
text_length() const199     size_t text_length() const {
200         if (this->lv_sbr.empty()) {
201             return this->lv_intern_string.size();
202         }
203         return this->lv_sbr.length();
204     }
205 
206     struct line_range origin_in_full_msg(const char *msg, ssize_t len) const;
207 
208     logline_value_meta lv_meta;
209     union value_u {
210         int64_t i;
211         double  d;
212 
value_u()213         value_u() : i(0) { };
value_u(int64_t i)214         value_u(int64_t i) : i(i) { };
value_u(double d)215         value_u(double d) : d(d) { };
216     } lv_value;
217     shared_buffer_ref lv_sbr;
218     int lv_sub_offset{0};
219     intern_string_t lv_intern_string;
220     struct line_range lv_origin;
221 };
222 
223 struct logline_value_stats {
224 
logline_value_statslogline_value_stats225     logline_value_stats() {
226         this->clear();
227     };
228 
clearlogline_value_stats229     void clear() {
230         this->lvs_count = 0;
231         this->lvs_total = 0;
232         this->lvs_min_value = std::numeric_limits<double>::max();
233         this->lvs_max_value = -std::numeric_limits<double>::max();
234     };
235 
mergelogline_value_stats236     void merge(const logline_value_stats &other) {
237         if (other.lvs_count == 0) {
238             return;
239         }
240 
241         require(other.lvs_min_value <= other.lvs_max_value);
242 
243         if (other.lvs_min_value < this->lvs_min_value) {
244             this->lvs_min_value = other.lvs_min_value;
245         }
246         if (other.lvs_max_value > this->lvs_max_value) {
247             this->lvs_max_value = other.lvs_max_value;
248         }
249         this->lvs_count += other.lvs_count;
250         this->lvs_total += other.lvs_total;
251 
252         ensure(this->lvs_count >= 0);
253         ensure(this->lvs_min_value <= this->lvs_max_value);
254     };
255 
add_valuelogline_value_stats256     void add_value(double value) {
257         if (value < this->lvs_min_value) {
258             this->lvs_min_value = value;
259         }
260         if (value > this->lvs_max_value) {
261             this->lvs_max_value = value;
262         }
263         this->lvs_count += 1;
264         this->lvs_total += value;
265     };
266 
267     int64_t lvs_count;
268     double lvs_total;
269     double lvs_min_value;
270     double lvs_max_value;
271 };
272 
273 struct logline_value_cmp {
logline_value_cmplogline_value_cmp274     explicit logline_value_cmp(const intern_string_t *name = nullptr, int col = -1)
275         : lvc_name(name), lvc_column(col) {
276 
277     };
278 
operator ()logline_value_cmp279     bool operator()(const logline_value &lv) const {
280         bool retval = true;
281 
282         if (this->lvc_name != nullptr) {
283             retval = retval && ((*this->lvc_name) == lv.lv_meta.lvm_name);
284         }
285         if (this->lvc_column != -1) {
286             retval = retval && (this->lvc_column == lv.lv_meta.lvm_column);
287         }
288 
289         return retval;
290     };
291 
292     const intern_string_t *lvc_name;
293     int lvc_column;
294 };
295 
296 class log_vtab_impl;
297 
298 /**
299  * Base class for implementations of log format parsers.
300  */
class log_format {
public:

    /**
     * @return The collection of builtin log formats.
     */
    static std::vector<std::shared_ptr<log_format>> &get_root_formats();

    /**
     * Find a root format by name.
     *
     * @param name The name to compare against each format's get_name().
     * @return The first entry of get_root_formats() whose name matches, or a
     *   null pointer if there is none.
     */
    static std::shared_ptr<log_format> find_root_format(const char *name) {
        auto& fmts = get_root_formats();
        for (auto& lf : fmts) {
            if (lf->get_name() == name) {
                return lf;
            }
        }
        return nullptr;
    }

    /**
     * A named action with a label and a command line.  Instances are ordered
     * by ad_name only.
     */
    struct action_def {
        std::string ad_name;
        std::string ad_label;
        std::vector<std::string> ad_cmdline;
        bool ad_capture_output;

        action_def() : ad_capture_output(false) { };

        bool operator<(const action_def &rhs) const {
            return this->ad_name < rhs.ad_name;
        };
    };

    virtual ~log_format() = default;

    /**
     * Clear lf_pattern_locks and lf_date_time.
     */
    virtual void clear()
    {
        this->lf_pattern_locks.clear();
        this->lf_date_time.clear();
    };

    /**
     * Get the name of this log format.
     *
     * @return The log format name.
     */
    virtual const intern_string_t get_name() const = 0;

    /**
     * @param filename The file name to test.
     * @return The default implementation accepts every name and always
     *   returns true.
     */
    virtual bool match_name(const std::string &filename) { return true; };

    /** The possible results of scan(). */
    enum scan_result_t {
        SCAN_MATCH,
        SCAN_NO_MATCH,
        SCAN_INCOMPLETE,
    };

    /**
     * Scan a log line to see if it matches this log format.
     *
     * @param lf The log file being scanned.
     * @param dst The vector of loglines that the formatter should append to
     *   if it detected a match.
     * @param li Information about the line being scanned.
     *   NOTE(review): presumably this carries the file offset of the line;
     *   confirm against the line_info definition.
     * @param sbr The buffer being scanned.
     *   NOTE(review): presumably the contents of the line; confirm.
     * @return One of the scan_result_t values.
     */
    virtual scan_result_t scan(logfile &lf,
                               std::vector<logline> &dst,
                               const line_info &li,
                               shared_buffer_ref &sbr) = 0;

    /**
     * @return The default implementation returns false and does not modify
     *   len_out.
     */
    virtual bool scan_for_partial(shared_buffer_ref &sbr, size_t &len_out) const {
        return false;
    };

    /**
     * Remove redundant data from the log line string.  The default
     * implementation leaves the line untouched.
     *
     * XXX We should probably also add some attributes to the line here, so we
     * can highlight things like the date.
     *
     * @param line The log line to edit.
     */
    virtual void scrub(std::string &line) { };

    /**
     * Annotate a line with attributes and extracted values.  The default
     * implementation does nothing.
     */
    virtual void
    annotate(uint64_t line_number, shared_buffer_ref &sbr, string_attrs_t &sa,
                 std::vector<logline_value> &values, bool annotate_module = true) const
    { };

    /**
     * Produce a rewritten form of a line.  The default implementation copies
     * the bytes of the line into value_out unchanged and does not use ec or
     * sa.
     *
     * @param value_out Receives the resulting text.
     */
    virtual void rewrite(exec_context &ec,
                         shared_buffer_ref &line,
                         string_attrs_t &sa,
                         std::string &value_out) {
        value_out.assign(line.get_data(), line.length());
    };

    /**
     * @param name The name of the value to look up.
     * @return The default implementation returns nullptr.
     */
    virtual const logline_value_stats *stats_for_value(const intern_string_t &name) const {
        return nullptr;
    };

    /**
     * NOTE(review): presumably returns a copy of this format that is locked
     * to the pattern given by fmt_lock; confirm against the implementations.
     */
    virtual std::shared_ptr<log_format> specialized(int fmt_lock = -1) = 0;

    /**
     * @return The default implementation returns a null pointer.
     */
    virtual std::shared_ptr<log_vtab_impl> get_vtab_impl() const {
        return nullptr;
    };

    /**
     * The default implementation does nothing and leaves sbr untouched.
     */
    virtual void get_subline(const logline &ll, shared_buffer_ref &sbr, bool full_message = false) {
    };

    /**
     * @return The default implementation returns nullptr.
     */
    virtual const std::vector<std::string> *get_actions(const logline_value &lv) const {
        return nullptr;
    };

    /**
     * @return The default implementation returns a set that contains only
     *   the string "default".
     */
    virtual std::set<std::string> get_source_path() const {
        std::set<std::string> retval;

        retval.insert("default");

        return retval;
    };

    /**
     * @return The default implementation changes nothing and returns false.
     */
    virtual bool hide_field(const intern_string_t field_name, bool val) {
        return false;
    };

    /**
     * @return nullptr if lf_timestamp_format is empty, otherwise a pointer to
     *   its first element.
     *   NOTE(review): callers presumably treat the result as a
     *   nullptr-terminated array; confirm that lf_timestamp_format always
     *   ends with a nullptr entry.
     */
    const char * const *get_timestamp_formats() const {
        if (this->lf_timestamp_format.empty()) {
            return nullptr;
        }

        return &this->lf_timestamp_format[0];
    };

    void check_for_new_year(std::vector<logline> &dst, exttm log_tv,
                            timeval timeval1);

    virtual std::string get_pattern_name(uint64_t line_number) const;

    /**
     * @return The default implementation returns an empty string.
     */
    virtual std::string get_pattern_regex(uint64_t line_number) const {
        return "";
    };

    /**
     * Pairs a line number with a pattern index; stored in lf_pattern_locks.
     *
     * NOTE(review): the constructor takes pfl_pat_index as uint32_t while the
     * member is an int, and last_pattern_index() uses -1 for "none".
     */
    struct pattern_for_lines {
        pattern_for_lines(uint32_t pfl_line, uint32_t pfl_pat_index);

        uint32_t pfl_line;
        int pfl_pat_index;
    };

    /**
     * @return The pattern index of the last entry in lf_pattern_locks, or -1
     *   if there are no entries.
     */
    int last_pattern_index() const {
        if (this->lf_pattern_locks.empty()) {
            return -1;
        }

        return this->lf_pattern_locks.back().pfl_pat_index;
    }

    int pattern_index_for_line(uint64_t line_number) const;

    uint8_t lf_mod_index{0};
    date_time_scanner lf_date_time;
    std::vector<pattern_for_lines> lf_pattern_locks;
    intern_string_t lf_timestamp_field{intern_string::lookup("timestamp", -1)};
    std::vector<const char *> lf_timestamp_format;
    unsigned int lf_timestamp_flags{0};
    // NOTE(review): this header uses std::map without including <map>
    // directly.
    std::map<std::string, action_def> lf_action_defs;
    std::vector<logline_value_stats> lf_value_stats;
    std::vector<highlighter> lf_highlighters;
    bool lf_is_self_describing{false};
    bool lf_time_ordered{true};
    bool lf_specialized{false};
protected:
    static std::vector<std::shared_ptr<log_format>> lf_root_formats;

    /**
     * A regular expression paired with the string it was built from.
     */
    struct pcre_format {
        /**
         * @param regex The pattern; the pointer itself is kept in name and
         *   the index reported for the "timestamp" capture name is stored in
         *   pf_timestamp_index.
         */
        pcre_format(const char *regex) : name(regex), pcre(regex) {
            this->pf_timestamp_index = this->pcre.name_index("timestamp");
        };

        /** Construct an entry with a null name and an empty pattern. */
        pcre_format() : name(nullptr), pcre("") { };

        const char *name;
        pcrepp pcre;
        int pf_timestamp_index{-1};
    };

    static bool next_format(pcre_format *fmt, int &index, int &locked_index);

    const char *log_scanf(uint32_t line_number,
                          const char *line,
                          size_t len,
                          pcre_format *fmt,
                          const char *time_fmt[],
                          struct exttm *tm_out,
                          struct timeval *tv_out,
                          ...);
};
496 
497 #endif
498