1 /**
2  * Copyright (c) 2020, Timothy Stack
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * * Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * * Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  * * Neither the name of Timothy Stack nor the names of its contributors
15  * may be used to endorse or promote products derived from this software
16  * without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * @file log_format_ext.hh
30  */
31 
32 #ifndef lnav_log_format_ext_hh
33 #define lnav_log_format_ext_hh
34 
35 #include <unordered_map>
36 
37 #include "log_format.hh"
38 #include "yajlpp/yajlpp.hh"
39 
40 class module_format;
41 
42 class external_log_format : public log_format {
43 
44 public:
45     struct sample {
sampleexternal_log_format::sample46         sample() : s_level(LEVEL_UNKNOWN) {};
47 
48         std::string s_line;
49         log_level_t s_level;
50     };
51 
52     struct value_def {
value_defexternal_log_format::value_def53         value_def(intern_string_t name, value_kind_t kind, int col, log_format *format)
54         : vd_meta(name, kind, col, format) {};
55 
56         logline_value_meta vd_meta;
57         std::string vd_collate;
58         bool vd_foreign_key{false};
59         intern_string_t vd_unit_field;
60         std::map<const intern_string_t, scaling_factor> vd_unit_scaling;
61         ssize_t vd_values_index{-1};
62         bool vd_internal{false};
63         std::vector<std::string> vd_action_list;
64         std::string vd_rewriter;
65         std::string vd_description;
66     };
67 
68     struct indexed_value_def {
indexed_value_defexternal_log_format::indexed_value_def69         indexed_value_def(int index = -1,
70                           int unit_index = -1,
71                           std::shared_ptr<value_def> vd = nullptr)
72             : ivd_index(index),
73               ivd_unit_field_index(unit_index),
74               ivd_value_def(std::move(vd)) {
75         }
76 
77         int ivd_index;
78         int ivd_unit_field_index;
79         std::shared_ptr<value_def> ivd_value_def;
80 
operator <external_log_format::indexed_value_def81         bool operator<(const indexed_value_def &rhs) const {
82             return this->ivd_index < rhs.ivd_index;
83         }
84     };
85 
86     struct pattern {
87         std::string p_config_path;
88         std::string p_string;
89         std::unique_ptr<pcrepp> p_pcre;
90         std::vector<indexed_value_def> p_value_by_index;
91         std::vector<int> p_numeric_value_indexes;
92         int p_timestamp_field_index{-1};
93         int p_level_field_index{-1};
94         int p_module_field_index{-1};
95         int p_opid_field_index{-1};
96         int p_body_field_index{-1};
97         int p_timestamp_end{-1};
98         bool p_module_format{false};
99     };
100 
101     struct level_pattern {
102         std::string lp_regex;
103         std::shared_ptr<pcrepp> lp_pcre;
104     };
105 
106     struct yajl_handle_deleter {
operator ()external_log_format::yajl_handle_deleter107         void operator()(yajl_handle handle) const {
108             if (handle != nullptr) {
109                 yajl_free(handle);
110             }
111         }
112     };
113 
external_log_format(const intern_string_t name)114     external_log_format(const intern_string_t name)
115         : elf_column_count(0),
116           elf_timestamp_divisor(1.0),
117           elf_level_field(intern_string::lookup("level", -1)),
118           elf_body_field(intern_string::lookup("body", -1)),
119           elf_multiline(true),
120           elf_container(false),
121           elf_has_module_format(false),
122           elf_builtin_format(false),
123           elf_type(ELF_TYPE_TEXT),
124           jlf_hide_extra(false),
125           jlf_cached_offset(-1),
126           jlf_yajl_handle(nullptr, yajl_handle_deleter()),
127           elf_name(name) {
128         this->jlf_line_offsets.reserve(128);
129     };
130 
get_name() const131     const intern_string_t get_name() const {
132         return this->elf_name;
133     };
134 
135     bool match_name(const std::string &filename);
136 
137     scan_result_t scan(logfile &lf,
138                        std::vector<logline> &dst,
139                        const line_info &offset,
140                        shared_buffer_ref &sbr);
141 
142     bool scan_for_partial(shared_buffer_ref &sbr, size_t &len_out) const;
143 
144     void annotate(uint64_t line_number, shared_buffer_ref &line, string_attrs_t &sa,
145                   std::vector<logline_value> &values, bool annotate_module = true) const;
146 
147     void rewrite(exec_context &ec,
148                  shared_buffer_ref &line,
149                  string_attrs_t &sa,
150                  std::string &value_out);
151 
152     void build(std::vector<std::string> &errors);
153 
154     void register_vtabs(log_vtab_manager *vtab_manager,
155                         std::vector<std::string> &errors);
156 
157     bool match_samples(const std::vector<sample> &samples) const;
158 
hide_field(const intern_string_t field_name,bool val)159     bool hide_field(const intern_string_t field_name, bool val) {
160         auto vd_iter = this->elf_value_defs.find(field_name);
161 
162         if (vd_iter == this->elf_value_defs.end()) {
163             return false;
164         }
165 
166         vd_iter->second->vd_meta.lvm_user_hidden = val;
167         return true;
168     };
169 
170     std::shared_ptr<log_format> specialized(int fmt_lock);
171 
stats_for_value(const intern_string_t & name) const172     const logline_value_stats *stats_for_value(const intern_string_t &name) const {
173         const logline_value_stats *retval = nullptr;
174 
175         for (size_t lpc = 0; lpc < this->elf_numeric_value_defs.size(); lpc++) {
176             value_def &vd = *this->elf_numeric_value_defs[lpc];
177 
178             if (vd.vd_meta.lvm_name == name) {
179                 retval = &this->lf_value_stats[lpc];
180                 break;
181             }
182         }
183 
184         return retval;
185     };
186 
187     void get_subline(const logline &ll, shared_buffer_ref &sbr, bool full_message);
188 
189     std::shared_ptr<log_vtab_impl> get_vtab_impl() const;
190 
get_actions(const logline_value & lv) const191     const std::vector<std::string> *get_actions(const logline_value &lv) const {
192         const std::vector<std::string> *retval = nullptr;
193 
194         const auto iter = this->elf_value_defs.find(lv.lv_meta.lvm_name);
195         if (iter != this->elf_value_defs.end()) {
196             retval = &iter->second->vd_action_list;
197         }
198 
199         return retval;
200     };
201 
get_source_path() const202     std::set<std::string> get_source_path() const {
203         return this->elf_source_path;
204     };
205 
206     enum json_log_field {
207         JLF_CONSTANT,
208         JLF_VARIABLE
209     };
210 
211     struct json_format_element {
212         enum class align_t {
213             LEFT,
214             RIGHT,
215         };
216 
217         enum class overflow_t {
218             ABBREV,
219             TRUNCATE,
220             DOTDOT,
221         };
222 
223         enum class transform_t {
224             NONE,
225             UPPERCASE,
226             LOWERCASE,
227             CAPITALIZE,
228         };
229 
json_format_elementexternal_log_format::json_format_element230         json_format_element()
231             : jfe_type(JLF_CONSTANT), jfe_default_value("-"), jfe_min_width(0),
232               jfe_max_width(LLONG_MAX), jfe_align(align_t::LEFT),
233               jfe_overflow(overflow_t::ABBREV),
234               jfe_text_transform(transform_t::NONE)
235         { };
236 
237         json_log_field jfe_type;
238         intern_string_t jfe_value;
239         std::string jfe_default_value;
240         long long jfe_min_width;
241         long long jfe_max_width;
242         align_t jfe_align;
243         overflow_t jfe_overflow;
244         transform_t jfe_text_transform;
245         std::string jfe_ts_format;
246     };
247 
248     struct json_field_cmp {
json_field_cmpexternal_log_format::json_field_cmp249         json_field_cmp(json_log_field type,
250                        const intern_string_t name)
251             : jfc_type(type), jfc_field_name(name) {
252         };
253 
operator ()external_log_format::json_field_cmp254         bool operator()(const json_format_element &jfe) const {
255             return (this->jfc_type == jfe.jfe_type &&
256                     this->jfc_field_name == jfe.jfe_value);
257         };
258 
259         json_log_field jfc_type;
260         const intern_string_t jfc_field_name;
261     };
262 
263     struct highlighter_def {
highlighter_defexternal_log_format::highlighter_def264         highlighter_def() : hd_underline(false), hd_blink(false) {
265         }
266 
267         std::string hd_pattern;
268         std::string hd_color;
269         std::string hd_background_color;
270         bool hd_underline;
271         bool hd_blink;
272     };
273 
value_line_count(const intern_string_t ist,bool top_level,const unsigned char * str=nullptr,ssize_t len=-1) const274     long value_line_count(const intern_string_t ist,
275                           bool top_level,
276                           const unsigned char *str = nullptr,
277                           ssize_t len = -1) const {
278         const auto iter = this->elf_value_defs.find(ist);
279         long line_count = (str != NULL) ? std::count(&str[0], &str[len], '\n') + 1 : 1;
280 
281         if (iter == this->elf_value_defs.end()) {
282             return (this->jlf_hide_extra || !top_level) ? 0 : line_count;
283         }
284 
285         if (iter->second->vd_meta.lvm_hidden) {
286             return 0;
287         }
288 
289         if (std::find_if(this->jlf_line_format.begin(),
290                          this->jlf_line_format.end(),
291                          json_field_cmp(JLF_VARIABLE, ist)) !=
292             this->jlf_line_format.end()) {
293             return line_count - 1;
294         }
295 
296         return line_count;
297     };
298 
has_value_def(const intern_string_t ist) const299     bool has_value_def(const intern_string_t ist) const {
300         const auto iter = this->elf_value_defs.find(ist);
301 
302         return iter != this->elf_value_defs.end();
303     };
304 
get_pattern_name(uint64_t line_number) const305     std::string get_pattern_name(uint64_t line_number) const {
306         if (this->elf_type != ELF_TYPE_TEXT) {
307             return "structured";
308         }
309         int pat_index = this->pattern_index_for_line(line_number);
310         return this->elf_pattern_order[pat_index]->p_config_path;
311     }
312 
get_pattern_regex(uint64_t line_number) const313     std::string get_pattern_regex(uint64_t line_number) const {
314         if (this->elf_type != ELF_TYPE_TEXT) {
315             return "";
316         }
317         int pat_index = this->pattern_index_for_line(line_number);
318         return this->elf_pattern_order[pat_index]->p_string;
319     }
320 
convert_level(const pcre_input & pi,pcre_context::capture_t * level_cap) const321     log_level_t convert_level(const pcre_input &pi, pcre_context::capture_t *level_cap) const {
322         log_level_t retval = LEVEL_INFO;
323 
324         if (level_cap != nullptr && level_cap->is_valid()) {
325             pcre_context_static<128> pc_level;
326             pcre_input pi_level(pi.get_substr_start(level_cap),
327                                 0,
328                                 level_cap->length());
329 
330             if (this->elf_level_patterns.empty()) {
331                 retval = string2level(pi_level.get_string(), level_cap->length());
332             } else {
333                 for (const auto &elf_level_pattern : this->elf_level_patterns) {
334                     if (elf_level_pattern.second.lp_pcre->match(pc_level, pi_level)) {
335                         retval = elf_level_pattern.first;
336                         break;
337                     }
338                 }
339             }
340         }
341 
342         return retval;
343     }
344 
345     typedef std::map<intern_string_t, module_format> mod_map_t;
346     static mod_map_t MODULE_FORMATS;
347     static std::vector<std::shared_ptr<external_log_format>> GRAPH_ORDERED_FORMATS;
348 
349     std::set<std::string> elf_source_path;
350     std::list<intern_string_t> elf_collision;
351     std::string elf_file_pattern;
352     std::shared_ptr<pcrepp> elf_filename_pcre;
353     std::map<std::string, std::shared_ptr<pattern>> elf_patterns;
354     std::vector<std::shared_ptr<pattern>> elf_pattern_order;
355     std::vector<sample> elf_samples;
356     std::unordered_map<const intern_string_t, std::shared_ptr<value_def>>
357         elf_value_defs;
358     std::vector<std::shared_ptr<value_def>> elf_value_def_order;
359     std::vector<std::shared_ptr<value_def>> elf_numeric_value_defs;
360     int elf_column_count;
361     double elf_timestamp_divisor;
362     intern_string_t elf_level_field;
363     intern_string_t elf_body_field;
364     intern_string_t elf_module_id_field;
365     intern_string_t elf_opid_field;
366     std::map<log_level_t, level_pattern> elf_level_patterns;
367     std::vector<std::pair<int64_t, log_level_t> > elf_level_pairs;
368     bool elf_multiline;
369     bool elf_container;
370     bool elf_has_module_format;
371     bool elf_builtin_format;
372     std::vector<std::pair<intern_string_t, std::string> > elf_search_tables;
373     std::map<const intern_string_t, highlighter_def> elf_highlighter_patterns;
374 
375     enum elf_type_t {
376         ELF_TYPE_TEXT,
377         ELF_TYPE_JSON,
378         ELF_TYPE_CSV,
379     };
380 
381     elf_type_t elf_type;
382 
json_append_to_cache(const char * value,ssize_t len)383     void json_append_to_cache(const char *value, ssize_t len) {
384         size_t old_size = this->jlf_cached_line.size();
385         if (len == -1) {
386             len = strlen(value);
387         }
388         this->jlf_cached_line.resize(old_size + len);
389         memcpy(&(this->jlf_cached_line[old_size]), value, len);
390     };
391 
json_append_to_cache(ssize_t len)392     void json_append_to_cache(ssize_t len) {
393         size_t old_size = this->jlf_cached_line.size();
394         this->jlf_cached_line.resize(old_size + len);
395         memset(&this->jlf_cached_line[old_size], ' ', len);
396     };
397 
json_append(const json_format_element & jfe,const char * value,ssize_t len)398     void json_append(const json_format_element &jfe, const char *value, ssize_t len) {
399         if (len == -1) {
400             len = strlen(value);
401         }
402         if (jfe.jfe_align == json_format_element::align_t::RIGHT) {
403             if (len < jfe.jfe_min_width) {
404                 this->json_append_to_cache(jfe.jfe_min_width - len);
405             }
406         }
407         this->json_append_to_cache(value, len);
408         if (jfe.jfe_align == json_format_element::align_t::LEFT) {
409             if (len < jfe.jfe_min_width) {
410                 this->json_append_to_cache(jfe.jfe_min_width - len);
411             }
412         }
413     };
414 
get_value_meta(intern_string_t field_name,value_kind_t kind)415     logline_value_meta get_value_meta(intern_string_t field_name,
416                                       value_kind_t kind) {
417         auto iter = this->elf_value_defs.find(field_name);
418 
419         if (iter == this->elf_value_defs.end()) {
420             auto retval = logline_value_meta(field_name, kind, -1, this);
421 
422             retval.lvm_hidden = this->jlf_hide_extra;
423             return retval;
424         }
425 
426         auto lvm = iter->second->vd_meta;
427 
428         lvm.lvm_kind = kind;
429         return lvm;
430     }
431 
432     bool jlf_hide_extra;
433     std::vector<json_format_element> jlf_line_format;
434     int jlf_line_format_init_count{0};
435     std::vector<logline_value> jlf_line_values;
436 
437     off_t jlf_cached_offset;
438     bool jlf_cached_full{false};
439     std::vector<off_t> jlf_line_offsets;
440     shared_buffer jlf_share_manager;
441     std::vector<char> jlf_cached_line;
442     string_attrs_t jlf_line_attrs;
443     std::shared_ptr<yajlpp_parse_context> jlf_parse_context;
444     std::shared_ptr<yajl_handle_t> jlf_yajl_handle;
445 private:
446     const intern_string_t elf_name;
447 
448     static uint8_t module_scan(const pcre_input &pi,
449                                pcre_context::capture_t *body_cap,
450                                const intern_string_t &mod_name);
451 };
452 
453 class module_format {
454 
455 public:
456     std::shared_ptr<log_format> mf_mod_format;
457 };
458 
459 #endif
460