1 /** 2 * Copyright (c) 2007-2012, Timothy Stack 3 * 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * 9 * * Redistributions of source code must retain the above copyright notice, this 10 * list of conditions and the following disclaimer. 11 * * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * * Neither the name of Timothy Stack nor the names of its contributors 15 * may be used to endorse or promote products derived from this software 16 * without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY 19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY 22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 * 29 * @file log_format.hh 30 */ 31 32 #ifndef log_format_hh 33 #define log_format_hh 34 35 #include <time.h> 36 #include <sys/time.h> 37 #include <stdint.h> 38 #define __STDC_FORMAT_MACROS 39 #include <inttypes.h> 40 #include <sys/types.h> 41 42 #include <memory> 43 #include <set> 44 #include <list> 45 #include <string> 46 #include <utility> 47 #include <vector> 48 #include <limits> 49 #include <memory> 50 #include <sstream> 51 52 #include "optional.hpp" 53 #include "pcrepp/pcrepp.hh" 54 #include "base/lnav_log.hh" 55 #include "base/date_time_scanner.hh" 56 #include "byte_array.hh" 57 #include "base/intern_string.hh" 58 #include "shared_buffer.hh" 59 #include "highlighter.hh" 60 #include "log_level.hh" 61 #include "line_buffer.hh" 62 #include "log_format_fwd.hh" 63 64 struct sqlite3; 65 class logfile; 66 class log_vtab_manager; 67 struct exec_context; 68 69 enum class scale_op_t { 70 SO_IDENTITY, 71 SO_MULTIPLY, 72 SO_DIVIDE 73 }; 74 75 struct scaling_factor { scaling_factorscaling_factor76 scaling_factor() : sf_op(scale_op_t::SO_IDENTITY), sf_value(1) { }; 77 78 template<typename T> scalescaling_factor79 void scale(T &val) const { 80 switch (this->sf_op) { 81 case scale_op_t::SO_IDENTITY: 82 break; 83 case scale_op_t::SO_DIVIDE: 84 val = val / (T)this->sf_value; 85 break; 86 case scale_op_t::SO_MULTIPLY: 87 val = val * (T)this->sf_value; 88 break; 89 } 90 } 91 92 scale_op_t sf_op; 93 double sf_value; 94 }; 95 96 enum class value_kind_t : int { 97 VALUE_UNKNOWN = -1, 98 VALUE_NULL, 99 VALUE_TEXT, 100 VALUE_INTEGER, 101 VALUE_FLOAT, 102 VALUE_BOOLEAN, 103 VALUE_JSON, 104 VALUE_STRUCT, 105 VALUE_QUOTED, 106 VALUE_W3C_QUOTED, 107 VALUE_TIMESTAMP, 108 VALUE_XML, 109 110 VALUE__MAX 111 }; 112 113 struct logline_value_meta { logline_value_metalogline_value_meta114 logline_value_meta( 115 intern_string_t name, 116 value_kind_t kind, 117 int col = -1, 118 const nonstd::optional<log_format *>& format = nonstd::nullopt) 119 : lvm_name(name), lvm_kind(kind), lvm_column(col), lvm_format(format) 120 {}; 121 is_hiddenlogline_value_meta122 bool is_hidden() const { 123 return this->lvm_hidden || this->lvm_user_hidden; 124 } 125 with_struct_namelogline_value_meta126 logline_value_meta& with_struct_name(intern_string_t name) { 127 this->lvm_struct_name = name; 128 return *this; 129 } 130 131 intern_string_t lvm_name; 132 value_kind_t lvm_kind; 133 int lvm_column{-1}; 134 bool lvm_identifier{false}; 135 bool lvm_hidden{false}; 136 bool lvm_user_hidden{false}; 137 bool lvm_from_module{false}; 138 intern_string_t lvm_struct_name; 139 nonstd::optional<log_format *> lvm_format; 140 }; 141 142 class logline_value { 143 public: 144 logline_value(logline_value_meta lvm)145 logline_value(logline_value_meta lvm) 146 : lv_meta(std::move(lvm)) { 147 this->lv_meta.lvm_kind = value_kind_t::VALUE_NULL; 148 }; logline_value(logline_value_meta lvm,bool b)149 logline_value(logline_value_meta lvm, bool b) 150 : lv_meta(std::move(lvm)), 151 lv_value((int64_t)(b ? 1 : 0)) { 152 this->lv_meta.lvm_kind = value_kind_t::VALUE_BOOLEAN; 153 } logline_value(logline_value_meta lvm,int64_t i)154 logline_value(logline_value_meta lvm, int64_t i) 155 : lv_meta(std::move(lvm)), lv_value(i) { 156 this->lv_meta.lvm_kind = value_kind_t::VALUE_INTEGER; 157 }; logline_value(logline_value_meta lvm,double i)158 logline_value(logline_value_meta lvm, double i) 159 : lv_meta(std::move(lvm)), lv_value(i) { 160 this->lv_meta.lvm_kind = value_kind_t::VALUE_FLOAT; 161 }; logline_value(logline_value_meta lvm,shared_buffer_ref & sbr)162 logline_value(logline_value_meta lvm, shared_buffer_ref &sbr) 163 : lv_meta(std::move(lvm)), lv_sbr(sbr) { 164 }; logline_value(logline_value_meta lvm,const intern_string_t val)165 logline_value(logline_value_meta lvm, const intern_string_t val) 166 : lv_meta(std::move(lvm)), lv_intern_string(val) { 167 168 }; 169 logline_value(logline_value_meta lvm, shared_buffer_ref &sbr, 170 struct line_range origin); 171 apply_scaling(const scaling_factor * sf)172 void apply_scaling(const scaling_factor *sf) { 173 if (sf != nullptr) { 174 switch (this->lv_meta.lvm_kind) { 175 case value_kind_t::VALUE_INTEGER: 176 sf->scale(this->lv_value.i); 177 break; 178 case value_kind_t::VALUE_FLOAT: 179 sf->scale(this->lv_value.d); 180 break; 181 default: 182 break; 183 } 184 } 185 } 186 187 std::string to_string() const; 188 text_value() const189 const char *text_value() const { 190 if (this->lv_sbr.empty()) { 191 if (this->lv_intern_string.empty()) { 192 return ""; 193 } 194 return this->lv_intern_string.get(); 195 } 196 return this->lv_sbr.get_data(); 197 }; 198 text_length() const199 size_t text_length() const { 200 if (this->lv_sbr.empty()) { 201 return this->lv_intern_string.size(); 202 } 203 return this->lv_sbr.length(); 204 } 205 206 struct line_range origin_in_full_msg(const char *msg, ssize_t len) const; 207 208 logline_value_meta lv_meta; 209 union value_u { 210 int64_t i; 211 double d; 212 value_u()213 value_u() : i(0) { }; value_u(int64_t i)214 value_u(int64_t i) : i(i) { }; value_u(double d)215 value_u(double d) : d(d) { }; 216 } lv_value; 217 shared_buffer_ref lv_sbr; 218 int lv_sub_offset{0}; 219 intern_string_t lv_intern_string; 220 struct line_range lv_origin; 221 }; 222 223 struct logline_value_stats { 224 logline_value_statslogline_value_stats225 logline_value_stats() { 226 this->clear(); 227 }; 228 clearlogline_value_stats229 void clear() { 230 this->lvs_count = 0; 231 this->lvs_total = 0; 232 this->lvs_min_value = std::numeric_limits<double>::max(); 233 this->lvs_max_value = -std::numeric_limits<double>::max(); 234 }; 235 mergelogline_value_stats236 void merge(const logline_value_stats &other) { 237 if (other.lvs_count == 0) { 238 return; 239 } 240 241 require(other.lvs_min_value <= other.lvs_max_value); 242 243 if (other.lvs_min_value < this->lvs_min_value) { 244 this->lvs_min_value = other.lvs_min_value; 245 } 246 if (other.lvs_max_value > this->lvs_max_value) { 247 this->lvs_max_value = other.lvs_max_value; 248 } 249 this->lvs_count += other.lvs_count; 250 this->lvs_total += other.lvs_total; 251 252 ensure(this->lvs_count >= 0); 253 ensure(this->lvs_min_value <= this->lvs_max_value); 254 }; 255 add_valuelogline_value_stats256 void add_value(double value) { 257 if (value < this->lvs_min_value) { 258 this->lvs_min_value = value; 259 } 260 if (value > this->lvs_max_value) { 261 this->lvs_max_value = value; 262 } 263 this->lvs_count += 1; 264 this->lvs_total += value; 265 }; 266 267 int64_t lvs_count; 268 double lvs_total; 269 double lvs_min_value; 270 double lvs_max_value; 271 }; 272 273 struct logline_value_cmp { logline_value_cmplogline_value_cmp274 explicit logline_value_cmp(const intern_string_t *name = nullptr, int col = -1) 275 : lvc_name(name), lvc_column(col) { 276 277 }; 278 operator ()logline_value_cmp279 bool operator()(const logline_value &lv) const { 280 bool retval = true; 281 282 if (this->lvc_name != nullptr) { 283 retval = retval && ((*this->lvc_name) == lv.lv_meta.lvm_name); 284 } 285 if (this->lvc_column != -1) { 286 retval = retval && (this->lvc_column == lv.lv_meta.lvm_column); 287 } 288 289 return retval; 290 }; 291 292 const intern_string_t *lvc_name; 293 int lvc_column; 294 }; 295 296 class log_vtab_impl; 297 298 /** 299 * Base class for implementations of log format parsers. 300 */ 301 class log_format { 302 public: 303 304 /** 305 * @return The collection of builtin log formats. 306 */ 307 static std::vector<std::shared_ptr<log_format>> &get_root_formats(); 308 find_root_format(const char * name)309 static std::shared_ptr<log_format> find_root_format(const char *name) { 310 auto& fmts = get_root_formats(); 311 for (auto& lf : fmts) { 312 if (lf->get_name() == name) { 313 return lf; 314 } 315 } 316 return nullptr; 317 } 318 319 struct action_def { 320 std::string ad_name; 321 std::string ad_label; 322 std::vector<std::string> ad_cmdline; 323 bool ad_capture_output; 324 action_deflog_format::action_def325 action_def() : ad_capture_output(false) { }; 326 operator <log_format::action_def327 bool operator<(const action_def &rhs) const { 328 return this->ad_name < rhs.ad_name; 329 }; 330 }; 331 332 virtual ~log_format() = default; 333 clear()334 virtual void clear() 335 { 336 this->lf_pattern_locks.clear(); 337 this->lf_date_time.clear(); 338 }; 339 340 /** 341 * Get the name of this log format. 342 * 343 * @return The log format name. 344 */ 345 virtual const intern_string_t get_name() const = 0; 346 match_name(const std::string & filename)347 virtual bool match_name(const std::string &filename) { return true; }; 348 349 enum scan_result_t { 350 SCAN_MATCH, 351 SCAN_NO_MATCH, 352 SCAN_INCOMPLETE, 353 }; 354 355 /** 356 * Scan a log line to see if it matches this log format. 357 * 358 * @param dst The vector of loglines that the formatter should append to 359 * if it detected a match. 360 * @param offset The offset in the file where this line is located. 361 * @param prefix The contents of the line. 362 * @param len The length of the prefix string. 363 */ 364 virtual scan_result_t scan(logfile &lf, 365 std::vector<logline> &dst, 366 const line_info &li, 367 shared_buffer_ref &sbr) = 0; 368 scan_for_partial(shared_buffer_ref & sbr,size_t & len_out) const369 virtual bool scan_for_partial(shared_buffer_ref &sbr, size_t &len_out) const { 370 return false; 371 }; 372 373 /** 374 * Remove redundant data from the log line string. 375 * 376 * XXX We should probably also add some attributes to the line here, so we 377 * can highlight things like the date. 378 * 379 * @param line The log line to edit. 380 */ scrub(std::string & line)381 virtual void scrub(std::string &line) { }; 382 383 virtual void annotate(uint64_t line_number,shared_buffer_ref & sbr,string_attrs_t & sa,std::vector<logline_value> & values,bool annotate_module=true) const384 annotate(uint64_t line_number, shared_buffer_ref &sbr, string_attrs_t &sa, 385 std::vector<logline_value> &values, bool annotate_module = true) const 386 { }; 387 rewrite(exec_context & ec,shared_buffer_ref & line,string_attrs_t & sa,std::string & value_out)388 virtual void rewrite(exec_context &ec, 389 shared_buffer_ref &line, 390 string_attrs_t &sa, 391 std::string &value_out) { 392 value_out.assign(line.get_data(), line.length()); 393 }; 394 stats_for_value(const intern_string_t & name) const395 virtual const logline_value_stats *stats_for_value(const intern_string_t &name) const { 396 return nullptr; 397 }; 398 399 virtual std::shared_ptr<log_format> specialized(int fmt_lock = -1) = 0; 400 get_vtab_impl() const401 virtual std::shared_ptr<log_vtab_impl> get_vtab_impl() const { 402 return nullptr; 403 }; 404 get_subline(const logline & ll,shared_buffer_ref & sbr,bool full_message=false)405 virtual void get_subline(const logline &ll, shared_buffer_ref &sbr, bool full_message = false) { 406 }; 407 get_actions(const logline_value & lv) const408 virtual const std::vector<std::string> *get_actions(const logline_value &lv) const { 409 return nullptr; 410 }; 411 get_source_path() const412 virtual std::set<std::string> get_source_path() const { 413 std::set<std::string> retval; 414 415 retval.insert("default"); 416 417 return retval; 418 }; 419 hide_field(const intern_string_t field_name,bool val)420 virtual bool hide_field(const intern_string_t field_name, bool val) { 421 return false; 422 }; 423 get_timestamp_formats() const424 const char * const *get_timestamp_formats() const { 425 if (this->lf_timestamp_format.empty()) { 426 return nullptr; 427 } 428 429 return &this->lf_timestamp_format[0]; 430 }; 431 432 void check_for_new_year(std::vector<logline> &dst, exttm log_tv, 433 timeval timeval1); 434 435 virtual std::string get_pattern_name(uint64_t line_number) const; 436 get_pattern_regex(uint64_t line_number) const437 virtual std::string get_pattern_regex(uint64_t line_number) const { 438 return ""; 439 }; 440 441 struct pattern_for_lines { 442 pattern_for_lines(uint32_t pfl_line, uint32_t pfl_pat_index); 443 444 uint32_t pfl_line; 445 int pfl_pat_index; 446 }; 447 last_pattern_index() const448 int last_pattern_index() const { 449 if (this->lf_pattern_locks.empty()) { 450 return -1; 451 } 452 453 return this->lf_pattern_locks.back().pfl_pat_index; 454 } 455 456 int pattern_index_for_line(uint64_t line_number) const; 457 458 uint8_t lf_mod_index{0}; 459 date_time_scanner lf_date_time; 460 std::vector<pattern_for_lines> lf_pattern_locks; 461 intern_string_t lf_timestamp_field{intern_string::lookup("timestamp", -1)}; 462 std::vector<const char *> lf_timestamp_format; 463 unsigned int lf_timestamp_flags{0}; 464 std::map<std::string, action_def> lf_action_defs; 465 std::vector<logline_value_stats> lf_value_stats; 466 std::vector<highlighter> lf_highlighters; 467 bool lf_is_self_describing{false}; 468 bool lf_time_ordered{true}; 469 bool lf_specialized{false}; 470 protected: 471 static std::vector<std::shared_ptr<log_format>> lf_root_formats; 472 473 struct pcre_format { pcre_formatlog_format::pcre_format474 pcre_format(const char *regex) : name(regex), pcre(regex) { 475 this->pf_timestamp_index = this->pcre.name_index("timestamp"); 476 }; 477 pcre_formatlog_format::pcre_format478 pcre_format() : name(nullptr), pcre("") { }; 479 480 const char *name; 481 pcrepp pcre; 482 int pf_timestamp_index{-1}; 483 }; 484 485 static bool next_format(pcre_format *fmt, int &index, int &locked_index); 486 487 const char *log_scanf(uint32_t line_number, 488 const char *line, 489 size_t len, 490 pcre_format *fmt, 491 const char *time_fmt[], 492 struct exttm *tm_out, 493 struct timeval *tv_out, 494 ...); 495 }; 496 497 #endif 498