1 /**
2 * Copyright (c) 2007-2017, Timothy Stack
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 * * Neither the name of Timothy Stack nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * @file log_format_impls.cc
30 */
31
32 #include "config.h"
33
34 #include <stdio.h>
35
36 #include <utility>
37 #include <algorithm>
38
39 #include "pcrepp/pcrepp.hh"
40 #include "sql_util.hh"
41 #include "log_format.hh"
42 #include "log_vtab_impl.hh"
43 #include "base/opt_util.hh"
44 #include "base/injector.bind.hh"
45 #include "yajlpp/yajlpp.hh"
46 #include "formats/logfmt/logfmt.parser.hh"
47
48 using namespace std;
49
50 static pcrepp RDNS_PATTERN("^(?:com|net|org|edu|[a-z][a-z])"
51 "(\\.\\w+)+(.+)");
52
53 /**
54 * Attempt to scrub a reverse-DNS string.
55 *
56 * @param str The string to scrub. If the string looks like a reverse-DNS
57 * string, the leading components of the name will be reduced to a single
58 * letter. For example, "com.example.foo" will be reduced to "c.e.foo".
59 * @return The scrubbed version of the input string or the original string
60 * if it is not a reverse-DNS string.
61 */
scrub_rdns(const string & str)62 static string scrub_rdns(const string &str)
63 {
64 pcre_context_static<30> context;
65 pcre_input input(str);
66 string retval;
67
68 if (RDNS_PATTERN.match(context, input)) {
69 pcre_context::capture_t *cap;
70
71 cap = context.begin();
72 for (int index = 0; index < cap->c_begin; index++) {
73 if (index == 0 || str[index - 1] == '.') {
74 if (index > 0) {
75 retval.append(1, '.');
76 }
77 retval.append(1, str[index]);
78 }
79 }
80 retval += input.get_substr(cap);
81 retval += input.get_substr(cap + 1);
82 }
83 else {
84 retval = str;
85 }
86 return retval;
87 }
88
89 class generic_log_format : public log_format {
scrub_pattern()90 static pcrepp &scrub_pattern()
91 {
92 static pcrepp SCRUB_PATTERN(
93 "\\d+-(\\d+-\\d+ \\d+:\\d+:\\d+(?:,\\d+)?:)\\w+:(.*)");
94
95 return SCRUB_PATTERN;
96 }
97
get_pcre_log_formats()98 static pcre_format *get_pcre_log_formats() {
99 static pcre_format log_fmt[] = {
100 pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24})(.*)"),
101 pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\dTZ: +/\\-,\\.-]+)([^:]+)"),
102 pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:+/\\.-]+) \\[\\w (.*)"),
103 pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:,/\\.-]+) (.*)"),
104 pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:,/\\.-]+) - (.*)"),
105 pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: \\.,/-]+) - (.*)"),
106 pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: \\.,/-]+)\\[[^\\]]+\\](.*)"),
107 pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: \\.,/-]+) (.*)"),
108
109 pcre_format(R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w: \.,+/-]+)\]\s*(\w+):?)"),
110 pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: \\.,+/-]+)\\] (.*)"),
111 pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: \\.,+/-]+)\\] \\[(\\w+)\\]"),
112 pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: \\.,+/-]+)\\] \\w+ (.*)"),
113 pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] \\(\\d+\\) (.*)"),
114
115 pcre_format()
116 };
117
118 return log_fmt;
119 };
120
get_pattern_regex(uint64_t line_number) const121 std::string get_pattern_regex(uint64_t line_number) const {
122 int pat_index = this->pattern_index_for_line(line_number);
123 return get_pcre_log_formats()[pat_index].name;
124 }
125
get_name() const126 const intern_string_t get_name() const {
127 return intern_string::lookup("generic_log");
128 };
129
scrub(string & line)130 void scrub(string &line)
131 {
132 pcre_context_static<30> context;
133 pcre_input pi(line);
134 string new_line;
135
136 if (scrub_pattern().match(context, pi)) {
137 pcre_context::capture_t *cap;
138
139 for (cap = context.begin(); cap != context.end(); cap++) {
140 new_line += scrub_rdns(pi.get_substr(cap));
141 }
142
143 line = new_line;
144 }
145 };
146
scan(logfile & lf,vector<logline> & dst,const line_info & li,shared_buffer_ref & sbr)147 scan_result_t scan(logfile &lf,
148 vector<logline> &dst,
149 const line_info &li,
150 shared_buffer_ref &sbr)
151 {
152 struct exttm log_time;
153 struct timeval log_tv;
154 pcre_context::capture_t ts, level;
155 const char *last_pos;
156
157 if ((last_pos = this->log_scanf(
158 dst.size(),
159 sbr.get_data(),
160 sbr.length(),
161 get_pcre_log_formats(),
162 nullptr,
163 &log_time,
164 &log_tv,
165
166 &ts,
167 &level)) != nullptr) {
168 const char *level_str = &sbr.get_data()[level.c_begin];
169 log_level_t level_val = string2level(level_str, level.length());
170
171 if (!((log_time.et_flags & ETF_DAY_SET) &&
172 (log_time.et_flags & ETF_MONTH_SET) &&
173 (log_time.et_flags & ETF_YEAR_SET))) {
174 this->check_for_new_year(dst, log_time, log_tv);
175 }
176
177 dst.emplace_back(li.li_file_range.fr_offset, log_tv, level_val);
178 return SCAN_MATCH;
179 }
180
181 return SCAN_NO_MATCH;
182 };
183
annotate(uint64_t line_number,shared_buffer_ref & line,string_attrs_t & sa,std::vector<logline_value> & values,bool annotate_module) const184 void annotate(uint64_t line_number, shared_buffer_ref &line, string_attrs_t &sa,
185 std::vector<logline_value> &values, bool annotate_module) const
186 {
187 int pat_index = this->pattern_index_for_line(line_number);
188 pcre_format &fmt = get_pcre_log_formats()[pat_index];
189 struct line_range lr;
190 int prefix_len = 0;
191 pcre_input pi(line.get_data(), 0, line.length());
192 pcre_context_static<30> pc;
193
194 if (!fmt.pcre.match(pc, pi)) {
195 return;
196 }
197
198 lr.lr_start = pc[0]->c_begin;
199 lr.lr_end = pc[0]->c_end;
200 sa.emplace_back(lr, &logline::L_TIMESTAMP);
201
202 const char *level = &line.get_data()[pc[1]->c_begin];
203
204 if (string2level(level, pc[1]->length(), true) == LEVEL_UNKNOWN) {
205 prefix_len = pc[0]->c_end;
206 }
207 else {
208 prefix_len = pc[1]->c_end;
209 }
210
211 lr.lr_start = 0;
212 lr.lr_end = prefix_len;
213 sa.emplace_back(lr, &logline::L_PREFIX);
214
215 lr.lr_start = prefix_len;
216 lr.lr_end = line.length();
217 sa.emplace_back(lr, &SA_BODY);
218 };
219
specialized(int fmt_lock)220 shared_ptr<log_format> specialized(int fmt_lock)
221 {
222 return std::make_shared<generic_log_format>(*this);
223 };
224 };
225
/**
 * Decode the "\xH" / "\xHH" hexadecimal escapes in a character range.
 *
 * A backslash followed by 'x' and one or two hexadecimal digits is replaced
 * by the byte with that value, and only the digits that were decoded are
 * consumed.  A backslash that does not start such an escape is dropped and
 * the characters after it are copied through unchanged.  Every other
 * character is copied as-is.
 *
 * The range does not have to be NUL-terminated; nothing at or beyond
 * str + len is read.
 *
 * @param str The start of the text to decode.
 * @param len The number of bytes available at str.
 * @return The decoded string.
 */
std::string from_escaped_string(const char *str, size_t len)
{
    // Value of a hexadecimal digit, or -1 when ch is not one.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    std::string retval;

    retval.reserve(len);

    size_t lpc = 0;
    while (lpc < len) {
        if (str[lpc] != '\\') {
            retval.push_back(str[lpc]);
            lpc += 1;
            continue;
        }

        // Step over the backslash; it is never copied to the output.
        lpc += 1;
        if (lpc + 1 >= len || str[lpc] != 'x') {
            continue;
        }

        const int high = hex_value(str[lpc + 1]);
        if (high < 0) {
            // Not a hex escape, the 'x' is copied by the next iteration.
            continue;
        }

        int value = high;
        size_t consumed = 2; // the 'x' and the first digit

        if (lpc + 2 < len) {
            const int low = hex_value(str[lpc + 2]);

            if (low >= 0) {
                value = (value * 16) + low;
                consumed = 3;
            }
        }

        retval.push_back(static_cast<char>(value));
        lpc += consumed;
    }

    return retval;
}
250
251 nonstd::optional<const char *>
lnav_strnstr(const char * s,const char * find,size_t slen)252 lnav_strnstr(const char *s, const char *find, size_t slen)
253 {
254 char c, sc;
255 size_t len;
256
257 if ((c = *find++) != '\0') {
258 len = strlen(find);
259 do {
260 do {
261 if (slen < 1 || (sc = *s) == '\0') {
262 return nonstd::nullopt;
263 }
264 --slen;
265 ++s;
266 } while (sc != c);
267 if (len > slen) {
268 return nonstd::nullopt;
269 }
270 } while (strncmp(s, find, len) != 0);
271 s--;
272 }
273 return s;
274 }
275
276 struct separated_string {
277 const char *ss_str;
278 size_t ss_len;
279 const char *ss_separator;
280 size_t ss_separator_len;
281
separated_stringseparated_string282 separated_string(const char *str, size_t len)
283 : ss_str(str), ss_len(len), ss_separator(",") {
284 this->ss_separator_len = strlen(this->ss_separator);
285 };
286
with_separatorseparated_string287 separated_string &with_separator(const char *sep) {
288 this->ss_separator = sep;
289 this->ss_separator_len = strlen(sep);
290 return *this;
291 };
292
293 struct iterator {
294 const separated_string &i_parent;
295 const char *i_pos;
296 const char *i_next_pos;
297 size_t i_index;
298
iteratorseparated_string::iterator299 iterator(const separated_string &ss, const char *pos)
300 : i_parent(ss), i_pos(pos), i_next_pos(pos), i_index(0) {
301 this->update();
302 };
303
updateseparated_string::iterator304 void update() {
305 const separated_string &ss = this->i_parent;
306 auto next_field = lnav_strnstr(
307 this->i_pos,
308 ss.ss_separator,
309 ss.ss_len - (this->i_pos - ss.ss_str));
310 if (next_field) {
311 this->i_next_pos = next_field.value() + ss.ss_separator_len;
312 } else {
313 this->i_next_pos = ss.ss_str + ss.ss_len;
314 }
315 };
316
operator ++separated_string::iterator317 iterator &operator++() {
318 this->i_pos = this->i_next_pos;
319 this->update();
320 this->i_index += 1;
321
322 return *this;
323 };
324
operator *separated_string::iterator325 string_fragment operator*() {
326 const separated_string &ss = this->i_parent;
327 int end;
328
329 if (this->i_next_pos < (ss.ss_str + ss.ss_len)) {
330 end = this->i_next_pos - ss.ss_str - ss.ss_separator_len;
331 } else {
332 end = this->i_next_pos - ss.ss_str;
333 }
334 return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
335 };
336
operator ==separated_string::iterator337 bool operator==(const iterator &other) const {
338 return (&this->i_parent == &other.i_parent) &&
339 (this->i_pos == other.i_pos);
340 };
341
operator !=separated_string::iterator342 bool operator!=(const iterator &other) const {
343 return !(*this == other);
344 };
345
indexseparated_string::iterator346 size_t index() const {
347 return this->i_index;
348 };
349 };
350
beginseparated_string351 iterator begin() {
352 return {*this, this->ss_str};
353 };
354
endseparated_string355 iterator end() {
356 return {*this, this->ss_str + this->ss_len};
357 };
358 };
359
360 class bro_log_format : public log_format {
361 public:
362
363 struct field_def {
364 logline_value_meta fd_meta;
365 std::string fd_collator;
366 int fd_numeric_index;
367
field_defbro_log_format::field_def368 explicit field_def(const intern_string_t name, int col, log_format *format)
369 : fd_meta(name, value_kind_t::VALUE_TEXT, col, format),
370 fd_numeric_index(-1) {
371 };
372
with_kindbro_log_format::field_def373 field_def &with_kind(value_kind_t kind,
374 bool identifier = false,
375 const std::string &collator = "") {
376 this->fd_meta.lvm_kind = kind;
377 this->fd_meta.lvm_identifier = identifier;
378 this->fd_collator = collator;
379 return *this;
380 };
381
with_numeric_indexbro_log_format::field_def382 field_def &with_numeric_index(int index) {
383 this->fd_numeric_index = index;
384 return *this;
385 }
386 };
387
bro_log_format()388 bro_log_format() {
389 this->lf_is_self_describing = true;
390 this->lf_time_ordered = false;
391 };
392
get_name() const393 const intern_string_t get_name() const {
394 static const intern_string_t name(intern_string::lookup("bro"));
395
396 return this->blf_format_name.empty() ? name : this->blf_format_name;
397 };
398
clear()399 virtual void clear() {
400 this->log_format::clear();
401 this->blf_format_name.clear();
402 this->blf_field_defs.clear();
403 };
404
scan_int(std::vector<logline> & dst,const line_info & li,shared_buffer_ref & sbr)405 scan_result_t scan_int(std::vector<logline> &dst,
406 const line_info &li,
407 shared_buffer_ref &sbr) {
408 static const intern_string_t STATUS_CODE = intern_string::lookup("bro_status_code");
409 static const intern_string_t TS = intern_string::lookup("bro_ts");
410 static const intern_string_t UID = intern_string::lookup("bro_uid");
411
412 separated_string ss(sbr.get_data(), sbr.length());
413 struct timeval tv;
414 struct exttm tm;
415 bool found_ts = false;
416 log_level_t level = LEVEL_INFO;
417 uint8_t opid = 0;
418
419 ss.with_separator(this->blf_separator.get());
420
421 for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
422 if (iter.index() == 0 && *iter == "#close") {
423 return SCAN_MATCH;
424 }
425
426 if (iter.index() >= this->blf_field_defs.size()) {
427 break;
428 }
429
430 const auto &fd = this->blf_field_defs[iter.index()];
431
432 if (TS == fd.fd_meta.lvm_name) {
433 string_fragment sf = *iter;
434
435 if (this->lf_date_time.scan(sf.data(),
436 sf.length(),
437 nullptr,
438 &tm,
439 tv)) {
440 this->lf_timestamp_flags = tm.et_flags;
441 found_ts = true;
442 }
443 } else if (STATUS_CODE == fd.fd_meta.lvm_name) {
444 string_fragment sf = *iter;
445
446 if (!sf.empty() && sf[0] >= '4') {
447 level = LEVEL_ERROR;
448 }
449 } else if (UID == fd.fd_meta.lvm_name) {
450 string_fragment sf = *iter;
451
452 opid = hash_str(sf.data(), sf.length());
453 }
454
455 if (fd.fd_numeric_index >= 0) {
456 switch (fd.fd_meta.lvm_kind) {
457 case value_kind_t::VALUE_INTEGER:
458 case value_kind_t::VALUE_FLOAT: {
459 string_fragment sf = *iter;
460 char field_copy[sf.length() + 1];
461 double val;
462
463 if (sscanf(sf.to_string(field_copy), "%lf", &val) == 1) {
464 this->lf_value_stats[fd.fd_numeric_index].add_value(val);
465 }
466 break;
467 }
468 default:
469 break;
470 }
471 }
472 }
473
474 if (found_ts) {
475 dst.emplace_back(li.li_file_range.fr_offset, tv, level, 0, opid);
476 return SCAN_MATCH;
477 } else {
478 return SCAN_NO_MATCH;
479 }
480 }
481
scan(logfile & lf,std::vector<logline> & dst,const line_info & li,shared_buffer_ref & sbr)482 scan_result_t scan(logfile &lf,
483 std::vector<logline> &dst,
484 const line_info &li,
485 shared_buffer_ref &sbr) {
486 static pcrepp SEP_RE(R"(^#separator\s+(.+))");
487
488 if (!this->blf_format_name.empty()) {
489 return this->scan_int(dst, li, sbr);
490 }
491
492 if (dst.empty() || dst.size() > 20 || sbr.empty() || sbr.get_data()[0] == '#') {
493 return SCAN_NO_MATCH;
494 }
495
496 pcre_context_static<20> pc;
497 auto line_iter = dst.begin();
498 auto read_result = lf.read_line(line_iter);
499
500 if (read_result.isErr()) {
501 return SCAN_NO_MATCH;
502 }
503
504 auto line = read_result.unwrap();
505 pcre_input pi(line.get_data(), 0, line.length());
506
507 if (!SEP_RE.match(pc, pi)) {
508 return SCAN_NO_MATCH;
509 }
510
511 this->clear();
512
513 string sep = from_escaped_string(pi.get_substr_start(pc[0]), pc[0]->length());
514 this->blf_separator = intern_string::lookup(sep);
515
516 for (++line_iter; line_iter != dst.end(); ++line_iter) {
517 auto next_read_result = lf.read_line(line_iter);
518
519 if (next_read_result.isErr()) {
520 return SCAN_NO_MATCH;
521 }
522
523 line = next_read_result.unwrap();
524 separated_string ss(line.get_data(), line.length());
525
526 ss.with_separator(this->blf_separator.get());
527 auto iter = ss.begin();
528
529 string_fragment directive = *iter;
530
531 if (directive.empty() || directive[0] != '#') {
532 continue;
533 }
534
535 ++iter;
536 if (iter == ss.end()) {
537 continue;
538 }
539
540 if (directive == "#set_separator") {
541 this->blf_set_separator = intern_string::lookup(*iter);
542 } else if (directive == "#empty_field") {
543 this->blf_empty_field = intern_string::lookup(*iter);
544 } else if (directive == "#unset_field") {
545 this->blf_unset_field = intern_string::lookup(*iter);
546 } else if (directive == "#path") {
547 string path = to_string(*iter);
548 char full_name[128];
549 snprintf(full_name, sizeof(full_name), "bro_%s_log", path.c_str());
550 this->blf_format_name = intern_string::lookup(full_name);
551 } else if (directive == "#fields") {
552 do {
553 this->blf_field_defs.emplace_back(
554 intern_string::lookup("bro_" + sql_safe_ident(*iter)),
555 this->blf_field_defs.size(),
556 this);
557 ++iter;
558 } while (iter != ss.end());
559 } else if (directive == "#types") {
560 static const char *KNOWN_IDS[] = {
561 "bro_conn_uids",
562 "bro_fuid",
563 "bro_host",
564 "bro_info_code",
565 "bro_method",
566 "bro_mime_type",
567 "bro_orig_fuids",
568 "bro_parent_fuid",
569 "bro_proto",
570 "bro_referrer",
571 "bro_resp_fuids",
572 "bro_service",
573 "bro_status_code",
574 "bro_uid",
575 "bro_uri",
576 "bro_user_agent",
577 "bro_username",
578 };
579
580 int numeric_count = 0;
581
582 do {
583 string_fragment field_type = *iter;
584 auto &fd = this->blf_field_defs[iter.index() - 1];
585
586 if (field_type == "time") {
587 fd.with_kind(value_kind_t::VALUE_TIMESTAMP);
588 } else if (field_type == "string") {
589 bool ident = binary_search(begin(KNOWN_IDS), end(KNOWN_IDS), fd.fd_meta.lvm_name);
590 fd.with_kind(value_kind_t::VALUE_TEXT, ident);
591 } else if (field_type == "count") {
592 bool ident = binary_search(begin(KNOWN_IDS), end(KNOWN_IDS), fd.fd_meta.lvm_name);
593 fd.with_kind(value_kind_t::VALUE_INTEGER, ident)
594 .with_numeric_index(numeric_count);
595 numeric_count += 1;
596 } else if (field_type == "bool") {
597 fd.with_kind(value_kind_t::VALUE_BOOLEAN);
598 } else if (field_type == "addr") {
599 fd.with_kind(value_kind_t::VALUE_TEXT, true, "ipaddress");
600 } else if (field_type == "port") {
601 fd.with_kind(value_kind_t::VALUE_INTEGER, true);
602 } else if (field_type == "interval") {
603 fd.with_kind(value_kind_t::VALUE_FLOAT)
604 .with_numeric_index(numeric_count);
605 numeric_count += 1;
606 }
607
608 ++iter;
609 } while (iter != ss.end());
610
611 this->lf_value_stats.resize(numeric_count);
612 }
613 }
614
615 if (!this->blf_format_name.empty() &&
616 !this->blf_separator.empty() &&
617 !this->blf_field_defs.empty()) {
618 dst.clear();
619 return this->scan_int(dst, li, sbr);
620 }
621
622 this->blf_format_name.clear();
623 this->lf_value_stats.clear();
624
625 return SCAN_NO_MATCH;
626 };
627
annotate(uint64_t line_number,shared_buffer_ref & sbr,string_attrs_t & sa,std::vector<logline_value> & values,bool annotate_module) const628 void annotate(uint64_t line_number, shared_buffer_ref &sbr, string_attrs_t &sa,
629 std::vector<logline_value> &values, bool annotate_module) const {
630 static const intern_string_t TS = intern_string::lookup("bro_ts");
631 static const intern_string_t UID = intern_string::lookup("bro_uid");
632
633 separated_string ss(sbr.get_data(), sbr.length());
634
635 ss.with_separator(this->blf_separator.get());
636
637 for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
638 if (iter.index() >= this->blf_field_defs.size()) {
639 return;
640 }
641
642 const field_def &fd = this->blf_field_defs[iter.index()];
643 string_fragment sf = *iter;
644
645 if (sf == this->blf_empty_field) {
646 sf.clear();
647 } else if (sf == this->blf_unset_field) {
648 sf.invalidate();
649 }
650
651 auto lr = line_range(sf.sf_begin, sf.sf_end);
652
653 if (fd.fd_meta.lvm_name == TS) {
654 sa.emplace_back(lr, &logline::L_TIMESTAMP);
655 } else if (fd.fd_meta.lvm_name == UID) {
656 sa.emplace_back(lr, &logline::L_OPID);
657 }
658
659 if (lr.is_valid()) {
660 values.emplace_back(fd.fd_meta, sbr, lr);
661 } else {
662 values.emplace_back(fd.fd_meta);
663 }
664 }
665 };
666
stats_for_value(const intern_string_t & name) const667 const logline_value_stats *stats_for_value(const intern_string_t &name) const {
668 const logline_value_stats *retval = nullptr;
669
670 for (size_t lpc = 0; lpc < this->blf_field_defs.size(); lpc++) {
671 if (this->blf_field_defs[lpc].fd_meta.lvm_name == name) {
672 if (this->blf_field_defs[lpc].fd_numeric_index < 0) {
673 break;
674 }
675 retval = &this->lf_value_stats[this->blf_field_defs[lpc].fd_numeric_index];
676 break;
677 }
678 }
679
680 return retval;
681 };
682
specialized(int fmt_lock=-1)683 std::shared_ptr<log_format> specialized(int fmt_lock = -1) {
684 return make_shared<bro_log_format>(*this);
685 };
686
687 class bro_log_table : public log_format_vtab_impl {
688 public:
bro_log_table(const bro_log_format & format)689 bro_log_table(const bro_log_format &format)
690 : log_format_vtab_impl(format), blt_format(format) {
691
692 }
693
get_columns(vector<vtab_column> & cols) const694 void get_columns(vector<vtab_column> &cols) const override {
695 for (const auto &fd : this->blt_format.blf_field_defs) {
696 std::pair<int, unsigned int> type_pair = log_vtab_impl::logline_value_to_sqlite_type(fd.fd_meta.lvm_kind);
697
698 cols.emplace_back(fd.fd_meta.lvm_name.to_string(), type_pair.first, fd.fd_collator, false, "", type_pair.second);
699 }
700 };
701
get_foreign_keys(std::vector<std::string> & keys_inout) const702 void get_foreign_keys(std::vector<std::string> &keys_inout) const override {
703 this->log_vtab_impl::get_foreign_keys(keys_inout);
704
705 for (const auto &fd : this->blt_format.blf_field_defs) {
706 if (fd.fd_meta.lvm_identifier) {
707 keys_inout.push_back(fd.fd_meta.lvm_name.to_string());
708 }
709 }
710 }
711
712 const bro_log_format &blt_format;
713 };
714
get_tables()715 static map<intern_string_t, std::shared_ptr<bro_log_table>> &get_tables() {
716 static map<intern_string_t, std::shared_ptr<bro_log_table>> retval;
717
718 return retval;
719 };
720
get_vtab_impl() const721 std::shared_ptr<log_vtab_impl> get_vtab_impl() const {
722 if (this->blf_format_name.empty()) {
723 return nullptr;
724 }
725
726 std::shared_ptr<bro_log_table> retval = nullptr;
727
728 auto &tables = get_tables();
729 auto iter = tables.find(this->blf_format_name);
730 if (iter == tables.end()) {
731 retval = std::make_shared<bro_log_table>(*this);
732 tables[this->blf_format_name] = retval;
733 }
734
735 return retval;
736 };
737
get_subline(const logline & ll,shared_buffer_ref & sbr,bool full_message)738 void get_subline(const logline &ll,
739 shared_buffer_ref &sbr,
740 bool full_message) {
741 }
742
743 intern_string_t blf_format_name;
744 intern_string_t blf_separator;
745 intern_string_t blf_set_separator;
746 intern_string_t blf_empty_field;
747 intern_string_t blf_unset_field;
748 vector<field_def> blf_field_defs;
749
750 };
751
752 struct ws_separated_string {
753 const char *ss_str;
754 size_t ss_len;
755
ws_separated_stringws_separated_string756 explicit ws_separated_string(const char *str = nullptr, size_t len = -1)
757 : ss_str(str), ss_len(len) {
758 };
759
760 struct iterator {
761 enum class state_t {
762 NORMAL,
763 QUOTED,
764 };
765
766 const ws_separated_string &i_parent;
767 const char *i_pos;
768 const char *i_next_pos;
769 size_t i_index{0};
770 state_t i_state{state_t::NORMAL};
771
iteratorws_separated_string::iterator772 iterator(const ws_separated_string &ss, const char *pos)
773 : i_parent(ss), i_pos(pos), i_next_pos(pos) {
774 this->update();
775 };
776
updatews_separated_string::iterator777 void update() {
778 const auto &ss = this->i_parent;
779 bool done = false;
780
781 while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) {
782 switch (this->i_state) {
783 case state_t::NORMAL:
784 if (*this->i_next_pos == '"') {
785 this->i_state = state_t::QUOTED;
786 } else if (isspace(*this->i_next_pos)) {
787 done = true;
788 }
789 break;
790 case state_t::QUOTED:
791 if (*this->i_next_pos == '"') {
792 this->i_state = state_t::NORMAL;
793 }
794 break;
795 }
796 if (!done) {
797 this->i_next_pos += 1;
798 }
799 }
800 };
801
operator ++ws_separated_string::iterator802 iterator &operator++() {
803 const auto &ss = this->i_parent;
804
805 this->i_pos = this->i_next_pos;
806 while (this->i_pos < (ss.ss_str + ss.ss_len) &&
807 isspace(*this->i_pos)) {
808 this->i_pos += 1;
809 this->i_next_pos += 1;
810 }
811 this->update();
812 this->i_index += 1;
813
814 return *this;
815 };
816
operator *ws_separated_string::iterator817 string_fragment operator*() {
818 const auto &ss = this->i_parent;
819 int end = this->i_next_pos - ss.ss_str;
820
821 return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
822 };
823
operator ==ws_separated_string::iterator824 bool operator==(const iterator &other) const {
825 return (&this->i_parent == &other.i_parent) &&
826 (this->i_pos == other.i_pos);
827 };
828
operator !=ws_separated_string::iterator829 bool operator!=(const iterator &other) const {
830 return !(*this == other);
831 };
832
indexws_separated_string::iterator833 size_t index() const {
834 return this->i_index;
835 };
836 };
837
beginws_separated_string838 iterator begin() {
839 return {*this, this->ss_str};
840 };
841
endws_separated_string842 iterator end() {
843 return {*this, this->ss_str + this->ss_len};
844 };
845 };
846
847 class w3c_log_format : public log_format {
848 public:
849
850 struct field_def {
851 const intern_string_t fd_name;
852 logline_value_meta fd_meta;
853 std::string fd_collator;
854 int fd_numeric_index;
855
field_defw3c_log_format::field_def856 explicit field_def(const intern_string_t name)
857 : fd_name(name),
858 fd_meta(intern_string::lookup(sql_safe_ident(name.to_string_fragment())),
859 value_kind_t::VALUE_TEXT),
860 fd_numeric_index(-1) {
861 };
862
field_defw3c_log_format::field_def863 field_def(const intern_string_t name, logline_value_meta meta)
864 : fd_name(name), fd_meta(meta), fd_numeric_index(-1) {
865 }
866
field_defw3c_log_format::field_def867 field_def(int col, const char *name, value_kind_t kind, bool ident = false, std::string coll = "")
868 : fd_name(intern_string::lookup(name)),
869 fd_meta(intern_string::lookup(sql_safe_ident(string_fragment(name))),
870 kind,
871 col),
872 fd_collator(std::move(coll)),
873 fd_numeric_index(-1) {
874 this->fd_meta.lvm_identifier = ident;
875 }
876
with_kindw3c_log_format::field_def877 field_def &with_kind(value_kind_t kind,
878 bool identifier = false,
879 const std::string &collator = "") {
880 this->fd_meta.lvm_kind = kind;
881 this->fd_meta.lvm_identifier = identifier;
882 this->fd_collator = collator;
883 return *this;
884 };
885
with_numeric_indexw3c_log_format::field_def886 field_def &with_numeric_index(int index) {
887 this->fd_numeric_index = index;
888 return *this;
889 }
890 };
891
892 struct field_to_struct_t {
field_to_struct_tw3c_log_format::field_to_struct_t893 field_to_struct_t(const char *prefix, const char *struct_name)
894 : fs_prefix(prefix),
895 fs_struct_name(intern_string::lookup(struct_name)) {
896 }
897
898 const char *fs_prefix;
899 intern_string_t fs_struct_name;
900 };
901
// Table of predefined field descriptions.  It is only declared here; scan()
// searches it by fd_name for each name listed in a "#Fields:" directive.
static const std::vector<field_def> KNOWN_FIELDS;
// Table of field-name prefixes, also only declared here.  scan() consults it
// for "#Fields:" names that start with one of the fs_prefix values.
const static std::vector<field_to_struct_t> KNOWN_STRUCT_FIELDS;
904
w3c_log_format()905 w3c_log_format() {
906 this->lf_is_self_describing = true;
907 this->lf_time_ordered = false;
908 };
909
get_name() const910 const intern_string_t get_name() const override {
911 static const intern_string_t name(intern_string::lookup("w3c"));
912
913 return this->wlf_format_name.empty() ? name : this->wlf_format_name;
914 };
915
clear()916 void clear() override {
917 this->log_format::clear();
918 this->wlf_time_scanner.clear();
919 this->wlf_format_name.clear();
920 this->wlf_field_defs.clear();
921 };
922
scan_int(std::vector<logline> & dst,const line_info & li,shared_buffer_ref & sbr)923 scan_result_t scan_int(std::vector<logline> &dst,
924 const line_info &li,
925 shared_buffer_ref &sbr) {
926 static const intern_string_t F_DATE = intern_string::lookup("date");
927 static const intern_string_t F_DATE_LOCAL = intern_string::lookup("date-local");
928 static const intern_string_t F_DATE_UTC = intern_string::lookup("date-UTC");
929 static const intern_string_t F_TIME = intern_string::lookup("time");
930 static const intern_string_t F_TIME_LOCAL = intern_string::lookup("time-local");
931 static const intern_string_t F_TIME_UTC = intern_string::lookup("time-UTC");
932 static const intern_string_t F_STATUS_CODE = intern_string::lookup("sc-status");
933
934 ws_separated_string ss(sbr.get_data(), sbr.length());
935 struct timeval date_tv{0, 0}, time_tv{0, 0};
936 struct exttm date_tm, time_tm;
937 bool found_date = false, found_time = false;
938 log_level_t level = LEVEL_INFO;
939
940 for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
941 if (iter.index() >= this->wlf_field_defs.size()) {
942 level = LEVEL_INVALID;
943 break;
944 }
945
946 const field_def &fd = this->wlf_field_defs[iter.index()];
947 string_fragment sf = *iter;
948
949 if (sf.startswith("#")) {
950 if (sf == "#Date:") {
951 date_time_scanner dts;
952 struct exttm tm;
953 struct timeval tv;
954
955 if (dts.scan(sbr.get_data_at(sf.length() + 1),
956 sbr.length() - sf.length() - 1,
957 nullptr,
958 &tm,
959 tv)) {
960 this->lf_date_time.set_base_time(tv.tv_sec);
961 this->wlf_time_scanner.set_base_time(tv.tv_sec);
962 }
963 }
964 dst.emplace_back(li.li_file_range.fr_offset, 0, 0, LEVEL_IGNORE, 0);
965 return SCAN_MATCH;
966 }
967
968 sf.trim("\" \t");
969 if (F_DATE == fd.fd_name ||
970 F_DATE_LOCAL == fd.fd_name ||
971 F_DATE_UTC == fd.fd_name) {
972 if (this->lf_date_time.scan(sf.data(),
973 sf.length(),
974 nullptr,
975 &date_tm,
976 date_tv)) {
977 this->lf_timestamp_flags |= date_tm.et_flags;
978 found_date = true;
979 }
980 } else if (F_TIME == fd.fd_name ||
981 F_TIME_LOCAL == fd.fd_name ||
982 F_TIME_UTC == fd.fd_name) {
983 if (this->wlf_time_scanner.scan(sf.data(),
984 sf.length(),
985 nullptr,
986 &time_tm,
987 time_tv)) {
988 this->lf_timestamp_flags |= time_tm.et_flags;
989 found_time = true;
990 }
991 } else if (F_STATUS_CODE == fd.fd_name) {
992 if (!sf.empty() && sf[0] >= '4') {
993 level = LEVEL_ERROR;
994 }
995 }
996
997 if (fd.fd_numeric_index >= 0) {
998 switch (fd.fd_meta.lvm_kind) {
999 case value_kind_t::VALUE_INTEGER:
1000 case value_kind_t::VALUE_FLOAT: {
1001 char field_copy[sf.length() + 1];
1002 double val;
1003
1004 if (sscanf(sf.to_string(field_copy), "%lf", &val) == 1) {
1005 this->lf_value_stats[fd.fd_numeric_index].add_value(val);
1006 }
1007 break;
1008 }
1009 default:
1010 break;
1011 }
1012 }
1013 }
1014
1015 if (found_time) {
1016 struct exttm tm = time_tm;
1017 struct timeval tv;
1018
1019 if (found_date) {
1020 tm.et_tm.tm_year = date_tm.et_tm.tm_year;
1021 tm.et_tm.tm_mday = date_tm.et_tm.tm_mday;
1022 tm.et_tm.tm_mon = date_tm.et_tm.tm_mon;
1023 tm.et_tm.tm_wday = date_tm.et_tm.tm_wday;
1024 tm.et_tm.tm_yday = date_tm.et_tm.tm_yday;
1025 }
1026
1027 tv.tv_sec = tm2sec(&tm.et_tm);
1028 tv.tv_usec = tm.et_nsec / 1000;
1029
1030 dst.emplace_back(li.li_file_range.fr_offset, tv, level, 0);
1031 return SCAN_MATCH;
1032 } else {
1033 return SCAN_NO_MATCH;
1034 }
1035 }
1036
scan(logfile & lf,std::vector<logline> & dst,const line_info & li,shared_buffer_ref & sbr)1037 scan_result_t scan(logfile &lf,
1038 std::vector<logline> &dst,
1039 const line_info &li,
1040 shared_buffer_ref &sbr) override {
1041 static auto W3C_LOG_NAME = intern_string::lookup("w3c_log");
1042 static auto X_FIELDS_NAME = intern_string::lookup("x_fields");
1043 static auto X_FIELDS_IDX = 0;
1044
1045 if (!this->wlf_format_name.empty()) {
1046 return this->scan_int(dst, li, sbr);
1047 }
1048
1049 if (dst.empty() || dst.size() > 20 || sbr.empty() || sbr.get_data()[0] == '#') {
1050 return SCAN_NO_MATCH;
1051 }
1052
1053 this->clear();
1054
1055 for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter) {
1056 auto next_read_result = lf.read_line(line_iter);
1057
1058 if (next_read_result.isErr()) {
1059 return SCAN_NO_MATCH;
1060 }
1061
1062 auto line = next_read_result.unwrap();
1063 ws_separated_string ss(line.get_data(), line.length());
1064 auto iter = ss.begin();
1065
1066 string_fragment directive = *iter;
1067
1068 if (directive.empty() || directive[0] != '#') {
1069 continue;
1070 }
1071
1072 ++iter;
1073 if (iter == ss.end()) {
1074 continue;
1075 }
1076
1077 if (directive == "#Date:") {
1078 date_time_scanner dts;
1079 struct exttm tm;
1080 struct timeval tv;
1081
1082 if (dts.scan(line.get_data_at(directive.length() + 1),
1083 line.length() - directive.length() - 1,
1084 nullptr,
1085 &tm,
1086 tv)) {
1087 this->lf_date_time.set_base_time(tv.tv_sec);
1088 this->wlf_time_scanner.set_base_time(tv.tv_sec);
1089 }
1090 } else if (directive == "#Fields:") {
1091 int numeric_count = 0;
1092
1093 do {
1094 string_fragment sf = *iter;
1095
1096 sf.trim(")");
1097 auto field_iter = std::find_if(begin(KNOWN_FIELDS),
1098 end(KNOWN_FIELDS),
1099 [&sf](auto elem) {
1100 return sf == elem.fd_name;
1101 });
1102 if (field_iter != end(KNOWN_FIELDS)) {
1103 this->wlf_field_defs.emplace_back(*field_iter);
1104 } else if (sf == "date" || sf == "time") {
1105 this->wlf_field_defs.emplace_back(
1106 intern_string::lookup(sf));
1107 } else {
1108 const auto fs_iter = std::find_if(
1109 begin(KNOWN_STRUCT_FIELDS),
1110 end(KNOWN_STRUCT_FIELDS),
1111 [&sf](auto elem) {
1112 return sf.startswith(elem.fs_prefix);
1113 });
1114 if (fs_iter != end(KNOWN_STRUCT_FIELDS)) {
1115 auto field_name = intern_string::lookup(sf.substr(3));
1116 this->wlf_field_defs.emplace_back(
1117 field_name, logline_value_meta(
1118 field_name,
1119 value_kind_t::VALUE_TEXT,
1120 KNOWN_FIELDS.size() + 1 +
1121 std::distance(begin(KNOWN_STRUCT_FIELDS), fs_iter),
1122 this)
1123 .with_struct_name(fs_iter->fs_struct_name));
1124 } else {
1125 auto field_name = intern_string::lookup(sf);
1126 this->wlf_field_defs.emplace_back(
1127 field_name,
1128 logline_value_meta(field_name,
1129 value_kind_t::VALUE_TEXT,
1130 KNOWN_FIELDS.size() +
1131 X_FIELDS_IDX,
1132 this)
1133 .with_struct_name(X_FIELDS_NAME));
1134 }
1135 }
1136 auto& fd = this->wlf_field_defs.back();
1137 fd.fd_meta.lvm_format = nonstd::make_optional(this);
1138 switch (fd.fd_meta.lvm_kind) {
1139 case value_kind_t::VALUE_FLOAT:
1140 case value_kind_t::VALUE_INTEGER:
1141 fd.with_numeric_index(numeric_count);
1142 numeric_count += 1;
1143 break;
1144 default:
1145 break;
1146 }
1147
1148 ++iter;
1149 } while (iter != ss.end());
1150
1151 this->wlf_format_name = W3C_LOG_NAME;
1152 this->lf_value_stats.resize(numeric_count);
1153 }
1154 }
1155
1156 if (!this->wlf_format_name.empty() &&
1157 !this->wlf_field_defs.empty()) {
1158 dst.clear();
1159 return this->scan_int(dst, li, sbr);
1160 }
1161
1162 this->wlf_format_name.clear();
1163 this->lf_value_stats.clear();
1164
1165 return SCAN_NO_MATCH;
1166 };
1167
annotate(uint64_t line_number,shared_buffer_ref & sbr,string_attrs_t & sa,std::vector<logline_value> & values,bool annotate_module) const1168 void annotate(uint64_t line_number, shared_buffer_ref &sbr, string_attrs_t &sa,
1169 std::vector<logline_value> &values, bool annotate_module) const override {
1170 ws_separated_string ss(sbr.get_data(), sbr.length());
1171
1172 for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
1173 string_fragment sf = *iter;
1174
1175 if (iter.index() >= this->wlf_field_defs.size()) {
1176 sa.emplace_back(line_range{sf.sf_begin, -1},
1177 &SA_INVALID,
1178 (void *) "extra fields detected");
1179 return;
1180 }
1181
1182 const field_def &fd = this->wlf_field_defs[iter.index()];
1183
1184 if (sf == "-") {
1185 sf.invalidate();
1186 }
1187
1188 auto lr = line_range(sf.sf_begin, sf.sf_end);
1189
1190 if (lr.is_valid()) {
1191 values.emplace_back(fd.fd_meta, sbr, lr);
1192 if (sf.startswith("\"")) {
1193 auto& meta = values.back().lv_meta;
1194
1195 if (meta.lvm_kind == value_kind_t::VALUE_TEXT) {
1196 meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED;
1197 } else {
1198 meta.lvm_kind = value_kind_t::VALUE_NULL;
1199 }
1200 }
1201 } else {
1202 values.emplace_back(fd.fd_meta);
1203 }
1204 }
1205 };
1206
stats_for_value(const intern_string_t & name) const1207 const logline_value_stats *stats_for_value(const intern_string_t &name) const override {
1208 const logline_value_stats *retval = nullptr;
1209
1210 for (const auto & wlf_field_def : this->wlf_field_defs) {
1211 if (wlf_field_def.fd_meta.lvm_name == name) {
1212 if (wlf_field_def.fd_numeric_index < 0) {
1213 break;
1214 }
1215 retval = &this->lf_value_stats[wlf_field_def.fd_numeric_index];
1216 break;
1217 }
1218 }
1219
1220 return retval;
1221 };
1222
specialized(int fmt_lock=-1)1223 std::shared_ptr<log_format> specialized(int fmt_lock = -1) override {
1224 return make_shared<w3c_log_format>(*this);
1225 };
1226
1227 class w3c_log_table : public log_format_vtab_impl {
1228 public:
w3c_log_table(const w3c_log_format & format)1229 explicit w3c_log_table(const w3c_log_format &format)
1230 : log_format_vtab_impl(format), wlt_format(format) {
1231
1232 }
1233
get_columns(vector<vtab_column> & cols) const1234 void get_columns(vector<vtab_column> &cols) const override {
1235 for (const auto &fd : KNOWN_FIELDS) {
1236 auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
1237 fd.fd_meta.lvm_kind);
1238
1239 cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
1240 type_pair.first,
1241 fd.fd_collator,
1242 false,
1243 "",
1244 type_pair.second);
1245 }
1246 cols.emplace_back("x_fields");
1247 cols.back().with_comment(
1248 "A JSON-object that contains fields that are not first-class columns");
1249 for (const auto& fs : KNOWN_STRUCT_FIELDS) {
1250 cols.emplace_back(fs.fs_struct_name.to_string());
1251 }
1252 };
1253
get_foreign_keys(std::vector<std::string> & keys_inout) const1254 void get_foreign_keys(std::vector<std::string> &keys_inout) const override {
1255 this->log_vtab_impl::get_foreign_keys(keys_inout);
1256
1257 for (const auto &fd : KNOWN_FIELDS) {
1258 if (fd.fd_meta.lvm_identifier) {
1259 keys_inout.push_back(fd.fd_meta.lvm_name.to_string());
1260 }
1261 }
1262 }
1263
1264 const w3c_log_format &wlt_format;
1265 };
1266
get_tables()1267 static map<intern_string_t, std::shared_ptr<w3c_log_table>> &get_tables() {
1268 static map<intern_string_t, std::shared_ptr<w3c_log_table>> retval;
1269
1270 return retval;
1271 };
1272
get_vtab_impl() const1273 std::shared_ptr<log_vtab_impl> get_vtab_impl() const override {
1274 if (this->wlf_format_name.empty()) {
1275 return nullptr;
1276 }
1277
1278 std::shared_ptr<w3c_log_table> retval = nullptr;
1279
1280 auto &tables = get_tables();
1281 auto iter = tables.find(this->wlf_format_name);
1282 if (iter == tables.end()) {
1283 retval = std::make_shared<w3c_log_table>(*this);
1284 tables[this->wlf_format_name] = retval;
1285 }
1286
1287 return retval;
1288 };
1289
get_subline(const logline & ll,shared_buffer_ref & sbr,bool full_message)1290 void get_subline(const logline &ll,
1291 shared_buffer_ref &sbr,
1292 bool full_message) override {
1293 }
1294
// Scanner whose base time is set from the "#Date:" directive in scan().
date_time_scanner wlf_time_scanner;
// Set to "w3c_log" once a "#Fields:" directive has been parsed; empty
// means the header has not been recognized (yet).
intern_string_t wlf_format_name;
// One entry per token of the "#Fields:" directive, in file order;
// annotate() indexes it with the column number of a data line.
vector<field_def> wlf_field_defs;
1298 };
1299
// Counter that is post-incremented once per KNOWN_FIELDS entry below, so the
// entries are numbered 0, 1, 2, ... in declaration order.
static int KNOWN_FIELD_INDEX = 0;
// Fields that are recognized by name when parsing a "#Fields:" directive.
// The virtual table exposes one column per entry, in this order, so the
// order of the entries must not be changed casually.
//
// NOTE(review): the initializers presumably map to
// (column index, field name, value kind, is-identifier[, collator]) --
// confirm against the field_def constructor, which is declared elsewhere.
const std::vector<w3c_log_format::field_def> w3c_log_format::KNOWN_FIELDS = {
    {
        KNOWN_FIELD_INDEX++,
        "cs-method",
        value_kind_t::VALUE_TEXT,
        true,
    },
    {
        KNOWN_FIELD_INDEX++,
        "c-ip",
        value_kind_t::VALUE_TEXT,
        true,
        "ipaddress",
    },
    {
        KNOWN_FIELD_INDEX++,
        "cs-bytes",
        value_kind_t::VALUE_INTEGER,
        false,
    },
    {
        KNOWN_FIELD_INDEX++,
        "cs-host",
        value_kind_t::VALUE_TEXT,
        true,
    },
    {
        KNOWN_FIELD_INDEX++,
        "cs-uri-stem",
        value_kind_t::VALUE_TEXT,
        true,
        "naturalnocase",
    },
    {
        KNOWN_FIELD_INDEX++,
        "cs-uri-query",
        value_kind_t::VALUE_TEXT,
        false,
    },
    {
        KNOWN_FIELD_INDEX++,
        "cs-username",
        value_kind_t::VALUE_TEXT,
        false,
    },
    {
        KNOWN_FIELD_INDEX++,
        "cs-version",
        value_kind_t::VALUE_TEXT,
        true,
    },
    {
        KNOWN_FIELD_INDEX++,
        "s-ip",
        value_kind_t::VALUE_TEXT,
        true,
        "ipaddress",
    },
    {
        KNOWN_FIELD_INDEX++,
        "s-port",
        value_kind_t::VALUE_INTEGER,
        true,
    },
    {
        KNOWN_FIELD_INDEX++,
        "s-computername",
        value_kind_t::VALUE_TEXT,
        true,
    },
    {
        KNOWN_FIELD_INDEX++,
        "s-sitename",
        value_kind_t::VALUE_TEXT,
        true,
    },
    {
        KNOWN_FIELD_INDEX++,
        "sc-bytes",
        value_kind_t::VALUE_INTEGER,
        false,
    },
    {
        KNOWN_FIELD_INDEX++,
        "sc-status",
        value_kind_t::VALUE_INTEGER,
        false,
    },
    {
        KNOWN_FIELD_INDEX++,
        "sc-substatus",
        value_kind_t::VALUE_INTEGER,
        false,
    },
    {
        KNOWN_FIELD_INDEX++,
        "time-taken",
        value_kind_t::VALUE_FLOAT,
        false,
    },
};
1402
// Field-name prefixes whose values are grouped into a single structured
// column: a "#Fields:" token that starts with the prefix on the left is
// stored under the struct name on the right.
//
// scan() strips the prefix with substr(3), so every prefix listed here has
// to be exactly three characters long.
const std::vector<w3c_log_format::field_to_struct_t> w3c_log_format::KNOWN_STRUCT_FIELDS = {
    {"cs(", "cs_headers"},
    {"sc(", "sc_headers"},
    {"rs(", "rs_headers"},
    {"sr(", "sr_headers"},
};
1409
1410 struct logfmt_pair_handler {
logfmt_pair_handlerlogfmt_pair_handler1411 explicit logfmt_pair_handler(date_time_scanner &dts) : lph_dt_scanner(dts)
1412 {
1413 }
1414
process_valuelogfmt_pair_handler1415 bool process_value(const string_fragment& value_frag) {
1416 if (this->lph_key_frag == "time" ||
1417 this->lph_key_frag == "ts") {
1418 if (!this->lph_dt_scanner.scan(value_frag.data(),
1419 value_frag.length(),
1420 nullptr,
1421 &this->lph_time_tm,
1422 this->lph_tv)) {
1423 return false;
1424 }
1425 this->lph_found_time = true;
1426 } else if (this->lph_key_frag == "level") {
1427 this->lph_level = string2level(value_frag.data(), value_frag.length());
1428 }
1429 return true;
1430 }
1431
1432 date_time_scanner &lph_dt_scanner;
1433 bool lph_found_time{false};
1434 struct exttm lph_time_tm{};
1435 struct timeval lph_tv{0, 0};
1436 log_level_t lph_level{log_level_t::LEVEL_INFO};
1437 string_fragment lph_key_frag{""};
1438 };
1439
1440 class logfmt_format : public log_format {
1441 public:
get_name() const1442 const intern_string_t get_name() const override
1443 {
1444 const static auto NAME = intern_string::lookup("logfmt_log");
1445
1446 return NAME;
1447 }
1448
1449 class logfmt_log_table : public log_format_vtab_impl {
1450 public:
logfmt_log_table(const log_format & format)1451 logfmt_log_table(const log_format &format) : log_format_vtab_impl(format) {}
1452
get_columns(vector<vtab_column> & cols) const1453 void get_columns(vector<vtab_column> &cols) const override {
1454 static const auto FIELDS = std::string("fields");
1455
1456 cols.emplace_back(FIELDS);
1457 };
1458 };
1459
get_vtab_impl() const1460 shared_ptr<log_vtab_impl> get_vtab_impl() const override
1461 {
1462 static auto retval = std::make_shared<logfmt_log_table>(*this);
1463
1464 return retval;
1465 }
1466
scan(logfile & lf,vector<logline> & dst,const line_info & li,shared_buffer_ref & sbr)1467 scan_result_t scan(logfile &lf, vector<logline> &dst, const line_info &li,
1468 shared_buffer_ref &sbr) override
1469 {
1470 auto p = logfmt::parser(string_fragment{sbr.get_data(), 0, (int) sbr.length()});
1471 scan_result_t retval = scan_result_t::SCAN_NO_MATCH;
1472 bool done = false;
1473 logfmt_pair_handler lph(this->lf_date_time);
1474
1475 while (!done) {
1476 auto parse_result = p.step();
1477
1478 done = parse_result.match(
1479 [](const logfmt::parser::end_of_input &) {
1480 return true;
1481 },
1482 [&lph](const logfmt::parser::kvpair &kvp) {
1483 lph.lph_key_frag = kvp.first;
1484
1485 return kvp.second.match(
1486 [](const logfmt::parser::bool_value& bv) {
1487 return false;
1488 },
1489 [&lph](const logfmt::parser::float_value& fv) {
1490 return lph.process_value(fv.fv_str_value);
1491 },
1492 [&lph](const logfmt::parser::int_value& iv) {
1493 return lph.process_value(iv.iv_str_value);
1494 },
1495 [&lph](const logfmt::parser::quoted_value &qv) {
1496 auto_mem<yajl_handle_t> handle(yajl_free);
1497 yajl_callbacks cb;
1498
1499 handle = yajl_alloc(&cb, nullptr, &lph);
1500 memset(&cb, 0, sizeof(cb));
1501 cb.yajl_string = +[](void *ctx, const unsigned char* str, size_t len) -> int {
1502 auto& lph = *((logfmt_pair_handler *)ctx);
1503 string_fragment value_frag{str, 0, (int) len};
1504
1505 return lph.process_value(value_frag);
1506 };
1507
1508 if (yajl_parse(handle,
1509 (const unsigned char *) qv.qv_value.data(),
1510 qv.qv_value.length()) != yajl_status_ok ||
1511 yajl_complete_parse(handle) != yajl_status_ok) {
1512 log_debug("json parsing failed");
1513 string_fragment unq_frag{
1514 qv.qv_value.sf_string,
1515 qv.qv_value.sf_begin + 1,
1516 qv.qv_value.sf_end - 1,
1517 };
1518
1519 return lph.process_value(unq_frag);
1520 }
1521
1522 return false;
1523 },
1524 [&lph](const logfmt::parser::unquoted_value &uv) {
1525 return lph.process_value(uv.uv_value);
1526 }
1527 );
1528 },
1529 [](const logfmt::parser::error &err) {
1530 // log_error("logfmt parse error: %s", err.e_msg.c_str());
1531 return true;
1532 }
1533 );
1534 }
1535
1536 if (lph.lph_found_time) {
1537 dst.emplace_back(li.li_file_range.fr_offset, lph.lph_tv, lph.lph_level);
1538 retval = scan_result_t::SCAN_MATCH;
1539 }
1540
1541 return retval;
1542 }
1543
1544 void
annotate(uint64_t line_number,shared_buffer_ref & sbr,string_attrs_t & sa,vector<logline_value> & values,bool annotate_module) const1545 annotate(uint64_t line_number, shared_buffer_ref &sbr, string_attrs_t &sa,
1546 vector<logline_value> &values, bool annotate_module) const override
1547 {
1548 static const auto FIELDS_NAME = intern_string::lookup("fields");
1549
1550 auto p = logfmt::parser(
1551 string_fragment{sbr.get_data(), 0, (int) sbr.length()});
1552 bool done = false;
1553
1554 while (!done) {
1555 auto parse_result = p.step();
1556
1557 done = parse_result.match(
1558 [](const logfmt::parser::end_of_input &) {
1559 return true;
1560 },
1561 [this, &sa, &values, &sbr](const logfmt::parser::kvpair &kvp) {
1562 auto value_frag = kvp.second.match(
1563 [this, &kvp, &values](const logfmt::parser::bool_value& bv) {
1564 auto lvm = logline_value_meta{
1565 intern_string::lookup(kvp.first),
1566 value_kind_t::VALUE_INTEGER,
1567 0,
1568 (log_format *) this
1569 }
1570 .with_struct_name(FIELDS_NAME);
1571 values.emplace_back(lvm, bv.bv_value);
1572
1573 return bv.bv_str_value;
1574 },
1575 [this, &kvp, &values](const logfmt::parser::int_value& iv) {
1576 auto lvm = logline_value_meta{
1577 intern_string::lookup(kvp.first),
1578 value_kind_t::VALUE_INTEGER,
1579 0,
1580 (log_format *) this
1581 }
1582 .with_struct_name(FIELDS_NAME);
1583 values.emplace_back(lvm, iv.iv_value);
1584
1585 return iv.iv_str_value;
1586 },
1587 [this, &kvp, &values](const logfmt::parser::float_value& fv) {
1588 auto lvm = logline_value_meta{
1589 intern_string::lookup(kvp.first),
1590 value_kind_t::VALUE_INTEGER,
1591 0,
1592 (log_format *) this
1593 }
1594 .with_struct_name(FIELDS_NAME);
1595 values.emplace_back(lvm, fv.fv_value);
1596
1597 return fv.fv_str_value;
1598 },
1599 [](const logfmt::parser::quoted_value &qv) {
1600 return qv.qv_value;
1601 },
1602 [](const logfmt::parser::unquoted_value &uv) {
1603 return uv.uv_value;
1604 }
1605 );
1606 auto value_lr = line_range{
1607 value_frag.sf_begin, value_frag.sf_end
1608 };
1609
1610 if (kvp.first == "time" || kvp.first == "ts") {
1611 sa.emplace_back(value_lr, &logline::L_TIMESTAMP);
1612 } else if (kvp.first == "level") {
1613 } else if (kvp.first == "msg") {
1614 sa.emplace_back(value_lr, &SA_BODY);
1615 } else if (!kvp.second.is<logfmt::parser::int_value>() &&
1616 !kvp.second.is<logfmt::parser::bool_value>()) {
1617 auto lvm = logline_value_meta{
1618 intern_string::lookup(kvp.first),
1619 value_frag.startswith("\"") ?
1620 value_kind_t::VALUE_JSON :
1621 value_kind_t::VALUE_TEXT,
1622 0,
1623 (log_format *) this
1624 }
1625 .with_struct_name(FIELDS_NAME);
1626 shared_buffer_ref value_sbr;
1627
1628 value_sbr.subset(sbr, value_frag.sf_begin, value_frag.length());
1629 values.emplace_back(lvm, value_sbr);
1630 }
1631
1632 return false;
1633 },
1634 [line_number, &sbr](const logfmt::parser::error &err) {
1635 log_error("bad line %.*s", sbr.length(), sbr.get_data());
1636 log_error("%lld:logfmt parse error: %s", line_number, err.e_msg.c_str());
1637 return true;
1638 }
1639 );
1640 }
1641 }
1642
specialized(int fmt_lock)1643 shared_ptr<log_format> specialized(int fmt_lock) override
1644 {
1645 return std::make_shared<logfmt_format>(*this);
1646 };
1647 };
1648
// Registers the log_format implementations defined in this file with the
// injector.
// NOTE(review): the order of the add<>() calls presumably determines the
// order in which the formats are tried -- confirm before reordering.
static auto format_binder = injector::bind_multiple<log_format>()
    .add<logfmt_format>()
    .add<bro_log_format>()
    .add<w3c_log_format>()
    .add<generic_log_format>();
1654