1 /**
2 * Copyright (c) 2020, Timothy Stack
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 * * Neither the name of Timothy Stack nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #include "config.h"
31
32 #include "log_data_table.hh"
33
log_data_table(logfile_sub_source & lss,log_vtab_manager & lvm,content_line_t template_line,intern_string_t table_name)34 log_data_table::log_data_table(logfile_sub_source &lss, log_vtab_manager &lvm,
35 content_line_t template_line,
36 intern_string_t table_name)
37 : log_vtab_impl(table_name),
38 ldt_log_source(lss),
39 ldt_template_line(template_line),
40 ldt_instance(-1) {
41 std::shared_ptr<logfile> lf = lss.find(template_line);
42 auto format = lf->get_format();
43
44 this->vi_supports_indexes = false;
45 this->ldt_format_impl = lvm.lookup_impl(format->get_name());
46 this->get_columns_int();
47 }
48
get_columns_int()49 void log_data_table::get_columns_int()
50 {
51 static intern_string_t instance_name = intern_string::lookup("log_msg_instance");
52
53 auto& cols = this->ldt_cols;
54 auto& metas = this->ldt_value_metas;
55 content_line_t cl_copy = this->ldt_template_line;
56 std::shared_ptr<logfile> lf = this->ldt_log_source.find(cl_copy);
57 struct line_range body;
58 string_attrs_t sa;
59 std::vector<logline_value> line_values;
60 auto format = lf->get_format();
61 shared_buffer_ref line;
62
63 if (this->ldt_format_impl != nullptr) {
64 this->ldt_format_impl->get_columns(cols);
65 }
66 lf->read_full_message(lf->begin() + cl_copy, line);
67 format->annotate(cl_copy, line, sa, line_values, false);
68 body = find_string_attr_range(sa, &SA_BODY);
69 if (body.lr_end == -1) {
70 this->ldt_schema_id.clear();
71 return;
72 }
73
74 data_scanner ds(line, body.lr_start, body.lr_end);
75 data_parser dp(&ds);
76 column_namer cn;
77
78 dp.parse();
79
80 metas.emplace_back(
81 instance_name, value_kind_t::VALUE_INTEGER, cols.size(), format.get());
82 cols.emplace_back("log_msg_instance", SQLITE_INTEGER);
83 for (auto pair_iter = dp.dp_pairs.begin();
84 pair_iter != dp.dp_pairs.end();
85 ++pair_iter) {
86 std::string key_str = dp.get_element_string(
87 pair_iter->e_sub_elements->front());
88 std::string colname = cn.add_column(key_str);
89 int sql_type = SQLITE3_TEXT;
90 value_kind_t kind = value_kind_t::VALUE_TEXT;
91 std::string collator;
92
93 switch (pair_iter->e_sub_elements->back().value_token()) {
94 case DT_IPV4_ADDRESS:
95 case DT_IPV6_ADDRESS:
96 collator = "ipaddress";
97 break;
98
99 case DT_NUMBER:
100 sql_type = SQLITE_FLOAT;
101 kind = value_kind_t::VALUE_FLOAT;
102 break;
103
104 default:
105 collator = "naturalnocase";
106 break;
107 }
108 metas.emplace_back(
109 intern_string::lookup(colname), kind, cols.size(), format.get());
110 cols.emplace_back(colname, sql_type, collator);
111 }
112 this->ldt_schema_id = dp.dp_schema_id;
113 }
114
next(log_cursor & lc,logfile_sub_source & lss)115 bool log_data_table::next(log_cursor &lc, logfile_sub_source &lss)
116 {
117 if (lc.lc_curr_line == vis_line_t(-1)) {
118 this->ldt_instance = -1;
119 }
120
121 lc.lc_curr_line = lc.lc_curr_line + vis_line_t(1);
122 lc.lc_sub_index = 0;
123
124 if (lc.lc_curr_line == (int)lss.text_line_count()) {
125 return true;
126 }
127
128 content_line_t cl;
129
130 cl = lss.at(lc.lc_curr_line);
131 std::shared_ptr<logfile> lf = lss.find(cl);
132 auto lf_iter = lf->begin() + cl;
133
134 if (!lf_iter->is_message()) {
135 return false;
136 }
137
138 if (lf_iter->has_schema() &&
139 !lf_iter->match_schema(this->ldt_schema_id)) {
140 return false;
141 }
142
143 string_attrs_t sa;
144 struct line_range body;
145 std::vector<logline_value> line_values;
146
147 lf->read_full_message(lf_iter, this->ldt_current_line);
148 lf->get_format()->annotate(cl,
149 this->ldt_current_line,
150 sa,
151 line_values,
152 false);
153 body = find_string_attr_range(sa, &SA_BODY);
154 if (body.lr_end == -1) {
155 return false;
156 }
157
158 data_scanner ds(this->ldt_current_line, body.lr_start, body.lr_end);
159 data_parser dp(&ds);
160 dp.parse();
161
162 lf_iter->set_schema(dp.dp_schema_id);
163
164 /* The cached schema ID in the log line is not complete, so we still */
165 /* need to check for a full match. */
166 if (dp.dp_schema_id != this->ldt_schema_id) {
167 return false;
168 }
169
170 this->ldt_pairs.clear();
171 this->ldt_pairs.swap(dp.dp_pairs, __FILE__, __LINE__);
172 this->ldt_instance += 1;
173
174 return true;
175 }
176
extract(std::shared_ptr<logfile> lf,uint64_t line_number,shared_buffer_ref & line,std::vector<logline_value> & values)177 void log_data_table::extract(std::shared_ptr<logfile> lf, uint64_t line_number,
178 shared_buffer_ref &line,
179 std::vector<logline_value> &values)
180 {
181 auto meta_iter = this->ldt_value_metas.begin();
182
183 this->ldt_format_impl->extract(lf, line_number, line, values);
184 values.emplace_back(*meta_iter, this->ldt_instance);
185 ++meta_iter;
186 for (auto &ldt_pair : this->ldt_pairs) {
187 const data_parser::element &pvalue = ldt_pair.get_pair_value();
188
189 switch (pvalue.value_token()) {
190 case DT_NUMBER: {
191 char scan_value[line.length() + 1];
192 double d = 0.0;
193
194 memcpy(scan_value,
195 line.get_data() + pvalue.e_capture.c_begin,
196 pvalue.e_capture.length());
197 scan_value[pvalue.e_capture.length()] = '\0';
198 if (sscanf(scan_value, "%lf", &d) != 1) {
199 d = 0.0;
200 }
201 values.emplace_back(*meta_iter, d);
202 break;
203 }
204
205 default: {
206 values.emplace_back(
207 *meta_iter,
208 line,
209 line_range{
210 pvalue.e_capture.c_begin,
211 pvalue.e_capture.c_end
212 });
213 break;
214 }
215
216 }
217 ++meta_iter;
218 }
219 }
220