1 /**
2  * Copyright (c) 2020, Timothy Stack
3  *
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * * Redistributions of source code must retain the above copyright notice, this
10  * list of conditions and the following disclaimer.
11  * * Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  * * Neither the name of Timothy Stack nor the names of its contributors
15  * may be used to endorse or promote products derived from this software
16  * without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include "config.h"
31 
32 #include "log_data_table.hh"
33 
log_data_table(logfile_sub_source & lss,log_vtab_manager & lvm,content_line_t template_line,intern_string_t table_name)34 log_data_table::log_data_table(logfile_sub_source &lss, log_vtab_manager &lvm,
35                                content_line_t template_line,
36                                intern_string_t table_name)
37     : log_vtab_impl(table_name),
38       ldt_log_source(lss),
39       ldt_template_line(template_line),
40       ldt_instance(-1) {
41     std::shared_ptr<logfile> lf = lss.find(template_line);
42     auto format = lf->get_format();
43 
44     this->vi_supports_indexes = false;
45     this->ldt_format_impl = lvm.lookup_impl(format->get_name());
46     this->get_columns_int();
47 }
48 
get_columns_int()49 void log_data_table::get_columns_int()
50 {
51     static intern_string_t instance_name = intern_string::lookup("log_msg_instance");
52 
53     auto& cols = this->ldt_cols;
54     auto& metas = this->ldt_value_metas;
55     content_line_t cl_copy = this->ldt_template_line;
56     std::shared_ptr<logfile> lf = this->ldt_log_source.find(cl_copy);
57     struct line_range          body;
58     string_attrs_t             sa;
59     std::vector<logline_value> line_values;
60     auto format = lf->get_format();
61     shared_buffer_ref line;
62 
63     if (this->ldt_format_impl != nullptr) {
64         this->ldt_format_impl->get_columns(cols);
65     }
66     lf->read_full_message(lf->begin() + cl_copy, line);
67     format->annotate(cl_copy, line, sa, line_values, false);
68     body = find_string_attr_range(sa, &SA_BODY);
69     if (body.lr_end == -1) {
70         this->ldt_schema_id.clear();
71         return;
72     }
73 
74     data_scanner ds(line, body.lr_start, body.lr_end);
75     data_parser  dp(&ds);
76     column_namer cn;
77 
78     dp.parse();
79 
80     metas.emplace_back(
81         instance_name, value_kind_t::VALUE_INTEGER, cols.size(), format.get());
82     cols.emplace_back("log_msg_instance", SQLITE_INTEGER);
83     for (auto pair_iter = dp.dp_pairs.begin();
84          pair_iter != dp.dp_pairs.end();
85          ++pair_iter) {
86         std::string key_str = dp.get_element_string(
87             pair_iter->e_sub_elements->front());
88         std::string colname  = cn.add_column(key_str);
89         int         sql_type = SQLITE3_TEXT;
90         value_kind_t kind = value_kind_t::VALUE_TEXT;
91         std::string collator;
92 
93         switch (pair_iter->e_sub_elements->back().value_token()) {
94             case DT_IPV4_ADDRESS:
95             case DT_IPV6_ADDRESS:
96                 collator = "ipaddress";
97                 break;
98 
99             case DT_NUMBER:
100                 sql_type = SQLITE_FLOAT;
101                 kind = value_kind_t::VALUE_FLOAT;
102                 break;
103 
104             default:
105                 collator = "naturalnocase";
106                 break;
107         }
108         metas.emplace_back(
109             intern_string::lookup(colname), kind, cols.size(), format.get());
110         cols.emplace_back(colname, sql_type, collator);
111     }
112     this->ldt_schema_id = dp.dp_schema_id;
113 }
114 
next(log_cursor & lc,logfile_sub_source & lss)115 bool log_data_table::next(log_cursor &lc, logfile_sub_source &lss)
116 {
117     if (lc.lc_curr_line == vis_line_t(-1)) {
118         this->ldt_instance = -1;
119     }
120 
121     lc.lc_curr_line = lc.lc_curr_line + vis_line_t(1);
122     lc.lc_sub_index = 0;
123 
124     if (lc.lc_curr_line == (int)lss.text_line_count()) {
125         return true;
126     }
127 
128     content_line_t cl;
129 
130     cl = lss.at(lc.lc_curr_line);
131     std::shared_ptr<logfile> lf = lss.find(cl);
132     auto lf_iter = lf->begin() + cl;
133 
134     if (!lf_iter->is_message()) {
135         return false;
136     }
137 
138     if (lf_iter->has_schema() &&
139         !lf_iter->match_schema(this->ldt_schema_id)) {
140         return false;
141     }
142 
143     string_attrs_t             sa;
144     struct line_range          body;
145     std::vector<logline_value> line_values;
146 
147     lf->read_full_message(lf_iter, this->ldt_current_line);
148     lf->get_format()->annotate(cl,
149                                this->ldt_current_line,
150                                sa,
151                                line_values,
152                                false);
153     body = find_string_attr_range(sa, &SA_BODY);
154     if (body.lr_end == -1) {
155         return false;
156     }
157 
158     data_scanner ds(this->ldt_current_line, body.lr_start, body.lr_end);
159     data_parser  dp(&ds);
160     dp.parse();
161 
162     lf_iter->set_schema(dp.dp_schema_id);
163 
164     /* The cached schema ID in the log line is not complete, so we still */
165     /* need to check for a full match. */
166     if (dp.dp_schema_id != this->ldt_schema_id) {
167         return false;
168     }
169 
170     this->ldt_pairs.clear();
171     this->ldt_pairs.swap(dp.dp_pairs, __FILE__, __LINE__);
172     this->ldt_instance += 1;
173 
174     return true;
175 }
176 
extract(std::shared_ptr<logfile> lf,uint64_t line_number,shared_buffer_ref & line,std::vector<logline_value> & values)177 void log_data_table::extract(std::shared_ptr<logfile> lf, uint64_t line_number,
178                              shared_buffer_ref &line,
179                              std::vector<logline_value> &values)
180 {
181     auto meta_iter = this->ldt_value_metas.begin();
182 
183     this->ldt_format_impl->extract(lf, line_number, line, values);
184     values.emplace_back(*meta_iter, this->ldt_instance);
185     ++meta_iter;
186     for (auto &ldt_pair : this->ldt_pairs) {
187         const data_parser::element &pvalue = ldt_pair.get_pair_value();
188 
189         switch (pvalue.value_token()) {
190             case DT_NUMBER: {
191                 char scan_value[line.length() + 1];
192                 double d = 0.0;
193 
194                 memcpy(scan_value,
195                        line.get_data() + pvalue.e_capture.c_begin,
196                        pvalue.e_capture.length());
197                 scan_value[pvalue.e_capture.length()] = '\0';
198                 if (sscanf(scan_value, "%lf", &d) != 1) {
199                     d = 0.0;
200                 }
201                 values.emplace_back(*meta_iter, d);
202                 break;
203             }
204 
205             default: {
206                 values.emplace_back(
207                     *meta_iter,
208                     line,
209                     line_range{
210                         pvalue.e_capture.c_begin,
211                         pvalue.e_capture.c_end
212                     });
213                 break;
214             }
215 
216         }
217         ++meta_iter;
218     }
219 }
220