1 /*
2 * Copyright (c) 2002-2015 Balabit
3 * Copyright (c) 1998-2015 Balázs Scheidler
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 as published
7 * by the Free Software Foundation, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 * As an additional exemption you are allowed to compile & link against the
19 * OpenSSL libraries as published by the OpenSSL project. See the file
20 * COPYING for details.
21 *
22 */
23
24 #include "csvparser.h"
25 #include "scanner/csv-scanner/csv-scanner.h"
26 #include "parser/parser-expr.h"
27 #include "scratch-buffers.h"
28
29 #include <string.h>
30
/*
 * Per-instance state of the csv-parser.  LogParser is the first member, which
 * is what lets the functions in this file cast a LogParser pointer to a
 * CSVParser pointer.
 */
typedef struct _CSVParser
{
  LogParser super;
  /* scanner configuration passed to csv_scanner_init() for every message */
  CSVScannerOptions options;
  /* when TRUE, processing reports failure if the scan did not complete */
  gboolean drop_invalid;
  /* optional string prepended to each column name; NULL when not set */
  gchar *prefix;
  /* strlen() of prefix, stored when the prefix is set; 0 when prefix is NULL */
  gint prefix_len;
} CSVParser;
39
/*
 * Layout of the 32-bit flags word accepted by csv_parser_set_flags():
 * the low 16 bits are forwarded to the scanner as-is, the high 16 bits
 * carry the parser-level flags defined below.
 */
#define CSV_PARSER_FLAGS_SHIFT 16
#define CSV_PARSER_FLAGS_MASK 0xFFFF0000
#define CSV_SCANNER_FLAGS_MASK 0xFFFF

/*
 * The three lowest parser bits select the escape mode.  csv_parser_set_flags()
 * accepts either none of them or exactly one; any other combination is
 * rejected.
 */
#define CSV_PARSER_DIALECT_MASK (0x7 << CSV_PARSER_FLAGS_SHIFT)
#define CSV_PARSER_ESCAPE_MODE_NONE (1 << CSV_PARSER_FLAGS_SHIFT)
#define CSV_PARSER_ESCAPE_MODE_BACKSLASH (2 << CSV_PARSER_FLAGS_SHIFT)
#define CSV_PARSER_ESCAPE_MODE_DOUBLE_CHAR (4 << CSV_PARSER_FLAGS_SHIFT)
/* independent of the escape mode: turns on drop_invalid in the parser */
#define CSV_PARSER_DROP_INVALID (8 << CSV_PARSER_FLAGS_SHIFT)
49
50 CSVScannerOptions *
csv_parser_get_scanner_options(LogParser * s)51 csv_parser_get_scanner_options(LogParser *s)
52 {
53 CSVParser *self = (CSVParser *) s;
54
55 return &self->options;
56 }
57
58 gboolean
csv_parser_set_flags(LogParser * s,guint32 flags)59 csv_parser_set_flags(LogParser *s, guint32 flags)
60 {
61 CSVParser *self = (CSVParser *) s;
62 guint32 dialect = (flags & CSV_PARSER_DIALECT_MASK);
63 guint32 scanner_flags = flags & CSV_SCANNER_FLAGS_MASK;
64
65 csv_scanner_options_set_flags(&self->options, scanner_flags);
66 switch (dialect)
67 {
68 case 0:
69 break;
70 case CSV_PARSER_ESCAPE_MODE_NONE:
71 csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_NONE);
72 break;
73 case CSV_PARSER_ESCAPE_MODE_BACKSLASH:
74 csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_BACKSLASH);
75 break;
76 case CSV_PARSER_ESCAPE_MODE_DOUBLE_CHAR:
77 csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_DOUBLE_CHAR);
78 break;
79 default:
80 return FALSE;
81 }
82 if (flags & CSV_PARSER_DROP_INVALID)
83 self->drop_invalid = TRUE;
84 return TRUE;
85 }
86
87 void
csv_parser_set_prefix(LogParser * s,const gchar * prefix)88 csv_parser_set_prefix(LogParser *s, const gchar *prefix)
89 {
90 CSVParser *self = (CSVParser *) s;
91
92 g_free(self->prefix);
93 if (prefix)
94 {
95 self->prefix = g_strdup(prefix);
96 self->prefix_len = strlen(prefix);
97 }
98 else
99 {
100 self->prefix = NULL;
101 self->prefix_len = 0;
102 }
103 }
104
105 void
csv_parser_set_drop_invalid(LogParser * s,gboolean drop_invalid)106 csv_parser_set_drop_invalid(LogParser *s, gboolean drop_invalid)
107 {
108 CSVParser *self = (CSVParser *) s;
109
110 self->drop_invalid = drop_invalid;
111 }
112
113 static const gchar *
_format_key_for_prefix(GString * scratch,const gchar * key,const gint prefix_len)114 _format_key_for_prefix(GString *scratch, const gchar *key, const gint prefix_len)
115 {
116 g_string_truncate(scratch, prefix_len);
117 g_string_append(scratch, key);
118 return scratch->str;
119 }
120
121 static const gchar *
_return_key(GString * scratch,const gchar * key,const gint prefix_len)122 _return_key(GString *scratch, const gchar *key, const gint prefix_len)
123 {
124 return key;
125 }
126
/*
 * Signature shared by the key formatters in this file: given a scratch
 * string, a column name and the prefix length, return the name under which
 * the value is to be stored.
 */
typedef const gchar *(*key_formatter_t)(GString *scratch, const gchar *key, const gint prefix_len);
128
129 static key_formatter_t
dispatch_key_formatter(gchar * prefix)130 dispatch_key_formatter(gchar *prefix)
131 {
132 return prefix ? _format_key_for_prefix : _return_key;
133 }
134
135 static gboolean
csv_parser_process(LogParser * s,LogMessage ** pmsg,const LogPathOptions * path_options,const gchar * input,gsize input_len)136 csv_parser_process(LogParser *s, LogMessage **pmsg, const LogPathOptions *path_options, const gchar *input,
137 gsize input_len)
138 {
139 CSVParser *self = (CSVParser *) s;
140 LogMessage *msg = log_msg_make_writable(pmsg, path_options);
141
142 msg_trace("csv-parser message processing started",
143 evt_tag_str ("input", input),
144 evt_tag_str ("prefix", self->prefix),
145 evt_tag_printf("msg", "%p", *pmsg));
146 CSVScanner scanner;
147 csv_scanner_init(&scanner, &self->options, input);
148
149 GString *key_scratch = scratch_buffers_alloc();
150 if (self->prefix)
151 g_string_assign(key_scratch, self->prefix);
152
153 key_formatter_t _key_formatter = dispatch_key_formatter(self->prefix);
154 while (csv_scanner_scan_next(&scanner))
155 {
156
157 log_msg_set_value_by_name(msg,
158 _key_formatter(key_scratch, csv_scanner_get_current_name(&scanner), self->prefix_len),
159 csv_scanner_get_current_value(&scanner),
160 csv_scanner_get_current_value_len(&scanner));
161 }
162
163 gboolean result = TRUE;
164 if (self->drop_invalid && !csv_scanner_is_scan_complete(&scanner))
165 {
166 msg_debug("csv-parser() failed",
167 evt_tag_str("error", "csv-parser() failed to parse its input and drop-invalid(yes) was specified"),
168 evt_tag_str("input", input));
169
170 result = FALSE;
171 }
172 csv_scanner_deinit(&scanner);
173
174 return result;
175 }
176
177 static LogPipe *
csv_parser_clone(LogPipe * s)178 csv_parser_clone(LogPipe *s)
179 {
180 CSVParser *self = (CSVParser *) s;
181 CSVParser *cloned;
182
183 cloned = (CSVParser *) csv_parser_new(s->cfg);
184 csv_scanner_options_copy(&cloned->options, &self->options);
185 cloned->super.template = log_template_ref(self->super.template);
186 csv_parser_set_prefix(&cloned->super, self->prefix);
187 csv_parser_set_drop_invalid(&cloned->super, self->drop_invalid);
188 return &cloned->super.super;
189 }
190
191 static void
csv_parser_free(LogPipe * s)192 csv_parser_free(LogPipe *s)
193 {
194 CSVParser *self = (CSVParser *) s;
195
196 csv_scanner_options_clean(&self->options);
197 g_free(self->prefix);
198 log_parser_free_method(s);
199 }
200
201 /*
202 * Parse comma-separated values from a log message.
203 */
204 LogParser *
csv_parser_new(GlobalConfig * cfg)205 csv_parser_new(GlobalConfig *cfg)
206 {
207 CSVParser *self = g_new0(CSVParser, 1);
208
209 log_parser_init_instance(&self->super, cfg);
210 self->super.super.free_fn = csv_parser_free;
211 self->super.super.clone = csv_parser_clone;
212 self->super.process = csv_parser_process;
213 csv_scanner_options_set_delimiters(&self->options, " ");
214 csv_scanner_options_set_quote_pairs(&self->options, "\"\"''");
215 csv_scanner_options_set_flags(&self->options, CSV_SCANNER_STRIP_WHITESPACE);
216 csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_NONE);
217 return &self->super;
218 }
219
220 guint32
csv_parser_lookup_flag(const gchar * flag)221 csv_parser_lookup_flag(const gchar *flag)
222 {
223 if (strcmp(flag, "escape-none") == 0)
224 return CSV_PARSER_ESCAPE_MODE_NONE;
225 else if (strcmp(flag, "escape-backslash") == 0)
226 return CSV_PARSER_ESCAPE_MODE_BACKSLASH;
227 else if (strcmp(flag, "escape-double-char") == 0)
228 return CSV_PARSER_ESCAPE_MODE_DOUBLE_CHAR;
229 else if (strcmp(flag, "strip-whitespace") == 0)
230 return CSV_SCANNER_STRIP_WHITESPACE;
231 else if (strcmp(flag, "greedy") == 0)
232 return CSV_SCANNER_GREEDY;
233 else if (strcmp(flag, "drop-invalid") == 0)
234 return CSV_PARSER_DROP_INVALID;
235 return 0;
236 }
237
238 gint
csv_parser_lookup_dialect(const gchar * flag)239 csv_parser_lookup_dialect(const gchar *flag)
240 {
241 if (strcmp(flag, "escape-none") == 0)
242 return CSV_SCANNER_ESCAPE_NONE;
243 else if (strcmp(flag, "escape-backslash") == 0)
244 return CSV_SCANNER_ESCAPE_BACKSLASH;
245 else if (strcmp(flag, "escape-double-char") == 0)
246 return CSV_SCANNER_ESCAPE_DOUBLE_CHAR;
247 return -1;
248 }
249