1 /*
2  * Copyright (c) 2002-2015 Balabit
3  * Copyright (c) 1998-2015 Balázs Scheidler
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published
7  * by the Free Software Foundation, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  *
18  * As an additional exemption you are allowed to compile & link against the
19  * OpenSSL libraries as published by the OpenSSL project. See the file
20  * COPYING for details.
21  *
22  */
23 
24 #include "csvparser.h"
25 #include "scanner/csv-scanner/csv-scanner.h"
26 #include "parser/parser-expr.h"
27 #include "scratch-buffers.h"
28 
29 #include <string.h>
30 
31 typedef struct _CSVParser
32 {
33   LogParser super;
34   CSVScannerOptions options;
35   gboolean drop_invalid;
36   gchar *prefix;
37   gint prefix_len;
38 } CSVParser;
39 
/*
 * Layout of the 32-bit flag word accepted by csv_parser_set_flags():
 * the low 16 bits are passed on to the scanner, the high 16 bits carry
 * the parser-level flags defined below.
 */
#define CSV_PARSER_FLAGS_SHIFT 16
#define CSV_PARSER_FLAGS_MASK  0xFFFF0000
#define CSV_SCANNER_FLAGS_MASK 0xFFFF

/* parser-level flags, one bit each, starting right above the scanner flags */
#define CSV_PARSER_ESCAPE_MODE_NONE          (1 << (CSV_PARSER_FLAGS_SHIFT + 0))
#define CSV_PARSER_ESCAPE_MODE_BACKSLASH     (1 << (CSV_PARSER_FLAGS_SHIFT + 1))
#define CSV_PARSER_ESCAPE_MODE_DOUBLE_CHAR   (1 << (CSV_PARSER_FLAGS_SHIFT + 2))
#define CSV_PARSER_DROP_INVALID              (1 << (CSV_PARSER_FLAGS_SHIFT + 3))

/* the three escape-mode bits together select the dialect */
#define CSV_PARSER_DIALECT_MASK \
  (CSV_PARSER_ESCAPE_MODE_NONE | CSV_PARSER_ESCAPE_MODE_BACKSLASH | CSV_PARSER_ESCAPE_MODE_DOUBLE_CHAR)
49 
50 CSVScannerOptions *
csv_parser_get_scanner_options(LogParser * s)51 csv_parser_get_scanner_options(LogParser *s)
52 {
53   CSVParser *self = (CSVParser *) s;
54 
55   return &self->options;
56 }
57 
58 gboolean
csv_parser_set_flags(LogParser * s,guint32 flags)59 csv_parser_set_flags(LogParser *s, guint32 flags)
60 {
61   CSVParser *self = (CSVParser *) s;
62   guint32 dialect = (flags & CSV_PARSER_DIALECT_MASK);
63   guint32 scanner_flags = flags & CSV_SCANNER_FLAGS_MASK;
64 
65   csv_scanner_options_set_flags(&self->options, scanner_flags);
66   switch (dialect)
67     {
68     case 0:
69       break;
70     case CSV_PARSER_ESCAPE_MODE_NONE:
71       csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_NONE);
72       break;
73     case CSV_PARSER_ESCAPE_MODE_BACKSLASH:
74       csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_BACKSLASH);
75       break;
76     case CSV_PARSER_ESCAPE_MODE_DOUBLE_CHAR:
77       csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_DOUBLE_CHAR);
78       break;
79     default:
80       return FALSE;
81     }
82   if (flags & CSV_PARSER_DROP_INVALID)
83     self->drop_invalid = TRUE;
84   return TRUE;
85 }
86 
87 void
csv_parser_set_prefix(LogParser * s,const gchar * prefix)88 csv_parser_set_prefix(LogParser *s, const gchar *prefix)
89 {
90   CSVParser *self = (CSVParser *) s;
91 
92   g_free(self->prefix);
93   if (prefix)
94     {
95       self->prefix = g_strdup(prefix);
96       self->prefix_len = strlen(prefix);
97     }
98   else
99     {
100       self->prefix = NULL;
101       self->prefix_len = 0;
102     }
103 }
104 
105 void
csv_parser_set_drop_invalid(LogParser * s,gboolean drop_invalid)106 csv_parser_set_drop_invalid(LogParser *s, gboolean drop_invalid)
107 {
108   CSVParser *self = (CSVParser *) s;
109 
110   self->drop_invalid = drop_invalid;
111 }
112 
113 static const gchar *
_format_key_for_prefix(GString * scratch,const gchar * key,const gint prefix_len)114 _format_key_for_prefix(GString *scratch, const gchar *key, const gint prefix_len)
115 {
116   g_string_truncate(scratch, prefix_len);
117   g_string_append(scratch, key);
118   return scratch->str;
119 }
120 
121 static const gchar *
_return_key(GString * scratch,const gchar * key,const gint prefix_len)122 _return_key(GString *scratch, const gchar *key, const gint prefix_len)
123 {
124   return key;
125 }
126 
127 typedef const gchar *(*key_formatter_t)(GString *scratch, const gchar *key, const gint prefix_len);
128 
129 static key_formatter_t
dispatch_key_formatter(gchar * prefix)130 dispatch_key_formatter(gchar *prefix)
131 {
132   return prefix ? _format_key_for_prefix : _return_key;
133 }
134 
135 static gboolean
csv_parser_process(LogParser * s,LogMessage ** pmsg,const LogPathOptions * path_options,const gchar * input,gsize input_len)136 csv_parser_process(LogParser *s, LogMessage **pmsg, const LogPathOptions *path_options, const gchar *input,
137                    gsize input_len)
138 {
139   CSVParser *self = (CSVParser *) s;
140   LogMessage *msg = log_msg_make_writable(pmsg, path_options);
141 
142   msg_trace("csv-parser message processing started",
143             evt_tag_str ("input", input),
144             evt_tag_str ("prefix", self->prefix),
145             evt_tag_printf("msg", "%p", *pmsg));
146   CSVScanner scanner;
147   csv_scanner_init(&scanner, &self->options, input);
148 
149   GString *key_scratch = scratch_buffers_alloc();
150   if (self->prefix)
151     g_string_assign(key_scratch, self->prefix);
152 
153   key_formatter_t _key_formatter = dispatch_key_formatter(self->prefix);
154   while (csv_scanner_scan_next(&scanner))
155     {
156 
157       log_msg_set_value_by_name(msg,
158                                 _key_formatter(key_scratch, csv_scanner_get_current_name(&scanner), self->prefix_len),
159                                 csv_scanner_get_current_value(&scanner),
160                                 csv_scanner_get_current_value_len(&scanner));
161     }
162 
163   gboolean result = TRUE;
164   if (self->drop_invalid && !csv_scanner_is_scan_complete(&scanner))
165     {
166       msg_debug("csv-parser() failed",
167                 evt_tag_str("error", "csv-parser() failed to parse its input and drop-invalid(yes) was specified"),
168                 evt_tag_str("input", input));
169 
170       result = FALSE;
171     }
172   csv_scanner_deinit(&scanner);
173 
174   return result;
175 }
176 
177 static LogPipe *
csv_parser_clone(LogPipe * s)178 csv_parser_clone(LogPipe *s)
179 {
180   CSVParser *self = (CSVParser *) s;
181   CSVParser *cloned;
182 
183   cloned = (CSVParser *) csv_parser_new(s->cfg);
184   csv_scanner_options_copy(&cloned->options, &self->options);
185   cloned->super.template = log_template_ref(self->super.template);
186   csv_parser_set_prefix(&cloned->super, self->prefix);
187   csv_parser_set_drop_invalid(&cloned->super, self->drop_invalid);
188   return &cloned->super.super;
189 }
190 
191 static void
csv_parser_free(LogPipe * s)192 csv_parser_free(LogPipe *s)
193 {
194   CSVParser *self = (CSVParser *) s;
195 
196   csv_scanner_options_clean(&self->options);
197   g_free(self->prefix);
198   log_parser_free_method(s);
199 }
200 
201 /*
202  * Parse comma-separated values from a log message.
203  */
204 LogParser *
csv_parser_new(GlobalConfig * cfg)205 csv_parser_new(GlobalConfig *cfg)
206 {
207   CSVParser *self = g_new0(CSVParser, 1);
208 
209   log_parser_init_instance(&self->super, cfg);
210   self->super.super.free_fn = csv_parser_free;
211   self->super.super.clone = csv_parser_clone;
212   self->super.process = csv_parser_process;
213   csv_scanner_options_set_delimiters(&self->options, " ");
214   csv_scanner_options_set_quote_pairs(&self->options, "\"\"''");
215   csv_scanner_options_set_flags(&self->options, CSV_SCANNER_STRIP_WHITESPACE);
216   csv_scanner_options_set_dialect(&self->options, CSV_SCANNER_ESCAPE_NONE);
217   return &self->super;
218 }
219 
220 guint32
csv_parser_lookup_flag(const gchar * flag)221 csv_parser_lookup_flag(const gchar *flag)
222 {
223   if (strcmp(flag, "escape-none") == 0)
224     return CSV_PARSER_ESCAPE_MODE_NONE;
225   else if (strcmp(flag, "escape-backslash") == 0)
226     return CSV_PARSER_ESCAPE_MODE_BACKSLASH;
227   else if (strcmp(flag, "escape-double-char") == 0)
228     return CSV_PARSER_ESCAPE_MODE_DOUBLE_CHAR;
229   else if (strcmp(flag, "strip-whitespace") == 0)
230     return CSV_SCANNER_STRIP_WHITESPACE;
231   else if (strcmp(flag, "greedy") == 0)
232     return CSV_SCANNER_GREEDY;
233   else if (strcmp(flag, "drop-invalid") == 0)
234     return CSV_PARSER_DROP_INVALID;
235   return 0;
236 }
237 
238 gint
csv_parser_lookup_dialect(const gchar * flag)239 csv_parser_lookup_dialect(const gchar *flag)
240 {
241   if (strcmp(flag, "escape-none") == 0)
242     return CSV_SCANNER_ESCAPE_NONE;
243   else if (strcmp(flag, "escape-backslash") == 0)
244     return CSV_SCANNER_ESCAPE_BACKSLASH;
245   else if (strcmp(flag, "escape-double-char") == 0)
246     return CSV_SCANNER_ESCAPE_DOUBLE_CHAR;
247   return -1;
248 }
249