/*
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 2001 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
8 
9 #include "config.h"
10 #define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
11 
12 #include <stdio.h>
13 #include <string.h>
14 
15 #include "dfilter-int.h"
16 #include "syntax-tree.h"
17 #include "gencode.h"
18 #include "semcheck.h"
19 #include "dfvm.h"
20 #include <epan/epan_dissect.h>
21 #include "dfilter.h"
22 #include "dfilter-macro.h"
23 #include "scanner_lex.h"
24 #include <wsutil/wslog.h>
25 #include <wsutil/ws_assert.h>
26 #include "grammar.h"
27 
28 
29 #define DFILTER_TOKEN_ID_OFFSET	1
30 
31 /* Holds the singular instance of our Lemon parser object */
32 static void*	ParserObj = NULL;
33 
34 /*
35  * XXX - if we're using a version of Flex that supports reentrant lexical
36  * analyzers, we should put this into the lexical analyzer's state.
37  */
38 dfwork_t *global_dfw;
39 
40 void
dfilter_fail(dfwork_t * dfw,const char * format,...)41 dfilter_fail(dfwork_t *dfw, const char *format, ...)
42 {
43 	va_list	args;
44 
45 	/* If we've already reported one error, don't overwite it */
46 	if (dfw->error_message != NULL)
47 		return;
48 
49 	va_start(args, format);
50 	dfw->error_message = g_strdup_vprintf(format, args);
51 	va_end(args);
52 }
53 
54 /* Initialize the dfilter module */
55 void
dfilter_init(void)56 dfilter_init(void)
57 {
58 	if (ParserObj) {
59 		ws_message("I expected ParserObj to be NULL\n");
60 		/* Free the Lemon Parser object */
61 		DfilterFree(ParserObj, g_free);
62 	}
63 	/* Allocate an instance of our Lemon-based parser */
64 	ParserObj = DfilterAlloc(g_malloc);
65 
66 /* Enable parser tracing by defining AM_CFLAGS
67  * so that it contains "-DDFTRACE".
68  */
69 #ifdef DFTRACE
70 	/* Trace parser */
71 	DfilterTrace(stdout, "lemon> ");
72 #endif
73 
74 	/* Initialize the syntax-tree sub-sub-system */
75 	sttype_init();
76 
77 	dfilter_macro_init();
78 }
79 
80 /* Clean-up the dfilter module */
81 void
dfilter_cleanup(void)82 dfilter_cleanup(void)
83 {
84 	dfilter_macro_cleanup();
85 
86 	/* Free the Lemon Parser object */
87 	if (ParserObj) {
88 		DfilterFree(ParserObj, g_free);
89 	}
90 
91 	/* Clean up the syntax-tree sub-sub-system */
92 	sttype_cleanup();
93 }
94 
95 static dfilter_t*
dfilter_new(GPtrArray * deprecated)96 dfilter_new(GPtrArray *deprecated)
97 {
98 	dfilter_t	*df;
99 
100 	df = g_new0(dfilter_t, 1);
101 	df->insns = NULL;
102 
103 	if (deprecated)
104 		df->deprecated = g_ptr_array_ref(deprecated);
105 
106 	return df;
107 }
108 
109 /* Given a GPtrArray of instructions (dfvm_insn_t),
110  * free them. */
111 static void
free_insns(GPtrArray * insns)112 free_insns(GPtrArray *insns)
113 {
114 	unsigned int	i;
115 	dfvm_insn_t	*insn;
116 
117 	for (i = 0; i < insns->len; i++) {
118 		insn = (dfvm_insn_t	*)g_ptr_array_index(insns, i);
119 		dfvm_insn_free(insn);
120 	}
121 	g_ptr_array_free(insns, TRUE);
122 }
123 
124 void
dfilter_free(dfilter_t * df)125 dfilter_free(dfilter_t *df)
126 {
127 	guint i;
128 
129 	if (!df)
130 		return;
131 
132 	if (df->insns) {
133 		free_insns(df->insns);
134 	}
135 	if (df->consts) {
136 		free_insns(df->consts);
137 	}
138 
139 	g_free(df->interesting_fields);
140 
141 	/* Clear registers with constant values (as set by dfvm_init_const).
142 	 * Other registers were cleared on RETURN by free_register_overhead. */
143 	for (i = df->num_registers; i < df->max_registers; i++) {
144 		g_list_free(df->registers[i]);
145 	}
146 
147 	if (df->deprecated)
148 		g_ptr_array_unref(df->deprecated);
149 
150 	g_free(df->registers);
151 	g_free(df->attempted_load);
152 	g_free(df->owns_memory);
153 	g_free(df);
154 }
155 
156 
157 static dfwork_t*
dfwork_new(void)158 dfwork_new(void)
159 {
160 	dfwork_t	*dfw;
161 
162 	dfw = g_new0(dfwork_t, 1);
163 	dfw->first_constant = -1;
164 
165 	return dfw;
166 }
167 
168 static void
dfwork_free(dfwork_t * dfw)169 dfwork_free(dfwork_t *dfw)
170 {
171 	if (dfw->st_root) {
172 		stnode_free(dfw->st_root);
173 	}
174 
175 	if (dfw->loaded_fields) {
176 		g_hash_table_destroy(dfw->loaded_fields);
177 	}
178 
179 	if (dfw->interesting_fields) {
180 		g_hash_table_destroy(dfw->interesting_fields);
181 	}
182 
183 	if (dfw->insns) {
184 		free_insns(dfw->insns);
185 	}
186 
187 	if (dfw->consts) {
188 		free_insns(dfw->consts);
189 	}
190 
191 	if (dfw->deprecated)
192 		g_ptr_array_unref(dfw->deprecated);
193 
194 	/*
195 	 * We don't free the error message string; our caller will return
196 	 * it to its caller.
197 	 */
198 	g_free(dfw);
199 }
200 
tokenstr(int token)201 const char *tokenstr(int token)
202 {
203 	switch (token) {
204 		case TOKEN_TEST_AND:	return "TEST_AND";
205 		case TOKEN_TEST_OR: 	return "TEST_OR";
206 		case TOKEN_TEST_EQ:	return "TEST_EQ";
207 		case TOKEN_TEST_NE:	return "TEST_NE";
208 		case TOKEN_TEST_LT:	return "TEST_LT";
209 		case TOKEN_TEST_LE:	return "TEST_LE";
210 		case TOKEN_TEST_GT:	return "TEST_GT";
211 		case TOKEN_TEST_GE:	return "TEST_GE";
212 		case TOKEN_TEST_CONTAINS: return "TEST_CONTAINS";
213 		case TOKEN_TEST_MATCHES: return "TEST_MATCHES";
214 		case TOKEN_TEST_BITWISE_AND: return "TEST_BITWISE_AND";
215 		case TOKEN_TEST_NOT:	return "TEST_NOT";
216 		case TOKEN_FIELD:	return "FIELD";
217 		case TOKEN_STRING:	return "STRING";
218 		case TOKEN_CHARCONST:	return "CHARCONST";
219 		case TOKEN_UNPARSED:	return "UNPARSED";
220 		case TOKEN_LBRACKET:	return "LBRACKET";
221 		case TOKEN_RBRACKET:	return "RBRACKET";
222 		case TOKEN_COMMA:	return "COMMA";
223 		case TOKEN_INTEGER:	return "INTEGER";
224 		case TOKEN_COLON:	return "COLON";
225 		case TOKEN_HYPHEN:	return "HYPHEN";
226 		case TOKEN_TEST_IN:	return "TEST_IN";
227 		case TOKEN_LBRACE:	return "LBRACE";
228 		case TOKEN_RBRACE:	return "RBRACE";
229 		case TOKEN_WHITESPACE:	return "WHITESPACE";
230 		case TOKEN_DOTDOT:	return "DOTDOT";
231 		case TOKEN_FUNCTION:	return "FUNCTION";
232 		case TOKEN_LPAREN:	return "LPAREN";
233 		case TOKEN_RPAREN:	return "RPAREN";
234 		default:		return "<unknown>";
235 	}
236 	ws_assert_not_reached();
237 }
238 
239 void
add_deprecated_token(dfwork_t * dfw,const char * token)240 add_deprecated_token(dfwork_t *dfw, const char *token)
241 {
242 	if (dfw->deprecated == NULL)
243 		dfw->deprecated  = g_ptr_array_new_full(0, g_free);
244 
245 	GPtrArray *deprecated = dfw->deprecated;
246 
247 	for (guint i = 0; i < deprecated->len; i++) {
248 		const char *str = g_ptr_array_index(deprecated, i);
249 		if (g_ascii_strcasecmp(token, str) == 0) {
250 			/* It's already in our list */
251 			return;
252 		}
253 	}
254 	g_ptr_array_add(deprecated, g_strdup(token));
255 }
256 
257 gboolean
dfilter_compile(const gchar * text,dfilter_t ** dfp,gchar ** err_msg)258 dfilter_compile(const gchar *text, dfilter_t **dfp, gchar **err_msg)
259 {
260 	gchar		*expanded_text;
261 	int		token;
262 	dfilter_t	*dfilter;
263 	dfwork_t	*dfw;
264 	df_scanner_state_t state;
265 	yyscan_t	scanner;
266 	YY_BUFFER_STATE in_buffer;
267 	gboolean failure = FALSE;
268 
269 	ws_assert(dfp);
270 
271 	if (!text) {
272 		*dfp = NULL;
273 		if (err_msg != NULL)
274 			*err_msg = g_strdup("BUG: NULL text pointer passed to dfilter_compile()");
275 		return FALSE;
276 	}
277 
278 	if ( !( expanded_text = dfilter_macro_apply(text, err_msg) ) ) {
279 		*dfp = NULL;
280 		return FALSE;
281 	}
282 
283 	if (df_lex_init(&scanner) != 0) {
284 		wmem_free(NULL, expanded_text);
285 		*dfp = NULL;
286 		if (err_msg != NULL)
287 			*err_msg = g_strdup_printf("Can't initialize scanner: %s",
288 			    g_strerror(errno));
289 		return FALSE;
290 	}
291 
292 	in_buffer = df__scan_string(expanded_text, scanner);
293 
294 	dfw = dfwork_new();
295 
296 	state.dfw = dfw;
297 	state.quoted_string = NULL;
298 	state.in_set = FALSE;
299 	state.raw_string = FALSE;
300 
301 	df_set_extra(&state, scanner);
302 
303 	while (1) {
304 		df_lval = stnode_new(STTYPE_UNINITIALIZED, NULL, NULL);
305 		token = df_lex(scanner);
306 
307 		/* Check for scanner failure */
308 		if (token == SCAN_FAILED) {
309 			failure = TRUE;
310 			break;
311 		}
312 
313 		/* Check for end-of-input */
314 		if (token == 0) {
315 			break;
316 		}
317 
318 		ws_debug("Token: %d %s", token, tokenstr(token));
319 
320 		/* Give the token to the parser */
321 		Dfilter(ParserObj, token, df_lval, dfw);
322 		/* We've used the stnode_t, so we don't want to free it */
323 		df_lval = NULL;
324 
325 		if (dfw->syntax_error) {
326 			failure = TRUE;
327 			break;
328 		}
329 
330 	} /* while (1) */
331 
332 	/* If we created an stnode_t but didn't use it, free it; the
333 	 * parser doesn't know about it and won't free it for us. */
334 	if (df_lval) {
335 		stnode_free(df_lval);
336 		df_lval = NULL;
337 	}
338 
339 	/* Tell the parser that we have reached the end of input; that
340 	 * way, it'll reset its state for the next compile.  (We want
341 	 * to do that even if we got a syntax error, to make sure the
342 	 * parser state is cleaned up; we don't create a new parser
343 	 * object when we start a new parse, and don't destroy it when
344 	 * the parse finishes.) */
345 	Dfilter(ParserObj, 0, NULL, dfw);
346 
347 	/* One last check for syntax error (after EOF) */
348 	if (dfw->syntax_error)
349 		failure = TRUE;
350 
351 	/* Free scanner state */
352 	if (state.quoted_string != NULL)
353 		g_string_free(state.quoted_string, TRUE);
354 	df__delete_buffer(in_buffer, scanner);
355 	df_lex_destroy(scanner);
356 
357 	if (failure)
358 		goto FAILURE;
359 
360 	/* Success, but was it an empty filter? If so, discard
361 	 * it and set *dfp to NULL */
362 	if (dfw->st_root == NULL) {
363 		*dfp = NULL;
364 	}
365 	else {
366 		log_syntax_tree(LOG_LEVEL_NOISY, dfw->st_root, "Syntax tree before semantic check");
367 
368 		/* Check semantics and do necessary type conversion*/
369 		if (!dfw_semcheck(dfw)) {
370 			goto FAILURE;
371 		}
372 
373 		log_syntax_tree(LOG_LEVEL_NOISY, dfw->st_root, "Syntax tree after successful semantic check");
374 
375 		/* Create bytecode */
376 		dfw_gencode(dfw);
377 
378 		/* Tuck away the bytecode in the dfilter_t */
379 		dfilter = dfilter_new(dfw->deprecated);
380 		dfilter->insns = dfw->insns;
381 		dfilter->consts = dfw->consts;
382 		dfw->insns = NULL;
383 		dfw->consts = NULL;
384 		dfilter->interesting_fields = dfw_interesting_fields(dfw,
385 			&dfilter->num_interesting_fields);
386 
387 		/* Initialize run-time space */
388 		dfilter->num_registers = dfw->first_constant;
389 		dfilter->max_registers = dfw->next_register;
390 		dfilter->registers = g_new0(GList*, dfilter->max_registers);
391 		dfilter->attempted_load = g_new0(gboolean, dfilter->max_registers);
392 		dfilter->owns_memory = g_new0(gboolean, dfilter->max_registers);
393 
394 		/* Initialize constants */
395 		dfvm_init_const(dfilter);
396 
397 		/* And give it to the user. */
398 		*dfp = dfilter;
399 	}
400 	/* SUCCESS */
401 	global_dfw = NULL;
402 	dfwork_free(dfw);
403 	wmem_free(NULL, expanded_text);
404 	return TRUE;
405 
406 FAILURE:
407 	if (dfw) {
408 		if (err_msg != NULL)
409 			*err_msg = dfw->error_message;
410 		else
411 			g_free(dfw->error_message);
412 		global_dfw = NULL;
413 		dfwork_free(dfw);
414 	}
415 	if (err_msg != NULL) {
416 		/*
417 		 * Default error message.
418 		 *
419 		 * XXX - we should really make sure that this is never the
420 		 * case for any error.
421 		 */
422 		if (*err_msg == NULL)
423 			*err_msg = g_strdup_printf("Unable to parse filter string \"%s\".", expanded_text);
424 	}
425 	wmem_free(NULL, expanded_text);
426 	*dfp = NULL;
427 	return FALSE;
428 }
429 
430 
431 gboolean
dfilter_apply(dfilter_t * df,proto_tree * tree)432 dfilter_apply(dfilter_t *df, proto_tree *tree)
433 {
434 	return dfvm_apply(df, tree);
435 }
436 
437 gboolean
dfilter_apply_edt(dfilter_t * df,epan_dissect_t * edt)438 dfilter_apply_edt(dfilter_t *df, epan_dissect_t* edt)
439 {
440 	return dfvm_apply(df, edt->tree);
441 }
442 
443 
444 void
dfilter_prime_proto_tree(const dfilter_t * df,proto_tree * tree)445 dfilter_prime_proto_tree(const dfilter_t *df, proto_tree *tree)
446 {
447 	int i;
448 
449 	for (i = 0; i < df->num_interesting_fields; i++) {
450 		proto_tree_prime_with_hfid(tree, df->interesting_fields[i]);
451 	}
452 }
453 
454 gboolean
dfilter_has_interesting_fields(const dfilter_t * df)455 dfilter_has_interesting_fields(const dfilter_t *df)
456 {
457 	return (df->num_interesting_fields > 0);
458 }
459 
460 GPtrArray *
dfilter_deprecated_tokens(dfilter_t * df)461 dfilter_deprecated_tokens(dfilter_t *df) {
462 	if (df->deprecated && df->deprecated->len > 0) {
463 		return df->deprecated;
464 	}
465 	return NULL;
466 }
467 
468 void
dfilter_dump(dfilter_t * df)469 dfilter_dump(dfilter_t *df)
470 {
471 	guint i;
472 	const gchar *sep = "";
473 
474 	dfvm_dump(stdout, df);
475 
476 	if (df->deprecated && df->deprecated->len) {
477 		printf("\nDeprecated tokens: ");
478 		for (i = 0; i < df->deprecated->len; i++) {
479 			printf("%s\"%s\"", sep, (char *) g_ptr_array_index(df->deprecated, i));
480 			sep = ", ";
481 		}
482 		printf("\n");
483 	}
484 }
485 
/*
 * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
 *
 * Local variables:
 * c-basic-offset: 8
 * tab-width: 8
 * indent-tabs-mode: t
 * End:
 *
 * vi: set shiftwidth=8 tabstop=8 noexpandtab:
 * :indentSize=8:tabSize=8:noTabs=false:
 */
498