1 #include "../burp.h"
2 #include "../alloc.h"
3 #include "../handy.h"
4 #include "../conf.h"
5 #include "../conffile.h"
6 #include "../regexp.h"
7 #include "../strlist.h"
8 #include "../log.h"
9 #include "find.h"
10 #include "find_logic.h"
11 
12 #include <uthash.h>
13 
14 #ifdef HAVE_LINUX_OS
15 #include <sys/statfs.h>
16 #endif
17 #ifdef HAVE_SUN_OS
18 #include <sys/statvfs.h>
19 #endif
20 
21 typedef struct _node node;
22 typedef struct _dllist dllist;
23 typedef int TOKENS;
24 
25 // define our tokens as 'flags' to ease the parsing
26 #define EVAL_FALSE   0  // The evaluated function returned FALSE
27 #define EVAL_TRUE    1  // The evaluated function returned TRUE
28 #define LEFT_PARENS  2  // (
29 #define RIGHT_PARENS 3  // )
30 #define AND_FUNC     4  // and
31 #define OR_FUNC      5  // or
32 #define NOT          6  // not
33 #define GTE          7  // >=
34 #define GT           8  // >
35 #define LTE          9  // <=
36 #define LT           10 // <
37 #define EQ           11 // =
38 #define PLACEHOLDER  99 // placeholder value
39 
40 // a node contains a TOKEN with a reference to its successor and ancestor
41 struct _node
42 {
43 	TOKENS val;
44 
45 	node *next;
46 	node *prev;
47 };
48 
49 // our expression will be converted in a doubly-linked list of nodes to ease
50 // its evaluation
51 struct _dllist
52 {
53 	node *head;
54 	node *tail;
55 	size_t len;
56 };
57 
58 // the first parsing will split the string expression in string tokens
59 // ie. 'file_size>10Kb and (file_ext=pst or file_ext=ost)' =>
60 // {"file_size>10Kb", "and", "(", "file_ext=pst", "or", "file_ext=ost", ")"}
61 struct tokens
62 {
63 	char **list;
64 	size_t size;
65 	int valid;
66 };
67 
68 // an expression is a hash record to retrieve already parsed records
69 struct expression
70 {
71 	char *id;
72 	struct tokens *tokens;
73 	UT_hash_handle hh;
74 };
75 
76 // a cregex is a pre-compiled regex
77 struct cregex
78 {
79 	char *id;
80 	regex_t *compiled;
81 	UT_hash_handle hh;
82 };
83 
84 // these are some caches
85 struct expression *cache=NULL;
86 struct cregex *regex_cache=NULL;
87 
free_tokens(struct tokens * ptr)88 static void free_tokens(struct tokens *ptr)
89 {
90 	if(!ptr) return;
91 	free_list_w(&ptr->list, ptr->size);
92 	free_v((void **)&ptr);
93 }
94 
free_expression(struct expression * ptr)95 static void free_expression(struct expression *ptr)
96 {
97 	if(!ptr) return;
98 	free_tokens(ptr->tokens);
99 	free_v((void **)&ptr);
100 }
101 
free_cregex(struct cregex * ptr)102 static void free_cregex(struct cregex *ptr)
103 {
104 	if(!ptr) return;
105 	regex_free(&(ptr->compiled));
106 	free_v((void **)&ptr);
107 }
108 
free_node(node ** ptr)109 static void free_node(node **ptr)
110 {
111 	if(!*ptr) return;
112 	free_v((void **)ptr);
113 }
114 
115 // append a node to a given list
list_append(dllist ** list,node * data)116 static void list_append(dllist **list, node *data)
117 {
118 	dllist *l=*list;
119 	if(!l || !data) return;
120 	if(!l->tail)
121 	{
122 		l->head=data;
123 		l->tail=data;
124 		data->prev=NULL;
125 		data->next=NULL;
126 	}
127 	else
128 	{
129 		l->tail->next=data;
130 		data->prev=l->tail;
131 		l->tail=data;
132 		data->next=NULL;
133 	}
134 	l->len++;
135 }
136 
137 // retrieve a node by its position in the list
list_get_node_by_id(dllist * list,int id)138 static node *list_get_node_by_id(dllist *list, int id)
139 {
140 	node *ret=NULL;
141 	int cpt=0;
142 	if(!list || id<0 || id>(int)list->len) return ret;
143 	ret=list->head;
144 	for(cpt=0, ret=list->head; ret && cpt<id; cpt++, ret=ret->next);
145 	if(cpt<id) return NULL;  // id out of range
146 	return ret;
147 }
148 
149 // empty list
list_reset(dllist ** list)150 static void list_reset(dllist **list)
151 {
152 	node *tmp;
153 	dllist *l=*list;
154 	if(!l || l->len==0) return;
155 	tmp=l->tail;
156 	while(tmp)
157 	{
158 		node *buf=tmp->prev;
159 		l->tail=buf;
160 		if(l->tail)
161 			l->tail->next=NULL;
162 		l->len--;
163 		tmp->prev=NULL;
164 		free_node(&tmp);
165 		tmp=buf;
166 	}
167 }
168 
new_list(void)169 static dllist *new_list(void)
170 {
171 	dllist *ret;
172 	if(!(ret=(dllist *)malloc_w(sizeof(*ret), __func__))) return NULL;
173 	ret->len=0;
174 	ret->head=NULL;
175 	ret->tail=NULL;
176 	return ret;
177 }
178 
new_node(int value)179 static node *new_node(int value)
180 {
181 	node *ret;
182 	if(!(ret=(node *)malloc_w(sizeof(*ret), __func__))) return NULL;
183 	ret->val=value;
184 	ret->next=NULL;
185 	ret->prev=NULL;
186 	return ret;
187 }
188 
189 // here we actually convert our expression into a list of string tokens
create_token_list(char * expr)190 static struct tokens *create_token_list(char *expr)
191 {
192 	char *n=NULL;
193 	char *n2=NULL;
194 	char **toks=NULL;
195 	size_t nb_elements=0;
196 	struct tokens *ret=NULL;
197 	int opened, closed;
198 
199 	if(!(n=charreplace_noescaped_w(expr, '(', " ( ", &opened, __func__))) goto end;
200 	if(!(n2=charreplace_noescaped_w(n, ')', " ) ", &closed, __func__))) goto end;
201 	if(!(toks=charsplit_noescaped_w(n2, ' ', &nb_elements, __func__))) goto end;
202 	if(!(ret=(struct tokens *)malloc_w(sizeof(*ret), __func__))) goto end;
203 
204 	ret->list=toks;
205 	ret->size=nb_elements;
206 	ret->valid=(opened==closed);
207 end:
208 	free_w(&n);
209 	free_w(&n2);
210 	return ret;
211 }
212 
213 // we create our "expression" record to be cached
parse_expression(char * expr)214 static struct expression *parse_expression(char *expr)
215 {
216 	struct expression *ret=NULL;
217 	struct tokens *toks;
218 	if(!(toks=create_token_list(expr))) return ret;
219 	if(!(ret=(struct expression *)malloc_w(sizeof(*ret), __func__))) goto error;
220 	ret->tokens=toks;
221 	ret->id=expr;
222 	return ret;
223 error:
224 	free_tokens(toks);
225 	return ret;
226 }
227 
228 // search for the positions of the 'what' token in our tokens list
find(dllist * toks,TOKENS what,int start,dllist ** positions)229 static void find(dllist *toks, TOKENS what, int start, dllist **positions)
230 {
231 	int i;
232 	node *tmp;
233 	for(i=0, tmp=toks->head; i<(int)toks->len; i++, tmp=tmp->next)
234 	{
235 		if(i<start) continue;  // skip the unwanted positions
236 		if(tmp && tmp->val==what) list_append(positions, new_node(i));
237 	}
238 }
239 
240 // search for parentheses and return their positions
241 // always return the deepest parentheses first
242 // example:
243 //       false or ( false or ( true or false ) )
244 // 1 =>                      ^               ^   (true, 5, 9)
245 // 2 =>           ^                            ^ (true, 2, 10)
parens(dllist * toks,int * has,int * left,int * right)246 static void parens(dllist *toks, int *has, int *left, int *right)
247 {
248 	dllist *positions;
249 	if(!(positions=new_list()))
250 	{
251 		*has=0;
252 		*left=-1;
253 		*right=-1;
254 		return;
255 	}
256 	find(toks, LEFT_PARENS, 0, &positions);
257 	if(positions->len==0)
258 	{
259 		*has=0;
260 		*left=-1;
261 		*right=-1;
262 		goto end;
263 	}
264 	*left=positions->tail->val;
265 	list_reset(&positions);
266 	find(toks, RIGHT_PARENS, *left+4, &positions);
267 	if(positions->len==0)
268 	{
269 		// special case (token) instead of ( token or/and token )
270 		list_reset(&positions);
271 		find(toks, RIGHT_PARENS, *left+1, &positions);
272 	}
273 	*right=positions->head->val;
274 	*has=1;
275 end:
276 	list_reset(&positions);
277 	free_v((void **)&positions);
278 }
279 
280 // utility function
strip_quotes(char * src)281 static char *strip_quotes(char *src)
282 {
283 	int len;
284 	char *strip=NULL;
285 	if(!(len=strlen(src))) goto end;
286 	if((*src=='\'' || *src=='"') && *src==src[len-1]) // strip the quotes
287 	{
288 		if(!(strip=(char *)malloc_w(len-1, __func__))) goto end;
289 		strip=strncpy(strip, src+1, len-2);
290 		strip[len-2]='\0';
291 	}
292 end:
293 	return strip;
294 }
295 
296 // function 'file_ext'
eval_file_ext(char * tok,const char * fname)297 static int eval_file_ext(char *tok, const char *fname)
298 {
299 	const char *cp;
300 	int len;
301 	char *strip=NULL;
302 	if(!(len=strlen(tok))) goto end;
303 	for(; *tok=='='; ++tok);
304 	if(!(len=strlen(tok))) goto end;  // test again after we trimmed the '='
305 	strip=strip_quotes(tok);
306 	for(cp=fname+strlen(fname)-1; cp>=fname; cp--)
307 	{
308 		if(*cp!='.') continue;
309 		if((strip && !strcasecmp(strip, cp+1))
310 		   || (!strip && !strcasecmp(tok, cp+1)))
311 			return EVAL_TRUE;
312 	}
313 end:
314 	free_w(&strip);
315 	return EVAL_FALSE;
316 }
317 
318 // function 'path_match'
eval_path_match(char * tok,const char * fname)319 static int eval_path_match(char *tok, const char *fname)
320 {
321 	int ret=EVAL_FALSE;
322 	struct cregex *reg;
323 	char *strip=NULL;
324 	if(strlen(tok)==0) goto end;
325 	for(; *tok=='='; ++tok);
326 	if(strlen(tok)==0) goto end;  // test again after we trimmed the '='
327 	if(regex_cache)
328 		HASH_FIND_STR(regex_cache, tok, reg);
329 	else
330 		reg=NULL;
331 	if(!reg)
332 	{
333 		regex_t *tmp;
334 		if((strip=strip_quotes(tok)))
335 			tmp=regex_compile_backup(strip);
336 		else
337 			tmp=regex_compile_backup(tok);
338 		if(!(reg=(struct cregex *)malloc_w(sizeof(*reg), __func__)))
339 		{
340 			regex_free(&tmp);
341 			goto end;
342 		}
343 		reg->id=strdup_w(tok, __func__);
344 		reg->compiled=tmp;
345 		HASH_ADD_KEYPTR(hh, regex_cache, reg->id, strlen(reg->id), reg);
346 	}
347 	if(regex_check(reg->compiled, fname))
348 		ret=EVAL_TRUE;
349 end:
350 	free_w(&strip);
351 	return ret;
352 }
353 
354 // function 'file_match'
eval_file_match(char * tok,const char * fname)355 static int eval_file_match(char *tok, const char *fname)
356 {
357 	int len=strlen(fname);
358 	for(; len>0 && fname[len-1]!='/'; len--);
359 	return eval_path_match(tok, fname+len);
360 }
361 
362 // function 'file_size'
eval_file_size(char * tok,uint64_t filesize)363 static int eval_file_size(char *tok, uint64_t filesize)
364 {
365 	int ret=EVAL_FALSE;
366 	char *strip=NULL;
367 	TOKENS eval=PLACEHOLDER;
368 	uint64_t s=0;
369 	if(strlen(tok)==0) goto end;
370 	for(; ; tok++)
371 	{
372 		if(*tok!='>' && *tok!='<' && *tok!='=') break;
373 		switch(*tok)
374 		{
375 			case '<':
376 				eval=LT;
377 				break;
378 			case '>':
379 				eval=GT;
380 				break;
381 			case '=':
382 				switch(eval)
383 				{
384 					case LT:
385 						eval=LTE;
386 						break;
387 					case GT:
388 						eval=GTE;
389 						break;
390 					case PLACEHOLDER:
391 					case EQ:
392 						eval=EQ;
393 						break;
394 					default:
395 						eval=EVAL_FALSE;
396 				}
397 				break;
398 		}
399 	}
400 	if((strip=strip_quotes(tok)))
401 		get_file_size(strip, &s, NULL, -1);
402 	else
403 		get_file_size(tok, &s, NULL, -1);
404 	switch(eval)
405 	{
406 		case LT:
407 			ret=filesize<s;
408 			break;
409 		case LTE:
410 			ret=filesize<=s;
411 			break;
412 		case GT:
413 			ret=filesize>s;
414 			break;
415 		case GTE:
416 			ret=filesize>=s;
417 			break;
418 		case EQ:
419 			ret=filesize==s;
420 			break;
421 		default:
422 			ret=EVAL_FALSE;
423 	}
424 end:
425 	free_w(&strip);
426 	return ret;
427 }
428 
429 // search what function to use
eval_func(char * tok,const char * filename,uint64_t filesize)430 static int eval_func(char *tok, const char *filename, uint64_t filesize)
431 {
432 	int ret;
433 	if(!strncmp(tok, "file_ext", 8))
434 		ret=eval_file_ext(tok+8, filename);
435 	else if(!strncmp(tok, "file_match", 10))
436 		ret=eval_file_match(tok+10, filename);
437 	else if(!strncmp(tok, "path_match", 10))
438 		ret=eval_path_match(tok+10, filename);
439 	else if(!strncmp(tok, "file_size", 9))
440 		ret=eval_file_size(tok+9, filesize);
441 	else
442 		ret=EVAL_FALSE;
443 	return ret;
444 }
445 
446 // convert a string token into a TOKENS
str_to_node(char * tok,const char * filename,uint64_t filesize)447 static node *str_to_node(char *tok, const char *filename, uint64_t filesize)
448 {
449 	int ret;
450 	if(!strncmp(tok, "and", 3))
451 		ret=AND_FUNC;
452 	else if(!strncmp(tok, "or", 2))
453 		ret=OR_FUNC;
454 	else if(!strncmp(tok, "(", 1))
455 		ret=LEFT_PARENS;
456 	else if(!strncmp(tok, ")", 1))
457 		ret=RIGHT_PARENS;
458 	else if(!strncmp(tok, "not", 3))
459 		ret=NOT;
460 	else
461 		ret=eval_func(tok, filename, filesize);
462 	return new_node(ret);
463 }
464 
465 // evaluate a trio of tokens like 'true or false'
eval_triplet(node * head,int def)466 static int eval_triplet(node *head, int def)
467 {
468 	TOKENS left, func, right;
469 	left=head->val;
470 	func=head->next->val;
471 	right=head->next->next->val;
472 	switch(func)
473 	{
474 		case AND_FUNC:
475 			return left && right;
476 		case OR_FUNC:
477 			return left || right;
478 		default:
479 			return def;
480 	}
481 }
482 
483 // factorise tokens by recursively evaluating them
bool_eval(dllist ** tokens,int def)484 static int bool_eval(dllist **tokens, int def)
485 {
486 	dllist *toks=*tokens;
487 	if(toks->len==1)
488 		return toks->head->val;
489 	else if(toks->len==2)
490 	{
491 		switch(toks->head->val)
492 		{
493 			case NOT:
494 				return !toks->tail->val;
495 			default:
496 				return toks->tail->val;
497 		}
498 	}
499 	/* here we search for 'not' tokens */
500 	if(toks->len>3)
501 	{
502 		dllist *new_tokens;
503 		node *tmp;
504 		int negate=0, is_negation=0;
505 		for(tmp=toks->head; tmp; tmp=tmp->next)
506 		{
507 			if(tmp->val==NOT)
508 			{
509 				is_negation=1;
510 				break;
511 			}
512 		}
513 		if(is_negation)
514 		{
515 			if(!(new_tokens=new_list())) return 0;
516 			for(tmp=toks->head; tmp; tmp=tmp->next)
517 			{
518 				if(tmp->val==NOT)
519 					negate=!negate;
520 				else
521 				{
522 					if(negate)
523 					{
524 						list_append(&new_tokens, new_node(!(tmp->val)));
525 						negate=0;
526 					}
527 					else
528 					{
529 						list_append(&new_tokens, new_node(tmp->val));
530 					}
531 				}
532 			}
533 			list_reset(tokens);
534 			free_v((void **)tokens);
535 			*tokens=new_tokens;
536 			toks=*tokens;
537 		}
538 	}
539 	/* here we don't have any negations anymore, but we may have chains of
540 	 * expressions to evaluate recursively */
541 	if(toks->len>3)
542 	{
543 		node *tmp;
544 		dllist *new_tokens;
545 		int i;
546 		tmp=new_node(eval_triplet(toks->head, def));
547 		if(!(new_tokens=new_list()))
548 		{
549 			free_node(&tmp);
550 			return 0;
551 		}
552 		list_append(&new_tokens, tmp);
553 		for(tmp=toks->head, i=0; tmp; tmp=tmp->next, i++)
554 		{
555 			if(i<3) continue;
556 			list_append(&new_tokens, new_node(tmp->val));
557 		}
558 		list_reset(tokens);
559 		free_v((void **)tokens);
560 		*tokens=new_tokens;
561 		toks=*tokens;
562 		return bool_eval(tokens, def);
563 	}
564 	if(toks->len%3!=0) return def;
565 	return eval_triplet(toks->head, def);
566 }
567 
568 // evaluate our list of tokens
eval_parsed_expression(dllist ** tokens,int def)569 static int eval_parsed_expression(dllist **tokens, int def)
570 {
571 	dllist *toks=*tokens, *sub;
572 	node *begin, *end, *tmp;
573 	int has, left, right, count;
574 	if(!toks || toks->len==0) return def;
575 	if(toks->len==1) return toks->head->val;
576 	parens(toks, &has, &left, &right);
577 	// we don't have parentheses, we can evaluate the tokens
578 	if(!has)
579 		return bool_eval(tokens, def);
580 	// we have parentheses
581 	// we retrieve the two nodes '(' and ')'
582 	begin=list_get_node_by_id(toks, left);
583 	end=list_get_node_by_id(toks, right);
584 	// then we capture only the tokens surrounded by the parentheses
585 	if(!(sub=new_list())) return def;
586 	tmp=begin->next;
587 	count=0;
588 	while(tmp && count<right-left-1)
589 	{
590 		list_append(&sub, new_node(tmp->val));
591 		tmp=tmp->next;
592 		count++;
593 	}
594 	count++;
595 	// evaluate the inner expression
596 	tmp=new_node(bool_eval(&sub, def));
597 	// we replace all the tokens parentheses included with the new computed node
598 	// first element of the list
599 	if(!begin->prev)
600 		(*tokens)->head=tmp;
601 	else if (!end->next)  // last element of the list
602 		(*tokens)->tail=tmp;
603 	toks->len-=count;  // decrement our list size
604 	tmp->prev=begin->prev;
605 	tmp->next=end->next;
606 	if(begin->prev)
607 		begin->prev->next=tmp;
608 	else if(end->next)
609 		end->next->prev=tmp;
610 	// cleanup "orphans" nodes
611 	tmp=begin;
612 	while(tmp && count>=0)
613 	{
614 		if(tmp)
615 		{
616 			node *buf=tmp->next;
617 			free_node(&tmp);
618 			tmp=buf;
619 			count--;
620 		}
621 	}
622 	list_reset(&sub);
623 	free_v((void **)&sub);
624 	return eval_parsed_expression(tokens, def);
625 }
626 
eval_expression(char * expr,const char * filename,uint64_t filesize,int def)627 static int eval_expression(char *expr, const char *filename, uint64_t filesize, int def)
628 {
629 	int ret=def, i;
630 	struct expression *parsed;
631 	dllist *tokens=NULL;
632 	if(cache)
633 		HASH_FIND_STR(cache, expr, parsed);
634 	else
635 		parsed=NULL;
636 	if(!parsed)
637 	{
638 		if(!(parsed=parse_expression(expr))) return def;
639 		HASH_ADD_KEYPTR(hh, cache, parsed->id, strlen(parsed->id), parsed);
640 	}
641 	if(!parsed || !parsed->tokens->valid) goto end;
642 	if(!(tokens=new_list())) goto end;
643 	for(i=0; i<(int)parsed->tokens->size; i++)
644 		list_append(&tokens, str_to_node(parsed->tokens->list[i], filename, filesize));
645 	ret=eval_parsed_expression(&tokens, def);
646 end:
647 	list_reset(&tokens);
648 	free_v((void **)&tokens);
649 	return ret;
650 }
651 
652 // cleanup our caches
free_logic_cache(void)653 void free_logic_cache(void)
654 {
655 	struct expression *parsed, *tmp;
656 	struct cregex *reg, *tmp2;
657 	if(cache)
658 	{
659 		HASH_ITER(hh, cache, parsed, tmp)
660 		{
661 			HASH_DEL(cache, parsed);
662 			free_expression(parsed);
663 		}
664 	}
665 	if(regex_cache)
666 	{
667 		HASH_ITER(hh, regex_cache, reg, tmp2)
668 		{
669 			HASH_DEL(regex_cache, reg);
670 			free_w(&(reg->id));
671 			free_cregex(reg);
672 		}
673 	}
674 }
675 
676 /* return 1 if there is a match, 'def' is there isn't any match, and 'miss' if
677  * there are no rules to eval */
is_logic(struct strlist * list,struct FF_PKT * ff,int miss,int def)678 static int is_logic(struct strlist *list, struct FF_PKT *ff, int miss, int def)
679 {
680 	if(!list) return miss;
681 	if(!S_ISREG(ff->statp.st_mode)) return def;  // ignore directories
682 	for(; list; list=list->next)
683 		if(eval_expression(list->path, ff->fname, (uint64_t)ff->statp.st_size, miss))
684 			return 1;
685 	return def;
686 }
687 
is_logic_excluded(struct conf ** confs,struct FF_PKT * ff)688 int is_logic_excluded(struct conf **confs, struct FF_PKT *ff)
689 {
690 	return is_logic(get_strlist(confs[OPT_EXCLOGIC]), ff, /* missing */ 0, /* default */ 0);
691 }
692 
is_logic_included(struct conf ** confs,struct FF_PKT * ff)693 int is_logic_included(struct conf **confs, struct FF_PKT *ff)
694 {
695 	return is_logic(get_strlist(confs[OPT_INCLOGIC]), ff, /* missing */ 1, /* default */ 0);
696 }
697