1 #include "../burp.h"
2 #include "../alloc.h"
3 #include "../handy.h"
4 #include "../conf.h"
5 #include "../conffile.h"
6 #include "../regexp.h"
7 #include "../strlist.h"
8 #include "../log.h"
9 #include "find.h"
10 #include "find_logic.h"
11
12 #include <uthash.h>
13
14 #ifdef HAVE_LINUX_OS
15 #include <sys/statfs.h>
16 #endif
17 #ifdef HAVE_SUN_OS
18 #include <sys/statvfs.h>
19 #endif
20
21 typedef struct _node node;
22 typedef struct _dllist dllist;
23 typedef int TOKENS;
24
25 // define our tokens as 'flags' to ease the parsing
26 #define EVAL_FALSE 0 // The evaluated function returned FALSE
27 #define EVAL_TRUE 1 // The evaluated function returned TRUE
28 #define LEFT_PARENS 2 // (
29 #define RIGHT_PARENS 3 // )
30 #define AND_FUNC 4 // and
31 #define OR_FUNC 5 // or
32 #define NOT 6 // not
33 #define GTE 7 // >=
34 #define GT 8 // >
35 #define LTE 9 // <=
36 #define LT 10 // <
37 #define EQ 11 // =
38 #define PLACEHOLDER 99 // placeholder value
39
40 // a node contains a TOKEN with a reference to its successor and ancestor
41 struct _node
42 {
43 TOKENS val;
44
45 node *next;
46 node *prev;
47 };
48
49 // our expression will be converted in a doubly-linked list of nodes to ease
50 // its evaluation
51 struct _dllist
52 {
53 node *head;
54 node *tail;
55 size_t len;
56 };
57
58 // the first parsing will split the string expression in string tokens
59 // ie. 'file_size>10Kb and (file_ext=pst or file_ext=ost)' =>
60 // {"file_size>10Kb", "and", "(", "file_ext=pst", "or", "file_ext=ost", ")"}
61 struct tokens
62 {
63 char **list;
64 size_t size;
65 int valid;
66 };
67
68 // an expression is a hash record to retrieve already parsed records
69 struct expression
70 {
71 char *id;
72 struct tokens *tokens;
73 UT_hash_handle hh;
74 };
75
76 // a cregex is a pre-compiled regex
77 struct cregex
78 {
79 char *id;
80 regex_t *compiled;
81 UT_hash_handle hh;
82 };
83
84 // these are some caches
85 struct expression *cache=NULL;
86 struct cregex *regex_cache=NULL;
87
free_tokens(struct tokens * ptr)88 static void free_tokens(struct tokens *ptr)
89 {
90 if(!ptr) return;
91 free_list_w(&ptr->list, ptr->size);
92 free_v((void **)&ptr);
93 }
94
free_expression(struct expression * ptr)95 static void free_expression(struct expression *ptr)
96 {
97 if(!ptr) return;
98 free_tokens(ptr->tokens);
99 free_v((void **)&ptr);
100 }
101
free_cregex(struct cregex * ptr)102 static void free_cregex(struct cregex *ptr)
103 {
104 if(!ptr) return;
105 regex_free(&(ptr->compiled));
106 free_v((void **)&ptr);
107 }
108
free_node(node ** ptr)109 static void free_node(node **ptr)
110 {
111 if(!*ptr) return;
112 free_v((void **)ptr);
113 }
114
115 // append a node to a given list
list_append(dllist ** list,node * data)116 static void list_append(dllist **list, node *data)
117 {
118 dllist *l=*list;
119 if(!l || !data) return;
120 if(!l->tail)
121 {
122 l->head=data;
123 l->tail=data;
124 data->prev=NULL;
125 data->next=NULL;
126 }
127 else
128 {
129 l->tail->next=data;
130 data->prev=l->tail;
131 l->tail=data;
132 data->next=NULL;
133 }
134 l->len++;
135 }
136
137 // retrieve a node by its position in the list
list_get_node_by_id(dllist * list,int id)138 static node *list_get_node_by_id(dllist *list, int id)
139 {
140 node *ret=NULL;
141 int cpt=0;
142 if(!list || id<0 || id>(int)list->len) return ret;
143 ret=list->head;
144 for(cpt=0, ret=list->head; ret && cpt<id; cpt++, ret=ret->next);
145 if(cpt<id) return NULL; // id out of range
146 return ret;
147 }
148
149 // empty list
list_reset(dllist ** list)150 static void list_reset(dllist **list)
151 {
152 node *tmp;
153 dllist *l=*list;
154 if(!l || l->len==0) return;
155 tmp=l->tail;
156 while(tmp)
157 {
158 node *buf=tmp->prev;
159 l->tail=buf;
160 if(l->tail)
161 l->tail->next=NULL;
162 l->len--;
163 tmp->prev=NULL;
164 free_node(&tmp);
165 tmp=buf;
166 }
167 }
168
new_list(void)169 static dllist *new_list(void)
170 {
171 dllist *ret;
172 if(!(ret=(dllist *)malloc_w(sizeof(*ret), __func__))) return NULL;
173 ret->len=0;
174 ret->head=NULL;
175 ret->tail=NULL;
176 return ret;
177 }
178
new_node(int value)179 static node *new_node(int value)
180 {
181 node *ret;
182 if(!(ret=(node *)malloc_w(sizeof(*ret), __func__))) return NULL;
183 ret->val=value;
184 ret->next=NULL;
185 ret->prev=NULL;
186 return ret;
187 }
188
189 // here we actually convert our expression into a list of string tokens
create_token_list(char * expr)190 static struct tokens *create_token_list(char *expr)
191 {
192 char *n=NULL;
193 char *n2=NULL;
194 char **toks=NULL;
195 size_t nb_elements=0;
196 struct tokens *ret=NULL;
197 int opened, closed;
198
199 if(!(n=charreplace_noescaped_w(expr, '(', " ( ", &opened, __func__))) goto end;
200 if(!(n2=charreplace_noescaped_w(n, ')', " ) ", &closed, __func__))) goto end;
201 if(!(toks=charsplit_noescaped_w(n2, ' ', &nb_elements, __func__))) goto end;
202 if(!(ret=(struct tokens *)malloc_w(sizeof(*ret), __func__))) goto end;
203
204 ret->list=toks;
205 ret->size=nb_elements;
206 ret->valid=(opened==closed);
207 end:
208 free_w(&n);
209 free_w(&n2);
210 return ret;
211 }
212
213 // we create our "expression" record to be cached
parse_expression(char * expr)214 static struct expression *parse_expression(char *expr)
215 {
216 struct expression *ret=NULL;
217 struct tokens *toks;
218 if(!(toks=create_token_list(expr))) return ret;
219 if(!(ret=(struct expression *)malloc_w(sizeof(*ret), __func__))) goto error;
220 ret->tokens=toks;
221 ret->id=expr;
222 return ret;
223 error:
224 free_tokens(toks);
225 return ret;
226 }
227
228 // search for the positions of the 'what' token in our tokens list
find(dllist * toks,TOKENS what,int start,dllist ** positions)229 static void find(dllist *toks, TOKENS what, int start, dllist **positions)
230 {
231 int i;
232 node *tmp;
233 for(i=0, tmp=toks->head; i<(int)toks->len; i++, tmp=tmp->next)
234 {
235 if(i<start) continue; // skip the unwanted positions
236 if(tmp && tmp->val==what) list_append(positions, new_node(i));
237 }
238 }
239
240 // search for parentheses and return their positions
241 // always return the deepest parentheses first
242 // example:
243 // false or ( false or ( true or false ) )
244 // 1 => ^ ^ (true, 5, 9)
245 // 2 => ^ ^ (true, 2, 10)
parens(dllist * toks,int * has,int * left,int * right)246 static void parens(dllist *toks, int *has, int *left, int *right)
247 {
248 dllist *positions;
249 if(!(positions=new_list()))
250 {
251 *has=0;
252 *left=-1;
253 *right=-1;
254 return;
255 }
256 find(toks, LEFT_PARENS, 0, &positions);
257 if(positions->len==0)
258 {
259 *has=0;
260 *left=-1;
261 *right=-1;
262 goto end;
263 }
264 *left=positions->tail->val;
265 list_reset(&positions);
266 find(toks, RIGHT_PARENS, *left+4, &positions);
267 if(positions->len==0)
268 {
269 // special case (token) instead of ( token or/and token )
270 list_reset(&positions);
271 find(toks, RIGHT_PARENS, *left+1, &positions);
272 }
273 *right=positions->head->val;
274 *has=1;
275 end:
276 list_reset(&positions);
277 free_v((void **)&positions);
278 }
279
280 // utility function
strip_quotes(char * src)281 static char *strip_quotes(char *src)
282 {
283 int len;
284 char *strip=NULL;
285 if(!(len=strlen(src))) goto end;
286 if((*src=='\'' || *src=='"') && *src==src[len-1]) // strip the quotes
287 {
288 if(!(strip=(char *)malloc_w(len-1, __func__))) goto end;
289 strip=strncpy(strip, src+1, len-2);
290 strip[len-2]='\0';
291 }
292 end:
293 return strip;
294 }
295
296 // function 'file_ext'
eval_file_ext(char * tok,const char * fname)297 static int eval_file_ext(char *tok, const char *fname)
298 {
299 const char *cp;
300 int len;
301 char *strip=NULL;
302 if(!(len=strlen(tok))) goto end;
303 for(; *tok=='='; ++tok);
304 if(!(len=strlen(tok))) goto end; // test again after we trimmed the '='
305 strip=strip_quotes(tok);
306 for(cp=fname+strlen(fname)-1; cp>=fname; cp--)
307 {
308 if(*cp!='.') continue;
309 if((strip && !strcasecmp(strip, cp+1))
310 || (!strip && !strcasecmp(tok, cp+1)))
311 return EVAL_TRUE;
312 }
313 end:
314 free_w(&strip);
315 return EVAL_FALSE;
316 }
317
318 // function 'path_match'
eval_path_match(char * tok,const char * fname)319 static int eval_path_match(char *tok, const char *fname)
320 {
321 int ret=EVAL_FALSE;
322 struct cregex *reg;
323 char *strip=NULL;
324 if(strlen(tok)==0) goto end;
325 for(; *tok=='='; ++tok);
326 if(strlen(tok)==0) goto end; // test again after we trimmed the '='
327 if(regex_cache)
328 HASH_FIND_STR(regex_cache, tok, reg);
329 else
330 reg=NULL;
331 if(!reg)
332 {
333 regex_t *tmp;
334 if((strip=strip_quotes(tok)))
335 tmp=regex_compile_backup(strip);
336 else
337 tmp=regex_compile_backup(tok);
338 if(!(reg=(struct cregex *)malloc_w(sizeof(*reg), __func__)))
339 {
340 regex_free(&tmp);
341 goto end;
342 }
343 reg->id=strdup_w(tok, __func__);
344 reg->compiled=tmp;
345 HASH_ADD_KEYPTR(hh, regex_cache, reg->id, strlen(reg->id), reg);
346 }
347 if(regex_check(reg->compiled, fname))
348 ret=EVAL_TRUE;
349 end:
350 free_w(&strip);
351 return ret;
352 }
353
354 // function 'file_match'
eval_file_match(char * tok,const char * fname)355 static int eval_file_match(char *tok, const char *fname)
356 {
357 int len=strlen(fname);
358 for(; len>0 && fname[len-1]!='/'; len--);
359 return eval_path_match(tok, fname+len);
360 }
361
362 // function 'file_size'
eval_file_size(char * tok,uint64_t filesize)363 static int eval_file_size(char *tok, uint64_t filesize)
364 {
365 int ret=EVAL_FALSE;
366 char *strip=NULL;
367 TOKENS eval=PLACEHOLDER;
368 uint64_t s=0;
369 if(strlen(tok)==0) goto end;
370 for(; ; tok++)
371 {
372 if(*tok!='>' && *tok!='<' && *tok!='=') break;
373 switch(*tok)
374 {
375 case '<':
376 eval=LT;
377 break;
378 case '>':
379 eval=GT;
380 break;
381 case '=':
382 switch(eval)
383 {
384 case LT:
385 eval=LTE;
386 break;
387 case GT:
388 eval=GTE;
389 break;
390 case PLACEHOLDER:
391 case EQ:
392 eval=EQ;
393 break;
394 default:
395 eval=EVAL_FALSE;
396 }
397 break;
398 }
399 }
400 if((strip=strip_quotes(tok)))
401 get_file_size(strip, &s, NULL, -1);
402 else
403 get_file_size(tok, &s, NULL, -1);
404 switch(eval)
405 {
406 case LT:
407 ret=filesize<s;
408 break;
409 case LTE:
410 ret=filesize<=s;
411 break;
412 case GT:
413 ret=filesize>s;
414 break;
415 case GTE:
416 ret=filesize>=s;
417 break;
418 case EQ:
419 ret=filesize==s;
420 break;
421 default:
422 ret=EVAL_FALSE;
423 }
424 end:
425 free_w(&strip);
426 return ret;
427 }
428
429 // search what function to use
eval_func(char * tok,const char * filename,uint64_t filesize)430 static int eval_func(char *tok, const char *filename, uint64_t filesize)
431 {
432 int ret;
433 if(!strncmp(tok, "file_ext", 8))
434 ret=eval_file_ext(tok+8, filename);
435 else if(!strncmp(tok, "file_match", 10))
436 ret=eval_file_match(tok+10, filename);
437 else if(!strncmp(tok, "path_match", 10))
438 ret=eval_path_match(tok+10, filename);
439 else if(!strncmp(tok, "file_size", 9))
440 ret=eval_file_size(tok+9, filesize);
441 else
442 ret=EVAL_FALSE;
443 return ret;
444 }
445
446 // convert a string token into a TOKENS
str_to_node(char * tok,const char * filename,uint64_t filesize)447 static node *str_to_node(char *tok, const char *filename, uint64_t filesize)
448 {
449 int ret;
450 if(!strncmp(tok, "and", 3))
451 ret=AND_FUNC;
452 else if(!strncmp(tok, "or", 2))
453 ret=OR_FUNC;
454 else if(!strncmp(tok, "(", 1))
455 ret=LEFT_PARENS;
456 else if(!strncmp(tok, ")", 1))
457 ret=RIGHT_PARENS;
458 else if(!strncmp(tok, "not", 3))
459 ret=NOT;
460 else
461 ret=eval_func(tok, filename, filesize);
462 return new_node(ret);
463 }
464
465 // evaluate a trio of tokens like 'true or false'
eval_triplet(node * head,int def)466 static int eval_triplet(node *head, int def)
467 {
468 TOKENS left, func, right;
469 left=head->val;
470 func=head->next->val;
471 right=head->next->next->val;
472 switch(func)
473 {
474 case AND_FUNC:
475 return left && right;
476 case OR_FUNC:
477 return left || right;
478 default:
479 return def;
480 }
481 }
482
483 // factorise tokens by recursively evaluating them
bool_eval(dllist ** tokens,int def)484 static int bool_eval(dllist **tokens, int def)
485 {
486 dllist *toks=*tokens;
487 if(toks->len==1)
488 return toks->head->val;
489 else if(toks->len==2)
490 {
491 switch(toks->head->val)
492 {
493 case NOT:
494 return !toks->tail->val;
495 default:
496 return toks->tail->val;
497 }
498 }
499 /* here we search for 'not' tokens */
500 if(toks->len>3)
501 {
502 dllist *new_tokens;
503 node *tmp;
504 int negate=0, is_negation=0;
505 for(tmp=toks->head; tmp; tmp=tmp->next)
506 {
507 if(tmp->val==NOT)
508 {
509 is_negation=1;
510 break;
511 }
512 }
513 if(is_negation)
514 {
515 if(!(new_tokens=new_list())) return 0;
516 for(tmp=toks->head; tmp; tmp=tmp->next)
517 {
518 if(tmp->val==NOT)
519 negate=!negate;
520 else
521 {
522 if(negate)
523 {
524 list_append(&new_tokens, new_node(!(tmp->val)));
525 negate=0;
526 }
527 else
528 {
529 list_append(&new_tokens, new_node(tmp->val));
530 }
531 }
532 }
533 list_reset(tokens);
534 free_v((void **)tokens);
535 *tokens=new_tokens;
536 toks=*tokens;
537 }
538 }
539 /* here we don't have any negations anymore, but we may have chains of
540 * expressions to evaluate recursively */
541 if(toks->len>3)
542 {
543 node *tmp;
544 dllist *new_tokens;
545 int i;
546 tmp=new_node(eval_triplet(toks->head, def));
547 if(!(new_tokens=new_list()))
548 {
549 free_node(&tmp);
550 return 0;
551 }
552 list_append(&new_tokens, tmp);
553 for(tmp=toks->head, i=0; tmp; tmp=tmp->next, i++)
554 {
555 if(i<3) continue;
556 list_append(&new_tokens, new_node(tmp->val));
557 }
558 list_reset(tokens);
559 free_v((void **)tokens);
560 *tokens=new_tokens;
561 toks=*tokens;
562 return bool_eval(tokens, def);
563 }
564 if(toks->len%3!=0) return def;
565 return eval_triplet(toks->head, def);
566 }
567
568 // evaluate our list of tokens
eval_parsed_expression(dllist ** tokens,int def)569 static int eval_parsed_expression(dllist **tokens, int def)
570 {
571 dllist *toks=*tokens, *sub;
572 node *begin, *end, *tmp;
573 int has, left, right, count;
574 if(!toks || toks->len==0) return def;
575 if(toks->len==1) return toks->head->val;
576 parens(toks, &has, &left, &right);
577 // we don't have parentheses, we can evaluate the tokens
578 if(!has)
579 return bool_eval(tokens, def);
580 // we have parentheses
581 // we retrieve the two nodes '(' and ')'
582 begin=list_get_node_by_id(toks, left);
583 end=list_get_node_by_id(toks, right);
584 // then we capture only the tokens surrounded by the parentheses
585 if(!(sub=new_list())) return def;
586 tmp=begin->next;
587 count=0;
588 while(tmp && count<right-left-1)
589 {
590 list_append(&sub, new_node(tmp->val));
591 tmp=tmp->next;
592 count++;
593 }
594 count++;
595 // evaluate the inner expression
596 tmp=new_node(bool_eval(&sub, def));
597 // we replace all the tokens parentheses included with the new computed node
598 // first element of the list
599 if(!begin->prev)
600 (*tokens)->head=tmp;
601 else if (!end->next) // last element of the list
602 (*tokens)->tail=tmp;
603 toks->len-=count; // decrement our list size
604 tmp->prev=begin->prev;
605 tmp->next=end->next;
606 if(begin->prev)
607 begin->prev->next=tmp;
608 else if(end->next)
609 end->next->prev=tmp;
610 // cleanup "orphans" nodes
611 tmp=begin;
612 while(tmp && count>=0)
613 {
614 if(tmp)
615 {
616 node *buf=tmp->next;
617 free_node(&tmp);
618 tmp=buf;
619 count--;
620 }
621 }
622 list_reset(&sub);
623 free_v((void **)&sub);
624 return eval_parsed_expression(tokens, def);
625 }
626
eval_expression(char * expr,const char * filename,uint64_t filesize,int def)627 static int eval_expression(char *expr, const char *filename, uint64_t filesize, int def)
628 {
629 int ret=def, i;
630 struct expression *parsed;
631 dllist *tokens=NULL;
632 if(cache)
633 HASH_FIND_STR(cache, expr, parsed);
634 else
635 parsed=NULL;
636 if(!parsed)
637 {
638 if(!(parsed=parse_expression(expr))) return def;
639 HASH_ADD_KEYPTR(hh, cache, parsed->id, strlen(parsed->id), parsed);
640 }
641 if(!parsed || !parsed->tokens->valid) goto end;
642 if(!(tokens=new_list())) goto end;
643 for(i=0; i<(int)parsed->tokens->size; i++)
644 list_append(&tokens, str_to_node(parsed->tokens->list[i], filename, filesize));
645 ret=eval_parsed_expression(&tokens, def);
646 end:
647 list_reset(&tokens);
648 free_v((void **)&tokens);
649 return ret;
650 }
651
652 // cleanup our caches
free_logic_cache(void)653 void free_logic_cache(void)
654 {
655 struct expression *parsed, *tmp;
656 struct cregex *reg, *tmp2;
657 if(cache)
658 {
659 HASH_ITER(hh, cache, parsed, tmp)
660 {
661 HASH_DEL(cache, parsed);
662 free_expression(parsed);
663 }
664 }
665 if(regex_cache)
666 {
667 HASH_ITER(hh, regex_cache, reg, tmp2)
668 {
669 HASH_DEL(regex_cache, reg);
670 free_w(&(reg->id));
671 free_cregex(reg);
672 }
673 }
674 }
675
676 /* return 1 if there is a match, 'def' is there isn't any match, and 'miss' if
677 * there are no rules to eval */
is_logic(struct strlist * list,struct FF_PKT * ff,int miss,int def)678 static int is_logic(struct strlist *list, struct FF_PKT *ff, int miss, int def)
679 {
680 if(!list) return miss;
681 if(!S_ISREG(ff->statp.st_mode)) return def; // ignore directories
682 for(; list; list=list->next)
683 if(eval_expression(list->path, ff->fname, (uint64_t)ff->statp.st_size, miss))
684 return 1;
685 return def;
686 }
687
is_logic_excluded(struct conf ** confs,struct FF_PKT * ff)688 int is_logic_excluded(struct conf **confs, struct FF_PKT *ff)
689 {
690 return is_logic(get_strlist(confs[OPT_EXCLOGIC]), ff, /* missing */ 0, /* default */ 0);
691 }
692
is_logic_included(struct conf ** confs,struct FF_PKT * ff)693 int is_logic_included(struct conf **confs, struct FF_PKT *ff)
694 {
695 return is_logic(get_strlist(confs[OPT_INCLOGIC]), ff, /* missing */ 1, /* default */ 0);
696 }
697