1 /*
2  * Copyright (c) 2000 Sasha Vasko <sasha at aftercode.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  */
18 
19 #include "config.h"
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 
25 #include "audit.h"
26 #include "astypes.h"
27 #include "mystring.h"
28 #include "safemalloc.h"
29 #include "regexp.h"
30 
31 /************************************************************************/
32 /************************************************************************/
33 /*			regexp compiler and datastructures		*/
34 /************************************************************************/
35 
36 typedef struct reg_exp_multichar_part
37 {
38 	unsigned char code[254];
39 	unsigned char size;
40 }
41 reg_exp_multichar_part;
42 
43 
44 typedef struct reg_exp_sym
45 {
46 	unsigned char negate;					   /* if we have [!blahblah] */
47 	/* in worst case scenario code should not occupy more then 2*255 characters */
48 	unsigned char code[512];
49 	unsigned short size;
50 }
51 reg_exp_sym;
52 
53 typedef struct reg_exp
54 {
55 	struct reg_exp *prev, *next;
56 
57 	short int     left_offset, right_offset;   /* min number of characters that we have to
58 											      leave on each size of the string in question
59 											      for other patterns to be able to match */
60 	short int     left_fixed, right_fixed;	   /* indicates if we have a * on left or
61 											      right side and therefore flexible
62 											      with our position */
63 
64 	short int     lead_len;					   /* -1 for '*' */
65 	/* number of symbols : */
66 	unsigned char size;						   /* upto 255 characters in regexp */
67 	/* symbol can be:
68 	   - single character    : c
69 	   - set of characters   : [ccccccc]
70 	   - range of characters : [r-r]
71 	   more then that, it can be negated : [!cccccc] or [!r-r]
72 	   or it can be combined         : [cccc,r-r] or [!ccccc,r-r]
73 
74 	   methinks we should pack them all together like so:
75 
76 	   ccccc0x01rr0x01rr0x01rrccccc0x00.....
77 	   where ccccc - are single characters that are compared one-to-one
78 	   and rr - represent range on characters like r-r.
79 	   0x01 denotes range
80 	   0x00 denotes end of the symbol.
81 	   so we'll store them in single character array,
82 	   plus another array will store negation indicators.
83 	 */
84 
85 	unsigned char *symbols;
86 	unsigned char *negation;
87 
88 	/* in Booer-Moor algorithm we need to use a skip table  -
89 	   indicating where we should go if character mismatches any symbol
90 	   (skip offset for each character in alphabeth)
91 	 */
92 	unsigned char skip[256];				   /* skip table */
93 
94 }
95 reg_exp;
96 
97 unsigned char
parse_singlechar(unsigned char ** data,char * reserved)98 parse_singlechar (unsigned char **data, char *reserved)
99 {
100 	register unsigned char c = **data;
101 
102     if( c != '\0' )
103     {
104         while (*reserved)
105             if ((c == (unsigned char)*(reserved++)))
106                 return 0x00;
107 
108         if (c == '\\')
109             c = *(++(*data));
110 
111         (*data)++;
112     }
113 	return c;
114 }
115 
116 reg_exp_multichar_part *
parse_multichar_part(unsigned char ** data)117 parse_multichar_part (unsigned char **data)
118 {
119 	static reg_exp_multichar_part part;
120 	unsigned char c;
121 
122 	if ((c = parse_singlechar (data, "],")) == 0x00)
123 		return NULL;
124 	if (**data == '-')
125 	{										   /* range */
126 		part.size = 3;
127 		(*data)++;
128 		part.code[0] = 0x01;
129 		part.code[1] = c;
130 		if ((c = parse_singlechar (data, "],")) == 0x00)
131 			return NULL;
132 		part.code[2] = c;
133 	} else
134 	{										   /* enumeration */
135 		part.size = 1;
136 		part.code[0] = c;
137 		while ((c = parse_singlechar (data, "],")) != 0x00)
138 			part.code[part.size++] = c;
139 	}
140 	return &part;
141 }
142 
143 reg_exp_sym  *
parse_reg_exp_sym(unsigned char ** data)144 parse_reg_exp_sym (unsigned char **data)
145 {											   /* this is a non-reentrant function in order to conserve memory */
146 	static reg_exp_sym sym;
147 
148 	sym.negate = 0;
149 	sym.size = 0;
150 
151 	if (**data == '[')
152 	{
153 		unsigned char *ptr = (*data) + 1;
154 
155 		if (*ptr == '!')
156 		{
157 			sym.negate = 1;
158 			ptr++;
159 		}
160 		do
161 		{
162 			reg_exp_multichar_part *p_part;
163 
164 			p_part = parse_multichar_part (&ptr);
165 			if (p_part == NULL)
166 				break;
167 			memcpy (&(sym.code[sym.size]), p_part->code, p_part->size);
168 			sym.size += p_part->size;
169 			if (*ptr == ',')
170 				ptr++;
171 		}
172 		while (*ptr != ']');
173 		*data = ptr + 1;
174 	} else if ((sym.code[sym.size++] = parse_singlechar (data, "*?")) == 0x00)
175 		return NULL;
176 
177 	if (sym.size == 0)
178 		return NULL;
179 
180 	sym.code[sym.size++] = 0x00;			   /* terminator */
181 	return &sym;
182 }
183 
184 void
fix_skip_table(reg_exp * rexp)185 fix_skip_table (reg_exp * rexp)
186 {
187 	register int  i;
188 	unsigned char *ptr;
189 	unsigned char c;
190 
191 	if (rexp == NULL)
192 		return;
193 	ptr = rexp->symbols;
194 
195 	for (i = 0; i < 256; i++)
196 		rexp->skip[i] = rexp->size;
197 	/* symbols are stored in opposite direction - so skip is increasing */
198 	for (i = 0; i < rexp->size; i++)
199 	{
200 		while (*ptr)
201 		{
202 			if (*ptr == 0x01)				   /* multichar range */
203 			{
204 				ptr++;
205 				ptr++;
206 				for (c = *(ptr - 1); c <= *ptr; c++)
207 					rexp->skip[c] = i;
208 			} else
209 				rexp->skip[*ptr] = i;
210 			ptr++;
211 		}
212 		ptr++;
213 	}
214 }
215 
216 unsigned char *
optimize_reg_exp_sym(unsigned char * trg,reg_exp_sym * p_sym)217 optimize_reg_exp_sym (unsigned char *trg, reg_exp_sym * p_sym)
218 {
219 /* there could be up to 255 possible charcters :) - that excludes 0x00 */
220 	unsigned char scale[256];
221 	unsigned char *ptr = p_sym->code;
222 	unsigned char r_start, r_end;
223 
224 	memset (scale, 0x00, 256);
225 	while (*ptr)
226 	{
227 		if (*ptr == 0x01)
228 		{
229 			r_start = *(++ptr);
230 			ptr++;
231 			/* we need to doublecheck if ranges are really min-max */
232 			if (r_start > *ptr)
233 			{
234 				r_end = r_start;
235 				r_start = *ptr;
236 			} else
237 				r_end = *ptr;
238 
239 			if (r_start > 0x00 && r_end > 0x00)
240 			{
241 				while (r_start < r_end)
242 					scale[r_start++] = 1;
243 				/* to avoid overflow and endless loop doing it as separate step */
244 				scale[r_end] = 1;
245 			}
246 		} else
247 			scale[*ptr] = 1;
248 		ptr++;
249 	}
250 	/* we want to reduce ranges by excluding what is overlapping */
251 	/* then reduce single characters by removing those that fall into ranges */
252 	/* and maybe join them all together if they form continuous sequences */
253 	/* to do that simply scan through the scale */
254 	r_start = r_end = 0x00;
255 	/* skipping 0x00 and 0x01 as special characters */
256 	for (r_end = 2; r_end < 255; r_end++)
257 	{
258 		if (scale[r_end] == 0x00)
259 		{
260 			if (r_start != 0x00)
261 			{
262 				if (r_end - 1 > r_start + 1)
263 					*(trg++) = 0x01;
264 				*(trg++) = r_start;
265 				if (r_end - 1 > r_start)
266 					*(trg++) = r_end - 1;
267 				r_start = 0x00;
268 			}
269 		} else if (r_start == 0x00)
270 			r_start = r_end;
271 	}
272 	/* final data may have got forgotten since last scale element may not be 0x00 */
273 	if (r_start != 0x00)
274 	{
275 		if (scale[r_end] == 0x00)
276 			r_end--;
277 		if (r_end > r_start + 1)
278 			*(trg++) = 0x01;
279 		*(trg++) = r_start;
280 		if (r_end > r_start)
281 			*(trg++) = r_end;
282 	}
283 	/* Note also that this procedure will arrange all the codes in ascending order */
284 	/* so later on when we trying to match it and code in question is less then
285 	   any code - we can stop and make decision that we have no match :) */
286 	*(trg++) = 0x00;
287 	return trg;
288 }
289 
290 reg_exp      *
parse_reg_exp(short int lead_len,unsigned char ** data)291 parse_reg_exp (short int lead_len, unsigned char **data)
292 {											   /* this is a non-reentrant function in order to conserve memory */
293 	reg_exp      *trg;
294 	reg_exp_sym  *p_sym;
295 
296 	p_sym = parse_reg_exp_sym (data);
297 	if (lead_len == 0 && p_sym == NULL)
298 		return NULL;
299 
300 	trg = safemalloc (sizeof (reg_exp));
301 	memset (trg, 0x00, sizeof (reg_exp));
302 
303 	trg->lead_len = lead_len;
304 	trg->size = 0;
305 
306 	if (p_sym)
307 	{
308 		int           len = strlen ((char*)*data) + p_sym->size + 1;
309 		unsigned char *sym_buf, *sym_next, *neg_buf;
310 		register unsigned char *src, *dst;
311 		int           i;
312 
313 		/* in worst case scenario symbols will occupy 2*n where n -
314 		   length of the remaining data.
315 		   at the same time negation will occupy only n bytes max anytime :
316 		 */
317 
318 		sym_next = sym_buf = safemalloc (len * 2);
319 		neg_buf = safemalloc (len);
320 
321 		/* lets avoid recursion a little bit where we can - */
322 		/* since reg_exp is nothing but a row of symbols :  */
323 		do
324 		{
325 			sym_next = optimize_reg_exp_sym (sym_next, p_sym);
326 			neg_buf[(trg->size)++] = p_sym->negate;
327 		}
328 		while ((p_sym = parse_reg_exp_sym (data)) != NULL);
329 
330 		dst = trg->symbols = safemalloc (sym_next - sym_buf);
331 		trg->negation = safemalloc (trg->size);
332 		sym_next--;
333 		/* we have to store symbols in reverse order, since
334 		   Booyer-Moor algorithm scans in opposite direction */
335 		for (i = 0; i < trg->size; i++)
336 		{
337 			sym_next--;
338 			while (sym_next > sym_buf && *sym_next)
339 				sym_next--;
340 			src = (*sym_next) ? sym_next : sym_next + 1;
341 			while (*src)
342 				*(dst++) = *(src++);
343 			*(dst++) = *src;
344 
345 			trg->negation[i] = neg_buf[trg->size - i - 1];
346 		}
347 
348 		free (sym_buf);
349 		free (neg_buf);
350 	} else
351 	{
352 		trg->symbols = NULL;
353 		trg->negation = NULL;
354 	}
355 
356 	fix_skip_table (trg);
357 
358 	return trg;
359 }
360 
361 short int
parse_wild(unsigned char ** data)362 parse_wild (unsigned char **data)
363 {											   /* we go through any sequence of the '*' and '?' characters */
364 	short int     count = 0;
365 	register unsigned char *ptr = *data;
366 
367 	do
368 	{
369 		if (*ptr == '*')
370 			count = -1;
371 		else if (*ptr == '?')
372 		{
373 			if (count >= 0)
374 				count++;
375 		} else
376 			break;
377 	}
378 	while (*(++ptr));
379 
380 	*data = ptr;
381 	return count;
382 }
383 
384 wild_reg_exp *
parse_wild_reg_exp(unsigned char ** data)385 parse_wild_reg_exp (unsigned char **data)
386 {
387 	wild_reg_exp *trg;
388 	reg_exp      *p_reg;
389 	short int     count = 0;
390 
391 	if (**data == '\0')
392 		return NULL;
393 
394 	/* first comes wildcards (maybe) */
395 	count = parse_wild (data);
396 
397 	/* then reg_exp */
398 	if ((p_reg = parse_reg_exp (count, data)) == NULL)
399 		return NULL;
400 
401 	/* then we can find another long wild_reg_exp */
402 	trg = parse_wild_reg_exp (data);
403 
404 	if (trg)
405 	{
406 		p_reg->next = trg->head;
407 		if (trg->head)
408 			trg->head->prev = p_reg;
409 		p_reg->prev = NULL;
410 		trg->head = p_reg;
411 
412 		trg->hard_total += p_reg->size;
413 		if (p_reg->lead_len >= 0)
414 			trg->soft_total += p_reg->lead_len;
415 		else
416 			trg->wildcards_num++;
417 
418 		if (trg->max_size < p_reg->size)
419 		{
420 			trg->max_size = p_reg->size;
421 			trg->longest = p_reg;
422 		}
423 	} else
424 	{
425 		trg = safemalloc (sizeof (wild_reg_exp));
426 		trg->max_size = p_reg->size;
427 		trg->longest = p_reg;
428 		trg->head = trg->tail = p_reg;
429 
430 		trg->hard_total = p_reg->size;
431 		trg->soft_total = (p_reg->lead_len > 0) ? p_reg->lead_len : 0;
432 		trg->wildcards_num = (p_reg->lead_len < 0) ? 1 : 0;
433 	}
434 	return trg;
435 }
436 
437 char         *
place_singlechar(char * trg,unsigned char c)438 place_singlechar (char *trg, unsigned char c)
439 {
440 	if (c == '*' || c == '?' || c == '[' || c == ']' || c == '!' || c == ',' || c == '-')
441 		*(trg++) = '\\';
442 
443 	*(trg++) = c;
444 	return trg;
445 }
446 
447 char         *
flatten_wild_reg_exp(wild_reg_exp * wrexp)448 flatten_wild_reg_exp (wild_reg_exp * wrexp)
449 {
450 	int           size = 0;
451 	reg_exp      *curr;
452 	int           i;
453 	unsigned char *ptr;
454 	char         *trg_start, *trg;
455 
456 	/* dry run to find out just how much space we need to allocate for our string */
457 	for (curr = wrexp->head; curr; curr = curr->next)
458 	{
459 		size += (curr->lead_len < 0) ? 1 : curr->lead_len;
460 		ptr = curr->symbols;
461 		for (i = 0; i < curr->size; i++)
462 		{
463 			if (*(ptr + 1) || curr->negation[i])
464 			{
465 				size += 2;
466 				if (curr->negation[i])
467 					size++;
468 			}
469 
470 			for (; *ptr; ptr++)
471 			{
472 				if (*ptr == '*' || *ptr == '?' || *ptr == '[' ||
473 					*ptr == ']' || *ptr == '!' || *ptr == ',' || *ptr == '-')
474 					size++;					   /* for prepending \ */
475 				else if (*ptr == 0x01)
476 					size += 2;				   /* for comma */
477 				size++;
478 			}
479 			ptr++;
480 		}
481 	}
482 	trg = trg_start = safemalloc (size + 1);
483 	/* now real run */
484 	for (curr = wrexp->head; curr; curr = curr->next)
485 	{
486 		unsigned char *tail;
487 
488 		if (curr->lead_len < 0)
489 			*(trg++) = '*';
490 		else
491 			for (i = 0; i < curr->lead_len; i++)
492 				*(trg++) = '?';
493 		tail = curr->symbols;
494 		for (i = curr->size - 1; i >= 0; i--)
495 			while (*(++tail));
496 
497 		for (i = curr->size - 1; i >= 0; i--)
498 		{
499 			char          in_multi;
500 			int           part_type;
501 
502 			ptr = tail - 1;
503 
504 			while (ptr > curr->symbols && *ptr)
505 				ptr--;
506 			tail = ptr;
507 			if (ptr > curr->symbols)
508 				ptr++;
509 
510 			if (*(ptr + 1) || curr->negation[i])
511 			{
512 				*(trg++) = '[';
513 				in_multi = ']';
514 				if (curr->negation[i])
515 					*(trg++) = '!';
516 			} else
517 				in_multi = '\0';
518 
519 			part_type = (*ptr == 0x01) ? 1 : 0;
520 			for (; *ptr; ptr++)
521 			{
522 				if (*ptr == 0x01)
523 				{
524 					if (part_type != 1)
525 						*(trg++) = ',';
526 					part_type = 2;
527 					if (*(++ptr))
528 					{
529 						trg = place_singlechar (trg, *ptr);
530 						if (*(++ptr))
531 						{
532 							*(trg++) = '-';
533 							trg = place_singlechar (trg, *ptr);
534 						}
535 					}
536 				} else
537 				{
538 					if (part_type != 0)
539 						*(trg++) = ',';
540 					part_type = 0;
541 					trg = place_singlechar (trg, *ptr);
542 				}
543 			}
544 			if (in_multi)
545 				*(trg++) = in_multi;
546 		}
547 	}
548 	*(trg) = '\0';
549 	return trg_start;
550 
551 }
552 
553 void
make_offsets(wild_reg_exp * wrexp)554 make_offsets (wild_reg_exp * wrexp)
555 {
556 	reg_exp      *curr;
557 	short int     offset, fixed;
558 
559 	if (wrexp)
560 	{
561 		offset = 0;
562 		fixed = 1;
563 		for (curr = wrexp->head; curr; curr = curr->next)
564 		{
565 			if (curr->lead_len < 0)
566 				fixed = 0;
567 			else
568 				offset += curr->lead_len;
569 			/* very important: *? prior to this regexp does affect left_fixed !!! */
570 			curr->left_offset = offset;
571 			curr->left_fixed = fixed;
572 
573 			offset += curr->size;
574 		}
575 		offset = 0;
576 		fixed = 1;
577 		for (curr = wrexp->tail; curr; curr = curr->prev)
578 		{
579 			curr->right_offset = offset;
580 			/* very important: *? prior to this regexp does not affect right_fixed !!! */
581 			curr->right_fixed = fixed;
582 
583 			if (curr->lead_len < 0)
584 				fixed = 0;
585 			else
586 				offset += curr->lead_len;
587 
588 			offset += curr->size;
589 		}
590 	}
591 }
592 
593 wild_reg_exp *
compile_wild_reg_exp(const char * pattern)594 compile_wild_reg_exp (const char *pattern)
595 {
596 	unsigned char *buffer;
597 	unsigned char *ptr = (unsigned char *)pattern;
598 	register int  i;
599 	wild_reg_exp *trg = NULL;
600 
601 	if (ptr == NULL)
602 		return NULL;
603 
604 	for (i = 0; *(ptr + i) != '\0'; i++);
605 	if (i > 254)
606 		i = 254;
607 	buffer = safemalloc (i + 1);
608 
609 	for (i = 0; i < 253; i++)
610 	{
611 		buffer[i] = *(ptr++);
612 		if (*ptr == '\0')
613 		{
614 			buffer[++i] = *ptr;
615 			break;
616 		}
617 	}
618 	if (*ptr)
619 	{										   /* lets make sure we don't exceed 255 chars limit */
620 		if (buffer[252] == '\\' && buffer[251] != '\\')
621 		{
622 			buffer[252] = '*';
623 			buffer[253] = '\0';
624 		} else
625 		{
626 			buffer[253] = '*';
627 			buffer[254] = '\0';
628 		}
629 	}
630 	ptr = buffer;
631 	trg = parse_wild_reg_exp (&ptr);
632 	free (buffer);
633 
634 	trg->raw = (unsigned char*)flatten_wild_reg_exp (trg);
635 	make_offsets (trg);
636 	return trg;
637 }
638 
639 void
print_wild_reg_exp(wild_reg_exp * wrexp)640 print_wild_reg_exp (wild_reg_exp * wrexp)
641 {
642 	reg_exp      *curr;
643 	int           i = 0, k;
644 
645 	if (wrexp == NULL)
646 		return;
647 
648 	fprintf (stderr, "wild_reg_exp :{%s}\n", wrexp->raw);
649 	fprintf (stderr, "    max_size : %d\n", wrexp->max_size);
650 	fprintf (stderr, "  total size : (hard)%d (soft)%d (wildcards)%d\n{\n",
651 			 wrexp->hard_total, wrexp->soft_total, wrexp->wildcards_num);
652 	for (curr = wrexp->head; curr; curr = curr->next)
653 	{
654 		fprintf (stderr, "\tregexp #%d:\n\t{\n", i++);
655 
656 		{
657 			unsigned char *ptr = curr->symbols;
658 
659 			fprintf (stderr, "\t\t offsets : (%d<%s>,%d<%s>)\n",
660 					 curr->left_offset,
661 					 (curr->left_fixed) ? "fixed" : "not fixed",
662 					 curr->right_offset, (curr->right_fixed) ? "fixed" : "not fixed");
663 			fprintf (stderr, "\t\t lead_len = %d\n", curr->lead_len);
664 			fprintf (stderr, "\t\t size = %d\n", curr->size);
665 			fprintf (stderr, "\t\t Symbols :\n\t\t{\n\t\t\t");
666 			for (k = 0; k < curr->size; k++)
667 			{
668 				fprintf (stderr, "#%d->", k);
669 
670 				while (*ptr)
671 					fprintf (stderr, "%c", *(ptr++));
672 
673 				if (curr->negation[k])
674 					fprintf (stderr, "\t\tNegated");
675 				fprintf (stderr, "\n\t\t\t");
676 				ptr++;
677 			}
678 			fprintf (stderr, "\n\t\t}\n");
679 		}
680 
681 		fprintf (stderr, "\t}\n");
682 	}
683 	fprintf (stderr, "}\n");
684 }
685 
686 /************************************************************************/
687 /************************************************************************/
688 /*			Search and sorting methods 			*/
689 /************************************************************************/
690 
691 int
match_substring(reg_exp * rexp,char * string,size_t len,int dir)692 match_substring (reg_exp * rexp, char *string, size_t len, int dir)
693 {
694 	signed int    start = 0, end = len; /* Important - we need signed int here !!! */
695 	signed int    pos, offset, size;
696 	register unsigned char *sym, *neg;
697 	unsigned char c, r1;
698 	signed int    skip, lead_len;
699 
700 	if (rexp == NULL)
701 		return 0;							   /* we've riched the end of the search */
702 	if (string == NULL)
703 		return 1;
704 	size = rexp->size;
705 	if (len < size)
706 		return -1;
707 
708 	lead_len = rexp->lead_len;
709 	if (size == 0)							   /* all we have is a wildcard */
710 		if (lead_len < 0 || len == lead_len)
711 			return 0;
712 
713 	if (lead_len < 0)
714 		lead_len = 0;
715 
716 	start = (dir & DIR_LEFT) ? rexp->left_offset : lead_len;
717 	end = (dir & DIR_RIGHT) ? len - rexp->right_offset : len;
718 /*fprintf( stderr, "dir = %d, lead_len = %d, size = %d, start = %d, end = %d, len = %d\n string = {%s}\n", dir, lead_len, size, start, end, len, string );
719 */
720 	if (rexp->left_fixed || (!(dir & DIR_LEFT) && rexp->lead_len >= 0))
721 	{
722 		if (rexp->right_fixed)
723 		{
724 			if (end - start != size)
725 				return -1;
726 		} else
727 			end = start + size;
728 	} else if (rexp->right_fixed || !(dir & DIR_RIGHT))
729 		start = end - size;
730 
731 	pos = start + size - 1;
732 /*fprintf( stderr, "adjusted: pos = %d, start = %d, end = %d\n", pos, start, end );
733 */
734 	while (pos < end)
735 	{
736 		sym = rexp->symbols;
737 		neg = rexp->negation;
738 		c = '\0';
739 		for (offset = pos; offset >= start; offset--)
740 		{
741 			c = string[offset];
742 			/* try and compare current symbol */
743 			while (*sym)
744 			{
745 				if (*sym == 0x01)
746 				{
747 					sym++;
748 					r1 = *(sym++);
749 					if (c >= r1 && c <= *sym)
750 						break;
751 				} else if (c == *sym)
752 					break;
753 				sym++;
754 			}
755 			if (*sym == '\0' || *neg == 1)	   /* then we have a mismatch */
756 				break;
757 			while (*sym)
758 				sym++;
759 			sym++;
760 			neg++;
761 		}
762 
763 		if (offset < start)
764 		{									   /* we have a match - now try to match other regexps : */
765 			int           bres = 0;
766 
767 			if ((dir & DIR_LEFT) && rexp->prev)
768 				bres = match_substring (rexp->prev, string, start - lead_len, DIR_LEFT);
769 			if (bres == 0 && (dir & DIR_RIGHT) && rexp->next)
770 				bres = match_substring (rexp->next, string + pos + 1, len - (pos + 1), DIR_RIGHT);
771 			if (bres == 0)
772 				return 0;					   /* hooorrraaaaayyyyy !!!!!!!!!! */
773 		}
774 		/* ouch, we have a mismatch ! */
775 		skip = pos - offset + 1;
776 		/* additional heuristic here: */
777 		if (skip < rexp->skip[c])
778 			skip = rexp->skip[c];
779 
780 		pos += skip;
781 		start += skip;
782 	}
783 	return -1;
784 }
785 
786 int											   /* returns 0 if we have a match - -1 if we have to keep searching, 1 - error */
match_wild_reg_exp(char * string,wild_reg_exp * wrexp)787 match_wild_reg_exp (char *string, wild_reg_exp * wrexp)
788 {
789 	size_t        len;
790 
791 	if (wrexp == NULL || string == NULL)
792 		return 1;
793 	if (wrexp->longest == NULL)
794 		return -1;							   /* empty regexp matches nothing */
795 	len = strlen (string);
796 	if (len < wrexp->hard_total + wrexp->soft_total)
797 		return -1;							   /* string is too short to match criteria */
798 	return match_substring (wrexp->longest, string, len, DIR_BOTH);
799 }
800 
801 int                                            /* returns 0 if we have a match - -1 if we have to keep searching, 1 - error */
match_string_list(char ** list,int max_elem,wild_reg_exp * wrexp)802 match_string_list (char **list, int max_elem, wild_reg_exp * wrexp)
803 {
804     int res = -1;
805     register int i ;
806 
807     if (wrexp == NULL || list == NULL)
808         return 1;
809     if (wrexp->longest == NULL)  /* empty regexp matches nothing */
810         return -1;
811 
812     for( i = 0 ; i < max_elem ; i++ )
813     {
814         if( list[i] == NULL ) break ;
815         else
816         {
817             int len = strlen (list[i]);
818             if (len >= wrexp->hard_total + wrexp->soft_total)/* string has sufficient length */
819                 if( (res = match_substring (wrexp->longest, list[i], len, DIR_BOTH)) >= 0 )
820                     break ;
821         }
822     }
823     return res;
824 }
825 
826 
827 int
compare_wild_reg_exp(wild_reg_exp * wrexp1,wild_reg_exp * wrexp2)828 compare_wild_reg_exp (wild_reg_exp * wrexp1, wild_reg_exp * wrexp2)
829 {
830 	int           strcmp_res, cmp_res;
831 
832 	if (wrexp1 == wrexp2)
833 		return 0;
834 	if (wrexp1 == NULL)
835 		return -1;
836 	if (wrexp2 == NULL)
837 		return 1;
838 	strcmp_res = strcmp ((const char*)(wrexp1->raw), (const char*)(wrexp2->raw));
839 	if (strcmp_res == 0)
840 		return 0;
841 	cmp_res = wrexp1->hard_total - wrexp2->hard_total;
842 	if (cmp_res == 0)
843 	{
844 		cmp_res = wrexp1->soft_total - wrexp2->soft_total;
845 		if (cmp_res == 0)
846 		{									   /*  the more wildcrads we have - the less strict pattern it is
847 											      so here we use different way then above : */
848 			cmp_res = -wrexp1->wildcards_num + wrexp2->wildcards_num;
849 			if (cmp_res == 0)
850 				cmp_res = strcmp_res;
851 		}
852 	}
853 	return cmp_res;
854 }
855 
856 /************************************************************************/
857 
858 void
destroy_wild_reg_exp(wild_reg_exp * wrexp)859 destroy_wild_reg_exp (wild_reg_exp * wrexp)
860 {
861 	reg_exp      *curr, *next = NULL;
862 
863 	if (wrexp == NULL)
864 		return;
865 
866 	for (curr = wrexp->head; curr; curr = next)
867 	{
868 		next = curr->next;
869 		if (curr->symbols)
870 			free (curr->symbols);
871 		if (curr->negation)
872 			free (curr->negation);
873 		free (curr);
874 	}
875 
876 	if (wrexp->raw)
877 		free (wrexp->raw);
878 	free (wrexp);
879 }
880 
881 /************************************************************************/
882 /* Older code: very ugly but simple and may come in handy someplace :   */
883 /************************************************************************/
884 /************************************************************************
885  *	Does `string' match `pattern'? '*' in pattern matches any sub-string
886  *	(including the null string) '?' matches any single char. For use
887  *	by filenameforall. Note that '*' matches across directory boundaries
888  *
889  *      This code donated by  Paul Hudson <paulh@harlequin.co.uk>
890  *      It is public domain, no strings attached. No guarantees either.
891  *		Modified by Emanuele Caratti <wiz@iol.it>
892  *
893  *************************************************************************/
894 
895 int
matchWildcards(const char * pattern,const char * string)896 matchWildcards (const char *pattern, const char *string)
897 {
898 	if (pattern == NULL)
899 		return 1;
900 	else if (*pattern == '*' && !*(pattern + 1))
901 		return 1;
902 
903 	if (string == NULL)
904 		return 0;
905 
906 	while (*string && *pattern)
907 	{
908 		if (*pattern == '?')
909 		{
910 			/* match any character */
911 			pattern++;
912 			string++;
913 		} else if (*pattern == '*')
914 		{
915 			/* see if the rest of the pattern matches any trailing substring
916 			 * of the string. */
917 			pattern++;
918 			if (*pattern == 0)
919 			{
920 				return 1;					   /* trailing * must match rest */
921 			}
922 			while (1)
923 			{
924 				while (*string && (*string != *pattern))
925 					string++;
926 				if (!*string)
927 					return 0;
928 				if (matchWildcards (pattern, string))
929 					return 1;
930 				string++;
931 			}
932 
933 		} else
934 		{
935 			if (*pattern == '\\')
936 				pattern++;					   /* has strange, but harmless effects if the last
937 											      * character is a '\\' */
938 			if (*pattern++ != *string++)
939 			{
940 				return 0;
941 			}
942 		}
943 	}
944 	if ((*pattern == 0) && (*string == 0))
945 		return 1;
946 	if ((*string == 0) && (strcmp (pattern, "*") == 0))
947 		return 1;
948 	return 0;
949 }
950 
951