1 /*
2 * Copyright (c) 2000 Sasha Vasko <sasha at aftercode.net>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 #include "config.h"
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24
25 #include "audit.h"
26 #include "astypes.h"
27 #include "mystring.h"
28 #include "safemalloc.h"
29 #include "regexp.h"
30
31 /************************************************************************/
32 /************************************************************************/
33 /* regexp compiler and datastructures */
34 /************************************************************************/
35
36 typedef struct reg_exp_multichar_part
37 {
38 unsigned char code[254];
39 unsigned char size;
40 }
41 reg_exp_multichar_part;
42
43
44 typedef struct reg_exp_sym
45 {
46 unsigned char negate; /* if we have [!blahblah] */
47 /* in worst case scenario code should not occupy more then 2*255 characters */
48 unsigned char code[512];
49 unsigned short size;
50 }
51 reg_exp_sym;
52
53 typedef struct reg_exp
54 {
55 struct reg_exp *prev, *next;
56
57 short int left_offset, right_offset; /* min number of characters that we have to
58 leave on each size of the string in question
59 for other patterns to be able to match */
60 short int left_fixed, right_fixed; /* indicates if we have a * on left or
61 right side and therefore flexible
62 with our position */
63
64 short int lead_len; /* -1 for '*' */
65 /* number of symbols : */
66 unsigned char size; /* upto 255 characters in regexp */
67 /* symbol can be:
68 - single character : c
69 - set of characters : [ccccccc]
70 - range of characters : [r-r]
71 more then that, it can be negated : [!cccccc] or [!r-r]
72 or it can be combined : [cccc,r-r] or [!ccccc,r-r]
73
74 methinks we should pack them all together like so:
75
76 ccccc0x01rr0x01rr0x01rrccccc0x00.....
77 where ccccc - are single characters that are compared one-to-one
78 and rr - represent range on characters like r-r.
79 0x01 denotes range
80 0x00 denotes end of the symbol.
81 so we'll store them in single character array,
82 plus another array will store negation indicators.
83 */
84
85 unsigned char *symbols;
86 unsigned char *negation;
87
88 /* in Booer-Moor algorithm we need to use a skip table -
89 indicating where we should go if character mismatches any symbol
90 (skip offset for each character in alphabeth)
91 */
92 unsigned char skip[256]; /* skip table */
93
94 }
95 reg_exp;
96
97 unsigned char
parse_singlechar(unsigned char ** data,char * reserved)98 parse_singlechar (unsigned char **data, char *reserved)
99 {
100 register unsigned char c = **data;
101
102 if( c != '\0' )
103 {
104 while (*reserved)
105 if ((c == (unsigned char)*(reserved++)))
106 return 0x00;
107
108 if (c == '\\')
109 c = *(++(*data));
110
111 (*data)++;
112 }
113 return c;
114 }
115
116 reg_exp_multichar_part *
parse_multichar_part(unsigned char ** data)117 parse_multichar_part (unsigned char **data)
118 {
119 static reg_exp_multichar_part part;
120 unsigned char c;
121
122 if ((c = parse_singlechar (data, "],")) == 0x00)
123 return NULL;
124 if (**data == '-')
125 { /* range */
126 part.size = 3;
127 (*data)++;
128 part.code[0] = 0x01;
129 part.code[1] = c;
130 if ((c = parse_singlechar (data, "],")) == 0x00)
131 return NULL;
132 part.code[2] = c;
133 } else
134 { /* enumeration */
135 part.size = 1;
136 part.code[0] = c;
137 while ((c = parse_singlechar (data, "],")) != 0x00)
138 part.code[part.size++] = c;
139 }
140 return ∂
141 }
142
143 reg_exp_sym *
parse_reg_exp_sym(unsigned char ** data)144 parse_reg_exp_sym (unsigned char **data)
145 { /* this is a non-reentrant function in order to conserve memory */
146 static reg_exp_sym sym;
147
148 sym.negate = 0;
149 sym.size = 0;
150
151 if (**data == '[')
152 {
153 unsigned char *ptr = (*data) + 1;
154
155 if (*ptr == '!')
156 {
157 sym.negate = 1;
158 ptr++;
159 }
160 do
161 {
162 reg_exp_multichar_part *p_part;
163
164 p_part = parse_multichar_part (&ptr);
165 if (p_part == NULL)
166 break;
167 memcpy (&(sym.code[sym.size]), p_part->code, p_part->size);
168 sym.size += p_part->size;
169 if (*ptr == ',')
170 ptr++;
171 }
172 while (*ptr != ']');
173 *data = ptr + 1;
174 } else if ((sym.code[sym.size++] = parse_singlechar (data, "*?")) == 0x00)
175 return NULL;
176
177 if (sym.size == 0)
178 return NULL;
179
180 sym.code[sym.size++] = 0x00; /* terminator */
181 return &sym;
182 }
183
184 void
fix_skip_table(reg_exp * rexp)185 fix_skip_table (reg_exp * rexp)
186 {
187 register int i;
188 unsigned char *ptr;
189 unsigned char c;
190
191 if (rexp == NULL)
192 return;
193 ptr = rexp->symbols;
194
195 for (i = 0; i < 256; i++)
196 rexp->skip[i] = rexp->size;
197 /* symbols are stored in opposite direction - so skip is increasing */
198 for (i = 0; i < rexp->size; i++)
199 {
200 while (*ptr)
201 {
202 if (*ptr == 0x01) /* multichar range */
203 {
204 ptr++;
205 ptr++;
206 for (c = *(ptr - 1); c <= *ptr; c++)
207 rexp->skip[c] = i;
208 } else
209 rexp->skip[*ptr] = i;
210 ptr++;
211 }
212 ptr++;
213 }
214 }
215
216 unsigned char *
optimize_reg_exp_sym(unsigned char * trg,reg_exp_sym * p_sym)217 optimize_reg_exp_sym (unsigned char *trg, reg_exp_sym * p_sym)
218 {
219 /* there could be up to 255 possible charcters :) - that excludes 0x00 */
220 unsigned char scale[256];
221 unsigned char *ptr = p_sym->code;
222 unsigned char r_start, r_end;
223
224 memset (scale, 0x00, 256);
225 while (*ptr)
226 {
227 if (*ptr == 0x01)
228 {
229 r_start = *(++ptr);
230 ptr++;
231 /* we need to doublecheck if ranges are really min-max */
232 if (r_start > *ptr)
233 {
234 r_end = r_start;
235 r_start = *ptr;
236 } else
237 r_end = *ptr;
238
239 if (r_start > 0x00 && r_end > 0x00)
240 {
241 while (r_start < r_end)
242 scale[r_start++] = 1;
243 /* to avoid overflow and endless loop doing it as separate step */
244 scale[r_end] = 1;
245 }
246 } else
247 scale[*ptr] = 1;
248 ptr++;
249 }
250 /* we want to reduce ranges by excluding what is overlapping */
251 /* then reduce single characters by removing those that fall into ranges */
252 /* and maybe join them all together if they form continuous sequences */
253 /* to do that simply scan through the scale */
254 r_start = r_end = 0x00;
255 /* skipping 0x00 and 0x01 as special characters */
256 for (r_end = 2; r_end < 255; r_end++)
257 {
258 if (scale[r_end] == 0x00)
259 {
260 if (r_start != 0x00)
261 {
262 if (r_end - 1 > r_start + 1)
263 *(trg++) = 0x01;
264 *(trg++) = r_start;
265 if (r_end - 1 > r_start)
266 *(trg++) = r_end - 1;
267 r_start = 0x00;
268 }
269 } else if (r_start == 0x00)
270 r_start = r_end;
271 }
272 /* final data may have got forgotten since last scale element may not be 0x00 */
273 if (r_start != 0x00)
274 {
275 if (scale[r_end] == 0x00)
276 r_end--;
277 if (r_end > r_start + 1)
278 *(trg++) = 0x01;
279 *(trg++) = r_start;
280 if (r_end > r_start)
281 *(trg++) = r_end;
282 }
283 /* Note also that this procedure will arrange all the codes in ascending order */
284 /* so later on when we trying to match it and code in question is less then
285 any code - we can stop and make decision that we have no match :) */
286 *(trg++) = 0x00;
287 return trg;
288 }
289
290 reg_exp *
parse_reg_exp(short int lead_len,unsigned char ** data)291 parse_reg_exp (short int lead_len, unsigned char **data)
292 { /* this is a non-reentrant function in order to conserve memory */
293 reg_exp *trg;
294 reg_exp_sym *p_sym;
295
296 p_sym = parse_reg_exp_sym (data);
297 if (lead_len == 0 && p_sym == NULL)
298 return NULL;
299
300 trg = safemalloc (sizeof (reg_exp));
301 memset (trg, 0x00, sizeof (reg_exp));
302
303 trg->lead_len = lead_len;
304 trg->size = 0;
305
306 if (p_sym)
307 {
308 int len = strlen ((char*)*data) + p_sym->size + 1;
309 unsigned char *sym_buf, *sym_next, *neg_buf;
310 register unsigned char *src, *dst;
311 int i;
312
313 /* in worst case scenario symbols will occupy 2*n where n -
314 length of the remaining data.
315 at the same time negation will occupy only n bytes max anytime :
316 */
317
318 sym_next = sym_buf = safemalloc (len * 2);
319 neg_buf = safemalloc (len);
320
321 /* lets avoid recursion a little bit where we can - */
322 /* since reg_exp is nothing but a row of symbols : */
323 do
324 {
325 sym_next = optimize_reg_exp_sym (sym_next, p_sym);
326 neg_buf[(trg->size)++] = p_sym->negate;
327 }
328 while ((p_sym = parse_reg_exp_sym (data)) != NULL);
329
330 dst = trg->symbols = safemalloc (sym_next - sym_buf);
331 trg->negation = safemalloc (trg->size);
332 sym_next--;
333 /* we have to store symbols in reverse order, since
334 Booyer-Moor algorithm scans in opposite direction */
335 for (i = 0; i < trg->size; i++)
336 {
337 sym_next--;
338 while (sym_next > sym_buf && *sym_next)
339 sym_next--;
340 src = (*sym_next) ? sym_next : sym_next + 1;
341 while (*src)
342 *(dst++) = *(src++);
343 *(dst++) = *src;
344
345 trg->negation[i] = neg_buf[trg->size - i - 1];
346 }
347
348 free (sym_buf);
349 free (neg_buf);
350 } else
351 {
352 trg->symbols = NULL;
353 trg->negation = NULL;
354 }
355
356 fix_skip_table (trg);
357
358 return trg;
359 }
360
361 short int
parse_wild(unsigned char ** data)362 parse_wild (unsigned char **data)
363 { /* we go through any sequence of the '*' and '?' characters */
364 short int count = 0;
365 register unsigned char *ptr = *data;
366
367 do
368 {
369 if (*ptr == '*')
370 count = -1;
371 else if (*ptr == '?')
372 {
373 if (count >= 0)
374 count++;
375 } else
376 break;
377 }
378 while (*(++ptr));
379
380 *data = ptr;
381 return count;
382 }
383
384 wild_reg_exp *
parse_wild_reg_exp(unsigned char ** data)385 parse_wild_reg_exp (unsigned char **data)
386 {
387 wild_reg_exp *trg;
388 reg_exp *p_reg;
389 short int count = 0;
390
391 if (**data == '\0')
392 return NULL;
393
394 /* first comes wildcards (maybe) */
395 count = parse_wild (data);
396
397 /* then reg_exp */
398 if ((p_reg = parse_reg_exp (count, data)) == NULL)
399 return NULL;
400
401 /* then we can find another long wild_reg_exp */
402 trg = parse_wild_reg_exp (data);
403
404 if (trg)
405 {
406 p_reg->next = trg->head;
407 if (trg->head)
408 trg->head->prev = p_reg;
409 p_reg->prev = NULL;
410 trg->head = p_reg;
411
412 trg->hard_total += p_reg->size;
413 if (p_reg->lead_len >= 0)
414 trg->soft_total += p_reg->lead_len;
415 else
416 trg->wildcards_num++;
417
418 if (trg->max_size < p_reg->size)
419 {
420 trg->max_size = p_reg->size;
421 trg->longest = p_reg;
422 }
423 } else
424 {
425 trg = safemalloc (sizeof (wild_reg_exp));
426 trg->max_size = p_reg->size;
427 trg->longest = p_reg;
428 trg->head = trg->tail = p_reg;
429
430 trg->hard_total = p_reg->size;
431 trg->soft_total = (p_reg->lead_len > 0) ? p_reg->lead_len : 0;
432 trg->wildcards_num = (p_reg->lead_len < 0) ? 1 : 0;
433 }
434 return trg;
435 }
436
437 char *
place_singlechar(char * trg,unsigned char c)438 place_singlechar (char *trg, unsigned char c)
439 {
440 if (c == '*' || c == '?' || c == '[' || c == ']' || c == '!' || c == ',' || c == '-')
441 *(trg++) = '\\';
442
443 *(trg++) = c;
444 return trg;
445 }
446
447 char *
flatten_wild_reg_exp(wild_reg_exp * wrexp)448 flatten_wild_reg_exp (wild_reg_exp * wrexp)
449 {
450 int size = 0;
451 reg_exp *curr;
452 int i;
453 unsigned char *ptr;
454 char *trg_start, *trg;
455
456 /* dry run to find out just how much space we need to allocate for our string */
457 for (curr = wrexp->head; curr; curr = curr->next)
458 {
459 size += (curr->lead_len < 0) ? 1 : curr->lead_len;
460 ptr = curr->symbols;
461 for (i = 0; i < curr->size; i++)
462 {
463 if (*(ptr + 1) || curr->negation[i])
464 {
465 size += 2;
466 if (curr->negation[i])
467 size++;
468 }
469
470 for (; *ptr; ptr++)
471 {
472 if (*ptr == '*' || *ptr == '?' || *ptr == '[' ||
473 *ptr == ']' || *ptr == '!' || *ptr == ',' || *ptr == '-')
474 size++; /* for prepending \ */
475 else if (*ptr == 0x01)
476 size += 2; /* for comma */
477 size++;
478 }
479 ptr++;
480 }
481 }
482 trg = trg_start = safemalloc (size + 1);
483 /* now real run */
484 for (curr = wrexp->head; curr; curr = curr->next)
485 {
486 unsigned char *tail;
487
488 if (curr->lead_len < 0)
489 *(trg++) = '*';
490 else
491 for (i = 0; i < curr->lead_len; i++)
492 *(trg++) = '?';
493 tail = curr->symbols;
494 for (i = curr->size - 1; i >= 0; i--)
495 while (*(++tail));
496
497 for (i = curr->size - 1; i >= 0; i--)
498 {
499 char in_multi;
500 int part_type;
501
502 ptr = tail - 1;
503
504 while (ptr > curr->symbols && *ptr)
505 ptr--;
506 tail = ptr;
507 if (ptr > curr->symbols)
508 ptr++;
509
510 if (*(ptr + 1) || curr->negation[i])
511 {
512 *(trg++) = '[';
513 in_multi = ']';
514 if (curr->negation[i])
515 *(trg++) = '!';
516 } else
517 in_multi = '\0';
518
519 part_type = (*ptr == 0x01) ? 1 : 0;
520 for (; *ptr; ptr++)
521 {
522 if (*ptr == 0x01)
523 {
524 if (part_type != 1)
525 *(trg++) = ',';
526 part_type = 2;
527 if (*(++ptr))
528 {
529 trg = place_singlechar (trg, *ptr);
530 if (*(++ptr))
531 {
532 *(trg++) = '-';
533 trg = place_singlechar (trg, *ptr);
534 }
535 }
536 } else
537 {
538 if (part_type != 0)
539 *(trg++) = ',';
540 part_type = 0;
541 trg = place_singlechar (trg, *ptr);
542 }
543 }
544 if (in_multi)
545 *(trg++) = in_multi;
546 }
547 }
548 *(trg) = '\0';
549 return trg_start;
550
551 }
552
553 void
make_offsets(wild_reg_exp * wrexp)554 make_offsets (wild_reg_exp * wrexp)
555 {
556 reg_exp *curr;
557 short int offset, fixed;
558
559 if (wrexp)
560 {
561 offset = 0;
562 fixed = 1;
563 for (curr = wrexp->head; curr; curr = curr->next)
564 {
565 if (curr->lead_len < 0)
566 fixed = 0;
567 else
568 offset += curr->lead_len;
569 /* very important: *? prior to this regexp does affect left_fixed !!! */
570 curr->left_offset = offset;
571 curr->left_fixed = fixed;
572
573 offset += curr->size;
574 }
575 offset = 0;
576 fixed = 1;
577 for (curr = wrexp->tail; curr; curr = curr->prev)
578 {
579 curr->right_offset = offset;
580 /* very important: *? prior to this regexp does not affect right_fixed !!! */
581 curr->right_fixed = fixed;
582
583 if (curr->lead_len < 0)
584 fixed = 0;
585 else
586 offset += curr->lead_len;
587
588 offset += curr->size;
589 }
590 }
591 }
592
593 wild_reg_exp *
compile_wild_reg_exp(const char * pattern)594 compile_wild_reg_exp (const char *pattern)
595 {
596 unsigned char *buffer;
597 unsigned char *ptr = (unsigned char *)pattern;
598 register int i;
599 wild_reg_exp *trg = NULL;
600
601 if (ptr == NULL)
602 return NULL;
603
604 for (i = 0; *(ptr + i) != '\0'; i++);
605 if (i > 254)
606 i = 254;
607 buffer = safemalloc (i + 1);
608
609 for (i = 0; i < 253; i++)
610 {
611 buffer[i] = *(ptr++);
612 if (*ptr == '\0')
613 {
614 buffer[++i] = *ptr;
615 break;
616 }
617 }
618 if (*ptr)
619 { /* lets make sure we don't exceed 255 chars limit */
620 if (buffer[252] == '\\' && buffer[251] != '\\')
621 {
622 buffer[252] = '*';
623 buffer[253] = '\0';
624 } else
625 {
626 buffer[253] = '*';
627 buffer[254] = '\0';
628 }
629 }
630 ptr = buffer;
631 trg = parse_wild_reg_exp (&ptr);
632 free (buffer);
633
634 trg->raw = (unsigned char*)flatten_wild_reg_exp (trg);
635 make_offsets (trg);
636 return trg;
637 }
638
639 void
print_wild_reg_exp(wild_reg_exp * wrexp)640 print_wild_reg_exp (wild_reg_exp * wrexp)
641 {
642 reg_exp *curr;
643 int i = 0, k;
644
645 if (wrexp == NULL)
646 return;
647
648 fprintf (stderr, "wild_reg_exp :{%s}\n", wrexp->raw);
649 fprintf (stderr, " max_size : %d\n", wrexp->max_size);
650 fprintf (stderr, " total size : (hard)%d (soft)%d (wildcards)%d\n{\n",
651 wrexp->hard_total, wrexp->soft_total, wrexp->wildcards_num);
652 for (curr = wrexp->head; curr; curr = curr->next)
653 {
654 fprintf (stderr, "\tregexp #%d:\n\t{\n", i++);
655
656 {
657 unsigned char *ptr = curr->symbols;
658
659 fprintf (stderr, "\t\t offsets : (%d<%s>,%d<%s>)\n",
660 curr->left_offset,
661 (curr->left_fixed) ? "fixed" : "not fixed",
662 curr->right_offset, (curr->right_fixed) ? "fixed" : "not fixed");
663 fprintf (stderr, "\t\t lead_len = %d\n", curr->lead_len);
664 fprintf (stderr, "\t\t size = %d\n", curr->size);
665 fprintf (stderr, "\t\t Symbols :\n\t\t{\n\t\t\t");
666 for (k = 0; k < curr->size; k++)
667 {
668 fprintf (stderr, "#%d->", k);
669
670 while (*ptr)
671 fprintf (stderr, "%c", *(ptr++));
672
673 if (curr->negation[k])
674 fprintf (stderr, "\t\tNegated");
675 fprintf (stderr, "\n\t\t\t");
676 ptr++;
677 }
678 fprintf (stderr, "\n\t\t}\n");
679 }
680
681 fprintf (stderr, "\t}\n");
682 }
683 fprintf (stderr, "}\n");
684 }
685
686 /************************************************************************/
687 /************************************************************************/
688 /* Search and sorting methods */
689 /************************************************************************/
690
691 int
match_substring(reg_exp * rexp,char * string,size_t len,int dir)692 match_substring (reg_exp * rexp, char *string, size_t len, int dir)
693 {
694 signed int start = 0, end = len; /* Important - we need signed int here !!! */
695 signed int pos, offset, size;
696 register unsigned char *sym, *neg;
697 unsigned char c, r1;
698 signed int skip, lead_len;
699
700 if (rexp == NULL)
701 return 0; /* we've riched the end of the search */
702 if (string == NULL)
703 return 1;
704 size = rexp->size;
705 if (len < size)
706 return -1;
707
708 lead_len = rexp->lead_len;
709 if (size == 0) /* all we have is a wildcard */
710 if (lead_len < 0 || len == lead_len)
711 return 0;
712
713 if (lead_len < 0)
714 lead_len = 0;
715
716 start = (dir & DIR_LEFT) ? rexp->left_offset : lead_len;
717 end = (dir & DIR_RIGHT) ? len - rexp->right_offset : len;
718 /*fprintf( stderr, "dir = %d, lead_len = %d, size = %d, start = %d, end = %d, len = %d\n string = {%s}\n", dir, lead_len, size, start, end, len, string );
719 */
720 if (rexp->left_fixed || (!(dir & DIR_LEFT) && rexp->lead_len >= 0))
721 {
722 if (rexp->right_fixed)
723 {
724 if (end - start != size)
725 return -1;
726 } else
727 end = start + size;
728 } else if (rexp->right_fixed || !(dir & DIR_RIGHT))
729 start = end - size;
730
731 pos = start + size - 1;
732 /*fprintf( stderr, "adjusted: pos = %d, start = %d, end = %d\n", pos, start, end );
733 */
734 while (pos < end)
735 {
736 sym = rexp->symbols;
737 neg = rexp->negation;
738 c = '\0';
739 for (offset = pos; offset >= start; offset--)
740 {
741 c = string[offset];
742 /* try and compare current symbol */
743 while (*sym)
744 {
745 if (*sym == 0x01)
746 {
747 sym++;
748 r1 = *(sym++);
749 if (c >= r1 && c <= *sym)
750 break;
751 } else if (c == *sym)
752 break;
753 sym++;
754 }
755 if (*sym == '\0' || *neg == 1) /* then we have a mismatch */
756 break;
757 while (*sym)
758 sym++;
759 sym++;
760 neg++;
761 }
762
763 if (offset < start)
764 { /* we have a match - now try to match other regexps : */
765 int bres = 0;
766
767 if ((dir & DIR_LEFT) && rexp->prev)
768 bres = match_substring (rexp->prev, string, start - lead_len, DIR_LEFT);
769 if (bres == 0 && (dir & DIR_RIGHT) && rexp->next)
770 bres = match_substring (rexp->next, string + pos + 1, len - (pos + 1), DIR_RIGHT);
771 if (bres == 0)
772 return 0; /* hooorrraaaaayyyyy !!!!!!!!!! */
773 }
774 /* ouch, we have a mismatch ! */
775 skip = pos - offset + 1;
776 /* additional heuristic here: */
777 if (skip < rexp->skip[c])
778 skip = rexp->skip[c];
779
780 pos += skip;
781 start += skip;
782 }
783 return -1;
784 }
785
786 int /* returns 0 if we have a match - -1 if we have to keep searching, 1 - error */
match_wild_reg_exp(char * string,wild_reg_exp * wrexp)787 match_wild_reg_exp (char *string, wild_reg_exp * wrexp)
788 {
789 size_t len;
790
791 if (wrexp == NULL || string == NULL)
792 return 1;
793 if (wrexp->longest == NULL)
794 return -1; /* empty regexp matches nothing */
795 len = strlen (string);
796 if (len < wrexp->hard_total + wrexp->soft_total)
797 return -1; /* string is too short to match criteria */
798 return match_substring (wrexp->longest, string, len, DIR_BOTH);
799 }
800
801 int /* returns 0 if we have a match - -1 if we have to keep searching, 1 - error */
match_string_list(char ** list,int max_elem,wild_reg_exp * wrexp)802 match_string_list (char **list, int max_elem, wild_reg_exp * wrexp)
803 {
804 int res = -1;
805 register int i ;
806
807 if (wrexp == NULL || list == NULL)
808 return 1;
809 if (wrexp->longest == NULL) /* empty regexp matches nothing */
810 return -1;
811
812 for( i = 0 ; i < max_elem ; i++ )
813 {
814 if( list[i] == NULL ) break ;
815 else
816 {
817 int len = strlen (list[i]);
818 if (len >= wrexp->hard_total + wrexp->soft_total)/* string has sufficient length */
819 if( (res = match_substring (wrexp->longest, list[i], len, DIR_BOTH)) >= 0 )
820 break ;
821 }
822 }
823 return res;
824 }
825
826
827 int
compare_wild_reg_exp(wild_reg_exp * wrexp1,wild_reg_exp * wrexp2)828 compare_wild_reg_exp (wild_reg_exp * wrexp1, wild_reg_exp * wrexp2)
829 {
830 int strcmp_res, cmp_res;
831
832 if (wrexp1 == wrexp2)
833 return 0;
834 if (wrexp1 == NULL)
835 return -1;
836 if (wrexp2 == NULL)
837 return 1;
838 strcmp_res = strcmp ((const char*)(wrexp1->raw), (const char*)(wrexp2->raw));
839 if (strcmp_res == 0)
840 return 0;
841 cmp_res = wrexp1->hard_total - wrexp2->hard_total;
842 if (cmp_res == 0)
843 {
844 cmp_res = wrexp1->soft_total - wrexp2->soft_total;
845 if (cmp_res == 0)
846 { /* the more wildcrads we have - the less strict pattern it is
847 so here we use different way then above : */
848 cmp_res = -wrexp1->wildcards_num + wrexp2->wildcards_num;
849 if (cmp_res == 0)
850 cmp_res = strcmp_res;
851 }
852 }
853 return cmp_res;
854 }
855
856 /************************************************************************/
857
858 void
destroy_wild_reg_exp(wild_reg_exp * wrexp)859 destroy_wild_reg_exp (wild_reg_exp * wrexp)
860 {
861 reg_exp *curr, *next = NULL;
862
863 if (wrexp == NULL)
864 return;
865
866 for (curr = wrexp->head; curr; curr = next)
867 {
868 next = curr->next;
869 if (curr->symbols)
870 free (curr->symbols);
871 if (curr->negation)
872 free (curr->negation);
873 free (curr);
874 }
875
876 if (wrexp->raw)
877 free (wrexp->raw);
878 free (wrexp);
879 }
880
881 /************************************************************************/
882 /* Older code: very ugly but simple and may come in handy someplace : */
883 /************************************************************************/
884 /************************************************************************
885 * Does `string' match `pattern'? '*' in pattern matches any sub-string
886 * (including the null string) '?' matches any single char. For use
887 * by filenameforall. Note that '*' matches across directory boundaries
888 *
889 * This code donated by Paul Hudson <paulh@harlequin.co.uk>
890 * It is public domain, no strings attached. No guarantees either.
891 * Modified by Emanuele Caratti <wiz@iol.it>
892 *
893 *************************************************************************/
894
895 int
matchWildcards(const char * pattern,const char * string)896 matchWildcards (const char *pattern, const char *string)
897 {
898 if (pattern == NULL)
899 return 1;
900 else if (*pattern == '*' && !*(pattern + 1))
901 return 1;
902
903 if (string == NULL)
904 return 0;
905
906 while (*string && *pattern)
907 {
908 if (*pattern == '?')
909 {
910 /* match any character */
911 pattern++;
912 string++;
913 } else if (*pattern == '*')
914 {
915 /* see if the rest of the pattern matches any trailing substring
916 * of the string. */
917 pattern++;
918 if (*pattern == 0)
919 {
920 return 1; /* trailing * must match rest */
921 }
922 while (1)
923 {
924 while (*string && (*string != *pattern))
925 string++;
926 if (!*string)
927 return 0;
928 if (matchWildcards (pattern, string))
929 return 1;
930 string++;
931 }
932
933 } else
934 {
935 if (*pattern == '\\')
936 pattern++; /* has strange, but harmless effects if the last
937 * character is a '\\' */
938 if (*pattern++ != *string++)
939 {
940 return 0;
941 }
942 }
943 }
944 if ((*pattern == 0) && (*string == 0))
945 return 1;
946 if ((*string == 0) && (strcmp (pattern, "*") == 0))
947 return 1;
948 return 0;
949 }
950
951