1 /* $Id$ $Revision$ */
2 /* vim:set shiftwidth=4 ts=8: */
3
4 /*************************************************************************
5 * Copyright (c) 2011 AT&T Intellectual Property
6 * All rights reserved. This program and the accompanying materials
7 * are made available under the terms of the Eclipse Public License v1.0
8 * which accompanies this distribution, and is available at
9 * http://www.eclipse.org/legal/epl-v10.html
10 *
11 * Contributors: See CVS logs. Details at http://www.graphviz.org/
12 *************************************************************************/
13
14
15 /*
16 * D. G. Korn
17 * G. S. Fowler
18 * AT&T Research
19 *
20 * match shell file patterns -- derived from Bourne and Korn shell gmatch()
21 *
22 * sh pattern egrep RE description
23 * ---------- -------- -----------
24 * * .* 0 or more chars
25 * ? . any single char
26 * [.] [.] char class
27 * [!.] [^.] negated char class
28 * [[:.:]] [[:.:]] ctype class
29 * [[=.=]] [[=.=]] equivalence class
30 * [[...]] [[...]] collation element
31 * *(.) (.)* 0 or more of
32 * +(.) (.)+ 1 or more of
33 * ?(.) (.)? 0 or 1 of
34 * (.) (.) 1 of
35 * @(.) (.) 1 of
36 * a|b a|b a or b
37 * \# () subgroup back reference [1-9]
38 * a&b a and b
39 * !(.) none of
40 *
41 * \ used to escape metacharacters
42 *
43 * *, ?, (, |, &, ), [, \ must be \'d outside of [...]
44 * only ] must be \'d inside [...]
45 *
46 * BUG: unbalanced ) terminates top level pattern
47 *
48 * BOTCH: collating element sort order and character class ranges apparently
49 * do not have strcoll() in common so we resort to fnmatch(), calling
50 * it up to COLL_MAX times to determine the matched collating
51 * element size
52 */
53
54 #include <ast.h>
55 #include <ctype.h>
56 #include <hashkey.h>
57 #include <string.h>
58
59 #if _hdr_wchar && _lib_wctype && _lib_iswctype
60
61 #include <stdio.h> /* because <wchar.h> includes it and we generate it */
62 #include <wchar.h>
63 #if _hdr_wctype
64 #include <wctype.h>
65 #endif
66
67 #undef isalnum
68 #define isalnum(x) iswalnum(x)
69 #undef isalpha
70 #define isalpha(x) iswalpha(x)
71 #undef iscntrl
72 #define iscntrl(x) iswcntrl(x)
73 #undef isblank
74 #define isblank(x) iswblank(x)
75 #undef isdigit
76 #define isdigit(x) iswdigit(x)
77 #undef isgraph
78 #define isgraph(x) iswgraph(x)
79 #undef islower
80 #define islower(x) iswlower(x)
81 #undef isprint
82 #define isprint(x) iswprint(x)
83 #undef ispunct
84 #define ispunct(x) iswpunct(x)
85 #undef isspace
86 #define isspace(x) iswspace(x)
87 #undef isupper
88 #define isupper(x) iswupper(x)
89 #undef isxdigit
90 #define isxdigit(x) iswxdigit(x)
91
92 #if !defined(iswblank) && !_lib_iswblank
93
iswblank(wint_t wc)94 static int iswblank(wint_t wc)
95 {
96 static int initialized;
97 static wctype_t wt;
98
99 if (!initialized) {
100 initialized = 1;
101 wt = wctype("blank");
102 }
103 return iswctype(wc, wt);
104 }
105
106 #endif
107
108 #else
109
110 #undef _lib_wctype
111
112 #ifndef isblank
113 #define isblank(x) ((x)==' '||(x)=='\t')
114 #endif
115
116 #ifndef isgraph
117 #define isgraph(x) (isprint(x)&&!isblank(x))
118 #endif
119
120 #endif
121
122 #if _DEBUG_MATCH
123 #include <error.h>
124 #endif
125
126 #define MAXGROUP 10
127
128 typedef struct {
129 char *beg[MAXGROUP];
130 char *end[MAXGROUP];
131 char *next_s;
132 short groups;
133 } Group_t;
134
135 typedef struct {
136 Group_t current;
137 Group_t best;
138 char *last_s;
139 char *next_p;
140 } Match_t;
141
142 #if _lib_mbtowc && MB_LEN_MAX > 1
143 #define mbgetchar(p) ((ast.locale.set&AST_LC_multibyte)?((ast.tmp_int=mbtowc(&ast.tmp_wchar,p,MB_CUR_MAX))>=0?((p+=ast.tmp_int),ast.tmp_wchar):0):(*p++))
144 #else
145 #define mbgetchar(p) (*p++)
146 #endif
147
148 #ifndef isxdigit
149 #define isxdigit(c) (((c)>='0'&&(c)<='9')||((c)>='a'&&(c)<='f')||((c)>='A'&&(c)<='F'))
150 #endif
151
152 #define getsource(s,e) (((s)>=(e))?0:mbgetchar(s))
153
154 #define COLL_MAX 3
155
156 #if !_lib_strcoll
157 #undef _lib_fnmatch
158 #endif
159
160 #if _lib_fnmatch
161 extern int fnmatch(const char *, const char *, int);
162 #endif
163
164 /*
165 * gobble chars up to <sub> or ) keeping track of (...) and [...]
166 * sub must be one of { '|', '&', 0 }
167 * 0 returned if s runs out
168 */
169
gobble(Match_t * mp,register char * s,register int sub,int * g,int clear)170 static char *gobble(Match_t * mp, register char *s, register int sub,
171 int *g, int clear)
172 {
173 register int p = 0;
174 register char *b = 0;
175 int c = 0;
176 int n;
177
178 for (;;)
179 switch (mbgetchar(s)) {
180 case '\\':
181 if (mbgetchar(s))
182 break;
183 /*FALLTHROUGH*/ case 0:
184 return 0;
185 case '[':
186 if (!b) {
187 if (*s == '!')
188 (void)mbgetchar(s);
189 b = s;
190 } else if (*s == '.' || *s == '=' || *s == ':')
191 c = *s;
192 break;
193 case ']':
194 if (b) {
195 if (*(s - 2) == c)
196 c = 0;
197 else if (b != (s - 1))
198 b = 0;
199 }
200 break;
201 case '(':
202 if (!b) {
203 p++;
204 n = (*g)++;
205 if (clear) {
206 if (!sub)
207 n++;
208 if (n < MAXGROUP)
209 mp->current.beg[n] = mp->current.end[n] = 0;
210 }
211 }
212 break;
213 case ')':
214 if (!b && p-- <= 0)
215 return sub ? 0 : s;
216 break;
217 case '|':
218 if (!b && !p && sub == '|')
219 return s;
220 break;
221 }
222 }
223
224 static int grpmatch(Match_t *, int, char *, register char *, char *, int);
225
226 #if _DEBUG_MATCH
227 #define RETURN(v) {error_info.indent--;return (v);}
228 #else
229 #define RETURN(v) {return (v);}
230 #endif
231
232 /*
233 * match a single pattern
234 * e is the end (0) of the substring in s
235 * r marks the start of a repeated subgroup pattern
236 */
237
238 static int
onematch(Match_t * mp,int g,char * s,char * p,char * e,char * r,int flags)239 onematch(Match_t * mp, int g, char *s, char *p, char *e, char *r,
240 int flags)
241 {
242 register int pc;
243 register int sc;
244 register int n;
245 register int icase;
246 char *olds;
247 char *oldp;
248
249 #if _DEBUG_MATCH
250 error_info.indent++;
251 error(-1, "onematch g=%d s=%-.*s p=%s r=%p flags=%o", g, e - s, s, p,
252 r, flags);
253 #endif
254 icase = flags & STR_ICASE;
255 do {
256 olds = s;
257 sc = getsource(s, e);
258 if (icase && isupper(sc))
259 sc = tolower(sc);
260 oldp = p;
261 switch (pc = mbgetchar(p)) {
262 case '(':
263 case '*':
264 case '?':
265 case '+':
266 case '@':
267 case '!':
268 if (pc == '(' || *p == '(') {
269 char *subp;
270 int oldg;
271
272 s = olds;
273 subp = p + (pc != '(');
274 oldg = g;
275 n = ++g;
276 if (g < MAXGROUP && (!r || g > mp->current.groups))
277 mp->current.beg[g] = mp->current.end[g] = 0;
278 if (!(p = gobble(mp, subp, 0, &g, !r)))
279 RETURN(0);
280 if (pc == '*' || pc == '?' || (pc == '+' && oldp == r)) {
281 if (onematch(mp, g, s, p, e, NiL, flags))
282 RETURN(1);
283 if (!sc || !getsource(s, e)) {
284 mp->current.groups = oldg;
285 RETURN(0);
286 }
287 }
288 if (pc == '*' || pc == '+') {
289 p = oldp;
290 sc = n - 1;
291 } else
292 sc = g;
293 pc = (pc != '!');
294 do {
295 if (grpmatch(mp, n, olds, subp, s, flags) == pc) {
296 if (n < MAXGROUP) {
297 if (!mp->current.beg[n]
298 || mp->current.beg[n] > olds)
299 mp->current.beg[n] = olds;
300 if (s > mp->current.end[n])
301 mp->current.end[n] = s;
302 #if _DEBUG_MATCH
303 error(-4,
304 "subgroup#%d n=%d beg=%p end=%p len=%d",
305 __LINE__, n, mp->current.beg[n],
306 mp->current.end[n],
307 mp->current.end[n] - mp->current.beg[n]);
308 #endif
309 }
310 if (onematch(mp, sc, s, p, e, oldp, flags)) {
311 if (p == oldp && n < MAXGROUP) {
312 if (!mp->current.beg[n]
313 || mp->current.beg[n] > olds)
314 mp->current.beg[n] = olds;
315 if (s > mp->current.end[n])
316 mp->current.end[n] = s;
317 #if _DEBUG_MATCH
318 error(-4,
319 "subgroup#%d n=%d beg=%p end=%p len=%d",
320 __LINE__, n, mp->current.beg[n],
321 mp->current.end[n],
322 mp->current.end[n] -
323 mp->current.beg[n]);
324 #endif
325 }
326 RETURN(1);
327 }
328 }
329 } while (s < e && mbgetchar(s));
330 mp->current.groups = oldg;
331 RETURN(0);
332 } else if (pc == '*') {
333 /*
334 * several stars are the same as one
335 */
336
337 while (*p == '*' && *(p + 1) != '(')
338 p++;
339 oldp = p;
340 switch (pc = mbgetchar(p)) {
341 case '@':
342 case '!':
343 case '+':
344 n = *p == '(';
345 break;
346 case '(':
347 case '[':
348 case '?':
349 case '*':
350 n = 1;
351 break;
352 case 0:
353 case '|':
354 case '&':
355 case ')':
356 mp->current.next_s = (flags & STR_MAXIMAL) ? e : olds;
357 mp->next_p = oldp;
358 mp->current.groups = g;
359 if (!pc
360 && (!mp->best.next_s
361 || ((flags & STR_MAXIMAL)
362 && mp->current.next_s > mp->best.next_s)
363 || (!(flags & STR_MAXIMAL)
364 && mp->current.next_s <
365 mp->best.next_s))) {
366 mp->best = mp->current;
367 #if _DEBUG_MATCH
368 error(-3, "best#%d groups=%d next=\"%s\"",
369 __LINE__, mp->best.groups, mp->best.next_s);
370 #endif
371 }
372 RETURN(1);
373 case '\\':
374 if (!(pc = mbgetchar(p)))
375 RETURN(0);
376 if (pc >= '0' && pc <= '9') {
377 n = pc - '0';
378 #if _DEBUG_MATCH
379 error(-2,
380 "backref#%d n=%d g=%d beg=%p end=%p len=%d",
381 __LINE__, n, g, mp->current.beg[n],
382 mp->current.end[n],
383 mp->current.end[n] - mp->current.beg[n]);
384 #endif
385 if (n <= g && mp->current.beg[n])
386 pc = *mp->current.beg[n];
387 }
388 /*FALLTHROUGH*/ default:
389 if (icase && isupper(pc))
390 pc = tolower(pc);
391 n = 0;
392 break;
393 }
394 p = oldp;
395 for (;;) {
396 if ((n || pc == sc)
397 && onematch(mp, g, olds, p, e, NiL, flags))
398 RETURN(1);
399 if (!sc)
400 RETURN(0);
401 olds = s;
402 sc = getsource(s, e);
403 if ((flags & STR_ICASE) && isupper(sc))
404 sc = tolower(sc);
405 }
406 } else if (pc != '?' && pc != sc)
407 RETURN(0);
408 break;
409 case 0:
410 if (!(flags & STR_MAXIMAL))
411 sc = 0;
412 /*FALLTHROUGH*/ case '|':
413 case '&':
414 case ')':
415 if (!sc) {
416 mp->current.next_s = olds;
417 mp->next_p = oldp;
418 mp->current.groups = g;
419 }
420 if (!pc
421 && (!mp->best.next_s
422 || ((flags & STR_MAXIMAL) && olds > mp->best.next_s)
423 || (!(flags & STR_MAXIMAL)
424 && olds < mp->best.next_s))) {
425 mp->best = mp->current;
426 mp->best.next_s = olds;
427 mp->best.groups = g;
428 #if _DEBUG_MATCH
429 error(-3, "best#%d groups=%d next=\"%s\"", __LINE__,
430 mp->best.groups, mp->best.next_s);
431 #endif
432 }
433 RETURN(!sc);
434 case '[':
435 {
436 /*UNDENT... */
437
438 int invert;
439 int x;
440 int ok = 0;
441 char *range;
442
443 if (!sc)
444 RETURN(0);
445 #if _lib_fnmatch
446 if (ast.locale.set & (1 << AST_LC_COLLATE))
447 range = p - 1;
448 else
449 #endif
450 range = 0;
451 n = 0;
452 if ((invert = (*p == '!')))
453 p++;
454 for (;;) {
455 oldp = p;
456 if (!(pc = mbgetchar(p))) {
457 RETURN(0);
458 } else if (pc == '['
459 && (*p == ':' || *p == '=' || *p == '.')) {
460 x = 0;
461 n = mbgetchar(p);
462 oldp = p;
463 for (;;) {
464 if (!(pc = mbgetchar(p)))
465 RETURN(0);
466 if (pc == n && *p == ']')
467 break;
468 x++;
469 }
470 (void)mbgetchar(p);
471 if (ok)
472 /*NOP*/;
473 else if (n == ':') {
474 switch (HASHNKEY5
475 (x, oldp[0], oldp[1], oldp[2], oldp[3],
476 oldp[4])) {
477 case HASHNKEY5(5, 'a', 'l', 'n', 'u', 'm'):
478 if (isalnum(sc))
479 ok = 1;
480 break;
481 case HASHNKEY5(5, 'a', 'l', 'p', 'h', 'a'):
482 if (isalpha(sc))
483 ok = 1;
484 break;
485 case HASHNKEY5(5, 'b', 'l', 'a', 'n', 'k'):
486 if (isblank(sc))
487 ok = 1;
488 break;
489 case HASHNKEY5(5, 'c', 'n', 't', 'r', 'l'):
490 if (iscntrl(sc))
491 ok = 1;
492 break;
493 case HASHNKEY5(5, 'd', 'i', 'g', 'i', 't'):
494 if (isdigit(sc))
495 ok = 1;
496 break;
497 case HASHNKEY5(5, 'g', 'r', 'a', 'p', 'h'):
498 if (isgraph(sc))
499 ok = 1;
500 break;
501 case HASHNKEY5(5, 'l', 'o', 'w', 'e', 'r'):
502 if (islower(sc))
503 ok = 1;
504 break;
505 case HASHNKEY5(5, 'p', 'r', 'i', 'n', 't'):
506 if (isprint(sc))
507 ok = 1;
508 break;
509 case HASHNKEY5(5, 'p', 'u', 'n', 'c', 't'):
510 if (ispunct(sc))
511 ok = 1;
512 break;
513 case HASHNKEY5(5, 's', 'p', 'a', 'c', 'e'):
514 if (isspace(sc))
515 ok = 1;
516 break;
517 case HASHNKEY5(5, 'u', 'p', 'p', 'e', 'r'):
518 if (icase ? islower(sc) : isupper(sc))
519 ok = 1;
520 break;
521 case HASHNKEY5(6, 'x', 'd', 'i', 'g', 'i'):
522 if (oldp[5] == 't' && isxdigit(sc))
523 ok = 1;
524 break;
525 #if _lib_wctype
526 default:
527 {
528 char cc[32];
529
530 if (x >= sizeof(cc))
531 x = sizeof(cc) - 1;
532 strncpy(cc, oldp, x);
533 cc[x] = 0;
534 if (iswctype(sc, wctype(cc)))
535 ok = 1;
536 }
537 break;
538 #endif
539 }
540 }
541 #if _lib_fnmatch
542 else if (ast.locale.set & (1 << AST_LC_COLLATE))
543 ok = -1;
544 #endif
545 else if (range)
546 goto getrange;
547 else if (*p == '-' && *(p + 1) != ']') {
548 (void)mbgetchar(p);
549 range = oldp;
550 } else
551 if ((isalpha(*oldp) && isalpha(*olds)
552 && tolower(*oldp) == tolower(*olds))
553 || sc == mbgetchar(oldp))
554 ok = 1;
555 n = 1;
556 } else if (pc == ']' && n) {
557 #if _lib_fnmatch
558 if (ok < 0) {
559 char pat[2 * UCHAR_MAX];
560 char str[COLL_MAX + 1];
561
562 if (p - range > sizeof(pat) - 2)
563 RETURN(0);
564 memcpy(pat, range, p - range);
565 pat[p - range] = '*';
566 pat[p - range + 1] = 0;
567 if (fnmatch(pat, olds, 0))
568 RETURN(0);
569 pat[p - range] = 0;
570 ok = 0;
571 for (x = 0; x < sizeof(str) - 1 && olds[x];
572 x++) {
573 str[x] = olds[x];
574 str[x + 1] = 0;
575 if (!fnmatch(pat, str, 0))
576 ok = 1;
577 else if (ok)
578 break;
579 }
580 s = olds + x;
581 break;
582 }
583 #endif
584 if (ok != invert)
585 break;
586 RETURN(0);
587 } else if (pc == '\\'
588 && (oldp = p, !(pc = mbgetchar(p)))) {
589 RETURN(0);
590 } else if (ok)
591 /*NOP*/;
592 #if _lib_fnmatch
593 else if (range
594 && !(ast.locale.set & (1 << AST_LC_COLLATE)))
595 #else
596 else if (range)
597 #endif
598 {
599 getrange:
600 #if _lib_mbtowc
601 if (ast.locale.set & AST_LC_multibyte) {
602 wchar_t sw;
603 wchar_t bw;
604 wchar_t ew;
605 int sz;
606 int bz;
607 int ez;
608
609 sz = mbtowc(&sw, olds, MB_CUR_MAX);
610 bz = mbtowc(&bw, range, MB_CUR_MAX);
611 ez = mbtowc(&ew, oldp, MB_CUR_MAX);
612 if (sw == bw || sw == ew)
613 ok = 1;
614 else if (sz > 1 || bz > 1 || ez > 1) {
615 if (sz == bz && sz == ez && sw > bw
616 && sw < ew)
617 ok = 1;
618 else
619 RETURN(0);
620 }
621 }
622 if (!ok)
623 #endif
624 if (icase && isupper(pc))
625 pc = tolower(pc);
626 x = mbgetchar(range);
627 if (icase && isupper(x))
628 x = tolower(x);
629 if (sc == x || sc == pc || (sc > x && sc < pc))
630 ok = 1;
631 if (*p == '-' && *(p + 1) != ']') {
632 (void)mbgetchar(p);
633 range = oldp;
634 } else
635 range = 0;
636 n = 1;
637 } else if (*p == '-' && *(p + 1) != ']') {
638 (void)mbgetchar(p);
639 #if _lib_fnmatch
640 if (ast.locale.set & (1 << AST_LC_COLLATE))
641 ok = -1;
642 else
643 #endif
644 range = oldp;
645 n = 1;
646 } else {
647 if (icase && isupper(pc))
648 pc = tolower(pc);
649 if (sc == pc)
650 ok = 1;
651 n = pc;
652 }
653 }
654
655 /*...INDENT */
656 }
657 break;
658 case '\\':
659 if (!(pc = mbgetchar(p)))
660 RETURN(0);
661 if (pc >= '0' && pc <= '9') {
662 n = pc - '0';
663 #if _DEBUG_MATCH
664 error(-2, "backref#%d n=%d g=%d beg=%p end=%p len=%d",
665 __LINE__, n, g, mp->current.beg[n],
666 mp->current.end[n],
667 mp->current.end[n] - mp->current.beg[n]);
668 #endif
669 if (n <= g && (oldp = mp->current.beg[n])) {
670 while (oldp < mp->current.end[n])
671 if (!*olds || *olds++ != *oldp++)
672 RETURN(0);
673 s = olds;
674 break;
675 }
676 }
677 /*FALLTHROUGH*/ default:
678 if (icase && isupper(pc))
679 pc = tolower(pc);
680 if (pc != sc)
681 RETURN(0);
682 break;
683 }
684 } while (sc);
685 RETURN(0);
686 }
687
688 /*
689 * match any pattern in a group
690 * | and & subgroups are parsed here
691 */
692
693 static int
grpmatch(Match_t * mp,int g,char * s,register char * p,char * e,int flags)694 grpmatch(Match_t * mp, int g, char *s, register char *p, char *e,
695 int flags)
696 {
697 register char *a;
698
699 #if _DEBUG_MATCH
700 error_info.indent++;
701 error(-1, "grpmatch g=%d s=%-.*s p=%s flags=%o", g, e - s, s, p,
702 flags);
703 #endif
704 do {
705 for (a = p; onematch(mp, g, s, a, e, NiL, flags); a++)
706 if (*(a = mp->next_p) != '&')
707 RETURN(1);
708 } while ((p = gobble(mp, p, '|', &g, 1)));
709 RETURN(0);
710 }
711
712 /*
713 * subgroup match
714 * 0 returned if no match
715 * otherwise number of subgroups matched returned
716 * match group begin offsets are even elements of sub
717 * match group end offsets are odd elements of sub
718 * the matched string is from s+sub[0] up to but not
719 * including s+sub[1]
720 */
721
strgrpmatch(const char * b,const char * p,int * sub,int n,int flags)722 int strgrpmatch(const char *b, const char *p, int *sub, int n, int flags)
723 {
724 register int i;
725 register char *s;
726 char *e;
727 Match_t match;
728
729 s = (char *) b;
730 match.last_s = e = s + strlen(s);
731 for (;;) {
732 match.best.next_s = 0;
733 match.current.groups = 0;
734 match.current.beg[0] = 0;
735 if ((i = grpmatch(&match, 0, s, (char *) p, e, flags))
736 || match.best.next_s) {
737 if (!(flags & STR_RIGHT) || (match.current.next_s == e)) {
738 if (!i)
739 match.current = match.best;
740 match.current.groups++;
741 match.current.end[0] = match.current.next_s;
742 #if _DEBUG_MATCH
743 error(-1,
744 "match i=%d s=\"%s\" p=\"%s\" flags=%o groups=%d next=\"%s\"",
745 i, s, p, flags, match.current.groups,
746 match.current.next_s);
747 #endif
748 break;
749 }
750 }
751 if ((flags & STR_LEFT) || s >= e)
752 return 0;
753 s++;
754 }
755 if ((flags & STR_RIGHT) && match.current.next_s != e)
756 return 0;
757 if (!sub)
758 return 1;
759 match.current.beg[0] = s;
760 s = (char *) b;
761 if (n > match.current.groups)
762 n = match.current.groups;
763 for (i = 0; i < n; i++) {
764 sub[i * 2] = match.current.end[i] ? match.current.beg[i] - s : 0;
765 sub[i * 2 + 1] =
766 match.current.end[i] ? match.current.end[i] - s : 0;
767 }
768 return n;
769 }
770
771 /*
772 * compare the string s with the shell pattern p
773 * returns 1 for match 0 otherwise
774 */
775
strmatch(const char * s,const char * p)776 int strmatch(const char *s, const char *p)
777 {
778 return strgrpmatch(s, p, NiL, 0, STR_MAXIMAL | STR_LEFT | STR_RIGHT);
779 }
780