1 /*---------------------------------------------------------------------------
2 
3   match.c
4 
5   The match() routine recursively compares a string to a "pattern" (regular
6   expression), returning TRUE if a match is found or FALSE if not.  This
7   version is specifically for use with unzip.c:  as did the previous match()
8   routines from SEA and J. Kercheval, it leaves the case (upper, lower, or
9   mixed) of the string alone, but converts any uppercase characters in the
10   pattern to lowercase if indicated by the global var pInfo->lcflag (which
11   is to say, string is assumed to have been converted to lowercase already,
12   if such was necessary).
13 
14   GRR:  reversed order of text, pattern in matche() (now same as match());
15         added ignore_case/ic flags, Case() macro.
16 
17   PK:   replaced matche() with recmatch() from Zip, modified to have an
18         ignore_case argument; replaced test frame with simpler one.
19 
20   ---------------------------------------------------------------------------
21 
22   Copyright on recmatch() from Zip's util.c (although recmatch() was almost
23   certainly written by Mark Adler...ask me how I can tell :-) ):
24 
25      Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
26      Kai Uwe Rommel and Igor Mandrichenko.
27 
28 This program is free software; you can redistribute it and/or
29 modify it under the terms of the GNU General Public License
30 as published by the Free Software Foundation; either version 2
31 of the License, or (at your option) any later version.
32 
33 This program is distributed in the hope that it will be useful,
34 but WITHOUT ANY WARRANTY; without even the implied warranty of
35 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
36 GNU General Public License for more details.
37 
38 You should have received a copy of the GNU General Public License
39 along with this program; if not, write to the Free Software
40 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
41 
42   ---------------------------------------------------------------------------
43 
44    Info-ZIP's home WWW site is listed on Yahoo and is at:
45 
46    http://www.cdrom.com/pub/infozip/
47 
48    e-mail : Zip-Bugs@lists.wku.edu
49 
50 
51   ---------------------------------------------------------------------------
52 
53   Match the pattern (wildcard) against the string (fixed):
54 
55      match(string, pattern, ignore_case);
56 
57   returns TRUE if string matches pattern, FALSE otherwise.  In the pattern:
58 
59      `*' matches any sequence of characters (zero or more)
60      `?' matches any character
61      [SET] matches any character in the specified set,
62      [!SET] or [^SET] matches any character not in the specified set.
63 
64   A set is composed of characters or ranges; a range looks like ``character
65   hyphen character'' (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the minimal set of
66   characters allowed in the [..] pattern construct.  Other characters are
67   allowed (ie. 8 bit characters) if your system will support them.
68 
69   To suppress the special syntactic significance of any of ``[]*?!^-\'', in-
70   side or outside a [..] construct and match the character exactly, precede
71   it with a ``\'' (backslash).
72 
73   Note that "*.*" and "*." are treated specially under MS-DOS if DOSWILD is
74   defined.  See the DOSWILD section below for an explanation.
75 
76   ---------------------------------------------------------------------------*/
77 
78 #include <string.h>
79 #include <ctype.h>
80 #include "match.h" /* define ToLower() in here (for Unix, define ToLower
81                        * to be macro (using isupper()); otherwise just use
82                        * tolower() */
83 
84 #define Case(x)  (ic? tolower(x) : (int)(x))
85 
86 /* dd_match() is a shell to recmatch() to return only Boolean values. */
87 static int recmatch(uch *pattern, uch *string, int ignore_case);
88 
dd_match(const char * string,const char * pattern,int ignore_case)89 int dd_match(const char *string,const char *pattern,int ignore_case)
90 {
91 #if (defined(__MSDOS__) && defined(DOSWILD))
92     char *dospattern;
93     int j = strlen(pattern);
94 
95 /*---------------------------------------------------------------------------
96     Optional MS-DOS preprocessing section:  compare last three chars of the
97     wildcard to "*.*" and translate to "*" if found; else compare the last
98     two characters to "*." and, if found, scan the non-wild string for dots.
99     If in the latter case a dot is found, return failure; else translate the
100     "*." to "*".  In either case, continue with the normal (Unix-like) match
101     procedure after translation.  (If not enough memory, default to normal
102     match.)  This causes "a*.*" and "a*." to behave as MS-DOS users expect.
103   ---------------------------------------------------------------------------*/
104 
105     if ((dospattern = (char *)malloc(j+1)) != NULL) {
106         strcpy(dospattern, pattern);
107         if (!strcmp(dospattern+j-3, "*.*")) {
108             dospattern[j-2] = '\0';                    /* nuke the ".*" */
109         } else if (!strcmp(dospattern+j-2, "*.")) {
110             char *p = strchr(string, '.');
111 
112             if (p) {   /* found a dot:  match fails */
113                 free(dospattern);
114                 return 0;
115             }
116             dospattern[j-1] = '\0';                    /* nuke the end "." */
117         }
118         j = recmatch((uch *)dospattern, (uch *)string, ignore_case);
119         free(dospattern);
120         return j == 1;
121     } else
122 #endif /* __MSDOS__ && DOSWILD */
123     return recmatch((uch *)pattern, (uch *)string, ignore_case) == 1;
124 }
125 
126 
recmatch(uch * p,uch * s,int ic)127 static int recmatch(uch *p,uch *s,int ic)
128    /*  uch *p;  			 sh pattern to match */
129    /*  uch *s;  			 string to which to match it */
130    /*  int ic;  			 true for case insensitivity */
131 /* Recursively compare the sh pattern p with the string s and return 1 if
132  * they match, and 0 or 2 if they don't or if there is a syntax error in the
133  * pattern.  This routine recurses on itself no more deeply than the number
134  * of characters in the pattern. */
135 {
136     int c;       /* pattern char or start of range in [-] loop */
137 
138     /* Get first character, the pattern for new recmatch calls follows */
139     c = *p++;
140 
141     /* If that was the end of the pattern, match if string empty too */
142     if (c == 0)
143         return *s == 0;
144 
145     /* '?' (or '%') matches any character (but not an empty string) */
146 #ifdef VMS
147     if (c == '%')         /* GRR:  make this conditional, too? */
148 #else /* !VMS */
149     if (c == '?')
150 #endif /* ?VMS */
151         return *s ? recmatch(p, s + 1, ic) : 0;
152 
153     /* '*' matches any number of characters, including zero */
154 #ifdef AMIGA
155     if (c == '#' && *p == '?')     /* "#?" is Amiga-ese for "*" */
156         c = '*', p++;
157 #endif /* AMIGA */
158     if (c == '*') {
159         if (*p == 0)
160             return 1;
161         for (; *s; s++)
162             if ((c = recmatch(p, s, ic)) != 0)
163                 return (int)c;
164         return 2;       /* 2 means give up--match will return false */
165     }
166 
167 #ifndef VMS             /* No bracket matching in VMS */
168     /* Parse and process the list of characters and ranges in brackets */
169     if (c == '[') {
170         int e;          /* flag true if next char to be taken literally */
171         uch *q;         /* pointer to end of [-] group */
172         int r;          /* flag true to match anything but the range */
173 
174         if (*s == 0)                           /* need a character to match */
175             return 0;
176         p += (r = (*p == '!' || *p == '^'));   /* see if reverse */
177         for (q = p, e = 0; *q; q++)            /* find closing bracket */
178             if (e)
179                 e = 0;
180             else
181                 if (*q == '\\')      /* GRR:  change to ^ for MS-DOS, OS/2? */
182                     e = 1;
183                 else if (*q == ']')
184                     break;
185         if (*q != ']')               /* nothing matches if bad syntax */
186             return 0;
187         for (c = 0, e = *p == '-'; p < q; p++) {  /* go through the list */
188             if (e == 0 && *p == '\\')             /* set escape flag if \ */
189                 e = 1;
190             else if (e == 0 && *p == '-')         /* set start of range if - */
191                 c = *(p-1);
192             else {
193                 int cc = Case(*s);
194 
195                 if (*(p+1) != '-')
196                     for (c = c ? c : *p; c <= *p; c++)  /* compare range */
197                         if (Case(c) == cc)
198                             return r ? 0 : recmatch(q + 1, s + 1, ic);
199                 c = e = 0;   /* clear range, escape flags */
200             }
201         }
202         return r ? recmatch(q + 1, s + 1, ic) : 0;  /* bracket match failed */
203     }
204 #endif /* !VMS */
205 
206     /* if escape ('\'), just compare next character */
207     if (c == '\\' && (c = *p++) == 0)     /* if \ at end, then syntax error */
208         return 0;
209 
210     /* just a character--compare it */
211     return Case((uch)c) == Case(*s) ? recmatch(p, ++s, ic) : 0;
212 
213 } /* end function recmatch() */
214 
215 
216 
217 
218 #ifdef WILD_STAT_BUG   /* Turbo/Borland C, Watcom C, VAX C, Atari MiNT libs */
219 
/* dd_iswild() reports whether p contains an unescaped wildcard character.
 *
 * A backslash followed by another character quotes that character, so the
 * pair is skipped as a unit.  Which characters count as wildcards depends
 * on the platform macros:  '%' and '*' on VMS; '?', '*' and '[' elsewhere,
 * plus the two-character sequence "#?" on AMIGA.
 *
 * Returns 1 if a wildcard is found, 0 otherwise. */
int dd_iswild(const char *p)
{
    while (*p != '\0') {
        if (*p == '\\' && p[1] != '\0') {
            p += 2;             /* skip the backslash and the quoted char */
            continue;
        }
#ifdef VMS
        if (*p == '%' || *p == '*')
            return 1;
#else /* !VMS */
        if (*p == '?' || *p == '*' || *p == '[')
            return 1;
#ifdef AMIGA
        if (*p == '#' && p[1] == '?')
            return 1;
#endif /* AMIGA */
#endif /* ?VMS */
        p++;
    }

    return 0;

} /* end function dd_iswild() */
239 
240 #endif /* WILD_STAT_BUG */
241 
242 
243 
244 
245 #ifdef TEST_MATCH
/* Replaced gets() with fgets().  gets() is dangerous and is known to
   cause security problems, because it does not check for buffer
   overflow.
   Erwin Waterlander, Jul 22 1998 */
250 
/* put(s) writes s to stdout and flushes, so that a prompt is visible before
 * the program blocks on input. */
#define put(s) do { fputs(s, stdout); fflush(stdout); } while (0)

/* Read one line from stdin into buf (capacity size) and strip the line
 * terminator.  Returns 1 if a non-empty line was read, 0 on an empty line,
 * end of file or read error. */
static int read_line(char *buf, size_t size)
{
    if (fgets(buf, (int)size, stdin) == NULL)
        return 0;
    buf[strcspn(buf, "\r\n")] = '\0';   /* fgets() keeps the newline */
    return buf[0] != '\0';
}

/* Interactive test frame:  repeatedly asks for a pattern and then for
 * strings to match against it, printing the case-sensitive and
 * case-insensitive results of dd_match().  An empty line (or end of input)
 * at the string prompt returns to the pattern prompt; at the pattern prompt
 * it ends the program. */
int main(void)
{
    char pat[256], str[256];

    for (;;) {
        put("Pattern (return to exit): ");
        if (!read_line(pat, sizeof pat))
            break;
        for (;;) {
            put("String (return for new pattern): ");
            if (!read_line(str, sizeof str))
                break;
            printf("Case sensitive: %s  insensitive: %s\n",
              dd_match(str, pat, 0) ? "YES" : "NO",
              dd_match(str, pat, 1) ? "YES" : "NO");
        }
    }
    return 0;
}
274 
275 #endif /* TEST_MATCH */
276