1 /*---------------------------------------------------------------------------
2
3 match.c
4
5 The match() routine recursively compares a string to a "pattern" (regular
6 expression), returning TRUE if a match is found or FALSE if not. This
7 version is specifically for use with unzip.c: as did the previous match()
8 routines from SEA and J. Kercheval, it leaves the case (upper, lower, or
9 mixed) of the string alone, but converts any uppercase characters in the
10 pattern to lowercase if indicated by the global var pInfo->lcflag (which
11 is to say, string is assumed to have been converted to lowercase already,
12 if such was necessary).
13
14 GRR: reversed order of text, pattern in matche() (now same as match());
15 added ignore_case/ic flags, Case() macro.
16
17 PK: replaced matche() with recmatch() from Zip, modified to have an
18 ignore_case argument; replaced test frame with simpler one.
19
20 ---------------------------------------------------------------------------
21
22 Copyright on recmatch() from Zip's util.c (although recmatch() was almost
23 certainly written by Mark Adler...ask me how I can tell :-) ):
24
25 Copyright (C) 1990-1992 Mark Adler, Richard B. Wales, Jean-loup Gailly,
26 Kai Uwe Rommel and Igor Mandrichenko.
27
28 This program is free software; you can redistribute it and/or
29 modify it under the terms of the GNU General Public License
30 as published by the Free Software Foundation; either version 2
31 of the License, or (at your option) any later version.
32
33 This program is distributed in the hope that it will be useful,
34 but WITHOUT ANY WARRANTY; without even the implied warranty of
35 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36 GNU General Public License for more details.
37
38 You should have received a copy of the GNU General Public License
39 along with this program; if not, write to the Free Software
40 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
41
42 ---------------------------------------------------------------------------
43
44 Info-ZIP's home WWW site is listed on Yahoo and is at:
45
46 http://www.cdrom.com/pub/infozip/
47
48 e-mail : Zip-Bugs@lists.wku.edu
49
50
51 ---------------------------------------------------------------------------
52
53 Match the pattern (wildcard) against the string (fixed):
54
55 match(string, pattern, ignore_case);
56
57 returns TRUE if string matches pattern, FALSE otherwise. In the pattern:
58
59 `*' matches any sequence of characters (zero or more)
60 `?' matches any character
61 [SET] matches any character in the specified set,
62 [!SET] or [^SET] matches any character not in the specified set.
63
64 A set is composed of characters or ranges; a range looks like ``character
65 hyphen character'' (as in 0-9 or A-Z). [0-9a-zA-Z_] is the minimal set of
66 characters allowed in the [..] pattern construct. Other characters are
67 allowed (i.e., 8-bit characters) if your system will support them.
68
69 To suppress the special syntactic significance of any of ``[]*?!^-\'', in-
70 side or outside a [..] construct and match the character exactly, precede
71 it with a ``\'' (backslash).
72
73 Note that "*.*" and "*." are treated specially under MS-DOS if DOSWILD is
74 defined. See the DOSWILD section below for an explanation.
75
76 ---------------------------------------------------------------------------*/
77
78 #include <string.h>
79 #include <ctype.h>
80 #include "match.h" /* define ToLower() in here (for Unix, define ToLower
81 * to be macro (using isupper()); otherwise just use
82 * tolower() */
83
/* Case(x): fold x to lowercase with tolower() when the variable `ic' is
 * non-zero, otherwise yield x unchanged (as an int).  The macro does not
 * take `ic' as an argument: it must be visible at the point of expansion. */
84 #define Case(x) (ic? tolower(x) : (int)(x))
85
86 /* dd_match() is a shell to recmatch() to return only Boolean values. */
87 static int recmatch(uch *pattern, uch *string, int ignore_case);
88
dd_match(const char * string,const char * pattern,int ignore_case)89 int dd_match(const char *string,const char *pattern,int ignore_case)
90 {
91 #if (defined(__MSDOS__) && defined(DOSWILD))
92 char *dospattern;
93 int j = strlen(pattern);
94
95 /*---------------------------------------------------------------------------
96 Optional MS-DOS preprocessing section: compare last three chars of the
97 wildcard to "*.*" and translate to "*" if found; else compare the last
98 two characters to "*." and, if found, scan the non-wild string for dots.
99 If in the latter case a dot is found, return failure; else translate the
100 "*." to "*". In either case, continue with the normal (Unix-like) match
101 procedure after translation. (If not enough memory, default to normal
102 match.) This causes "a*.*" and "a*." to behave as MS-DOS users expect.
103 ---------------------------------------------------------------------------*/
104
105 if ((dospattern = (char *)malloc(j+1)) != NULL) {
106 strcpy(dospattern, pattern);
107 if (!strcmp(dospattern+j-3, "*.*")) {
108 dospattern[j-2] = '\0'; /* nuke the ".*" */
109 } else if (!strcmp(dospattern+j-2, "*.")) {
110 char *p = strchr(string, '.');
111
112 if (p) { /* found a dot: match fails */
113 free(dospattern);
114 return 0;
115 }
116 dospattern[j-1] = '\0'; /* nuke the end "." */
117 }
118 j = recmatch((uch *)dospattern, (uch *)string, ignore_case);
119 free(dospattern);
120 return j == 1;
121 } else
122 #endif /* __MSDOS__ && DOSWILD */
123 return recmatch((uch *)pattern, (uch *)string, ignore_case) == 1;
124 }
125
126
/*
 * recmatch() -- recursively compare the sh pattern p with the string s.
 *
 *   p   sh pattern to match
 *   s   string to which to match it
 *   ic  true for case insensitivity
 *
 * Returns 1 if they match, and 0 or 2 if they don't or if there is a syntax
 * error in the pattern.  The value 2 is produced when a '*' has tried every
 * remaining position in the string without success; an enclosing '*' loop
 * passes it straight up instead of retrying.  This routine recurses on
 * itself no more deeply than the number of characters in the pattern.
 */
static int recmatch(uch *p, uch *s, int ic)
{
    int c;      /* pattern char or start of range in [-] loop */

    /* Get first character, the pattern for new recmatch calls follows */
    c = *p++;

    /* If that was the end of the pattern, match if string empty too */
    if (c == 0)
        return *s == 0;

    /* '?' (or '%') matches any character (but not an empty string) */
#ifdef VMS
    if (c == '%')       /* GRR:  make this conditional, too? */
#else /* !VMS */
    if (c == '?')
#endif /* ?VMS */
        return *s ? recmatch(p, s + 1, ic) : 0;

    /* '*' matches any number of characters, including zero */
#ifdef AMIGA
    if (c == '#' && *p == '?')          /* "#?" is Amiga-ese for "*" */
        c = '*', p++;
#endif /* AMIGA */
    if (c == '*') {
        /* A run of adjacent stars is equivalent to a single star.  Collapse
         * it here: the scan loop below never tries the empty remainder of
         * the string, so without this "a**" would fail to match "a". */
        for (;;) {
            if (*p == '*')
                p++;
#ifdef AMIGA
            else if (*p == '#' && p[1] == '?')
                p += 2;
#endif /* AMIGA */
            else
                break;
        }
        if (*p == 0)                    /* trailing star matches the rest */
            return 1;
        for (; *s; s++)
            if ((c = recmatch(p, s, ic)) != 0)
                return c;               /* 1 = matched, 2 = give up */
        return 2;       /* 2 means give up--match will return false */
    }

#ifndef VMS             /* No bracket matching in VMS */
    /* Parse and process the list of characters and ranges in brackets */
    if (c == '[') {
        int e;          /* flag true if next char to be taken literally */
        uch *q;         /* pointer to end of [-] group */
        int r;          /* flag true to match anything but the range */

        if (*s == 0)                    /* need a character to match */
            return 0;
        p += (r = (*p == '!' || *p == '^'));    /* see if reverse */
        for (q = p, e = 0; *q; q++)     /* find closing bracket */
            if (e)
                e = 0;
            else
                if (*q == '\\')         /* GRR:  change to ^ for MS-DOS, OS/2? */
                    e = 1;
                else if (*q == ']')
                    break;
        if (*q != ']')                  /* nothing matches if bad syntax */
            return 0;
        /* A leading '-' is taken literally (e starts out set for it). */
        for (c = 0, e = *p == '-'; p < q; p++) {  /* go through the list */
            if (e == 0 && *p == '\\')   /* set escape flag if \ */
                e = 1;
            else if (e == 0 && *p == '-')   /* set start of range if - */
                c = *(p-1);
            else {
                int cc = Case(*s);

                /* Skip the comparison when this char is the low end of a
                 * range; it is handled once the high end is reached. */
                if (*(p+1) != '-')
                    for (c = c ? c : *p; c <= *p; c++)  /* compare range */
                        if (Case(c) == cc)
                            return r ? 0 : recmatch(q + 1, s + 1, ic);
                c = e = 0;              /* clear range, escape flags */
            }
        }
        return r ? recmatch(q + 1, s + 1, ic) : 0;  /* bracket match failed */
    }
#endif /* !VMS */

    /* if escape ('\'), just compare next character */
    if (c == '\\' && (c = *p++) == 0)   /* if \ at end, then syntax error */
        return 0;

    /* just a character--compare it */
    return Case((uch)c) == Case(*s) ? recmatch(p, ++s, ic) : 0;

} /* end function recmatch() */
214
215
216
217
218 #ifdef WILD_STAT_BUG /* Turbo/Borland C, Watcom C, VAX C, Atari MiNT libs */
219
/*
 * dd_iswild() -- report whether p contains an unescaped wildcard character.
 *
 * A backslash followed by another character hides that character from the
 * scan; a backslash at the very end of the string is treated as an ordinary
 * character.  Returns 1 if a wildcard is found, 0 otherwise.
 */
int dd_iswild(const char *p)
{
    while (*p != '\0') {
        const char ch = *p;

        if (ch == '\\' && p[1] != '\0') {
            p += 2;                     /* step over the escaped pair */
            continue;
        }

#ifdef VMS
        if (ch == '%' || ch == '*')
            return 1;
#else /* !VMS */
        if (ch == '?' || ch == '*' || ch == '[')
            return 1;
#ifdef AMIGA
        if (ch == '#' && p[1] == '?')   /* "#?" is Amiga-ese for "*" */
            return 1;
#endif /* AMIGA */
#endif /* ?VMS */

        p++;
    }

    return 0;

} /* end function dd_iswild() */
239
240 #endif /* WILD_STAT_BUG */
241
242
243
244
245 #ifdef TEST_MATCH
246 /* Replaced gets() with fgets(). gets() is dangerous and is a known
247 source of security problems because it does not check for buffer
248 overflow.
249 Erwin Waterlander, Jul 22 1998 */
250
/* put(s): write the string s to stdout (no newline is appended) and flush
 * stdout immediately.  Expands to a braced block, not an expression. */
251 #define put(s) { fputs(s, stdout); fflush(stdout); }
252
/*
 * Interactive test driver (compiled only when TEST_MATCH is defined).
 *
 * Repeatedly reads a pattern, then a series of strings, and prints whether
 * each string matches the pattern case-sensitively and case-insensitively.
 * An empty line at the string prompt returns to the pattern prompt; an empty
 * line at the pattern prompt, or end-of-file at either prompt, ends the
 * program.
 */
int main(void)
{
    char pat[256], str[256];

    for (;;) {
        put("Pattern (return to exit): ");
        if (fgets(pat, sizeof pat, stdin) == NULL)
            break;                          /* EOF or read error */
        pat[strcspn(pat, "\n")] = '\0';     /* fgets() keeps the newline */
        if (!pat[0])
            break;
        for (;;) {
            put("String (return for new pattern): ");
            if (fgets(str, sizeof str, stdin) == NULL)
                return 0;                   /* EOF or read error */
            str[strcspn(str, "\n")] = '\0';
            if (!str[0])
                break;
            printf("Case sensitive: %s  insensitive: %s\n",
              dd_match(str, pat, 0) ? "YES" : "NO",
              dd_match(str, pat, 1) ? "YES" : "NO");
        }
    }
    return 0;
}
274
275 #endif /* TEST_MATCH */
276