xref: /original-bsd/lib/libc/gen/glob.c (revision 2bd07fe6)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * Redistribution and use in source and binary forms are permitted
9  * provided that the above copyright notice and this paragraph are
10  * duplicated in all such forms and that any documentation,
11  * advertising materials, and other materials related to such
12  * distribution and use acknowledge that the software was developed
13  * by the University of California, Berkeley.  The name of the
14  * University may not be used to endorse or promote products derived
15  * from this software without specific prior written permission.
16  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
19  */
20 
21 #if defined(LIBC_SCCS) && !defined(lint)
22 static char sccsid[] = "@(#)glob.c	5.1 (Berkeley) 02/15/90";
23 #endif /* LIBC_SCCS and not lint */
24 
25 /*
26  * Glob: the interface is a superset of the one defined in POSIX 1003.2,
27  * draft 9.
28  *
29  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
30  *
31  * Optional extra services, controlled by flags not defined by POSIX:
32  *	GLOB_QUOTE: escaping convention: \ inhibits any special meaning
33 		the following character might have (except \ at end of
34  *		string is kept);
35  */
36 
37 #include <sys/param.h>
38 #include <sys/stat.h>
39 #include <dirent.h>
40 #include <glob.h>
41 #include <ctype.h>
42 #include <errno.h>
43 #include <string.h>
44 #include <stdio.h>
45 
46 char *malloc(), *realloc();
47 
48 typedef int bool_t;
49 
50 #define	DOLLAR		'$'
51 #define	DOT		'.'
52 #define	EOS		'\0'
53 #define	LBRACKET	'['
54 #define	NOT		'!'
55 #define	QUESTION	'?'
56 #define	QUOTE		'\\'
57 #define	RANGE		'-'
58 #define	RBRACKET	']'
59 #define	SEP		'/'
60 #define	STAR		'*'
61 #define	TILDE		'~'
62 #define	UNDERSCORE	'_'
63 
64 #define	METABIT		0x80
65 #define	META(c)		((c)|METABIT)
66 #define	M_ALL		META('*')
67 #define	M_END		META(']')
68 #define	M_NOT		META('!')
69 #define	M_ONE		META('?')
70 #define	M_RNG		META('-')
71 #define	M_SET		META('[')
72 #define	ismeta(c)	(((c)&METABIT) != 0)
73 
74 static
75 compare(p, q)
76 	char **p, **q;
77 {
78 	return(strcmp(*p, *q));
79 }
80 
81 
82 /*
83  * The main glob() routine: compiles the pattern (optionally processing
84  * quotes), calls glob1() to do the real pattern matching, and finally
85  * sorts the list (unless unsorted operation is requested).  Returns 0
86  * if things went well, nonzero if errors occurred.  It is not an error
87  * to find no matches.
88  */
89 glob(pattern, flags, errfunc, pglob)
90 	char *pattern;
91 	int flags, (*errfunc)();
92 	glob_t *pglob;
93 {
94 	int err, oldpathc;
95 	char *bufnext, *bufend, *compilebuf, *compilepat, *patnext;
96 	char c, patbuf[MAXPATHLEN+1];
97 
98 	patnext = pattern;
99 	if (!(flags & GLOB_APPEND)) {
100 		pglob->gl_pathc = 0;
101 		pglob->gl_pathv = NULL;
102 		if (!(flags & GLOB_DOOFFS))
103 			pglob->gl_offs = 0;
104 	}
105 	pglob->gl_flags = flags;
106 	pglob->gl_errfunc = errfunc;
107 	oldpathc = pglob->gl_pathc;
108 
109 	bufnext = patbuf;
110 	bufend = bufnext+MAXPATHLEN;
111 
112 	compilebuf = bufnext;
113 	compilepat = patnext;
114 	while (bufnext < bufend && (c = *patnext++) != EOS) {
115 		switch (c) {
116 		case LBRACKET:
117 			c = *patnext;
118 			if (c == NOT)
119 				++patnext;
120 			if (*patnext == EOS ||
121 			    strchr(patnext+1, RBRACKET) == NULL) {
122 				*bufnext++ = LBRACKET;
123 				if (c == NOT)
124 					--patnext;
125 				break;
126 			}
127 			*bufnext++ = M_SET;
128 			if (c == NOT)
129 				*bufnext++ = M_NOT;
130 			c = *patnext++;
131 			do {
132 				/* todo: quoting */
133 				*bufnext++ = c;
134 				if (*patnext == RANGE &&
135 				    (c = patnext[1]) != RBRACKET) {
136 					*bufnext++ = M_RNG;
137 					*bufnext++ = c;
138 					patnext += 2;
139 				}
140 			} while ((c = *patnext++) != RBRACKET);
141 			*bufnext++ = M_END;
142 			break;
143 		case QUESTION:
144 			*bufnext++ = M_ONE;
145 			break;
146 		case QUOTE:
147 			if (!(flags & GLOB_QUOTE))
148 				*bufnext++ = QUOTE;
149 			else {
150 				if ((c = *patnext++) == EOS) {
151 					c = QUOTE;
152 					--patnext;
153 				}
154 				*bufnext++ = c;
155 			}
156 			break;
157 		case STAR:
158 			*bufnext++ = M_ALL;
159 			break;
160 		default:
161 			*bufnext++ = c;
162 			break;
163 		}
164 	}
165 	*bufnext = EOS;
166 
167 	if ((err = glob1(patbuf, pglob)) != 0)
168 		return(err);
169 
170 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
171 		if (!(flags & GLOB_QUOTE))
172 			(void)strcpy(compilebuf, compilepat);
173 		else {
174 			/*
175 			 * copy pattern, interpreting quotes; this is slightly
176 			 * different than the interpretation of quotes above
177 			 * -- which should prevail?
178 			 */
179 			while (*compilepat != EOS) {
180 				if (*compilepat == QUOTE) {
181 					if (*++compilepat == EOS)
182 						--compilepat;
183 				}
184 				*compilebuf++ = *compilepat++;
185 			}
186 			*compilebuf = EOS;
187 		}
188 		return(globextend(patbuf, pglob));
189 	} else if (!(flags & GLOB_NOSORT))
190 		qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc),
191 		    pglob->gl_pathc - oldpathc, sizeof(char*), compare);
192 	return(0);
193 }
194 
195 static
196 glob1(pattern, pglob)
197 	char *pattern;
198 	glob_t *pglob;
199 {
200 	char pathbuf[MAXPATHLEN+1];
201 
202 	/*
203 	 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
204 	if (*pattern == EOS)
205 		return(0);
206 	return(glob2(pathbuf, pathbuf, pattern, pglob));
207 }
208 
209 /*
210  * functions glob2 and glob3 are mutually recursive; there is one level
211  * of recursion for each segment in the pattern that contains one or
212  * more meta characters.
213  */
214 static
215 glob2(pathbuf, pathend, pattern, pglob)
216 	char *pathbuf, *pathend, *pattern;
217 	glob_t *pglob;
218 {
219 	struct stat sbuf;
220 	bool_t anymeta = 0;
221 	char *p, *q;
222 
223 	/*
224 	 * loop over pattern segments until end of pattern or until
225 	 * segment with meta character found.
226 	 */
227 	for (;;) {
228 		if (*pattern == EOS) {		/* end of pattern? */
229 			*pathend = EOS;
230 			if (stat(pathbuf, &sbuf) != 0)
231 				return(0);	/* need error call here? */
232 			if ((pglob->gl_flags & GLOB_MARK) &&
233 			    pathend[-1] != SEP && S_ISDIR(sbuf.st_mode)) {
234 				*pathend++ = SEP;
235 				*pathend = EOS;
236 			}
237 			return(globextend(pathbuf, pglob));
238 		}
239 
240 		/* find end of next segment, copy tentatively to pathend */
241 		q = pathend;
242 		p = pattern;
243 		while (*p != EOS && *p != SEP) {
244 			if (ismeta(*p))
245 				anymeta = 1;
246 			*q++ = *p++;
247 		}
248 
249 		if (!anymeta) {		/* no expansion, do next segment */
250 			pathend = q;
251 			pattern = p;
252 			while (*pattern == SEP)
253 				*pathend++ = *pattern++;
254 		} else			/* need expansion, recurse */
255 			return(glob3(pathbuf, pathend, pattern, p, pglob));
256 	}
257 	/* NOTREACHED */
258 }
259 
260 static
261 glob3(pathbuf, pathend, pattern, restpattern, pglob)
262 	char *pathbuf, *pathend, *pattern, *restpattern;
263 	glob_t *pglob;
264 {
265 	extern int errno;
266 	DIR *dirp;
267 	struct dirent *dp;
268 	int len, err;
269 
270 	*pathend = EOS;
271 	errno = 0;
272 	if (!(dirp = opendir(pathbuf)))
273 		/* todo: don't call for ENOENT or ENOTDIR? */
274 		if (pglob->gl_errfunc &&
275 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
276 		    (pglob->gl_flags & GLOB_ERR))
277 			return(GLOB_ABEND);
278 		else
279 			return(0);
280 
281 	err = 0;
282 
283 	/* search directory for matching names */
284 	while ((dp = readdir(dirp))) {
285 		/* initial DOT must be matched literally */
286 		if (dp->d_name[0] == DOT && *pattern != DOT)
287 			continue;
288 		if (!match(dp->d_name, pattern, restpattern))
289 			continue;
290 		len = dp->d_namlen;
291 		(void)strcpy(pathend, dp->d_name);
292 		err = glob2(pathbuf, pathend+len, restpattern, pglob);
293 		if (err)
294 			break;
295 	}
296 	/* todo: check error from readdir? */
297 	(void)closedir(dirp);
298 	return(err);
299 }
300 
301 
302 /*
303  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
304  * add the new item, and update gl_pathc.
305  *
306  * This assumes the BSD realloc, which only copies the block when its size
307  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
308  * behavior.
309  *
310  * Return 0 if new item added, error code if memory couldn't be allocated.
311  *
312  * Invariant of the glob_t structure:
313  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
314  *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
315  */
316 static
317 globextend(path, pglob)
318 	char *path;
319 	glob_t *pglob;
320 {
321 	register char **pathv;
322 	register int i;
323 	u_int copysize, newsize;
324 	char *copy;
325 
326 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
327 	pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize);
328 	if (pathv == NULL)
329 		return(GLOB_NOSPACE);
330 
331 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
332 		/* first time around -- clear initial gl_offs items */
333 		pathv += pglob->gl_offs;
334 		for (i = pglob->gl_offs; --i >= 0; )
335 			*--pathv = NULL;
336 	}
337 	pglob->gl_pathv = pathv;
338 
339 	copysize = strlen(path) + 1;
340 	if ((copy = malloc(copysize)) != NULL) {
341 		(void)strcpy(copy, path);
342 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
343 	}
344 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
345 	return((copy == NULL) ? GLOB_NOSPACE : 0);
346 }
347 
348 
349 /*
350  * pattern matching function for filenames.  Each occurrence of the *
351  * pattern causes a recursion level.
352  */
353 static bool_t
354 match(name, pat, patend)
355 	register char *name, *pat, *patend;
356 {
357 	bool_t ok, negate_range;
358 	char c, k;
359 
360 	while (pat < patend) {
361 		c = *pat++;
362 		switch (c & 0xff) {
363 		case M_ALL:
364 			if (pat == patend)
365 				return(1);
366 			for (; *name != EOS; ++name) {
367 				if (match(name, pat, patend))
368 					return(1);
369 			}
370 			return(0);
371 		case M_ONE:
372 			if (*name++ == EOS)
373 				return(0);
374 			break;
375 		case M_SET:
376 			ok = 0;
377 			k = *name++;
378 			if (negate_range = (*pat & 0xff) == M_NOT)
379 				++pat;
380 			while (((c = *pat++) & 0xff) != M_END) {
381 				if ((*pat & 0xff) == M_RNG) {
382 					if (c <= k && k <= pat[1])
383 						ok = 1;
384 					pat += 2;
385 				}
386 				else if (c == k)
387 					ok = 1;
388 			}
389 			if (ok == negate_range)
390 				return(0);
391 			break;
392 		default:
393 			if (*name++ != c)
394 				return(0);
395 			break;
396 		}
397 	}
398 	return(*name == EOS);
399 }
400 
401 /* free allocated data belonging to a glob_t structure */
402 void
403 globfree(pglob)
404 	glob_t *pglob;
405 {
406 	register int i;
407 	register char **pp;
408 
409 	if (pglob->gl_pathv != NULL) {
410 		pp = pglob->gl_pathv + pglob->gl_offs;
411 		for (i = pglob->gl_pathc; i--; ++pp)
412 			if (*pp)
413 				(void)free(*pp);
414 		(void)free((char *)pp);
415 	}
416 }
417