xref: /original-bsd/lib/libc/gen/glob.c (revision 1afa398d)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)glob.c	5.4 (Berkeley) 02/23/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  * Glob: the interface is a superset of the one defined in POSIX 1003.2,
17  * draft 9.
18  *
19  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
20  *
21  * Optional extra services, controlled by flags not defined by POSIX:
22  *	GLOB_QUOTE: escaping convention: \ inhibits any special meaning
23 		the following character might have (except \ at end of
24  *		string is kept);
25  */
26 
27 #include <sys/param.h>
28 #include <sys/stat.h>
29 #include <dirent.h>
30 #include <glob.h>
31 #include <ctype.h>
32 #include <errno.h>
33 #include <string.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 
37 typedef int bool_t;
38 
39 static glob1();
40 static glob2();
41 static glob3();
42 static globextend();
43 static bool_t match();
44 
45 #define	DOLLAR		'$'
46 #define	DOT		'.'
47 #define	EOS		'\0'
48 #define	LBRACKET	'['
49 #define	NOT		'!'
50 #define	QUESTION	'?'
51 #define	QUOTE		'\\'
52 #define	RANGE		'-'
53 #define	RBRACKET	']'
54 #define	SEP		'/'
55 #define	STAR		'*'
56 #define	TILDE		'~'
57 #define	UNDERSCORE	'_'
58 
59 #define	METABIT		0x80
60 #define	META(c)		((c)|METABIT)
61 #define	M_ALL		META('*')
62 #define	M_END		META(']')
63 #define	M_NOT		META('!')
64 #define	M_ONE		META('?')
65 #define	M_RNG		META('-')
66 #define	M_SET		META('[')
67 #define	ismeta(c)	(((c)&METABIT) != 0)
68 
69 static
70 compare(p, q)
71 	void **p, **q;
72 {
73 	return(strcmp(*(char **)p, *(char **)q));
74 }
75 
76 
77 /*
78  * The main glob() routine: compiles the pattern (optionally processing
79  * quotes), calls glob1() to do the real pattern matching, and finally
80  * sorts the list (unless unsorted operation is requested).  Returns 0
81  * if things went well, nonzero if errors occurred.  It is not an error
82  * to find no matches.
83  */
84 glob(pattern, flags, errfunc, pglob)
85 	const char *pattern;
86 	int flags;
87 	int (*errfunc) __P((char *, int));
88 	glob_t *pglob;
89 {
90 	int err, oldpathc;
91 	char *bufnext, *bufend, *compilebuf;
92 	const char *compilepat, *patnext;
93 	char c, patbuf[MAXPATHLEN+1];
94 
95 	patnext = pattern;
96 	if (!(flags & GLOB_APPEND)) {
97 		pglob->gl_pathc = 0;
98 		pglob->gl_pathv = NULL;
99 		if (!(flags & GLOB_DOOFFS))
100 			pglob->gl_offs = 0;
101 	}
102 	pglob->gl_flags = flags;
103 	pglob->gl_errfunc = errfunc;
104 	oldpathc = pglob->gl_pathc;
105 
106 	bufnext = patbuf;
107 	bufend = bufnext+MAXPATHLEN;
108 
109 	compilebuf = bufnext;
110 	compilepat = patnext;
111 	while (bufnext < bufend && (c = *patnext++) != EOS) {
112 		switch (c) {
113 		case LBRACKET:
114 			c = *patnext;
115 			if (c == NOT)
116 				++patnext;
117 			if (*patnext == EOS ||
118 			    strchr(patnext+1, RBRACKET) == NULL) {
119 				*bufnext++ = LBRACKET;
120 				if (c == NOT)
121 					--patnext;
122 				break;
123 			}
124 			*bufnext++ = M_SET;
125 			if (c == NOT)
126 				*bufnext++ = M_NOT;
127 			c = *patnext++;
128 			do {
129 				/* todo: quoting */
130 				*bufnext++ = c;
131 				if (*patnext == RANGE &&
132 				    (c = patnext[1]) != RBRACKET) {
133 					*bufnext++ = M_RNG;
134 					*bufnext++ = c;
135 					patnext += 2;
136 				}
137 			} while ((c = *patnext++) != RBRACKET);
138 			*bufnext++ = M_END;
139 			break;
140 		case QUESTION:
141 			*bufnext++ = M_ONE;
142 			break;
143 		case QUOTE:
144 			if (!(flags & GLOB_QUOTE))
145 				*bufnext++ = QUOTE;
146 			else {
147 				if ((c = *patnext++) == EOS) {
148 					c = QUOTE;
149 					--patnext;
150 				}
151 				*bufnext++ = c;
152 			}
153 			break;
154 		case STAR:
155 			*bufnext++ = M_ALL;
156 			break;
157 		default:
158 			*bufnext++ = c;
159 			break;
160 		}
161 	}
162 	*bufnext = EOS;
163 
164 	if ((err = glob1(patbuf, pglob)) != 0)
165 		return(err);
166 
167 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
168 		if (!(flags & GLOB_QUOTE))
169 			(void)strcpy(compilebuf, compilepat);
170 		else {
171 			/*
172 			 * copy pattern, interpreting quotes; this is slightly
173 			 * different than the interpretation of quotes above
174 			 * -- which should prevail?
175 			 */
176 			while (*compilepat != EOS) {
177 				if (*compilepat == QUOTE) {
178 					if (*++compilepat == EOS)
179 						--compilepat;
180 				}
181 				*compilebuf++ = *compilepat++;
182 			}
183 			*compilebuf = EOS;
184 		}
185 		return(globextend(patbuf, pglob));
186 	} else if (!(flags & GLOB_NOSORT))
187 		qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc),
188 		    pglob->gl_pathc - oldpathc, sizeof(char*), compare);
189 	return(0);
190 }
191 
192 static
193 glob1(pattern, pglob)
194 	char *pattern;
195 	glob_t *pglob;
196 {
197 	char pathbuf[MAXPATHLEN+1];
198 
199 	/*
200 	 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
201 	if (*pattern == EOS)
202 		return(0);
203 	return(glob2(pathbuf, pathbuf, pattern, pglob));
204 }
205 
206 /*
207  * functions glob2 and glob3 are mutually recursive; there is one level
208  * of recursion for each segment in the pattern that contains one or
209  * more meta characters.
210  */
211 static
212 glob2(pathbuf, pathend, pattern, pglob)
213 	char *pathbuf, *pathend, *pattern;
214 	glob_t *pglob;
215 {
216 	struct stat sbuf;
217 	bool_t anymeta = 0;
218 	char *p, *q;
219 
220 	/*
221 	 * loop over pattern segments until end of pattern or until
222 	 * segment with meta character found.
223 	 */
224 	for (;;) {
225 		if (*pattern == EOS) {		/* end of pattern? */
226 			*pathend = EOS;
227 			if (stat(pathbuf, &sbuf) != 0)
228 				return(0);	/* need error call here? */
229 			if ((pglob->gl_flags & GLOB_MARK) &&
230 			    pathend[-1] != SEP && S_ISDIR(sbuf.st_mode)) {
231 				*pathend++ = SEP;
232 				*pathend = EOS;
233 			}
234 			return(globextend(pathbuf, pglob));
235 		}
236 
237 		/* find end of next segment, copy tentatively to pathend */
238 		q = pathend;
239 		p = pattern;
240 		while (*p != EOS && *p != SEP) {
241 			if (ismeta(*p))
242 				anymeta = 1;
243 			*q++ = *p++;
244 		}
245 
246 		if (!anymeta) {		/* no expansion, do next segment */
247 			pathend = q;
248 			pattern = p;
249 			while (*pattern == SEP)
250 				*pathend++ = *pattern++;
251 		} else			/* need expansion, recurse */
252 			return(glob3(pathbuf, pathend, pattern, p, pglob));
253 	}
254 	/* NOTREACHED */
255 }
256 
257 static
258 glob3(pathbuf, pathend, pattern, restpattern, pglob)
259 	char *pathbuf, *pathend, *pattern, *restpattern;
260 	glob_t *pglob;
261 {
262 	extern int errno;
263 	DIR *dirp;
264 	struct dirent *dp;
265 	int len, err;
266 
267 	*pathend = EOS;
268 	errno = 0;
269 	if (!(dirp = opendir(pathbuf)))
270 		/* todo: don't call for ENOENT or ENOTDIR? */
271 		if (pglob->gl_errfunc &&
272 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
273 		    (pglob->gl_flags & GLOB_ERR))
274 			return(GLOB_ABEND);
275 		else
276 			return(0);
277 
278 	err = 0;
279 
280 	/* search directory for matching names */
281 	while ((dp = readdir(dirp))) {
282 		/* initial DOT must be matched literally */
283 		if (dp->d_name[0] == DOT && *pattern != DOT)
284 			continue;
285 		if (!match(dp->d_name, pattern, restpattern))
286 			continue;
287 		len = dp->d_namlen;
288 		(void)strcpy(pathend, dp->d_name);
289 		err = glob2(pathbuf, pathend+len, restpattern, pglob);
290 		if (err)
291 			break;
292 	}
293 	/* todo: check error from readdir? */
294 	(void)closedir(dirp);
295 	return(err);
296 }
297 
298 
299 /*
300  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
301  * add the new item, and update gl_pathc.
302  *
303  * This assumes the BSD realloc, which only copies the block when its size
304  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
305  * behavior.
306  *
307  * Return 0 if new item added, error code if memory couldn't be allocated.
308  *
309  * Invariant of the glob_t structure:
310  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
311  *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
312  */
313 static
314 globextend(path, pglob)
315 	char *path;
316 	glob_t *pglob;
317 {
318 	register char **pathv;
319 	register int i;
320 	u_int copysize, newsize;
321 	char *copy;
322 
323 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
324 	pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize);
325 	if (pathv == NULL)
326 		return(GLOB_NOSPACE);
327 
328 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
329 		/* first time around -- clear initial gl_offs items */
330 		pathv += pglob->gl_offs;
331 		for (i = pglob->gl_offs; --i >= 0; )
332 			*--pathv = NULL;
333 	}
334 	pglob->gl_pathv = pathv;
335 
336 	copysize = strlen(path) + 1;
337 	if ((copy = malloc(copysize)) != NULL) {
338 		(void)strcpy(copy, path);
339 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
340 	}
341 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
342 	return((copy == NULL) ? GLOB_NOSPACE : 0);
343 }
344 
345 
346 /*
347  * pattern matching function for filenames.  Each occurrence of the *
348  * pattern causes a recursion level.
349  */
350 static bool_t
351 match(name, pat, patend)
352 	register char *name, *pat, *patend;
353 {
354 	bool_t ok, negate_range;
355 	char c, k;
356 
357 	while (pat < patend) {
358 		c = *pat++;
359 		switch (c & 0xff) {
360 		case M_ALL:
361 			if (pat == patend)
362 				return(1);
363 			for (; *name != EOS; ++name) {
364 				if (match(name, pat, patend))
365 					return(1);
366 			}
367 			return(0);
368 		case M_ONE:
369 			if (*name++ == EOS)
370 				return(0);
371 			break;
372 		case M_SET:
373 			ok = 0;
374 			k = *name++;
375 			if (negate_range = (*pat & 0xff) == M_NOT)
376 				++pat;
377 			while (((c = *pat++) & 0xff) != M_END) {
378 				if ((*pat & 0xff) == M_RNG) {
379 					if (c <= k && k <= pat[1])
380 						ok = 1;
381 					pat += 2;
382 				}
383 				else if (c == k)
384 					ok = 1;
385 			}
386 			if (ok == negate_range)
387 				return(0);
388 			break;
389 		default:
390 			if (*name++ != c)
391 				return(0);
392 			break;
393 		}
394 	}
395 	return(*name == EOS);
396 }
397 
398 /* free allocated data belonging to a glob_t structure */
399 void
400 globfree(pglob)
401 	glob_t *pglob;
402 {
403 	register int i;
404 	register char **pp;
405 
406 	if (pglob->gl_pathv != NULL) {
407 		pp = pglob->gl_pathv + pglob->gl_offs;
408 		for (i = pglob->gl_pathc; i--; ++pp)
409 			if (*pp)
410 				(void)free(*pp);
411 		(void)free((char *)pp);
412 	}
413 }
414