xref: /original-bsd/lib/libc/gen/glob.c (revision 2852c678)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)glob.c	5.6 (Berkeley) 04/02/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  * Glob: the interface is a superset of the one defined in POSIX 1003.2,
17  * draft 9.
18  *
19  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
20  *
21  * Optional extra services, controlled by flags not defined by POSIX:
22  *
23  * GLOB_QUOTE:
24  *	Escaping convention: \ inhibits any special meaning the following
25  *	character might have (except \ at end of string is retained).
26  * GLOB_MAGCHAR:
27  *	Set in gl_flags is pattern contained a globbing character.
28  * gl_matchc:
29  *	Number of matches in the current invocation of glob.
30  */
31 
32 #include <sys/param.h>
33 #include <sys/stat.h>
34 #include <dirent.h>
35 #include <glob.h>
36 #include <ctype.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 
42 typedef int bool_t;
43 
44 static glob1();
45 static glob2();
46 static glob3();
47 static globextend();
48 static bool_t match();
49 
50 #define	DOLLAR		'$'
51 #define	DOT		'.'
52 #define	EOS		'\0'
53 #define	LBRACKET	'['
54 #define	NOT		'!'
55 #define	QUESTION	'?'
56 #define	QUOTE		'\\'
57 #define	RANGE		'-'
58 #define	RBRACKET	']'
59 #define	SEP		'/'
60 #define	STAR		'*'
61 #define	TILDE		'~'
62 #define	UNDERSCORE	'_'
63 
64 #define	METABIT		0x80
65 #define	META(c)		((c)|METABIT)
66 #define	M_ALL		META('*')
67 #define	M_END		META(']')
68 #define	M_NOT		META('!')
69 #define	M_ONE		META('?')
70 #define	M_RNG		META('-')
71 #define	M_SET		META('[')
72 #define	ismeta(c)	(((c)&METABIT) != 0)
73 
74 static
75 compare(p, q)
76 	void **p, **q;
77 {
78 	return(strcmp(*(char **)p, *(char **)q));
79 }
80 
81 
82 /*
83  * The main glob() routine: compiles the pattern (optionally processing
84  * quotes), calls glob1() to do the real pattern matching, and finally
85  * sorts the list (unless unsorted operation is requested).  Returns 0
86  * if things went well, nonzero if errors occurred.  It is not an error
87  * to find no matches.
88  */
89 glob(pattern, flags, errfunc, pglob)
90 	const char *pattern;
91 	int flags;
92 	int (*errfunc) __P((char *, int));
93 	glob_t *pglob;
94 {
95 	int err, oldpathc;
96 	char *bufnext, *bufend, *compilebuf;
97 	const char *compilepat, *patnext;
98 	char c, patbuf[MAXPATHLEN+1];
99 
100 	patnext = pattern;
101 	if (!(flags & GLOB_APPEND)) {
102 		pglob->gl_pathc = 0;
103 		pglob->gl_pathv = NULL;
104 		if (!(flags & GLOB_DOOFFS))
105 			pglob->gl_offs = 0;
106 	}
107 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
108 	pglob->gl_errfunc = errfunc;
109 	oldpathc = pglob->gl_pathc;
110 	pglob->gl_matchc = 0;
111 
112 	bufnext = patbuf;
113 	bufend = bufnext+MAXPATHLEN;
114 
115 	compilebuf = bufnext;
116 	compilepat = patnext;
117 	while (bufnext < bufend && (c = *patnext++) != EOS) {
118 		switch (c) {
119 		case LBRACKET:
120 			pglob->gl_flags |= GLOB_MAGCHAR;
121 			c = *patnext;
122 			if (c == NOT)
123 				++patnext;
124 			if (*patnext == EOS ||
125 			    strchr(patnext+1, RBRACKET) == NULL) {
126 				*bufnext++ = LBRACKET;
127 				if (c == NOT)
128 					--patnext;
129 				break;
130 			}
131 			*bufnext++ = M_SET;
132 			if (c == NOT)
133 				*bufnext++ = M_NOT;
134 			c = *patnext++;
135 			do {
136 				/* todo: quoting */
137 				*bufnext++ = c;
138 				if (*patnext == RANGE &&
139 				    (c = patnext[1]) != RBRACKET) {
140 					*bufnext++ = M_RNG;
141 					*bufnext++ = c;
142 					patnext += 2;
143 				}
144 			} while ((c = *patnext++) != RBRACKET);
145 			*bufnext++ = M_END;
146 			break;
147 		case QUESTION:
148 			pglob->gl_flags |= GLOB_MAGCHAR;
149 			*bufnext++ = M_ONE;
150 			break;
151 		case QUOTE:
152 			if (!(flags & GLOB_QUOTE))
153 				*bufnext++ = QUOTE;
154 			else {
155 				if ((c = *patnext++) == EOS) {
156 					c = QUOTE;
157 					--patnext;
158 				}
159 				*bufnext++ = c;
160 			}
161 			break;
162 		case STAR:
163 			pglob->gl_flags |= GLOB_MAGCHAR;
164 			*bufnext++ = M_ALL;
165 			break;
166 		default:
167 			*bufnext++ = c;
168 			break;
169 		}
170 	}
171 	*bufnext = EOS;
172 
173 	if ((err = glob1(patbuf, pglob)) != 0)
174 		return(err);
175 
176 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
177 		if (!(flags & GLOB_QUOTE))
178 			(void)strcpy(compilebuf, compilepat);
179 		else {
180 			/*
181 			 * copy pattern, interpreting quotes; this is slightly
182 			 * different than the interpretation of quotes above
183 			 * -- which should prevail?
184 			 */
185 			while (*compilepat != EOS) {
186 				if (*compilepat == QUOTE) {
187 					if (*++compilepat == EOS)
188 						--compilepat;
189 				}
190 				*compilebuf++ = *compilepat++;
191 			}
192 			*compilebuf = EOS;
193 		}
194 		return(globextend(patbuf, pglob));
195 	} else if (!(flags & GLOB_NOSORT))
196 		qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc),
197 		    pglob->gl_pathc - oldpathc, sizeof(char*), compare);
198 	return(0);
199 }
200 
201 static
202 glob1(pattern, pglob)
203 	char *pattern;
204 	glob_t *pglob;
205 {
206 	char pathbuf[MAXPATHLEN+1];
207 
208 	/*
209 	 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
210 	if (*pattern == EOS)
211 		return(0);
212 	return(glob2(pathbuf, pathbuf, pattern, pglob));
213 }
214 
215 /*
216  * functions glob2 and glob3 are mutually recursive; there is one level
217  * of recursion for each segment in the pattern that contains one or
218  * more meta characters.
219  */
220 static
221 glob2(pathbuf, pathend, pattern, pglob)
222 	char *pathbuf, *pathend, *pattern;
223 	glob_t *pglob;
224 {
225 	struct stat sbuf;
226 	bool_t anymeta = 0;
227 	char *p, *q;
228 
229 	/*
230 	 * loop over pattern segments until end of pattern or until
231 	 * segment with meta character found.
232 	 */
233 	for (;;) {
234 		if (*pattern == EOS) {		/* end of pattern? */
235 			*pathend = EOS;
236 			if (stat(pathbuf, &sbuf) != 0)
237 				return(0);	/* need error call here? */
238 			if ((pglob->gl_flags & GLOB_MARK) &&
239 			    pathend[-1] != SEP && S_ISDIR(sbuf.st_mode)) {
240 				*pathend++ = SEP;
241 				*pathend = EOS;
242 			}
243 			++pglob->gl_matchc;
244 			return(globextend(pathbuf, pglob));
245 		}
246 
247 		/* find end of next segment, copy tentatively to pathend */
248 		q = pathend;
249 		p = pattern;
250 		while (*p != EOS && *p != SEP) {
251 			if (ismeta(*p))
252 				anymeta = 1;
253 			*q++ = *p++;
254 		}
255 
256 		if (!anymeta) {		/* no expansion, do next segment */
257 			pathend = q;
258 			pattern = p;
259 			while (*pattern == SEP)
260 				*pathend++ = *pattern++;
261 		} else			/* need expansion, recurse */
262 			return(glob3(pathbuf, pathend, pattern, p, pglob));
263 	}
264 	/* NOTREACHED */
265 }
266 
267 static
268 glob3(pathbuf, pathend, pattern, restpattern, pglob)
269 	char *pathbuf, *pathend, *pattern, *restpattern;
270 	glob_t *pglob;
271 {
272 	extern int errno;
273 	DIR *dirp;
274 	struct dirent *dp;
275 	int len, err;
276 
277 	*pathend = EOS;
278 	errno = 0;
279 	if (!(dirp = opendir(pathbuf)))
280 		/* todo: don't call for ENOENT or ENOTDIR? */
281 		if (pglob->gl_errfunc &&
282 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
283 		    (pglob->gl_flags & GLOB_ERR))
284 			return(GLOB_ABEND);
285 		else
286 			return(0);
287 
288 	err = 0;
289 
290 	/* search directory for matching names */
291 	while ((dp = readdir(dirp))) {
292 		/* initial DOT must be matched literally */
293 		if (dp->d_name[0] == DOT && *pattern != DOT)
294 			continue;
295 		if (!match(dp->d_name, pattern, restpattern))
296 			continue;
297 		len = dp->d_namlen;
298 		(void)strcpy(pathend, dp->d_name);
299 		err = glob2(pathbuf, pathend+len, restpattern, pglob);
300 		if (err)
301 			break;
302 	}
303 	/* todo: check error from readdir? */
304 	(void)closedir(dirp);
305 	return(err);
306 }
307 
308 
309 /*
310  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
311  * add the new item, and update gl_pathc.
312  *
313  * This assumes the BSD realloc, which only copies the block when its size
314  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
315  * behavior.
316  *
317  * Return 0 if new item added, error code if memory couldn't be allocated.
318  *
319  * Invariant of the glob_t structure:
320  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
321  *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
322  */
323 static
324 globextend(path, pglob)
325 	char *path;
326 	glob_t *pglob;
327 {
328 	register char **pathv;
329 	register int i;
330 	u_int copysize, newsize;
331 	char *copy;
332 
333 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
334 	pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize);
335 	if (pathv == NULL)
336 		return(GLOB_NOSPACE);
337 
338 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
339 		/* first time around -- clear initial gl_offs items */
340 		pathv += pglob->gl_offs;
341 		for (i = pglob->gl_offs; --i >= 0; )
342 			*--pathv = NULL;
343 	}
344 	pglob->gl_pathv = pathv;
345 
346 	copysize = strlen(path) + 1;
347 	if ((copy = malloc(copysize)) != NULL) {
348 		(void)strcpy(copy, path);
349 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
350 	}
351 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
352 	return((copy == NULL) ? GLOB_NOSPACE : 0);
353 }
354 
355 
356 /*
357  * pattern matching function for filenames.  Each occurrence of the *
358  * pattern causes a recursion level.
359  */
360 static bool_t
361 match(name, pat, patend)
362 	register char *name, *pat, *patend;
363 {
364 	bool_t ok, negate_range;
365 	char c, k;
366 
367 	while (pat < patend) {
368 		c = *pat++;
369 		switch (c & 0xff) {
370 		case M_ALL:
371 			if (pat == patend)
372 				return(1);
373 			for (; *name != EOS; ++name) {
374 				if (match(name, pat, patend))
375 					return(1);
376 			}
377 			return(0);
378 		case M_ONE:
379 			if (*name++ == EOS)
380 				return(0);
381 			break;
382 		case M_SET:
383 			ok = 0;
384 			k = *name++;
385 			if (negate_range = (*pat & 0xff) == M_NOT)
386 				++pat;
387 			while (((c = *pat++) & 0xff) != M_END) {
388 				if ((*pat & 0xff) == M_RNG) {
389 					if (c <= k && k <= pat[1])
390 						ok = 1;
391 					pat += 2;
392 				}
393 				else if (c == k)
394 					ok = 1;
395 			}
396 			if (ok == negate_range)
397 				return(0);
398 			break;
399 		default:
400 			if (*name++ != c)
401 				return(0);
402 			break;
403 		}
404 	}
405 	return(*name == EOS);
406 }
407 
408 /* free allocated data belonging to a glob_t structure */
409 void
410 globfree(pglob)
411 	glob_t *pglob;
412 {
413 	register int i;
414 	register char **pp;
415 
416 	if (pglob->gl_pathv != NULL) {
417 		pp = pglob->gl_pathv + pglob->gl_offs;
418 		for (i = pglob->gl_pathc; i--; ++pp)
419 			if (*pp)
420 				free((void *)*pp);
421 		free((void *)pglob->gl_pathv);
422 	}
423 }
424