xref: /original-bsd/lib/libc/gen/glob.c (revision 237fdba6)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)glob.c	5.3 (Berkeley) 06/23/90";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  * Glob: the interface is a superset of the one defined in POSIX 1003.2,
17  * draft 9.
18  *
19  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
20  *
21  * Optional extra services, controlled by flags not defined by POSIX:
22  *	GLOB_QUOTE: escaping convention: \ inhibits any special meaning
23 		the following character might have (except \ at end of
24  *		string is kept);
25  */
26 
27 #include <sys/param.h>
28 #include <sys/stat.h>
29 #include <dirent.h>
30 #include <glob.h>
31 #include <ctype.h>
32 #include <errno.h>
33 #include <string.h>
34 #include <stdio.h>
35 
36 char *malloc(), *realloc();
37 
38 typedef int bool_t;
39 
40 #define	DOLLAR		'$'
41 #define	DOT		'.'
42 #define	EOS		'\0'
43 #define	LBRACKET	'['
44 #define	NOT		'!'
45 #define	QUESTION	'?'
46 #define	QUOTE		'\\'
47 #define	RANGE		'-'
48 #define	RBRACKET	']'
49 #define	SEP		'/'
50 #define	STAR		'*'
51 #define	TILDE		'~'
52 #define	UNDERSCORE	'_'
53 
54 #define	METABIT		0x80
55 #define	META(c)		((c)|METABIT)
56 #define	M_ALL		META('*')
57 #define	M_END		META(']')
58 #define	M_NOT		META('!')
59 #define	M_ONE		META('?')
60 #define	M_RNG		META('-')
61 #define	M_SET		META('[')
62 #define	ismeta(c)	(((c)&METABIT) != 0)
63 
64 static
65 compare(p, q)
66 	void **p, **q;
67 {
68 	return(strcmp(*(char **)p, *(char **)q));
69 }
70 
71 
72 /*
73  * The main glob() routine: compiles the pattern (optionally processing
74  * quotes), calls glob1() to do the real pattern matching, and finally
75  * sorts the list (unless unsorted operation is requested).  Returns 0
76  * if things went well, nonzero if errors occurred.  It is not an error
77  * to find no matches.
78  */
79 glob(pattern, flags, errfunc, pglob)
80 	char *pattern;
81 	int flags, (*errfunc)();
82 	glob_t *pglob;
83 {
84 	int err, oldpathc;
85 	char *bufnext, *bufend, *compilebuf, *compilepat, *patnext;
86 	char c, patbuf[MAXPATHLEN+1];
87 
88 	patnext = pattern;
89 	if (!(flags & GLOB_APPEND)) {
90 		pglob->gl_pathc = 0;
91 		pglob->gl_pathv = NULL;
92 		if (!(flags & GLOB_DOOFFS))
93 			pglob->gl_offs = 0;
94 	}
95 	pglob->gl_flags = flags;
96 	pglob->gl_errfunc = errfunc;
97 	oldpathc = pglob->gl_pathc;
98 
99 	bufnext = patbuf;
100 	bufend = bufnext+MAXPATHLEN;
101 
102 	compilebuf = bufnext;
103 	compilepat = patnext;
104 	while (bufnext < bufend && (c = *patnext++) != EOS) {
105 		switch (c) {
106 		case LBRACKET:
107 			c = *patnext;
108 			if (c == NOT)
109 				++patnext;
110 			if (*patnext == EOS ||
111 			    strchr(patnext+1, RBRACKET) == NULL) {
112 				*bufnext++ = LBRACKET;
113 				if (c == NOT)
114 					--patnext;
115 				break;
116 			}
117 			*bufnext++ = M_SET;
118 			if (c == NOT)
119 				*bufnext++ = M_NOT;
120 			c = *patnext++;
121 			do {
122 				/* todo: quoting */
123 				*bufnext++ = c;
124 				if (*patnext == RANGE &&
125 				    (c = patnext[1]) != RBRACKET) {
126 					*bufnext++ = M_RNG;
127 					*bufnext++ = c;
128 					patnext += 2;
129 				}
130 			} while ((c = *patnext++) != RBRACKET);
131 			*bufnext++ = M_END;
132 			break;
133 		case QUESTION:
134 			*bufnext++ = M_ONE;
135 			break;
136 		case QUOTE:
137 			if (!(flags & GLOB_QUOTE))
138 				*bufnext++ = QUOTE;
139 			else {
140 				if ((c = *patnext++) == EOS) {
141 					c = QUOTE;
142 					--patnext;
143 				}
144 				*bufnext++ = c;
145 			}
146 			break;
147 		case STAR:
148 			*bufnext++ = M_ALL;
149 			break;
150 		default:
151 			*bufnext++ = c;
152 			break;
153 		}
154 	}
155 	*bufnext = EOS;
156 
157 	if ((err = glob1(patbuf, pglob)) != 0)
158 		return(err);
159 
160 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
161 		if (!(flags & GLOB_QUOTE))
162 			(void)strcpy(compilebuf, compilepat);
163 		else {
164 			/*
165 			 * copy pattern, interpreting quotes; this is slightly
166 			 * different than the interpretation of quotes above
167 			 * -- which should prevail?
168 			 */
169 			while (*compilepat != EOS) {
170 				if (*compilepat == QUOTE) {
171 					if (*++compilepat == EOS)
172 						--compilepat;
173 				}
174 				*compilebuf++ = *compilepat++;
175 			}
176 			*compilebuf = EOS;
177 		}
178 		return(globextend(patbuf, pglob));
179 	} else if (!(flags & GLOB_NOSORT))
180 		qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc),
181 		    pglob->gl_pathc - oldpathc, sizeof(char*), compare);
182 	return(0);
183 }
184 
185 static
186 glob1(pattern, pglob)
187 	char *pattern;
188 	glob_t *pglob;
189 {
190 	char pathbuf[MAXPATHLEN+1];
191 
192 	/*
193 	 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
194 	if (*pattern == EOS)
195 		return(0);
196 	return(glob2(pathbuf, pathbuf, pattern, pglob));
197 }
198 
199 /*
200  * functions glob2 and glob3 are mutually recursive; there is one level
201  * of recursion for each segment in the pattern that contains one or
202  * more meta characters.
203  */
204 static
205 glob2(pathbuf, pathend, pattern, pglob)
206 	char *pathbuf, *pathend, *pattern;
207 	glob_t *pglob;
208 {
209 	struct stat sbuf;
210 	bool_t anymeta = 0;
211 	char *p, *q;
212 
213 	/*
214 	 * loop over pattern segments until end of pattern or until
215 	 * segment with meta character found.
216 	 */
217 	for (;;) {
218 		if (*pattern == EOS) {		/* end of pattern? */
219 			*pathend = EOS;
220 			if (stat(pathbuf, &sbuf) != 0)
221 				return(0);	/* need error call here? */
222 			if ((pglob->gl_flags & GLOB_MARK) &&
223 			    pathend[-1] != SEP && S_ISDIR(sbuf.st_mode)) {
224 				*pathend++ = SEP;
225 				*pathend = EOS;
226 			}
227 			return(globextend(pathbuf, pglob));
228 		}
229 
230 		/* find end of next segment, copy tentatively to pathend */
231 		q = pathend;
232 		p = pattern;
233 		while (*p != EOS && *p != SEP) {
234 			if (ismeta(*p))
235 				anymeta = 1;
236 			*q++ = *p++;
237 		}
238 
239 		if (!anymeta) {		/* no expansion, do next segment */
240 			pathend = q;
241 			pattern = p;
242 			while (*pattern == SEP)
243 				*pathend++ = *pattern++;
244 		} else			/* need expansion, recurse */
245 			return(glob3(pathbuf, pathend, pattern, p, pglob));
246 	}
247 	/* NOTREACHED */
248 }
249 
250 static
251 glob3(pathbuf, pathend, pattern, restpattern, pglob)
252 	char *pathbuf, *pathend, *pattern, *restpattern;
253 	glob_t *pglob;
254 {
255 	extern int errno;
256 	DIR *dirp;
257 	struct dirent *dp;
258 	int len, err;
259 
260 	*pathend = EOS;
261 	errno = 0;
262 	if (!(dirp = opendir(pathbuf)))
263 		/* todo: don't call for ENOENT or ENOTDIR? */
264 		if (pglob->gl_errfunc &&
265 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
266 		    (pglob->gl_flags & GLOB_ERR))
267 			return(GLOB_ABEND);
268 		else
269 			return(0);
270 
271 	err = 0;
272 
273 	/* search directory for matching names */
274 	while ((dp = readdir(dirp))) {
275 		/* initial DOT must be matched literally */
276 		if (dp->d_name[0] == DOT && *pattern != DOT)
277 			continue;
278 		if (!match(dp->d_name, pattern, restpattern))
279 			continue;
280 		len = dp->d_namlen;
281 		(void)strcpy(pathend, dp->d_name);
282 		err = glob2(pathbuf, pathend+len, restpattern, pglob);
283 		if (err)
284 			break;
285 	}
286 	/* todo: check error from readdir? */
287 	(void)closedir(dirp);
288 	return(err);
289 }
290 
291 
292 /*
293  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
294  * add the new item, and update gl_pathc.
295  *
296  * This assumes the BSD realloc, which only copies the block when its size
297  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
298  * behavior.
299  *
300  * Return 0 if new item added, error code if memory couldn't be allocated.
301  *
302  * Invariant of the glob_t structure:
303  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
304  *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
305  */
306 static
307 globextend(path, pglob)
308 	char *path;
309 	glob_t *pglob;
310 {
311 	register char **pathv;
312 	register int i;
313 	u_int copysize, newsize;
314 	char *copy;
315 
316 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
317 	pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize);
318 	if (pathv == NULL)
319 		return(GLOB_NOSPACE);
320 
321 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
322 		/* first time around -- clear initial gl_offs items */
323 		pathv += pglob->gl_offs;
324 		for (i = pglob->gl_offs; --i >= 0; )
325 			*--pathv = NULL;
326 	}
327 	pglob->gl_pathv = pathv;
328 
329 	copysize = strlen(path) + 1;
330 	if ((copy = malloc(copysize)) != NULL) {
331 		(void)strcpy(copy, path);
332 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
333 	}
334 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
335 	return((copy == NULL) ? GLOB_NOSPACE : 0);
336 }
337 
338 
339 /*
340  * pattern matching function for filenames.  Each occurrence of the *
341  * pattern causes a recursion level.
342  */
343 static bool_t
344 match(name, pat, patend)
345 	register char *name, *pat, *patend;
346 {
347 	bool_t ok, negate_range;
348 	char c, k;
349 
350 	while (pat < patend) {
351 		c = *pat++;
352 		switch (c & 0xff) {
353 		case M_ALL:
354 			if (pat == patend)
355 				return(1);
356 			for (; *name != EOS; ++name) {
357 				if (match(name, pat, patend))
358 					return(1);
359 			}
360 			return(0);
361 		case M_ONE:
362 			if (*name++ == EOS)
363 				return(0);
364 			break;
365 		case M_SET:
366 			ok = 0;
367 			k = *name++;
368 			if (negate_range = (*pat & 0xff) == M_NOT)
369 				++pat;
370 			while (((c = *pat++) & 0xff) != M_END) {
371 				if ((*pat & 0xff) == M_RNG) {
372 					if (c <= k && k <= pat[1])
373 						ok = 1;
374 					pat += 2;
375 				}
376 				else if (c == k)
377 					ok = 1;
378 			}
379 			if (ok == negate_range)
380 				return(0);
381 			break;
382 		default:
383 			if (*name++ != c)
384 				return(0);
385 			break;
386 		}
387 	}
388 	return(*name == EOS);
389 }
390 
391 /* free allocated data belonging to a glob_t structure */
392 void
393 globfree(pglob)
394 	glob_t *pglob;
395 {
396 	register int i;
397 	register char **pp;
398 
399 	if (pglob->gl_pathv != NULL) {
400 		pp = pglob->gl_pathv + pglob->gl_offs;
401 		for (i = pglob->gl_pathc; i--; ++pp)
402 			if (*pp)
403 				(void)free(*pp);
404 		(void)free((char *)pp);
405 	}
406 }
407