xref: /original-bsd/lib/libc/gen/glob.c (revision ba762ddc)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)glob.c	5.8 (Berkeley) 04/23/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  * Glob: the interface is a superset of the one defined in POSIX 1003.2,
17  * draft 9.
18  *
19  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
20  *
21  * Optional extra services, controlled by flags not defined by POSIX:
22  *
23  * GLOB_QUOTE:
24  *	Escaping convention: \ inhibits any special meaning the following
25  *	character might have (except \ at end of string is retained).
26  * GLOB_MAGCHAR:
27  *	Set in gl_flags if pattern contained a globbing character.
28  * gl_matchc:
29  *	Number of matches in the current invocation of glob.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/param.h>
34 #include <sys/stat.h>
35 #include <dirent.h>
36 #include <glob.h>
37 #include <ctype.h>
38 #include <errno.h>
39 #include <string.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 
43 static int glob1(), glob2(), glob3(), globextend(), match();
44 
45 #define	DOLLAR		'$'
46 #define	DOT		'.'
47 #define	EOS		'\0'
48 #define	LBRACKET	'['
49 #define	NOT		'!'
50 #define	QUESTION	'?'
51 #define	QUOTE		'\\'
52 #define	RANGE		'-'
53 #define	RBRACKET	']'
54 #define	SEP		'/'
55 #define	STAR		'*'
56 #define	TILDE		'~'
57 #define	UNDERSCORE	'_'
58 
59 #define	SHORT_STRINGS
60 #ifdef SHORT_STRINGS
61 typedef u_short shortchar_t;
62 #define	METABIT		0x8000
63 #define	M_MASK		0xffff
64 #else
65 typedef char shortchar_t;
66 #define	METABIT		0x80
67 #define	M_MASK		0xff
68 #endif
69 
70 #define	META(c)		((c)|METABIT)
71 #define	M_ALL		META('*')
72 #define	M_END		META(']')
73 #define	M_NOT		META('!')
74 #define	M_ONE		META('?')
75 #define	M_RNG		META('-')
76 #define	M_SET		META('[')
77 #define	ismeta(c)	(((c)&METABIT) != 0)
78 
79 #ifdef SHORT_STRINGS
80 static DIR *
81 Opendir(str)
82 register shortchar_t *str;
83 {
84 	register char *dc;
85 	char buf[MAXPATHLEN];
86 
87 	if (!*str)
88 		return(opendir("."));
89 	for (dc = buf; *dc++ = *str++;);
90 	return(opendir(buf));
91 }
92 
93 static int
94 Lstat(fn, sb)
95 register shortchar_t *fn;
96 struct stat *sb;
97 {
98 	register char *dc;
99 	char buf[MAXPATHLEN];
100 
101 	for (dc = buf; *dc++ = *fn++;);
102 	return(lstat(buf, sb));
103 }
104 
105 static int
106 Stat(fn, sb)
107 register shortchar_t *fn;
108 struct stat *sb;
109 {
110 	register char *dc;
111 	char buf[MAXPATHLEN];
112 
113 	for (dc = buf; *dc++ = *fn++;);
114 	return(stat(buf, sb));
115 }
116 #else
117 #define	Opendir	opendir
118 #define	Stat	stat
119 #define	Lstat	lstat
120 #endif
121 
122 static int
123 compare(p, q)
124 	void **p, **q;
125 {
126 	return(strcmp(*(char **)p, *(char **)q));
127 }
128 
129 /*
130  * The main glob() routine: compiles the pattern (optionally processing
131  * quotes), calls glob1() to do the real pattern matching, and finally
132  * sorts the list (unless unsorted operation is requested).  Returns 0
133  * if things went well, nonzero if errors occurred.  It is not an error
134  * to find no matches.
135  */
136 glob(pattern, flags, errfunc, pglob)
137 	const char *pattern;
138 	int flags;
139 	int (*errfunc) __P((char *, int));
140 	glob_t *pglob;
141 {
142 	int err, oldpathc;
143 	shortchar_t *bufnext, *bufend, *compilebuf;
144 	const char *compilepat, *patnext;
145 	char c;
146 	shortchar_t patbuf[MAXPATHLEN+1];
147 
148 	patnext = pattern;
149 	if (!(flags & GLOB_APPEND)) {
150 		pglob->gl_pathc = 0;
151 		pglob->gl_pathv = NULL;
152 		if (!(flags & GLOB_DOOFFS))
153 			pglob->gl_offs = 0;
154 	}
155 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
156 	pglob->gl_errfunc = errfunc;
157 	oldpathc = pglob->gl_pathc;
158 	pglob->gl_matchc = 0;
159 
160 	bufnext = patbuf;
161 	bufend = bufnext+MAXPATHLEN;
162 
163 	compilebuf = bufnext;
164 	compilepat = patnext;
165 	while (bufnext < bufend && (c = *patnext++) != EOS) {
166 		switch (c) {
167 		case LBRACKET:
168 			pglob->gl_flags |= GLOB_MAGCHAR;
169 			c = *patnext;
170 			if (c == NOT)
171 				++patnext;
172 			if (*patnext == EOS ||
173 			    strchr(patnext+1, RBRACKET) == NULL) {
174 				*bufnext++ = LBRACKET;
175 				if (c == NOT)
176 					--patnext;
177 				break;
178 			}
179 			*bufnext++ = M_SET;
180 			if (c == NOT)
181 				*bufnext++ = M_NOT;
182 			c = *patnext++;
183 			do {
184 				/* todo: quoting */
185 				*bufnext++ = c;
186 				if (*patnext == RANGE &&
187 				    (c = patnext[1]) != RBRACKET) {
188 					*bufnext++ = M_RNG;
189 					*bufnext++ = c;
190 					patnext += 2;
191 				}
192 			} while ((c = *patnext++) != RBRACKET);
193 			*bufnext++ = M_END;
194 			break;
195 		case QUESTION:
196 			pglob->gl_flags |= GLOB_MAGCHAR;
197 			*bufnext++ = M_ONE;
198 			break;
199 		case QUOTE:
200 			if (!(flags & GLOB_QUOTE))
201 				*bufnext++ = QUOTE;
202 			else {
203 				if ((c = *patnext++) == EOS) {
204 					c = QUOTE;
205 					--patnext;
206 				}
207 				*bufnext++ = c;
208 			}
209 			break;
210 		case STAR:
211 			pglob->gl_flags |= GLOB_MAGCHAR;
212 			*bufnext++ = M_ALL;
213 			break;
214 		default:
215 			*bufnext++ = c;
216 			break;
217 		}
218 	}
219 	*bufnext = EOS;
220 
221 	if ((err = glob1(patbuf, pglob)) != 0)
222 		return(err);
223 
224 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
225 		if (!(flags & GLOB_QUOTE)) {
226 			shortchar_t *dp = compilebuf;
227 			const char *sp = compilepat;
228 			while (*dp++ = (u_char)*sp++);
229 		}
230 		else {
231 			/*
232 			 * copy pattern, interpreting quotes; this is slightly
233 			 * different than the interpretation of quotes above
234 			 * -- which should prevail?
235 			 */
236 			while (*compilepat != EOS) {
237 				if (*compilepat == QUOTE) {
238 					if (*++compilepat == EOS)
239 						--compilepat;
240 				}
241 				*compilebuf++ = (u_char)*compilepat++;
242 			}
243 			*compilebuf = EOS;
244 		}
245 		return(globextend(patbuf, pglob));
246 	} else if (!(flags & GLOB_NOSORT))
247 		qsort((char*)(pglob->gl_pathv + pglob->gl_offs + oldpathc),
248 		    pglob->gl_pathc - oldpathc, sizeof(char*), compare);
249 	return(0);
250 }
251 
252 static
253 glob1(pattern, pglob)
254 	shortchar_t *pattern;
255 	glob_t *pglob;
256 {
257 	shortchar_t pathbuf[MAXPATHLEN+1];
258 
259 	/*
260 	 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4.
261 	 */
262 	if (*pattern == EOS)
263 		return(0);
264 	return(glob2(pathbuf, pathbuf, pattern, pglob));
265 }
266 
267 /*
268  * functions glob2 and glob3 are mutually recursive; there is one level
269  * of recursion for each segment in the pattern that contains one or
270  * more meta characters.
271  */
272 static
273 glob2(pathbuf, pathend, pattern, pglob)
274 	shortchar_t *pathbuf, *pathend, *pattern;
275 	glob_t *pglob;
276 {
277 	struct stat sbuf;
278 	shortchar_t *p, *q;
279 	int anymeta;
280 
281 	/*
282 	 * loop over pattern segments until end of pattern or until
283 	 * segment with meta character found.
284 	 */
285 	for (anymeta = 0;;) {
286 		if (*pattern == EOS) {		/* end of pattern? */
287 			*pathend = EOS;
288 			if (Lstat(pathbuf, &sbuf))
289 				return(0);
290 
291 			if (((pglob->gl_flags & GLOB_MARK) &&
292 			    pathend[-1] != SEP) && (S_ISDIR(sbuf.st_mode)
293 			    || (S_ISLNK(sbuf.st_mode) &&
294 			    (Stat(pathbuf, &sbuf) == 0) &&
295 			    S_ISDIR(sbuf.st_mode)))) {
296 				*pathend++ = SEP;
297 				*pathend = EOS;
298 			}
299 			++pglob->gl_matchc;
300 			return(globextend(pathbuf, pglob));
301 		}
302 
303 		/* find end of next segment, copy tentatively to pathend */
304 		q = pathend;
305 		p = pattern;
306 		while (*p != EOS && *p != SEP) {
307 			if (ismeta(*p))
308 				anymeta = 1;
309 			*q++ = *p++;
310 		}
311 
312 		if (!anymeta) {		/* no expansion, do next segment */
313 			pathend = q;
314 			pattern = p;
315 			while (*pattern == SEP)
316 				*pathend++ = *pattern++;
317 		} else			/* need expansion, recurse */
318 			return(glob3(pathbuf, pathend, pattern, p, pglob));
319 	}
320 	/* NOTREACHED */
321 }
322 
323 
324 static
325 glob3(pathbuf, pathend, pattern, restpattern, pglob)
326 	shortchar_t *pathbuf, *pathend, *pattern, *restpattern;
327 	glob_t *pglob;
328 {
329 	extern int errno;
330 	DIR *dirp;
331 	struct dirent *dp;
332 	int len, err;
333 
334 	*pathend = EOS;
335 	errno = 0;
336 
337 	if (!(dirp = Opendir(pathbuf)))
338 		/* todo: don't call for ENOENT or ENOTDIR? */
339 		if (pglob->gl_errfunc &&
340 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
341 		    (pglob->gl_flags & GLOB_ERR))
342 			return(GLOB_ABEND);
343 		else
344 			return(0);
345 
346 	err = 0;
347 
348 	/* search directory for matching names */
349 	while ((dp = readdir(dirp))) {
350 		register char *sc;
351 		register shortchar_t *dc;
352 		/* initial DOT must be matched literally */
353 		if (dp->d_name[0] == DOT && *pattern != DOT)
354 			continue;
355 		for (sc = dp->d_name, dc = pathend;
356 		     *dc++ = (u_char)*sc++;);
357 		if (!match(pathend, pattern, restpattern)) {
358 			*pathend = EOS;
359 			continue;
360 		}
361 		err = glob2(pathbuf, --dc, restpattern, pglob);
362 		if (err)
363 			break;
364 	}
365 	/* todo: check error from readdir? */
366 	(void)closedir(dirp);
367 	return(err);
368 }
369 
370 
371 /*
372  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
373  * add the new item, and update gl_pathc.
374  *
375  * This assumes the BSD realloc, which only copies the block when its size
376  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
377  * behavior.
378  *
379  * Return 0 if new item added, error code if memory couldn't be allocated.
380  *
381  * Invariant of the glob_t structure:
382  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
383  *	 gl_pathv points to (gl_offs + gl_pathc + 1) items.
384  */
385 static int
386 globextend(path, pglob)
387 	shortchar_t *path;
388 	glob_t *pglob;
389 {
390 	register char **pathv;
391 	register int i;
392 	u_int newsize;
393 	char *copy;
394 	shortchar_t *p;
395 
396 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
397 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
398 	if (pathv == NULL)
399 		return(GLOB_NOSPACE);
400 
401 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
402 		/* first time around -- clear initial gl_offs items */
403 		pathv += pglob->gl_offs;
404 		for (i = pglob->gl_offs; --i >= 0; )
405 			*--pathv = NULL;
406 	}
407 	pglob->gl_pathv = pathv;
408 
409 	for (p = path; *p++;);
410 	if ((copy = malloc(p - path)) != NULL) {
411 		register char *dc = copy;
412 		register shortchar_t *sc = path;
413 		while (*dc++ = *sc++);
414 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
415 	}
416 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
417 	return((copy == NULL) ? GLOB_NOSPACE : 0);
418 }
419 
420 
421 /*
422  * pattern matching function for filenames.  Each occurrence of the *
423  * pattern causes a recursion level.
424  */
425 static
426 match(name, pat, patend)
427 	register shortchar_t *name, *pat, *patend;
428 {
429 	int ok, negate_range;
430 	shortchar_t c, k;
431 
432 	while (pat < patend) {
433 		c = *pat++;
434 		switch (c & M_MASK) {
435 		case M_ALL:
436 			if (pat == patend)
437 				return(1);
438 			for (; *name != EOS; ++name) {
439 				if (match(name, pat, patend))
440 					return(1);
441 			}
442 			return(0);
443 		case M_ONE:
444 			if (*name++ == EOS)
445 				return(0);
446 			break;
447 		case M_SET:
448 			ok = 0;
449 			k = *name++;
450 			if (negate_range = ((*pat & M_MASK) == M_NOT))
451 				++pat;
452 			while (((c = *pat++) & M_MASK) != M_END) {
453 				if ((*pat & M_MASK) == M_RNG) {
454 					if (c <= k && k <= pat[1])
455 						ok = 1;
456 					pat += 2;
457 				}
458 				else if (c == k)
459 					ok = 1;
460 			}
461 			if (ok == negate_range)
462 				return(0);
463 			break;
464 		default:
465 			if (*name++ != c)
466 				return(0);
467 			break;
468 		}
469 	}
470 	return(*name == EOS);
471 }
472 
473 /* free allocated data belonging to a glob_t structure */
474 void
475 globfree(pglob)
476 	glob_t *pglob;
477 {
478 	register int i;
479 	register char **pp;
480 
481 	if (pglob->gl_pathv != NULL) {
482 		pp = pglob->gl_pathv + pglob->gl_offs;
483 		for (i = pglob->gl_pathc; i--; ++pp)
484 			if (*pp)
485 				free((void *)*pp);
486 		free((void *)pglob->gl_pathv);
487 	}
488 }
489