xref: /original-bsd/lib/libc/gen/glob.c (revision 404544c8)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)glob.c	5.16 (Berkeley) 10/01/92";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  * glob(3) -- a superset of the one defined in POSIX 1003.2.
17  *
18  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
19  *
20  * Optional extra services, controlled by flags not defined by POSIX:
21  *
22  * GLOB_QUOTE:
23  *	Escaping convention: \ inhibits any special meaning the following
24  *	character might have (except \ at end of string is retained).
25  * GLOB_MAGCHAR:
26  *	Set in gl_flags if pattern contained a globbing character.
27  * GLOB_NOMAGIC:
28  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
29  *	not contain any magic characters.  [Used in csh style globbing]
30  * gl_matchc:
31  *	Number of matches in the current invocation of glob.
32  */
33 
34 #include <sys/param.h>
35 #include <sys/stat.h>
36 #include <dirent.h>
37 #include <glob.h>
38 #include <ctype.h>
39 #include <errno.h>
40 #include <string.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 
/*
 * Literal characters the pattern compiler and the directory walker test
 * for.  NOTE(review): DOLLAR, TILDE and UNDERSCORE are not referenced by
 * the code visible in this file.
 */
#define	DOLLAR		'$'
#define	DOT		'.'
#define	EOS		'\0'
#define	LBRACKET	'['
#define	NOT		'!'
#define	QUESTION	'?'
#define	QUOTE		'\\'
#define	RANGE		'-'
#define	RBRACKET	']'
#define	SEP		'/'
#define	STAR		'*'
#define	TILDE		'~'
#define	UNDERSCORE	'_'

/*
 * Flag bits kept above the character value in a Char:
 *	M_QUOTE    set by META() and tested by ismeta(); marks a compiled
 *		   meta token.
 *	M_PROTECT  OR-ed by glob() onto a character that followed a
 *		   backslash when GLOB_QUOTE is in effect.
 *	M_MASK     all 16 bits; match() applies it before comparing tokens.
 *	M_ASCII    the low 8 bits, i.e. the character value itself.
 */
#define	M_QUOTE		0x8000
#define	M_PROTECT	0x4000
#define	M_MASK		0xffff
#define	M_ASCII		0x00ff

/* CHAR() strips the flag bits; META() builds a meta token from a character. */
#define	CHAR(c)		((c)&M_ASCII)
#define	META(c)		((c)|M_QUOTE)
/* Compiled tokens: '*', closing ']', negating '!', '?', range '-', opening '['. */
#define	M_ALL		META('*')
#define	M_END		META(']')
#define	M_NOT		META('!')
#define	M_ONE		META('?')
#define	M_RNG		META('-')
#define	M_SET		META('[')
#define	ismeta(c)	(((c)&M_QUOTE) != 0)

/* Internal pattern/path cell: character value plus the flag bits above. */
typedef u_short Char;
74 
75 static int	 compare __P((const void *, const void *));
76 static void	 g_Ctoc __P((Char *, char *));
77 static int	 g_lstat __P((Char *, struct stat *));
78 static DIR	*g_opendir __P((Char *));
79 static Char	*g_strchr __P((Char *, int));
80 static int	 g_stat __P((Char *, struct stat *));
81 static int	 glob1 __P((Char *, glob_t *));
82 static int	 glob2 __P((Char *, Char *, Char *, glob_t *));
83 static int	 glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
84 static int	 globextend __P((Char *, glob_t *));
85 static int	 match __P((Char *, Char *, Char *));
86 #ifdef DEBUG
87 static void	 qprintf __P((Char *));
88 #endif
89 
90 /*
91  * The main glob() routine: compiles the pattern (optionally processing
92  * quotes), calls glob1() to do the real pattern matching, and finally
93  * sorts the list (unless unsorted operation is requested).  Returns 0
94  * if things went well, nonzero if errors occurred.  It is not an error
95  * to find no matches.
96  */
97 glob(pattern, flags, errfunc, pglob)
98 	const char *pattern;
99 	int flags, (*errfunc) __P((char *, int));
100 	glob_t *pglob;
101 {
102 	const u_char *compilepat, *patnext;
103 	int c, err, oldpathc;
104 	Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
105 
106 	patnext = (u_char *) pattern;
107 	if (!(flags & GLOB_APPEND)) {
108 		pglob->gl_pathc = 0;
109 		pglob->gl_pathv = NULL;
110 		if (!(flags & GLOB_DOOFFS))
111 			pglob->gl_offs = 0;
112 	}
113 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
114 	pglob->gl_errfunc = errfunc;
115 	oldpathc = pglob->gl_pathc;
116 	pglob->gl_matchc = 0;
117 
118 	bufnext = patbuf;
119 	bufend = bufnext + MAXPATHLEN;
120 	compilebuf = bufnext;
121 	compilepat = patnext;
122 	if (flags & GLOB_QUOTE) {
123 		/* Protect the quoted characters. */
124 		while (bufnext < bufend && (c = *patnext++) != EOS)
125 			if (c == QUOTE) {
126 				if ((c = *patnext++) == EOS) {
127 					c = QUOTE;
128 					--patnext;
129 				}
130 				*bufnext++ = c | M_PROTECT;
131 			}
132 			else
133 				*bufnext++ = c;
134 	}
135 	else
136 	    while (bufnext < bufend && (c = *patnext++) != EOS)
137 		    *bufnext++ = c;
138 	*bufnext = EOS;
139 
140 	bufnext = patbuf;
141 	qpatnext = patbuf;
142 	/* We don't need to check for buffer overflow any more. */
143 	while ((c = *qpatnext++) != EOS) {
144 		switch (c) {
145 		case LBRACKET:
146 			c = *qpatnext;
147 			if (c == NOT)
148 				++qpatnext;
149 			if (*qpatnext == EOS ||
150 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
151 				*bufnext++ = LBRACKET;
152 				if (c == NOT)
153 					--qpatnext;
154 				break;
155 			}
156 			*bufnext++ = M_SET;
157 			if (c == NOT)
158 				*bufnext++ = M_NOT;
159 			c = *qpatnext++;
160 			do {
161 				*bufnext++ = CHAR(c);
162 				if (*qpatnext == RANGE &&
163 				    (c = qpatnext[1]) != RBRACKET) {
164 					*bufnext++ = M_RNG;
165 					*bufnext++ = CHAR(c);
166 					qpatnext += 2;
167 				}
168 			} while ((c = *qpatnext++) != RBRACKET);
169 			pglob->gl_flags |= GLOB_MAGCHAR;
170 			*bufnext++ = M_END;
171 			break;
172 		case QUESTION:
173 			pglob->gl_flags |= GLOB_MAGCHAR;
174 			*bufnext++ = M_ONE;
175 			break;
176 		case STAR:
177 			pglob->gl_flags |= GLOB_MAGCHAR;
178 			/* collapse adjacent stars to one,
179 			 * to avoid exponential behavior
180 			 */
181 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
182 			    *bufnext++ = M_ALL;
183 			break;
184 		default:
185 			*bufnext++ = CHAR(c);
186 			break;
187 		}
188 	}
189 	*bufnext = EOS;
190 #ifdef DEBUG
191 	qprintf(patbuf);
192 #endif
193 
194 	if ((err = glob1(patbuf, pglob)) != 0)
195 		return(err);
196 
197 	/*
198 	 * If there was no match we are going to append the pattern
199 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
200 	 * and the pattern did not contain any magic characters
201 	 * GLOB_NOMAGIC is there just for compatibility with csh.
202 	 */
203 	if (pglob->gl_pathc == oldpathc &&
204 	    ((flags & GLOB_NOCHECK) ||
205 	     ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) {
206 		if (!(flags & GLOB_QUOTE)) {
207 			Char *dp = compilebuf;
208 			const u_char *sp = compilepat;
209 			while (*dp++ = *sp++);
210 		}
211 		else {
212 			/*
213 			 * Copy pattern, interpreting quotes; this is slightly
214 			 * different than the interpretation of quotes above
215 			 * -- which should prevail?
216 			 */
217 			while (*compilepat != EOS) {
218 				if (*compilepat == QUOTE) {
219 					if (*++compilepat == EOS)
220 						--compilepat;
221 				}
222 				*compilebuf++ = (u_char)*compilepat++;
223 			}
224 			*compilebuf = EOS;
225 		}
226 		return(globextend(patbuf, pglob));
227 	} else if (!(flags & GLOB_NOSORT))
228 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
229 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
230 	return(0);
231 }
232 
/*
 * qsort() callback: p and q each point at a (char *) slot of the path
 * vector; the two strings are ordered with strcmp().
 */
static int
compare(p, q)
	const void *p, *q;
{
	char *const *lhs = p;
	char *const *rhs = q;

	return(strcmp(*lhs, *rhs));
}
239 
240 static
241 glob1(pattern, pglob)
242 	Char *pattern;
243 	glob_t *pglob;
244 {
245 	Char pathbuf[MAXPATHLEN+1];
246 
247 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
248 	if (*pattern == EOS)
249 		return(0);
250 	return(glob2(pathbuf, pathbuf, pattern, pglob));
251 }
252 
253 /*
254  * The functions glob2 and glob3 are mutually recursive; there is one level
255  * of recursion for each segment in the pattern that contains one or more
256  * meta characters.
257  */
258 static
259 glob2(pathbuf, pathend, pattern, pglob)
260 	Char *pathbuf, *pathend, *pattern;
261 	glob_t *pglob;
262 {
263 	struct stat sb;
264 	Char *p, *q;
265 	int anymeta;
266 
267 	/*
268 	 * Loop over pattern segments until end of pattern or until
269 	 * segment with meta character found.
270 	 */
271 	for (anymeta = 0;;) {
272 		if (*pattern == EOS) {		/* End of pattern? */
273 			*pathend = EOS;
274 			if (g_lstat(pathbuf, &sb))
275 				return(0);
276 
277 			if (((pglob->gl_flags & GLOB_MARK) &&
278 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
279 			    || (S_ISLNK(sb.st_mode) &&
280 			    (g_stat(pathbuf, &sb) == 0) &&
281 			    S_ISDIR(sb.st_mode)))) {
282 				*pathend++ = SEP;
283 				*pathend = EOS;
284 			}
285 			++pglob->gl_matchc;
286 			return(globextend(pathbuf, pglob));
287 		}
288 
289 		/* Find end of next segment, copy tentatively to pathend. */
290 		q = pathend;
291 		p = pattern;
292 		while (*p != EOS && *p != SEP) {
293 			if (ismeta(*p))
294 				anymeta = 1;
295 			*q++ = *p++;
296 		}
297 
298 		if (!anymeta) {		/* No expansion, do next segment. */
299 			pathend = q;
300 			pattern = p;
301 			while (*pattern == SEP)
302 				*pathend++ = *pattern++;
303 		} else			/* Need expansion, recurse. */
304 			return(glob3(pathbuf, pathend, pattern, p, pglob));
305 	}
306 	/* NOTREACHED */
307 }
308 
309 static
310 glob3(pathbuf, pathend, pattern, restpattern, pglob)
311 	Char *pathbuf, *pathend, *pattern, *restpattern;
312 	glob_t *pglob;
313 {
314 	register struct dirent *dp;
315 	DIR *dirp;
316 	int len, err;
317 
318 	*pathend = EOS;
319 	errno = 0;
320 
321 	if (!(dirp = g_opendir(pathbuf)))
322 		/* TODO: don't call for ENOENT or ENOTDIR? */
323 		if (pglob->gl_errfunc &&
324 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
325 		    (pglob->gl_flags & GLOB_ERR))
326 			return(GLOB_ABEND);
327 		else
328 			return(0);
329 
330 	err = 0;
331 
332 	/* Search directory for matching names. */
333 	while ((dp = readdir(dirp))) {
334 		register u_char *sc;
335 		register Char *dc;
336 
337 		/* Initial DOT must be matched literally. */
338 		if (dp->d_name[0] == DOT && *pattern != DOT)
339 			continue;
340 		for (sc = (u_char *) dp->d_name, dc = pathend;
341 		     *dc++ = *sc++;);
342 		if (!match(pathend, pattern, restpattern)) {
343 			*pathend = EOS;
344 			continue;
345 		}
346 		err = glob2(pathbuf, --dc, restpattern, pglob);
347 		if (err)
348 			break;
349 	}
350 
351 	/* TODO: check error from readdir? */
352 	(void)closedir(dirp);
353 	return(err);
354 }
355 
356 
357 /*
358  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
359  * add the new item, and update gl_pathc.
360  *
361  * This assumes the BSD realloc, which only copies the block when its size
362  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
363  * behavior.
364  *
365  * Return 0 if new item added, error code if memory couldn't be allocated.
366  *
367  * Invariant of the glob_t structure:
368  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
369  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
370  */
371 static int
372 globextend(path, pglob)
373 	Char *path;
374 	glob_t *pglob;
375 {
376 	register char **pathv;
377 	register int i;
378 	u_int newsize;
379 	char *copy;
380 	Char *p;
381 
382 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
383 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
384 	if (pathv == NULL)
385 		return(GLOB_NOSPACE);
386 
387 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
388 		/* first time around -- clear initial gl_offs items */
389 		pathv += pglob->gl_offs;
390 		for (i = pglob->gl_offs; --i >= 0; )
391 			*--pathv = NULL;
392 	}
393 	pglob->gl_pathv = pathv;
394 
395 	for (p = path; *p++;);
396 	if ((copy = malloc(p - path)) != NULL) {
397 		g_Ctoc(path, copy);
398 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
399 	}
400 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
401 	return(copy == NULL ? GLOB_NOSPACE : 0);
402 }
403 
404 
405 /*
406  * pattern matching function for filenames.  Each occurrence of the *
407  * pattern causes a recursion level.
408  */
409 static
410 match(name, pat, patend)
411 	register Char *name, *pat, *patend;
412 {
413 	int ok, negate_range;
414 	Char c, k;
415 
416 	while (pat < patend) {
417 		c = *pat++;
418 		switch (c & M_MASK) {
419 		case M_ALL:
420 			if (pat == patend)
421 				return(1);
422 			do
423 			    if (match(name, pat, patend))
424 				    return(1);
425 			while (*name++ != EOS);
426 			return(0);
427 		case M_ONE:
428 			if (*name++ == EOS)
429 				return(0);
430 			break;
431 		case M_SET:
432 			ok = 0;
433 			if ((k = *name++) == EOS)
434 				return(0);
435 			if (negate_range = ((*pat & M_MASK) == M_NOT))
436 				++pat;
437 			while (((c = *pat++) & M_MASK) != M_END)
438 				if ((*pat & M_MASK) == M_RNG) {
439 					if (c <= k && k <= pat[1])
440 						ok = 1;
441 					pat += 2;
442 				} else if (c == k)
443 					ok = 1;
444 			if (ok == negate_range)
445 				return(0);
446 			break;
447 		default:
448 			if (*name++ != c)
449 				return(0);
450 			break;
451 		}
452 	}
453 	return(*name == EOS);
454 }
455 
/*
 * Free allocated data belonging to a glob_t structure.
 *
 * Releases every path string stored after the gl_offs reserved slots and
 * then the vector itself.  Afterwards gl_pathv is NULL and gl_pathc is 0,
 * so the structure no longer points at freed memory and calling
 * globfree() on it again is harmless.
 */
void
globfree(pglob)
	glob_t *pglob;
{
	register int i;
	register char **pp;

	if (pglob->gl_pathv == NULL)
		return;

	pp = pglob->gl_pathv + pglob->gl_offs;
	for (i = pglob->gl_pathc; i--; ++pp)
		free(*pp);		/* free(NULL) is a no-op */
	free(pglob->gl_pathv);

	pglob->gl_pathv = NULL;
	pglob->gl_pathc = 0;
}
472 
473 static DIR *
474 g_opendir(str)
475 	register Char *str;
476 {
477 	char buf[MAXPATHLEN];
478 
479 	if (!*str)
480 		return(opendir("."));
481 	g_Ctoc(str, buf);
482 	return(opendir(buf));
483 }
484 
485 static int
486 g_lstat(fn, sb)
487 	register Char *fn;
488 	struct stat *sb;
489 {
490 	char buf[MAXPATHLEN];
491 
492 	g_Ctoc(fn, buf);
493 	return(lstat(buf, sb));
494 }
495 
496 static int
497 g_stat(fn, sb)
498 	register Char *fn;
499 	struct stat *sb;
500 {
501 	char buf[MAXPATHLEN];
502 
503 	g_Ctoc(fn, buf);
504 	return(stat(buf, sb));
505 }
506 
507 static Char *
508 g_strchr(str, ch)
509 	Char *str;
510 	int ch;
511 {
512 	do {
513 		if (*str == ch)
514 			return (str);
515 	} while (*str++);
516 	return (NULL);
517 }
518 
519 static void
520 g_Ctoc(str, buf)
521 	register Char *str;
522 	char *buf;
523 {
524 	register char *dc;
525 
526 	for (dc = buf; *dc++ = *str++;);
527 }
528 
#ifdef DEBUG
/*
 * Debugging aid: print a compiled pattern on three lines -- the
 * characters themselves, a '"' under every cell carrying M_PROTECT, and
 * a '_' under every meta token.
 */
static void
qprintf(s)
	register Char *s;
{
	register Char *p;

	for (p = s; *p; p++)
		(void)printf("%c", *p & 0xff);
	(void)printf("\n");
	for (p = s; *p; p++)
		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
	(void)printf("\n");
	/* ismeta() tests the M_QUOTE bit that META() sets. */
	for (p = s; *p; p++)
		(void)printf("%c", ismeta(*p) ? '_' : ' ');
	(void)printf("\n");
}
#endif
547