xref: /original-bsd/lib/libc/gen/glob.c (revision d54be081)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)glob.c	5.12 (Berkeley) 06/24/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 /*
16  * glob(3) -- a superset of the one defined in POSIX 1003.2.
17  *
18  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
19  *
20  * Optional extra services, controlled by flags not defined by POSIX:
21  *
22  * GLOB_QUOTE:
23  *	Escaping convention: \ inhibits any special meaning the following
24  *	character might have (except \ at end of string is retained).
25  * GLOB_MAGCHAR:
26  *	Set in gl_flags if pattern contained a globbing character.
27  * gl_matchc:
28  *	Number of matches in the current invocation of glob.
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/stat.h>
34 #include <dirent.h>
35 #include <glob.h>
36 #include <ctype.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 
42 #define	DOLLAR		'$'
43 #define	DOT		'.'
44 #define	EOS		'\0'
45 #define	LBRACKET	'['
46 #define	NOT		'!'
47 #define	QUESTION	'?'
48 #define	QUOTE		'\\'
49 #define	RANGE		'-'
50 #define	RBRACKET	']'
51 #define	SEP		'/'
52 #define	STAR		'*'
53 #define	TILDE		'~'
54 #define	UNDERSCORE	'_'
55 
56 #define	M_QUOTE		0x8000
57 #define	M_PROTECT	0x4000
58 #define	M_MASK		0xffff
59 #define	M_ASCII		0x00ff
60 
61 #define	CHAR(c)		((c)&M_ASCII)
62 #define	META(c)		((c)|M_QUOTE)
63 #define	M_ALL		META('*')
64 #define	M_END		META(']')
65 #define	M_NOT		META('!')
66 #define	M_ONE		META('?')
67 #define	M_RNG		META('-')
68 #define	M_SET		META('[')
69 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
70 
71 typedef u_short Char;
72 
73 static int	 compare __P((const void *, const void *));
74 static void	 g_Ctoc __P((Char *, char *));
75 static int	 g_lstat __P((Char *, struct stat *));
76 static DIR	*g_opendir __P((Char *));
77 static Char	*g_strchr __P((Char *, int));
78 static int	 g_stat __P((Char *, struct stat *));
79 static int	 glob1 __P((Char *, glob_t *));
80 static int	 glob2 __P((Char *, Char *, Char *, glob_t *));
81 static int	 glob3 __P((Char *, Char *, Char *, Char *, glob_t *));
82 static int	 globextend __P((Char *, glob_t *));
83 static int	 match __P((Char *, Char *, Char *));
84 #ifdef DEBUG
85 static void	 qprintf __P((Char *));
86 #endif
87 
88 /*
89  * The main glob() routine: compiles the pattern (optionally processing
90  * quotes), calls glob1() to do the real pattern matching, and finally
91  * sorts the list (unless unsorted operation is requested).  Returns 0
92  * if things went well, nonzero if errors occurred.  It is not an error
93  * to find no matches.
94  */
95 glob(pattern, flags, errfunc, pglob)
96 	const char *pattern;
97 	int flags, (*errfunc) __P((char *, int));
98 	glob_t *pglob;
99 {
100 	const u_char *compilepat, *patnext;
101 	int c, err, oldpathc;
102 	Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1];
103 
104 	patnext = (u_char *) pattern;
105 	if (!(flags & GLOB_APPEND)) {
106 		pglob->gl_pathc = 0;
107 		pglob->gl_pathv = NULL;
108 		if (!(flags & GLOB_DOOFFS))
109 			pglob->gl_offs = 0;
110 	}
111 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
112 	pglob->gl_errfunc = errfunc;
113 	oldpathc = pglob->gl_pathc;
114 	pglob->gl_matchc = 0;
115 
116 	bufnext = patbuf;
117 	bufend = bufnext + MAXPATHLEN;
118 	compilebuf = bufnext;
119 	compilepat = patnext;
120 	if (flags & GLOB_QUOTE) {
121 		/* Protect the quoted characters. */
122 		while (bufnext < bufend && (c = *patnext++) != EOS)
123 			if (c == QUOTE) {
124 				if ((c = *patnext++) == EOS) {
125 					c = QUOTE;
126 					--patnext;
127 				}
128 				*bufnext++ = c | M_PROTECT;
129 			}
130 			else
131 				*bufnext++ = c;
132 	}
133 	else
134 	    while (bufnext < bufend && (c = *patnext++) != EOS)
135 		    *bufnext++ = c;
136 	*bufnext = EOS;
137 
138 	bufnext = patbuf;
139 	qpatnext = patbuf;
140 	/* We don't need to check for buffer overflow any more. */
141 	while ((c = *qpatnext++) != EOS) {
142 		switch (c) {
143 		case LBRACKET:
144 			pglob->gl_flags |= GLOB_MAGCHAR;
145 			c = *qpatnext;
146 			if (c == NOT)
147 				++qpatnext;
148 			if (*qpatnext == EOS ||
149 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
150 				*bufnext++ = LBRACKET;
151 				if (c == NOT)
152 					--qpatnext;
153 				break;
154 			}
155 			*bufnext++ = M_SET;
156 			if (c == NOT)
157 				*bufnext++ = M_NOT;
158 			c = *qpatnext++;
159 			do {
160 				*bufnext++ = CHAR(c);
161 				if (*qpatnext == RANGE &&
162 				    (c = qpatnext[1]) != RBRACKET) {
163 					*bufnext++ = M_RNG;
164 					*bufnext++ = CHAR(c);
165 					qpatnext += 2;
166 				}
167 			} while ((c = *qpatnext++) != RBRACKET);
168 			*bufnext++ = M_END;
169 			break;
170 		case QUESTION:
171 			pglob->gl_flags |= GLOB_MAGCHAR;
172 			*bufnext++ = M_ONE;
173 			break;
174 		case STAR:
175 			pglob->gl_flags |= GLOB_MAGCHAR;
176 			*bufnext++ = M_ALL;
177 			break;
178 		default:
179 			*bufnext++ = CHAR(c);
180 			break;
181 		}
182 	}
183 	*bufnext = EOS;
184 #ifdef DEBUG
185 	qprintf(patbuf);
186 #endif
187 
188 	if ((err = glob1(patbuf, pglob)) != 0)
189 		return(err);
190 
191 	if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) {
192 		if (!(flags & GLOB_QUOTE)) {
193 			Char *dp = compilebuf;
194 			const u_char *sp = compilepat;
195 			while (*dp++ = *sp++);
196 		}
197 		else {
198 			/*
199 			 * Copy pattern, interpreting quotes; this is slightly
200 			 * different than the interpretation of quotes above
201 			 * -- which should prevail?
202 			 */
203 			while (*compilepat != EOS) {
204 				if (*compilepat == QUOTE) {
205 					if (*++compilepat == EOS)
206 						--compilepat;
207 				}
208 				*compilebuf++ = (u_char)*compilepat++;
209 			}
210 			*compilebuf = EOS;
211 		}
212 		return(globextend(patbuf, pglob));
213 	} else if (!(flags & GLOB_NOSORT))
214 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
215 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
216 	return(0);
217 }
218 
219 static int
220 compare(p, q)
221 	const void *p, *q;
222 {
223 	return(strcmp(*(char **)p, *(char **)q));
224 }
225 
226 static
227 glob1(pattern, pglob)
228 	Char *pattern;
229 	glob_t *pglob;
230 {
231 	Char pathbuf[MAXPATHLEN+1];
232 
233 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
234 	if (*pattern == EOS)
235 		return(0);
236 	return(glob2(pathbuf, pathbuf, pattern, pglob));
237 }
238 
239 /*
240  * The functions glob2 and glob3 are mutually recursive; there is one level
241  * of recursion for each segment in the pattern that contains one or more
242  * meta characters.
243  */
244 static
245 glob2(pathbuf, pathend, pattern, pglob)
246 	Char *pathbuf, *pathend, *pattern;
247 	glob_t *pglob;
248 {
249 	struct stat sb;
250 	Char *p, *q;
251 	int anymeta;
252 
253 	/*
254 	 * Loop over pattern segments until end of pattern or until
255 	 * segment with meta character found.
256 	 */
257 	for (anymeta = 0;;) {
258 		if (*pattern == EOS) {		/* End of pattern? */
259 			*pathend = EOS;
260 			if (g_stat(pathbuf, &sb))
261 				return(0);
262 
263 			if (((pglob->gl_flags & GLOB_MARK) &&
264 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
265 			    || (S_ISLNK(sb.st_mode) &&
266 			    (g_stat(pathbuf, &sb) == 0) &&
267 			    S_ISDIR(sb.st_mode)))) {
268 				*pathend++ = SEP;
269 				*pathend = EOS;
270 			}
271 			++pglob->gl_matchc;
272 			return(globextend(pathbuf, pglob));
273 		}
274 
275 		/* Find end of next segment, copy tentatively to pathend. */
276 		q = pathend;
277 		p = pattern;
278 		while (*p != EOS && *p != SEP) {
279 			if (ismeta(*p))
280 				anymeta = 1;
281 			*q++ = *p++;
282 		}
283 
284 		if (!anymeta) {		/* No expansion, do next segment. */
285 			pathend = q;
286 			pattern = p;
287 			while (*pattern == SEP)
288 				*pathend++ = *pattern++;
289 		} else			/* Need expansion, recurse. */
290 			return(glob3(pathbuf, pathend, pattern, p, pglob));
291 	}
292 	/* NOTREACHED */
293 }
294 
295 static
296 glob3(pathbuf, pathend, pattern, restpattern, pglob)
297 	Char *pathbuf, *pathend, *pattern, *restpattern;
298 	glob_t *pglob;
299 {
300 	register struct dirent *dp;
301 	DIR *dirp;
302 	int len, err;
303 
304 	*pathend = EOS;
305 	errno = 0;
306 
307 	if (!(dirp = g_opendir(pathbuf)))
308 		/* TODO: don't call for ENOENT or ENOTDIR? */
309 		if (pglob->gl_errfunc &&
310 		    (*pglob->gl_errfunc)(pathbuf, errno) ||
311 		    (pglob->gl_flags & GLOB_ERR))
312 			return(GLOB_ABEND);
313 		else
314 			return(0);
315 
316 	err = 0;
317 
318 	/* Search directory for matching names. */
319 	while ((dp = readdir(dirp))) {
320 		register u_char *sc;
321 		register Char *dc;
322 
323 		/* Initial DOT must be matched literally. */
324 		if (dp->d_name[0] == DOT && *pattern != DOT)
325 			continue;
326 		for (sc = (u_char *) dp->d_name, dc = pathend;
327 		     *dc++ = *sc++;);
328 		if (!match(pathend, pattern, restpattern)) {
329 			*pathend = EOS;
330 			continue;
331 		}
332 		err = glob2(pathbuf, --dc, restpattern, pglob);
333 		if (err)
334 			break;
335 	}
336 
337 	/* TODO: check error from readdir? */
338 	(void)closedir(dirp);
339 	return(err);
340 }
341 
342 
343 /*
344  * Extend the gl_pathv member of a glob_t structure to accomodate a new item,
345  * add the new item, and update gl_pathc.
346  *
347  * This assumes the BSD realloc, which only copies the block when its size
348  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
349  * behavior.
350  *
351  * Return 0 if new item added, error code if memory couldn't be allocated.
352  *
353  * Invariant of the glob_t structure:
354  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
355  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
356  */
357 static int
358 globextend(path, pglob)
359 	Char *path;
360 	glob_t *pglob;
361 {
362 	register char **pathv;
363 	register int i;
364 	u_int newsize;
365 	char *copy;
366 	Char *p;
367 
368 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
369 	pathv = (char **)realloc((char *)pglob->gl_pathv, newsize);
370 	if (pathv == NULL)
371 		return(GLOB_NOSPACE);
372 
373 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
374 		/* first time around -- clear initial gl_offs items */
375 		pathv += pglob->gl_offs;
376 		for (i = pglob->gl_offs; --i >= 0; )
377 			*--pathv = NULL;
378 	}
379 	pglob->gl_pathv = pathv;
380 
381 	for (p = path; *p++;);
382 	if ((copy = malloc(p - path)) != NULL) {
383 		g_Ctoc(path, copy);
384 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
385 	}
386 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
387 	return(copy == NULL ? GLOB_NOSPACE : 0);
388 }
389 
390 
391 /*
392  * pattern matching function for filenames.  Each occurrence of the *
393  * pattern causes a recursion level.
394  */
395 static
396 match(name, pat, patend)
397 	register Char *name, *pat, *patend;
398 {
399 	int ok, negate_range;
400 	Char c, k;
401 
402 	while (pat < patend) {
403 		c = *pat++;
404 		switch (c & M_MASK) {
405 		case M_ALL:
406 			if (pat == patend)
407 				return(1);
408 			for (; *name != EOS; ++name)
409 				if (match(name, pat, patend))
410 					return(1);
411 			return(0);
412 		case M_ONE:
413 			if (*name++ == EOS)
414 				return(0);
415 			break;
416 		case M_SET:
417 			ok = 0;
418 			k = *name++;
419 			if (negate_range = ((*pat & M_MASK) == M_NOT))
420 				++pat;
421 			while (((c = *pat++) & M_MASK) != M_END)
422 				if ((*pat & M_MASK) == M_RNG) {
423 					if (c <= k && k <= pat[1])
424 						ok = 1;
425 					pat += 2;
426 				} else if (c == k)
427 					ok = 1;
428 			if (ok == negate_range)
429 				return(0);
430 			break;
431 		default:
432 			if (*name++ != c)
433 				return(0);
434 			break;
435 		}
436 	}
437 	return(*name == EOS);
438 }
439 
440 /* Free allocated data belonging to a glob_t structure. */
441 void
442 globfree(pglob)
443 	glob_t *pglob;
444 {
445 	register int i;
446 	register char **pp;
447 
448 	if (pglob->gl_pathv != NULL) {
449 		pp = pglob->gl_pathv + pglob->gl_offs;
450 		for (i = pglob->gl_pathc; i--; ++pp)
451 			if (*pp)
452 				free(*pp);
453 		free(pglob->gl_pathv);
454 	}
455 }
456 
457 static DIR *
458 g_opendir(str)
459 	register Char *str;
460 {
461 	char buf[MAXPATHLEN];
462 
463 	if (!*str)
464 		return(opendir("."));
465 	g_Ctoc(str, buf);
466 	return(opendir(buf));
467 }
468 
469 static int
470 g_lstat(fn, sb)
471 	register Char *fn;
472 	struct stat *sb;
473 {
474 	char buf[MAXPATHLEN];
475 
476 	g_Ctoc(fn, buf);
477 	return(lstat(buf, sb));
478 }
479 
480 static int
481 g_stat(fn, sb)
482 	register Char *fn;
483 	struct stat *sb;
484 {
485 	char buf[MAXPATHLEN];
486 
487 	g_Ctoc(fn, buf);
488 	return(stat(buf, sb));
489 }
490 
491 static Char *
492 g_strchr(str, ch)
493 	Char *str;
494 	int ch;
495 {
496 	do {
497 		if (*str == ch)
498 			return (str);
499 	} while (*str++);
500 	return (NULL);
501 }
502 
503 static void
504 g_Ctoc(str, buf)
505 	register Char *str;
506 	char *buf;
507 {
508 	register char *dc;
509 
510 	for (dc = buf; *dc++ = *str++;);
511 }
512 
513 #ifdef DEBUG
514 static void
515 qprintf(s)
516 	register Char *s;
517 {
518 	register Char *p;
519 
520 	for (p = s; *p; p++)
521 		(void)printf("%c", *p & 0xff);
522 	(void)printf("\n");
523 	for (p = s; *p; p++)
524 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
525 	(void)printf("\n");
526 	for (p = s; *p; p++)
527 		(void)printf("%c", *p & M_META ? '_' : ' ');
528 	(void)printf("\n");
529 }
530 #endif
531