xref: /original-bsd/bin/csh/glob.c (revision 95a66346)
1 /*
2  * Copyright (c) 1980 Regents of the University of California.
3  * All rights reserved.  The Berkeley Software License Agreement
4  * specifies the terms and conditions for redistribution.
5  */
6 
7 #ifndef lint
8 static char *sccsid = "@(#)glob.c	5.11 (Berkeley) 03/20/91";
9 #endif
10 
11 #include "sh.h"
12 #include "glob.h"
13 
/*
 * Cached state of the "noglob" and "nonomatch" shell variables;
 * refreshed from adrof() each time globone()/globall() run.
 */
static int noglob;
static int nonomatch;

/*
 * Allocated capacities (in pointers) of the pargv and gargv vectors.
 */
static int pargsiz;
static int gargsiz;

/*
 * Values for gflag
 */
#define G_NONE	0	/* No globbing needed			*/
#define G_GLOB	1	/* string contains *?[] characters	*/
#define G_CSH	2	/* string contains ~`{ characters	*/

/*
 * Characters with a special meaning to the expansion code.
 */
#define LBRC '{'
#define RBRC '}'
#define LBRK '['
#define RBRK ']'

#define EOS '\0'

/*
 * Result of the most recent globall()/Gcat(): the word vector and the
 * number of words in it.
 */
char **gargv = (char **) 0;
short gargc = 0;
32 
/*
 * Globbing is done in two stages.  In the first pass we expand the
 * csh globbing idioms ~`{ and in the second pass we do the normal
 * globbing of ?*[ if it is needed.
 *
 * Csh-type globbing is handled in globexpand() and the rest is
 * handled by glob(), which is part of the 4.4BSD libc.  To do the
 * 'No match' checking, we first glob everything without checking
 * and then check whether any string still contains globbing
 * characters.  If one does, globbing failed for at least one
 * component of the block.
 */
46 
47 
48 static char *
49 globtilde(s)
50 char *s;
51 {
52 	char gbuf[MAXPATHLEN], *gstart, *b, *u;
53 
54 	gstart = gbuf;
55 	*gstart++ = *s++;
56 	for (u = s, b = gstart; alnum(*s) || *s == '-'; *b++ = *s++)
57 		 continue;
58 	*b = EOS;
59 	if (*s == EOS || *s == '/') {
60 		if (s == u)
61 			gstart = strcpy(gbuf, value("home"));
62 		else if (gethdir(gstart))
63 			error("Unknown user: %s", gstart);
64 		b = &gstart[strlen(gstart)];
65 	}
66 	while (*s) *b++ = *s++;
67 	*b = EOS;
68 	return(savestr(gstart));
69 }
70 
71 static int
72 globbrace(s, p, bl)
73 char *s, *p, ***bl;
74 {
75 	int i, len;
76 	char *pm, *pe, *lm, *pl;
77 	char **nv, **vl;
78 	char gbuf[MAXPATHLEN];
79 	int size = GAVSIZ;
80 
81 	nv = vl = (char **) xalloc(sizeof(char *) * size);
82 
83 	len = 0;
84 	/* copy part up to the brace */
85 	for (lm = gbuf, p = s; *p != LBRC; *lm++ = *p++)
86 		continue;
87 
88 	/* check for balanced braces */
89 	for (i = 0, pe = ++p; *pe; pe++)
90 		if (*pe == LBRK) {
91 			/* Ignore everything between [] */
92 			for (++pe; *pe != RBRK && *pe != EOS; pe++)
93 				continue;
94 			if (*pe == EOS) {
95 				blkfree(nv);
96 				return(- LBRK);
97 			}
98 		}
99 		else if (*pe == LBRC)
100 			i++;
101 		else if (*pe == RBRC) {
102 			if (i == 0)
103 				break;
104 			i--;
105 		}
106 
107 	if (i != 0) {
108 		blkfree(nv);
109 		return(- LBRC);
110 	}
111 
112 	for (i = 0, pl = pm = p; pm <= pe; pm++)
113 	switch (*pm) {
114 	case LBRK:
115 		for (++pm; *pm != RBRK && *pm != EOS; pm++)
116 			continue;
117 		if (*pm == EOS) {
118 			blkfree(nv);
119 			return(- RBRK);
120 		}
121 		break;
122 	case LBRC:
123 		i++;
124 		break;
125 	case RBRC:
126 		if (i) {
127 			i--;
128 			break;
129 		}
130 		/*FALLTHROUGH*/
131 	case ',':
132 		if (i && *pm == ',')
133 			break;
134 		else {
135 			char savec = *pm;
136 			*pm = EOS;
137 			(void) strcpy(lm, pl);
138 			(void) strcat(gbuf, pe + 1);
139 			*pm = savec;
140 			*vl++ = savestr(gbuf);
141 			len++;
142 			pl = pm + 1;
143 			if (vl == &nv[size]) {
144 				size += GAVSIZ;
145 				nv = (char **) xrealloc(nv,
146 							size * sizeof(char *));
147 				vl = &nv[size - GAVSIZ];
148 			}
149 		}
150 		break;
151 	}
152 	*vl = (char *) 0;
153 	*bl = nv;
154 	return(len);
155 }
156 
157 static char **
158 globexpand(v)
159 	char **v;
160 {
161 	char *s;
162 	char **nv, **vl, **el;
163 	int size = GAVSIZ;
164 
165 
166 	nv = vl = (char **) xalloc(sizeof(char *) * size);
167 	*vl = (char *) 0;
168 
169 	/*
170 	 * Step 1: expand backquotes.
171 	 */
172 	while (s = *v++) {
173 		if (index(s, '`')) {
174 			int i;
175 
176 			dobackp(s, 0);
177 			for (i = 0; i < pargc; i++) {
178 				*vl++ = pargv[i];
179 				if (vl == &nv[size]) {
180 					size += GAVSIZ;
181 					nv = (char **) xrealloc(nv,
182 							size * sizeof(char *));
183 					vl = &nv[size - GAVSIZ];
184 				}
185 			}
186 			xfree((char *) pargv);
187 			pargv = (char **) 0;
188 		}
189 		else {
190 			*vl++ = savestr(s);
191 			if (vl == &nv[size]) {
192 				size += GAVSIZ;
193 				nv = (char **) xrealloc(nv,
194 							size * sizeof(char *));
195 				vl = &nv[size - GAVSIZ];
196 			}
197 		}
198 	}
199 	*vl = (char *) 0;
200 
201 	if (noglob)
202 		return(nv);
203 
204 	/*
205 	 * Step 2: expand tilde and braces
206 	 */
207 	el = vl;
208 	vl = nv;
209 	for (s = *vl; s; s = *++vl) {
210 		char *b;
211 		char **vp, **bp;
212 		if (*s == '~') {
213 			*vl = globtilde(s);
214 			xfree(s);
215 		}
216 		if (b = index(s, LBRC)) {
217 			char **bl;
218 			int i, len;
219 			if ((len = globbrace(s, b, &bl)) < 0) {
220 				blkfree(nv);
221 				error("Missing %c", -len);
222 			}
223 			xfree(s);
224 			if (len == 1) {
225 				*vl-- = *bl;
226 				xfree((char *) bl);
227 				continue;
228 			}
229 			len = blklen(bl);
230 			if (&el[len] >= &nv[size]) {
231 				int l, e;
232 				l = &el[len] - &nv[size];
233 				size += GAVSIZ > l ? GAVSIZ : l;
234 				l = vl - nv;
235 				e = el - nv;
236 				nv = (char **) xrealloc(nv,
237 							size * sizeof(char *));
238 				vl = nv + l;
239 				el = nv + e;
240 			}
241 			vp = vl--;
242 			*vp = *bl;
243 			len--;
244 			for (bp = el; bp != vp; bp--)
245 			     bp[len] = *bp;
246 			el += len;
247 			vp++;
248 			for (bp = bl + 1; *bp; *vp++ = *bp++)
249 				continue;
250 			xfree(bl);
251 		}
252 
253 	}
254 	vl = nv;
255 	return(vl);
256 }
257 
258 char *
259 globone(str)
260 	char *str;
261 {
262 
263 	char *v[2];
264 	char *nstr;
265 
266 	noglob = adrof("noglob") != 0;
267 	gflag = 0;
268 	v[0] = str;
269 	v[1] = 0;
270 	tglob(v);
271 	if (gflag == G_NONE)
272 		return (strip(savestr(str)));
273 
274 	if (gflag & G_CSH) {
275 		char **vl;
276 
277 		/*
278 		 * Expand back-quote, tilde and brace
279 		 */
280 		vl = globexpand(v);
281 		if (vl[1] != (char *) 0) {
282 			blkfree(vl);
283 			setname(str);
284 			bferr("Ambiguous");
285 			/*NOTREACHED*/
286 		}
287 		nstr = vl[0];
288 		xfree((char *) vl);
289 	}
290 	else
291 		nstr = str;
292 
293 	if (!noglob && (gflag & G_GLOB)) {
294 		glob_t globv;
295 
296 		globv.gl_offs = 0;
297 		globv.gl_pathv = 0;
298 		nonomatch = adrof("nonomatch") != 0;
299 		glob(nstr, nonomatch ? GLOB_NOCHECK : 0, 0, &globv);
300 		if (gflag & G_CSH)
301 			xfree(nstr);
302 		switch (globv.gl_pathc) {
303 		case 0:
304 			setname(str);
305 			globfree(&globv);
306 			bferr("No match");
307 			/*NOTREACHED*/
308 		case 1:
309 			str = strip(savestr(globv.gl_pathv[0]));
310 			globfree(&globv);
311 			return(str);
312 		default:
313 			setname(str);
314 			globfree(&globv);
315 			bferr("Ambiguous");
316 			/*NOTREACHED*/
317 		}
318 	}
319 	return(nstr ? strip(nstr) : (char *) 0);
320 }
321 
322 char **
323 globall(v)
324 	char **v;
325 {
326 	char *c, **vl, **vo;
327 
328 	if (!v || !v[0]) {
329 		gargv = saveblk(v);
330 		gargc = blklen(gargv);
331 		return (gargv);
332 	}
333 
334 	noglob = adrof("noglob") != 0;
335 	nonomatch = adrof("nonomatch") != 0;
336 
337 	if (gflag & G_CSH)
338 		/*
339 		 * Expand back-quote, tilde and brace
340 		 */
341 		vl = vo = globexpand(v);
342 	else
343 		vl = vo = saveblk(v);
344 
345 	if (!noglob && (gflag & G_GLOB)) {
346 		/*
347 		 * Glob the strings in vl using the glob routine
348 		 * from libc
349 		 */
350 		int gappend = 0;
351 		glob_t globv;
352 
353 		globv.gl_offs = 0;
354 		globv.gl_pathv = 0;
355 		gargc = 0;
356 		do {
357 			glob(*vl, gappend | GLOB_NOCHECK, 0, &globv);
358 			if (!nonomatch && (globv.gl_matchc == 0) &&
359 			    (globv.gl_flags & GLOB_MAGCHAR)) {
360 				if (gflag & G_CSH)
361 					blkfree(vo);
362 				globfree(&globv);
363 				gargc = 0;
364 				return(gargv = (char **) 0);
365 			}
366 			gappend = GLOB_APPEND;
367 		}
368 		while (*++vl);
369 
370 		if (gflag & G_CSH)
371 			blkfree(vo);
372 		if (globv.gl_pathc)
373 			vl = saveblk(globv.gl_pathv);
374 		else
375 			vl = 0;
376 		globfree(&globv);
377 	}
378 
379 	gargc = vl ? blklen(vl) : 0;
380 	return(gargv = vl);
381 }
382 
383 ginit()
384 {
385 	gargsiz = GAVSIZ;
386 	gargv = (char **) xalloc(sizeof(char *) * gargsiz);
387 	gargv[0] = 0;
388 	gargc = 0;
389 }
390 
/*
 * Call f once for every character of every string in the
 * NULL-terminated vector t, in order.  The strings are not modified
 * by rscan itself.
 */
rscan(t, f)
	register char **t;
	int (*f)();
{
	register char *cp;

	for (; *t != 0; t++)
		for (cp = *t; *cp != '\0'; cp++)
			(*f)(*cp);
}
401 
402 trim(t)
403 	register char **t;
404 {
405 	register char *p;
406 
407 	while (p = *t++)
408 		while (*p)
409 			*p++ &= TRIM;
410 }
411 
412 tglob(t)
413 	register char **t;
414 {
415 	register char *p, c;
416 
417 	while (p = *t++) {
418 		if (*p == '~')
419 			gflag |= G_CSH;
420 		else if (*p == '{' &&
421 		    (p[1] == '\0' || p[1] == '}' && p[2] == '\0'))
422 			continue;
423 		while (c = *p++)
424 			if (isglob(c))
425 			    gflag |= (c == '{' || c == '`') ? G_CSH : G_GLOB;
426 	}
427 }
428 
429 /*
430  * Command substitute cp.  If literal, then this is a substitution from a
431  * << redirection, and so we should not crunch blanks and tabs, separating
432  * words only at newlines.
433  */
434 char **
435 dobackp(cp, literal)
436 	char *cp;
437 	bool literal;
438 {
439 	register char *lp, *rp;
440 	char *ep, word[MAXPATHLEN];
441 
442 	if (pargv) {
443 		abort();
444 		blkfree(pargv);
445 	}
446 	pargsiz = GAVSIZ;
447 	pargv = (char **) xalloc(sizeof(char *) * pargsiz);
448 	pargv[0] = NOSTR;
449 	pargcp = pargs = word;
450 	pargc = 0;
451 	pnleft = MAXPATHLEN - 4;
452 	for (;;) {
453 		for (lp = cp; *lp != '`'; lp++) {
454 			if (*lp == 0) {
455 				if (pargcp != pargs)
456 					pword();
457 				return (pargv);
458 			}
459 			psave(*lp);
460 		}
461 		lp++;
462 		for (rp = lp; *rp && *rp != '`'; rp++)
463 			if (*rp == '\\') {
464 				rp++;
465 				if (!*rp)
466 					goto oops;
467 			}
468 		if (!*rp)
469 oops:			error("Unmatched `");
470 		ep = savestr(lp);
471 		ep[rp - lp] = 0;
472 		backeval(ep, literal);
473 		cp = rp + 1;
474 	}
475 }
476 
477 backeval(cp, literal)
478 	char *cp;
479 	bool literal;
480 {
481 	register int icnt, c;
482 	register char *ip;
483 	struct command faket;
484 	bool hadnl;
485 	int pvec[2], quoted;
486 	char *fakecom[2], ibuf[BUFSIZ];
487 
488 	hadnl = 0;
489 	icnt = 0;
490 	quoted = (literal || (cp[0] & QUOTE)) ? QUOTE : 0;
491 	faket.t_dtyp = TCOM;
492 	faket.t_dflg = 0;
493 	faket.t_dlef = 0;
494 	faket.t_drit = 0;
495 	faket.t_dspr = 0;
496 	faket.t_dcom = fakecom;
497 	fakecom[0] = "` ... `";
498 	fakecom[1] = 0;
499 
500 	/*
501 	 * We do the psave job to temporarily change the current job so that
502 	 * the following fork is considered a separate job.  This is so that
503 	 * when backquotes are used in a builtin function that calls glob the
504 	 * "current job" is not corrupted.  We only need one level of pushed
505 	 * jobs as long as we are sure to fork here.
506 	 */
507 	psavejob();
508 
509 	/*
510 	 * It would be nicer if we could integrate this redirection more with
511 	 * the routines in sh.sem.c by doing a fake execute on a builtin
512 	 * function that was piped out.
513 	 */
514 	mypipe(pvec);
515 	if (pfork(&faket, -1) == 0) {
516 		struct wordent paraml;
517 		struct command *t;
518 
519 		(void) close(pvec[0]);
520 		(void) dmove(pvec[1], 1);
521 		(void) dmove(SHDIAG, 2);
522 		initdesc();
523 		arginp = cp;
524 		while (*cp)
525 			*cp++ &= TRIM;
526 		(void) lex(&paraml);
527 		if (err)
528 			error(err);
529 		alias(&paraml);
530 		t = syntax(paraml.next, &paraml, 0);
531 		if (err)
532 			error(err);
533 		if (t)
534 			t->t_dflg |= FPAR;
535 		(void) signal(SIGTSTP, SIG_IGN);
536 		(void) signal(SIGTTIN, SIG_IGN);
537 		(void) signal(SIGTTOU, SIG_IGN);
538 		execute(t, -1);
539 		exitstat();
540 	}
541 	xfree(cp);
542 	(void) close(pvec[1]);
543 	do {
544 		int cnt = 0;
545 		for (;;) {
546 			if (icnt == 0) {
547 				ip = ibuf;
548 				icnt = read(pvec[0], ip, BUFSIZ);
549 				if (icnt <= 0) {
550 					c = -1;
551 					break;
552 				}
553 			}
554 			if (hadnl)
555 				break;
556 			--icnt;
557 			c = (*ip++ & TRIM);
558 			if (c == 0)
559 				break;
560 			if (c == '\n') {
561 				/*
562 				 * Continue around the loop one more time, so
563 				 * that we can eat the last newline without
564 				 * terminating this word.
565 				 */
566 				hadnl = 1;
567 				continue;
568 			}
569 			if (!quoted && (c == ' ' || c == '\t'))
570 				break;
571 			cnt++;
572 			psave(c | quoted);
573 		}
574 		/*
575 		 * Unless at end-of-file, we will form a new word here if there
576 		 * were characters in the word, or in any case when we take
577 		 * text literally.  If we didn't make empty words here when
578 		 * literal was set then we would lose blank lines.
579 		 */
580 		if (c != -1 && (cnt || literal))
581 			pword();
582 		hadnl = 0;
583 	} while (c >= 0);
584 	(void) close(pvec[0]);
585 	pwait();
586 	prestjob();
587 }
588 
589 psave(c)
590 	char c;
591 {
592 	if (--pnleft <= 0)
593 		error("Word too long");
594 	*pargcp++ = c;
595 }
596 
597 pword()
598 {
599 	psave(0);
600 	if (pargc == pargsiz - 1) {
601 	    pargsiz += GAVSIZ;
602 	    pargv = (char **) xrealloc(pargv, pargsiz * sizeof(char *));
603 	}
604 	pargv[pargc++] = savestr(pargs);
605 	pargv[pargc] = NOSTR;
606 	pargcp = pargs;
607 	pnleft = MAXPATHLEN - 4;
608 }
609 
610 Gmatch(string, pattern)
611 	register char *string, *pattern;
612 {
613 	register char stringc, patternc;
614 	int match;
615 	char lastchar, rangec;
616 
617 	for (;; ++string) {
618 		stringc = *string & TRIM;
619 		switch (patternc = *pattern++) {
620 		case 0:
621 			return (stringc == 0);
622 		case '?':
623 			if (stringc == 0)
624 				return (0);
625 			break;
626 		case '*':
627 			if (!*pattern)
628 				return (1);
629 			while (*string)
630 				if (Gmatch(string++, pattern))
631 					return (1);
632 			return (0);
633 		case '[':
634 			lastchar = -1;
635 			match = 0;
636 			while (rangec = *pattern++) {
637 				if (rangec == ']')
638 					if (match)
639 						break;
640 					else
641 						return (0);
642 				if (match)
643 					continue;
644 				if (rangec == '-')
645 					match = (stringc <= *pattern++ &&
646 					    *(pattern-2) <= stringc);
647 				else {
648 					lastchar = rangec;
649 					match = (stringc == rangec);
650 				}
651 			}
652 			if (rangec == 0)
653 				bferr("Missing ]");
654 			break;
655 		default:
656 			if ((patternc & TRIM) != stringc)
657 				return (0);
658 			break;
659 
660 		}
661 	}
662 }
663 
664 Gcat(s1, s2)
665 	char *s1, *s2;
666 {
667 	register char *p, *q;
668 	int n;
669 
670 	for (p = s1; *p++;);
671 	for (q = s2; *q++;);
672 	n = (p - s1) + (q - s2) - 1;
673 	if (++gargc >= gargsiz) {
674 		gargsiz += GAVSIZ;
675 		gargv = (char **) xrealloc(gargv, gargsiz * sizeof(char *));
676 	}
677 	gargv[gargc] = 0;
678 	p = gargv[gargc - 1] = xalloc((unsigned)n);
679 	for (q = s1; *p++ = *q++;);
680 	for (p--, q = s2; *p++ = *q++;);
681 }
682 
/*
 * Comparison routine for sorting a vector of strings: a1 and a2 point
 * at two elements of the vector, and the result is that of strcmp() on
 * the strings they refer to.
 */
int
sortscmp(a1, a2)
	char **a1, **a2;
{
	char *lhs = *a1;
	char *rhs = *a2;

	return(strcmp(lhs, rhs));
}
689