xref: /freebsd/usr.bin/xargs/xargs.c (revision d184218c)
1 /*-
2  * Copyright (c) 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * John B. Roll Jr.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
33  */
34 
35 #if 0
36 #ifndef lint
37 static const char copyright[] =
38 "@(#) Copyright (c) 1990, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n";
40 #endif /* not lint */
41 
42 #ifndef lint
43 static char sccsid[] = "@(#)xargs.c	8.1 (Berkeley) 6/6/93";
44 #endif /* not lint */
45 #endif
46 #include <sys/cdefs.h>
47 __FBSDID("$FreeBSD$");
48 
#include <sys/param.h>
#include <sys/wait.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <langinfo.h>
#include <limits.h>
#include <locale.h>
#include <paths.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
63 
64 #include "pathnames.h"
65 
66 static void	parse_input(int, char *[]);
67 static void	prerun(int, char *[]);
68 static int	prompt(void);
69 static void	run(char **);
70 static void	usage(void);
71 void		strnsubst(char **, const char *, const char *, size_t);
72 static pid_t	xwait(int block, int *status);
73 static void	xexit(const char *, const int);
74 static void	waitchildren(const char *, int);
75 static void	pids_init(void);
76 static int	pids_empty(void);
77 static int	pids_full(void);
78 static void	pids_add(pid_t pid);
79 static int	pids_remove(pid_t pid);
80 static int	findslot(pid_t pid);
81 static int	findfreeslot(void);
82 static void	clearslot(int slot);
83 
84 static char echo[] = _PATH_ECHO;
85 static char **av, **bxp, **ep, **endxp, **xp;
86 static char *argp, *bbp, *ebp, *inpline, *p, *replstr;
87 static const char *eofstr;
88 static int count, insingle, indouble, oflag, pflag, tflag, Rflag, rval, zflag;
89 static int cnt, Iflag, jfound, Lflag, Sflag, wasquoted, xflag;
90 static int curprocs, maxprocs;
91 static pid_t *childpids;
92 
93 static volatile int childerr;
94 
95 extern char **environ;
96 
97 int
98 main(int argc, char *argv[])
99 {
100 	long arg_max;
101 	int ch, Jflag, nargs, nflag, nline;
102 	size_t linelen;
103 	char *endptr;
104 
105 	inpline = replstr = NULL;
106 	ep = environ;
107 	eofstr = "";
108 	Jflag = nflag = 0;
109 
110 	(void)setlocale(LC_ALL, "");
111 
112 	/*
113 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
114 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
115 	 * that the smallest argument is 2 bytes in length, this means that
116 	 * the number of arguments is limited to:
117 	 *
118 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
119 	 *
120 	 * We arbitrarily limit the number of arguments to 5000.  This is
121 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
122 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
123 	 * probably not worthwhile.
124 	 */
125 	nargs = 5000;
126 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
127 		errx(1, "sysconf(_SC_ARG_MAX) failed");
128 	nline = arg_max - 4 * 1024;
129 	while (*ep != NULL) {
130 		/* 1 byte for each '\0' */
131 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
132 	}
133 	maxprocs = 1;
134 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:S:s:rtx")) != -1)
135 		switch (ch) {
136 		case 'E':
137 			eofstr = optarg;
138 			break;
139 		case 'I':
140 			Jflag = 0;
141 			Iflag = 1;
142 			Lflag = 1;
143 			replstr = optarg;
144 			break;
145 		case 'J':
146 			Iflag = 0;
147 			Jflag = 1;
148 			replstr = optarg;
149 			break;
150 		case 'L':
151 			Lflag = atoi(optarg);
152 			break;
153 		case 'n':
154 			nflag = 1;
155 			if ((nargs = atoi(optarg)) <= 0)
156 				errx(1, "illegal argument count");
157 			break;
158 		case 'o':
159 			oflag = 1;
160 			break;
161 		case 'P':
162 			if ((maxprocs = atoi(optarg)) <= 0)
163 				errx(1, "max. processes must be >0");
164 			break;
165 		case 'p':
166 			pflag = 1;
167 			break;
168 		case 'R':
169 			Rflag = strtol(optarg, &endptr, 10);
170 			if (*endptr != '\0')
171 				errx(1, "replacements must be a number");
172 			break;
173 		case 'r':
174 			/* GNU compatibility */
175 			break;
176 		case 'S':
177 			Sflag = strtoul(optarg, &endptr, 10);
178 			if (*endptr != '\0')
179 				errx(1, "replsize must be a number");
180 			break;
181 		case 's':
182 			nline = atoi(optarg);
183 			break;
184 		case 't':
185 			tflag = 1;
186 			break;
187 		case 'x':
188 			xflag = 1;
189 			break;
190 		case '0':
191 			zflag = 1;
192 			break;
193 		case '?':
194 		default:
195 			usage();
196 	}
197 	argc -= optind;
198 	argv += optind;
199 
200 	if (!Iflag && Rflag)
201 		usage();
202 	if (!Iflag && Sflag)
203 		usage();
204 	if (Iflag && !Rflag)
205 		Rflag = 5;
206 	if (Iflag && !Sflag)
207 		Sflag = 255;
208 	if (xflag && !nflag)
209 		usage();
210 	if (Iflag || Lflag)
211 		xflag = 1;
212 	if (replstr != NULL && *replstr == '\0')
213 		errx(1, "replstr may not be empty");
214 
215 	pids_init();
216 
217 	/*
218 	 * Allocate pointers for the utility name, the utility arguments,
219 	 * the maximum arguments to be read from stdin and the trailing
220 	 * NULL.
221 	 */
222 	linelen = 1 + argc + nargs + 1;
223 	if ((av = bxp = malloc(linelen * sizeof(char **))) == NULL)
224 		errx(1, "malloc failed");
225 
226 	/*
227 	 * Use the user's name for the utility as argv[0], just like the
228 	 * shell.  Echo is the default.  Set up pointers for the user's
229 	 * arguments.
230 	 */
231 	if (*argv == NULL)
232 		cnt = strlen(*bxp++ = echo);
233 	else {
234 		do {
235 			if (Jflag && strcmp(*argv, replstr) == 0) {
236 				char **avj;
237 				jfound = 1;
238 				argv++;
239 				for (avj = argv; *avj; avj++)
240 					cnt += strlen(*avj) + 1;
241 				break;
242 			}
243 			cnt += strlen(*bxp++ = *argv) + 1;
244 		} while (*++argv != NULL);
245 	}
246 
247 	/*
248 	 * Set up begin/end/traversing pointers into the array.  The -n
249 	 * count doesn't include the trailing NULL pointer, so the malloc
250 	 * added in an extra slot.
251 	 */
252 	endxp = (xp = bxp) + nargs;
253 
254 	/*
255 	 * Allocate buffer space for the arguments read from stdin and the
256 	 * trailing NULL.  Buffer space is defined as the default or specified
257 	 * space, minus the length of the utility name and arguments.  Set up
258 	 * begin/end/traversing pointers into the array.  The -s count does
259 	 * include the trailing NULL, so the malloc didn't add in an extra
260 	 * slot.
261 	 */
262 	nline -= cnt;
263 	if (nline <= 0)
264 		errx(1, "insufficient space for command");
265 
266 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
267 		errx(1, "malloc failed");
268 	ebp = (argp = p = bbp) + nline - 1;
269 	for (;;)
270 		parse_input(argc, argv);
271 }
272 
273 static void
274 parse_input(int argc, char *argv[])
275 {
276 	int ch, foundeof;
277 	char **avj;
278 
279 	foundeof = 0;
280 
281 	switch (ch = getchar()) {
282 	case EOF:
283 		/* No arguments since last exec. */
284 		if (p == bbp)
285 			xexit(*av, rval);
286 		goto arg1;
287 	case ' ':
288 	case '\t':
289 		/* Quotes escape tabs and spaces. */
290 		if (insingle || indouble || zflag)
291 			goto addch;
292 		goto arg2;
293 	case '\0':
294 		if (zflag) {
295 			/*
296 			 * Increment 'count', so that nulls will be treated
297 			 * as end-of-line, as well as end-of-argument.  This
298 			 * is needed so -0 works properly with -I and -L.
299 			 */
300 			count++;
301 			goto arg2;
302 		}
303 		goto addch;
304 	case '\n':
305 		if (zflag)
306 			goto addch;
307 		count++;	    /* Indicate end-of-line (used by -L) */
308 
309 		/* Quotes do not escape newlines. */
310 arg1:		if (insingle || indouble) {
311 			warnx("unterminated quote");
312 			xexit(*av, 1);
313 		}
314 arg2:
315 		foundeof = *eofstr != '\0' &&
316 		    strncmp(argp, eofstr, p - argp) == 0;
317 
318 		/* Do not make empty args unless they are quoted */
319 		if ((argp != p || wasquoted) && !foundeof) {
320 			*p++ = '\0';
321 			*xp++ = argp;
322 			if (Iflag) {
323 				size_t curlen;
324 
325 				if (inpline == NULL)
326 					curlen = 0;
327 				else {
328 					/*
329 					 * If this string is not zero
330 					 * length, append a space for
331 					 * separation before the next
332 					 * argument.
333 					 */
334 					if ((curlen = strlen(inpline)))
335 						strcat(inpline, " ");
336 				}
337 				curlen++;
338 				/*
339 				 * Allocate enough to hold what we will
340 				 * be holding in a second, and to append
341 				 * a space next time through, if we have
342 				 * to.
343 				 */
344 				inpline = realloc(inpline, curlen + 2 +
345 				    strlen(argp));
346 				if (inpline == NULL) {
347 					warnx("realloc failed");
348 					xexit(*av, 1);
349 				}
350 				if (curlen == 1)
351 					strcpy(inpline, argp);
352 				else
353 					strcat(inpline, argp);
354 			}
355 		}
356 
357 		/*
358 		 * If max'd out on args or buffer, or reached EOF,
359 		 * run the command.  If xflag and max'd out on buffer
360 		 * but not on args, object.  Having reached the limit
361 		 * of input lines, as specified by -L is the same as
362 		 * maxing out on arguments.
363 		 */
364 		if (xp == endxp || p > ebp || ch == EOF ||
365 		    (Lflag <= count && xflag) || foundeof) {
366 			if (xflag && xp != endxp && p > ebp) {
367 				warnx("insufficient space for arguments");
368 				xexit(*av, 1);
369 			}
370 			if (jfound) {
371 				for (avj = argv; *avj; avj++)
372 					*xp++ = *avj;
373 			}
374 			prerun(argc, av);
375 			if (ch == EOF || foundeof)
376 				xexit(*av, rval);
377 			p = bbp;
378 			xp = bxp;
379 			count = 0;
380 		}
381 		argp = p;
382 		wasquoted = 0;
383 		break;
384 	case '\'':
385 		if (indouble || zflag)
386 			goto addch;
387 		insingle = !insingle;
388 		wasquoted = 1;
389 		break;
390 	case '"':
391 		if (insingle || zflag)
392 			goto addch;
393 		indouble = !indouble;
394 		wasquoted = 1;
395 		break;
396 	case '\\':
397 		if (zflag)
398 			goto addch;
399 		/* Backslash escapes anything, is escaped by quotes. */
400 		if (!insingle && !indouble && (ch = getchar()) == EOF) {
401 			warnx("backslash at EOF");
402 			xexit(*av, 1);
403 		}
404 		/* FALLTHROUGH */
405 	default:
406 addch:		if (p < ebp) {
407 			*p++ = ch;
408 			break;
409 		}
410 
411 		/* If only one argument, not enough buffer space. */
412 		if (bxp == xp) {
413 			warnx("insufficient space for argument");
414 			xexit(*av, 1);
415 		}
416 		/* Didn't hit argument limit, so if xflag object. */
417 		if (xflag) {
418 			warnx("insufficient space for arguments");
419 			xexit(*av, 1);
420 		}
421 
422 		if (jfound) {
423 			for (avj = argv; *avj; avj++)
424 				*xp++ = *avj;
425 		}
426 		prerun(argc, av);
427 		xp = bxp;
428 		cnt = ebp - argp;
429 		memcpy(bbp, argp, (size_t)cnt);
430 		p = (argp = bbp) + cnt;
431 		*p++ = ch;
432 		break;
433 	}
434 }
435 
436 /*
437  * Do things necessary before run()'ing, such as -I substitution,
438  * and then call run().
439  */
440 static void
441 prerun(int argc, char *argv[])
442 {
443 	char **tmp, **tmp2, **avj;
444 	int repls;
445 
446 	repls = Rflag;
447 
448 	if (argc == 0 || repls == 0) {
449 		*xp = NULL;
450 		run(argv);
451 		return;
452 	}
453 
454 	avj = argv;
455 
456 	/*
457 	 * Allocate memory to hold the argument list, and
458 	 * a NULL at the tail.
459 	 */
460 	tmp = malloc((argc + 1) * sizeof(char**));
461 	if (tmp == NULL) {
462 		warnx("malloc failed");
463 		xexit(*argv, 1);
464 	}
465 	tmp2 = tmp;
466 
467 	/*
468 	 * Save the first argument and iterate over it, we
469 	 * cannot do strnsubst() to it.
470 	 */
471 	if ((*tmp++ = strdup(*avj++)) == NULL) {
472 		warnx("strdup failed");
473 		xexit(*argv, 1);
474 	}
475 
476 	/*
477 	 * For each argument to utility, if we have not used up
478 	 * the number of replacements we are allowed to do, and
479 	 * if the argument contains at least one occurrence of
480 	 * replstr, call strnsubst(), else just save the string.
481 	 * Iterations over elements of avj and tmp are done
482 	 * where appropriate.
483 	 */
484 	while (--argc) {
485 		*tmp = *avj++;
486 		if (repls && strstr(*tmp, replstr) != NULL) {
487 			strnsubst(tmp++, replstr, inpline, (size_t)Sflag);
488 			if (repls > 0)
489 				repls--;
490 		} else {
491 			if ((*tmp = strdup(*tmp)) == NULL) {
492 				warnx("strdup failed");
493 				xexit(*argv, 1);
494 			}
495 			tmp++;
496 		}
497 	}
498 
499 	/*
500 	 * Run it.
501 	 */
502 	*tmp = NULL;
503 	run(tmp2);
504 
505 	/*
506 	 * Walk from the tail to the head, free along the way.
507 	 */
508 	for (; tmp2 != tmp; tmp--)
509 		free(*tmp);
510 	/*
511 	 * Now free the list itself.
512 	 */
513 	free(tmp2);
514 
515 	/*
516 	 * Free the input line buffer, if we have one.
517 	 */
518 	if (inpline != NULL) {
519 		free(inpline);
520 		inpline = NULL;
521 	}
522 }
523 
524 static void
525 run(char **argv)
526 {
527 	pid_t pid;
528 	int fd;
529 	char **avec;
530 
531 	/*
532 	 * If the user wants to be notified of each command before it is
533 	 * executed, notify them.  If they want the notification to be
534 	 * followed by a prompt, then prompt them.
535 	 */
536 	if (tflag || pflag) {
537 		(void)fprintf(stderr, "%s", *argv);
538 		for (avec = argv + 1; *avec != NULL; ++avec)
539 			(void)fprintf(stderr, " %s", *avec);
540 		/*
541 		 * If the user has asked to be prompted, do so.
542 		 */
543 		if (pflag)
544 			/*
545 			 * If they asked not to exec, return without execution
546 			 * but if they asked to, go to the execution.  If we
547 			 * could not open their tty, break the switch and drop
548 			 * back to -t behaviour.
549 			 */
550 			switch (prompt()) {
551 			case 0:
552 				return;
553 			case 1:
554 				goto exec;
555 			case 2:
556 				break;
557 			}
558 		(void)fprintf(stderr, "\n");
559 		(void)fflush(stderr);
560 	}
561 exec:
562 	childerr = 0;
563 	switch (pid = vfork()) {
564 	case -1:
565 		warn("vfork");
566 		xexit(*argv, 1);
567 	case 0:
568 		if (oflag) {
569 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1)
570 				err(1, "can't open /dev/tty");
571 		} else {
572 			fd = open(_PATH_DEVNULL, O_RDONLY);
573 		}
574 		if (fd > STDIN_FILENO) {
575 			if (dup2(fd, STDIN_FILENO) != 0)
576 				err(1, "can't dup2 to stdin");
577 			close(fd);
578 		}
579 		execvp(argv[0], argv);
580 		childerr = errno;
581 		_exit(1);
582 	}
583 	pids_add(pid);
584 	waitchildren(*argv, 0);
585 }
586 
/*
 * Reap one tracked child and hand back its pid and exit status.
 *
 * Children that are not in the pid table are reaped and ignored.
 * With no tracked children at all, -1 is returned and errno is set to
 * ECHILD.  When block is non-zero the call waits until a child exits;
 * otherwise 0 is returned immediately if none has exited yet.
 */
static pid_t
xwait(int block, int *status)
{
	const int options = block ? 0 : WNOHANG;
	pid_t reaped;

	if (pids_empty()) {
		errno = ECHILD;
		return (-1);
	}

	for (;;) {
		reaped = waitpid(-1, status, options);
		/* Stop on error/nothing-ready, or on one of our own. */
		if (reaped <= 0 || pids_remove(reaped))
			return (reaped);
	}
}
610 
/*
 * Wait for every outstanding child, then terminate with exit_code.
 * name is passed on to waitchildren().
 */
static void
xexit(const char *name, const int exit_code)
{
	const int waitall = 1;

	waitchildren(name, waitall);
	exit(exit_code);
}
616 
617 static void
618 waitchildren(const char *name, int waitall)
619 {
620 	pid_t pid;
621 	int status;
622 	int cause_exit = 0;
623 
624 	while ((pid = xwait(waitall || pids_full(), &status)) > 0) {
625 		/*
626 		 * If we couldn't invoke the utility or if utility exited
627 		 * because of a signal or with a value of 255, warn (per
628 		 * POSIX), and then wait until all other children have
629 		 * exited before exiting 1-125. POSIX requires us to stop
630 		 * reading if child exits because of a signal or with 255,
631 		 * but it does not require us to exit immediately; waiting
632 		 * is preferable to orphaning.
633 		 */
634 		if (childerr != 0 && cause_exit == 0) {
635 			errno = childerr;
636 			waitall = 1;
637 			cause_exit = ENOENT ? 127 : 126;
638 			warn("%s", name);
639 		} else if (WIFSIGNALED(status)) {
640 			waitall = cause_exit = 1;
641 			warnx("%s: terminated with signal %d; aborting",
642 			    name, WTERMSIG(status));
643 		} else if (WEXITSTATUS(status) == 255) {
644 			waitall = cause_exit = 1;
645 			warnx("%s: exited with status 255; aborting", name);
646 		} else if (WEXITSTATUS(status))
647  			rval = 1;
648 	}
649 
650  	if (cause_exit)
651 		exit(cause_exit);
652 	if (pid == -1 && errno != ECHILD)
653 		err(1, "waitpid");
654 }
655 
656 #define	NOPID	(0)
657 
658 static void
659 pids_init(void)
660 {
661 	int i;
662 
663 	if ((childpids = malloc(maxprocs * sizeof(*childpids))) == NULL)
664 		errx(1, "malloc failed");
665 
666 	for (i = 0; i < maxprocs; i++)
667 		clearslot(i);
668 }
669 
670 static int
671 pids_empty(void)
672 {
673 	return (curprocs == 0);
674 }
675 
676 static int
677 pids_full(void)
678 {
679 	return (curprocs >= maxprocs);
680 }
681 
682 static void
683 pids_add(pid_t pid)
684 {
685 	int slot;
686 
687 	slot = findfreeslot();
688 	childpids[slot] = pid;
689 	curprocs++;
690 }
691 
692 static int
693 pids_remove(pid_t pid)
694 {
695 	int slot;
696 
697 	if ((slot = findslot(pid)) < 0)
698 		return (0);
699 
700 	clearslot(slot);
701 	curprocs--;
702 	return (1);
703 }
704 
705 static int
706 findfreeslot(void)
707 {
708 	int slot;
709 
710 	if ((slot = findslot(NOPID)) < 0)
711 		errx(1, "internal error: no free pid slot");
712 
713 	return (slot);
714 }
715 
716 static int
717 findslot(pid_t pid)
718 {
719 	int slot;
720 
721 	for (slot = 0; slot < maxprocs; slot++)
722 		if (childpids[slot] == pid)
723 			return (slot);
724 
725 	return (-1);
726 }
727 
728 static void
729 clearslot(int slot)
730 {
731 	childpids[slot] = NOPID;
732 }
733 
/*
 * Prompt the user about running a command.
 *
 * Writes "?..." to stderr and reads one line from the controlling
 * terminal.  Returns:
 *	2  the terminal could not be opened,
 *	1  the reply matches the locale's affirmative expression,
 *	0  the reply does not match, or it could not be read, or the
 *	   expression could not be compiled.
 */
static int
prompt(void)
{
	regex_t cre;
	size_t rsize;
	int match;
	char *response;
	FILE *ttyfp;

	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
		return (2);	/* Indicate that the TTY failed to open. */
	(void)fprintf(stderr, "?...");
	(void)fflush(stderr);
	/*
	 * YESEXPR is specified by POSIX as an extended regular
	 * expression, so it must be compiled as one.
	 */
	if ((response = fgetln(ttyfp, &rsize)) == NULL ||
	    regcomp(&cre, nl_langinfo(YESEXPR), REG_EXTENDED) != 0) {
		(void)fclose(ttyfp);
		return (0);
	}
	/* fgetln() does not NUL terminate; overwrite the final byte. */
	response[rsize - 1] = '\0';
	match = regexec(&cre, response, 0, NULL, 0);
	(void)fclose(ttyfp);
	regfree(&cre);
	return (match == 0);
}
761 
/* Write the command synopsis to stderr and exit with status 1. */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0opt] [-E eofstr] [-I replstr [-R replacements] [-S replsize]]\n"
"             [-J replstr] [-L number] [-n number [-x]] [-P maxprocs]\n"
"             [-s size] [utility [argument ...]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
771