xref: /netbsd/usr.bin/xargs/xargs.c (revision 6550d01e)
1 /*	$NetBSD: xargs.c,v 1.20 2010/12/17 11:32:57 plunky Exp $	*/
2 
3 /*-
4  * Copyright (c) 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * John B. Roll Jr.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
35  */
36 
37 #include <sys/cdefs.h>
38 #ifndef lint
39 __COPYRIGHT("@(#) Copyright (c) 1990, 1993\
40  The Regents of the University of California.  All rights reserved.");
41 #if 0
42 static char sccsid[] = "@(#)xargs.c	8.1 (Berkeley) 6/6/93";
43 __FBSDID("$FreeBSD: src/usr.bin/xargs/xargs.c,v 1.62 2006/01/01 22:59:54 jmallett Exp $");
44 #endif
45 __RCSID("$NetBSD: xargs.c,v 1.20 2010/12/17 11:32:57 plunky Exp $");
46 #endif /* not lint */
47 
#include <sys/param.h>
#include <sys/wait.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <langinfo.h>
#include <limits.h>
#include <locale.h>
#include <paths.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pathnames.h"
65 
/* Prototypes for the helpers defined later in this file. */
static void	parse_input(int, char *[]);
static void	prerun(int, char *[]);
static int	prompt(void);
static void	run(char **);
static void	usage(void) __dead;
/* Not static and not defined in the visible part of this file. */
void		strnsubst(char **, const char *, const char *, size_t);
static void	waitchildren(const char *, int);

/* Default utility, used by main() when no utility is named. */
static char echo[] = _PATH_ECHO;
/*
 * Argument vector bookkeeping:
 *   av    - start of the vector allocated in main() (utility name first)
 *   bxp   - first slot that receives an argument read from stdin
 *   xp    - next free slot
 *   endxp - bxp + nargs; reaching it means the argument limit was hit
 *   ep    - cursor main() uses to walk environ
 */
static char **av, **bxp, **ep, **endxp, **xp;
/*
 * Input buffer bookkeeping:
 *   bbp     - start of the buffer allocated in main()
 *   ebp     - bbp + nline - 1; parse_input() stores characters only
 *             while p < ebp
 *   p       - next write position
 *   argp    - start of the argument currently being collected
 *   inpline - space-separated copy of the collected arguments, built
 *             only when -I is in effect and released by prerun()
 *   replstr - replacement string given with -I or -J
 */
static char *argp, *bbp, *ebp, *inpline, *p, *replstr;
/* Logical end-of-file string from -E; the empty string disables it. */
static const char *eofstr;
/*
 * count counts end-of-line events since the last command was run (compared
 * against Lflag); insingle/indouble track open quotes; oflag, pflag, tflag
 * and zflag record -o, -p, -t and -0; Rflag holds the -R value; rval is the
 * status passed to exit().
 */
static int count, insingle, indouble, oflag, pflag, tflag, Rflag, rval, zflag;
/*
 * cnt is the number of bytes taken by the utility name and its fixed
 * arguments (parse_input() also reuses it as scratch); Iflag and xflag
 * record -I and -x; Lflag and Sflag hold the -L and -S values; jfound is
 * set when the -J replacement string was seen among the utility arguments;
 * wasquoted notes that the current argument contained a quote character.
 */
static int cnt, Iflag, jfound, Lflag, Sflag, wasquoted, xflag;
/* Children started and not yet reaped, and the -P limit on them. */
static int curprocs, maxprocs;

/* errno stored by the vfork() child in run() when execvp() fails. */
static volatile int childerr;

extern char **environ;
85 
86 int
87 main(int argc, char *argv[])
88 {
89 	long arg_max;
90 	int ch, Jflag, nargs, nflag, nline;
91 	size_t linelen;
92 	char *endptr;
93 
94 	setprogname(argv[0]);
95 
96 	inpline = replstr = NULL;
97 	ep = environ;
98 	eofstr = "";
99 	Jflag = nflag = 0;
100 
101 	(void)setlocale(LC_ALL, "");
102 
103 	/*
104 	 * SUSv3 says of the exec family of functions:
105 	 *     The number of bytes available for the new process'
106 	 *     combined argument and environment lists is {ARG_MAX}. It
107 	 *     is implementation-defined whether null terminators,
108 	 *     pointers, and/or any alignment bytes are included in this
109 	 *     total.
110 	 *
111 	 * SUSv3 says of xargs:
112 	 *     ... the combined argument and environment lists ...
113 	 *     shall not exceed {ARG_MAX}-2048.
114 	 *
115 	 * To be conservative, we use ARG_MAX - 4K, and we do include
116 	 * nul terminators and pointers in the calculation.
117 	 *
118 	 * Given that the smallest argument is 2 bytes in length, this
119 	 * means that the number of arguments is limited to:
120 	 *
121 	 *	 (ARG_MAX - 4K - LENGTH(env + utility + arguments)) / 2.
122 	 *
123 	 * We arbitrarily limit the number of arguments to 5000.  This is
124 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
125 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
126 	 * probably not worthwhile.
127 	 */
128 	nargs = 5000;
129 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
130 		errx(1, "sysconf(_SC_ARG_MAX) failed");
131 	nline = arg_max - 4 * 1024;
132 	while (*ep != NULL) {
133 		/* 1 byte for each '\0' */
134 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
135 	}
136 	maxprocs = 1;
137 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:S:s:rtx")) != -1)
138 		switch (ch) {
139 		case 'E':
140 			eofstr = optarg;
141 			break;
142 		case 'I':
143 			Jflag = 0;
144 			Iflag = 1;
145 			Lflag = 1;
146 			replstr = optarg;
147 			break;
148 		case 'J':
149 			Iflag = 0;
150 			Jflag = 1;
151 			replstr = optarg;
152 			break;
153 		case 'L':
154 			Lflag = atoi(optarg);
155 			break;
156 		case 'n':
157 			nflag = 1;
158 			if ((nargs = atoi(optarg)) <= 0)
159 				errx(1, "illegal argument count");
160 			break;
161 		case 'o':
162 			oflag = 1;
163 			break;
164 		case 'P':
165 			if ((maxprocs = atoi(optarg)) <= 0)
166 				errx(1, "max. processes must be >0");
167 			break;
168 		case 'p':
169 			pflag = 1;
170 			break;
171 		case 'R':
172 			Rflag = strtol(optarg, &endptr, 10);
173 			if (*endptr != '\0')
174 				errx(1, "replacements must be a number");
175 			break;
176 		case 'r':
177 			/* GNU compatibility */
178 			break;
179 		case 'S':
180 			Sflag = strtoul(optarg, &endptr, 10);
181 			if (*endptr != '\0')
182 				errx(1, "replsize must be a number");
183 			break;
184 		case 's':
185 			nline = atoi(optarg);
186 			break;
187 		case 't':
188 			tflag = 1;
189 			break;
190 		case 'x':
191 			xflag = 1;
192 			break;
193 		case '0':
194 			zflag = 1;
195 			break;
196 		case '?':
197 		default:
198 			usage();
199 	}
200 	argc -= optind;
201 	argv += optind;
202 
203 	if (!Iflag && Rflag)
204 		usage();
205 	if (!Iflag && Sflag)
206 		usage();
207 	if (Iflag && !Rflag)
208 		Rflag = 5;
209 	if (Iflag && !Sflag)
210 		Sflag = 255;
211 	if (xflag && !nflag)
212 		usage();
213 	if (Iflag || Lflag)
214 		xflag = 1;
215 	if (replstr != NULL && *replstr == '\0')
216 		errx(1, "replstr may not be empty");
217 
218 	/*
219 	 * Allocate pointers for the utility name, the utility arguments,
220 	 * the maximum arguments to be read from stdin and the trailing
221 	 * NULL.
222 	 */
223 	linelen = 1 + argc + nargs + 1;
224 	if ((av = bxp = malloc(linelen * sizeof(char **))) == NULL)
225 		errx(1, "malloc failed");
226 
227 	/*
228 	 * Use the user's name for the utility as argv[0], just like the
229 	 * shell.  Echo is the default.  Set up pointers for the user's
230 	 * arguments.
231 	 */
232 	if (*argv == NULL)
233 		cnt = strlen(*bxp++ = echo);
234 	else {
235 		do {
236 			if (Jflag && strcmp(*argv, replstr) == 0) {
237 				char **avj;
238 				jfound = 1;
239 				argv++;
240 				for (avj = argv; *avj; avj++)
241 					cnt += strlen(*avj) + 1;
242 				break;
243 			}
244 			cnt += strlen(*bxp++ = *argv) + 1;
245 		} while (*++argv != NULL);
246 	}
247 
248 	/*
249 	 * Set up begin/end/traversing pointers into the array.  The -n
250 	 * count doesn't include the trailing NULL pointer, so the malloc
251 	 * added in an extra slot.
252 	 */
253 	endxp = (xp = bxp) + nargs;
254 
255 	/*
256 	 * Allocate buffer space for the arguments read from stdin and the
257 	 * trailing NULL.  Buffer space is defined as the default or specified
258 	 * space, minus the length of the utility name and arguments.  Set up
259 	 * begin/end/traversing pointers into the array.  The -s count does
260 	 * include the trailing NULL, so the malloc didn't add in an extra
261 	 * slot.
262 	 */
263 	nline -= cnt;
264 	if (nline <= 0)
265 		errx(1, "insufficient space for command");
266 
267 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
268 		errx(1, "malloc failed");
269 	ebp = (argp = p = bbp) + nline - 1;
270 	for (;;)
271 		parse_input(argc, argv);
272 }
273 
274 static void
275 parse_input(int argc, char *argv[])
276 {
277 	int ch, foundeof;
278 	char **avj;
279 
280 	foundeof = 0;
281 
282 	switch (ch = getchar()) {
283 	case EOF:
284 		/* No arguments since last exec. */
285 		if (p == bbp) {
286 			waitchildren(*argv, 1);
287 			exit(rval);
288 		}
289 		goto arg1;
290 	case ' ':
291 	case '\t':
292 		/* Quotes escape tabs and spaces. */
293 		if (insingle || indouble || zflag)
294 			goto addch;
295 		goto arg2;
296 	case '\0':
297 		if (zflag) {
298 			/*
299 			 * Increment 'count', so that nulls will be treated
300 			 * as end-of-line, as well as end-of-argument.  This
301 			 * is needed so -0 works properly with -I and -L.
302 			 */
303 			count++;
304 			goto arg2;
305 		}
306 		goto addch;
307 	case '\n':
308 		if (zflag)
309 			goto addch;
310 		count++;	    /* Indicate end-of-line (used by -L) */
311 
312 		/* Quotes do not escape newlines. */
313 arg1:		if (insingle || indouble)
314 			errx(1, "unterminated quote");
315 arg2:
316 		foundeof = *eofstr != '\0' &&
317 		    strncmp(argp, eofstr, (size_t)(p - argp)) == 0;
318 
319 		/* Do not make empty args unless they are quoted */
320 		if ((argp != p || wasquoted) && !foundeof) {
321 			*p++ = '\0';
322 			*xp++ = argp;
323 			if (Iflag) {
324 				size_t curlen;
325 
326 				if (inpline == NULL)
327 					curlen = 0;
328 				else {
329 					/*
330 					 * If this string is not zero
331 					 * length, append a space for
332 					 * separation before the next
333 					 * argument.
334 					 */
335 					if ((curlen = strlen(inpline)) != 0)
336 						(void)strcat(inpline, " ");
337 				}
338 				curlen++;
339 				/*
340 				 * Allocate enough to hold what we will
341 				 * be holding in a second, and to append
342 				 * a space next time through, if we have
343 				 * to.
344 				 */
345 				inpline = realloc(inpline, curlen + 2 +
346 				    strlen(argp));
347 				if (inpline == NULL)
348 					errx(1, "realloc failed");
349 				if (curlen == 1)
350 					(void)strcpy(inpline, argp);
351 				else
352 					(void)strcat(inpline, argp);
353 			}
354 		}
355 
356 		/*
357 		 * If max'd out on args or buffer, or reached EOF,
358 		 * run the command.  If xflag and max'd out on buffer
359 		 * but not on args, object.  Having reached the limit
360 		 * of input lines, as specified by -L is the same as
361 		 * maxing out on arguments.
362 		 */
363 		if (xp == endxp || p > ebp || ch == EOF ||
364 		    (Lflag <= count && xflag) || foundeof) {
365 			if (xflag && xp != endxp && p > ebp)
366 				errx(1, "insufficient space for arguments");
367 			if (jfound) {
368 				for (avj = argv; *avj; avj++)
369 					*xp++ = *avj;
370 			}
371 			prerun(argc, av);
372 			if (ch == EOF || foundeof) {
373 				waitchildren(*argv, 1);
374 				exit(rval);
375 			}
376 			p = bbp;
377 			xp = bxp;
378 			count = 0;
379 		}
380 		argp = p;
381 		wasquoted = 0;
382 		break;
383 	case '\'':
384 		if (indouble || zflag)
385 			goto addch;
386 		insingle = !insingle;
387 		wasquoted = 1;
388 		break;
389 	case '"':
390 		if (insingle || zflag)
391 			goto addch;
392 		indouble = !indouble;
393 		wasquoted = 1;
394 		break;
395 	case '\\':
396 		if (zflag)
397 			goto addch;
398 		/* Backslash escapes anything, is escaped by quotes. */
399 		if (!insingle && !indouble && (ch = getchar()) == EOF)
400 			errx(1, "backslash at EOF");
401 		/* FALLTHROUGH */
402 	default:
403 addch:		if (p < ebp) {
404 			*p++ = ch;
405 			break;
406 		}
407 
408 		/* If only one argument, not enough buffer space. */
409 		if (bxp == xp)
410 			errx(1, "insufficient space for argument");
411 		/* Didn't hit argument limit, so if xflag object. */
412 		if (xflag)
413 			errx(1, "insufficient space for arguments");
414 
415 		if (jfound) {
416 			for (avj = argv; *avj; avj++)
417 				*xp++ = *avj;
418 		}
419 		prerun(argc, av);
420 		xp = bxp;
421 		cnt = ebp - argp;
422 		(void)memcpy(bbp, argp, (size_t)cnt);
423 		p = (argp = bbp) + cnt;
424 		*p++ = ch;
425 		break;
426 	}
427 }
428 
429 /*
430  * Do things necessary before run()'ing, such as -I substitution,
431  * and then call run().
432  */
433 static void
434 prerun(int argc, char *argv[])
435 {
436 	char **tmp, **tmp2, **avj;
437 	int repls;
438 
439 	repls = Rflag;
440 
441 	if (argc == 0 || repls == 0) {
442 		*xp = NULL;
443 		run(argv);
444 		return;
445 	}
446 
447 	avj = argv;
448 
449 	/*
450 	 * Allocate memory to hold the argument list, and
451 	 * a NULL at the tail.
452 	 */
453 	tmp = malloc((argc + 1) * sizeof(char**));
454 	if (tmp == NULL)
455 		errx(1, "malloc failed");
456 	tmp2 = tmp;
457 
458 	/*
459 	 * Save the first argument and iterate over it, we
460 	 * cannot do strnsubst() to it.
461 	 */
462 	if ((*tmp++ = strdup(*avj++)) == NULL)
463 		errx(1, "strdup failed");
464 
465 	/*
466 	 * For each argument to utility, if we have not used up
467 	 * the number of replacements we are allowed to do, and
468 	 * if the argument contains at least one occurrence of
469 	 * replstr, call strnsubst(), else just save the string.
470 	 * Iterations over elements of avj and tmp are done
471 	 * where appropriate.
472 	 */
473 	while (--argc) {
474 		*tmp = *avj++;
475 		if (repls && strstr(*tmp, replstr) != NULL) {
476 			strnsubst(tmp++, replstr, inpline, (size_t)Sflag);
477 			if (repls > 0)
478 				repls--;
479 		} else {
480 			if ((*tmp = strdup(*tmp)) == NULL)
481 				errx(1, "strdup failed");
482 			tmp++;
483 		}
484 	}
485 
486 	/*
487 	 * Run it.
488 	 */
489 	*tmp = NULL;
490 	run(tmp2);
491 
492 	/*
493 	 * Walk from the tail to the head, free along the way.
494 	 */
495 	for (; tmp2 != tmp; tmp--)
496 		free(*tmp);
497 	/*
498 	 * Now free the list itself.
499 	 */
500 	free(tmp2);
501 
502 	/*
503 	 * Free the input line buffer, if we have one.
504 	 */
505 	if (inpline != NULL) {
506 		free(inpline);
507 		inpline = NULL;
508 	}
509 }
510 
511 static void
512 run(char **argv)
513 {
514 	int fd;
515 	char **avec;
516 
517 	/*
518 	 * If the user wants to be notified of each command before it is
519 	 * executed, notify them.  If they want the notification to be
520 	 * followed by a prompt, then prompt them.
521 	 */
522 	if (tflag || pflag) {
523 		(void)fprintf(stderr, "%s", *argv);
524 		for (avec = argv + 1; *avec != NULL; ++avec)
525 			(void)fprintf(stderr, " %s", *avec);
526 		/*
527 		 * If the user has asked to be prompted, do so.
528 		 */
529 		if (pflag)
530 			/*
531 			 * If they asked not to exec, return without execution
532 			 * but if they asked to, go to the execution.  If we
533 			 * could not open their tty, break the switch and drop
534 			 * back to -t behaviour.
535 			 */
536 			switch (prompt()) {
537 			case 0:
538 				return;
539 			case 1:
540 				goto exec;
541 			case 2:
542 				break;
543 			}
544 		(void)fprintf(stderr, "\n");
545 		(void)fflush(stderr);
546 	}
547 exec:
548 	childerr = 0;
549 	switch (vfork()) {
550 	case -1:
551 		err(1, "vfork");
552 		/*NOTREACHED*/
553 	case 0:
554 		if (oflag) {
555 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1)
556 				err(1, "can't open /dev/tty");
557 		} else {
558 			fd = open(_PATH_DEVNULL, O_RDONLY);
559 		}
560 		if (fd > STDIN_FILENO) {
561 			if (dup2(fd, STDIN_FILENO) != 0)
562 				err(1, "can't dup2 to stdin");
563 			(void)close(fd);
564 		}
565 		(void)execvp(argv[0], argv);
566 		childerr = errno;
567 		_exit(1);
568 	}
569 	curprocs++;
570 	waitchildren(*argv, 0);
571 }
572 
573 static void
574 waitchildren(const char *name, int waitall)
575 {
576 	pid_t pid;
577 	int status;
578 
579 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
580 	    WNOHANG : 0)) > 0) {
581 		curprocs--;
582 		/* If we couldn't invoke the utility, exit. */
583 		if (childerr != 0) {
584 			errno = childerr;
585 			err(errno == ENOENT ? 127 : 126, "%s", name);
586 		}
587 		/*
588 		 * According to POSIX, we have to exit if the utility exits
589 		 * with a 255 status, or is interrupted by a signal. xargs
590 		 * is allowed to return any exit status between 1 and 125
591 		 * in these cases, but we'll use 124 and 125, the same
592 		 * values used by GNU xargs.
593 		 */
594 		if (WIFEXITED(status)) {
595 			if (WEXITSTATUS (status) == 255) {
596 				warnx ("%s exited with status 255", name);
597 				exit(124);
598 			} else if (WEXITSTATUS (status) != 0) {
599 				rval = 123;
600 			}
601 		} else if (WIFSIGNALED (status)) {
602 			if (WTERMSIG(status) < NSIG) {
603 				warnx("%s terminated by SIG%s", name,
604 				    sys_signame[WTERMSIG(status)]);
605 			} else {
606 				warnx("%s terminated by signal %d", name,
607 				    WTERMSIG(status));
608 			}
609 			exit(125);
610 		}
611 	}
612 	if (pid == -1 && errno != ECHILD)
613 		err(1, "waitpid");
614 }
615 
616 /*
617  * Prompt the user about running a command.
618  */
619 static int
620 prompt(void)
621 {
622 	regex_t cre;
623 	size_t rsize;
624 	int match;
625 	char *response;
626 	FILE *ttyfp;
627 
628 	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
629 		return (2);	/* Indicate that the TTY failed to open. */
630 	(void)fprintf(stderr, "?...");
631 	(void)fflush(stderr);
632 	if ((response = fgetln(ttyfp, &rsize)) == NULL ||
633 	    regcomp(&cre, nl_langinfo(YESEXPR), REG_BASIC) != 0) {
634 		(void)fclose(ttyfp);
635 		return (0);
636 	}
637 	response[rsize - 1] = '\0';
638 	match = regexec(&cre, response, 0, NULL, 0);
639 	(void)fclose(ttyfp);
640 	regfree(&cre);
641 	return (match == 0);
642 }
643 
/*
 * Write the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	const char *prog;

	prog = getprogname();
	(void)fprintf(stderr,
"Usage: %s [-0opt] [-E eofstr] [-I replstr [-R replacements] [-S replsize]]\n"
"             [-J replstr] [-L number] [-n number [-x]] [-P maxprocs]\n"
"             [-s size] [utility [argument ...]]\n",
	    prog);
	exit(1);
}
653