xref: /openbsd/usr.bin/xargs/xargs.c (revision 09467b48)
1 /*	$OpenBSD: xargs.c,v 1.35 2020/07/19 13:19:25 schwarze Exp $	*/
2 /*	$FreeBSD: xargs.c,v 1.51 2003/05/03 19:09:11 obrien Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * John B. Roll Jr.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
36  */
37 
38 #include <sys/wait.h>
39 
40 #include <ctype.h>
41 #include <err.h>
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <paths.h>
45 #include <signal.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <limits.h>
51 
52 #include "pathnames.h"
53 
/* Helpers defined in this file. */
static void	usage(void);
static int	prompt(void);
static void	run(char **);
static void	prerun(int, char *[]);
static void	parse_input(int, char *[]);
static void	waitchildren(const char *, int);

/* Defined outside this file. */
void		strnsubst(char **, const char *, const char *, size_t);
extern char **environ;

/* Argument vector passed to the utility. */
static char **av;	/* start of the vector; utility name comes first */
static char **bxp;	/* first slot after the utility's fixed arguments */
static char **xp;	/* next free slot for an argument read from stdin */
static char **endxp;	/* bxp plus the per-command argument limit */
static char **ep;	/* cursor main() uses to walk environ */

/* Character buffer holding the arguments read from stdin. */
static char *bbp;	/* start of the buffer */
static char *ebp;	/* bbp + nline - 1, the fill limit */
static char *p;		/* next byte to write */
static char *argp;	/* start of the argument being collected */

/* Option arguments and -I bookkeeping. */
static char *replstr;		/* argument of -I or -J */
static const char *eofstr;	/* argument of -E, "" when unset */
static char *inpline;		/* input arguments joined by spaces for -I */
static size_t inpsize;		/* size last requested for inpline */

/* Option flags and values, set in main(). */
static int Iflag;	/* -I given */
static int Lflag;	/* -L value; -I sets it to 1 */
static int Rflag;	/* -R value; defaults to 5 when -I is given */
static int oflag;	/* -o given */
static int pflag;	/* -p given */
static int tflag;	/* -t given */
static int xflag;	/* -x given, or implied by -I / -L */
static int zflag;	/* -0 given */
static int runeof = 1;	/* cleared by -r and by the first prerun() */
static int maxprocs;	/* -P value, 1 by default */

/* Parser and process state. */
static int count;	/* input lines counted since the last command */
static int insingle;	/* inside a single-quoted section */
static int indouble;	/* inside a double-quoted section */
static int wasquoted;	/* current argument contained a quote character */
static int jfound;	/* the -J replstr was found among the arguments */
static int cnt;		/* length of utility + fixed args; also scratch */
static int curprocs;	/* children started and not yet reaped */
static int rval;	/* exit status; 123 once a child exits non-zero */
71 
72 int
73 main(int argc, char *argv[])
74 {
75 	long arg_max;
76 	int ch, Jflag, nargs, nflag, nline;
77 	size_t linelen;
78 	char *endptr;
79 	const char *errstr;
80 
81 	inpline = replstr = NULL;
82 	ep = environ;
83 	eofstr = "";
84 	Jflag = nflag = 0;
85 
86 	/*
87 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
88 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
89 	 * that the smallest argument is 2 bytes in length, this means that
90 	 * the number of arguments is limited to:
91 	 *
92 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
93 	 *
94 	 * We arbitrarily limit the number of arguments to 5000.  This is
95 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
96 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
97 	 * probably not worthwhile.
98 	 */
99 	nargs = 5000;
100 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
101 		errx(1, "sysconf(_SC_ARG_MAX) failed");
102 
103 	if (pledge("stdio rpath proc exec", NULL) == -1)
104 		err(1, "pledge");
105 
106 	nline = arg_max - 4 * 1024;
107 	while (*ep != NULL) {
108 		/* 1 byte for each '\0' */
109 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
110 	}
111 	maxprocs = 1;
112 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:rs:tx")) != -1)
113 		switch (ch) {
114 		case 'E':
115 			eofstr = optarg;
116 			break;
117 		case 'I':
118 			Jflag = 0;
119 			Iflag = 1;
120 			Lflag = 1;
121 			replstr = optarg;
122 			break;
123 		case 'J':
124 			Iflag = 0;
125 			Jflag = 1;
126 			replstr = optarg;
127 			break;
128 		case 'L':
129 			Lflag = strtonum(optarg, 0, INT_MAX, &errstr);
130 			if (errstr)
131 				errx(1, "-L %s: %s", optarg, errstr);
132 			break;
133 		case 'n':
134 			nflag = 1;
135 			nargs = strtonum(optarg, 1, INT_MAX, &errstr);
136 			if (errstr)
137 				errx(1, "-n %s: %s", optarg, errstr);
138 			break;
139 		case 'o':
140 			oflag = 1;
141 			break;
142 		case 'P':
143 			maxprocs = strtonum(optarg, 1, INT_MAX, &errstr);
144 			if (errstr)
145 				errx(1, "-P %s: %s", optarg, errstr);
146 			break;
147 		case 'p':
148 			pflag = 1;
149 			break;
150 		case 'r':
151 			runeof = 0;
152 			break;
153 		case 'R':
154 			Rflag = strtol(optarg, &endptr, 10);
155 			if (*endptr != '\0')
156 				errx(1, "replacements must be a number");
157 			break;
158 		case 's':
159 			nline = strtonum(optarg, 0, INT_MAX, &errstr);
160 			if (errstr)
161 				errx(1, "-s %s: %s", optarg, errstr);
162 			break;
163 		case 't':
164 			tflag = 1;
165 			break;
166 		case 'x':
167 			xflag = 1;
168 			break;
169 		case '0':
170 			zflag = 1;
171 			break;
172 		case '?':
173 		default:
174 			usage();
175 	}
176 	argc -= optind;
177 	argv += optind;
178 
179 	if (!Iflag && Rflag)
180 		usage();
181 	if (Iflag && !Rflag)
182 		Rflag = 5;
183 	if (xflag && !nflag)
184 		usage();
185 	if (Iflag || Lflag)
186 		xflag = 1;
187 	if (replstr != NULL && *replstr == '\0')
188 		errx(1, "replstr may not be empty");
189 
190 	/*
191 	 * Allocate pointers for the utility name, the utility arguments,
192 	 * the maximum arguments to be read from stdin and the trailing
193 	 * NULL.
194 	 */
195 	linelen = 1 + argc + nargs + 1;
196 	if ((av = bxp = calloc(linelen, sizeof(char *))) == NULL)
197 		err(1, NULL);
198 
199 	/*
200 	 * Use the user's name for the utility as argv[0], just like the
201 	 * shell.  Echo is the default.  Set up pointers for the user's
202 	 * arguments.
203 	 */
204 	if (*argv == NULL)
205 		cnt = strlen(*bxp++ = _PATH_ECHO);
206 	else {
207 		do {
208 			if (Jflag && strcmp(*argv, replstr) == 0) {
209 				char **avj;
210 				jfound = 1;
211 				argv++;
212 				for (avj = argv; *avj; avj++)
213 					cnt += strlen(*avj) + 1;
214 				break;
215 			}
216 			cnt += strlen(*bxp++ = *argv) + 1;
217 		} while (*++argv != NULL);
218 	}
219 
220 	/*
221 	 * Set up begin/end/traversing pointers into the array.  The -n
222 	 * count doesn't include the trailing NULL pointer, so the malloc
223 	 * added in an extra slot.
224 	 */
225 	endxp = (xp = bxp) + nargs;
226 
227 	/*
228 	 * Allocate buffer space for the arguments read from stdin and the
229 	 * trailing NULL.  Buffer space is defined as the default or specified
230 	 * space, minus the length of the utility name and arguments.  Set up
231 	 * begin/end/traversing pointers into the array.  The -s count does
232 	 * include the trailing NULL, so the malloc didn't add in an extra
233 	 * slot.
234 	 */
235 	nline -= cnt;
236 	if (nline <= 0)
237 		errx(1, "insufficient space for command");
238 
239 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
240 		err(1, NULL);
241 	ebp = (argp = p = bbp) + nline - 1;
242 	for (;;)
243 		parse_input(argc, argv);
244 }
245 
246 static void
247 parse_input(int argc, char *argv[])
248 {
249 	int hasblank = 0;
250 	static int hadblank = 0;
251 	int ch, foundeof = 0;
252 	char **avj;
253 
254 	ch = getchar();
255 	if (isblank(ch)) {
256 		/* Quotes escape tabs and spaces. */
257 		if (insingle || indouble)
258 			goto addch;
259 		hasblank = 1;
260 		if (zflag)
261 			goto addch;
262 		goto arg2;
263 	}
264 
265 	switch (ch) {
266 	case EOF:
267 		/* No arguments since last exec. */
268 		if (p == bbp) {
269 			if (runeof)
270 				prerun(0, av);
271 			waitchildren(*argv, 1);
272 			exit(rval);
273 		}
274 		goto arg1;
275 	case '\0':
276 		if (zflag) {
277 			/*
278 			 * Increment 'count', so that nulls will be treated
279 			 * as end-of-line, as well as end-of-argument.  This
280 			 * is needed so -0 works properly with -I and -L.
281 			 */
282 			count++;
283 			goto arg2;
284 		}
285 		goto addch;
286 	case '\n':
287 		if (zflag)
288 			goto addch;
289 		hasblank = 1;
290 		if (hadblank == 0)
291 			count++;
292 
293 		/* Quotes do not escape newlines. */
294 arg1:		if (insingle || indouble)
295 			errx(1, "unterminated quote");
296 arg2:
297 		foundeof = *eofstr != '\0' &&
298 		    strcmp(argp, eofstr) == 0;
299 
300 		/* Do not make empty args unless they are quoted */
301 		if ((argp != p || wasquoted) && !foundeof) {
302 			*p++ = '\0';
303 			*xp++ = argp;
304 			if (Iflag) {
305 				size_t curlen;
306 
307 				if (inpline == NULL)
308 					curlen = 0;
309 				else {
310 					/*
311 					 * If this string is not zero
312 					 * length, append a space for
313 					 * separation before the next
314 					 * argument.
315 					 */
316 					if ((curlen = strlen(inpline)))
317 						strlcat(inpline, " ", inpsize);
318 				}
319 				curlen++;
320 				/*
321 				 * Allocate enough to hold what we will
322 				 * be holding in a second, and to append
323 				 * a space next time through, if we have
324 				 * to.
325 				 */
326 				inpsize = curlen + 2 + strlen(argp);
327 				inpline = realloc(inpline, inpsize);
328 				if (inpline == NULL)
329 					errx(1, "realloc failed");
330 				if (curlen == 1)
331 					strlcpy(inpline, argp, inpsize);
332 				else
333 					strlcat(inpline, argp, inpsize);
334 			}
335 		}
336 
337 		/*
338 		 * If max'd out on args or buffer, or reached EOF,
339 		 * run the command.  If xflag and max'd out on buffer
340 		 * but not on args, object.  Having reached the limit
341 		 * of input lines, as specified by -L is the same as
342 		 * maxing out on arguments.
343 		 */
344 		if (xp == endxp || p > ebp || ch == EOF ||
345 		    (Lflag <= count && xflag) || foundeof) {
346 			if (xflag && xp != endxp && p > ebp)
347 				errx(1, "insufficient space for arguments");
348 			if (jfound) {
349 				for (avj = argv; *avj; avj++)
350 					*xp++ = *avj;
351 			}
352 			prerun(argc, av);
353 			if (ch == EOF || foundeof) {
354 				waitchildren(*argv, 1);
355 				exit(rval);
356 			}
357 			p = bbp;
358 			xp = bxp;
359 			count = 0;
360 		}
361 		argp = p;
362 		wasquoted = 0;
363 		break;
364 	case '\'':
365 		if (indouble || zflag)
366 			goto addch;
367 		insingle = !insingle;
368 		wasquoted = 1;
369 		break;
370 	case '"':
371 		if (insingle || zflag)
372 			goto addch;
373 		indouble = !indouble;
374 		wasquoted = 1;
375 		break;
376 	case '\\':
377 		if (zflag)
378 			goto addch;
379 		/* Backslash escapes anything, is escaped by quotes. */
380 		if (!insingle && !indouble && (ch = getchar()) == EOF)
381 			errx(1, "backslash at EOF");
382 		/* FALLTHROUGH */
383 	default:
384 addch:		if (p < ebp) {
385 			*p++ = ch;
386 			break;
387 		}
388 
389 		/* If only one argument, not enough buffer space. */
390 		if (bxp == xp)
391 			errx(1, "insufficient space for argument");
392 		/* Didn't hit argument limit, so if xflag object. */
393 		if (xflag)
394 			errx(1, "insufficient space for arguments");
395 
396 		if (jfound) {
397 			for (avj = argv; *avj; avj++)
398 				*xp++ = *avj;
399 		}
400 		prerun(argc, av);
401 		xp = bxp;
402 		cnt = ebp - argp;
403 		memmove(bbp, argp, (size_t)cnt);
404 		p = (argp = bbp) + cnt;
405 		*p++ = ch;
406 		break;
407 	}
408 	hadblank = hasblank;
409 }
410 
411 /*
412  * Do things necessary before run()'ing, such as -I substitution,
413  * and then call run().
414  */
415 static void
416 prerun(int argc, char *argv[])
417 {
418 	char **tmp, **tmp2, **avj;
419 	int repls;
420 
421 	repls = Rflag;
422 	runeof = 0;
423 
424 	if (argc == 0 || repls == 0) {
425 		*xp = NULL;
426 		run(argv);
427 		return;
428 	}
429 
430 	avj = argv;
431 
432 	/*
433 	 * Allocate memory to hold the argument list, and
434 	 * a NULL at the tail.
435 	 */
436 	tmp = calloc(argc + 1, sizeof(char *));
437 	if (tmp == NULL)
438 		err(1, NULL);
439 	tmp2 = tmp;
440 
441 	/*
442 	 * Save the first argument and iterate over it, we
443 	 * cannot do strnsubst() to it.
444 	 */
445 	if ((*tmp++ = strdup(*avj++)) == NULL)
446 		err(1, NULL);
447 
448 	/*
449 	 * For each argument to utility, if we have not used up
450 	 * the number of replacements we are allowed to do, and
451 	 * if the argument contains at least one occurrence of
452 	 * replstr, call strnsubst(), else just save the string.
453 	 * Iterations over elements of avj and tmp are done
454 	 * where appropriate.
455 	 */
456 	while (--argc) {
457 		*tmp = *avj++;
458 		if (repls && strstr(*tmp, replstr) != NULL) {
459 			strnsubst(tmp++, replstr, inpline, (size_t)255);
460 			if (repls > 0)
461 				repls--;
462 		} else {
463 			if ((*tmp = strdup(*tmp)) == NULL)
464 				err(1, NULL);
465 			tmp++;
466 		}
467 	}
468 
469 	/*
470 	 * Run it.
471 	 */
472 	*tmp = NULL;
473 	run(tmp2);
474 
475 	/*
476 	 * Walk from the tail to the head, free along the way.
477 	 */
478 	for (; tmp2 != tmp; tmp--)
479 		free(*tmp);
480 	/*
481 	 * Now free the list itself.
482 	 */
483 	free(tmp2);
484 
485 	/*
486 	 * Free the input line buffer, if we have one.
487 	 */
488 	free(inpline);
489 	inpline = NULL;
490 }
491 
492 static void
493 run(char **argv)
494 {
495 	pid_t pid;
496 	int fd;
497 	char **avec;
498 
499 	/*
500 	 * If the user wants to be notified of each command before it is
501 	 * executed, notify them.  If they want the notification to be
502 	 * followed by a prompt, then prompt them.
503 	 */
504 	if (tflag || pflag) {
505 		fprintf(stderr, "%s", *argv);
506 		for (avec = argv + 1; *avec != NULL; ++avec)
507 			fprintf(stderr, " %s", *avec);
508 		/*
509 		 * If the user has asked to be prompted, do so.
510 		 */
511 		if (pflag)
512 			/*
513 			 * If they asked not to exec, return without execution
514 			 * but if they asked to, go to the execution.  If we
515 			 * could not open their tty, break the switch and drop
516 			 * back to -t behaviour.
517 			 */
518 			switch (prompt()) {
519 			case 0:
520 				return;
521 			case 1:
522 				goto exec;
523 			case 2:
524 				break;
525 			}
526 		fprintf(stderr, "\n");
527 		fflush(stderr);
528 	}
529 exec:
530 	switch (pid = vfork()) {
531 	case -1:
532 		err(1, "vfork");
533 	case 0:
534 		if (oflag) {
535 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1) {
536 				warn("can't open /dev/tty");
537 				_exit(1);
538 			}
539 		} else {
540 			fd = open(_PATH_DEVNULL, O_RDONLY);
541 		}
542 		if (fd > STDIN_FILENO) {
543 			if (dup2(fd, STDIN_FILENO) != 0) {
544 				warn("can't dup2 to stdin");
545 				_exit(1);
546 			}
547 			close(fd);
548 		}
549 		execvp(argv[0], argv);
550 		warn("%s", argv[0]);
551 		_exit(errno == ENOENT ? 127 : 126);
552 	}
553 	curprocs++;
554 	waitchildren(*argv, 0);
555 }
556 
557 static void
558 waitchildren(const char *name, int waitall)
559 {
560 	pid_t pid;
561 	int status;
562 
563 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
564 	    WNOHANG : 0)) > 0) {
565 		curprocs--;
566 		/*
567 		 * According to POSIX, we have to exit if the utility exits
568 		 * with a 255 status, or is interrupted by a signal.
569 		 * We are allowed to return any exit status between 1 and
570 		 * 125 in these cases, but we'll use 124 and 125, the same
571 		 * values used by GNU xargs.
572 		 */
573 		if (WIFEXITED(status)) {
574 			if (WEXITSTATUS(status) == 255) {
575 				warnx("%s exited with status 255", name);
576 				exit(124);
577 			} else if (WEXITSTATUS(status) == 127 ||
578 			    WEXITSTATUS(status) == 126) {
579 				exit(WEXITSTATUS(status));
580 			} else if (WEXITSTATUS(status) != 0) {
581 				rval = 123;
582 			}
583 		} else if (WIFSIGNALED(status)) {
584 			if (WTERMSIG(status) != SIGPIPE) {
585 				if (WTERMSIG(status) < NSIG)
586 					warnx("%s terminated by SIG%s", name,
587 					    sys_signame[WTERMSIG(status)]);
588 				else
589 					warnx("%s terminated by signal %d",
590 					    name, WTERMSIG(status));
591 			}
592 			exit(125);
593 		}
594 	}
595 	if (pid == -1 && errno != ECHILD)
596 		err(1, "waitpid");
597 }
598 
/*
 * Ask the user on the terminal whether the command should be run.
 *
 * Returns 1 when the reply starts with 'y' or 'Y', 0 for any other
 * reply or when no reply could be read, and 2 when the terminal
 * could not be opened.
 */
static int
prompt(void)
{
	FILE *tty;
	char *reply;
	size_t replylen;
	int yes;

	tty = fopen(_PATH_TTY, "r");
	if (tty == NULL)
		return (2);	/* Tell the caller the TTY is unavailable. */

	fprintf(stderr, "?...");
	fflush(stderr);

	yes = 0;
	reply = fgetln(tty, &replylen);
	if (reply != NULL) {
		switch (*reply) {
		case 'y':
		case 'Y':
			yes = 1;
			break;
		default:
			break;
		}
	}
	fclose(tty);
	return (yes);
}
619 
/*
 * Write the usage synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0oprt] [-E eofstr] [-I replstr [-R replacements]] [-J replstr]\n"
"             [-L number] [-n number [-x]] [-P maxprocs] [-s size]\n"
"             [utility [argument ...]]\n";

	fputs(synopsis, stderr);
	exit(1);
}
629