xref: /openbsd/usr.bin/xargs/xargs.c (revision d415bd75)
1 /*	$OpenBSD: xargs.c,v 1.36 2022/12/04 23:50:50 cheloha Exp $	*/
2 /*	$FreeBSD: xargs.c,v 1.51 2003/05/03 19:09:11 obrien Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * John B. Roll Jr.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
36  */
37 
38 #include <sys/wait.h>
39 
40 #include <ctype.h>
41 #include <err.h>
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <paths.h>
45 #include <signal.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <limits.h>
51 
52 #include "pathnames.h"
53 
/* Helpers defined further down in this file. */
static void	parse_input(int argc, char *argv[]);
static void	prerun(int argc, char *argv[]);
static int	prompt(void);
static void	run(char **argv);
static void	usage(void);
static void	waitchildren(const char *name, int waitall);

/* Substitution helper used by prerun(); its definition is not in view here. */
void		strnsubst(char **, const char *, const char *, size_t);

/* Argument vector handed to the utility. */
static char **av;	/* first slot of the vector (the utility name) */
static char **bxp;	/* first slot that receives an input argument */
static char **endxp;	/* bxp plus the per-invocation argument limit */
static char **xp;	/* next free slot in the vector */
static char **ep;	/* cursor used while sizing the environment */

/* Character buffer holding the arguments read from standard input. */
static char *bbp;	/* start of the buffer */
static char *ebp;	/* last position a character may be stored at */
static char *argp;	/* start of the argument being collected */
static char *p;		/* position the next character is stored at */

/* Replacement handling (-I / -J). */
static char *inpline;	/* input collected for -I substitution */
static size_t inpsize;	/* size of the inpline allocation */
static char *replstr;	/* operand of -I or -J */

/* Operand of -E; an empty string disables the check. */
static const char *eofstr;

/* Tokenizer state. */
static int count;	/* compared against Lflag to honour -L */
static int insingle;	/* inside a '...' quotation */
static int indouble;	/* inside a "..." quotation */
static int wasquoted;	/* current argument contained a quote */

/* Option flags and operands. */
static int Iflag;
static int Lflag;
static int oflag;
static int pflag;
static int Rflag;
static int tflag;
static int xflag;
static int zflag;
static int runeof = 1;	/* cleared by -r and by the first prerun() */

/* Bookkeeping. */
static int cnt;		/* bytes used by the utility name and its arguments */
static int jfound;	/* the -J replstr was seen among the arguments */
static int rval;	/* exit status reported by xargs */
static int curprocs;	/* children started and not yet reaped */
static int maxprocs;	/* operand of -P */

extern char **environ;
71 
72 int
73 main(int argc, char *argv[])
74 {
75 	long arg_max;
76 	int ch, Jflag, nargs, nflag, nline;
77 	size_t linelen;
78 	char *endptr;
79 	const char *errstr;
80 
81 	inpline = replstr = NULL;
82 	ep = environ;
83 	eofstr = "";
84 	Jflag = nflag = 0;
85 
86 	/*
87 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
88 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
89 	 * that the smallest argument is 2 bytes in length, this means that
90 	 * the number of arguments is limited to:
91 	 *
92 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
93 	 *
94 	 * We arbitrarily limit the number of arguments to 5000.  This is
95 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
96 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
97 	 * probably not worthwhile.
98 	 */
99 	nargs = 5000;
100 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
101 		errx(1, "sysconf(_SC_ARG_MAX) failed");
102 
103 	if (pledge("stdio rpath proc exec", NULL) == -1)
104 		err(1, "pledge");
105 
106 	nline = arg_max - 4 * 1024;
107 	while (*ep != NULL) {
108 		/* 1 byte for each '\0' */
109 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
110 	}
111 	maxprocs = 1;
112 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:rs:tx")) != -1)
113 		switch (ch) {
114 		case 'E':
115 			eofstr = optarg;
116 			break;
117 		case 'I':
118 			Jflag = 0;
119 			Iflag = 1;
120 			Lflag = 1;
121 			replstr = optarg;
122 			break;
123 		case 'J':
124 			Iflag = 0;
125 			Jflag = 1;
126 			replstr = optarg;
127 			break;
128 		case 'L':
129 			Lflag = strtonum(optarg, 0, INT_MAX, &errstr);
130 			if (errstr)
131 				errx(1, "-L %s: %s", optarg, errstr);
132 			break;
133 		case 'n':
134 			nflag = 1;
135 			nargs = strtonum(optarg, 1, INT_MAX, &errstr);
136 			if (errstr)
137 				errx(1, "-n %s: %s", optarg, errstr);
138 			break;
139 		case 'o':
140 			oflag = 1;
141 			break;
142 		case 'P':
143 			maxprocs = strtonum(optarg, 1, INT_MAX, &errstr);
144 			if (errstr)
145 				errx(1, "-P %s: %s", optarg, errstr);
146 			break;
147 		case 'p':
148 			pflag = 1;
149 			break;
150 		case 'r':
151 			runeof = 0;
152 			break;
153 		case 'R':
154 			Rflag = strtol(optarg, &endptr, 10);
155 			if (*endptr != '\0')
156 				errx(1, "replacements must be a number");
157 			break;
158 		case 's':
159 			nline = strtonum(optarg, 0, INT_MAX, &errstr);
160 			if (errstr)
161 				errx(1, "-s %s: %s", optarg, errstr);
162 			break;
163 		case 't':
164 			tflag = 1;
165 			break;
166 		case 'x':
167 			xflag = 1;
168 			break;
169 		case '0':
170 			zflag = 1;
171 			break;
172 		default:
173 			usage();
174 		}
175 	argc -= optind;
176 	argv += optind;
177 
178 	if (!Iflag && Rflag)
179 		usage();
180 	if (Iflag && !Rflag)
181 		Rflag = 5;
182 	if (xflag && !nflag)
183 		usage();
184 	if (Iflag || Lflag)
185 		xflag = 1;
186 	if (replstr != NULL && *replstr == '\0')
187 		errx(1, "replstr may not be empty");
188 
189 	/*
190 	 * Allocate pointers for the utility name, the utility arguments,
191 	 * the maximum arguments to be read from stdin and the trailing
192 	 * NULL.
193 	 */
194 	linelen = 1 + argc + nargs + 1;
195 	if ((av = bxp = calloc(linelen, sizeof(char *))) == NULL)
196 		err(1, NULL);
197 
198 	/*
199 	 * Use the user's name for the utility as argv[0], just like the
200 	 * shell.  Echo is the default.  Set up pointers for the user's
201 	 * arguments.
202 	 */
203 	if (*argv == NULL)
204 		cnt = strlen(*bxp++ = _PATH_ECHO);
205 	else {
206 		do {
207 			if (Jflag && strcmp(*argv, replstr) == 0) {
208 				char **avj;
209 				jfound = 1;
210 				argv++;
211 				for (avj = argv; *avj; avj++)
212 					cnt += strlen(*avj) + 1;
213 				break;
214 			}
215 			cnt += strlen(*bxp++ = *argv) + 1;
216 		} while (*++argv != NULL);
217 	}
218 
219 	/*
220 	 * Set up begin/end/traversing pointers into the array.  The -n
221 	 * count doesn't include the trailing NULL pointer, so the malloc
222 	 * added in an extra slot.
223 	 */
224 	endxp = (xp = bxp) + nargs;
225 
226 	/*
227 	 * Allocate buffer space for the arguments read from stdin and the
228 	 * trailing NULL.  Buffer space is defined as the default or specified
229 	 * space, minus the length of the utility name and arguments.  Set up
230 	 * begin/end/traversing pointers into the array.  The -s count does
231 	 * include the trailing NULL, so the malloc didn't add in an extra
232 	 * slot.
233 	 */
234 	nline -= cnt;
235 	if (nline <= 0)
236 		errx(1, "insufficient space for command");
237 
238 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
239 		err(1, NULL);
240 	ebp = (argp = p = bbp) + nline - 1;
241 	for (;;)
242 		parse_input(argc, argv);
243 }
244 
245 static void
246 parse_input(int argc, char *argv[])
247 {
248 	int hasblank = 0;
249 	static int hadblank = 0;
250 	int ch, foundeof = 0;
251 	char **avj;
252 
253 	ch = getchar();
254 	if (isblank(ch)) {
255 		/* Quotes escape tabs and spaces. */
256 		if (insingle || indouble)
257 			goto addch;
258 		hasblank = 1;
259 		if (zflag)
260 			goto addch;
261 		goto arg2;
262 	}
263 
264 	switch (ch) {
265 	case EOF:
266 		/* No arguments since last exec. */
267 		if (p == bbp) {
268 			if (runeof)
269 				prerun(0, av);
270 			waitchildren(*argv, 1);
271 			exit(rval);
272 		}
273 		goto arg1;
274 	case '\0':
275 		if (zflag) {
276 			/*
277 			 * Increment 'count', so that nulls will be treated
278 			 * as end-of-line, as well as end-of-argument.  This
279 			 * is needed so -0 works properly with -I and -L.
280 			 */
281 			count++;
282 			goto arg2;
283 		}
284 		goto addch;
285 	case '\n':
286 		if (zflag)
287 			goto addch;
288 		hasblank = 1;
289 		if (hadblank == 0)
290 			count++;
291 
292 		/* Quotes do not escape newlines. */
293 arg1:		if (insingle || indouble)
294 			errx(1, "unterminated quote");
295 arg2:
296 		foundeof = *eofstr != '\0' &&
297 		    strcmp(argp, eofstr) == 0;
298 
299 		/* Do not make empty args unless they are quoted */
300 		if ((argp != p || wasquoted) && !foundeof) {
301 			*p++ = '\0';
302 			*xp++ = argp;
303 			if (Iflag) {
304 				size_t curlen;
305 
306 				if (inpline == NULL)
307 					curlen = 0;
308 				else {
309 					/*
310 					 * If this string is not zero
311 					 * length, append a space for
312 					 * separation before the next
313 					 * argument.
314 					 */
315 					if ((curlen = strlen(inpline)))
316 						strlcat(inpline, " ", inpsize);
317 				}
318 				curlen++;
319 				/*
320 				 * Allocate enough to hold what we will
321 				 * be holding in a second, and to append
322 				 * a space next time through, if we have
323 				 * to.
324 				 */
325 				inpsize = curlen + 2 + strlen(argp);
326 				inpline = realloc(inpline, inpsize);
327 				if (inpline == NULL)
328 					errx(1, "realloc failed");
329 				if (curlen == 1)
330 					strlcpy(inpline, argp, inpsize);
331 				else
332 					strlcat(inpline, argp, inpsize);
333 			}
334 		}
335 
336 		/*
337 		 * If max'd out on args or buffer, or reached EOF,
338 		 * run the command.  If xflag and max'd out on buffer
339 		 * but not on args, object.  Having reached the limit
340 		 * of input lines, as specified by -L is the same as
341 		 * maxing out on arguments.
342 		 */
343 		if (xp == endxp || p > ebp || ch == EOF ||
344 		    (Lflag <= count && xflag) || foundeof) {
345 			if (xflag && xp != endxp && p > ebp)
346 				errx(1, "insufficient space for arguments");
347 			if (jfound) {
348 				for (avj = argv; *avj; avj++)
349 					*xp++ = *avj;
350 			}
351 			prerun(argc, av);
352 			if (ch == EOF || foundeof) {
353 				waitchildren(*argv, 1);
354 				exit(rval);
355 			}
356 			p = bbp;
357 			xp = bxp;
358 			count = 0;
359 		}
360 		argp = p;
361 		wasquoted = 0;
362 		break;
363 	case '\'':
364 		if (indouble || zflag)
365 			goto addch;
366 		insingle = !insingle;
367 		wasquoted = 1;
368 		break;
369 	case '"':
370 		if (insingle || zflag)
371 			goto addch;
372 		indouble = !indouble;
373 		wasquoted = 1;
374 		break;
375 	case '\\':
376 		if (zflag)
377 			goto addch;
378 		/* Backslash escapes anything, is escaped by quotes. */
379 		if (!insingle && !indouble && (ch = getchar()) == EOF)
380 			errx(1, "backslash at EOF");
381 		/* FALLTHROUGH */
382 	default:
383 addch:		if (p < ebp) {
384 			*p++ = ch;
385 			break;
386 		}
387 
388 		/* If only one argument, not enough buffer space. */
389 		if (bxp == xp)
390 			errx(1, "insufficient space for argument");
391 		/* Didn't hit argument limit, so if xflag object. */
392 		if (xflag)
393 			errx(1, "insufficient space for arguments");
394 
395 		if (jfound) {
396 			for (avj = argv; *avj; avj++)
397 				*xp++ = *avj;
398 		}
399 		prerun(argc, av);
400 		xp = bxp;
401 		cnt = ebp - argp;
402 		memmove(bbp, argp, (size_t)cnt);
403 		p = (argp = bbp) + cnt;
404 		*p++ = ch;
405 		break;
406 	}
407 	hadblank = hasblank;
408 }
409 
410 /*
411  * Do things necessary before run()'ing, such as -I substitution,
412  * and then call run().
413  */
414 static void
415 prerun(int argc, char *argv[])
416 {
417 	char **tmp, **tmp2, **avj;
418 	int repls;
419 
420 	repls = Rflag;
421 	runeof = 0;
422 
423 	if (argc == 0 || repls == 0) {
424 		*xp = NULL;
425 		run(argv);
426 		return;
427 	}
428 
429 	avj = argv;
430 
431 	/*
432 	 * Allocate memory to hold the argument list, and
433 	 * a NULL at the tail.
434 	 */
435 	tmp = calloc(argc + 1, sizeof(char *));
436 	if (tmp == NULL)
437 		err(1, NULL);
438 	tmp2 = tmp;
439 
440 	/*
441 	 * Save the first argument and iterate over it, we
442 	 * cannot do strnsubst() to it.
443 	 */
444 	if ((*tmp++ = strdup(*avj++)) == NULL)
445 		err(1, NULL);
446 
447 	/*
448 	 * For each argument to utility, if we have not used up
449 	 * the number of replacements we are allowed to do, and
450 	 * if the argument contains at least one occurrence of
451 	 * replstr, call strnsubst(), else just save the string.
452 	 * Iterations over elements of avj and tmp are done
453 	 * where appropriate.
454 	 */
455 	while (--argc) {
456 		*tmp = *avj++;
457 		if (repls && strstr(*tmp, replstr) != NULL) {
458 			strnsubst(tmp++, replstr, inpline, (size_t)255);
459 			if (repls > 0)
460 				repls--;
461 		} else {
462 			if ((*tmp = strdup(*tmp)) == NULL)
463 				err(1, NULL);
464 			tmp++;
465 		}
466 	}
467 
468 	/*
469 	 * Run it.
470 	 */
471 	*tmp = NULL;
472 	run(tmp2);
473 
474 	/*
475 	 * Walk from the tail to the head, free along the way.
476 	 */
477 	for (; tmp2 != tmp; tmp--)
478 		free(*tmp);
479 	/*
480 	 * Now free the list itself.
481 	 */
482 	free(tmp2);
483 
484 	/*
485 	 * Free the input line buffer, if we have one.
486 	 */
487 	free(inpline);
488 	inpline = NULL;
489 }
490 
491 static void
492 run(char **argv)
493 {
494 	pid_t pid;
495 	int fd;
496 	char **avec;
497 
498 	/*
499 	 * If the user wants to be notified of each command before it is
500 	 * executed, notify them.  If they want the notification to be
501 	 * followed by a prompt, then prompt them.
502 	 */
503 	if (tflag || pflag) {
504 		fprintf(stderr, "%s", *argv);
505 		for (avec = argv + 1; *avec != NULL; ++avec)
506 			fprintf(stderr, " %s", *avec);
507 		/*
508 		 * If the user has asked to be prompted, do so.
509 		 */
510 		if (pflag)
511 			/*
512 			 * If they asked not to exec, return without execution
513 			 * but if they asked to, go to the execution.  If we
514 			 * could not open their tty, break the switch and drop
515 			 * back to -t behaviour.
516 			 */
517 			switch (prompt()) {
518 			case 0:
519 				return;
520 			case 1:
521 				goto exec;
522 			case 2:
523 				break;
524 			}
525 		fprintf(stderr, "\n");
526 		fflush(stderr);
527 	}
528 exec:
529 	switch (pid = vfork()) {
530 	case -1:
531 		err(1, "vfork");
532 	case 0:
533 		if (oflag) {
534 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1) {
535 				warn("can't open /dev/tty");
536 				_exit(1);
537 			}
538 		} else {
539 			fd = open(_PATH_DEVNULL, O_RDONLY);
540 		}
541 		if (fd > STDIN_FILENO) {
542 			if (dup2(fd, STDIN_FILENO) != 0) {
543 				warn("can't dup2 to stdin");
544 				_exit(1);
545 			}
546 			close(fd);
547 		}
548 		execvp(argv[0], argv);
549 		warn("%s", argv[0]);
550 		_exit(errno == ENOENT ? 127 : 126);
551 	}
552 	curprocs++;
553 	waitchildren(*argv, 0);
554 }
555 
556 static void
557 waitchildren(const char *name, int waitall)
558 {
559 	pid_t pid;
560 	int status;
561 
562 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
563 	    WNOHANG : 0)) > 0) {
564 		curprocs--;
565 		/*
566 		 * According to POSIX, we have to exit if the utility exits
567 		 * with a 255 status, or is interrupted by a signal.
568 		 * We are allowed to return any exit status between 1 and
569 		 * 125 in these cases, but we'll use 124 and 125, the same
570 		 * values used by GNU xargs.
571 		 */
572 		if (WIFEXITED(status)) {
573 			if (WEXITSTATUS(status) == 255) {
574 				warnx("%s exited with status 255", name);
575 				exit(124);
576 			} else if (WEXITSTATUS(status) == 127 ||
577 			    WEXITSTATUS(status) == 126) {
578 				exit(WEXITSTATUS(status));
579 			} else if (WEXITSTATUS(status) != 0) {
580 				rval = 123;
581 			}
582 		} else if (WIFSIGNALED(status)) {
583 			if (WTERMSIG(status) != SIGPIPE) {
584 				if (WTERMSIG(status) < NSIG)
585 					warnx("%s terminated by SIG%s", name,
586 					    sys_signame[WTERMSIG(status)]);
587 				else
588 					warnx("%s terminated by signal %d",
589 					    name, WTERMSIG(status));
590 			}
591 			exit(125);
592 		}
593 	}
594 	if (pid == -1 && errno != ECHILD)
595 		err(1, "waitpid");
596 }
597 
/*
 * Ask the user, on the controlling terminal, whether to run a command.
 *
 * Returns 1 when the reply starts with 'y' or 'Y', 0 for any other
 * reply (or none at all), and 2 when the terminal could not be opened.
 */
static int
prompt(void)
{
	FILE *tty;
	char *reply;
	size_t replylen;
	int accepted;

	tty = fopen(_PATH_TTY, "r");
	if (tty == NULL)
		return (2);	/* Indicate that the TTY failed to open. */

	fprintf(stderr, "?...");
	fflush(stderr);

	accepted = 0;
	reply = fgetln(tty, &replylen);
	if (reply != NULL) {
		switch (*reply) {
		case 'y':
		case 'Y':
			accepted = 1;
			break;
		default:
			break;
		}
	}

	fclose(tty);
	return (accepted);
}
618 
/*
 * Write the command synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0oprt] [-E eofstr] [-I replstr [-R replacements]] [-J replstr]\n"
"             [-L number] [-n number [-x]] [-P maxprocs] [-s size]\n"
"             [utility [argument ...]]\n";

	fputs(synopsis, stderr);
	exit(1);
}
627 }
628