xref: /freebsd/usr.bin/xargs/xargs.c (revision 39beb93c)
1 /*-
2  * Copyright (c) 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * John B. Roll Jr.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
37  */
38 
39 #if 0
40 #ifndef lint
41 static const char copyright[] =
42 "@(#) Copyright (c) 1990, 1993\n\
43 	The Regents of the University of California.  All rights reserved.\n";
44 #endif /* not lint */
45 
46 #ifndef lint
47 static char sccsid[] = "@(#)xargs.c	8.1 (Berkeley) 6/6/93";
48 #endif /* not lint */
49 #endif
50 #include <sys/cdefs.h>
51 __FBSDID("$FreeBSD$");
52 
53 #include <sys/param.h>
54 #include <sys/wait.h>
55 
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <langinfo.h>
#include <limits.h>
#include <locale.h>
#include <paths.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
67 
68 #include "pathnames.h"
69 
/*
 * Forward declarations.  strnsubst() is the only non-static one; its
 * definition is not part of this view.
 */
static void	parse_input(int, char *[]);
static void	prerun(int, char *[]);
static int	prompt(void);
static void	run(char **);
static void	usage(void);
void		strnsubst(char **, const char *, const char *, size_t);
static void	waitchildren(const char *, int);

/* Utility that main() installs as argv[0] when none is given. */
static char echo[] = _PATH_ECHO;
/*
 * Argument vector handed to the utility:
 *	av	start of the vector (utility name first)
 *	bxp	first slot after the fixed utility arguments
 *	xp	next free slot for an argument read from stdin
 *	endxp	bxp + the maximum number of stdin arguments per run
 *	ep	cursor main() uses to walk environ
 */
static char **av, **bxp, **ep, **endxp, **xp;
/*
 * Character buffer for arguments read from stdin:
 *	bbp	start of the buffer
 *	ebp	bbp + nline - 1, the fill limit checked by parse_input()
 *	p	next write position
 *	argp	start of the argument currently being collected
 *	inpline	space-joined arguments passed to strnsubst() for -I
 *	replstr	replacement string given with -I or -J
 */
static char *argp, *bbp, *ebp, *inpline, *p, *replstr;
/* Logical end-of-file string from -E; "" means none. */
static const char *eofstr;
/*
 * count is bumped at each input line end (each NUL with -0) and reset
 * after a run; it is compared against Lflag.  insingle/indouble track
 * whether the parser is inside '...' or "...".  rval is the exit status
 * and becomes 1 once a child exits non-zero.  The *flag variables record
 * the corresponding command-line options.
 */
static int count, insingle, indouble, oflag, pflag, tflag, Rflag, rval, zflag;
/*
 * cnt accumulates the byte length of the utility name and its fixed
 * arguments in main() and is reused by parse_input() as a scratch length.
 * jfound is set when the -J marker was seen among the utility arguments.
 * wasquoted notes that the current argument contained a quote character,
 * which lets an empty quoted argument through.
 */
static int cnt, Iflag, jfound, Lflag, Sflag, wasquoted, xflag;
/* Children started and not yet reaped / limit from -P (default 1). */
static int curprocs, maxprocs;

/*
 * Written by the vfork() child in run() when execvp() fails and read by
 * waitchildren() in the parent.
 */
static volatile int childerr;

extern char **environ;
89 
90 int
91 main(int argc, char *argv[])
92 {
93 	long arg_max;
94 	int ch, Jflag, nargs, nflag, nline;
95 	size_t linelen;
96 	char *endptr;
97 
98 	inpline = replstr = NULL;
99 	ep = environ;
100 	eofstr = "";
101 	Jflag = nflag = 0;
102 
103 	(void)setlocale(LC_ALL, "");
104 
105 	/*
106 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
107 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
108 	 * that the smallest argument is 2 bytes in length, this means that
109 	 * the number of arguments is limited to:
110 	 *
111 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
112 	 *
113 	 * We arbitrarily limit the number of arguments to 5000.  This is
114 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
115 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
116 	 * probably not worthwhile.
117 	 */
118 	nargs = 5000;
119 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
120 		errx(1, "sysconf(_SC_ARG_MAX) failed");
121 	nline = arg_max - 4 * 1024;
122 	while (*ep != NULL) {
123 		/* 1 byte for each '\0' */
124 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
125 	}
126 	maxprocs = 1;
127 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:S:s:rtx")) != -1)
128 		switch (ch) {
129 		case 'E':
130 			eofstr = optarg;
131 			break;
132 		case 'I':
133 			Jflag = 0;
134 			Iflag = 1;
135 			Lflag = 1;
136 			replstr = optarg;
137 			break;
138 		case 'J':
139 			Iflag = 0;
140 			Jflag = 1;
141 			replstr = optarg;
142 			break;
143 		case 'L':
144 			Lflag = atoi(optarg);
145 			break;
146 		case 'n':
147 			nflag = 1;
148 			if ((nargs = atoi(optarg)) <= 0)
149 				errx(1, "illegal argument count");
150 			break;
151 		case 'o':
152 			oflag = 1;
153 			break;
154 		case 'P':
155 			if ((maxprocs = atoi(optarg)) <= 0)
156 				errx(1, "max. processes must be >0");
157 			break;
158 		case 'p':
159 			pflag = 1;
160 			break;
161 		case 'R':
162 			Rflag = strtol(optarg, &endptr, 10);
163 			if (*endptr != '\0')
164 				errx(1, "replacements must be a number");
165 			break;
166 		case 'r':
167 			/* GNU compatibility */
168 			break;
169 		case 'S':
170 			Sflag = strtoul(optarg, &endptr, 10);
171 			if (*endptr != '\0')
172 				errx(1, "replsize must be a number");
173 			break;
174 		case 's':
175 			nline = atoi(optarg);
176 			break;
177 		case 't':
178 			tflag = 1;
179 			break;
180 		case 'x':
181 			xflag = 1;
182 			break;
183 		case '0':
184 			zflag = 1;
185 			break;
186 		case '?':
187 		default:
188 			usage();
189 	}
190 	argc -= optind;
191 	argv += optind;
192 
193 	if (!Iflag && Rflag)
194 		usage();
195 	if (!Iflag && Sflag)
196 		usage();
197 	if (Iflag && !Rflag)
198 		Rflag = 5;
199 	if (Iflag && !Sflag)
200 		Sflag = 255;
201 	if (xflag && !nflag)
202 		usage();
203 	if (Iflag || Lflag)
204 		xflag = 1;
205 	if (replstr != NULL && *replstr == '\0')
206 		errx(1, "replstr may not be empty");
207 
208 	/*
209 	 * Allocate pointers for the utility name, the utility arguments,
210 	 * the maximum arguments to be read from stdin and the trailing
211 	 * NULL.
212 	 */
213 	linelen = 1 + argc + nargs + 1;
214 	if ((av = bxp = malloc(linelen * sizeof(char **))) == NULL)
215 		errx(1, "malloc failed");
216 
217 	/*
218 	 * Use the user's name for the utility as argv[0], just like the
219 	 * shell.  Echo is the default.  Set up pointers for the user's
220 	 * arguments.
221 	 */
222 	if (*argv == NULL)
223 		cnt = strlen(*bxp++ = echo);
224 	else {
225 		do {
226 			if (Jflag && strcmp(*argv, replstr) == 0) {
227 				char **avj;
228 				jfound = 1;
229 				argv++;
230 				for (avj = argv; *avj; avj++)
231 					cnt += strlen(*avj) + 1;
232 				break;
233 			}
234 			cnt += strlen(*bxp++ = *argv) + 1;
235 		} while (*++argv != NULL);
236 	}
237 
238 	/*
239 	 * Set up begin/end/traversing pointers into the array.  The -n
240 	 * count doesn't include the trailing NULL pointer, so the malloc
241 	 * added in an extra slot.
242 	 */
243 	endxp = (xp = bxp) + nargs;
244 
245 	/*
246 	 * Allocate buffer space for the arguments read from stdin and the
247 	 * trailing NULL.  Buffer space is defined as the default or specified
248 	 * space, minus the length of the utility name and arguments.  Set up
249 	 * begin/end/traversing pointers into the array.  The -s count does
250 	 * include the trailing NULL, so the malloc didn't add in an extra
251 	 * slot.
252 	 */
253 	nline -= cnt;
254 	if (nline <= 0)
255 		errx(1, "insufficient space for command");
256 
257 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
258 		errx(1, "malloc failed");
259 	ebp = (argp = p = bbp) + nline - 1;
260 	for (;;)
261 		parse_input(argc, argv);
262 }
263 
264 static void
265 parse_input(int argc, char *argv[])
266 {
267 	int ch, foundeof;
268 	char **avj;
269 
270 	foundeof = 0;
271 
272 	switch (ch = getchar()) {
273 	case EOF:
274 		/* No arguments since last exec. */
275 		if (p == bbp) {
276 			waitchildren(*argv, 1);
277 			exit(rval);
278 		}
279 		goto arg1;
280 	case ' ':
281 	case '\t':
282 		/* Quotes escape tabs and spaces. */
283 		if (insingle || indouble || zflag)
284 			goto addch;
285 		goto arg2;
286 	case '\0':
287 		if (zflag) {
288 			/*
289 			 * Increment 'count', so that nulls will be treated
290 			 * as end-of-line, as well as end-of-argument.  This
291 			 * is needed so -0 works properly with -I and -L.
292 			 */
293 			count++;
294 			goto arg2;
295 		}
296 		goto addch;
297 	case '\n':
298 		if (zflag)
299 			goto addch;
300 		count++;	    /* Indicate end-of-line (used by -L) */
301 
302 		/* Quotes do not escape newlines. */
303 arg1:		if (insingle || indouble)
304 			errx(1, "unterminated quote");
305 arg2:
306 		foundeof = *eofstr != '\0' &&
307 		    strncmp(argp, eofstr, p - argp) == 0;
308 
309 		/* Do not make empty args unless they are quoted */
310 		if ((argp != p || wasquoted) && !foundeof) {
311 			*p++ = '\0';
312 			*xp++ = argp;
313 			if (Iflag) {
314 				size_t curlen;
315 
316 				if (inpline == NULL)
317 					curlen = 0;
318 				else {
319 					/*
320 					 * If this string is not zero
321 					 * length, append a space for
322 					 * separation before the next
323 					 * argument.
324 					 */
325 					if ((curlen = strlen(inpline)))
326 						strcat(inpline, " ");
327 				}
328 				curlen++;
329 				/*
330 				 * Allocate enough to hold what we will
331 				 * be holding in a second, and to append
332 				 * a space next time through, if we have
333 				 * to.
334 				 */
335 				inpline = realloc(inpline, curlen + 2 +
336 				    strlen(argp));
337 				if (inpline == NULL)
338 					errx(1, "realloc failed");
339 				if (curlen == 1)
340 					strcpy(inpline, argp);
341 				else
342 					strcat(inpline, argp);
343 			}
344 		}
345 
346 		/*
347 		 * If max'd out on args or buffer, or reached EOF,
348 		 * run the command.  If xflag and max'd out on buffer
349 		 * but not on args, object.  Having reached the limit
350 		 * of input lines, as specified by -L is the same as
351 		 * maxing out on arguments.
352 		 */
353 		if (xp == endxp || p > ebp || ch == EOF ||
354 		    (Lflag <= count && xflag) || foundeof) {
355 			if (xflag && xp != endxp && p > ebp)
356 				errx(1, "insufficient space for arguments");
357 			if (jfound) {
358 				for (avj = argv; *avj; avj++)
359 					*xp++ = *avj;
360 			}
361 			prerun(argc, av);
362 			if (ch == EOF || foundeof) {
363 				waitchildren(*argv, 1);
364 				exit(rval);
365 			}
366 			p = bbp;
367 			xp = bxp;
368 			count = 0;
369 		}
370 		argp = p;
371 		wasquoted = 0;
372 		break;
373 	case '\'':
374 		if (indouble || zflag)
375 			goto addch;
376 		insingle = !insingle;
377 		wasquoted = 1;
378 		break;
379 	case '"':
380 		if (insingle || zflag)
381 			goto addch;
382 		indouble = !indouble;
383 		wasquoted = 1;
384 		break;
385 	case '\\':
386 		if (zflag)
387 			goto addch;
388 		/* Backslash escapes anything, is escaped by quotes. */
389 		if (!insingle && !indouble && (ch = getchar()) == EOF)
390 			errx(1, "backslash at EOF");
391 		/* FALLTHROUGH */
392 	default:
393 addch:		if (p < ebp) {
394 			*p++ = ch;
395 			break;
396 		}
397 
398 		/* If only one argument, not enough buffer space. */
399 		if (bxp == xp)
400 			errx(1, "insufficient space for argument");
401 		/* Didn't hit argument limit, so if xflag object. */
402 		if (xflag)
403 			errx(1, "insufficient space for arguments");
404 
405 		if (jfound) {
406 			for (avj = argv; *avj; avj++)
407 				*xp++ = *avj;
408 		}
409 		prerun(argc, av);
410 		xp = bxp;
411 		cnt = ebp - argp;
412 		memcpy(bbp, argp, (size_t)cnt);
413 		p = (argp = bbp) + cnt;
414 		*p++ = ch;
415 		break;
416 	}
417 }
418 
419 /*
420  * Do things necessary before run()'ing, such as -I substitution,
421  * and then call run().
422  */
423 static void
424 prerun(int argc, char *argv[])
425 {
426 	char **tmp, **tmp2, **avj;
427 	int repls;
428 
429 	repls = Rflag;
430 
431 	if (argc == 0 || repls == 0) {
432 		*xp = NULL;
433 		run(argv);
434 		return;
435 	}
436 
437 	avj = argv;
438 
439 	/*
440 	 * Allocate memory to hold the argument list, and
441 	 * a NULL at the tail.
442 	 */
443 	tmp = malloc((argc + 1) * sizeof(char**));
444 	if (tmp == NULL)
445 		errx(1, "malloc failed");
446 	tmp2 = tmp;
447 
448 	/*
449 	 * Save the first argument and iterate over it, we
450 	 * cannot do strnsubst() to it.
451 	 */
452 	if ((*tmp++ = strdup(*avj++)) == NULL)
453 		errx(1, "strdup failed");
454 
455 	/*
456 	 * For each argument to utility, if we have not used up
457 	 * the number of replacements we are allowed to do, and
458 	 * if the argument contains at least one occurrence of
459 	 * replstr, call strnsubst(), else just save the string.
460 	 * Iterations over elements of avj and tmp are done
461 	 * where appropriate.
462 	 */
463 	while (--argc) {
464 		*tmp = *avj++;
465 		if (repls && strstr(*tmp, replstr) != NULL) {
466 			strnsubst(tmp++, replstr, inpline, (size_t)Sflag);
467 			if (repls > 0)
468 				repls--;
469 		} else {
470 			if ((*tmp = strdup(*tmp)) == NULL)
471 				errx(1, "strdup failed");
472 			tmp++;
473 		}
474 	}
475 
476 	/*
477 	 * Run it.
478 	 */
479 	*tmp = NULL;
480 	run(tmp2);
481 
482 	/*
483 	 * Walk from the tail to the head, free along the way.
484 	 */
485 	for (; tmp2 != tmp; tmp--)
486 		free(*tmp);
487 	/*
488 	 * Now free the list itself.
489 	 */
490 	free(tmp2);
491 
492 	/*
493 	 * Free the input line buffer, if we have one.
494 	 */
495 	if (inpline != NULL) {
496 		free(inpline);
497 		inpline = NULL;
498 	}
499 }
500 
501 static void
502 run(char **argv)
503 {
504 	pid_t pid;
505 	int fd;
506 	char **avec;
507 
508 	/*
509 	 * If the user wants to be notified of each command before it is
510 	 * executed, notify them.  If they want the notification to be
511 	 * followed by a prompt, then prompt them.
512 	 */
513 	if (tflag || pflag) {
514 		(void)fprintf(stderr, "%s", *argv);
515 		for (avec = argv + 1; *avec != NULL; ++avec)
516 			(void)fprintf(stderr, " %s", *avec);
517 		/*
518 		 * If the user has asked to be prompted, do so.
519 		 */
520 		if (pflag)
521 			/*
522 			 * If they asked not to exec, return without execution
523 			 * but if they asked to, go to the execution.  If we
524 			 * could not open their tty, break the switch and drop
525 			 * back to -t behaviour.
526 			 */
527 			switch (prompt()) {
528 			case 0:
529 				return;
530 			case 1:
531 				goto exec;
532 			case 2:
533 				break;
534 			}
535 		(void)fprintf(stderr, "\n");
536 		(void)fflush(stderr);
537 	}
538 exec:
539 	childerr = 0;
540 	switch (pid = vfork()) {
541 	case -1:
542 		err(1, "vfork");
543 	case 0:
544 		if (oflag) {
545 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1)
546 				err(1, "can't open /dev/tty");
547 		} else {
548 			fd = open(_PATH_DEVNULL, O_RDONLY);
549 		}
550 		if (fd > STDIN_FILENO) {
551 			if (dup2(fd, STDIN_FILENO) != 0)
552 				err(1, "can't dup2 to stdin");
553 			close(fd);
554 		}
555 		execvp(argv[0], argv);
556 		childerr = errno;
557 		_exit(1);
558 	}
559 	curprocs++;
560 	waitchildren(*argv, 0);
561 }
562 
563 static void
564 waitchildren(const char *name, int waitall)
565 {
566 	pid_t pid;
567 	int status;
568 
569 	while ((pid = waitpid(-1, &status, !waitall && curprocs < maxprocs ?
570 	    WNOHANG : 0)) > 0) {
571 		curprocs--;
572 		/* If we couldn't invoke the utility, exit. */
573 		if (childerr != 0) {
574 			errno = childerr;
575 			err(errno == ENOENT ? 127 : 126, "%s", name);
576 		}
577 		/*
578 		 * If utility signaled or exited with a value of 255,
579 		 * exit 1-125.
580 		 */
581 		if (WIFSIGNALED(status) || WEXITSTATUS(status) == 255)
582 			exit(1);
583 		if (WEXITSTATUS(status))
584 			rval = 1;
585 	}
586 	if (pid == -1 && errno != ECHILD)
587 		err(1, "wait3");
588 }
589 
/*
 * Prompt the user about running a command.
 *
 * Prints "?..." to stderr, reads one line from the controlling terminal
 * and matches it against the locale's affirmative-response expression.
 *
 * Returns 1 if the answer is affirmative, 0 if it is not (or if no answer
 * could be read or the expression could not be compiled), and 2 if the
 * terminal could not be opened.
 */
static int
prompt(void)
{
	regex_t cre;
	size_t rsize;
	int match;
	char *answer, *response;
	FILE *ttyfp;

	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
		return (2);	/* Indicate that the TTY failed to open. */
	(void)fprintf(stderr, "?...");
	(void)fflush(stderr);
	/* YESEXPR is an extended regular expression. */
	if ((response = fgetln(ttyfp, &rsize)) == NULL ||
	    regcomp(&cre, nl_langinfo(YESEXPR), REG_EXTENDED) != 0) {
		(void)fclose(ttyfp);
		return (0);
	}
	/*
	 * The line returned by fgetln() is not NUL-terminated and lacks a
	 * trailing newline when the input ended at EOF.  Strip the newline
	 * only if it is there and match against a terminated copy, so that
	 * the last character of the answer is never lost.
	 */
	if (response[rsize - 1] == '\n')
		rsize--;
	if ((answer = malloc(rsize + 1)) == NULL)
		errx(1, "malloc failed");
	memcpy(answer, response, rsize);
	answer[rsize] = '\0';
	match = regexec(&cre, answer, 0, NULL, 0);
	free(answer);
	(void)fclose(ttyfp);
	regfree(&cre);
	return (match == 0);
}
617 
/*
 * Write the command synopsis to stderr and terminate with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0opt] [-E eofstr] [-I replstr [-R replacements] [-S replsize]]\n"
"             [-J replstr] [-L number] [-n number [-x]] [-P maxprocs]\n"
"             [-s size] [utility [argument ...]]\n";

	(void)fputs(synopsis, stderr);
	exit(1);
}
626 }
627