xref: /illumos-gate/usr/src/cmd/xargs/xargs.c (revision 4bc0a2ef)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <stdio.h>
34 #include <sys/types.h>
35 #include <sys/wait.h>
36 #include <unistd.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <stdarg.h>
40 #include <libgen.h>
41 #include <stdlib.h>
42 #include <limits.h>
43 #include <wchar.h>
44 #include <locale.h>
45 #include <langinfo.h>
46 #include <stropts.h>
47 #include <poll.h>
48 #include <errno.h>
49 #include <stdarg.h>
50 
51 #define	HEAD	0
52 #define	TAIL	1
53 #define	FALSE 0
54 #define	TRUE 1
55 #define	MAXSBUF 255
56 #define	MAXIBUF 512
57 #define	MAXINSERTS 5
58 #define	BUFSIZE LINE_MAX
59 #define	MAXARGS 255
60 #define	INSPAT_STR	"{}"	/* default replstr string for -[Ii]	*/
61 #define	FORK_RETRY	5
62 
63 #define	QBUF_STARTLEN 255  /* start size of growable string buffer */
64 #define	QBUF_INC 100	   /* how much to grow a growable string by */
65 
66 static wctype_t	blank;
67 static char	*arglist[MAXARGS+1];
68 static char	argbuf[BUFSIZE+1];
69 static char	*next = argbuf;
70 static char	*lastarg = "";
71 static char	**ARGV = arglist;
72 static char	*LEOF = "_";
73 static char	*INSPAT = INSPAT_STR;
74 static char	ins_buf[MAXIBUF];
75 static char	*p_ibuf;
76 
/*
 * One entry per command-line argument that contains the replace string
 * (INSPAT) when running in insert mode.  main() fills the table while
 * copying the command's arguments; addibuf() later expands each template
 * and stores the result through p_ARGV.
 */
static struct inserts {
	char	**p_ARGV;	/* where to put newarg ptr in arg list */
	char	*p_skel;	/* ptr to arg template */
} saveargv[MAXINSERTS];
81 
82 static off_t	file_offset = 0;
83 static int	PROMPT = -1;
84 static int	BUFLIM = BUFSIZE;
85 static int	N_ARGS = 0;
86 static int	N_args = 0;
87 static int	N_lines = 0;
88 static int	DASHX = FALSE;
89 static int	MORE = TRUE;
90 static int	PER_LINE = FALSE;
91 static int	ERR = FALSE;
92 static int	OK = TRUE;
93 static int	LEGAL = FALSE;
94 static int	TRACE = FALSE;
95 static int	INSERT = FALSE;
96 static int	linesize = 0;
97 static int	ibufsize = 0;
98 static char	*yesstr;	/* the string contains int'l for "yes"	*/
99 static int	exitstat = 0;	/* our exit status			*/
100 static int	mac;		/* modified argc, after parsing		*/
101 static char	**mav;		/* modified argv, after parsing		*/
102 static int	n_inserts;	/* # of insertions.			*/
103 static int	inquote = 0;	/* processing a quoted string		*/
104 
/*
 * The pio structure describes one buffer of pending input.  Buffers are
 * kept on a singly linked list headed by queued_data: queue() adds them
 * (at the head for pushed-back data, at the tail for freshly read data)
 * and getchr() consumes them one byte at a time, freeing both the buffer
 * and the list element once 'length' drops to zero.
 */
typedef struct pio {
	struct pio *next;	/* next in stack			*/
	char *start;		/* starting addr of the buffer		*/
	char *cur;		/* ptr to current char in buf		*/
	size_t length;		/* number of bytes remaining		*/
} pio;
116 
117 static pio *queued_data = NULL;
118 
119 /* our usage message:							*/
120 #define	USAGEMSG "Usage: xargs: [-t] [-p] [-e[eofstr]] [-E eofstr] "\
121 	"[-I replstr] [-i[replstr]] [-L #] [-l[#]] [-n # [-x]] [-s size] "\
122 	"[cmd [args ...]]\n"
123 
124 static int	echoargs();
125 static int	getchr(void);
126 static wchar_t	getwchr(void);
127 static void	ungetwchr(wchar_t);
128 static int	lcall(char *sub, char **subargs);
129 static int	xindex(char *as1, char *as2);
130 static void	addibuf(struct inserts *p);
131 static void	ermsg(char *messages, ...);
132 static char	*addarg(char *arg);
133 static char	*checklen(char *arg);
134 static size_t   store_wchr(char **, size_t *, size_t, wchar_t);
135 static char	*getarg();
136 static char	*insert(char *pattern, char *subst);
137 static void	usage();
138 static void	parseargs();
139 static void	saveinput();
140 
141 
142 int
143 main(int argc, char **argv)
144 {
145 	int	j;
146 	struct inserts *psave;
147 	int c;
148 	int	initsize;
149 	char	*cmdname, *initbuf, **initlist;
150 
151 
152 	/* initialization */
153 
154 	blank = wctype("blank");
155 	n_inserts = 0;
156 	psave = saveargv;
157 	(void) setlocale(LC_ALL, "");
158 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D 		*/
159 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't 		*/
160 #endif
161 	(void) textdomain(TEXT_DOMAIN);
162 
163 	/*
164 	 * now we get the appropriate "yes" string for our locale.
165 	 * since this may be a multibyte character, we store the
166 	 * string which is returned. later on, when we're looking for
167 	 * a "y" in response to our prompt, we'll use the first
168 	 * multibyte character of yesstr as a comparision.
169 	 */
170 	initbuf = nl_langinfo(YESSTR);	/* initbuf is a tmp placeholder here */
171 	if ((yesstr = malloc(strlen(initbuf) + 1)) == NULL) {
172 		perror(gettext("xargs: Memory allocation failure"));
173 		exit(1);
174 	}
175 	(void) strcpy(yesstr, initbuf);
176 
177 	parseargs(argc, argv);
178 
179 	/* handling all of xargs arguments:				*/
180 	while ((c = getopt(mac, mav, "tpe:E:I:i:L:l:n:s:x")) != EOF) {
181 		switch (c) {
182 		case 't':	/* -t: turn trace mode on		*/
183 			TRACE = TRUE;
184 			break;
185 
186 		case 'p':	/* -p: turn on prompt mode.		*/
187 			if ((PROMPT = open("/dev/tty", O_RDONLY)) == -1) {
188 				perror(gettext("can't read from tty for -p"));
189 			} else {
190 				TRACE = TRUE;
191 			}
192 			break;
193 
194 		case 'e':
195 			/*
196 			 * -e[eofstr]: set/disable end-of-file.
197 			 * N.B. that an argument *isn't* required here; but
198 			 * parseargs forced an argument if not was given.  The
199 			 * forced argument is the default...
200 			 */
201 			LEOF = optarg; /* can be empty */
202 			break;
203 
204 		case 'E':
205 			/*
206 			 * -E eofstr: change end-of-file string.
207 			 * eofstr *is* required here:
208 			 */
209 			LEOF = optarg;
210 #ifdef XPG6
211 			if (LEOF == NULL) {
212 #else
213 			if ((LEOF == NULL) || (*LEOF == NULL)) {
214 #endif
215 				ermsg(gettext(
216 				    "Option requires an argument: -%c\n"), c);
217 			}
218 			break;
219 
220 		case 'I':
221 			/* -I replstr: Insert mode. replstr *is* required. */
222 			INSERT = PER_LINE = LEGAL = TRUE;
223 			N_ARGS = 0;
224 			if ((optarg != NULL) && (*optarg != '\0')) {
225 				INSPAT = optarg;
226 			} else {
227 				ermsg(gettext(
228 				    "Option requires an argument: -%c\n"), c);
229 			}
230 			break;
231 
232 		case 'i':
233 			/*
234 			 * -i [replstr]: insert mode, with *optional* replstr.
235 			 * N.B. that an argument *isn't* required here; if
236 			 * it's not given, then the string INSPAT_STR will
237 			 * be assumed.
238 			 *
239 			 * Since getopts(3C) doesn't handle the case of an
240 			 * optional variable argument at all, we have to
241 			 * parse this by hand:
242 			 */
243 
244 			INSERT = PER_LINE = LEGAL = TRUE;
245 			N_ARGS = 0;
246 			if ((optarg != NULL) && (*optarg != '\0')) {
247 				INSPAT = optarg;
248 			} else {
249 				/*
250 				 * here, there is no next argument. so
251 				 * we reset INSPAT to the INSPAT_STR.
252 				 * we *have* to do this, as -i/I may have
253 				 * been given previously, and XCU4 requires
254 				 * that only "the last one specified takes
255 				 * effect".
256 				 */
257 				INSPAT = INSPAT_STR;
258 			}
259 			break;
260 
261 		case 'L':
262 			/*
263 			 * -L number: # of times cmd is executed
264 			 * number *is* required here:
265 			 */
266 			PER_LINE = TRUE;
267 			N_ARGS = 0;
268 			INSERT = FALSE;
269 			if ((optarg == NULL) || (*optarg == '\0')) {
270 				ermsg(gettext(
271 				    "Option requires an argument: -%c\n"), c);
272 			} else if ((PER_LINE = atoi(optarg)) <= 0) {
273 				ermsg(gettext("#lines must be positive "
274 				    "int: %s\n"), optarg);
275 			}
276 			break;
277 
278 		case 'l':
279 			/*
280 			 * -l [number]: # of times cmd is executed
281 			 * N.B. that an argument *isn't* required here; if
282 			 * it's not given, then 1 is assumed.
283 			 *
284 			 * parseargs handles the optional arg processing.
285 			 */
286 
287 			PER_LINE = LEGAL = TRUE;  /* initialization	*/
288 			N_ARGS = 0;
289 			INSERT = FALSE;
290 
291 			if ((optarg != NULL) && (*optarg != '\0')) {
292 				if ((PER_LINE = atoi(optarg)) <= 0)
293 					PER_LINE = 1;
294 			}
295 			break;
296 
297 		case 'n':	/* -n number: # stdin args		*/
298 			/*
299 			 * -n number: # stdin args.
300 			 * number *is* required here:
301 			 */
302 			if ((optarg == NULL) || (*optarg == '\0')) {
303 				ermsg(gettext(
304 				    "Option requires an argument: -%c\n"), c);
305 			} else if ((N_ARGS = atoi(optarg)) <= 0) {
306 				ermsg(gettext("#args must be positive "
307 				    "int: %s\n"), optarg);
308 			} else {
309 				LEGAL = DASHX || N_ARGS == 1;
310 				INSERT = PER_LINE = FALSE;
311 			}
312 			break;
313 
314 		case 's':	/* -s size: set max size of each arg list */
315 			if ((optarg == NULL) || (*optarg == '\0')) {
316 				ermsg(gettext(
317 				    "Option requires an argument: -%c\n"), c);
318 			} else {
319 				BUFLIM = atoi(optarg);
320 				if (BUFLIM > BUFSIZE || BUFLIM <= 0) {
321 					ermsg(gettext(
322 					    "0 < max-cmd-line-size <= %d: "
323 					    "%s\n"), BUFSIZE, optarg);
324 				}
325 			}
326 			break;
327 
328 		case 'x':	/* -x: terminate if args > size limit	*/
329 			DASHX = LEGAL = TRUE;
330 			break;
331 
332 		default:
333 			/*
334 			 * bad argument. complain and get ready to die.
335 			 */
336 			ERR = TRUE;
337 			usage();
338 
339 			exit(2);
340 			break;
341 		}
342 	}
343 
344 	/*
345 	 * if anything called ermsg(), something screwed up, so
346 	 * we exit early.
347 	 */
348 	if (OK == FALSE) {
349 		ERR = TRUE;
350 		usage();
351 		exit(2);
352 	}
353 
354 	/*
355 	 * we're finished handling xargs's options, so now pick up
356 	 * the command name (if any), and it's options.
357 	 */
358 
359 
360 	mac -= optind;	/* dec arg count by what we've processed 	*/
361 	mav += optind;	/* inc to current mav				*/
362 
363 	if (mac <= 0) {	/* if there're no more args to process,	*/
364 		cmdname = "/usr/bin/echo";	/* our default command	*/
365 		*ARGV++ = addarg(cmdname);	/* use the default cmd.	*/
366 	} else {	/* otherwise keep parsing rest of the string.	*/
367 		/*
368 		 * note that we can't use getopts(3C), and *must* parse
369 		 * this by hand, as we don't know apriori what options the
370 		 * command will take.
371 		 */
372 		cmdname = *mav;	/* get the command name	*/
373 
374 
375 		/* pick up the remaining args from the command line:	*/
376 		while ((OK == TRUE) && (mac-- > 0)) {
377 			/*
378 			 * while we haven't crapped out, and there's
379 			 * work to do:
380 			 */
381 			if (INSERT && ! ERR) {
382 				if (xindex(*mav, INSPAT) != -1) {
383 					if (++n_inserts > MAXINSERTS) {
384 						ermsg(gettext("too many args "
385 						    "with %s\n"), INSPAT);
386 						ERR = TRUE;
387 					}
388 					psave->p_ARGV = ARGV;
389 					(psave++)->p_skel = *mav;
390 				}
391 			}
392 			*ARGV++ = addarg(*mav++);
393 		}
394 	}
395 
396 	/* pick up args from standard input */
397 
398 	initbuf = next;
399 	initlist = ARGV;
400 	initsize = linesize;
401 
402 	while (OK && MORE) {
403 		N_args = 0;
404 		N_lines = 0;
405 		next = initbuf;
406 		ARGV = initlist;
407 		linesize = initsize;
408 		if (*lastarg) {
409 			*ARGV++ = addarg(lastarg);
410 			lastarg = "";
411 		}
412 
413 		while (((ARGV - arglist) < MAXARGS) &&
414 		    ((*ARGV++ = getarg()) != NULL) && OK)
415 			;
416 
417 		/* insert arg if requested */
418 
419 		if (!ERR && INSERT) {
420 			if ((!MORE) && (N_lines == 0)) {
421 				exit(exitstat);
422 			}
423 					/* no more input lines */
424 			p_ibuf = ins_buf;
425 			ARGV--;
426 			j = ibufsize = 0;
427 			for (psave = saveargv; ++j <= n_inserts; ++psave) {
428 				addibuf(psave);
429 				if (ERR)
430 					break;
431 			}
432 		}
433 		*ARGV = 0;
434 
435 		if (n_inserts > 0) {
436 			int t_ninserts;
437 
438 			/*
439 			 * if we've done any insertions, re-calculate the
440 			 * linesize. bomb out if we've exceeded our length.
441 			 */
442 			t_ninserts = n_inserts;
443 			n_inserts = 0;	/* inserts have been done 	*/
444 			linesize = 0;	/* recalculate this		*/
445 
446 			/* for each current argument in the list:	*/
447 			for (ARGV = arglist; *ARGV != NULL; ARGV++) {
448 				/* recalculate everything.		*/
449 				if (checklen(*ARGV) != 0) {
450 					if (N_ARGS && (N_args >= N_ARGS)) {
451 						N_lines = N_args = 0;
452 						OK = FALSE;
453 						ERR = TRUE;
454 					}
455 				}
456 			}
457 			n_inserts = t_ninserts;
458 		}
459 
460 		/* exec command */
461 
462 		if (!ERR) {
463 			if (!MORE &&
464 			    (PER_LINE && N_lines == 0 || N_ARGS && N_args == 0))
465 				exit(exitstat);
466 			OK = TRUE;
467 			j = TRACE ? echoargs() : TRUE;
468 			if (j) {
469 				/*
470 				 * for xcu4, all invocations of cmdname must
471 				 * return 0, in order for us to return 0.
472 				 * so if we have a non-zero status here,
473 				 * quit immediately.
474 				 */
475 				if ((exitstat |= lcall(cmdname, arglist)) == 0)
476 					continue;
477 			}
478 		}
479 	}
480 
481 	(void) lseek(0, file_offset, SEEK_SET);
482 	if (OK) {
483 		return (exitstat);
484 	} else {
485 		/*
486 		 * if exitstat was set, to match XCU4 complience,
487 		 * return that value, otherwise, return 1.
488 		 */
489 		return (exitstat ? exitstat : 1);
490 	}
491 }
492 
493 static void
494 queue(char *buffer, int len, int where)
495 {
496 	pio *new, *element;
497 
498 	if ((new = malloc(sizeof (pio))) == NULL) {
499 		perror(gettext("xargs: Memory allocation failure"));
500 		exit(1);
501 	}
502 	new->cur = new->start = buffer;
503 	new->length = len;
504 
505 	if (where == TAIL) {
506 		new->next = NULL;
507 		if (queued_data == NULL) {
508 			queued_data = new;
509 		} else {
510 			element = queued_data;
511 			while (element->next != NULL) {
512 				element = element->next;
513 			}
514 			element->next = new;
515 		}
516 	} else {
517 		file_offset -= len;
518 		new->next = queued_data;
519 		queued_data = new;
520 	}
521 }
522 
523 static char *
524 checklen(char *arg)
525 {
526 	int	oklen;
527 
528 	oklen = TRUE;
529 	linesize += strlen(arg) + 1;
530 	if (linesize >= BUFLIM) {
531 		/*
532 		 * we skip this if there're inserts. we'll handle the
533 		 * argument counting after all the insertions have
534 		 * been done.
535 		 */
536 		if (n_inserts == 0) {
537 			lastarg = arg;
538 			oklen = OK = FALSE;
539 
540 			if (LEGAL) {
541 				ERR = TRUE;
542 				ermsg(gettext("arg list too long\n"));
543 			} else if (N_args > 1) {
544 				N_args = 1;
545 			} else {
546 				ermsg(gettext("a single arg was greater than "
547 				    "the max arglist size of %d characters\n"),
548 				    BUFLIM);
549 				ERR = TRUE;
550 			}
551 		}
552 	}
553 	return (oklen ? arg : 0);
554 }
555 
556 static char *
557 addarg(char *arg)
558 {
559 	if (checklen(arg) != 0) {
560 		(void) strcpy(next, arg);
561 		arg = next;
562 		next += strlen(arg) + 1;
563 		return (arg);
564 	}
565 	return ((char *)0);
566 }
567 
568 /*
569  * store_wchr() : append a wchar_t to a char buffer, resize buffer if required.
570  *
571  *     Given a pointer to the beginning of a string buffer, the length of the
572  *     buffer and an offset indicating the next place to write within that
573  *     buffer, the passed wchar_t will be appended to the buffer if there is
574  *     enough space. If there is not enough space, an attempt to reallocate the
575  *     buffer will be made and if successful the passed pointer and size will be
576  *     updated to describe the reallocated block. Returns the new value for
577  *     'offset' (it will be incremented by the number of bytes written).
578  */
579 static size_t
580 store_wchr(char **buffer, size_t *buflen, size_t offset, wchar_t c)
581 {
582 	int bytes;
583 
584 	/*
585 	 * Make sure that there is enough room in the buffer to store the
586 	 * maximum length of c.
587 	 */
588 	if ((offset + MB_CUR_MAX) > *buflen) {
589 		/*
590 		 * Not enough room so attempt to reallocate. Add 'MB_CUR_MAX' to
591 		 * buffer length to ensure that there is always enough room to
592 		 * store 'c' if realloc succeeds, no matter what QBUF_INC is
593 		 * defined as.
594 		 */
595 		*buflen += (QBUF_INC + MB_CUR_MAX);
596 		if ((*buffer = realloc(*buffer, *buflen)) == NULL) {
597 			perror(gettext("xargs: Memory allocation failure"));
598 			exit(1);
599 		}
600 	}
601 	/* store bytes from wchar into buffer */
602 	bytes = wctomb(*buffer + offset, c);
603 	if (bytes == -1) {
604 		/* char was invalid */
605 		bytes = 1;
606 		*(*buffer + offset) = (char)c;
607 	}
608 
609 	/* return new value for offset */
610 	return (offset + bytes);
611 }
612 
613 static char *
614 getarg()
615 {
616 	int	bytes;
617 	wchar_t	c;
618 	char	*arg;
619 	char	*retarg, *requeue_buf;
620 	size_t  requeue_offset = 0, requeue_len;
621 	char	mbc[MB_LEN_MAX];
622 
623 	while (iswspace(c = getwchr()) || c == '\n')
624 		;
625 
626 	if (c == '\0') {
627 		MORE = FALSE;
628 		return (0);
629 	}
630 
631 	/*
632 	 * While we are reading in an argument, it is possible that we will
633 	 * reach the maximum length of the overflow buffer and we'll have to
634 	 * requeue what we have read so far. To handle this we allocate an
635 	 * initial buffer here which will keep an unprocessed copy of the data
636 	 * that we read in (this buffer will grow as required).
637 	 */
638 	requeue_len = (size_t)QBUF_STARTLEN;
639 	if ((requeue_buf = (char *)malloc(requeue_len)) == NULL) {
640 		perror(gettext("xargs: Memory allocation failure"));
641 		exit(1);
642 	}
643 
644 	for (arg = next; ; c = getwchr()) {
645 		bytes = wctomb(mbc, c);
646 
647 		/*
648 		 * Store the char that we have read before processing it in case
649 		 * the current argument needs to be requeued.
650 		 */
651 		requeue_offset = store_wchr(&requeue_buf, &requeue_len,
652 		    requeue_offset, c);
653 
654 		/* Check for overflow the input buffer */
655 		if ((next + ((bytes == -1) ? 1 : bytes)) >= &argbuf[BUFLIM]) {
656 			/*
657 			 * It's only an error if there are no Args in buffer
658 			 * already.
659 			 */
660 			if ((N_ARGS || PER_LINE) && LEGAL) {
661 				ERR = TRUE;
662 				ermsg(gettext("Argument list too long\n"));
663 				free(requeue_buf);
664 				return (0);
665 			} else if (N_args == 0) {
666 				lastarg = "";
667 				ERR = TRUE;
668 				ermsg(gettext("A single arg was greater than "
669 				    "the max arglist size of %d characters\n"),
670 				    BUFSIZE);
671 				free(requeue_buf);
672 				return (0);
673 			}
674 			/*
675 			 * Otherwise we put back the current argument
676 			 * and use what we have collected so far...
677 			 */
678 			queue(requeue_buf, requeue_offset, HEAD);
679 			/* reset inquote because we have requeued the quotes */
680 			inquote = 0;
681 			return (NULL);
682 		}
683 
684 
685 		if (iswctype(c, blank) && inquote == 0) {
686 			if (INSERT) {
687 				if (bytes == -1) {
688 					*next++ = (char)c;
689 				} else {
690 					(void) wctomb(next, c);
691 					next += bytes;
692 				}
693 				continue;
694 			}
695 
696 			/* skip over trailing whitespace till next arg */
697 			while (iswctype((c = getwchr()), blank) &&
698 			    (c != '\n') && (c != '\0'))
699 				;
700 
701 			/*
702 			 * if there was space till end of line then the last
703 			 * character was really a newline...
704 			 */
705 			if (c == L'\n' || c == L'\0') {
706 				ungetwchr(L'\n');
707 			} else {
708 				/* later code needs to know this was a space */
709 				ungetwchr(c);
710 				c = L' ';
711 			}
712 			goto end_arg;
713 		}
714 		switch (c) {
715 		case L'\0':
716 		case L'\n':
717 			if (inquote) {
718 				*next++ = '\0';
719 				ermsg(gettext("Missing quote: %s\n"), arg);
720 				ERR = TRUE;
721 				free(requeue_buf);
722 				return (0);
723 			}
724 
725 			N_lines++;
726 end_arg:		*next++ = '\0';
727 			/* we finished without requeuing so free requeue_buf */
728 			free(requeue_buf);
729 			if (strcmp(arg, LEOF) == 0 || (c == '\0' &&
730 			    strlen(arg) == 0)) {
731 				MORE = FALSE;
732 				/* absorb the rest of the line */
733 				if ((c != '\n') && (c != '\0'))
734 					while (c = getwchr())
735 						if ((c == '\n') || (c == '\0'))
736 							break;
737 				return (0);
738 			} else {
739 				++N_args;
740 				if (retarg = checklen(arg)) {
741 					if ((PER_LINE &&
742 					    N_lines >= PER_LINE &&
743 					    (c == '\0' || c == '\n')) ||
744 					    (N_ARGS && N_args >= N_ARGS)) {
745 						N_lines = N_args = 0;
746 						lastarg = "";
747 						OK = FALSE;
748 					}
749 				}
750 				return (retarg);
751 			}
752 
753 		case '"':
754 			if (inquote == 1)	/* in single quoted string */
755 				goto is_default;
756 			if (inquote == 2)	/* terminating double quote */
757 				inquote = 0;
758 			else			/* starting quoted string */
759 				inquote = 2;
760 			break;
761 
762 		case '\'':
763 			if (inquote == 2)	/* in double quoted string */
764 				goto is_default;
765 			if (inquote == 1)	/* terminating single quote */
766 				inquote = 0;
767 			else			/* starting quoted string */
768 				inquote = 1;
769 			break;
770 
771 		case L'\\':
772 			c = getwchr();
773 			/* store quoted char for potential requeueing */
774 			requeue_offset = store_wchr(&requeue_buf, &requeue_len,
775 			    requeue_offset, c);
776 
777 		default:
778 is_default:		if (bytes == -1) {
779 				*next++ = (char)c;
780 			} else {
781 				(void) wctomb(next, c);
782 				next += bytes;
783 			}
784 			break;
785 		}
786 	}
787 }
788 
789 
790 /*
791  * ermsg():	print out an error message, and indicate failure globally.
792  *
793  *	Assumes that message has already been gettext()'d. It would be
794  *	nice if we could just do the gettext() here, but we can't, since
795  *	since xgettext(1M) wouldn't be able to pick up our error message.
796  */
797 /* PRINTFLIKE1 */
798 static void
799 ermsg(char *messages, ...)
800 {
801 	va_list	ap;
802 
803 	va_start(ap, messages);
804 
805 	(void) fprintf(stderr, "xargs: ");
806 	(void) vfprintf(stderr, messages, ap);
807 
808 	va_end(ap);
809 	OK = FALSE;
810 }
811 
812 
813 /*
814  * Function: int rpmatch(char *)
815  *
816  * Description:
817  *
818  *	Internationalized get yes / no answer.
819  *
820  * Inputs:
821  *	s	-> Pointer to answer to compare against.
822  *
823  * Returns:
824  *	TRUE	-> Answer was affirmative
825  *	FALSE	-> Answer was negative
826  */
827 
828 static int
829 rpmatch(char *s)
830 {
831 	static char	*default_yesexpr = "^[Yy].*";
832 	static char	*compiled_yesexpr = (char *)NULL;
833 
834 	/* Execute once to initialize */
835 	if (compiled_yesexpr == (char *)NULL) {
836 		char	*yesexpr;
837 
838 		/* get yes expression according to current locale */
839 		yesexpr = nl_langinfo(YESEXPR);
840 		/*
841 		 * If the was no expression or if there is a compile error
842 		 * use default yes expression.  Anchor
843 		 */
844 		if ((yesexpr == (char *)NULL) || (*yesexpr == (char)NULL) ||
845 		    ((compiled_yesexpr =
846 		    regcmp(yesexpr, 0)) == NULL))
847 			compiled_yesexpr = regcmp(default_yesexpr, 0);
848 	}
849 
850 	/* match yesexpr */
851 	if (regex(compiled_yesexpr, s) == NULL) {
852 		return (FALSE);
853 	}
854 	return (TRUE);
855 }
856 
857 static int
858 echoargs()
859 {
860 	char	**anarg;
861 	char	**tanarg;	/* tmp ptr			*/
862 	int		i;
863 	char		reply[LINE_MAX];
864 
865 	tanarg = anarg = arglist-1;
866 
867 	/*
868 	 * write out each argument, separated by a space. the tanarg
869 	 * nonsense is for xcu4 testsuite compliance - so that an
870 	 * extra space isn't echoed after the last argument.
871 	 */
872 	while (*++anarg) {		/* while there's an argument	*/
873 		++tanarg;		/* follow anarg			*/
874 		(void) write(2, *anarg, strlen(*anarg));
875 
876 		if (*++tanarg) {	/* if there's another argument:	*/
877 			(void) write(2, " ", 1); /* add a space		*/
878 			--tanarg;	/* reset back to anarg		*/
879 		}
880 	}
881 	if (PROMPT == -1) {
882 		(void) write(2, "\n", 1);
883 		return (TRUE);
884 	}
885 
886 	/*
887 	 * at this point, there may be unexpected input pending on stdin,
888 	 * if one has used the -n flag. this presents a problem, because
889 	 * if we simply do a read(), we'll get the extra input, instead
890 	 * of our desired y/n input. so, we see if there's any extra
891 	 * input, and if there is, then we will store it.
892 	 */
893 
894 	saveinput();
895 
896 	(void) write(2, "?...", 4);	/* ask the user for input	*/
897 
898 	for (i = 0; i < LINE_MAX && read(PROMPT, &reply[i], 1) > 0; i++) {
899 		if (reply[i] == '\n') {
900 			if (i == 0)
901 				return (FALSE);
902 			break;
903 		}
904 	}
905 	reply[i] = 0;
906 
907 	/* flush remainder of line if necessary */
908 	if (i == LINE_MAX) {
909 		char	bitbucket;
910 
911 		while ((read(PROMPT, &bitbucket, 1) > 0) && (bitbucket != '\n'))
912 			;
913 	}
914 
915 	/*
916 	 * now we have to figure out whether the user typed an
917 	 * internationalized version of 'y' for yes. note that in some
918 	 * countries, they've gotten used to typing an ASCII 'y'! so
919 	 * even if our int'l version fails, we will check for an ASCII
920 	 * 'y', in order to be backwards compatible.
921 	 */
922 	return (rpmatch(reply));
923 }
924 
925 
926 static char *
927 insert(char *pattern, char *subst)
928 {
929 	static char	buffer[MAXSBUF+1];
930 	int		len, ipatlen;
931 	char	*pat;
932 	char	*bufend;
933 	char	*pbuf;
934 
935 	len = strlen(subst);
936 	ipatlen = strlen(INSPAT) - 1;
937 	pat = pattern - 1;
938 	pbuf = buffer;
939 	bufend = &buffer[MAXSBUF];
940 
941 	while (*++pat) {
942 		if (xindex(pat, INSPAT) == 0) {
943 			if (pbuf + len >= bufend) {
944 				break;
945 			} else {
946 				(void) strcpy(pbuf, subst);
947 				pat += ipatlen;
948 				pbuf += len;
949 			}
950 		} else {
951 			*pbuf++ = *pat;
952 			if (pbuf >= bufend)
953 				break;
954 		}
955 	}
956 
957 	if (!*pat) {
958 		*pbuf = '\0';
959 		return (buffer);
960 	} else {
961 		ermsg(gettext("Maximum argument size with insertion via %s's "
962 		    "exceeded\n"), INSPAT);
963 		ERR = TRUE;
964 		return (0);
965 	}
966 }
967 
968 
969 static void
970 addibuf(struct inserts	*p)
971 {
972 	char	*newarg, *skel, *sub;
973 	int		l;
974 
975 	skel = p->p_skel;
976 	sub = *ARGV;
977 	linesize -= strlen(skel) + 1;
978 	newarg = insert(skel, sub);
979 	if (ERR)
980 		return;
981 
982 	if (checklen(newarg)) {
983 		if ((ibufsize += (l = strlen(newarg) + 1)) > MAXIBUF) {
984 			ermsg(gettext("Insert buffer overflow\n"));
985 			ERR = TRUE;
986 		}
987 		(void) strcpy(p_ibuf, newarg);
988 		*(p->p_ARGV) = p_ibuf;
989 		p_ibuf += l;
990 	}
991 }
992 
993 
994 /*
995  * getchr():	get the next character.
996  * description:
997  *	we get the next character from pio.structure, if there's a character
998  *	to get. this may happen when we've had to flush stdin=/dev/tty,
999  *	but still wanted to preserve the characters for later processing.
1000  *
1001  *	otherwise we just get the character from stdin.
1002  */
1003 static int
1004 getchr(void)
1005 {
1006 	char	c;
1007 
1008 	do {
1009 		if (queued_data == NULL) {
1010 			char	*buffer;
1011 			int	len;
1012 
1013 			if ((buffer = malloc(BUFSIZE)) == NULL) {
1014 				perror(gettext(
1015 				    "xargs: Memory allocation failure"));
1016 				exit(1);
1017 			}
1018 
1019 			if ((len = read(0, buffer, BUFSIZE)) == 0)
1020 				return (0);
1021 			if (len == -1) {
1022 				perror(gettext("xargs: Read failure"));
1023 				exit(1);
1024 			}
1025 
1026 			queue(buffer, len, TAIL);
1027 		}
1028 
1029 		file_offset++;
1030 		c = *queued_data->cur++;	 /* get the next character */
1031 		if (--queued_data->length == 0) { /* at the end of buffer? */
1032 			pio	*nxt = queued_data->next;
1033 
1034 			free(queued_data->start);
1035 			free(queued_data);
1036 			queued_data = nxt;
1037 		}
1038 	} while (c == '\0');
1039 	return (c);
1040 }
1041 
1042 
/*
 * getwchr():	get the next wide character.
 *
 *	Bytes are fetched with getchr() one at a time, up to MB_CUR_MAX of
 *	them, until mbtowc() accepts the collected sequence.  Returns the
 *	converted character, or L'\0' at end of input.  If no valid
 *	character can be formed, "Corrupt input file" is reported and the
 *	program exits with status 1.
 */
static wchar_t
getwchr(void)
{
	int		i;
	wchar_t		wch;
	unsigned char	buffer[MB_LEN_MAX + 1];

	for (i = 0; i < (int)MB_CUR_MAX; ) {
		if ((buffer[i++] = getchr()) == '\0') {
			/* We have reached  EOF */
			if (i == 1) {
				/* TRUE EOF has been reached */
				return (L'\0');
			}
			/*
			 * We have some characters in our buffer still so it
			 * must be an invalid character right before EOF.
			 */
			break;
		}

		/* If this succeeds then we are done */
		if (mbtowc(&wch, (char *)buffer, i) != -1)
			return (wch);
	}

	/*
	 * We have now encountered an illegal character sequence.
	 * There is nothing much we can do at this point but
	 * return an error.  If we attempt to recover we may in fact
	 * return garbage as arguments, from the customer's point
	 * of view.  After all what if they are feeding us a file
	 * generated in another locale?
	 */
	errno = EILSEQ;
	perror(gettext("xargs: Corrupt input file"));
	exit(1);
	/* NOTREACHED */
}
1082 
1083 
1084 static void
1085 ungetwchr(wchar_t wch)
1086 {
1087 	char	*buffer;
1088 	int	bytes;
1089 
1090 	if ((buffer = malloc(MB_LEN_MAX)) == NULL) {
1091 		perror(gettext("xargs: Memory allocation failure"));
1092 		exit(1);
1093 	}
1094 	bytes = wctomb(buffer, wch);
1095 	queue(buffer, bytes, HEAD);
1096 }
1097 
1098 
1099 static int
1100 lcall(char *sub, char **subargs)
1101 {
1102 	int retcode, retry = 0;
1103 	pid_t iwait, child;
1104 
1105 	for (; ; ) {
1106 		switch (child = fork()) {
1107 		default:
1108 			while ((iwait = wait(&retcode)) != child &&
1109 			    iwait != (pid_t)-1)
1110 				;
1111 			if (iwait == (pid_t)-1) {
1112 				perror(gettext("xargs: Wait failure"));
1113 				exit(122);
1114 				/* NOTREACHED */
1115 			}
1116 			if (WIFSIGNALED(retcode)) {
1117 				ermsg(gettext("Child killed with signal %d\n"),
1118 				    WTERMSIG(retcode));
1119 				exit(125);
1120 				/* NOTREACHED */
1121 			}
1122 			if ((WEXITSTATUS(retcode) & 0377) == 0377) {
1123 				ermsg(gettext("Command could not continue "
1124 				    "processing data\n"));
1125 				exit(124);
1126 				/* NOTREACHED */
1127 			}
1128 			return (WEXITSTATUS(retcode));
1129 		case 0:
1130 			(void) execvp(sub, subargs);
1131 			perror(gettext("xargs: Could not exec command"));
1132 			if (errno == EACCES)
1133 				exit(126);
1134 			exit(127);
1135 			/* NOTREACHED */
1136 		case -1:
1137 			if (errno != EAGAIN && retry++ < FORK_RETRY) {
1138 				perror(gettext("xargs: Could not fork child"));
1139 				exit(123);
1140 			}
1141 			(void) sleep(1);
1142 		}
1143 	}
1144 }
1145 
1146 
/*
 * xindex():	locate one string inside another.
 *
 *	If `as2' is a non-empty substring of `as1', return the offset of
 *	its first occurrence in `as1'; otherwise return -1.  An empty
 *	`as2' is never considered to be found.
 */
static int
xindex(char *as1, char *as2)
{
	char	*hit;

	/* strstr() would report an empty needle at offset 0 */
	if (*as2 == '\0')
		return (-1);

	hit = strstr(as1, as2);
	if (hit == NULL)
		return (-1);

	return ((int)(hit - as1));
}
1176 
1177 
1178 static void
1179 usage()
1180 {
1181 	ermsg(gettext(USAGEMSG));
1182 	OK = FALSE;
1183 }
1184 
1185 
1186 
1187 /*
1188  * parseargs():		modify the args
1189  *	since the -e, -i and -l flags all take optional subarguments,
1190  *	and getopts(3C) is clueless about this nonsense, we change the
1191  *	our local argument count and strings to separate this out,
1192  *	and make it easier to handle via getopts(3c).
1193  *
1194  *	-e	-> "-e ""
1195  *	-e3	-> "-e "3"
1196  *	-Estr	-> "-E "str"
1197  *	-i	-> "-i "{}"
1198  *	-irep	-> "-i "rep"
1199  *	-l	-> "-i "1"
1200  *	-l10	-> "-i "10"
1201  *
1202  *	since the -e, -i and -l flags all take optional subarguments,
1203  */
1204 static void
1205 parseargs(int ac, char **av)
1206 {
1207 	int i;			/* current argument			*/
1208 	int cflag;		/* 0 = not processing cmd arg		*/
1209 
1210 	if ((mav = malloc((ac * 2 + 1) * sizeof (char *))) == NULL) {
1211 		perror(gettext("xargs: Memory allocation failure"));
1212 		exit(1);
1213 	}
1214 
1215 	/* for each argument, see if we need to change things:		*/
1216 	for (i = mac = cflag = 0; (av[i] != NULL) && i < ac; i++, mac++) {
1217 		if ((mav[mac] = strdup(av[i])) == NULL) {
1218 			perror(gettext("xargs: Memory allocation failure"));
1219 			exit(1);
1220 		}
1221 
1222 		/* -- has been found or argument list is fully processes */
1223 		if (cflag)
1224 			continue;
1225 
1226 		/*
1227 		 * if we're doing special processing, and we've got a flag
1228 		 */
1229 		else if ((av[i][0] == '-') && (av[i][1] != NULL)) {
1230 			char	*def;
1231 
1232 			switch (av[i][1]) {
1233 			case	'e':
1234 				def = ""; /* -e with no arg turns off eof */
1235 				goto process_special;
1236 			case	'i':
1237 				def = INSPAT_STR;
1238 				goto process_special;
1239 			case	'l':
1240 				def = "1";
1241 process_special:
1242 				/*
1243 				 * if there's no sub-option, we *must* add
1244 				 * a default one. this is because xargs must
1245 				 * be able to distinguish between a valid
1246 				 * suboption, and a command name.
1247 				 */
1248 				if (av[i][2] == NULL) {
1249 					mav[++mac] = strdup(def);
1250 				} else {
1251 					/* clear out our version: */
1252 					mav[mac][2] = NULL;
1253 					mav[++mac] = strdup(&av[i][2]);
1254 				}
1255 				if (mav[mac] == NULL) {
1256 					perror(gettext("xargs: Memory"
1257 					    " allocation failure"));
1258 					exit(1);
1259 				}
1260 				break;
1261 
1262 			/* flags with required subarguments:		*/
1263 
1264 			/*
1265 			 * there are two separate cases here. either the
1266 			 * flag can have the normal XCU4 handling
1267 			 * (of the form: -X subargument); or it can have
1268 			 * the old solaris 2.[0-4] handling (of the
1269 			 * form: -Xsubargument). in order to maintain
1270 			 * backwards compatibility, we must support the
1271 			 * latter case. we handle the latter possibility
1272 			 * first so both the old solaris way of handling
1273 			 * and the new XCU4 way of handling things are allowed.
1274 			 */
1275 			case	'n':	/* FALLTHROUGH			*/
1276 			case	's':	/* FALLTHROUGH			*/
1277 			case	'E':	/* FALLTHROUGH			*/
1278 			case	'I':	/* FALLTHROUGH			*/
1279 			case	'L':
1280 				/*
1281 				 * if the second character isn't null, then
1282 				 * the user has specified the old syntax.
1283 				 * we move the subargument into our
1284 				 * mod'd argument list.
1285 				 */
1286 				if (av[i][2] != NULL) {
1287 					/* first clean things up:	*/
1288 					mav[mac][2] = NULL;
1289 
1290 					/* now add the separation:	*/
1291 					++mac;	/* inc to next mod'd arg */
1292 					if ((mav[mac] = strdup(&av[i][2])) ==
1293 					    NULL) {
1294 						perror(gettext("xargs: Memory"
1295 						    " allocation failure"));
1296 						exit(1);
1297 					}
1298 					break;
1299 				}
1300 				i++;
1301 				mac++;
1302 #ifdef XPG6
1303 				if (av[i] != NULL) {
1304 					if ((mav[mac] = strdup(av[i]))
1305 					    == NULL) {
1306 						perror(gettext("xargs: Memory"
1307 						    " allocation failure"));
1308 						exit(1);
1309 					}
1310 				}
1311 #else
1312 				if (av[i] == NULL) {
1313 					if ((mav[mac++] = strdup("")) == NULL) {
1314 						perror(gettext("xargs: Memory "
1315 						    " allocation failure"));
1316 						exit(1);
1317 					}
1318 					mav[mac] = NULL;
1319 					return;
1320 				}
1321 				if ((mav[mac] = strdup(av[i])) == NULL) {
1322 					perror(gettext("xargs: Memory"
1323 					    " allocation failure"));
1324 					exit(1);
1325 				}
1326 
1327 #endif
1328 				break;
1329 
1330 			/* flags */
1331 			case 'p' :
1332 			case 't' :
1333 			case 'x' :
1334 				break;
1335 
1336 			case '-' :
1337 			default:
1338 				/*
1339 				 * here we've hit the cmd argument. so
1340 				 * we'll stop special processing, as the
1341 				 * cmd may have a "-i" etc., argument,
1342 				 * and we don't want to add a "" to it.
1343 				 */
1344 				cflag = 1;
1345 				break;
1346 			}
1347 		} else if (i > 0) {	/* if we're not the 1st arg	*/
1348 			/*
1349 			 * if it's not a flag, then it *must* be the cmd.
1350 			 * set cflag, so we don't mishandle the -[eil] flags.
1351 			 */
1352 			cflag = 1;
1353 		}
1354 	}
1355 
1356 	mav[mac] = NULL;
1357 }
1358 
1359 
/*
 * saveinput(): pick up any pending input, so it can be processed later.
 *
 * description:
 *	the purpose of this routine is to allow us to handle the user
 *	typing in a 'y' or 'n', when there are existing characters already
 *	in stdin. this happens when one gives the "-n" option along with
 *	"-p". the problem occurs when the user first types in more arguments
 *	than specified by the -n number. echoargs() wants to read stdin
 *	in order to get the user's response, but if there's already stuff
 *	there, echoargs() won't read the proper character.
 *
 *	the solution provided by this routine is to pick up all characters
 *	(if any), and store them for later processing.
 *
 *	the routine does nothing when PROMPT is -1. an allocation, I_PEEK
 *	or read failure prints a diagnostic and exits with status 1.
 */

void
saveinput()
{
	char *buffer;		/* ptr to the floating data buffer	*/
	struct strpeek speek;	/* to see what's on the queue		*/
	int len;		/* number of bytes actually read	*/

	/* if we're not in -p mode, skip				*/
	if (PROMPT == -1) {
		return;
	}

	if ((buffer = malloc((size_t)MAX_INPUT)) == NULL) {
		perror(gettext("xargs: Memory allocation failure"));
		exit(1);
	}

	/* now see if there's any activity pending:			*/
	speek.ctlbuf.maxlen = 0;
	speek.ctlbuf.len = 0;
	speek.ctlbuf.buf = NULL;
	speek.flags = 0;
	speek.databuf.maxlen = MAX_INPUT;
	speek.databuf.len = 0;
	speek.databuf.buf = buffer;

	if (ioctl(PROMPT, I_PEEK, &speek) == -1) {
		perror(gettext("xargs: I_PEEK failure"));
		exit(1);
	}

	/* nothing pending: the buffer was never handed on, so release it */
	if (speek.databuf.len <= 0) {
		free(buffer);
		return;
	}

	if ((len = read(PROMPT, buffer, speek.databuf.len)) == -1) {
		perror(gettext("xargs: read failure"));
		exit(1);
	}

	/*
	 * NOTE(review): buffer is deliberately not freed here; queue() is
	 * presumed to keep the pointer it is given -- confirm against
	 * queue().
	 */
	queue(buffer, len, TAIL);
}
1418