xref: /dragonfly/usr.bin/printf/printf.c (revision 3856b434)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
5  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
6  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
7  * Copyright (c) 1989, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * @(#) Copyright (c) 1989, 1993 The Regents of the University of California.  All rights reserved.
35  * @(#)printf.c	8.1 (Berkeley) 7/20/93
36  * $FreeBSD: head/usr.bin/printf/printf.c 337618 2018-08-11 11:13:34Z jilles $
37  */
38 /*
39  * Important: This file is used both as a standalone program /usr/bin/printf
40  * and as a builtin for /bin/sh (#define SHELL).
41  */
42 
43 #include <sys/types.h>
44 
45 #include <ctype.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <inttypes.h>
49 #include <limits.h>
50 #include <locale.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <unistd.h>
55 #include <wchar.h>
56 
57 #ifdef SHELL
58 #define	main printfcmd
59 #include "bltin/bltin.h"
60 #include "options.h"
61 #endif
62 
/*
 * PF(f, func): print one converted value with printf(3) using format f.
 *
 * The macro expands in a scope that provides the locals havewidth,
 * haveprec, fieldwidth and precision.  When the conversion used '*' for
 * the width and/or the precision, the corresponding int is passed ahead
 * of the value so that the '*' in f is satisfied.
 */
#define	PF(f, func) do {						\
	if (havewidth && haveprec)					\
		printf(f, fieldwidth, precision, func);		\
	else if (havewidth)						\
		printf(f, fieldwidth, func);				\
	else if (haveprec)						\
		printf(f, precision, func);				\
	else								\
		printf(f, func);					\
} while (0)
74 
/* Format-string processing. */
static char	*printf_doformat(char *, int *);
static int	 escape(char *, int, size_t *);
static char	*mknum(char *, char);

/* Argument fetchers; each one reads from (and may advance) gargv. */
static int	 asciicode(void);
static int	 getchr(void);
static int	 getfloating(long double *, int);
static int	 getint(int *);
static int	 getnum(intmax_t *, uintmax_t *, int);
static const char *getstr(void);

static void	 usage(void);

/* Decimal digits, used with strspn() to recognise "n$" indexes. */
static const char digits[] = "0123456789";

/*
 * Sentinel: printf_doformat() returns this address to tell main() that
 * output must stop (a %b argument contained \c).
 */
static char end_fmt[1];

/* Number of arguments available at the start of the current pass. */
static int  myargc;
/* First argument of the current pass over the format string. */
static char **myargv;
/* Next argument to be consumed. */
static char **gargv;
/* Furthest position gargv has reached during the current pass. */
static char **maxargv;
95 
96 int
97 main(int argc, char *argv[])
98 {
99 	size_t len;
100 	int end, rval;
101 	char *format, *fmt, *start;
102 
103 #ifndef SHELL
104 	setlocale(LC_ALL, "");
105 #endif
106 
107 	/*
108 	 * We may not use getopt(3) because calling
109 	 * "printf -f%s oo" may not result in an invalid
110 	 * option error.
111 	 * However common usage and other implementations seem
112 	 * to indicate that we need to allow -- as a discardable
113 	 * option separator.
114 	 */
115 	if (argc > 1 && strcmp(argv[1], "--") == 0) {
116 		argc--;
117 		argv++;
118 	}
119 
120 	if (argc < 2) {
121 		usage();
122 		return (1);
123 	}
124 
125 	argv++;
126 
127 #ifdef SHELL
128 	INTOFF;
129 #endif
130 	/*
131 	 * Basic algorithm is to scan the format string for conversion
132 	 * specifications -- once one is found, find out if the field
133 	 * width or precision is a '*'; if it is, gather up value.  Note,
134 	 * format strings are reused as necessary to use up the provided
135 	 * arguments, arguments of zero/null string are provided to use
136 	 * up the format string.
137 	 */
138 	fmt = format = *argv;
139 	escape(fmt, 1, &len);		/* backslash interpretation */
140 	rval = end = 0;
141 	gargv = ++argv;
142 
143 	for (;;) {
144 		maxargv = gargv;
145 
146 		myargv = gargv;
147 		for (myargc = 0; gargv[myargc]; myargc++)
148 			/* nop */;
149 		start = fmt;
150 		while (fmt < format + len) {
151 			if (fmt[0] == '%') {
152 				fwrite(start, 1, fmt - start, stdout);
153 				if (fmt[1] == '%') {
154 					/* %% prints a % */
155 					putchar('%');
156 					fmt += 2;
157 				} else {
158 					fmt = printf_doformat(fmt, &rval);
159 					if (fmt == NULL || fmt == end_fmt) {
160 #ifdef SHELL
161 						INTON;
162 #endif
163 						return (fmt == NULL ? 1 : rval);
164 					}
165 					end = 0;
166 				}
167 				start = fmt;
168 			} else
169 				fmt++;
170 			if (gargv > maxargv)
171 				maxargv = gargv;
172 		}
173 		gargv = maxargv;
174 
175 		if (end == 1) {
176 			warnx("missing format character");
177 #ifdef SHELL
178 			INTON;
179 #endif
180 			return (1);
181 		}
182 		fwrite(start, 1, fmt - start, stdout);
183 		if (!*gargv) {
184 #ifdef SHELL
185 			INTON;
186 #endif
187 			return (rval);
188 		}
189 		/* Restart at the beginning of the format string. */
190 		fmt = format;
191 		end = 1;
192 	}
193 	/* NOTREACHED */
194 }
195 
196 
197 static char *
198 printf_doformat(char *fmt, int *rval)
199 {
200 	static const char skip1[] = "#'-+ 0";
201 	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
202 	char convch, nextch;
203 	char start[strlen(fmt) + 1];
204 	char **fargv;
205 	char *dptr;
206 	int l;
207 
208 	dptr = start;
209 	*dptr++ = '%';
210 	*dptr = 0;
211 
212 	fmt++;
213 
214 	/* look for "n$" field index specifier */
215 	l = strspn(fmt, digits);
216 	if ((l > 0) && (fmt[l] == '$')) {
217 		int idx = atoi(fmt);
218 		if (idx <= myargc) {
219 			gargv = &myargv[idx - 1];
220 		} else {
221 			gargv = &myargv[myargc];
222 		}
223 		if (gargv > maxargv)
224 			maxargv = gargv;
225 		fmt += l + 1;
226 
227 		/* save format argument */
228 		fargv = gargv;
229 	} else {
230 		fargv = NULL;
231 	}
232 
233 	/* skip to field width */
234 	while (*fmt && strchr(skip1, *fmt) != NULL) {
235 		*dptr++ = *fmt++;
236 		*dptr = 0;
237 	}
238 
239 	if (*fmt == '*') {
240 
241 		fmt++;
242 		l = strspn(fmt, digits);
243 		if ((l > 0) && (fmt[l] == '$')) {
244 			int idx = atoi(fmt);
245 			if (fargv == NULL) {
246 				warnx("incomplete use of n$");
247 				return (NULL);
248 			}
249 			if (idx <= myargc) {
250 				gargv = &myargv[idx - 1];
251 			} else {
252 				gargv = &myargv[myargc];
253 			}
254 			fmt += l + 1;
255 		} else if (fargv != NULL) {
256 			warnx("incomplete use of n$");
257 			return (NULL);
258 		}
259 
260 		if (getint(&fieldwidth))
261 			return (NULL);
262 		if (gargv > maxargv)
263 			maxargv = gargv;
264 		havewidth = 1;
265 
266 		*dptr++ = '*';
267 		*dptr = 0;
268 	} else {
269 		havewidth = 0;
270 
271 		/* skip to possible '.', get following precision */
272 		while (isdigit(*fmt)) {
273 			*dptr++ = *fmt++;
274 			*dptr = 0;
275 		}
276 	}
277 
278 	if (*fmt == '.') {
279 		/* precision present? */
280 		fmt++;
281 		*dptr++ = '.';
282 
283 		if (*fmt == '*') {
284 
285 			fmt++;
286 			l = strspn(fmt, digits);
287 			if ((l > 0) && (fmt[l] == '$')) {
288 				int idx = atoi(fmt);
289 				if (fargv == NULL) {
290 					warnx("incomplete use of n$");
291 					return (NULL);
292 				}
293 				if (idx <= myargc) {
294 					gargv = &myargv[idx - 1];
295 				} else {
296 					gargv = &myargv[myargc];
297 				}
298 				fmt += l + 1;
299 			} else if (fargv != NULL) {
300 				warnx("incomplete use of n$");
301 				return (NULL);
302 			}
303 
304 			if (getint(&precision))
305 				return (NULL);
306 			if (gargv > maxargv)
307 				maxargv = gargv;
308 			haveprec = 1;
309 			*dptr++ = '*';
310 			*dptr = 0;
311 		} else {
312 			haveprec = 0;
313 
314 			/* skip to conversion char */
315 			while (isdigit(*fmt)) {
316 				*dptr++ = *fmt++;
317 				*dptr = 0;
318 			}
319 		}
320 	} else
321 		haveprec = 0;
322 	if (!*fmt) {
323 		warnx("missing format character");
324 		return (NULL);
325 	}
326 	*dptr++ = *fmt;
327 	*dptr = 0;
328 
329 	/*
330 	 * Look for a length modifier.  POSIX doesn't have these, so
331 	 * we only support them for floating-point conversions, which
332 	 * are extensions.  This is useful because the L modifier can
333 	 * be used to gain extra range and precision, while omitting
334 	 * it is more likely to produce consistent results on different
335 	 * architectures.  This is not so important for integers
336 	 * because overflow is the only bad thing that can happen to
337 	 * them, but consider the command  printf %a 1.1
338 	 */
339 	if (*fmt == 'L') {
340 		mod_ldbl = 1;
341 		fmt++;
342 		if (!strchr("aAeEfFgG", *fmt)) {
343 			warnx("bad modifier L for %%%c", *fmt);
344 			return (NULL);
345 		}
346 	} else {
347 		mod_ldbl = 0;
348 	}
349 
350 	/* save the current arg offset, and set to the format arg */
351 	if (fargv != NULL) {
352 		gargv = fargv;
353 	}
354 
355 	convch = *fmt;
356 	nextch = *++fmt;
357 
358 	*fmt = '\0';
359 	switch (convch) {
360 	case 'b': {
361 		size_t len;
362 		char *p;
363 		int getout;
364 
365 		/* Convert "b" to "s" for output. */
366 		start[strlen(start) - 1] = 's';
367 		if ((p = strdup(getstr())) == NULL) {
368 			warnx("%s", strerror(ENOMEM));
369 			return (NULL);
370 		}
371 		getout = escape(p, 0, &len);
372 		PF(start, p);
373 		/* Restore format for next loop. */
374 
375 		free(p);
376 		if (getout)
377 			return (end_fmt);
378 		break;
379 	}
380 	case 'c': {
381 		char p;
382 
383 		p = getchr();
384 		if (p != '\0')
385 			PF(start, p);
386 		break;
387 	}
388 	case 's': {
389 		const char *p;
390 
391 		p = getstr();
392 		PF(start, p);
393 		break;
394 	}
395 	case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
396 		char *f;
397 		intmax_t val;
398 		uintmax_t uval;
399 		int signedconv;
400 
401 		signedconv = (convch == 'd' || convch == 'i');
402 		if ((f = mknum(start, convch)) == NULL)
403 			return (NULL);
404 		if (getnum(&val, &uval, signedconv))
405 			*rval = 1;
406 		if (signedconv)
407 			PF(f, val);
408 		else
409 			PF(f, uval);
410 		break;
411 	}
412 	case 'e': case 'E':
413 	case 'f': case 'F':
414 	case 'g': case 'G':
415 	case 'a': case 'A': {
416 		long double p;
417 
418 		if (getfloating(&p, mod_ldbl))
419 			*rval = 1;
420 		if (mod_ldbl)
421 			PF(start, p);
422 		else
423 			PF(start, (double)p);
424 		break;
425 	}
426 	default:
427 		warnx("illegal format character %c", convch);
428 		return (NULL);
429 	}
430 	*fmt = nextch;
431 	/* return the gargv to the next element */
432 	return (fmt);
433 }
434 
/*
 * Build an integer conversion that takes an intmax_t/uintmax_t value:
 * copy str without its final character, then append 'j' and ch.
 *
 * The result lives in a static buffer that is grown in 1 KiB steps and
 * reused by the next call.  Returns NULL, after printing a warning,
 * when the buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *buf;
	static size_t bufsize;
	size_t need, prefix;

	/* Room for str, the extra 'j' and the terminating NUL. */
	need = strlen(str) + 2;
	if (bufsize < need) {
		size_t rounded;
		char *grown;

		rounded = ((need + 1023) / 1024) * 1024;
		grown = realloc(buf, rounded);
		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		buf = grown;
		bufsize = rounded;
	}

	/* Everything except the original conversion character. */
	prefix = need - 3;
	memmove(buf, str, prefix);
	buf[prefix] = 'j';
	buf[prefix + 1] = ch;
	buf[prefix + 2] = '\0';
	return (buf);
}
460 
/*
 * Expand backslash escapes in fmt in place and store the resulting
 * length (which may include embedded NUL bytes) in *len.
 *
 * percent is non-zero for the format string and zero for a %b argument:
 *   - in a format string, an octal escape that yields '%' is written as
 *     "%%", "\c" becomes a plain 'c', and octal escapes take at most
 *     three digits;
 *   - in a %b argument, "\c" ends the string and makes the function
 *     return 1, and an octal escape starting with '0' may take four
 *     digits.
 * A backslash in front of any other character yields that character,
 * and a trailing lone backslash is kept.  Returns 0 unless "\c" ended a
 * %b argument.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	static const char names[] = "abfnrtv";
	static const char codes[] = "\a\b\f\n\r\t\v";
	char *const base = fmt;
	char *in, *out;
	const char *hit;
	int budget, value;
	char ch;

	in = out = fmt;
	while ((ch = *in++) != '\0') {
		if (ch != '\\') {
			*out++ = ch;
			continue;
		}

		/* Look at the character that follows the backslash. */
		ch = *in++;
		if (ch == '\0') {
			/* EOS, user error: keep the backslash. */
			*out++ = '\\';
			*out = '\0';
			*len = out - base;
			return (0);
		}
		if (ch == 'c') {
			if (!percent) {
				*out = '\0';
				*len = out - base;
				return (1);
			}
			*out++ = 'c';
			continue;
		}
		if (ch >= '0' && ch <= '7') {
			/* octal constant */
			budget = (!percent && ch == '0') ? 4 : 3;
			value = 0;
			for (--in; budget > 0 && *in >= '0' && *in <= '7';
			    --budget, ++in)
				value = (value << 3) + (*in - '0');
			if (percent && value == '%') {
				*out++ = '%';
				*out++ = '%';
			} else
				*out++ = (char)value;
			continue;
		}

		/* Named control escape, or the character itself. */
		hit = strchr(names, ch);
		*out++ = (hit != NULL) ? codes[hit - names] : ch;
	}
	*out = '\0';
	*len = out - base;
	return (0);
}
536 
537 static int
538 getchr(void)
539 {
540 	if (!*gargv)
541 		return ('\0');
542 	return ((int)**gargv++);
543 }
544 
545 static const char *
546 getstr(void)
547 {
548 	if (!*gargv)
549 		return ("");
550 	return (*gargv++);
551 }
552 
553 static int
554 getint(int *ip)
555 {
556 	intmax_t val;
557 	uintmax_t uval;
558 	int rval;
559 
560 	if (getnum(&val, &uval, 1))
561 		return (1);
562 	rval = 0;
563 	if (val < INT_MIN || val > INT_MAX) {
564 		warnx("%s: %s", *gargv, strerror(ERANGE));
565 		rval = 1;
566 	}
567 	*ip = (int)val;
568 	return (rval);
569 }
570 
571 static int
572 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
573 {
574 	char *ep;
575 	int rval;
576 
577 	if (!*gargv) {
578 		*ip = *uip = 0;
579 		return (0);
580 	}
581 	if (**gargv == '"' || **gargv == '\'') {
582 		if (signedconv)
583 			*ip = asciicode();
584 		else
585 			*uip = asciicode();
586 		return (0);
587 	}
588 	rval = 0;
589 	errno = 0;
590 	if (signedconv)
591 		*ip = strtoimax(*gargv, &ep, 0);
592 	else
593 		*uip = strtoumax(*gargv, &ep, 0);
594 	if (ep == *gargv) {
595 		warnx("%s: expected numeric value", *gargv);
596 		rval = 1;
597 	}
598 	else if (*ep != '\0') {
599 		warnx("%s: not completely converted", *gargv);
600 		rval = 1;
601 	}
602 	if (errno == ERANGE) {
603 		warnx("%s: %s", *gargv, strerror(ERANGE));
604 		rval = 1;
605 	}
606 	++gargv;
607 	return (rval);
608 }
609 
610 static int
611 getfloating(long double *dp, int mod_ldbl)
612 {
613 	char *ep;
614 	int rval;
615 
616 	if (!*gargv) {
617 		*dp = 0.0;
618 		return (0);
619 	}
620 	if (**gargv == '"' || **gargv == '\'') {
621 		*dp = asciicode();
622 		return (0);
623 	}
624 	rval = 0;
625 	errno = 0;
626 	if (mod_ldbl)
627 		*dp = strtold(*gargv, &ep);
628 	else
629 		*dp = strtod(*gargv, &ep);
630 	if (ep == *gargv) {
631 		warnx("%s: expected numeric value", *gargv);
632 		rval = 1;
633 	} else if (*ep != '\0') {
634 		warnx("%s: not completely converted", *gargv);
635 		rval = 1;
636 	}
637 	if (errno == ERANGE) {
638 		warnx("%s: %s", *gargv, strerror(ERANGE));
639 		rval = 1;
640 	}
641 	++gargv;
642 	return (rval);
643 }
644 
645 static int
646 asciicode(void)
647 {
648 	int ch;
649 	wchar_t wch;
650 	mbstate_t mbs;
651 
652 	ch = (unsigned char)**gargv;
653 	if (ch == '\'' || ch == '"') {
654 		memset(&mbs, 0, sizeof(mbs));
655 		switch (mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) {
656 		case (size_t)-2:
657 		case (size_t)-1:
658 			wch = (unsigned char)gargv[0][1];
659 			break;
660 		case 0:
661 			wch = 0;
662 			break;
663 		}
664 		ch = wch;
665 	}
666 	++gargv;
667 	return (ch);
668 }
669 
/* Print the one-line usage summary on standard error. */
static void
usage(void)
{
	fputs("usage: printf format [arguments ...]\n", stderr);
}
675