xref: /openbsd/usr.bin/fold/fold.c (revision 09467b48)
1 /*	$OpenBSD: fold.c,v 1.18 2016/05/23 10:31:42 schwarze Exp $	*/
2 /*	$NetBSD: fold.c,v 1.6 1995/09/01 01:42:44 jtc Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Kevin Ruddy.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <ctype.h>
37 #include <err.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <wchar.h>
45 
46 #define	DEFLINEWIDTH	80
47 
48 static void fold(unsigned int);
49 static int isu8cont(unsigned char);
50 static __dead void usage(void);
51 
52 int count_bytes = 0;
53 int split_words = 0;
54 
55 int
56 main(int argc, char *argv[])
57 {
58 	int ch, lastch, newarg, prevoptind;
59 	unsigned int width;
60 	const char *errstr;
61 
62 	setlocale(LC_CTYPE, "");
63 
64 	if (pledge("stdio rpath", NULL) == -1)
65 		err(1, "pledge");
66 
67 	width = 0;
68 	lastch = '\0';
69 	prevoptind = 1;
70 	newarg = 1;
71 	while ((ch = getopt(argc, argv, "0123456789bsw:")) != -1) {
72 		switch (ch) {
73 		case 'b':
74 			count_bytes = 1;
75 			break;
76 		case 's':
77 			split_words = 1;
78 			break;
79 		case 'w':
80 			width = strtonum(optarg, 1, UINT_MAX, &errstr);
81 			if (errstr != NULL)
82 				errx(1, "illegal width value, %s: %s", errstr,
83 					optarg);
84 			break;
85 		case '0': case '1': case '2': case '3': case '4':
86 		case '5': case '6': case '7': case '8': case '9':
87 			if (newarg)
88 				width = 0;
89 			else if (!isdigit(lastch))
90 				usage();
91 			if (width > UINT_MAX / 10 - 1)
92 				errx(1, "illegal width value, too large");
93 			width = (width * 10) + (ch - '0');
94 			if (width < 1)
95 				errx(1, "illegal width value, too small");
96 			break;
97 		default:
98 			usage();
99 		}
100 		lastch = ch;
101 		newarg = optind != prevoptind;
102 		prevoptind = optind;
103 	}
104 	argv += optind;
105 	argc -= optind;
106 
107 	if (width == 0)
108 		width = DEFLINEWIDTH;
109 
110 	if (!*argv) {
111 		if (pledge("stdio", NULL) == -1)
112 			err(1, "pledge");
113 		fold(width);
114 	} else {
115 		for (; *argv; ++argv) {
116 			if (!freopen(*argv, "r", stdin))
117 				err(1, "%s", *argv);
118 			else
119 				fold(width);
120 		}
121 	}
122 	return 0;
123 }
124 
125 /*
126  * Fold the contents of standard input to fit within WIDTH columns
127  * (or bytes) and write to standard output.
128  *
129  * If split_words is set, split the line at the last space character
130  * on the line.  This flag necessitates storing the line in a buffer
131  * until the current column > width, or a newline or EOF is read.
132  *
133  * The buffer can grow larger than WIDTH due to backspaces and carriage
134  * returns embedded in the input stream.
135  */
136 static void
137 fold(unsigned int max_width)
138 {
139 	static char	*buf = NULL;
140 	static size_t	 bufsz = 2048;
141 	char		*cp;	/* Current mb character. */
142 	char		*np;	/* Next mb character. */
143 	char		*sp;	/* To search for the last space. */
144 	char		*nbuf;	/* For buffer reallocation. */
145 	wchar_t		 wc;	/* Current wide character. */
146 	int		 ch;	/* Last byte read. */
147 	int		 len;	/* Bytes in the current mb character. */
148 	unsigned int	 col;	/* Current display position. */
149 	int		 width; /* Display width of wc. */
150 
151 	if (buf == NULL && (buf = malloc(bufsz)) == NULL)
152 		err(1, NULL);
153 
154 	np = cp = buf;
155 	ch = 0;
156 	col = 0;
157 
158 	while (ch != EOF) {  /* Loop on input characters. */
159 		while ((ch = getchar()) != EOF) {  /* Loop on input bytes. */
160 			if (np + 1 == buf + bufsz) {
161 				nbuf = reallocarray(buf, 2, bufsz);
162 				if (nbuf == NULL)
163 					err(1, NULL);
164 				bufsz *= 2;
165 				cp = nbuf + (cp - buf);
166 				np = nbuf + (np - buf);
167 				buf = nbuf;
168 			}
169 			*np++ = ch;
170 
171 			/*
172 			 * Read up to and including the first byte of
173 			 * the next character, such that we are sure
174 			 * to have a complete character in the buffer.
175 			 * There is no need to read more than five bytes
176 			 * ahead, since UTF-8 characters are four bytes
177 			 * long at most.
178 			 */
179 
180 			if (np - cp > 4 || (np - cp > 1 && !isu8cont(ch)))
181 				break;
182 		}
183 
184 		while (cp < np) {  /* Loop on output characters. */
185 
186 			/* Handle end of line and backspace. */
187 
188 			if (*cp == '\n' || (*cp == '\r' && !count_bytes)) {
189 				fwrite(buf, 1, ++cp - buf, stdout);
190 				memmove(buf, cp, np - cp);
191 				np = buf + (np - cp);
192 				cp = buf;
193 				col = 0;
194 				continue;
195 			}
196 			if (*cp == '\b' && !count_bytes) {
197 				if (col)
198 					col--;
199 				cp++;
200 				continue;
201 			}
202 
203 			/*
204 			 * Measure display width.
205 			 * Process the last byte only if
206 			 * end of file was reached.
207 			 */
208 
209 			if (np - cp > (ch != EOF)) {
210 				len = 1;
211 				width = 1;
212 
213 				if (*cp == '\t') {
214 					if (count_bytes == 0)
215 						width = 8 - (col & 7);
216 				} else if ((len = mbtowc(&wc, cp,
217 				    np - cp)) < 1)
218 					len = 1;
219 				else if (count_bytes)
220 					width = len;
221 				else if ((width = wcwidth(wc)) < 0)
222 					width = 1;
223 
224 				col += width;
225 				if (col <= max_width || cp == buf) {
226 					cp += len;
227 					continue;
228 				}
229 			}
230 
231 			/* Line break required. */
232 
233 			if (col > max_width) {
234 				if (split_words) {
235 					for (sp = cp; sp > buf; sp--) {
236 						if (sp[-1] == ' ') {
237 							cp = sp;
238 							break;
239 						}
240 					}
241 				}
242 				fwrite(buf, 1, cp - buf, stdout);
243 				putchar('\n');
244 				memmove(buf, cp, np - cp);
245 				np = buf + (np - cp);
246 				cp = buf;
247 				col = 0;
248 				continue;
249 			}
250 
251 			/* Need more input. */
252 
253 			break;
254 		}
255 	}
256 	fwrite(buf, 1, np - buf, stdout);
257 
258 	if (ferror(stdin))
259 		err(1, NULL);
260 }
261 
/*
 * Return 1 if c has the bit pattern 10xxxxxx and the current locale
 * uses a multibyte encoding (MB_CUR_MAX > 1); return 0 otherwise.
 * fold() uses this to recognise UTF-8 continuation bytes.
 */
static int
isu8cont(unsigned char c)
{
	if (MB_CUR_MAX <= 1)
		return 0;
	return (c & 0xc0) == 0x80;
}
267 
268 static __dead void
269 usage(void)
270 {
271 	(void)fprintf(stderr, "usage: fold [-bs] [-w width] [file ...]\n");
272 	exit(1);
273 }
274