1 /* $OpenBSD: fold.c,v 1.18 2016/05/23 10:31:42 schwarze Exp $ */
2 /* $NetBSD: fold.c,v 1.6 1995/09/01 01:42:44 jtc Exp $ */
3
4 /*-
5 * Copyright (c) 1990, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Kevin Ruddy.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <ctype.h>
37 #include <err.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <wchar.h>
45
46 #define DEFLINEWIDTH 80
47
/* Fold standard input so that no output line exceeds max_width. */
static void fold(unsigned int max_width);

/* Test whether c has the bit pattern 10xxxxxx in a multibyte locale. */
static int isu8cont(unsigned char c);

/* Print the usage message to standard error and exit with status 1. */
static __dead void usage(void);

/*
 * Set by -b: measure lines in bytes instead of display columns and
 * give tab, backspace and carriage return no special treatment.
 */
int count_bytes = 0;

/* Set by -s: prefer to break a line after its last space character. */
int split_words = 0;
54
55 int
main(int argc,char * argv[])56 main(int argc, char *argv[])
57 {
58 int ch, lastch, newarg, prevoptind;
59 unsigned int width;
60 const char *errstr;
61
62 setlocale(LC_CTYPE, "");
63
64 if (pledge("stdio rpath", NULL) == -1)
65 err(1, "pledge");
66
67 width = 0;
68 lastch = '\0';
69 prevoptind = 1;
70 newarg = 1;
71 while ((ch = getopt(argc, argv, "0123456789bsw:")) != -1) {
72 switch (ch) {
73 case 'b':
74 count_bytes = 1;
75 break;
76 case 's':
77 split_words = 1;
78 break;
79 case 'w':
80 width = strtonum(optarg, 1, UINT_MAX, &errstr);
81 if (errstr != NULL)
82 errx(1, "illegal width value, %s: %s", errstr,
83 optarg);
84 break;
85 case '0': case '1': case '2': case '3': case '4':
86 case '5': case '6': case '7': case '8': case '9':
87 if (newarg)
88 width = 0;
89 else if (!isdigit(lastch))
90 usage();
91 if (width > UINT_MAX / 10 - 1)
92 errx(1, "illegal width value, too large");
93 width = (width * 10) + (ch - '0');
94 if (width < 1)
95 errx(1, "illegal width value, too small");
96 break;
97 default:
98 usage();
99 }
100 lastch = ch;
101 newarg = optind != prevoptind;
102 prevoptind = optind;
103 }
104 argv += optind;
105 argc -= optind;
106
107 if (width == 0)
108 width = DEFLINEWIDTH;
109
110 if (!*argv) {
111 if (pledge("stdio", NULL) == -1)
112 err(1, "pledge");
113 fold(width);
114 } else {
115 for (; *argv; ++argv) {
116 if (!freopen(*argv, "r", stdin))
117 err(1, "%s", *argv);
118 else
119 fold(width);
120 }
121 }
122 return 0;
123 }
124
125 /*
126 * Fold the contents of standard input to fit within WIDTH columns
127 * (or bytes) and write to standard output.
128 *
129 * If split_words is set, split the line at the last space character
130 * on the line. This flag necessitates storing the line in a buffer
131 * until the current column > width, or a newline or EOF is read.
132 *
133 * The buffer can grow larger than WIDTH due to backspaces and carriage
134 * returns embedded in the input stream.
135 */
136 static void
fold(unsigned int max_width)137 fold(unsigned int max_width)
138 {
139 static char *buf = NULL;
140 static size_t bufsz = 2048;
141 char *cp; /* Current mb character. */
142 char *np; /* Next mb character. */
143 char *sp; /* To search for the last space. */
144 char *nbuf; /* For buffer reallocation. */
145 wchar_t wc; /* Current wide character. */
146 int ch; /* Last byte read. */
147 int len; /* Bytes in the current mb character. */
148 unsigned int col; /* Current display position. */
149 int width; /* Display width of wc. */
150
151 if (buf == NULL && (buf = malloc(bufsz)) == NULL)
152 err(1, NULL);
153
154 np = cp = buf;
155 ch = 0;
156 col = 0;
157
158 while (ch != EOF) { /* Loop on input characters. */
159 while ((ch = getchar()) != EOF) { /* Loop on input bytes. */
160 if (np + 1 == buf + bufsz) {
161 nbuf = reallocarray(buf, 2, bufsz);
162 if (nbuf == NULL)
163 err(1, NULL);
164 bufsz *= 2;
165 cp = nbuf + (cp - buf);
166 np = nbuf + (np - buf);
167 buf = nbuf;
168 }
169 *np++ = ch;
170
171 /*
172 * Read up to and including the first byte of
173 * the next character, such that we are sure
174 * to have a complete character in the buffer.
175 * There is no need to read more than five bytes
176 * ahead, since UTF-8 characters are four bytes
177 * long at most.
178 */
179
180 if (np - cp > 4 || (np - cp > 1 && !isu8cont(ch)))
181 break;
182 }
183
184 while (cp < np) { /* Loop on output characters. */
185
186 /* Handle end of line and backspace. */
187
188 if (*cp == '\n' || (*cp == '\r' && !count_bytes)) {
189 fwrite(buf, 1, ++cp - buf, stdout);
190 memmove(buf, cp, np - cp);
191 np = buf + (np - cp);
192 cp = buf;
193 col = 0;
194 continue;
195 }
196 if (*cp == '\b' && !count_bytes) {
197 if (col)
198 col--;
199 cp++;
200 continue;
201 }
202
203 /*
204 * Measure display width.
205 * Process the last byte only if
206 * end of file was reached.
207 */
208
209 if (np - cp > (ch != EOF)) {
210 len = 1;
211 width = 1;
212
213 if (*cp == '\t') {
214 if (count_bytes == 0)
215 width = 8 - (col & 7);
216 } else if ((len = mbtowc(&wc, cp,
217 np - cp)) < 1)
218 len = 1;
219 else if (count_bytes)
220 width = len;
221 else if ((width = wcwidth(wc)) < 0)
222 width = 1;
223
224 col += width;
225 if (col <= max_width || cp == buf) {
226 cp += len;
227 continue;
228 }
229 }
230
231 /* Line break required. */
232
233 if (col > max_width) {
234 if (split_words) {
235 for (sp = cp; sp > buf; sp--) {
236 if (sp[-1] == ' ') {
237 cp = sp;
238 break;
239 }
240 }
241 }
242 fwrite(buf, 1, cp - buf, stdout);
243 putchar('\n');
244 memmove(buf, cp, np - cp);
245 np = buf + (np - cp);
246 cp = buf;
247 col = 0;
248 continue;
249 }
250
251 /* Need more input. */
252
253 break;
254 }
255 }
256 fwrite(buf, 1, np - buf, stdout);
257
258 if (ferror(stdin))
259 err(1, NULL);
260 }
261
/*
 * Report whether c looks like a UTF-8 continuation byte, i.e. its two
 * most significant bits are 10.  In a locale where characters are at
 * most one byte long (MB_CUR_MAX <= 1) the answer is always 0.
 *
 * Returns 1 for a continuation byte, 0 otherwise.
 */
static int
isu8cont(unsigned char c)
{
	const unsigned char top_two_bits = 0x80 | 0x40;

	if (MB_CUR_MAX <= 1)
		return 0;
	return (c & top_two_bits) == 0x80;
}
267
/*
 * Write the synopsis to standard error and terminate the program with
 * exit status 1.  Never returns.
 */
static __dead void
usage(void)
{
	/* The message has no conversions, so fputs() emits the same bytes. */
	(void)fputs("usage: fold [-bs] [-w width] [file ...]\n", stderr);
	exit(1);
}
274