1 /* $OpenBSD: fold.c,v 1.18 2016/05/23 10:31:42 schwarze Exp $ */ 2 /* $NetBSD: fold.c,v 1.6 1995/09/01 01:42:44 jtc Exp $ */ 3 4 /*- 5 * Copyright (c) 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Kevin Ruddy. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 #include <wchar.h> 45 46 #define DEFLINEWIDTH 80 47 48 static void fold(unsigned int); 49 static int isu8cont(unsigned char); 50 static __dead void usage(void); 51 52 int count_bytes = 0; 53 int split_words = 0; 54 55 int 56 main(int argc, char *argv[]) 57 { 58 int ch, lastch, newarg, prevoptind; 59 unsigned int width; 60 const char *errstr; 61 62 setlocale(LC_CTYPE, ""); 63 64 if (pledge("stdio rpath", NULL) == -1) 65 err(1, "pledge"); 66 67 width = 0; 68 lastch = '\0'; 69 prevoptind = 1; 70 newarg = 1; 71 while ((ch = getopt(argc, argv, "0123456789bsw:")) != -1) { 72 switch (ch) { 73 case 'b': 74 count_bytes = 1; 75 break; 76 case 's': 77 split_words = 1; 78 break; 79 case 'w': 80 width = strtonum(optarg, 1, UINT_MAX, &errstr); 81 if (errstr != NULL) 82 errx(1, "illegal width value, %s: %s", errstr, 83 optarg); 84 break; 85 case '0': case '1': case '2': case '3': case '4': 86 case '5': case '6': case '7': case '8': case '9': 87 if (newarg) 88 width = 0; 89 else if (!isdigit(lastch)) 90 usage(); 91 if (width > UINT_MAX / 10 - 1) 92 errx(1, "illegal width value, too large"); 93 width = (width * 10) + (ch - '0'); 94 if (width < 1) 95 errx(1, "illegal width value, too small"); 96 break; 97 default: 98 usage(); 99 } 100 lastch = ch; 101 newarg = optind != prevoptind; 102 prevoptind = optind; 103 } 104 argv += optind; 105 argc -= optind; 106 107 if (width == 0) 108 width = DEFLINEWIDTH; 109 110 if (!*argv) { 111 if (pledge("stdio", NULL) == -1) 112 err(1, "pledge"); 113 fold(width); 114 } else { 115 for (; *argv; ++argv) { 116 if (!freopen(*argv, "r", stdin)) 117 err(1, "%s", *argv); 118 else 119 fold(width); 120 } 121 } 122 return 0; 123 } 124 125 /* 126 * Fold the contents of standard input to fit within WIDTH columns 127 * (or bytes) and write to standard output. 128 * 129 * If split_words is set, split the line at the last space character 130 * on the line. This flag necessitates storing the line in a buffer 131 * until the current column > width, or a newline or EOF is read. 132 * 133 * The buffer can grow larger than WIDTH due to backspaces and carriage 134 * returns embedded in the input stream. 135 */ 136 static void 137 fold(unsigned int max_width) 138 { 139 static char *buf = NULL; 140 static size_t bufsz = 2048; 141 char *cp; /* Current mb character. */ 142 char *np; /* Next mb character. */ 143 char *sp; /* To search for the last space. */ 144 char *nbuf; /* For buffer reallocation. */ 145 wchar_t wc; /* Current wide character. */ 146 int ch; /* Last byte read. */ 147 int len; /* Bytes in the current mb character. */ 148 unsigned int col; /* Current display position. */ 149 int width; /* Display width of wc. */ 150 151 if (buf == NULL && (buf = malloc(bufsz)) == NULL) 152 err(1, NULL); 153 154 np = cp = buf; 155 ch = 0; 156 col = 0; 157 158 while (ch != EOF) { /* Loop on input characters. */ 159 while ((ch = getchar()) != EOF) { /* Loop on input bytes. */ 160 if (np + 1 == buf + bufsz) { 161 nbuf = reallocarray(buf, 2, bufsz); 162 if (nbuf == NULL) 163 err(1, NULL); 164 bufsz *= 2; 165 cp = nbuf + (cp - buf); 166 np = nbuf + (np - buf); 167 buf = nbuf; 168 } 169 *np++ = ch; 170 171 /* 172 * Read up to and including the first byte of 173 * the next character, such that we are sure 174 * to have a complete character in the buffer. 175 * There is no need to read more than five bytes 176 * ahead, since UTF-8 characters are four bytes 177 * long at most. 178 */ 179 180 if (np - cp > 4 || (np - cp > 1 && !isu8cont(ch))) 181 break; 182 } 183 184 while (cp < np) { /* Loop on output characters. */ 185 186 /* Handle end of line and backspace. */ 187 188 if (*cp == '\n' || (*cp == '\r' && !count_bytes)) { 189 fwrite(buf, 1, ++cp - buf, stdout); 190 memmove(buf, cp, np - cp); 191 np = buf + (np - cp); 192 cp = buf; 193 col = 0; 194 continue; 195 } 196 if (*cp == '\b' && !count_bytes) { 197 if (col) 198 col--; 199 cp++; 200 continue; 201 } 202 203 /* 204 * Measure display width. 205 * Process the last byte only if 206 * end of file was reached. 207 */ 208 209 if (np - cp > (ch != EOF)) { 210 len = 1; 211 width = 1; 212 213 if (*cp == '\t') { 214 if (count_bytes == 0) 215 width = 8 - (col & 7); 216 } else if ((len = mbtowc(&wc, cp, 217 np - cp)) < 1) 218 len = 1; 219 else if (count_bytes) 220 width = len; 221 else if ((width = wcwidth(wc)) < 0) 222 width = 1; 223 224 col += width; 225 if (col <= max_width || cp == buf) { 226 cp += len; 227 continue; 228 } 229 } 230 231 /* Line break required. */ 232 233 if (col > max_width) { 234 if (split_words) { 235 for (sp = cp; sp > buf; sp--) { 236 if (sp[-1] == ' ') { 237 cp = sp; 238 break; 239 } 240 } 241 } 242 fwrite(buf, 1, cp - buf, stdout); 243 putchar('\n'); 244 memmove(buf, cp, np - cp); 245 np = buf + (np - cp); 246 cp = buf; 247 col = 0; 248 continue; 249 } 250 251 /* Need more input. */ 252 253 break; 254 } 255 } 256 fwrite(buf, 1, np - buf, stdout); 257 258 if (ferror(stdin)) 259 err(1, NULL); 260 } 261 262 static int 263 isu8cont(unsigned char c) 264 { 265 return MB_CUR_MAX > 1 && (c & (0x80 | 0x40)) == 0x80; 266 } 267 268 static __dead void 269 usage(void) 270 { 271 (void)fprintf(stderr, "usage: fold [-bs] [-w width] [file ...]\n"); 272 exit(1); 273 } 274