1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of the Memorial University of Newfoundland. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#) Copyright (c) 1990, 1993, 1994 The Regents of the University of California. All rights reserved. 33 * @(#)col.c 8.5 (Berkeley) 5/4/95 34 * $FreeBSD: head/usr.bin/col/col.c 282722 2015-05-10 11:41:38Z bapt $ 35 */ 36 37 #include <err.h> 38 #include <errno.h> 39 #include <limits.h> 40 #include <locale.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <termios.h> 45 #include <unistd.h> 46 #include <wchar.h> 47 #include <wctype.h> 48 49 #define BS '\b' /* backspace */ 50 #define TAB '\t' /* tab */ 51 #define SPACE ' ' /* space */ 52 #define NL '\n' /* newline */ 53 #define CR '\r' /* carriage return */ 54 #define ESC '\033' /* escape */ 55 #define SI '\017' /* shift in to normal character set */ 56 #define SO '\016' /* shift out to alternate character set */ 57 #define VT '\013' /* vertical tab (aka reverse line feed) */ 58 #define RLF '7' /* ESC-7 reverse line feed */ 59 #define RHLF '8' /* ESC-8 reverse half-line feed */ 60 #define FHLF '9' /* ESC-9 forward half-line feed */ 61 62 /* build up at least this many lines before flushing them out */ 63 #define BUFFER_MARGIN 32 64 65 typedef char CSET; 66 67 typedef struct char_str { 68 #define CS_NORMAL 1 69 #define CS_ALTERNATE 2 70 short c_column; /* column character is in */ 71 CSET c_set; /* character set (currently only 2) */ 72 wchar_t c_char; /* character in question */ 73 int c_width; /* character width */ 74 } CHAR; 75 76 typedef struct line_str LINE; 77 struct line_str { 78 CHAR *l_line; /* characters on the line */ 79 LINE *l_prev; /* previous line */ 80 LINE *l_next; /* next line */ 81 int l_lsize; /* allocated sizeof l_line */ 82 int l_line_len; /* strlen(l_line) */ 83 int l_needs_sort; /* set if chars went in out of order */ 84 int l_max_col; /* max column in the line */ 85 }; 86 87 static void addto_lineno(int *, int); 88 static LINE *alloc_line(void); 89 static void dowarn(int); 90 static void flush_line(LINE *); 91 static void flush_lines(int); 92 static void flush_blanks(void); 93 static void free_line(LINE *); 94 static void usage(void); 95 96 static CSET last_set; /* char_set of last char printed */ 97 static LINE *lines; 98 static int compress_spaces; /* if doing space -> tab conversion */ 99 static int fine; /* if `fine' resolution (half lines) */ 100 static int max_bufd_lines; /* max # of half lines to keep in memory */ 101 static int nblank_lines; /* # blanks after last flushed line */ 102 static int no_backspaces; /* if not to output any backspaces */ 103 static int pass_unknown_seqs; /* pass unknown control sequences */ 104 105 #define PUTC(ch) \ 106 do { \ 107 if (putwchar(ch) == WEOF) \ 108 errx(1, "write error"); \ 109 } while (0) 110 111 int 112 main(int argc, char **argv) 113 { 114 wint_t ch; 115 CHAR *c; 116 CSET cur_set; /* current character set */ 117 LINE *l; /* current line */ 118 int extra_lines; /* # of lines above first line */ 119 int cur_col; /* current column */ 120 int cur_line; /* line number of current position */ 121 int max_line; /* max value of cur_line */ 122 int this_line; /* line l points to */ 123 int nflushd_lines; /* number of lines that were flushed */ 124 int adjust, opt, warned, width; 125 const char *errstr; 126 127 setlocale(LC_CTYPE, ""); 128 129 max_bufd_lines = 256; 130 compress_spaces = 1; /* compress spaces into tabs */ 131 while ((opt = getopt(argc, argv, "bfhl:px")) != -1) 132 switch (opt) { 133 case 'b': /* do not output backspaces */ 134 no_backspaces = 1; 135 break; 136 case 'f': /* allow half forward line feeds */ 137 fine = 1; 138 break; 139 case 'h': /* compress spaces into tabs */ 140 compress_spaces = 1; 141 break; 142 case 'l': /* buffered line count */ 143 max_bufd_lines = strtonum(optarg, 1, 144 (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2; 145 if (errstr != NULL) 146 errx(1, "bad -l argument, %s: %s", errstr, 147 optarg); 148 break; 149 case 'p': /* pass unknown control sequences */ 150 pass_unknown_seqs = 1; 151 break; 152 case 'x': /* do not compress spaces into tabs */ 153 compress_spaces = 0; 154 break; 155 case '?': 156 default: 157 usage(); 158 } 159 160 if (optind != argc) 161 usage(); 162 163 adjust = cur_col = extra_lines = warned = 0; 164 cur_line = max_line = nflushd_lines = this_line = 0; 165 cur_set = last_set = CS_NORMAL; 166 lines = l = alloc_line(); 167 168 while ((ch = getwchar()) != WEOF) { 169 if (!iswgraph(ch)) { 170 switch (ch) { 171 case BS: /* can't go back further */ 172 if (cur_col == 0) 173 continue; 174 --cur_col; 175 continue; 176 case CR: 177 cur_col = 0; 178 continue; 179 case ESC: /* just ignore EOF */ 180 switch (getwchar()) { 181 /* 182 * In the input stream, accept both the 183 * XPG5 sequences ESC-digit and the 184 * traditional BSD sequences ESC-ctrl. 185 */ 186 case '\007': 187 /* FALLTHROUGH */ 188 case RLF: 189 addto_lineno(&cur_line, -2); 190 break; 191 case '\010': 192 /* FALLTHROUGH */ 193 case RHLF: 194 addto_lineno(&cur_line, -1); 195 break; 196 case '\011': 197 /* FALLTHROUGH */ 198 case FHLF: 199 addto_lineno(&cur_line, 1); 200 if (cur_line > max_line) 201 max_line = cur_line; 202 } 203 continue; 204 case NL: 205 addto_lineno(&cur_line, 2); 206 if (cur_line > max_line) 207 max_line = cur_line; 208 cur_col = 0; 209 continue; 210 case SPACE: 211 ++cur_col; 212 continue; 213 case SI: 214 cur_set = CS_NORMAL; 215 continue; 216 case SO: 217 cur_set = CS_ALTERNATE; 218 continue; 219 case TAB: /* adjust column */ 220 cur_col |= 7; 221 ++cur_col; 222 continue; 223 case VT: 224 addto_lineno(&cur_line, -2); 225 continue; 226 } 227 if (iswspace(ch)) { 228 if ((width = wcwidth(ch)) > 0) 229 cur_col += width; 230 continue; 231 } 232 if (!pass_unknown_seqs) 233 continue; 234 } 235 236 /* Must stuff ch in a line - are we at the right one? */ 237 if (cur_line + adjust != this_line) { 238 LINE *lnew; 239 240 /* round up to next line */ 241 adjust = !fine && (cur_line & 1); 242 243 if (cur_line + adjust < this_line) { 244 while (cur_line + adjust < this_line && 245 l->l_prev != NULL) { 246 l = l->l_prev; 247 this_line--; 248 } 249 if (cur_line + adjust < this_line) { 250 if (nflushd_lines == 0) { 251 /* 252 * Allow backup past first 253 * line if nothing has been 254 * flushed yet. 255 */ 256 while (cur_line + adjust 257 < this_line) { 258 lnew = alloc_line(); 259 l->l_prev = lnew; 260 lnew->l_next = l; 261 l = lines = lnew; 262 extra_lines++; 263 this_line--; 264 } 265 } else { 266 if (!warned++) 267 dowarn(cur_line); 268 cur_line = this_line - adjust; 269 } 270 } 271 } else { 272 /* may need to allocate here */ 273 while (cur_line + adjust > this_line) { 274 if (l->l_next == NULL) { 275 l->l_next = alloc_line(); 276 l->l_next->l_prev = l; 277 } 278 l = l->l_next; 279 this_line++; 280 } 281 } 282 if (this_line > nflushd_lines && 283 this_line - nflushd_lines >= 284 max_bufd_lines + BUFFER_MARGIN) { 285 if (extra_lines) { 286 flush_lines(extra_lines); 287 extra_lines = 0; 288 } 289 flush_lines(this_line - nflushd_lines - 290 max_bufd_lines); 291 nflushd_lines = this_line - max_bufd_lines; 292 } 293 } 294 /* grow line's buffer? */ 295 if (l->l_line_len + 1 >= l->l_lsize) { 296 int need; 297 298 need = l->l_lsize ? l->l_lsize * 2 : 90; 299 if ((l->l_line = realloc(l->l_line, 300 (unsigned)need * sizeof(CHAR))) == NULL) 301 err(1, NULL); 302 l->l_lsize = need; 303 } 304 c = &l->l_line[l->l_line_len++]; 305 c->c_char = ch; 306 c->c_set = cur_set; 307 c->c_column = cur_col; 308 c->c_width = wcwidth(ch); 309 /* 310 * If things are put in out of order, they will need sorting 311 * when it is flushed. 312 */ 313 if (cur_col < l->l_max_col) 314 l->l_needs_sort = 1; 315 else 316 l->l_max_col = cur_col; 317 if (c->c_width > 0) 318 cur_col += c->c_width; 319 } 320 if (ferror(stdin)) 321 err(1, NULL); 322 if (extra_lines) 323 flush_lines(extra_lines); 324 325 /* goto the last line that had a character on it */ 326 for (; l->l_next; l = l->l_next) 327 this_line++; 328 flush_lines(this_line - nflushd_lines + 1); 329 330 /* make sure we leave things in a sane state */ 331 if (last_set != CS_NORMAL) 332 PUTC(SI); 333 334 /* flush out the last few blank lines */ 335 if (max_line > this_line) 336 nblank_lines = max_line - this_line; 337 if (max_line & 1) 338 nblank_lines++; 339 flush_blanks(); 340 exit(0); 341 } 342 343 static void 344 flush_lines(int nflush) 345 { 346 LINE *l; 347 348 while (--nflush >= 0) { 349 l = lines; 350 lines = l->l_next; 351 if (l->l_line) { 352 flush_blanks(); 353 flush_line(l); 354 } 355 if (l->l_line || l->l_next) 356 nblank_lines++; 357 if (l->l_line) 358 free(l->l_line); 359 free_line(l); 360 } 361 if (lines) 362 lines->l_prev = NULL; 363 } 364 365 /* 366 * Print a number of newline/half newlines. If fine flag is set, nblank_lines 367 * is the number of half line feeds, otherwise it is the number of whole line 368 * feeds. 369 */ 370 static void 371 flush_blanks(void) 372 { 373 int half, i, nb; 374 375 half = 0; 376 nb = nblank_lines; 377 if (nb & 1) { 378 if (fine) 379 half = 1; 380 else 381 nb++; 382 } 383 nb /= 2; 384 for (i = nb; --i >= 0;) 385 PUTC('\n'); 386 if (half) { 387 PUTC(ESC); 388 PUTC(FHLF); 389 if (!nb) 390 PUTC('\r'); 391 } 392 nblank_lines = 0; 393 } 394 395 /* 396 * Write a line to stdout taking care of space to tab conversion (-h flag) 397 * and character set shifts. 398 */ 399 static void 400 flush_line(LINE *l) 401 { 402 CHAR *c, *endc; 403 int i, j, nchars, last_col, save, this_col, tot; 404 405 last_col = 0; 406 nchars = l->l_line_len; 407 408 if (l->l_needs_sort) { 409 static CHAR *sorted; 410 static int count_size, *count, sorted_size; 411 412 /* 413 * Do an O(n) sort on l->l_line by column being careful to 414 * preserve the order of characters in the same column. 415 */ 416 if (l->l_lsize > sorted_size) { 417 sorted_size = l->l_lsize; 418 if ((sorted = realloc(sorted, 419 (unsigned)sizeof(CHAR) * sorted_size)) == NULL) 420 err(1, NULL); 421 } 422 if (l->l_max_col >= count_size) { 423 count_size = l->l_max_col + 1; 424 if ((count = realloc(count, 425 (unsigned)sizeof(int) * count_size)) == NULL) 426 err(1, NULL); 427 } 428 memset(count, 0, sizeof(int) * l->l_max_col + 1); 429 for (i = nchars, c = l->l_line; --i >= 0; c++) 430 count[c->c_column]++; 431 432 /* 433 * calculate running total (shifted down by 1) to use as 434 * indices into new line. 435 */ 436 for (tot = 0, i = 0; i <= l->l_max_col; i++) { 437 save = count[i]; 438 count[i] = tot; 439 tot += save; 440 } 441 442 for (i = nchars, c = l->l_line; --i >= 0; c++) 443 sorted[count[c->c_column]++] = *c; 444 c = sorted; 445 } else 446 c = l->l_line; 447 while (nchars > 0) { 448 this_col = c->c_column; 449 endc = c; 450 do { 451 ++endc; 452 } while (--nchars > 0 && this_col == endc->c_column); 453 454 /* if -b only print last character */ 455 if (no_backspaces) { 456 c = endc - 1; 457 if (nchars > 0 && 458 this_col + c->c_width > endc->c_column) 459 continue; 460 } 461 462 if (this_col > last_col) { 463 int nspace = this_col - last_col; 464 465 if (compress_spaces && nspace > 1) { 466 while (1) { 467 int tab_col, tab_size; 468 469 tab_col = (last_col + 8) & ~7; 470 if (tab_col > this_col) 471 break; 472 tab_size = tab_col - last_col; 473 if (tab_size == 1) 474 PUTC(' '); 475 else 476 PUTC('\t'); 477 nspace -= tab_size; 478 last_col = tab_col; 479 } 480 } 481 while (--nspace >= 0) 482 PUTC(' '); 483 last_col = this_col; 484 } 485 486 for (;;) { 487 if (c->c_set != last_set) { 488 switch (c->c_set) { 489 case CS_NORMAL: 490 PUTC(SI); 491 break; 492 case CS_ALTERNATE: 493 PUTC(SO); 494 } 495 last_set = c->c_set; 496 } 497 PUTC(c->c_char); 498 if ((c + 1) < endc) 499 for (j = 0; j < c->c_width; j++) 500 PUTC('\b'); 501 if (++c >= endc) 502 break; 503 } 504 last_col += (c - 1)->c_width; 505 } 506 } 507 508 /* 509 * Increment or decrement a line number, checking for overflow. 510 * Stop one below INT_MAX such that the adjust variable is safe. 511 */ 512 void 513 addto_lineno(int *lno, int offset) 514 { 515 if (offset > 0) { 516 if (*lno >= INT_MAX - offset) 517 errx(1, "too many lines"); 518 } else { 519 if (*lno < INT_MIN - offset) 520 errx(1, "too many reverse line feeds"); 521 } 522 *lno += offset; 523 } 524 525 #define NALLOC 64 526 527 static LINE *line_freelist; 528 529 static LINE * 530 alloc_line(void) 531 { 532 LINE *l; 533 int i; 534 535 if (!line_freelist) { 536 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL) 537 err(1, NULL); 538 line_freelist = l; 539 for (i = 1; i < NALLOC; i++, l++) 540 l->l_next = l + 1; 541 l->l_next = NULL; 542 } 543 l = line_freelist; 544 line_freelist = l->l_next; 545 546 memset(l, 0, sizeof(LINE)); 547 return (l); 548 } 549 550 static void 551 free_line(LINE *l) 552 { 553 554 l->l_next = line_freelist; 555 line_freelist = l; 556 } 557 558 static void 559 usage(void) 560 { 561 562 fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n"); 563 exit(1); 564 } 565 566 static void 567 dowarn(int line) 568 { 569 570 warnx("warning: can't back up %s", 571 line < 0 ? "past first line" : "-- line already flushed"); 572 } 573