1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of the Memorial University of Newfoundland. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#) Copyright (c) 1990, 1993, 1994 The Regents of the University of California. All rights reserved. 33 * @(#)col.c 8.5 (Berkeley) 5/4/95 34 * $FreeBSD: src/usr.bin/col/col.c,v 1.6.6.4 2001/08/02 01:27:12 obrien Exp $ 35 * $DragonFly: src/usr.bin/col/col.c,v 1.5 2006/08/03 16:40:48 swildner Exp $ 36 */ 37 38 #include <ctype.h> 39 #include <err.h> 40 #include <string.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <unistd.h> 44 #include <locale.h> 45 46 #define BS '\b' /* backspace */ 47 #define TAB '\t' /* tab */ 48 #define SPACE ' ' /* space */ 49 #define NL '\n' /* newline */ 50 #define CR '\r' /* carriage return */ 51 #define ESC '\033' /* escape */ 52 #define SI '\017' /* shift in to normal character set */ 53 #define SO '\016' /* shift out to alternate character set */ 54 #define VT '\013' /* vertical tab (aka reverse line feed) */ 55 #define RLF '\007' /* ESC-07 reverse line feed */ 56 #define RHLF '\010' /* ESC-010 reverse half-line feed */ 57 #define FHLF '\011' /* ESC-011 forward half-line feed */ 58 59 /* build up at least this many lines before flushing them out */ 60 #define BUFFER_MARGIN 32 61 62 typedef char CSET; 63 64 typedef struct char_str { 65 #define CS_NORMAL 1 66 #define CS_ALTERNATE 2 67 short c_column; /* column character is in */ 68 CSET c_set; /* character set (currently only 2) */ 69 char c_char; /* character in question */ 70 } CHAR; 71 72 typedef struct line_str LINE; 73 struct line_str { 74 CHAR *l_line; /* characters on the line */ 75 LINE *l_prev; /* previous line */ 76 LINE *l_next; /* next line */ 77 int l_lsize; /* allocated sizeof l_line */ 78 int l_line_len; /* strlen(l_line) */ 79 int l_needs_sort; /* set if chars went in out of order */ 80 int l_max_col; /* max column in the line */ 81 }; 82 83 LINE *alloc_line(void); 84 void dowarn(int); 85 void flush_line(LINE *); 86 void flush_lines(int); 87 void flush_blanks(void); 88 void free_line(LINE *); 89 int main(int, char **); 90 void usage(void); 91 92 CSET last_set; /* char_set of last char printed */ 93 LINE *lines; 94 int compress_spaces; /* if doing space -> tab conversion */ 95 int fine; /* if `fine' resolution (half lines) */ 96 int max_bufd_lines; /* max # lines to keep in memory */ 97 int nblank_lines; /* # blanks after last flushed line */ 98 int no_backspaces; /* if not to output any backspaces */ 99 int pass_unknown_seqs; /* pass unknown control sequences */ 100 101 #define PUTC(ch) \ 102 do { \ 103 if (putchar(ch) == EOF) \ 104 errx(1, "write error"); \ 105 } while (0) 106 107 int 108 main(int argc, char **argv) 109 { 110 int ch; 111 CHAR *c; 112 CSET cur_set; /* current character set */ 113 LINE *l; /* current line */ 114 int extra_lines; /* # of lines above first line */ 115 int cur_col; /* current column */ 116 int cur_line; /* line number of current position */ 117 int max_line; /* max value of cur_line */ 118 int this_line; /* line l points to */ 119 int nflushd_lines; /* number of lines that were flushed */ 120 int adjust, opt, warned; 121 122 (void)setlocale(LC_CTYPE, ""); 123 124 max_bufd_lines = 128; 125 compress_spaces = 1; /* compress spaces into tabs */ 126 while ((opt = getopt(argc, argv, "bfhl:px")) != -1) 127 switch (opt) { 128 case 'b': /* do not output backspaces */ 129 no_backspaces = 1; 130 break; 131 case 'f': /* allow half forward line feeds */ 132 fine = 1; 133 break; 134 case 'h': /* compress spaces into tabs */ 135 compress_spaces = 1; 136 break; 137 case 'l': /* buffered line count */ 138 if ((max_bufd_lines = atoi(optarg)) <= 0) 139 errx(1, "bad -l argument %s", optarg); 140 break; 141 case 'p': /* pass unknown control sequences */ 142 pass_unknown_seqs = 1; 143 break; 144 case 'x': /* do not compress spaces into tabs */ 145 compress_spaces = 0; 146 break; 147 case '?': 148 default: 149 usage(); 150 } 151 152 if (optind != argc) 153 usage(); 154 155 /* this value is in half lines */ 156 max_bufd_lines *= 2; 157 158 adjust = cur_col = extra_lines = warned = 0; 159 cur_line = max_line = nflushd_lines = this_line = 0; 160 cur_set = last_set = CS_NORMAL; 161 lines = l = alloc_line(); 162 163 while ((ch = getchar()) != EOF) { 164 if (!isgraph(ch)) { 165 switch (ch) { 166 case BS: /* can't go back further */ 167 if (cur_col == 0) 168 continue; 169 --cur_col; 170 continue; 171 case CR: 172 cur_col = 0; 173 continue; 174 case ESC: /* just ignore EOF */ 175 switch(getchar()) { 176 case RLF: 177 cur_line -= 2; 178 break; 179 case RHLF: 180 cur_line--; 181 break; 182 case FHLF: 183 cur_line++; 184 if (cur_line > max_line) 185 max_line = cur_line; 186 } 187 continue; 188 case NL: 189 cur_line += 2; 190 if (cur_line > max_line) 191 max_line = cur_line; 192 cur_col = 0; 193 continue; 194 case SPACE: 195 ++cur_col; 196 continue; 197 case SI: 198 cur_set = CS_NORMAL; 199 continue; 200 case SO: 201 cur_set = CS_ALTERNATE; 202 continue; 203 case TAB: /* adjust column */ 204 cur_col |= 7; 205 ++cur_col; 206 continue; 207 case VT: 208 cur_line -= 2; 209 continue; 210 } 211 if (!pass_unknown_seqs) 212 continue; 213 } 214 215 /* Must stuff ch in a line - are we at the right one? */ 216 if (cur_line != this_line - adjust) { 217 LINE *lnew; 218 int nmove; 219 220 adjust = 0; 221 nmove = cur_line - this_line; 222 if (!fine) { 223 /* round up to next line */ 224 if (cur_line & 1) { 225 adjust = 1; 226 nmove++; 227 } 228 } 229 if (nmove < 0) { 230 for (; nmove < 0 && l->l_prev; nmove++) 231 l = l->l_prev; 232 if (nmove) { 233 if (nflushd_lines == 0) { 234 /* 235 * Allow backup past first 236 * line if nothing has been 237 * flushed yet. 238 */ 239 for (; nmove < 0; nmove++) { 240 lnew = alloc_line(); 241 l->l_prev = lnew; 242 lnew->l_next = l; 243 l = lines = lnew; 244 extra_lines++; 245 } 246 } else { 247 if (!warned++) 248 dowarn(cur_line); 249 cur_line -= nmove; 250 } 251 } 252 } else { 253 /* may need to allocate here */ 254 for (; nmove > 0 && l->l_next; nmove--) 255 l = l->l_next; 256 for (; nmove > 0; nmove--) { 257 lnew = alloc_line(); 258 lnew->l_prev = l; 259 l->l_next = lnew; 260 l = lnew; 261 } 262 } 263 this_line = cur_line + adjust; 264 nmove = this_line - nflushd_lines; 265 if (nmove >= max_bufd_lines + BUFFER_MARGIN) { 266 nflushd_lines += nmove - max_bufd_lines; 267 flush_lines(nmove - max_bufd_lines); 268 } 269 } 270 /* grow line's buffer? */ 271 if (l->l_line_len + 1 >= l->l_lsize) { 272 int need; 273 274 need = l->l_lsize ? l->l_lsize * 2 : 90; 275 if ((l->l_line = realloc(l->l_line, 276 (unsigned)need * sizeof(CHAR))) == NULL) 277 err(1, NULL); 278 l->l_lsize = need; 279 } 280 c = &l->l_line[l->l_line_len++]; 281 c->c_char = ch; 282 c->c_set = cur_set; 283 c->c_column = cur_col; 284 /* 285 * If things are put in out of order, they will need sorting 286 * when it is flushed. 287 */ 288 if (cur_col < l->l_max_col) 289 l->l_needs_sort = 1; 290 else 291 l->l_max_col = cur_col; 292 cur_col++; 293 } 294 if (max_line == 0) 295 exit(0); /* no lines, so just exit */ 296 297 /* goto the last line that had a character on it */ 298 for (; l->l_next; l = l->l_next) 299 this_line++; 300 flush_lines(this_line - nflushd_lines + extra_lines + 1); 301 302 /* make sure we leave things in a sane state */ 303 if (last_set != CS_NORMAL) 304 PUTC('\017'); 305 306 /* flush out the last few blank lines */ 307 nblank_lines = max_line - this_line; 308 if (max_line & 1) 309 nblank_lines++; 310 else if (!nblank_lines) 311 /* missing a \n on the last line? */ 312 nblank_lines = 2; 313 flush_blanks(); 314 exit(0); 315 } 316 317 void 318 flush_lines(int nflush) 319 { 320 LINE *l; 321 322 while (--nflush >= 0) { 323 l = lines; 324 lines = l->l_next; 325 if (l->l_line) { 326 flush_blanks(); 327 flush_line(l); 328 } 329 nblank_lines++; 330 if (l->l_line) 331 (void)free(l->l_line); 332 free_line(l); 333 } 334 if (lines) 335 lines->l_prev = NULL; 336 } 337 338 /* 339 * Print a number of newline/half newlines. If fine flag is set, nblank_lines 340 * is the number of half line feeds, otherwise it is the number of whole line 341 * feeds. 342 */ 343 void 344 flush_blanks(void) 345 { 346 int half, i, nb; 347 348 half = 0; 349 nb = nblank_lines; 350 if (nb & 1) { 351 if (fine) 352 half = 1; 353 else 354 nb++; 355 } 356 nb /= 2; 357 for (i = nb; --i >= 0;) 358 PUTC('\n'); 359 if (half) { 360 PUTC('\033'); 361 PUTC('9'); 362 if (!nb) 363 PUTC('\r'); 364 } 365 nblank_lines = 0; 366 } 367 368 /* 369 * Write a line to stdout taking care of space to tab conversion (-h flag) 370 * and character set shifts. 371 */ 372 void 373 flush_line(LINE *l) 374 { 375 CHAR *c, *endc; 376 int nchars, last_col, this_col; 377 378 last_col = 0; 379 nchars = l->l_line_len; 380 381 if (l->l_needs_sort) { 382 static CHAR *sorted; 383 static int count_size, *count, i, save, sorted_size, tot; 384 385 /* 386 * Do an O(n) sort on l->l_line by column being careful to 387 * preserve the order of characters in the same column. 388 */ 389 if (l->l_lsize > sorted_size) { 390 sorted_size = l->l_lsize; 391 if ((sorted = realloc(sorted, 392 (unsigned)sizeof(CHAR) * sorted_size)) == NULL) 393 err(1, NULL); 394 } 395 if (l->l_max_col >= count_size) { 396 count_size = l->l_max_col + 1; 397 if ((count = realloc(count, 398 (unsigned)sizeof(int) * count_size)) == NULL) 399 err(1, NULL); 400 } 401 memset(count, 0, sizeof(int) * l->l_max_col + 1); 402 for (i = nchars, c = l->l_line; --i >= 0; c++) 403 count[c->c_column]++; 404 405 /* 406 * calculate running total (shifted down by 1) to use as 407 * indices into new line. 408 */ 409 for (tot = 0, i = 0; i <= l->l_max_col; i++) { 410 save = count[i]; 411 count[i] = tot; 412 tot += save; 413 } 414 415 for (i = nchars, c = l->l_line; --i >= 0; c++) 416 sorted[count[c->c_column]++] = *c; 417 c = sorted; 418 } else 419 c = l->l_line; 420 while (nchars > 0) { 421 this_col = c->c_column; 422 endc = c; 423 do { 424 ++endc; 425 } while (--nchars > 0 && this_col == endc->c_column); 426 427 /* if -b only print last character */ 428 if (no_backspaces) 429 c = endc - 1; 430 431 if (this_col > last_col) { 432 int nspace = this_col - last_col; 433 434 if (compress_spaces && nspace > 1) { 435 while (1) { 436 int tab_col, tab_size; 437 438 tab_col = (last_col + 8) & ~7; 439 if (tab_col > this_col) 440 break; 441 tab_size = tab_col - last_col; 442 if (tab_size == 1) 443 PUTC(' '); 444 else 445 PUTC('\t'); 446 nspace -= tab_size; 447 last_col = tab_col; 448 } 449 } 450 while (--nspace >= 0) 451 PUTC(' '); 452 last_col = this_col; 453 } 454 last_col++; 455 456 for (;;) { 457 if (c->c_set != last_set) { 458 switch (c->c_set) { 459 case CS_NORMAL: 460 PUTC('\017'); 461 break; 462 case CS_ALTERNATE: 463 PUTC('\016'); 464 } 465 last_set = c->c_set; 466 } 467 PUTC(c->c_char); 468 if (++c >= endc) 469 break; 470 PUTC('\b'); 471 } 472 } 473 } 474 475 #define NALLOC 64 476 477 static LINE *line_freelist; 478 479 LINE * 480 alloc_line(void) 481 { 482 LINE *l; 483 int i; 484 485 if (!line_freelist) { 486 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL) 487 err(1, NULL); 488 line_freelist = l; 489 for (i = 1; i < NALLOC; i++, l++) 490 l->l_next = l + 1; 491 l->l_next = NULL; 492 } 493 l = line_freelist; 494 line_freelist = l->l_next; 495 496 memset(l, 0, sizeof(LINE)); 497 return (l); 498 } 499 500 void 501 free_line(LINE *l) 502 { 503 504 l->l_next = line_freelist; 505 line_freelist = l; 506 } 507 508 void 509 usage(void) 510 { 511 512 (void)fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n"); 513 exit(1); 514 } 515 516 void 517 dowarn(int line) 518 { 519 520 warnx("warning: can't back up %s", 521 line < 0 ? "past first line" : "-- line already flushed"); 522 } 523