1 /*-
2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Michael Rendell of the Memorial University of Newfoundland.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#) Copyright (c) 1990, 1993, 1994 The Regents of the University of California. All rights reserved.
33 * @(#)col.c 8.5 (Berkeley) 5/4/95
34 * $FreeBSD: head/usr.bin/col/col.c 282722 2015-05-10 11:41:38Z bapt $
35 */
36
37 #include <err.h>
38 #include <errno.h>
39 #include <limits.h>
40 #include <locale.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <termios.h>
45 #include <unistd.h>
46 #include <wchar.h>
47 #include <wctype.h>
48
49 #define BS '\b' /* backspace */
50 #define TAB '\t' /* tab */
51 #define SPACE ' ' /* space */
52 #define NL '\n' /* newline */
53 #define CR '\r' /* carriage return */
54 #define ESC '\033' /* escape */
55 #define SI '\017' /* shift in to normal character set */
56 #define SO '\016' /* shift out to alternate character set */
57 #define VT '\013' /* vertical tab (aka reverse line feed) */
58 #define RLF '7' /* ESC-7 reverse line feed */
59 #define RHLF '8' /* ESC-8 reverse half-line feed */
60 #define FHLF '9' /* ESC-9 forward half-line feed */
61
62 /* build up at least this many lines before flushing them out */
63 #define BUFFER_MARGIN 32
64
65 typedef char CSET;
66
67 typedef struct char_str {
68 #define CS_NORMAL 1
69 #define CS_ALTERNATE 2
70 short c_column; /* column character is in */
71 CSET c_set; /* character set (currently only 2) */
72 wchar_t c_char; /* character in question */
73 int c_width; /* character width */
74 } CHAR;
75
76 typedef struct line_str LINE;
77 struct line_str {
78 CHAR *l_line; /* characters on the line */
79 LINE *l_prev; /* previous line */
80 LINE *l_next; /* next line */
81 int l_lsize; /* allocated sizeof l_line */
82 int l_line_len; /* strlen(l_line) */
83 int l_needs_sort; /* set if chars went in out of order */
84 int l_max_col; /* max column in the line */
85 };
86
87 static void addto_lineno(int *, int);
88 static LINE *alloc_line(void);
89 static void dowarn(int);
90 static void flush_line(LINE *);
91 static void flush_lines(int);
92 static void flush_blanks(void);
93 static void free_line(LINE *);
94 static void usage(void);
95
96 static CSET last_set; /* char_set of last char printed */
97 static LINE *lines;
98 static int compress_spaces; /* if doing space -> tab conversion */
99 static int fine; /* if `fine' resolution (half lines) */
100 static int max_bufd_lines; /* max # of half lines to keep in memory */
101 static int nblank_lines; /* # blanks after last flushed line */
102 static int no_backspaces; /* if not to output any backspaces */
103 static int pass_unknown_seqs; /* pass unknown control sequences */
104
105 #define PUTC(ch) \
106 do { \
107 if (putwchar(ch) == WEOF) \
108 errx(1, "write error"); \
109 } while (0)
110
111 int
main(int argc,char ** argv)112 main(int argc, char **argv)
113 {
114 wint_t ch;
115 CHAR *c;
116 CSET cur_set; /* current character set */
117 LINE *l; /* current line */
118 int extra_lines; /* # of lines above first line */
119 int cur_col; /* current column */
120 int cur_line; /* line number of current position */
121 int max_line; /* max value of cur_line */
122 int this_line; /* line l points to */
123 int nflushd_lines; /* number of lines that were flushed */
124 int adjust, opt, warned, width;
125 const char *errstr;
126
127 setlocale(LC_CTYPE, "");
128
129 max_bufd_lines = 256;
130 compress_spaces = 1; /* compress spaces into tabs */
131 while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
132 switch (opt) {
133 case 'b': /* do not output backspaces */
134 no_backspaces = 1;
135 break;
136 case 'f': /* allow half forward line feeds */
137 fine = 1;
138 break;
139 case 'h': /* compress spaces into tabs */
140 compress_spaces = 1;
141 break;
142 case 'l': /* buffered line count */
143 max_bufd_lines = strtonum(optarg, 1,
144 (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2;
145 if (errstr != NULL)
146 errx(1, "bad -l argument, %s: %s", errstr,
147 optarg);
148 break;
149 case 'p': /* pass unknown control sequences */
150 pass_unknown_seqs = 1;
151 break;
152 case 'x': /* do not compress spaces into tabs */
153 compress_spaces = 0;
154 break;
155 case '?':
156 default:
157 usage();
158 }
159
160 if (optind != argc)
161 usage();
162
163 adjust = cur_col = extra_lines = warned = 0;
164 cur_line = max_line = nflushd_lines = this_line = 0;
165 cur_set = last_set = CS_NORMAL;
166 lines = l = alloc_line();
167
168 while ((ch = getwchar()) != WEOF) {
169 if (!iswgraph(ch)) {
170 switch (ch) {
171 case BS: /* can't go back further */
172 if (cur_col == 0)
173 continue;
174 --cur_col;
175 continue;
176 case CR:
177 cur_col = 0;
178 continue;
179 case ESC: /* just ignore EOF */
180 switch (getwchar()) {
181 /*
182 * In the input stream, accept both the
183 * XPG5 sequences ESC-digit and the
184 * traditional BSD sequences ESC-ctrl.
185 */
186 case '\007':
187 /* FALLTHROUGH */
188 case RLF:
189 addto_lineno(&cur_line, -2);
190 break;
191 case '\010':
192 /* FALLTHROUGH */
193 case RHLF:
194 addto_lineno(&cur_line, -1);
195 break;
196 case '\011':
197 /* FALLTHROUGH */
198 case FHLF:
199 addto_lineno(&cur_line, 1);
200 if (cur_line > max_line)
201 max_line = cur_line;
202 }
203 continue;
204 case NL:
205 addto_lineno(&cur_line, 2);
206 if (cur_line > max_line)
207 max_line = cur_line;
208 cur_col = 0;
209 continue;
210 case SPACE:
211 ++cur_col;
212 continue;
213 case SI:
214 cur_set = CS_NORMAL;
215 continue;
216 case SO:
217 cur_set = CS_ALTERNATE;
218 continue;
219 case TAB: /* adjust column */
220 cur_col |= 7;
221 ++cur_col;
222 continue;
223 case VT:
224 addto_lineno(&cur_line, -2);
225 continue;
226 }
227 if (iswspace(ch)) {
228 if ((width = wcwidth(ch)) > 0)
229 cur_col += width;
230 continue;
231 }
232 if (!pass_unknown_seqs)
233 continue;
234 }
235
236 /* Must stuff ch in a line - are we at the right one? */
237 if (cur_line + adjust != this_line) {
238 LINE *lnew;
239
240 /* round up to next line */
241 adjust = !fine && (cur_line & 1);
242
243 if (cur_line + adjust < this_line) {
244 while (cur_line + adjust < this_line &&
245 l->l_prev != NULL) {
246 l = l->l_prev;
247 this_line--;
248 }
249 if (cur_line + adjust < this_line) {
250 if (nflushd_lines == 0) {
251 /*
252 * Allow backup past first
253 * line if nothing has been
254 * flushed yet.
255 */
256 while (cur_line + adjust
257 < this_line) {
258 lnew = alloc_line();
259 l->l_prev = lnew;
260 lnew->l_next = l;
261 l = lines = lnew;
262 extra_lines++;
263 this_line--;
264 }
265 } else {
266 if (!warned++)
267 dowarn(cur_line);
268 cur_line = this_line - adjust;
269 }
270 }
271 } else {
272 /* may need to allocate here */
273 while (cur_line + adjust > this_line) {
274 if (l->l_next == NULL) {
275 l->l_next = alloc_line();
276 l->l_next->l_prev = l;
277 }
278 l = l->l_next;
279 this_line++;
280 }
281 }
282 if (this_line > nflushd_lines &&
283 this_line - nflushd_lines >=
284 max_bufd_lines + BUFFER_MARGIN) {
285 if (extra_lines) {
286 flush_lines(extra_lines);
287 extra_lines = 0;
288 }
289 flush_lines(this_line - nflushd_lines -
290 max_bufd_lines);
291 nflushd_lines = this_line - max_bufd_lines;
292 }
293 }
294 /* grow line's buffer? */
295 if (l->l_line_len + 1 >= l->l_lsize) {
296 int need;
297
298 need = l->l_lsize ? l->l_lsize * 2 : 90;
299 if ((l->l_line = realloc(l->l_line,
300 (unsigned)need * sizeof(CHAR))) == NULL)
301 err(1, NULL);
302 l->l_lsize = need;
303 }
304 c = &l->l_line[l->l_line_len++];
305 c->c_char = ch;
306 c->c_set = cur_set;
307 c->c_column = cur_col;
308 c->c_width = wcwidth(ch);
309 /*
310 * If things are put in out of order, they will need sorting
311 * when it is flushed.
312 */
313 if (cur_col < l->l_max_col)
314 l->l_needs_sort = 1;
315 else
316 l->l_max_col = cur_col;
317 if (c->c_width > 0)
318 cur_col += c->c_width;
319 }
320 if (ferror(stdin))
321 err(1, NULL);
322 if (extra_lines)
323 flush_lines(extra_lines);
324
325 /* goto the last line that had a character on it */
326 for (; l->l_next; l = l->l_next)
327 this_line++;
328 flush_lines(this_line - nflushd_lines + 1);
329
330 /* make sure we leave things in a sane state */
331 if (last_set != CS_NORMAL)
332 PUTC(SI);
333
334 /* flush out the last few blank lines */
335 if (max_line > this_line)
336 nblank_lines = max_line - this_line;
337 if (max_line & 1)
338 nblank_lines++;
339 flush_blanks();
340 exit(0);
341 }
342
343 static void
flush_lines(int nflush)344 flush_lines(int nflush)
345 {
346 LINE *l;
347
348 while (--nflush >= 0) {
349 l = lines;
350 lines = l->l_next;
351 if (l->l_line) {
352 flush_blanks();
353 flush_line(l);
354 }
355 if (l->l_line || l->l_next)
356 nblank_lines++;
357 if (l->l_line)
358 free(l->l_line);
359 free_line(l);
360 }
361 if (lines)
362 lines->l_prev = NULL;
363 }
364
365 /*
366 * Print a number of newline/half newlines. If fine flag is set, nblank_lines
367 * is the number of half line feeds, otherwise it is the number of whole line
368 * feeds.
369 */
370 static void
flush_blanks(void)371 flush_blanks(void)
372 {
373 int half, i, nb;
374
375 half = 0;
376 nb = nblank_lines;
377 if (nb & 1) {
378 if (fine)
379 half = 1;
380 else
381 nb++;
382 }
383 nb /= 2;
384 for (i = nb; --i >= 0;)
385 PUTC('\n');
386 if (half) {
387 PUTC(ESC);
388 PUTC(FHLF);
389 if (!nb)
390 PUTC('\r');
391 }
392 nblank_lines = 0;
393 }
394
395 /*
396 * Write a line to stdout taking care of space to tab conversion (-h flag)
397 * and character set shifts.
398 */
399 static void
flush_line(LINE * l)400 flush_line(LINE *l)
401 {
402 CHAR *c, *endc;
403 int i, j, nchars, last_col, save, this_col, tot;
404
405 last_col = 0;
406 nchars = l->l_line_len;
407
408 if (l->l_needs_sort) {
409 static CHAR *sorted;
410 static int count_size, *count, sorted_size;
411
412 /*
413 * Do an O(n) sort on l->l_line by column being careful to
414 * preserve the order of characters in the same column.
415 */
416 if (l->l_lsize > sorted_size) {
417 sorted_size = l->l_lsize;
418 if ((sorted = realloc(sorted,
419 (unsigned)sizeof(CHAR) * sorted_size)) == NULL)
420 err(1, NULL);
421 }
422 if (l->l_max_col >= count_size) {
423 count_size = l->l_max_col + 1;
424 if ((count = realloc(count,
425 (unsigned)sizeof(int) * count_size)) == NULL)
426 err(1, NULL);
427 }
428 memset(count, 0, sizeof(int) * l->l_max_col + 1);
429 for (i = nchars, c = l->l_line; --i >= 0; c++)
430 count[c->c_column]++;
431
432 /*
433 * calculate running total (shifted down by 1) to use as
434 * indices into new line.
435 */
436 for (tot = 0, i = 0; i <= l->l_max_col; i++) {
437 save = count[i];
438 count[i] = tot;
439 tot += save;
440 }
441
442 for (i = nchars, c = l->l_line; --i >= 0; c++)
443 sorted[count[c->c_column]++] = *c;
444 c = sorted;
445 } else
446 c = l->l_line;
447 while (nchars > 0) {
448 this_col = c->c_column;
449 endc = c;
450 do {
451 ++endc;
452 } while (--nchars > 0 && this_col == endc->c_column);
453
454 /* if -b only print last character */
455 if (no_backspaces) {
456 c = endc - 1;
457 if (nchars > 0 &&
458 this_col + c->c_width > endc->c_column)
459 continue;
460 }
461
462 if (this_col > last_col) {
463 int nspace = this_col - last_col;
464
465 if (compress_spaces && nspace > 1) {
466 while (1) {
467 int tab_col, tab_size;
468
469 tab_col = (last_col + 8) & ~7;
470 if (tab_col > this_col)
471 break;
472 tab_size = tab_col - last_col;
473 if (tab_size == 1)
474 PUTC(' ');
475 else
476 PUTC('\t');
477 nspace -= tab_size;
478 last_col = tab_col;
479 }
480 }
481 while (--nspace >= 0)
482 PUTC(' ');
483 last_col = this_col;
484 }
485
486 for (;;) {
487 if (c->c_set != last_set) {
488 switch (c->c_set) {
489 case CS_NORMAL:
490 PUTC(SI);
491 break;
492 case CS_ALTERNATE:
493 PUTC(SO);
494 }
495 last_set = c->c_set;
496 }
497 PUTC(c->c_char);
498 if ((c + 1) < endc)
499 for (j = 0; j < c->c_width; j++)
500 PUTC('\b');
501 if (++c >= endc)
502 break;
503 }
504 last_col += (c - 1)->c_width;
505 }
506 }
507
508 /*
509 * Increment or decrement a line number, checking for overflow.
510 * Stop one below INT_MAX such that the adjust variable is safe.
511 */
512 void
addto_lineno(int * lno,int offset)513 addto_lineno(int *lno, int offset)
514 {
515 if (offset > 0) {
516 if (*lno >= INT_MAX - offset)
517 errx(1, "too many lines");
518 } else {
519 if (*lno < INT_MIN - offset)
520 errx(1, "too many reverse line feeds");
521 }
522 *lno += offset;
523 }
524
525 #define NALLOC 64
526
527 static LINE *line_freelist;
528
529 static LINE *
alloc_line(void)530 alloc_line(void)
531 {
532 LINE *l;
533 int i;
534
535 if (!line_freelist) {
536 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
537 err(1, NULL);
538 line_freelist = l;
539 for (i = 1; i < NALLOC; i++, l++)
540 l->l_next = l + 1;
541 l->l_next = NULL;
542 }
543 l = line_freelist;
544 line_freelist = l->l_next;
545
546 memset(l, 0, sizeof(LINE));
547 return (l);
548 }
549
550 static void
free_line(LINE * l)551 free_line(LINE *l)
552 {
553
554 l->l_next = line_freelist;
555 line_freelist = l;
556 }
557
558 static void
usage(void)559 usage(void)
560 {
561
562 fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n");
563 exit(1);
564 }
565
566 static void
dowarn(int line)567 dowarn(int line)
568 {
569
570 warnx("warning: can't back up %s",
571 line < 0 ? "past first line" : "-- line already flushed");
572 }
573