1 /* $OpenBSD: col.c,v 1.20 2022/12/04 23:50:47 cheloha Exp $ */
2 /* $NetBSD: col.c,v 1.7 1995/09/02 05:48:50 jtc Exp $ */
3
4 /*-
5 * Copyright (c) 1990, 1993, 1994
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Michael Rendell of the Memorial University of Newfoundland.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 #include <ctype.h>
37 #include <err.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <unistd.h>
42 #include <limits.h>
43
44 #define BS '\b' /* backspace */
45 #define TAB '\t' /* tab */
46 #define SPACE ' ' /* space */
47 #define NL '\n' /* newline */
48 #define CR '\r' /* carriage return */
49 #define ESC '\033' /* escape */
50 #define SI '\017' /* shift in to normal character set */
51 #define SO '\016' /* shift out to alternate character set */
52 #define VT '\013' /* vertical tab (aka reverse line feed) */
53
54 /* build up at least this many lines before flushing them out */
55 #define BUFFER_MARGIN 32
56
57 typedef char CSET;
58
59 typedef struct char_str {
60 #define CS_NORMAL 1
61 #define CS_ALTERNATE 2
62 size_t c_column; /* column character is in */
63 CSET c_set; /* character set (currently only 2) */
64 char c_char; /* character in question */
65 } CHAR;
66
67 typedef struct line_str LINE;
68 struct line_str {
69 CHAR *l_line; /* characters on the line */
70 LINE *l_prev; /* previous line */
71 LINE *l_next; /* next line */
72 size_t l_lsize; /* allocated sizeof l_line */
73 size_t l_line_len; /* strlen(l_line) */
74 size_t l_max_col; /* max column in the line */
75 int l_needs_sort; /* set if chars went in out of order */
76 };
77
78 void addto_lineno(int *, int);
79 LINE *alloc_line(void);
80 void dowarn(int);
81 void flush_line(LINE *);
82 void flush_lines(int);
83 void flush_blanks(void);
84 void free_line(LINE *);
85 void usage(void);
86 void *xreallocarray(void *, size_t, size_t);
87
88 CSET last_set; /* char_set of last char printed */
89 LINE *lines;
90 int compress_spaces; /* if doing space -> tab conversion */
91 int fine; /* if `fine' resolution (half lines) */
92 int max_bufd_lines; /* max # of half lines to keep in memory */
93 int nblank_lines; /* # blanks after last flushed line */
94 int no_backspaces; /* if not to output any backspaces */
95
96 #define PUTC(ch) \
97 if (putchar(ch) == EOF) \
98 err(1, "stdout");
99
100 int
main(int argc,char * argv[])101 main(int argc, char *argv[])
102 {
103 int ch;
104 CHAR *c;
105 CSET cur_set; /* current character set */
106 LINE *l; /* current line */
107 int extra_lines; /* # of lines above first line */
108 size_t cur_col; /* current column */
109 int cur_line; /* line number of current position */
110 int max_line; /* max value of cur_line */
111 int this_line; /* line l points to */
112 int nflushd_lines; /* number of lines that were flushed */
113 int adjust, opt, warned;
114 const char *errstr;
115
116 if (pledge("stdio", NULL) == -1)
117 err(1, "pledge");
118
119 max_bufd_lines = 256;
120 compress_spaces = 1; /* compress spaces into tabs */
121 while ((opt = getopt(argc, argv, "bfhl:x")) != -1)
122 switch (opt) {
123 case 'b': /* do not output backspaces */
124 no_backspaces = 1;
125 break;
126 case 'f': /* allow half forward line feeds */
127 fine = 1;
128 break;
129 case 'h': /* compress spaces into tabs */
130 compress_spaces = 1;
131 break;
132 case 'l': /* buffered line count */
133 max_bufd_lines = strtonum(optarg, 1,
134 (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2;
135 if (errstr != NULL)
136 errx(1, "bad -l argument, %s: %s", errstr,
137 optarg);
138 break;
139 case 'x': /* do not compress spaces into tabs */
140 compress_spaces = 0;
141 break;
142 default:
143 usage();
144 }
145
146 if (optind != argc)
147 usage();
148
149 adjust = extra_lines = warned = 0;
150 cur_col = 0;
151 cur_line = max_line = nflushd_lines = this_line = 0;
152 cur_set = last_set = CS_NORMAL;
153 lines = l = alloc_line();
154
155 while ((ch = getchar()) != EOF) {
156 if (!isgraph(ch)) {
157 switch (ch) {
158 case BS: /* can't go back further */
159 if (cur_col == 0)
160 continue;
161 --cur_col;
162 continue;
163 case CR:
164 cur_col = 0;
165 continue;
166 case ESC: /* just ignore EOF */
167 /*
168 * In the input stream, accept both the
169 * XPG5 sequences ESC-digit and the
170 * traditional BSD sequences ESC-ctrl.
171 */
172 switch(getchar()) {
173 case '7': /* reverse line feed */
174 /* FALLTHROUGH */
175 case '\007':
176 addto_lineno(&cur_line, -2);
177 break;
178 case '8': /* reverse half-line feed */
179 /* FALLTHROUGH */
180 case '\010':
181 addto_lineno(&cur_line, -1);
182 break;
183 case '9': /* forward half-line feed */
184 /* FALLTHROUGH */
185 case '\011':
186 addto_lineno(&cur_line, 1);
187 if (cur_line > max_line)
188 max_line = cur_line;
189 }
190 continue;
191 case NL:
192 addto_lineno(&cur_line, 2);
193 if (cur_line > max_line)
194 max_line = cur_line;
195 cur_col = 0;
196 continue;
197 case SPACE:
198 ++cur_col;
199 continue;
200 case SI:
201 cur_set = CS_NORMAL;
202 continue;
203 case SO:
204 cur_set = CS_ALTERNATE;
205 continue;
206 case TAB: /* adjust column */
207 cur_col |= 7;
208 ++cur_col;
209 continue;
210 case VT:
211 addto_lineno(&cur_line, -2);
212 continue;
213 }
214 continue;
215 }
216
217 /* Must stuff ch in a line - are we at the right one? */
218 if (cur_line + adjust != this_line) {
219 LINE *lnew;
220
221 /* round up to next line */
222 adjust = !fine && (cur_line & 1);
223
224 if (cur_line + adjust < this_line) {
225 while (cur_line + adjust < this_line &&
226 l->l_prev != NULL) {
227 l = l->l_prev;
228 this_line--;
229 }
230 if (cur_line + adjust < this_line) {
231 if (nflushd_lines == 0) {
232 /*
233 * Allow backup past first
234 * line if nothing has been
235 * flushed yet.
236 */
237 while (cur_line + adjust
238 < this_line) {
239 lnew = alloc_line();
240 l->l_prev = lnew;
241 lnew->l_next = l;
242 l = lines = lnew;
243 extra_lines++;
244 this_line--;
245 }
246 } else {
247 if (!warned++)
248 dowarn(cur_line);
249 cur_line = this_line - adjust;
250 }
251 }
252 } else {
253 /* may need to allocate here */
254 while (cur_line + adjust > this_line) {
255 if (l->l_next == NULL) {
256 l->l_next = alloc_line();
257 l->l_next->l_prev = l;
258 }
259 l = l->l_next;
260 this_line++;
261 }
262 }
263 if (this_line > nflushd_lines &&
264 this_line - nflushd_lines >=
265 max_bufd_lines + BUFFER_MARGIN) {
266 if (extra_lines) {
267 flush_lines(extra_lines);
268 extra_lines = 0;
269 }
270 flush_lines(this_line - nflushd_lines -
271 max_bufd_lines);
272 nflushd_lines = this_line - max_bufd_lines;
273 }
274 }
275 /* grow line's buffer? */
276 if (l->l_line_len + 1 >= l->l_lsize) {
277 size_t need;
278
279 need = l->l_lsize ? l->l_lsize : 45;
280 l->l_line = xreallocarray(l->l_line,
281 need, 2 * sizeof(CHAR));
282 l->l_lsize = need * 2;
283 }
284 c = &l->l_line[l->l_line_len++];
285 c->c_char = ch;
286 c->c_set = cur_set;
287 c->c_column = cur_col;
288 /*
289 * If things are put in out of order, they will need sorting
290 * when it is flushed.
291 */
292 if (cur_col < l->l_max_col)
293 l->l_needs_sort = 1;
294 else
295 l->l_max_col = cur_col;
296 cur_col++;
297 }
298 if (extra_lines)
299 flush_lines(extra_lines);
300
301 /* goto the last line that had a character on it */
302 for (; l->l_next; l = l->l_next)
303 this_line++;
304 flush_lines(this_line - nflushd_lines + 1);
305
306 /* make sure we leave things in a sane state */
307 if (last_set != CS_NORMAL)
308 PUTC(SI);
309
310 /* flush out the last few blank lines */
311 if (max_line > this_line)
312 nblank_lines = max_line - this_line;
313 if (max_line & 1)
314 nblank_lines++;
315 flush_blanks();
316 exit(0);
317 }
318
319 void
flush_lines(int nflush)320 flush_lines(int nflush)
321 {
322 LINE *l;
323
324 while (--nflush >= 0) {
325 l = lines;
326 lines = l->l_next;
327 if (l->l_line) {
328 flush_blanks();
329 flush_line(l);
330 }
331 if (l->l_line || l->l_next)
332 nblank_lines++;
333 if (l->l_line)
334 (void)free((void *)l->l_line);
335 free_line(l);
336 }
337 if (lines)
338 lines->l_prev = NULL;
339 }
340
341 /*
342 * Print a number of newline/half newlines. If fine flag is set, nblank_lines
343 * is the number of half line feeds, otherwise it is the number of whole line
344 * feeds.
345 */
346 void
flush_blanks(void)347 flush_blanks(void)
348 {
349 int half, i, nb;
350
351 half = 0;
352 nb = nblank_lines;
353 if (nb & 1) {
354 if (fine)
355 half = 1;
356 else
357 nb++;
358 }
359 nb /= 2;
360 for (i = nb; --i >= 0;)
361 PUTC('\n');
362 if (half) {
363 /*
364 * In the output stream, always generate
365 * escape sequences conforming to XPG5.
366 */
367 PUTC(ESC);
368 PUTC('9');
369 if (!nb)
370 PUTC('\r');
371 }
372 nblank_lines = 0;
373 }
374
375 /*
376 * Write a line to stdout taking care of space to tab conversion (-h flag)
377 * and character set shifts.
378 */
379 void
flush_line(LINE * l)380 flush_line(LINE *l)
381 {
382 CHAR *c, *endc;
383 size_t nchars, last_col, this_col;
384
385 last_col = 0;
386 nchars = l->l_line_len;
387
388 if (l->l_needs_sort) {
389 static CHAR *sorted;
390 static size_t count_size, i, sorted_size;
391 static int *count, save, tot;
392
393 /*
394 * Do an O(n) sort on l->l_line by column being careful to
395 * preserve the order of characters in the same column.
396 */
397 if (l->l_lsize > sorted_size) {
398 sorted_size = l->l_lsize;
399 sorted = xreallocarray(sorted,
400 sorted_size, sizeof(CHAR));
401 }
402 if (l->l_max_col >= count_size) {
403 count_size = l->l_max_col + 1;
404 count = xreallocarray(count,
405 count_size, sizeof(int));
406 }
407 memset(count, 0, sizeof(*count) * (l->l_max_col + 1));
408 for (i = nchars, c = l->l_line; i-- > 0; c++)
409 count[c->c_column]++;
410
411 /*
412 * calculate running total (shifted down by 1) to use as
413 * indices into new line.
414 */
415 for (tot = 0, i = 0; i <= l->l_max_col; i++) {
416 save = count[i];
417 count[i] = tot;
418 tot += save;
419 }
420
421 for (i = nchars, c = l->l_line; i-- > 0; c++)
422 sorted[count[c->c_column]++] = *c;
423 c = sorted;
424 } else
425 c = l->l_line;
426 while (nchars > 0) {
427 this_col = c->c_column;
428 endc = c;
429 do {
430 ++endc;
431 } while (--nchars > 0 && this_col == endc->c_column);
432
433 /* if -b only print last character */
434 if (no_backspaces)
435 c = endc - 1;
436
437 if (this_col > last_col) {
438 size_t nspace = this_col - last_col;
439
440 if (compress_spaces && nspace > 1) {
441 size_t ntabs;
442
443 ntabs = ((last_col % 8) + nspace) / 8;
444 if (ntabs) {
445 nspace -= (ntabs * 8) - (last_col % 8);
446 while (ntabs-- > 0)
447 PUTC('\t');
448 }
449 }
450 while (nspace-- > 0)
451 PUTC(' ');
452 last_col = this_col;
453 }
454 last_col++;
455
456 for (;;) {
457 if (c->c_set != last_set) {
458 switch (c->c_set) {
459 case CS_NORMAL:
460 PUTC(SI);
461 break;
462 case CS_ALTERNATE:
463 PUTC(SO);
464 }
465 last_set = c->c_set;
466 }
467 PUTC(c->c_char);
468 if (++c >= endc)
469 break;
470 PUTC('\b');
471 }
472 }
473 }
474
475 /*
476 * Increment or decrement a line number, checking for overflow.
477 * Stop one below INT_MAX such that the adjust variable is safe.
478 */
479 void
addto_lineno(int * lno,int offset)480 addto_lineno(int *lno, int offset)
481 {
482 if (offset > 0) {
483 if (*lno >= INT_MAX - offset)
484 errx(1, "too many lines");
485 } else {
486 if (*lno < INT_MIN - offset)
487 errx(1, "too many reverse line feeds");
488 }
489 *lno += offset;
490 }
491
492 #define NALLOC 64
493
494 static LINE *line_freelist;
495
496 LINE *
alloc_line(void)497 alloc_line(void)
498 {
499 LINE *l;
500 int i;
501
502 if (!line_freelist) {
503 l = xreallocarray(NULL, NALLOC, sizeof(LINE));
504 line_freelist = l;
505 for (i = 1; i < NALLOC; i++, l++)
506 l->l_next = l + 1;
507 l->l_next = NULL;
508 }
509 l = line_freelist;
510 line_freelist = l->l_next;
511
512 memset(l, 0, sizeof(LINE));
513 return (l);
514 }
515
516 void
free_line(LINE * l)517 free_line(LINE *l)
518 {
519
520 l->l_next = line_freelist;
521 line_freelist = l;
522 }
523
524 void *
xreallocarray(void * p,size_t n,size_t size)525 xreallocarray(void *p, size_t n, size_t size)
526 {
527
528 if (!(p = reallocarray(p, n, size)))
529 err(1, "realloc failed");
530 return (p);
531 }
532
533 void
usage(void)534 usage(void)
535 {
536 (void)fprintf(stderr, "usage: col [-bfhx] [-l num]\n");
537 exit(1);
538 }
539
540 void
dowarn(int line)541 dowarn(int line)
542 {
543
544 warnx("warning: can't back up %s",
545 line < 0 ? "past first line" : "-- line already flushed");
546 }
547