xref: /openbsd/usr.bin/col/col.c (revision d7259957)
1 /*	$OpenBSD: col.c,v 1.20 2022/12/04 23:50:47 cheloha Exp $	*/
2 /*	$NetBSD: col.c,v 1.7 1995/09/02 05:48:50 jtc Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993, 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Michael Rendell of the Memorial University of Newfoundland.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <ctype.h>
37 #include <err.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <unistd.h>
42 #include <limits.h>
43 
44 #define	BS	'\b'		/* backspace */
45 #define	TAB	'\t'		/* tab */
46 #define	SPACE	' '		/* space */
47 #define	NL	'\n'		/* newline */
48 #define	CR	'\r'		/* carriage return */
49 #define	ESC	'\033'		/* escape */
50 #define	SI	'\017'		/* shift in to normal character set */
51 #define	SO	'\016'		/* shift out to alternate character set */
52 #define	VT	'\013'		/* vertical tab (aka reverse line feed) */
53 
54 /* build up at least this many lines before flushing them out */
55 #define	BUFFER_MARGIN		32
56 
57 typedef char CSET;
58 
59 typedef struct char_str {
60 #define	CS_NORMAL	1
61 #define	CS_ALTERNATE	2
62 	size_t		c_column;	/* column character is in */
63 	CSET		c_set;		/* character set (currently only 2) */
64 	char		c_char;		/* character in question */
65 } CHAR;
66 
67 typedef struct line_str LINE;
68 struct line_str {
69 	CHAR	*l_line;		/* characters on the line */
70 	LINE	*l_prev;		/* previous line */
71 	LINE	*l_next;		/* next line */
72 	size_t	l_lsize;		/* allocated sizeof l_line */
73 	size_t	l_line_len;		/* strlen(l_line) */
74 	size_t	l_max_col;		/* max column in the line */
75 	int	l_needs_sort;		/* set if chars went in out of order */
76 };
77 
78 void	addto_lineno(int *, int);
79 LINE   *alloc_line(void);
80 void	dowarn(int);
81 void	flush_line(LINE *);
82 void	flush_lines(int);
83 void	flush_blanks(void);
84 void	free_line(LINE *);
85 void	usage(void);
86 void   *xreallocarray(void *, size_t, size_t);
87 
88 CSET	last_set;		/* char_set of last char printed */
89 LINE   *lines;
90 int	compress_spaces;	/* if doing space -> tab conversion */
91 int	fine;			/* if `fine' resolution (half lines) */
92 int	max_bufd_lines;		/* max # of half lines to keep in memory */
93 int	nblank_lines;		/* # blanks after last flushed line */
94 int	no_backspaces;		/* if not to output any backspaces */
95 
96 #define	PUTC(ch) \
97 	if (putchar(ch) == EOF) \
98 		err(1, "stdout");
99 
100 int
main(int argc,char * argv[])101 main(int argc, char *argv[])
102 {
103 	int ch;
104 	CHAR *c;
105 	CSET cur_set;			/* current character set */
106 	LINE *l;			/* current line */
107 	int extra_lines;		/* # of lines above first line */
108 	size_t cur_col;			/* current column */
109 	int cur_line;			/* line number of current position */
110 	int max_line;			/* max value of cur_line */
111 	int this_line;			/* line l points to */
112 	int nflushd_lines;		/* number of lines that were flushed */
113 	int adjust, opt, warned;
114 	const char *errstr;
115 
116 	if (pledge("stdio", NULL) == -1)
117 		err(1, "pledge");
118 
119 	max_bufd_lines = 256;
120 	compress_spaces = 1;		/* compress spaces into tabs */
121 	while ((opt = getopt(argc, argv, "bfhl:x")) != -1)
122 		switch (opt) {
123 		case 'b':		/* do not output backspaces */
124 			no_backspaces = 1;
125 			break;
126 		case 'f':		/* allow half forward line feeds */
127 			fine = 1;
128 			break;
129 		case 'h':		/* compress spaces into tabs */
130 			compress_spaces = 1;
131 			break;
132 		case 'l':		/* buffered line count */
133 			max_bufd_lines = strtonum(optarg, 1,
134 			    (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2;
135 			if (errstr != NULL)
136 				errx(1, "bad -l argument, %s: %s", errstr,
137 					optarg);
138 			break;
139 		case 'x':		/* do not compress spaces into tabs */
140 			compress_spaces = 0;
141 			break;
142 		default:
143 			usage();
144 		}
145 
146 	if (optind != argc)
147 		usage();
148 
149 	adjust = extra_lines = warned = 0;
150 	cur_col = 0;
151 	cur_line = max_line = nflushd_lines = this_line = 0;
152 	cur_set = last_set = CS_NORMAL;
153 	lines = l = alloc_line();
154 
155 	while ((ch = getchar()) != EOF) {
156 		if (!isgraph(ch)) {
157 			switch (ch) {
158 			case BS:		/* can't go back further */
159 				if (cur_col == 0)
160 					continue;
161 				--cur_col;
162 				continue;
163 			case CR:
164 				cur_col = 0;
165 				continue;
166 			case ESC:		/* just ignore EOF */
167 				/*
168 				 * In the input stream, accept both the
169 				 * XPG5 sequences ESC-digit and the
170 				 * traditional BSD sequences ESC-ctrl.
171 				 */
172 				switch(getchar()) {
173 				case '7':  /* reverse line feed */
174 					/* FALLTHROUGH */
175 				case '\007':
176 					addto_lineno(&cur_line, -2);
177 					break;
178 				case '8':  /* reverse half-line feed */
179 					/* FALLTHROUGH */
180 				case '\010':
181 					addto_lineno(&cur_line, -1);
182 					break;
183 				case '9':  /* forward half-line feed */
184 					/* FALLTHROUGH */
185 				case '\011':
186 					addto_lineno(&cur_line, 1);
187 					if (cur_line > max_line)
188 						max_line = cur_line;
189 				}
190 				continue;
191 			case NL:
192 				addto_lineno(&cur_line, 2);
193 				if (cur_line > max_line)
194 					max_line = cur_line;
195 				cur_col = 0;
196 				continue;
197 			case SPACE:
198 				++cur_col;
199 				continue;
200 			case SI:
201 				cur_set = CS_NORMAL;
202 				continue;
203 			case SO:
204 				cur_set = CS_ALTERNATE;
205 				continue;
206 			case TAB:		/* adjust column */
207 				cur_col |= 7;
208 				++cur_col;
209 				continue;
210 			case VT:
211 				addto_lineno(&cur_line, -2);
212 				continue;
213 			}
214 			continue;
215 		}
216 
217 		/* Must stuff ch in a line - are we at the right one? */
218 		if (cur_line + adjust != this_line) {
219 			LINE *lnew;
220 
221 			/* round up to next line */
222 			adjust = !fine && (cur_line & 1);
223 
224 			if (cur_line + adjust < this_line) {
225 				while (cur_line + adjust < this_line &&
226 				    l->l_prev != NULL) {
227 					l = l->l_prev;
228 					this_line--;
229 				}
230 				if (cur_line + adjust < this_line) {
231 					if (nflushd_lines == 0) {
232 						/*
233 						 * Allow backup past first
234 						 * line if nothing has been
235 						 * flushed yet.
236 						 */
237 						while (cur_line + adjust
238 						    < this_line) {
239 							lnew = alloc_line();
240 							l->l_prev = lnew;
241 							lnew->l_next = l;
242 							l = lines = lnew;
243 							extra_lines++;
244 							this_line--;
245 						}
246 					} else {
247 						if (!warned++)
248 							dowarn(cur_line);
249 						cur_line = this_line - adjust;
250 					}
251 				}
252 			} else {
253 				/* may need to allocate here */
254 				while (cur_line + adjust > this_line) {
255 					if (l->l_next == NULL) {
256 						l->l_next = alloc_line();
257 						l->l_next->l_prev = l;
258 					}
259 					l = l->l_next;
260 					this_line++;
261 				}
262 			}
263 			if (this_line > nflushd_lines &&
264 			    this_line - nflushd_lines >=
265 			    max_bufd_lines + BUFFER_MARGIN) {
266 				if (extra_lines) {
267 					flush_lines(extra_lines);
268 					extra_lines = 0;
269 				}
270 				flush_lines(this_line - nflushd_lines -
271 				    max_bufd_lines);
272 				nflushd_lines = this_line - max_bufd_lines;
273 			}
274 		}
275 		/* grow line's buffer? */
276 		if (l->l_line_len + 1 >= l->l_lsize) {
277 			size_t need;
278 
279 			need = l->l_lsize ? l->l_lsize : 45;
280 			l->l_line = xreallocarray(l->l_line,
281 			    need, 2 * sizeof(CHAR));
282 			l->l_lsize = need * 2;
283 		}
284 		c = &l->l_line[l->l_line_len++];
285 		c->c_char = ch;
286 		c->c_set = cur_set;
287 		c->c_column = cur_col;
288 		/*
289 		 * If things are put in out of order, they will need sorting
290 		 * when it is flushed.
291 		 */
292 		if (cur_col < l->l_max_col)
293 			l->l_needs_sort = 1;
294 		else
295 			l->l_max_col = cur_col;
296 		cur_col++;
297 	}
298 	if (extra_lines)
299 		flush_lines(extra_lines);
300 
301 	/* goto the last line that had a character on it */
302 	for (; l->l_next; l = l->l_next)
303 		this_line++;
304 	flush_lines(this_line - nflushd_lines + 1);
305 
306 	/* make sure we leave things in a sane state */
307 	if (last_set != CS_NORMAL)
308 		PUTC(SI);
309 
310 	/* flush out the last few blank lines */
311 	if (max_line > this_line)
312 		nblank_lines = max_line - this_line;
313 	if (max_line & 1)
314 		nblank_lines++;
315 	flush_blanks();
316 	exit(0);
317 }
318 
319 void
flush_lines(int nflush)320 flush_lines(int nflush)
321 {
322 	LINE *l;
323 
324 	while (--nflush >= 0) {
325 		l = lines;
326 		lines = l->l_next;
327 		if (l->l_line) {
328 			flush_blanks();
329 			flush_line(l);
330 		}
331 		if (l->l_line || l->l_next)
332 			nblank_lines++;
333 		if (l->l_line)
334 			(void)free((void *)l->l_line);
335 		free_line(l);
336 	}
337 	if (lines)
338 		lines->l_prev = NULL;
339 }
340 
341 /*
342  * Print a number of newline/half newlines.  If fine flag is set, nblank_lines
343  * is the number of half line feeds, otherwise it is the number of whole line
344  * feeds.
345  */
346 void
flush_blanks(void)347 flush_blanks(void)
348 {
349 	int half, i, nb;
350 
351 	half = 0;
352 	nb = nblank_lines;
353 	if (nb & 1) {
354 		if (fine)
355 			half = 1;
356 		else
357 			nb++;
358 	}
359 	nb /= 2;
360 	for (i = nb; --i >= 0;)
361 		PUTC('\n');
362 	if (half) {
363 		/*
364 		 * In the output stream, always generate
365 		 * escape sequences conforming to XPG5.
366 		 */
367 		PUTC(ESC);
368 		PUTC('9');
369 		if (!nb)
370 			PUTC('\r');
371 	}
372 	nblank_lines = 0;
373 }
374 
375 /*
376  * Write a line to stdout taking care of space to tab conversion (-h flag)
377  * and character set shifts.
378  */
379 void
flush_line(LINE * l)380 flush_line(LINE *l)
381 {
382 	CHAR *c, *endc;
383 	size_t nchars, last_col, this_col;
384 
385 	last_col = 0;
386 	nchars = l->l_line_len;
387 
388 	if (l->l_needs_sort) {
389 		static CHAR *sorted;
390 		static size_t count_size, i, sorted_size;
391 		static int *count, save, tot;
392 
393 		/*
394 		 * Do an O(n) sort on l->l_line by column being careful to
395 		 * preserve the order of characters in the same column.
396 		 */
397 		if (l->l_lsize > sorted_size) {
398 			sorted_size = l->l_lsize;
399 			sorted = xreallocarray(sorted,
400 			    sorted_size, sizeof(CHAR));
401 		}
402 		if (l->l_max_col >= count_size) {
403 			count_size = l->l_max_col + 1;
404 			count = xreallocarray(count,
405 			    count_size, sizeof(int));
406 		}
407 		memset(count, 0, sizeof(*count) * (l->l_max_col + 1));
408 		for (i = nchars, c = l->l_line; i-- > 0; c++)
409 			count[c->c_column]++;
410 
411 		/*
412 		 * calculate running total (shifted down by 1) to use as
413 		 * indices into new line.
414 		 */
415 		for (tot = 0, i = 0; i <= l->l_max_col; i++) {
416 			save = count[i];
417 			count[i] = tot;
418 			tot += save;
419 		}
420 
421 		for (i = nchars, c = l->l_line; i-- > 0; c++)
422 			sorted[count[c->c_column]++] = *c;
423 		c = sorted;
424 	} else
425 		c = l->l_line;
426 	while (nchars > 0) {
427 		this_col = c->c_column;
428 		endc = c;
429 		do {
430 			++endc;
431 		} while (--nchars > 0 && this_col == endc->c_column);
432 
433 		/* if -b only print last character */
434 		if (no_backspaces)
435 			c = endc - 1;
436 
437 		if (this_col > last_col) {
438 			size_t nspace = this_col - last_col;
439 
440 			if (compress_spaces && nspace > 1) {
441 				size_t ntabs;
442 
443 				ntabs = ((last_col % 8) + nspace) / 8;
444 				if (ntabs) {
445 					nspace -= (ntabs * 8) - (last_col % 8);
446 					while (ntabs-- > 0)
447 						PUTC('\t');
448 				}
449 			}
450 			while (nspace-- > 0)
451 				PUTC(' ');
452 			last_col = this_col;
453 		}
454 		last_col++;
455 
456 		for (;;) {
457 			if (c->c_set != last_set) {
458 				switch (c->c_set) {
459 				case CS_NORMAL:
460 					PUTC(SI);
461 					break;
462 				case CS_ALTERNATE:
463 					PUTC(SO);
464 				}
465 				last_set = c->c_set;
466 			}
467 			PUTC(c->c_char);
468 			if (++c >= endc)
469 				break;
470 			PUTC('\b');
471 		}
472 	}
473 }
474 
475 /*
476  * Increment or decrement a line number, checking for overflow.
477  * Stop one below INT_MAX such that the adjust variable is safe.
478  */
479 void
addto_lineno(int * lno,int offset)480 addto_lineno(int *lno, int offset)
481 {
482 	if (offset > 0) {
483 		if (*lno >= INT_MAX - offset)
484 			errx(1, "too many lines");
485 	} else {
486 		if (*lno < INT_MIN - offset)
487 			errx(1, "too many reverse line feeds");
488 	}
489 	*lno += offset;
490 }
491 
492 #define	NALLOC 64
493 
494 static LINE *line_freelist;
495 
496 LINE *
alloc_line(void)497 alloc_line(void)
498 {
499 	LINE *l;
500 	int i;
501 
502 	if (!line_freelist) {
503 		l = xreallocarray(NULL, NALLOC, sizeof(LINE));
504 		line_freelist = l;
505 		for (i = 1; i < NALLOC; i++, l++)
506 			l->l_next = l + 1;
507 		l->l_next = NULL;
508 	}
509 	l = line_freelist;
510 	line_freelist = l->l_next;
511 
512 	memset(l, 0, sizeof(LINE));
513 	return (l);
514 }
515 
516 void
free_line(LINE * l)517 free_line(LINE *l)
518 {
519 
520 	l->l_next = line_freelist;
521 	line_freelist = l;
522 }
523 
524 void *
xreallocarray(void * p,size_t n,size_t size)525 xreallocarray(void *p, size_t n, size_t size)
526 {
527 
528 	if (!(p = reallocarray(p, n, size)))
529 		err(1, "realloc failed");
530 	return (p);
531 }
532 
533 void
usage(void)534 usage(void)
535 {
536 	(void)fprintf(stderr, "usage: col [-bfhx] [-l num]\n");
537 	exit(1);
538 }
539 
540 void
dowarn(int line)541 dowarn(int line)
542 {
543 
544 	warnx("warning: can't back up %s",
545 		line < 0 ? "past first line" : "-- line already flushed");
546 }
547