xref: /openbsd/usr.bin/col/col.c (revision 5af055cd)
1 /*	$OpenBSD: col.c,v 1.19 2015/10/09 01:37:06 deraadt Exp $	*/
2 /*	$NetBSD: col.c,v 1.7 1995/09/02 05:48:50 jtc Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993, 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Michael Rendell of the Memorial University of Newfoundland.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <ctype.h>
37 #include <err.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <unistd.h>
42 #include <limits.h>
43 
44 #define	BS	'\b'		/* backspace */
45 #define	TAB	'\t'		/* tab */
46 #define	SPACE	' '		/* space */
47 #define	NL	'\n'		/* newline */
48 #define	CR	'\r'		/* carriage return */
49 #define	ESC	'\033'		/* escape */
50 #define	SI	'\017'		/* shift in to normal character set */
51 #define	SO	'\016'		/* shift out to alternate character set */
52 #define	VT	'\013'		/* vertical tab (aka reverse line feed) */
53 
54 /* build up at least this many lines before flushing them out */
55 #define	BUFFER_MARGIN		32
56 
57 typedef char CSET;
58 
59 typedef struct char_str {
60 #define	CS_NORMAL	1
61 #define	CS_ALTERNATE	2
62 	size_t		c_column;	/* column character is in */
63 	CSET		c_set;		/* character set (currently only 2) */
64 	char		c_char;		/* character in question */
65 } CHAR;
66 
67 typedef struct line_str LINE;
68 struct line_str {
69 	CHAR	*l_line;		/* characters on the line */
70 	LINE	*l_prev;		/* previous line */
71 	LINE	*l_next;		/* next line */
72 	size_t	l_lsize;		/* allocated sizeof l_line */
73 	size_t	l_line_len;		/* strlen(l_line) */
74 	size_t	l_max_col;		/* max column in the line */
75 	int	l_needs_sort;		/* set if chars went in out of order */
76 };
77 
78 void	addto_lineno(int *, int);
79 LINE   *alloc_line(void);
80 void	dowarn(int);
81 void	flush_line(LINE *);
82 void	flush_lines(int);
83 void	flush_blanks(void);
84 void	free_line(LINE *);
85 void	usage(void);
86 void   *xreallocarray(void *, size_t, size_t);
87 
88 CSET	last_set;		/* char_set of last char printed */
89 LINE   *lines;
90 int	compress_spaces;	/* if doing space -> tab conversion */
91 int	fine;			/* if `fine' resolution (half lines) */
92 int	max_bufd_lines;		/* max # of half lines to keep in memory */
93 int	nblank_lines;		/* # blanks after last flushed line */
94 int	no_backspaces;		/* if not to output any backspaces */
95 
96 #define	PUTC(ch) \
97 	if (putchar(ch) == EOF) \
98 		err(1, "stdout");
99 
100 int
101 main(int argc, char *argv[])
102 {
103 	int ch;
104 	CHAR *c;
105 	CSET cur_set;			/* current character set */
106 	LINE *l;			/* current line */
107 	int extra_lines;		/* # of lines above first line */
108 	size_t cur_col;			/* current column */
109 	int cur_line;			/* line number of current position */
110 	int max_line;			/* max value of cur_line */
111 	int this_line;			/* line l points to */
112 	int nflushd_lines;		/* number of lines that were flushed */
113 	int adjust, opt, warned;
114 	const char *errstr;
115 
116 	if (pledge("stdio", NULL) == -1)
117 		err(1, "pledge");
118 
119 	max_bufd_lines = 256;
120 	compress_spaces = 1;		/* compress spaces into tabs */
121 	while ((opt = getopt(argc, argv, "bfhl:x")) != -1)
122 		switch (opt) {
123 		case 'b':		/* do not output backspaces */
124 			no_backspaces = 1;
125 			break;
126 		case 'f':		/* allow half forward line feeds */
127 			fine = 1;
128 			break;
129 		case 'h':		/* compress spaces into tabs */
130 			compress_spaces = 1;
131 			break;
132 		case 'l':		/* buffered line count */
133 			max_bufd_lines = strtonum(optarg, 1,
134 			    (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2;
135 			if (errstr != NULL)
136 				errx(1, "bad -l argument, %s: %s", errstr,
137 					optarg);
138 			break;
139 		case 'x':		/* do not compress spaces into tabs */
140 			compress_spaces = 0;
141 			break;
142 		case '?':
143 		default:
144 			usage();
145 		}
146 
147 	if (optind != argc)
148 		usage();
149 
150 	adjust = extra_lines = warned = 0;
151 	cur_col = 0;
152 	cur_line = max_line = nflushd_lines = this_line = 0;
153 	cur_set = last_set = CS_NORMAL;
154 	lines = l = alloc_line();
155 
156 	while ((ch = getchar()) != EOF) {
157 		if (!isgraph(ch)) {
158 			switch (ch) {
159 			case BS:		/* can't go back further */
160 				if (cur_col == 0)
161 					continue;
162 				--cur_col;
163 				continue;
164 			case CR:
165 				cur_col = 0;
166 				continue;
167 			case ESC:		/* just ignore EOF */
168 				/*
169 				 * In the input stream, accept both the
170 				 * XPG5 sequences ESC-digit and the
171 				 * traditional BSD sequences ESC-ctrl.
172 				 */
173 				switch(getchar()) {
174 				case '7':  /* reverse line feed */
175 					/* FALLTHROUGH */
176 				case '\007':
177 					addto_lineno(&cur_line, -2);
178 					break;
179 				case '8':  /* reverse half-line feed */
180 					/* FALLTHROUGH */
181 				case '\010':
182 					addto_lineno(&cur_line, -1);
183 					break;
184 				case '9':  /* forward half-line feed */
185 					/* FALLTHROUGH */
186 				case '\011':
187 					addto_lineno(&cur_line, 1);
188 					if (cur_line > max_line)
189 						max_line = cur_line;
190 				}
191 				continue;
192 			case NL:
193 				addto_lineno(&cur_line, 2);
194 				if (cur_line > max_line)
195 					max_line = cur_line;
196 				cur_col = 0;
197 				continue;
198 			case SPACE:
199 				++cur_col;
200 				continue;
201 			case SI:
202 				cur_set = CS_NORMAL;
203 				continue;
204 			case SO:
205 				cur_set = CS_ALTERNATE;
206 				continue;
207 			case TAB:		/* adjust column */
208 				cur_col |= 7;
209 				++cur_col;
210 				continue;
211 			case VT:
212 				addto_lineno(&cur_line, -2);
213 				continue;
214 			}
215 			continue;
216 		}
217 
218 		/* Must stuff ch in a line - are we at the right one? */
219 		if (cur_line + adjust != this_line) {
220 			LINE *lnew;
221 
222 			/* round up to next line */
223 			adjust = !fine && (cur_line & 1);
224 
225 			if (cur_line + adjust < this_line) {
226 				while (cur_line + adjust < this_line &&
227 				    l->l_prev != NULL) {
228 					l = l->l_prev;
229 					this_line--;
230 				}
231 				if (cur_line + adjust < this_line) {
232 					if (nflushd_lines == 0) {
233 						/*
234 						 * Allow backup past first
235 						 * line if nothing has been
236 						 * flushed yet.
237 						 */
238 						while (cur_line + adjust
239 						    < this_line) {
240 							lnew = alloc_line();
241 							l->l_prev = lnew;
242 							lnew->l_next = l;
243 							l = lines = lnew;
244 							extra_lines++;
245 							this_line--;
246 						}
247 					} else {
248 						if (!warned++)
249 							dowarn(cur_line);
250 						cur_line = this_line - adjust;
251 					}
252 				}
253 			} else {
254 				/* may need to allocate here */
255 				while (cur_line + adjust > this_line) {
256 					if (l->l_next == NULL) {
257 						l->l_next = alloc_line();
258 						l->l_next->l_prev = l;
259 					}
260 					l = l->l_next;
261 					this_line++;
262 				}
263 			}
264 			if (this_line > nflushd_lines &&
265 			    this_line - nflushd_lines >=
266 			    max_bufd_lines + BUFFER_MARGIN) {
267 				if (extra_lines) {
268 					flush_lines(extra_lines);
269 					extra_lines = 0;
270 				}
271 				flush_lines(this_line - nflushd_lines -
272 				    max_bufd_lines);
273 				nflushd_lines = this_line - max_bufd_lines;
274 			}
275 		}
276 		/* grow line's buffer? */
277 		if (l->l_line_len + 1 >= l->l_lsize) {
278 			size_t need;
279 
280 			need = l->l_lsize ? l->l_lsize : 45;
281 			l->l_line = xreallocarray(l->l_line,
282 			    need, 2 * sizeof(CHAR));
283 			l->l_lsize = need * 2;
284 		}
285 		c = &l->l_line[l->l_line_len++];
286 		c->c_char = ch;
287 		c->c_set = cur_set;
288 		c->c_column = cur_col;
289 		/*
290 		 * If things are put in out of order, they will need sorting
291 		 * when it is flushed.
292 		 */
293 		if (cur_col < l->l_max_col)
294 			l->l_needs_sort = 1;
295 		else
296 			l->l_max_col = cur_col;
297 		cur_col++;
298 	}
299 	if (extra_lines)
300 		flush_lines(extra_lines);
301 
302 	/* goto the last line that had a character on it */
303 	for (; l->l_next; l = l->l_next)
304 		this_line++;
305 	flush_lines(this_line - nflushd_lines + 1);
306 
307 	/* make sure we leave things in a sane state */
308 	if (last_set != CS_NORMAL)
309 		PUTC(SI);
310 
311 	/* flush out the last few blank lines */
312 	if (max_line > this_line)
313 		nblank_lines = max_line - this_line;
314 	if (max_line & 1)
315 		nblank_lines++;
316 	flush_blanks();
317 	exit(0);
318 }
319 
320 void
321 flush_lines(int nflush)
322 {
323 	LINE *l;
324 
325 	while (--nflush >= 0) {
326 		l = lines;
327 		lines = l->l_next;
328 		if (l->l_line) {
329 			flush_blanks();
330 			flush_line(l);
331 		}
332 		if (l->l_line || l->l_next)
333 			nblank_lines++;
334 		if (l->l_line)
335 			(void)free((void *)l->l_line);
336 		free_line(l);
337 	}
338 	if (lines)
339 		lines->l_prev = NULL;
340 }
341 
342 /*
343  * Print a number of newline/half newlines.  If fine flag is set, nblank_lines
344  * is the number of half line feeds, otherwise it is the number of whole line
345  * feeds.
346  */
347 void
348 flush_blanks(void)
349 {
350 	int half, i, nb;
351 
352 	half = 0;
353 	nb = nblank_lines;
354 	if (nb & 1) {
355 		if (fine)
356 			half = 1;
357 		else
358 			nb++;
359 	}
360 	nb /= 2;
361 	for (i = nb; --i >= 0;)
362 		PUTC('\n');
363 	if (half) {
364 		/*
365 		 * In the output stream, always generate
366 		 * escape sequences conforming to XPG5.
367 		 */
368 		PUTC(ESC);
369 		PUTC('9');
370 		if (!nb)
371 			PUTC('\r');
372 	}
373 	nblank_lines = 0;
374 }
375 
376 /*
377  * Write a line to stdout taking care of space to tab conversion (-h flag)
378  * and character set shifts.
379  */
380 void
381 flush_line(LINE *l)
382 {
383 	CHAR *c, *endc;
384 	size_t nchars, last_col, this_col;
385 
386 	last_col = 0;
387 	nchars = l->l_line_len;
388 
389 	if (l->l_needs_sort) {
390 		static CHAR *sorted;
391 		static size_t count_size, i, sorted_size;
392 		static int *count, save, tot;
393 
394 		/*
395 		 * Do an O(n) sort on l->l_line by column being careful to
396 		 * preserve the order of characters in the same column.
397 		 */
398 		if (l->l_lsize > sorted_size) {
399 			sorted_size = l->l_lsize;
400 			sorted = xreallocarray(sorted,
401 			    sorted_size, sizeof(CHAR));
402 		}
403 		if (l->l_max_col >= count_size) {
404 			count_size = l->l_max_col + 1;
405 			count = xreallocarray(count,
406 			    count_size, sizeof(int));
407 		}
408 		memset(count, 0, sizeof(*count) * (l->l_max_col + 1));
409 		for (i = nchars, c = l->l_line; i-- > 0; c++)
410 			count[c->c_column]++;
411 
412 		/*
413 		 * calculate running total (shifted down by 1) to use as
414 		 * indices into new line.
415 		 */
416 		for (tot = 0, i = 0; i <= l->l_max_col; i++) {
417 			save = count[i];
418 			count[i] = tot;
419 			tot += save;
420 		}
421 
422 		for (i = nchars, c = l->l_line; i-- > 0; c++)
423 			sorted[count[c->c_column]++] = *c;
424 		c = sorted;
425 	} else
426 		c = l->l_line;
427 	while (nchars > 0) {
428 		this_col = c->c_column;
429 		endc = c;
430 		do {
431 			++endc;
432 		} while (--nchars > 0 && this_col == endc->c_column);
433 
434 		/* if -b only print last character */
435 		if (no_backspaces)
436 			c = endc - 1;
437 
438 		if (this_col > last_col) {
439 			size_t nspace = this_col - last_col;
440 
441 			if (compress_spaces && nspace > 1) {
442 				size_t ntabs;
443 
444 				ntabs = ((last_col % 8) + nspace) / 8;
445 				if (ntabs) {
446 					nspace -= (ntabs * 8) - (last_col % 8);
447 					while (ntabs-- > 0)
448 						PUTC('\t');
449 				}
450 			}
451 			while (nspace-- > 0)
452 				PUTC(' ');
453 			last_col = this_col;
454 		}
455 		last_col++;
456 
457 		for (;;) {
458 			if (c->c_set != last_set) {
459 				switch (c->c_set) {
460 				case CS_NORMAL:
461 					PUTC(SI);
462 					break;
463 				case CS_ALTERNATE:
464 					PUTC(SO);
465 				}
466 				last_set = c->c_set;
467 			}
468 			PUTC(c->c_char);
469 			if (++c >= endc)
470 				break;
471 			PUTC('\b');
472 		}
473 	}
474 }
475 
476 /*
477  * Increment or decrement a line number, checking for overflow.
478  * Stop one below INT_MAX such that the adjust variable is safe.
479  */
480 void
481 addto_lineno(int *lno, int offset)
482 {
483 	if (offset > 0) {
484 		if (*lno >= INT_MAX - offset)
485 			errx(1, "too many lines");
486 	} else {
487 		if (*lno < INT_MIN - offset)
488 			errx(1, "too many reverse line feeds");
489 	}
490 	*lno += offset;
491 }
492 
493 #define	NALLOC 64
494 
495 static LINE *line_freelist;
496 
497 LINE *
498 alloc_line(void)
499 {
500 	LINE *l;
501 	int i;
502 
503 	if (!line_freelist) {
504 		l = xreallocarray(NULL, NALLOC, sizeof(LINE));
505 		line_freelist = l;
506 		for (i = 1; i < NALLOC; i++, l++)
507 			l->l_next = l + 1;
508 		l->l_next = NULL;
509 	}
510 	l = line_freelist;
511 	line_freelist = l->l_next;
512 
513 	memset(l, 0, sizeof(LINE));
514 	return (l);
515 }
516 
517 void
518 free_line(LINE *l)
519 {
520 
521 	l->l_next = line_freelist;
522 	line_freelist = l;
523 }
524 
525 void *
526 xreallocarray(void *p, size_t n, size_t size)
527 {
528 
529 	if (!(p = reallocarray(p, n, size)))
530 		err(1, "realloc failed");
531 	return (p);
532 }
533 
534 void
535 usage(void)
536 {
537 	(void)fprintf(stderr, "usage: col [-bfhx] [-l num]\n");
538 	exit(1);
539 }
540 
541 void
542 dowarn(int line)
543 {
544 
545 	warnx("warning: can't back up %s",
546 		line < 0 ? "past first line" : "-- line already flushed");
547 }
548