xref: /openbsd/usr.bin/col/col.c (revision cecf84d4)
1 /*	$OpenBSD: col.c,v 1.17 2015/05/09 20:36:18 schwarze Exp $	*/
2 /*	$NetBSD: col.c,v 1.7 1995/09/02 05:48:50 jtc Exp $	*/
3 
4 /*-
5  * Copyright (c) 1990, 1993, 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Michael Rendell of the Memorial University of Newfoundland.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <ctype.h>
37 #include <err.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <unistd.h>
42 #include <limits.h>
43 
44 #define	BS	'\b'		/* backspace */
45 #define	TAB	'\t'		/* tab */
46 #define	SPACE	' '		/* space */
47 #define	NL	'\n'		/* newline */
48 #define	CR	'\r'		/* carriage return */
49 #define	ESC	'\033'		/* escape */
50 #define	SI	'\017'		/* shift in to normal character set */
51 #define	SO	'\016'		/* shift out to alternate character set */
52 #define	VT	'\013'		/* vertical tab (aka reverse line feed) */
53 
54 /* build up at least this many lines before flushing them out */
55 #define	BUFFER_MARGIN		32
56 
57 typedef char CSET;
58 
59 typedef struct char_str {
60 #define	CS_NORMAL	1
61 #define	CS_ALTERNATE	2
62 	size_t		c_column;	/* column character is in */
63 	CSET		c_set;		/* character set (currently only 2) */
64 	char		c_char;		/* character in question */
65 } CHAR;
66 
67 typedef struct line_str LINE;
68 struct line_str {
69 	CHAR	*l_line;		/* characters on the line */
70 	LINE	*l_prev;		/* previous line */
71 	LINE	*l_next;		/* next line */
72 	size_t	l_lsize;		/* allocated sizeof l_line */
73 	size_t	l_line_len;		/* strlen(l_line) */
74 	size_t	l_max_col;		/* max column in the line */
75 	int	l_needs_sort;		/* set if chars went in out of order */
76 };
77 
78 void	addto_lineno(int *, int);
79 LINE   *alloc_line(void);
80 void	dowarn(int);
81 void	flush_line(LINE *);
82 void	flush_lines(int);
83 void	flush_blanks(void);
84 void	free_line(LINE *);
85 void	usage(void);
86 void   *xreallocarray(void *, size_t, size_t);
87 
88 CSET	last_set;		/* char_set of last char printed */
89 LINE   *lines;
90 int	compress_spaces;	/* if doing space -> tab conversion */
91 int	fine;			/* if `fine' resolution (half lines) */
92 int	max_bufd_lines;		/* max # of half lines to keep in memory */
93 int	nblank_lines;		/* # blanks after last flushed line */
94 int	no_backspaces;		/* if not to output any backspaces */
95 
96 #define	PUTC(ch) \
97 	if (putchar(ch) == EOF) \
98 		err(1, "stdout");
99 
100 int
101 main(int argc, char *argv[])
102 {
103 	int ch;
104 	CHAR *c;
105 	CSET cur_set;			/* current character set */
106 	LINE *l;			/* current line */
107 	int extra_lines;		/* # of lines above first line */
108 	size_t cur_col;			/* current column */
109 	int cur_line;			/* line number of current position */
110 	int max_line;			/* max value of cur_line */
111 	int this_line;			/* line l points to */
112 	int nflushd_lines;		/* number of lines that were flushed */
113 	int adjust, opt, warned;
114 	const char *errstr;
115 
116 	max_bufd_lines = 256;
117 	compress_spaces = 1;		/* compress spaces into tabs */
118 	while ((opt = getopt(argc, argv, "bfhl:x")) != -1)
119 		switch (opt) {
120 		case 'b':		/* do not output backspaces */
121 			no_backspaces = 1;
122 			break;
123 		case 'f':		/* allow half forward line feeds */
124 			fine = 1;
125 			break;
126 		case 'h':		/* compress spaces into tabs */
127 			compress_spaces = 1;
128 			break;
129 		case 'l':		/* buffered line count */
130 			max_bufd_lines = strtonum(optarg, 1,
131 			    (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2;
132 			if (errstr != NULL)
133 				errx(1, "bad -l argument, %s: %s", errstr,
134 					optarg);
135 			break;
136 		case 'x':		/* do not compress spaces into tabs */
137 			compress_spaces = 0;
138 			break;
139 		case '?':
140 		default:
141 			usage();
142 		}
143 
144 	if (optind != argc)
145 		usage();
146 
147 	adjust = extra_lines = warned = 0;
148 	cur_col = 0;
149 	cur_line = max_line = nflushd_lines = this_line = 0;
150 	cur_set = last_set = CS_NORMAL;
151 	lines = l = alloc_line();
152 
153 	while ((ch = getchar()) != EOF) {
154 		if (!isgraph(ch)) {
155 			switch (ch) {
156 			case BS:		/* can't go back further */
157 				if (cur_col == 0)
158 					continue;
159 				--cur_col;
160 				continue;
161 			case CR:
162 				cur_col = 0;
163 				continue;
164 			case ESC:		/* just ignore EOF */
165 				/*
166 				 * In the input stream, accept both the
167 				 * XPG5 sequences ESC-digit and the
168 				 * traditional BSD sequences ESC-ctrl.
169 				 */
170 				switch(getchar()) {
171 				case '7':  /* reverse line feed */
172 					/* FALLTHROUGH */
173 				case '\007':
174 					addto_lineno(&cur_line, -2);
175 					break;
176 				case '8':  /* reverse half-line feed */
177 					/* FALLTHROUGH */
178 				case '\010':
179 					addto_lineno(&cur_line, -1);
180 					break;
181 				case '9':  /* forward half-line feed */
182 					/* FALLTHROUGH */
183 				case '\011':
184 					addto_lineno(&cur_line, 1);
185 					if (cur_line > max_line)
186 						max_line = cur_line;
187 				}
188 				continue;
189 			case NL:
190 				addto_lineno(&cur_line, 2);
191 				if (cur_line > max_line)
192 					max_line = cur_line;
193 				cur_col = 0;
194 				continue;
195 			case SPACE:
196 				++cur_col;
197 				continue;
198 			case SI:
199 				cur_set = CS_NORMAL;
200 				continue;
201 			case SO:
202 				cur_set = CS_ALTERNATE;
203 				continue;
204 			case TAB:		/* adjust column */
205 				cur_col |= 7;
206 				++cur_col;
207 				continue;
208 			case VT:
209 				addto_lineno(&cur_line, -2);
210 				continue;
211 			}
212 			continue;
213 		}
214 
215 		/* Must stuff ch in a line - are we at the right one? */
216 		if (cur_line + adjust != this_line) {
217 			LINE *lnew;
218 
219 			/* round up to next line */
220 			adjust = !fine && (cur_line & 1);
221 
222 			if (cur_line + adjust < this_line) {
223 				while (cur_line + adjust < this_line &&
224 				    l->l_prev != NULL) {
225 					l = l->l_prev;
226 					this_line--;
227 				}
228 				if (cur_line + adjust < this_line) {
229 					if (nflushd_lines == 0) {
230 						/*
231 						 * Allow backup past first
232 						 * line if nothing has been
233 						 * flushed yet.
234 						 */
235 						while (cur_line + adjust
236 						    < this_line) {
237 							lnew = alloc_line();
238 							l->l_prev = lnew;
239 							lnew->l_next = l;
240 							l = lines = lnew;
241 							extra_lines++;
242 							this_line--;
243 						}
244 					} else {
245 						if (!warned++)
246 							dowarn(cur_line);
247 						cur_line = this_line - adjust;
248 					}
249 				}
250 			} else {
251 				/* may need to allocate here */
252 				while (cur_line + adjust > this_line) {
253 					if (l->l_next == NULL) {
254 						l->l_next = alloc_line();
255 						l->l_next->l_prev = l;
256 					}
257 					l = l->l_next;
258 					this_line++;
259 				}
260 			}
261 			if (this_line > nflushd_lines &&
262 			    this_line - nflushd_lines >=
263 			    max_bufd_lines + BUFFER_MARGIN) {
264 				if (extra_lines) {
265 					flush_lines(extra_lines);
266 					extra_lines = 0;
267 				}
268 				flush_lines(this_line - nflushd_lines -
269 				    max_bufd_lines);
270 				nflushd_lines = this_line - max_bufd_lines;
271 			}
272 		}
273 		/* grow line's buffer? */
274 		if (l->l_line_len + 1 >= l->l_lsize) {
275 			size_t need;
276 
277 			need = l->l_lsize ? l->l_lsize : 45;
278 			l->l_line = xreallocarray(l->l_line,
279 			    need, 2 * sizeof(CHAR));
280 			l->l_lsize = need * 2;
281 		}
282 		c = &l->l_line[l->l_line_len++];
283 		c->c_char = ch;
284 		c->c_set = cur_set;
285 		c->c_column = cur_col;
286 		/*
287 		 * If things are put in out of order, they will need sorting
288 		 * when it is flushed.
289 		 */
290 		if (cur_col < l->l_max_col)
291 			l->l_needs_sort = 1;
292 		else
293 			l->l_max_col = cur_col;
294 		cur_col++;
295 	}
296 	if (extra_lines)
297 		flush_lines(extra_lines);
298 
299 	/* goto the last line that had a character on it */
300 	for (; l->l_next; l = l->l_next)
301 		this_line++;
302 	flush_lines(this_line - nflushd_lines + 1);
303 
304 	/* make sure we leave things in a sane state */
305 	if (last_set != CS_NORMAL)
306 		PUTC(SI);
307 
308 	/* flush out the last few blank lines */
309 	if (max_line > this_line)
310 		nblank_lines = max_line - this_line;
311 	if (max_line & 1)
312 		nblank_lines++;
313 	flush_blanks();
314 	exit(0);
315 }
316 
317 void
318 flush_lines(int nflush)
319 {
320 	LINE *l;
321 
322 	while (--nflush >= 0) {
323 		l = lines;
324 		lines = l->l_next;
325 		if (l->l_line) {
326 			flush_blanks();
327 			flush_line(l);
328 		}
329 		if (l->l_line || l->l_next)
330 			nblank_lines++;
331 		if (l->l_line)
332 			(void)free((void *)l->l_line);
333 		free_line(l);
334 	}
335 	if (lines)
336 		lines->l_prev = NULL;
337 }
338 
339 /*
340  * Print a number of newline/half newlines.  If fine flag is set, nblank_lines
341  * is the number of half line feeds, otherwise it is the number of whole line
342  * feeds.
343  */
344 void
345 flush_blanks(void)
346 {
347 	int half, i, nb;
348 
349 	half = 0;
350 	nb = nblank_lines;
351 	if (nb & 1) {
352 		if (fine)
353 			half = 1;
354 		else
355 			nb++;
356 	}
357 	nb /= 2;
358 	for (i = nb; --i >= 0;)
359 		PUTC('\n');
360 	if (half) {
361 		/*
362 		 * In the output stream, always generate
363 		 * escape sequences conforming to XPG5.
364 		 */
365 		PUTC(ESC);
366 		PUTC('9');
367 		if (!nb)
368 			PUTC('\r');
369 	}
370 	nblank_lines = 0;
371 }
372 
373 /*
374  * Write a line to stdout taking care of space to tab conversion (-h flag)
375  * and character set shifts.
376  */
377 void
378 flush_line(LINE *l)
379 {
380 	CHAR *c, *endc;
381 	size_t nchars, last_col, this_col;
382 
383 	last_col = 0;
384 	nchars = l->l_line_len;
385 
386 	if (l->l_needs_sort) {
387 		static CHAR *sorted;
388 		static size_t count_size, i, sorted_size;
389 		static int *count, save, tot;
390 
391 		/*
392 		 * Do an O(n) sort on l->l_line by column being careful to
393 		 * preserve the order of characters in the same column.
394 		 */
395 		if (l->l_lsize > sorted_size) {
396 			sorted_size = l->l_lsize;
397 			sorted = xreallocarray(sorted,
398 			    sorted_size, sizeof(CHAR));
399 		}
400 		if (l->l_max_col >= count_size) {
401 			count_size = l->l_max_col + 1;
402 			count = xreallocarray(count,
403 			    count_size, sizeof(int));
404 		}
405 		memset(count, 0, sizeof(*count) * (l->l_max_col + 1));
406 		for (i = nchars, c = l->l_line; i-- > 0; c++)
407 			count[c->c_column]++;
408 
409 		/*
410 		 * calculate running total (shifted down by 1) to use as
411 		 * indices into new line.
412 		 */
413 		for (tot = 0, i = 0; i <= l->l_max_col; i++) {
414 			save = count[i];
415 			count[i] = tot;
416 			tot += save;
417 		}
418 
419 		for (i = nchars, c = l->l_line; i-- > 0; c++)
420 			sorted[count[c->c_column]++] = *c;
421 		c = sorted;
422 	} else
423 		c = l->l_line;
424 	while (nchars > 0) {
425 		this_col = c->c_column;
426 		endc = c;
427 		do {
428 			++endc;
429 		} while (--nchars > 0 && this_col == endc->c_column);
430 
431 		/* if -b only print last character */
432 		if (no_backspaces)
433 			c = endc - 1;
434 
435 		if (this_col > last_col) {
436 			size_t nspace = this_col - last_col;
437 
438 			if (compress_spaces && nspace > 1) {
439 				size_t ntabs;
440 
441 				ntabs = ((last_col % 8) + nspace) / 8;
442 				if (ntabs) {
443 					nspace -= (ntabs * 8) - (last_col % 8);
444 					while (ntabs-- > 0)
445 						PUTC('\t');
446 				}
447 			}
448 			while (nspace-- > 0)
449 				PUTC(' ');
450 			last_col = this_col;
451 		}
452 		last_col++;
453 
454 		for (;;) {
455 			if (c->c_set != last_set) {
456 				switch (c->c_set) {
457 				case CS_NORMAL:
458 					PUTC(SI);
459 					break;
460 				case CS_ALTERNATE:
461 					PUTC(SO);
462 				}
463 				last_set = c->c_set;
464 			}
465 			PUTC(c->c_char);
466 			if (++c >= endc)
467 				break;
468 			PUTC('\b');
469 		}
470 	}
471 }
472 
473 /*
474  * Increment or decrement a line number, checking for overflow.
475  * Stop one below INT_MAX such that the adjust variable is safe.
476  */
477 void
478 addto_lineno(int *lno, int offset)
479 {
480 	if (offset > 0) {
481 		if (*lno >= INT_MAX - offset)
482 			errx(1, "too many lines");
483 	} else {
484 		if (*lno < INT_MIN - offset)
485 			errx(1, "too many reverse line feeds");
486 	}
487 	*lno += offset;
488 }
489 
490 #define	NALLOC 64
491 
492 static LINE *line_freelist;
493 
494 LINE *
495 alloc_line(void)
496 {
497 	LINE *l;
498 	int i;
499 
500 	if (!line_freelist) {
501 		l = xreallocarray(NULL, NALLOC, sizeof(LINE));
502 		line_freelist = l;
503 		for (i = 1; i < NALLOC; i++, l++)
504 			l->l_next = l + 1;
505 		l->l_next = NULL;
506 	}
507 	l = line_freelist;
508 	line_freelist = l->l_next;
509 
510 	memset(l, 0, sizeof(LINE));
511 	return (l);
512 }
513 
514 void
515 free_line(LINE *l)
516 {
517 
518 	l->l_next = line_freelist;
519 	line_freelist = l;
520 }
521 
522 void *
523 xreallocarray(void *p, size_t n, size_t size)
524 {
525 
526 	if (!(p = reallocarray(p, n, size)))
527 		err(1, "realloc failed");
528 	return (p);
529 }
530 
531 void
532 usage(void)
533 {
534 	(void)fprintf(stderr, "usage: col [-bfhx] [-l num]\n");
535 	exit(1);
536 }
537 
538 void
539 dowarn(int line)
540 {
541 
542 	warnx("warning: can't back up %s",
543 		line < 0 ? "past first line" : "-- line already flushed");
544 }
545