xref: /netbsd/usr.bin/fmt/fmt.c (revision bf9ec67e)
1 /*	$NetBSD: fmt.c,v 1.16 2002/03/02 13:55:13 wiz Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 __COPYRIGHT("@(#) Copyright (c) 1980, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n");
40 #endif /* not lint */
41 
42 #ifndef lint
43 #if 0
44 static char sccsid[] = "@(#)fmt.c	8.1 (Berkeley) 7/20/93";
45 #endif
46 __RCSID("$NetBSD: fmt.c,v 1.16 2002/03/02 13:55:13 wiz Exp $");
47 #endif /* not lint */
48 
49 #include <ctype.h>
50 #include <locale.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 
55 /*
56  * fmt -- format the concatenation of input files or standard input
57  * onto standard output.  Designed for use with Mail ~|
58  *
59  * Syntax : fmt [ goal [ max ] ] [ name ... ]
60  * Authors: Kurt Shoens (UCB) 12/7/78;
61  *          Liz Allen (UMCP) 2/24/83 [Addition of goal length concept].
62  */
63 
64 /* LIZ@UOM 6/18/85 -- Don't need LENGTH any more.
65  * #define	LENGTH	72		Max line length in output
66  */
/* Typed null "string", used as the sentinel for char pointers. */
#define	NOSTR	((char *) 0)

/*
 * LIZ@UOM 6/18/85 -- the single LENGTH limit was replaced by a goal
 * length and a maximum length.  These are the defaults main() installs
 * before looking at the command line.
 */
#define GOAL_LENGTH 65
#define MAX_LENGTH 75

int	goal_length;		/* Preferred output line length */
int	max_length;		/* Longest output line pack() will build */
int	pfx;			/* Leading blank count of the current line */
int	lineno;			/* Number of the input line being processed */
int	mark;			/* Value of lineno at the last head line */
int	center;			/* Nonzero: center each line (set by -C) */

/* Header names prefix() keeps on their own line just after a head line. */
char	*headnames[] = { "To", "Subject", "Cc", NOSTR };
80 
/* Helpers private to this file. */
static void	fmt(FILE *fi);
static int	ispref(const char *s1, const char *s2);
static void	leadin(void);
static void	oflush(void);
static void	pack(const char word[], int wl);
static void	prefix(const char line[], int add_space);
static void	setout(void);
static void	split(const char line[], int add_space);
static void	tabulate(char line[]);

/* Externally visible; ishead() is not defined in this file. */
int	ishead(const char *line);
int	main(int argc, char **argv);
93 
94 /*
95  * Drive the whole formatter by managing input files.  Also,
96  * cause initialization of the output stuff and flush it out
97  * at the end.
98  */
99 
100 int
101 main(int argc, char **argv)
102 {
103 	FILE *fi;
104 	int errs = 0;
105 	int number;		/* LIZ@UOM 6/18/85 */
106 
107 	goal_length = GOAL_LENGTH;
108 	max_length = MAX_LENGTH;
109 	setout();
110 	lineno = 1;
111 	mark = -10;
112 
113 	setlocale(LC_ALL, "");
114 
115 	/*
116 	 * LIZ@UOM 6/18/85 -- Check for goal and max length arguments
117 	 */
118 	if (argc > 1 && !strcmp(argv[1], "-C")) {
119 		center++;
120 		argc--;
121 		argv++;
122 	}
123 	if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
124 		argv++;
125 		argc--;
126 		goal_length = abs(number);
127 		if (argc > 1 && (1 == (sscanf(argv[1], "%d", &number)))) {
128 			argv++;
129 			argc--;
130 			max_length = abs(number);
131 		}
132 	}
133 	if (max_length <= goal_length) {
134 		fprintf(stderr, "Max length must be greater than %s\n",
135 			"goal length");
136 		exit(1);
137 	}
138 	if (argc < 2) {
139 		fmt(stdin);
140 		oflush();
141 		exit(0);
142 	}
143 	while (--argc) {
144 		if ((fi = fopen(*++argv, "r")) == NULL) {
145 			perror(*argv);
146 			errs++;
147 			continue;
148 		}
149 		fmt(fi);
150 		fclose(fi);
151 	}
152 	oflush();
153 	exit(errs);
154 }
155 
156 /*
157  * Read up characters from the passed input file, forming lines,
158  * doing ^H processing, expanding tabs, stripping trailing blanks,
159  * and sending each line down for analysis.
160  */
161 static void
162 fmt(FILE *fi)
163 {
164 	char linebuf[BUFSIZ], canonb[BUFSIZ];
165 	char *cp, *cp2;
166 	int c, col, add_space;
167 
168 	if (center) {
169 		while (1) {
170 			cp = fgets(linebuf, BUFSIZ, fi);
171 			if (!cp)
172 				return;
173 			while (*cp && isspace(*cp))
174 				cp++;
175 			cp2 = cp + strlen(cp) - 1;
176 			while (cp2 > cp && isspace(*cp2))
177 				cp2--;
178 			if (cp == cp2)
179 				putchar('\n');
180 			col = cp2 - cp;
181 			for (c = 0; c < (goal_length-col)/2; c++)
182 				putchar(' ');
183 			while (cp <= cp2)
184 				putchar(*cp++);
185 			putchar('\n');
186 		}
187 	}
188 	c = getc(fi);
189 	while (c != EOF) {
190 		/*
191 		 * Collect a line, doing ^H processing.
192 		 * Leave tabs for now.
193 		 */
194 		cp = linebuf;
195 		while (c != '\n' && c != EOF && cp-linebuf < BUFSIZ-1) {
196 			if (c == '\b') {
197 				if (cp > linebuf)
198 					cp--;
199 				c = getc(fi);
200 				continue;
201 			}
202 			if(!(isprint(c) || c == '\t' || c >= 160)) {
203 				c = getc(fi);
204 				continue;
205 			}
206 			*cp++ = c;
207 			c = getc(fi);
208 		}
209 		*cp = '\0';
210 
211 		/*
212 		 * By default, add space after the end of current input
213 		 * (normally end of line)
214 		 */
215 		add_space = 1;
216 
217 		/*
218 		 * If the input line is longer than linebuf buffer can hold,
219 		 * process the data read so far as if it was a separate line -
220 		 * if there is any whitespace character in the read data,
221 		 * process all the data up to it, otherwise process all.
222 		 */
223 		if (c != '\n' && c != EOF && !isspace(c)) {
224 			/*
225 			 * Find out if any whitespace character has been read.
226 			 */
227 			for(cp2 = cp; cp2 >= linebuf
228 				&& !isspace((unsigned char)*cp2); cp2--);
229 
230 			if (cp2 < linebuf) {
231 				/*
232 				 * ungetc() last read character so that it
233 				 * won't get lost.
234 				 */
235 				ungetc(c, fi);
236 				/*
237 				 * Don't append space on the end in split().
238 				 */
239 				add_space = 0;
240 			} else {
241 				/*
242 				 * To avoid splitting a word in a middle,
243 				 * ungetc() all characters after last
244 				 * whitespace char.
245 				 */
246 				while (!isspace(c) && (cp >= linebuf)) {
247 					ungetc(c, fi);
248 					c = *--cp;
249 				}
250 				*cp = '\0';
251 			}
252 		}
253 
254 		/*
255 		 * Expand tabs on the way to canonb.
256 		 */
257 		col = 0;
258 		cp = linebuf;
259 		cp2 = canonb;
260 		while ((c = *cp++) != 0) {
261 			if (c != '\t') {
262 				col++;
263 				if (cp2-canonb < BUFSIZ-1)
264 					*cp2++ = c;
265 				continue;
266 			}
267 			do {
268 				if (cp2-canonb < BUFSIZ-1)
269 					*cp2++ = ' ';
270 				col++;
271 			} while ((col & 07) != 0);
272 		}
273 
274 		/*
275 		 * Swipe trailing blanks from the line.
276 		 */
277 		for (cp2--; cp2 >= canonb && *cp2 == ' '; cp2--)
278 			;
279 		*++cp2 = '\0';
280 		prefix(canonb, add_space);
281 		if (c != EOF)
282 			c = getc(fi);
283 	}
284 }
285 
286 /*
287  * Take a line devoid of tabs and other garbage and determine its
288  * blank prefix.  If the indent changes, call for a linebreak.
289  * If the input line is blank, echo the blank line on the output.
290  * Finally, if the line minus the prefix is a mail header, try to keep
291  * it on a line by itself.
292  */
293 static void
294 prefix(const char line[], int add_space)
295 {
296 	const char *cp;
297 	char **hp;
298 	int np, h;
299 
300 	if (strlen(line) == 0) {
301 		oflush();
302 		putchar('\n');
303 		return;
304 	}
305 	for (cp = line; *cp == ' '; cp++)
306 		;
307 	np = cp - line;
308 
309 	/*
310 	 * The following horrible expression attempts to avoid linebreaks
311 	 * when the indent changes due to a paragraph.
312 	 */
313 	if (np != pfx && (np > pfx || abs(pfx-np) > 8))
314 		oflush();
315 	if ((h = ishead(cp)) != 0)
316 		oflush(), mark = lineno;
317 	if (lineno - mark < 3 && lineno - mark > 0)
318 		for (hp = &headnames[0]; *hp != (char *) 0; hp++)
319 			if (ispref(*hp, cp)) {
320 				h = 1;
321 				oflush();
322 				break;
323 			}
324 	if (!h && (h = (*cp == '.')))
325 		oflush();
326 	pfx = np;
327 	if (h) {
328 		pack(cp, strlen(cp));
329 		oflush();
330 	} else
331 		split(cp, add_space);
332 	lineno++;
333 }
334 
/*
 * Split up the passed line into output "words" which are
 * maximal strings of non-blanks with the blank separation
 * attached at the end.  Pass these words along to the output
 * line packer.
 *
 * line       text to split; leading blanks already removed
 * add_space  when nonzero, the last word of the line gets a trailing
 *            space, or two after '.', ':' or '!'
 *
 * LIZ@UOM 6/18/85 -- the count of non-blank characters in each word is
 * passed to pack() along with the word.
 */
static void
split(const char line[], int add_space)
{
	const char *cp;
	char *cp2;
	/*
	 * The line comes from a BUFSIZ-byte buffer, so a word can be up
	 * to BUFSIZ-1 characters long; up to two spaces and the
	 * terminating NUL are added on top of that.
	 */
	char word[BUFSIZ + 2];
	int wordl;		/* LIZ@UOM 6/18/85 */

	cp = line;
	while (*cp) {
		cp2 = word;
		wordl = 0;	/* LIZ@UOM 6/18/85 */

		/*
		 * Collect a 'word,' allowing it to contain escaped white
		 * space.  A backslash followed by white space is copied
		 * together with that character and counted once.
		 */
		while (*cp && *cp != ' ') {
			if (*cp == '\\' && isspace((unsigned char)cp[1]))
				*cp2++ = *cp++;
			*cp2++ = *cp++;
			wordl++;/* LIZ@UOM 6/18/85 */
		}

		/*
		 * Guarantee a space at end of line. Two spaces after end of
		 * sentence punctuation.
		 */
		if (*cp == '\0' && add_space) {
			*cp2++ = ' ';
			if (strchr(".:!", cp[-1]))
				*cp2++ = ' ';
		}
		/* Keep the blanks that follow the word attached to it. */
		while (*cp == ' ')
			*cp2++ = *cp++;
		*cp2 = '\0';
		/*
		 * LIZ@UOM 6/18/85 pack(word);
		 */
		pack(word, wordl);
	}
}
383 
/*
 * Output section.
 *
 * Output lines are assembled word by word in "outbuf", starting with
 * the blanks of the current prefix.  "outp" is the position where the
 * next character will be stored.  While outp is NOSTR no line has been
 * started.  Leading blanks are turned back into tabs only when the
 * finished line is written out.
 */
char	outbuf[BUFSIZ];			/* Image of the line being built */
char	*outp;				/* Next free slot in outbuf, or NOSTR */
395 
396 /*
397  * Initialize the output section.
398  */
399 static void
400 setout(void)
401 {
402 	outp = NOSTR;
403 }
404 
405 /*
406  * Pack a word onto the output line.  If this is the beginning of
407  * the line, push on the appropriately-sized string of blanks first.
408  * If the word won't fit on the current line, flush and begin a new
409  * line.  If the word is too long to fit all by itself on a line,
410  * just give it its own and hope for the best.
411  *
412  * LIZ@UOM 6/18/85 -- If the new word will fit in at less than the
413  *	goal length, take it.  If not, then check to see if the line
414  *	will be over the max length; if so put the word on the next
415  *	line.  If not, check to see if the line will be closer to the
416  *	goal length with or without the word and take it or put it on
417  *	the next line accordingly.
418  */
419 
420 /*
421  * LIZ@UOM 6/18/85 -- pass in the length of the word as well
422  * pack(word)
423  *	char word[];
424  */
425 static void
426 pack(const char word[], int wl)
427 {
428 	const char *cp;
429 	int s, t;
430 
431 	if (outp == NOSTR)
432 		leadin();
433 	/*
434 	 * LIZ@UOM 6/18/85 -- change condition to check goal_length; s is the
435 	 * length of the line before the word is added; t is now the length
436 	 * of the line after the word is added
437 	 *	t = strlen(word);
438 	 *	if (t+s <= LENGTH)
439 	 */
440 	s = outp - outbuf;
441 	t = wl + s;
442 	if ((t <= goal_length) ||
443 	    ((t <= max_length) && (t - goal_length <= goal_length - s))) {
444 		/*
445 		 * In like flint!
446 		 */
447 		for (cp = word; *cp; *outp++ = *cp++);
448 		return;
449 	}
450 	if (s > pfx) {
451 		oflush();
452 		leadin();
453 	}
454 	for (cp = word; *cp; *outp++ = *cp++);
455 }
456 
457 /*
458  * If there is anything on the current output line, send it on
459  * its way.  Set outp to NOSTR to indicate the absence of the current
460  * line prefix.
461  */
462 static void
463 oflush(void)
464 {
465 	if (outp == NOSTR)
466 		return;
467 	*outp = '\0';
468 	tabulate(outbuf);
469 	outp = NOSTR;
470 }
471 
/*
 * Take the passed line buffer, insert leading tabs where possible, and
 * output on standard output (finally).
 *
 * Trailing blanks are removed from "line" in place.  Each full group of
 * eight leading blanks is written as one tab, any remaining leading
 * blanks are written as blanks, then the rest of the line and a newline
 * follow.
 */
static void
tabulate(char line[])
{
	char *cp;
	size_t len;
	int b, t;

	/*
	 * Toss trailing blanks in the output line.  Working with a length
	 * keeps the scan inside the buffer even when the line is empty or
	 * consists of blanks only.
	 */
	len = strlen(line);
	while (len > 0 && line[len - 1] == ' ')
		len--;
	line[len] = '\0';

	/*
	 * Count the leading blank space and tabulate.
	 */
	for (cp = line; *cp == ' '; cp++)
		;
	b = cp-line;
	t = b >> 3;	/* number of whole tab stops */
	b &= 07;	/* blanks left over */
	while (t-- > 0)
		putc('\t', stdout);
	while (b-- > 0)
		putc(' ', stdout);
	while (*cp)
		putc(*cp++, stdout);
	putc('\n', stdout);
}
510 
511 /*
512  * Initialize the output line with the appropriate number of
513  * leading blanks.
514  */
515 static void
516 leadin(void)
517 {
518 	int b;
519 	char *cp;
520 
521 	for (b = 0, cp = outbuf; b < pfx; b++)
522 		*cp++ = ' ';
523 	outp = cp;
524 }
525 
/*
 * Is s1 a prefix of s2??
 *
 * Returns 1 when every character of s1 matches the character at the
 * same position in s2 (so an empty s1 is a prefix of anything), and 0
 * otherwise.  Both pointers advance together, and the scan stops at the
 * end of s1 or at the first mismatch, which includes the end of s2.
 */
static int
ispref(const char *s1, const char *s2)
{

	while (*s1 != '\0') {
		if (*s1++ != *s2++)
			return (0);
	}
	return (1);
}
537