xref: /openbsd/usr.bin/indent/indent.c (revision 09467b48)
1 /*	$OpenBSD: indent.c,v 1.31 2019/06/28 13:35:01 deraadt Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.
6  * Copyright (c) 1976 Board of Trustees of the University of Illinois.
7  * Copyright (c) 1985 Sun Microsystems, Inc.
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <limits.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include "indent_globs.h"
42 #include "indent_codes.h"
43 #include <ctype.h>
44 #include <errno.h>
45 #include <err.h>
46 
47 char       *in_name = "Standard Input";	/* name of the input file; main() repoints
48 					 * it at the first non-option argument */
49 char       *out_name = "Standard Output";	/* name of the output file; main() sets it
50 						 * from the second non-option argument, or to in_name before calling bakcopy() */
51 char        bakfile[PATH_MAX] = "";	/* PATH_MAX-byte buffer, initially empty; presumably the backup file name used by bakcopy() -- TODO confirm against its definition */
52 
53 void bakcopy(void);	/* defined outside this view; main() calls it when no output file was named, input is not stdin and troff is off */
54 
55 int
56 main(int argc, char **argv)
57 {
58 
59     extern int  found_err;	/* flag set in diag() on error */
60     int         dec_ind;	/* current indentation for declarations */
61     int         di_stack[20];	/* a stack of structure indentation levels */
62     int         flushed_nl;	/* used when buffering up comments to remember
63 				 * that a newline was passed over */
64     int         force_nl;	/* when true, code must be broken */
65     int         hd_type;	/* used to store type of stmt for if (...),
66 				 * for (...), etc */
67     int 	i;		/* local loop counter */
68     int         scase;		/* set to true when we see a case, so we will
69 				 * know what to do with the following colon */
70     int         sp_sw;		/* when true, we are in the expression of
71 				 * if(...), while(...), etc. */
72     int         squest;		/* when this is positive, we have seen a ?
73 				 * without the matching : in a <c>?<s>:<s>
74 				 * construct */
75     char 	*t_ptr;		/* used for copying tokens */
76     int         tabs_to_var;	/* true if using tabs to indent to var name */
77     int         type_code;	/* the type of token, returned by lexi */
78 
79     int         last_else = 0;	/* true iff last keyword was an else */
80 
81     if (pledge("stdio rpath wpath cpath", NULL) == -1)
82 	err(1, "pledge");
83 
84     /*-----------------------------------------------*\
85     |		      INITIALIZATION		      |
86     \*-----------------------------------------------*/
87 
88 
89     hd_type = 0;
90     ps.p_stack[0] = stmt;	/* this is the parser's stack */
91     ps.last_nl = true;		/* this is true if the last thing scanned was
92 				 * a newline */
93     ps.last_token = semicolon;
94     combuf = malloc(bufsize);
95     labbuf = malloc(bufsize);
96     codebuf = malloc(bufsize);
97     tokenbuf = malloc(bufsize);
98     if (combuf == NULL || labbuf == NULL || codebuf == NULL ||
99         tokenbuf == NULL)
100 	    err(1, NULL);
101     l_com = combuf + bufsize - 5;
102     l_lab = labbuf + bufsize - 5;
103     l_code = codebuf + bufsize - 5;
104     l_token = tokenbuf + bufsize - 5;
105     combuf[0] = codebuf[0] = labbuf[0] = ' ';	/* set up code, label, and
106 						 * comment buffers */
107     combuf[1] = codebuf[1] = labbuf[1] = '\0';
108     ps.else_if = 1;		/* Default else-if special processing to on */
109     s_lab = e_lab = labbuf + 1;
110     s_code = e_code = codebuf + 1;
111     s_com = e_com = combuf + 1;
112     s_token = e_token = tokenbuf + 1;
113 
114     in_buffer = malloc(10);
115     if (in_buffer == NULL)
116 	    err(1, NULL);
117     in_buffer_limit = in_buffer + 8;
118     buf_ptr = buf_end = in_buffer;
119     line_no = 1;
120     had_eof = ps.in_decl = ps.decl_on_line = break_comma = false;
121     sp_sw = force_nl = false;
122     ps.in_or_st = false;
123     ps.bl_line = true;
124     dec_ind = 0;
125     di_stack[ps.dec_nest = 0] = 0;
126     ps.want_blank = ps.in_stmt = ps.ind_stmt = false;
127 
128 
129     scase = ps.pcase = false;
130     squest = 0;
131     sc_end = 0;
132     bp_save = 0;
133     be_save = 0;
134 
135     output = 0;
136 
137 
138 
139     /*--------------------------------------------------*\
140     |   		COMMAND LINE SCAN		 |
141     \*--------------------------------------------------*/
142 
143 #ifdef undef
144     max_col = 78;		/* -l78 */
145     lineup_to_parens = 1;	/* -lp */
146     ps.ljust_decl = 0;		/* -ndj */
147     ps.com_ind = 33;		/* -c33 */
148     star_comment_cont = 1;	/* -sc */
149     ps.ind_size = 8;		/* -i8 */
150     verbose = 0;
151     ps.decl_indent = 16;	/* -di16 */
152     ps.indent_parameters = 1;	/* -ip */
153     ps.decl_com_ind = 0;	/* if this is not set to some positive value
154 				 * by an arg, we will set this equal to
155 				 * ps.com_ind */
156     btype_2 = 1;		/* -br */
157     cuddle_else = 1;		/* -ce */
158     ps.unindent_displace = 0;	/* -d0 */
159     ps.case_indent = 0;		/* -cli0 */
160     format_col1_comments = 1;	/* -fc1 */
161     procnames_start_line = 1;	/* -psl */
162     proc_calls_space = 0;	/* -npcs */
163     comment_delimiter_on_blankline = 1;	/* -cdb */
164     ps.leave_comma = 1;		/* -nbc */
165 #endif
166 
167     for (i = 1; i < argc; ++i)
168 	if (strcmp(argv[i], "-npro") == 0)
169 	    break;
170     set_defaults();
171     if (i >= argc)
172 	set_profile();
173 
174     for (i = 1; i < argc; ++i) {
175 
176 	/*
177 	 * look thru args (if any) for changes to defaults
178 	 */
179 	if (argv[i][0] != '-') {/* no flag on parameter */
180 	    if (input == 0) {	/* we must have the input file */
181 		in_name = argv[i];	/* remember name of input file */
182 		input = fopen(in_name, "r");
183 		if (input == NULL)		/* check for open error */
184 			err(1, "%s", in_name);
185 		continue;
186 	    }
187 	    else if (output == 0) {	/* we have the output file */
188 		out_name = argv[i];	/* remember name of output file */
189 		if (strcmp(in_name, out_name) == 0)	/* attempt to overwrite
190 							 * the file */
191 			errx(1, "input and output files must be different");
192 		output = fopen(out_name, "w");
193 		if (output == NULL)	/* check for create error */
194 			err(1, "%s", out_name);
195 		continue;
196 	    }
197 	    errx(1, "unknown parameter: %s", argv[i]);
198 	}
199 	else
200 	    set_option(argv[i]);
201     }				/* end of for */
202     if (input == NULL) {
203 	input = stdin;
204     }
205     if (output == NULL) {
206 	if (troff || input == stdin)
207 	    output = stdout;
208 	else {
209 	    out_name = in_name;
210 	    bakcopy();
211 	}
212     }
213     if (ps.com_ind <= 1)
214 	ps.com_ind = 2;		/* dont put normal comments before column 2 */
215     if (troff) {
216 	if (bodyf.font[0] == 0)
217 	    parsefont(&bodyf, "R");
218 	if (scomf.font[0] == 0)
219 	    parsefont(&scomf, "I");
220 	if (blkcomf.font[0] == 0)
221 	    blkcomf = scomf, blkcomf.size += 2;
222 	if (boxcomf.font[0] == 0)
223 	    boxcomf = blkcomf;
224 	if (stringf.font[0] == 0)
225 	    parsefont(&stringf, "L");
226 	if (keywordf.font[0] == 0)
227 	    parsefont(&keywordf, "B");
228 	writefdef(&bodyf, 'B');
229 	writefdef(&scomf, 'C');
230 	writefdef(&blkcomf, 'L');
231 	writefdef(&boxcomf, 'X');
232 	writefdef(&stringf, 'S');
233 	writefdef(&keywordf, 'K');
234     }
235     if (block_comment_max_col <= 0)
236 	block_comment_max_col = max_col;
237     if (ps.decl_com_ind <= 0)	/* if not specified by user, set this */
238 	ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind;
239     if (continuation_indent == 0)
240 	continuation_indent = ps.ind_size;
241     fill_buffer();	/* get first batch of stuff into input buffer */
242 
243     parse(semicolon);
244     {
245 	char *p = buf_ptr;
246 	int   col = 1;
247 
248 	while (1) {
249 	    if (*p == ' ')
250 		col++;
251 	    else if (*p == '\t')
252 		col = ((col - 1) & ~7) + 9;
253 	    else
254 		break;
255 	    p++;
256 	}
257 	if (col > ps.ind_size)
258 	    ps.ind_level = ps.i_l_follow = col / ps.ind_size;
259     }
260     if (troff) {
261 	char *p = in_name,
262 	           *beg = in_name;
263 
264 	while (*p)
265 	    if (*p++ == '/')
266 		beg = p;
267 	fprintf(output, ".Fn \"%s\"\n", beg);
268     }
269     /*
270      * START OF MAIN LOOP
271      */
272 
273     while (1) {			/* this is the main loop.  it will go until we
274 				 * reach eof */
275 	int         is_procname;
276 
277 	type_code = lexi();	/* lexi reads one token.  The actual
278 				 * characters read are stored in "token". lexi
279 				 * returns a code indicating the type of token */
280 	is_procname = ps.procname[0];
281 
282 	/*
283 	 * The following code moves everything following an if (), while (),
284 	 * else, etc. up to the start of the following stmt to a buffer. This
285 	 * allows proper handling of both kinds of brace placement.
286 	 */
287 
288 	flushed_nl = false;
289 	while (ps.search_brace) {	/* if we scanned an if(), while(),
290 					 * etc., we might need to copy stuff
291 					 * into a buffer we must loop, copying
292 					 * stuff into save_com, until we find
293 					 * the start of the stmt which follows
294 					 * the if, or whatever */
295 	    switch (type_code) {
296 	    case newline:
297 		++line_no;
298 		flushed_nl = true;
299 	    case form_feed:
300 		break;		/* form feeds and newlines found here will be
301 				 * ignored */
302 
303 	    case lbrace:	/* this is a brace that starts the compound
304 				 * stmt */
305 		if (sc_end == 0) {	/* ignore buffering if a comment wasnt
306 					 * stored up */
307 		    ps.search_brace = false;
308 		    goto check_type;
309 		}
310 		if (btype_2) {
311 		    save_com[0] = '{';	/* we either want to put the brace
312 					 * right after the if */
313 		    goto sw_buffer;	/* go to common code to get out of
314 					 * this loop */
315 		}
316 	    case comment:	/* we have a comment, so we must copy it into
317 				 * the buffer */
318 		if (!flushed_nl || sc_end != 0) {
319 		    if (sc_end == 0) {	/* if this is the first comment, we
320 					 * must set up the buffer */
321 			save_com[0] = save_com[1] = ' ';
322 			sc_end = &(save_com[2]);
323 		    }
324 		    else {
325 			*sc_end++ = '\n';	/* add newline between
326 						 * comments */
327 			*sc_end++ = ' ';
328 			--line_no;
329 		    }
330 		    *sc_end++ = '/';	/* copy in start of comment */
331 		    *sc_end++ = '*';
332 
333 		    for (;;) {	/* loop until we get to the end of the comment */
334 			*sc_end = *buf_ptr++;
335 			if (buf_ptr >= buf_end)
336 			    fill_buffer();
337 
338 			if (*sc_end++ == '*' && *buf_ptr == '/')
339 			    break;	/* we are at end of comment */
340 
341 			if (sc_end >= &(save_com[sc_size])) {	/* check for temp buffer
342 								 * overflow */
343 			    diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever.");
344 			    fflush(output);
345 			    exit(1);
346 			}
347 		    }
348 		    *sc_end++ = '/';	/* add ending slash */
349 		    if (++buf_ptr >= buf_end)	/* get past / in buffer */
350 			fill_buffer();
351 		    break;
352 		}
353 	    default:		/* it is the start of a normal statement */
354 		if (flushed_nl)	/* if we flushed a newline, make sure it is
355 				 * put back */
356 		    force_nl = true;
357 		if ((type_code == sp_paren && *token == 'i'
358 		     && last_else && ps.else_if) ||
359 		    (type_code == sp_nparen && *token == 'e'
360 		     && e_code != s_code && e_code[-1] == '}'))
361 			force_nl = false;
362 
363 		if (sc_end == 0) {	/* ignore buffering if comment wasnt
364 					 * saved up */
365 		    ps.search_brace = false;
366 		    goto check_type;
367 		}
368 		if (force_nl) {	/* if we should insert a nl here, put it into
369 				 * the buffer */
370 		    force_nl = false;
371 		    --line_no;	/* this will be re-increased when the nl is
372 				 * read from the buffer */
373 		    *sc_end++ = '\n';
374 		    *sc_end++ = ' ';
375 		    if (verbose && !flushed_nl)	/* print error msg if the line
376 						 * was not already broken */
377 			diag(0, "Line broken");
378 		    flushed_nl = false;
379 		}
380 		for (t_ptr = token; *t_ptr; ++t_ptr)
381 		    *sc_end++ = *t_ptr;	/* copy token into temp buffer */
382 		ps.procname[0] = 0;
383 
384 	sw_buffer:
385 		ps.search_brace = false;	/* stop looking for start of
386 						 * stmt */
387 		bp_save = buf_ptr;	/* save current input buffer */
388 		be_save = buf_end;
389 		buf_ptr = save_com;	/* fix so that subsequent calls to
390 					 * lexi will take tokens out of
391 					 * save_com */
392 		*sc_end++ = ' ';/* add trailing blank, just in case */
393 		buf_end = sc_end;
394 		sc_end = 0;
395 		break;
396 	    }			/* end of switch */
397 	    if (type_code != 0)	/* we must make this check, just in case there
398 				 * was an unexpected EOF */
399 		type_code = lexi();	/* read another token */
400 	    /* if (ps.search_brace) ps.procname[0] = 0; */
401 	    if ((is_procname = ps.procname[0]) && flushed_nl
402 		    && !procnames_start_line && ps.in_decl
403 		    && type_code == ident)
404 		flushed_nl = 0;
405 	}			/* end of while (search_brace) */
406 	last_else = 0;
407 check_type:
408 	if (type_code == 0) {	/* we got eof */
409 	    if (s_lab != e_lab || s_code != e_code
410 		    || s_com != e_com)	/* must dump end of line */
411 		dump_line();
412 	    if (ps.tos > 1)	/* check for balanced braces */
413 		diag(1, "Missing braces at end of file.");
414 
415 	    if (verbose) {
416 		printf("There were %d output lines and %d comments\n",
417 		       ps.out_lines, ps.out_coms);
418 		printf("(Lines with comments)/(Lines with code): %6.3f\n",
419 		       (1.0 * ps.com_lines) / code_lines);
420 	    }
421 	    fflush(output);
422 	    exit(found_err);
423 	}
424 	if (
425 		(type_code != comment) &&
426 		(type_code != newline) &&
427 		(type_code != preesc) &&
428 		(type_code != form_feed)) {
429 	    if (force_nl &&
430 		    (type_code != semicolon) &&
431 		    (type_code != lbrace || !btype_2)) {
432 		/* we should force a broken line here */
433 		if (verbose && !flushed_nl)
434 		    diag(0, "Line broken");
435 		flushed_nl = false;
436 		dump_line();
437 		ps.want_blank = false;	/* dont insert blank at line start */
438 		force_nl = false;
439 	    }
440 	    ps.in_stmt = true;	/* turn on flag which causes an extra level of
441 				 * indentation. this is turned off by a ; or
442 				 * '}' */
443 	    if (s_com != e_com) {	/* the turkey has embedded a comment
444 					 * in a line. fix it */
445 		*e_code++ = ' ';
446 		for (t_ptr = s_com; *t_ptr; ++t_ptr) {
447 		    CHECK_SIZE_CODE;
448 		    *e_code++ = *t_ptr;
449 		}
450 		*e_code++ = ' ';
451 		*e_code = '\0';	/* null terminate code sect */
452 		ps.want_blank = false;
453 		e_com = s_com;
454 	    }
455 	}
456 	else if (type_code != comment)	/* preserve force_nl thru a comment */
457 	    force_nl = false;	/* cancel forced newline after newline, form
458 				 * feed, etc */
459 
460 
461 
462 	/*-----------------------------------------------------*\
463 	|	   do switch on type of token scanned		|
464 	\*-----------------------------------------------------*/
465 	CHECK_SIZE_CODE;
466 	switch (type_code) {	/* now, decide what to do with the token */
467 
468 	case form_feed:	/* found a form feed in line */
469 	    ps.use_ff = true;	/* a form feed is treated much like a newline */
470 	    dump_line();
471 	    ps.want_blank = false;
472 	    break;
473 
474 	case newline:
475 	    if (ps.last_token != comma || ps.p_l_follow > 0
476 		    || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) {
477 		dump_line();
478 		ps.want_blank = false;
479 	    }
480 	    ++line_no;		/* keep track of input line number */
481 	    break;
482 
483 	case lparen:		/* got a '(' or '[' */
484 	    ++ps.p_l_follow;	/* count parens to make Healy happy */
485 	    if (ps.want_blank && *token != '[' &&
486 		    (ps.last_token != ident || proc_calls_space
487 	      || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon))))
488 		*e_code++ = ' ';
489 	    if (ps.in_decl && !ps.block_init)
490 		if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) {
491 		    ps.dumped_decl_indent = 1;
492 		    snprintf(e_code, (l_code - e_code) + 5,
493 			"\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
494 		    e_code += strlen(e_code);
495 		    CHECK_SIZE_CODE;
496 		}
497 		else {
498 		    while ((e_code - s_code) < dec_ind) {
499 			CHECK_SIZE_CODE;
500 			*e_code++ = ' ';
501 		    }
502 		    *e_code++ = token[0];
503 		}
504 	    else
505 		*e_code++ = token[0];
506 	    ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code;
507 	    if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent
508 		    && ps.paren_indents[0] < 2 * ps.ind_size)
509 		ps.paren_indents[0] = 2 * ps.ind_size;
510 	    ps.want_blank = false;
511 	    if (ps.in_or_st && *token == '(' && ps.tos <= 2) {
512 		/*
513 		 * this is a kluge to make sure that declarations will be
514 		 * aligned right if proc decl has an explicit type on it, i.e.
515 		 * "int a(x) {..."
516 		 */
517 		parse(semicolon);	/* I said this was a kluge... */
518 		ps.in_or_st = false;	/* turn off flag for structure decl or
519 					 * initialization */
520 	    }
521 	    if (ps.sizeof_keyword)
522 		ps.sizeof_mask |= 1 << ps.p_l_follow;
523 	    break;
524 
525 	case rparen:		/* got a ')' or ']' */
526 	    rparen_count--;
527 	    if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) {
528 		ps.last_u_d = true;
529 		ps.cast_mask &= (1 << ps.p_l_follow) - 1;
530 	    }
531 	    ps.sizeof_mask &= (1 << ps.p_l_follow) - 1;
532 	    if (--ps.p_l_follow < 0) {
533 		ps.p_l_follow = 0;
534 		diag(0, "Extra %c", *token);
535 	    }
536 	    if (e_code == s_code)	/* if the paren starts the line */
537 		ps.paren_level = ps.p_l_follow;	/* then indent it */
538 
539 	    *e_code++ = token[0];
540 	    ps.want_blank = true;
541 
542 	    if (sp_sw && (ps.p_l_follow == 0)) {	/* check for end of if
543 							 * (...), or some such */
544 		sp_sw = false;
545 		force_nl = true;/* must force newline after if */
546 		ps.last_u_d = true;	/* inform lexi that a following
547 					 * operator is unary */
548 		ps.in_stmt = false;	/* dont use stmt continuation
549 					 * indentation */
550 
551 		parse(hd_type);	/* let parser worry about if, or whatever */
552 	    }
553 	    ps.search_brace = btype_2;	/* this should insure that constructs
554 					 * such as main(){...} and int[]{...}
555 					 * have their braces put in the right
556 					 * place */
557 	    break;
558 
559 	case unary_op:		/* this could be any unary operation */
560 	    if (ps.want_blank)
561 		*e_code++ = ' ';
562 
563 	    if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) {
564 		snprintf(e_code, (l_code - e_code) + 5,
565 		    "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
566 		ps.dumped_decl_indent = 1;
567 		e_code += strlen(e_code);
568 		CHECK_SIZE_CODE;
569 	    }
570 	    else {
571 		char       *res = token;
572 
573 		if (ps.in_decl && !ps.block_init) {	/* if this is a unary op
574 							 * in a declaration, we
575 							 * should indent this
576 							 * token */
577 		    for (i = 0; token[i]; ++i);	/* find length of token */
578 		    while ((e_code - s_code) < (dec_ind - i)) {
579 			CHECK_SIZE_CODE;
580 			*e_code++ = ' ';	/* pad it */
581 		    }
582 		}
583 		if (troff && token[0] == '-' && token[1] == '>')
584 		    res = "\\(->";
585 		for (t_ptr = res; *t_ptr; ++t_ptr) {
586 		    CHECK_SIZE_CODE;
587 		    *e_code++ = *t_ptr;
588 		}
589 	    }
590 	    ps.want_blank = false;
591 	    break;
592 
593 	case binary_op:	/* any binary operation */
594 	    if (ps.want_blank)
595 		*e_code++ = ' ';
596 	    {
597 		char       *res = token;
598 
599 		if (troff)
600 		    switch (token[0]) {
601 		    case '<':
602 			if (token[1] == '=')
603 			    res = "\\(<=";
604 			break;
605 		    case '>':
606 			if (token[1] == '=')
607 			    res = "\\(>=";
608 			break;
609 		    case '!':
610 			if (token[1] == '=')
611 			    res = "\\(!=";
612 			break;
613 		    case '|':
614 			if (token[1] == '|')
615 			    res = "\\(br\\(br";
616 			else if (token[1] == 0)
617 			    res = "\\(br";
618 			break;
619 		    }
620 		for (t_ptr = res; *t_ptr; ++t_ptr) {
621 		    CHECK_SIZE_CODE;
622 		    *e_code++ = *t_ptr;	/* move the operator */
623 		}
624 	    }
625 	    ps.want_blank = true;
626 	    break;
627 
628 	case postop:		/* got a trailing ++ or -- */
629 	    *e_code++ = token[0];
630 	    *e_code++ = token[1];
631 	    ps.want_blank = true;
632 	    break;
633 
634 	case question:		/* got a ? */
635 	    squest++;		/* this will be used when a later colon
636 				 * appears so we can distinguish the
637 				 * <c>?<n>:<n> construct */
638 	    if (ps.want_blank)
639 		*e_code++ = ' ';
640 	    *e_code++ = '?';
641 	    ps.want_blank = true;
642 	    break;
643 
644 	case casestmt:		/* got word 'case' or 'default' */
645 	    scase = true;	/* so we can process the later colon properly */
646 	    goto copy_id;
647 
648 	case colon:		/* got a ':' */
649 	    if (squest > 0) {	/* it is part of the <c>?<n>: <n> construct */
650 		--squest;
651 		if (ps.want_blank)
652 		    *e_code++ = ' ';
653 		*e_code++ = ':';
654 		ps.want_blank = true;
655 		break;
656 	    }
657 	    if (ps.in_decl) {
658 		*e_code++ = ':';
659 		ps.want_blank = false;
660 		break;
661 	    }
662 	    ps.in_stmt = false;	/* seeing a label does not imply we are in a
663 				 * stmt */
664 	    for (t_ptr = s_code; *t_ptr; ++t_ptr)
665 		*e_lab++ = *t_ptr;	/* turn everything so far into a label */
666 	    e_code = s_code;
667 	    *e_lab++ = ':';
668 	    *e_lab++ = ' ';
669 	    *e_lab = '\0';
670 
671 	    force_nl = ps.pcase = scase;	/* ps.pcase will be used by
672 						 * dump_line to decide how to
673 						 * indent the label. force_nl
674 						 * will force a case n: to be
675 						 * on a line by itself */
676 	    scase = false;
677 	    ps.want_blank = false;
678 	    break;
679 
680 	case semicolon:	/* got a ';' */
681 	    ps.in_or_st = false;/* we are not in an initialization or
682 				 * structure declaration */
683 	    scase = false;	/* these will only need resetting in a error */
684 	    squest = 0;
685 	    if (ps.last_token == rparen && rparen_count == 0)
686 		ps.in_parameter_declaration = 0;
687 	    ps.cast_mask = 0;
688 	    ps.sizeof_mask = 0;
689 	    ps.block_init = 0;
690 	    ps.block_init_level = 0;
691 	    ps.just_saw_decl--;
692 
693 	    if (ps.in_decl && s_code == e_code && !ps.block_init)
694 		while ((e_code - s_code) < (dec_ind - 1)) {
695 		    CHECK_SIZE_CODE;
696 		    *e_code++ = ' ';
697 		}
698 
699 	    ps.in_decl = (ps.dec_nest > 0);	/* if we were in a first level
700 						 * structure declaration, we
701 						 * arent any more */
702 
703 	    if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) {
704 
705 		/*
706 		 * This should be true iff there were unbalanced parens in the
707 		 * stmt.  It is a bit complicated, because the semicolon might
708 		 * be in a for stmt
709 		 */
710 		diag(1, "Unbalanced parens");
711 		ps.p_l_follow = 0;
712 		if (sp_sw) {	/* this is a check for a if, while, etc. with
713 				 * unbalanced parens */
714 		    sp_sw = false;
715 		    parse(hd_type);	/* dont lose the if, or whatever */
716 		}
717 	    }
718 	    *e_code++ = ';';
719 	    ps.want_blank = true;
720 	    ps.in_stmt = (ps.p_l_follow > 0);	/* we are no longer in the
721 						 * middle of a stmt */
722 
723 	    if (!sp_sw) {	/* if not if for (;;) */
724 		parse(semicolon);	/* let parser know about end of stmt */
725 		force_nl = true;/* force newline after a end of stmt */
726 	    }
727 	    break;
728 
729 	case lbrace:		/* got a '{' */
730 	    ps.in_stmt = false;	/* dont indent the {} */
731 	    if (!ps.block_init)
732 		force_nl = true;/* force other stuff on same line as '{' onto
733 				 * new line */
734 	    else if (ps.block_init_level <= 0)
735 		ps.block_init_level = 1;
736 	    else
737 		ps.block_init_level++;
738 
739 	    if (s_code != e_code && !ps.block_init) {
740 		if (!btype_2) {
741 		    dump_line();
742 		    ps.want_blank = false;
743 		}
744 		else if (ps.in_parameter_declaration && !ps.in_or_st) {
745 		    ps.i_l_follow = 0;
746 		    dump_line();
747 		    ps.want_blank = false;
748 		}
749 	    }
750 	    if (ps.in_parameter_declaration)
751 		prefix_blankline_requested = 0;
752 
753 	    if (ps.p_l_follow > 0) {	/* check for preceding unbalanced
754 					 * parens */
755 		diag(1, "Unbalanced parens");
756 		ps.p_l_follow = 0;
757 		if (sp_sw) {	/* check for unclosed if, for, etc. */
758 		    sp_sw = false;
759 		    parse(hd_type);
760 		    ps.ind_level = ps.i_l_follow;
761 		}
762 	    }
763 	    if (s_code == e_code)
764 		ps.ind_stmt = false;	/* dont put extra indentation on line
765 					 * with '{' */
766 	    if (ps.in_decl && ps.in_or_st) {	/* this is either a structure
767 						 * declaration or an init */
768 		di_stack[ps.dec_nest++] = dec_ind;
769 		/* ?		dec_ind = 0; */
770 	    }
771 	    else {
772 		ps.decl_on_line = false;
773 		/* we can't be in the middle of a declaration, so don't do
774 		 * special indentation of comments */
775 		if (blanklines_after_declarations_at_proctop
776 			&& ps.in_parameter_declaration)
777 		    postfix_blankline_requested = 1;
778 		ps.in_parameter_declaration = 0;
779 	    }
780 	    dec_ind = 0;
781 	    parse(lbrace);	/* let parser know about this */
782 	    if (ps.want_blank)	/* put a blank before '{' if '{' is not at
783 				 * start of line */
784 		*e_code++ = ' ';
785 	    ps.want_blank = false;
786 	    *e_code++ = '{';
787 	    ps.just_saw_decl = 0;
788 	    break;
789 
790 	case rbrace:		/* got a '}' */
791 	    if (ps.p_stack[ps.tos] == decl && !ps.block_init)	/* semicolons can be
792 								 * omitted in
793 								 * declarations */
794 		parse(semicolon);
795 	    if (ps.p_l_follow) {/* check for unclosed if, for, else. */
796 		diag(1, "Unbalanced parens");
797 		ps.p_l_follow = 0;
798 		sp_sw = false;
799 	    }
800 	    ps.just_saw_decl = 0;
801 	    ps.block_init_level--;
802 	    if (s_code != e_code && !ps.block_init) {	/* '}' must be first on
803 							 * line */
804 		if (verbose)
805 		    diag(0, "Line broken");
806 		dump_line();
807 	    }
808 	    *e_code++ = '}';
809 	    ps.want_blank = true;
810 	    ps.in_stmt = ps.ind_stmt = false;
811 	    if (ps.dec_nest > 0) {	/* we are in multi-level structure
812 					 * declaration */
813 		dec_ind = di_stack[--ps.dec_nest];
814 		if (ps.dec_nest == 0 && !ps.in_parameter_declaration)
815 		    ps.just_saw_decl = 2;
816 		ps.in_decl = true;
817 	    }
818 	    prefix_blankline_requested = 0;
819 	    parse(rbrace);	/* let parser know about this */
820 	    ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead
821 		&& ps.il[ps.tos] >= ps.ind_level;
822 	    if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0)
823 		postfix_blankline_requested = 1;
824 	    break;
825 
826 	case swstmt:		/* got keyword "switch" */
827 	    sp_sw = true;
828 	    hd_type = swstmt;	/* keep this for when we have seen the
829 				 * expression */
830 	    goto copy_id;	/* go move the token into buffer */
831 
832 	case sp_paren:		/* token is if, while, for */
833 	    sp_sw = true;	/* the interesting stuff is done after the
834 				 * expression is scanned */
835 	    hd_type = (*token == 'i' ? ifstmt :
836 		       (*token == 'w' ? whilestmt : forstmt));
837 
838 	    /*
839 	     * remember the type of header for later use by parser
840 	     */
841 	    goto copy_id;	/* copy the token into line */
842 
843 	case sp_nparen:	/* got else, do */
844 	    ps.in_stmt = false;
845 	    if (*token == 'e') {
846 		if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) {
847 		    if (verbose)
848 			diag(0, "Line broken");
849 		    dump_line();/* make sure this starts a line */
850 		    ps.want_blank = false;
851 		}
852 		force_nl = true;/* also, following stuff must go onto new line */
853 		last_else = 1;
854 		parse(elselit);
855 	    }
856 	    else {
857 		if (e_code != s_code) {	/* make sure this starts a line */
858 		    if (verbose)
859 			diag(0, "Line broken");
860 		    dump_line();
861 		    ps.want_blank = false;
862 		}
863 		force_nl = true;/* also, following stuff must go onto new line */
864 		last_else = 0;
865 		parse(dolit);
866 	    }
867 	    goto copy_id;	/* move the token into line */
868 
869 	case decl:		/* we have a declaration type (int, register,
870 				 * etc.) */
871 	    parse(decl);	/* let parser worry about indentation */
872 	    if (ps.last_token == rparen && ps.tos <= 1) {
873 		ps.in_parameter_declaration = 1;
874 		if (s_code != e_code) {
875 		    dump_line();
876 		    ps.want_blank = 0;
877 		}
878 	    }
879 	    if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) {
880 		ps.ind_level = ps.i_l_follow = 1;
881 		ps.ind_stmt = 0;
882 	    }
883 	    ps.in_or_st = true;	/* this might be a structure or initialization
884 				 * declaration */
885 	    ps.in_decl = ps.decl_on_line = true;
886 	    if ( /* !ps.in_or_st && */ ps.dec_nest <= 0)
887 		ps.just_saw_decl = 2;
888 	    prefix_blankline_requested = 0;
889 	    for (i = 0; token[i++];);	/* get length of token */
890 
891 	    /*
892 	     * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent
893 	     * : i);
894 	     */
895 	    dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i;
896 	    tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0);
897 	    goto copy_id;
898 
899 	case ident:		/* got an identifier or constant */
900 	    if (ps.in_decl) {	/* if we are in a declaration, we must indent
901 				 * identifier */
902 		if (ps.want_blank)
903 		    *e_code++ = ' ';
904 		ps.want_blank = false;
905 		if (is_procname == 0 || !procnames_start_line) {
906 		    if (!ps.block_init) {
907 			if (troff && !ps.dumped_decl_indent) {
908 			    snprintf(e_code, (l_code - e_code) + 5,
909 				"\n.De %dp+\200p\n", dec_ind * 7);
910 			    ps.dumped_decl_indent = 1;
911 			    e_code += strlen(e_code);
912 			    CHECK_SIZE_CODE;
913 			} else {
914 			    int cur_dec_ind;
915 			    int pos, startpos;
916 
917 			    /*
918 			     * in order to get the tab math right for
919 			     * indentations that are not multiples of 8 we
920 			     * need to modify both startpos and dec_ind
921 			     * (cur_dec_ind) here by eight minus the
922 			     * remainder of the current starting column
923 			     * divided by eight. This seems to be a
924 			     * properly working fix
925 			     */
926 			    startpos = e_code - s_code;
927 			    cur_dec_ind = dec_ind;
928 			    pos = startpos;
929 			    if ((ps.ind_level * ps.ind_size) % 8 != 0) {
930 				pos += (ps.ind_level * ps.ind_size) % 8;
931 				cur_dec_ind += (ps.ind_level * ps.ind_size) % 8;
932 			    }
933 
934 			    if (tabs_to_var) {
935 				while ((pos & ~7) + 8 <= cur_dec_ind) {
936 				    CHECK_SIZE_CODE;
937 				    *e_code++ = '\t';
938 				    pos = (pos & ~7) + 8;
939 				}
940 			    }
941 			    while (pos < cur_dec_ind) {
942 				CHECK_SIZE_CODE;
943 				*e_code++ = ' ';
944 				pos++;
945 			    }
946 			    if (ps.want_blank && e_code - s_code == startpos)
947 				*e_code++ = ' ';
948 			    ps.want_blank = false;
949 			}
950 		    }
951 		}
952 		else {
953 		    if (dec_ind && s_code != e_code)
954 			dump_line();
955 		    dec_ind = 0;
956 		    ps.want_blank = false;
957 		}
958 	    }
959 	    else if (sp_sw && ps.p_l_follow == 0) {
960 		sp_sw = false;
961 		force_nl = true;
962 		ps.last_u_d = true;
963 		ps.in_stmt = false;
964 		parse(hd_type);
965 	    }
966     copy_id:
967 	    if (ps.want_blank)
968 		*e_code++ = ' ';
969 	    if (troff && ps.its_a_keyword) {
970 		e_code = chfont(&bodyf, &keywordf, e_code);
971 		for (t_ptr = token; *t_ptr; ++t_ptr) {
972 		    CHECK_SIZE_CODE;
973 		    *e_code++ = keywordf.allcaps &&
974 		      islower((unsigned char)*t_ptr) ?
975 		      toupper((unsigned char)*t_ptr) : *t_ptr;
976 		}
977 		e_code = chfont(&keywordf, &bodyf, e_code);
978 	    }
979 	    else
980 		for (t_ptr = token; *t_ptr; ++t_ptr) {
981 		    CHECK_SIZE_CODE;
982 		    *e_code++ = *t_ptr;
983 		}
984 	    ps.want_blank = true;
985 	    break;
986 
987 	case period:		/* treat a period kind of like a binary
988 				 * operation */
989 	    *e_code++ = '.';	/* move the period into line */
990 	    ps.want_blank = false;	/* dont put a blank after a period */
991 	    break;
992 
993 	case comma:
994 	    ps.want_blank = (s_code != e_code);	/* only put blank after comma
995 						 * if comma does not start the
996 						 * line */
997 	    if (ps.in_decl && is_procname == 0 && !ps.block_init)
998 		while ((e_code - s_code) < (dec_ind - 1)) {
999 		    CHECK_SIZE_CODE;
1000 		    *e_code++ = ' ';
1001 		}
1002 
1003 	    *e_code++ = ',';
1004 	    if (ps.p_l_follow == 0) {
1005 		if (ps.block_init_level <= 0)
1006 		    ps.block_init = 0;
1007 		if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8))
1008 		    force_nl = true;
1009 	    }
1010 	    break;
1011 
1012 	case preesc:		/* got the character '#' */
1013 	    if ((s_com != e_com) ||
1014 		    (s_lab != e_lab) ||
1015 		    (s_code != e_code))
1016 		dump_line();
1017 	    *e_lab++ = '#';	/* move whole line to 'label' buffer */
1018 	    {
1019 		int         in_comment = 0;
1020 		int         com_start = 0;
1021 		char        quote = 0;
1022 		int         com_end = 0;
1023 
1024 		while (*buf_ptr == ' ' || *buf_ptr == '\t') {
1025 		    buf_ptr++;
1026 		    if (buf_ptr >= buf_end)
1027 			fill_buffer();
1028 		}
1029 		while (*buf_ptr != '\n' || (in_comment && !had_eof)) {
1030 		    CHECK_SIZE_LAB;
1031 		    *e_lab = *buf_ptr++;
1032 		    if (buf_ptr >= buf_end)
1033 			fill_buffer();
1034 		    switch (*e_lab++) {
1035 		    case BACKSLASH:
1036 			if (troff)
1037 			    *e_lab++ = BACKSLASH;
1038 			if (!in_comment) {
1039 			    *e_lab++ = *buf_ptr++;
1040 			    if (buf_ptr >= buf_end)
1041 				fill_buffer();
1042 			}
1043 			break;
1044 		    case '/':
1045 			if (*buf_ptr == '*' && !in_comment && !quote) {
1046 			    in_comment = 1;
1047 			    *e_lab++ = *buf_ptr++;
1048 			    com_start = e_lab - s_lab - 2;
1049 			}
1050 			break;
1051 		    case '"':
1052 			if (quote == '"')
1053 			    quote = 0;
1054 			break;
1055 		    case '\'':
1056 			if (quote == '\'')
1057 			    quote = 0;
1058 			break;
1059 		    case '*':
1060 			if (*buf_ptr == '/' && in_comment) {
1061 			    in_comment = 0;
1062 			    *e_lab++ = *buf_ptr++;
1063 			    com_end = e_lab - s_lab;
1064 			}
1065 			break;
1066 		    }
1067 		}
1068 
1069 		while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1070 		    e_lab--;
1071 		if (e_lab - s_lab == com_end && bp_save == 0) {	/* comment on
1072 								 * preprocessor line */
1073 		    if (sc_end == 0)	/* if this is the first comment, we
1074 					 * must set up the buffer */
1075 			sc_end = &(save_com[0]);
1076 		    else {
1077 			*sc_end++ = '\n';	/* add newline between
1078 						 * comments */
1079 			*sc_end++ = ' ';
1080 			--line_no;
1081 		    }
1082 		    bcopy(s_lab + com_start, sc_end, com_end - com_start);
1083 		    sc_end += com_end - com_start;
1084 		    if (sc_end >= &save_com[sc_size])
1085 			abort();
1086 		    e_lab = s_lab + com_start;
1087 		    while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1088 			e_lab--;
1089 		    bp_save = buf_ptr;	/* save current input buffer */
1090 		    be_save = buf_end;
1091 		    buf_ptr = save_com;	/* fix so that subsequent calls to
1092 					 * lexi will take tokens out of
1093 					 * save_com */
1094 		    *sc_end++ = ' ';	/* add trailing blank, just in case */
1095 		    buf_end = sc_end;
1096 		    sc_end = 0;
1097 		}
1098 		*e_lab = '\0';	/* null terminate line */
1099 		ps.pcase = false;
1100 	    }
1101 
1102 	    if (strncmp(s_lab, "#if", 3) == 0) {
1103 		if (blanklines_around_conditional_compilation) {
1104 		    int    c;
1105 		    prefix_blankline_requested++;
1106 		    while ((c = getc(input)) == '\n');
1107 		    ungetc(c, input);
1108 		}
1109 		if (ifdef_level < sizeof state_stack / sizeof state_stack[0]) {
1110 		    match_state[ifdef_level].tos = -1;
1111 		    state_stack[ifdef_level++] = ps;
1112 		}
1113 		else
1114 		    diag(1, "#if stack overflow");
1115 	    }
1116 	    else if (strncmp(s_lab, "#else", 5) == 0)
1117 		if (ifdef_level <= 0)
1118 		    diag(1, "Unmatched #else");
1119 		else {
1120 		    match_state[ifdef_level - 1] = ps;
1121 		    ps = state_stack[ifdef_level - 1];
1122 		}
1123 	    else if (strncmp(s_lab, "#endif", 6) == 0) {
1124 		if (ifdef_level <= 0)
1125 		    diag(1, "Unmatched #endif");
1126 		else {
1127 		    ifdef_level--;
1128 
1129 #ifdef undef
1130 		    /*
1131 		     * This match needs to be more intelligent before the
1132 		     * message is useful
1133 		     */
1134 		    if (match_state[ifdef_level].tos >= 0
1135 			  && bcmp(&ps, &match_state[ifdef_level], sizeof ps))
1136 			diag(0, "Syntactically inconsistent #ifdef alternatives.");
1137 #endif
1138 		}
1139 		if (blanklines_around_conditional_compilation) {
1140 		    postfix_blankline_requested++;
1141 		    n_real_blanklines = 0;
1142 		}
1143 	    }
1144 	    break;		/* subsequent processing of the newline
1145 				 * character will cause the line to be printed */
1146 
1147 	case comment:		/* we have gotten a comment this is a biggie */
1148 	    if (flushed_nl) {	/* we should force a broken line here */
1149 		flushed_nl = false;
1150 		dump_line();
1151 		ps.want_blank = false;	/* dont insert blank at line start */
1152 		force_nl = false;
1153 	    }
1154 	    pr_comment();
1155 	    break;
1156 	}			/* end of big switch stmt */
1157 
1158 	*e_code = '\0';		/* make sure code section is null terminated */
1159 	if (type_code != comment && type_code != newline && type_code != preesc)
1160 	    ps.last_token = type_code;
1161     }				/* end of main while (1) loop */
1162 }
1163 
1164 /*
1165  * copy input file to backup file if in_name is /blah/blah/blah/file, then
1166  * backup file will be ".Bfile" then make the backup file the input and
1167  * original input file the output
1168  */
1169 void
1170 bakcopy(void)
1171 {
1172     int         n,
1173                 bakchn;
1174     char        buff[8 * 1024];
1175     char       *p;
1176 
1177     /* construct file name .Bfile */
1178     for (p = in_name; *p; p++);	/* skip to end of string */
1179     while (p > in_name && *p != '/')	/* find last '/' */
1180 	p--;
1181     if (*p == '/')
1182 	p++;
1183     if (snprintf(bakfile, PATH_MAX, "%s.BAK", p) >= PATH_MAX)
1184 	    errc(1, ENAMETOOLONG, "%s.BAK", p);
1185 
1186     /* copy in_name to backup file */
1187     bakchn = open(bakfile, O_CREAT | O_TRUNC | O_WRONLY, 0600);
1188     if (bakchn == -1)
1189 	err(1, "%s", bakfile);
1190     while ((n = read(fileno(input), buff, sizeof buff)) > 0)
1191 	if (write(bakchn, buff, n) != n)
1192 	    err(1, "%s", bakfile);
1193     if (n == -1)
1194 	err(1, "%s", in_name);
1195     close(bakchn);
1196     fclose(input);
1197 
1198     /* re-open backup file as the input file */
1199     input = fopen(bakfile, "r");
1200     if (input == NULL)
1201 	err(1, "%s", bakfile);
1202     /* now the original input file will be the output */
1203     output = fopen(in_name, "w");
1204     if (output == NULL) {
1205 	int saved_errno = errno;
1206 	unlink(bakfile);
1207 	errc(1, saved_errno, "%s", in_name);
1208     }
1209 }
1210