1 /* $OpenBSD: indent.c,v 1.33 2022/12/26 19:16:01 jmc Exp $ */
2
3 /*
4 * Copyright (c) 1980, 1993
5 * The Regents of the University of California.
6 * Copyright (c) 1976 Board of Trustees of the University of Illinois.
7 * Copyright (c) 1985 Sun Microsystems, Inc.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <limits.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include "indent_globs.h"
42 #include "indent_codes.h"
43 #include <ctype.h>
44 #include <errno.h>
45 #include <err.h>
46
/*
 * Names and stdio streams of the file being read and the file being
 * written.  main() fills these in while scanning the command line.
 */
47 char *in_name = "Standard Input"; /* name of the input file; main() replaces
48 * it with the first non-option argument */
49 char *out_name = "Standard Output"; /* name of the output file; main() replaces
50 * it with the second non-option argument, or with in_name when only an input file was named */
51 char bakfile[PATH_MAX] = ""; /* NOTE(review): presumably the backup file name used by bakcopy() -- not referenced in the visible code, confirm */
52
53 FILE *input; /* the input stream; main() uses stdin when no input file is named */
54 FILE *output; /* the output stream; main() uses stdout when troff is set or input is stdin */
55
/*
 * Working buffers for the line being assembled and for raw input.
 * main() allocates labbuf, codebuf, combuf and tokenbuf with
 * malloc(bufsize), starts every s_xxx/e_xxx pair at buffer + 1 and sets
 * every l_xxx limit to buffer + bufsize - 5.
 */
56 char *labbuf; /* buffer for label */
57 char *s_lab; /* start ... */
58 char *e_lab; /* .. and end of stored label */
59 char *l_lab; /* limit of label buffer */
60
61 char *codebuf; /* buffer for code section */
62 char *s_code; /* start ... */
63 char *e_code; /* .. and end of stored code */
64 char *l_code; /* limit of code section */
65
66 char *combuf; /* buffer for comments */
67 char *s_com; /* start ... */
68 char *e_com; /* ... and end of stored comments */
69 char *l_com; /* limit of comment buffer */
70
71 char *tokenbuf; /* the last token scanned */
72 char *s_token; /* start ... */
73 char *e_token; /* ... and end of the stored token */
74 char *l_token; /* limit of token buffer */
75
76 char *in_buffer; /* input buffer; main() starts it at 10 bytes */
77 char *in_buffer_limit; /* the end of the input buffer; main() sets it to in_buffer + 8 */
78 char *buf_ptr; /* ptr to next character to be taken from the current
79 * input: in_buffer, or save_com while saved text is being replayed */
80 char *buf_end; /* ptr to the first position after the last char of the current input */
81
82 char save_com[sc_size]; /* input text is saved here when looking for
83 * the brace after an if, while, etc */
84 char *sc_end; /* pointer into save_com buffer; 0 while nothing has been saved */
85
86 char *bp_save; /* saved value of buf_ptr when taking input
87 * from save_com */
88 char *be_save; /* similarly saved value of buf_end */
89
/*
 * Formatting options and global formatter state.  main() calls
 * set_defaults(), then set_profile() unless -npro was given, then
 * set_option() for every argument that starts with '-'; those routines are
 * defined elsewhere.
 */
90 int pointer_as_binop;
91 int blanklines_after_declarations;
92 int blanklines_before_blockcomments;
93 int blanklines_after_procs; /* when true, main() sets postfix_blankline_requested after a '}' seen with ps.tos <= 1 and ps.dec_nest <= 0 */
94 int blanklines_around_conditional_compilation;
95 int swallow_optional_blanklines;
96 int n_real_blanklines;
97 int prefix_blankline_requested; /* main() clears this on '}', on a declaration keyword, and on '{' while ps.in_parameter_declaration is set */
98 int postfix_blankline_requested; /* set by main() after '}' (see blanklines_after_procs) and after '{' (see blanklines_after_declarations_at_proctop) */
99 int break_comma; /* when true and not in parens, break after a
100 * comma */
101 int btype_2; /* when true, brace should be on same line as
102 * if, while, etc */
103 float case_ind; /* indentation level to be used for a "case
104 * n:" */
105 int code_lines; /* count of lines with code */
106 int had_eof; /* set to true when input is exhausted */
107 int line_no; /* the current input line number; main() starts it at 1 */
108 int max_col; /* the maximum allowable line length */
109 int verbose; /* when true, non-essential error messages are
110 * printed */
111 int cuddle_else; /* true if else should cuddle up to '}' */
112 int star_comment_cont; /* true iff comment continuation lines should
113 * have stars at the beginning of each line. */
114 int comment_delimiter_on_blankline;
115 int troff; /* true iff we are generating troff input */
116 int procnames_start_line; /* if true, the names of procedures
117 * being defined get placed in column
118 * 1 (ie. a newline is placed between
119 * the type of the procedure and its
120 * name) */
121 int proc_calls_space; /* If true, procedure calls look like:
122 * foo (bar) rather than foo(bar) */
123 int format_col1_comments; /* If comments which start in column 1
124 * are to be magically reformatted
125 * (just like comments that begin in
126 * later columns) */
127 int inhibit_formatting; /* true if INDENT OFF is in effect */
128 int suppress_blanklines;/* set iff following blanklines should be
129 * suppressed */
130 int continuation_indent;/* set to the indentation between the edge of
131 * code and continuation lines; main() falls back to ps.ind_size when this is 0 */
132 int lineup_to_parens; /* if true, continued code within parens will
133 * be lined up to the open paren */
134 int Bill_Shannon; /* true iff a blank should always be inserted
135 * after sizeof */
136 int blanklines_after_declarations_at_proctop; /* This is vaguely
137 * similar to
138 * blanklines_after_decla
139 * rations except that
140 * it only applies to
141 * the first set of
142 * declarations in a
143 * procedure (just after
144 * the first '{') and it
145 * causes a blank line
146 * to be generated even
147 * if there are no
148 * declarations */
149 int block_comment_max_col; /* main() falls back to max_col when this is <= 0 */
150 int extra_expression_indent; /* True if continuation lines from the
151 * expression part of "if(e)",
152 * "while(e)", "for(e;e;e)" should be
153 * indented an extra tab stop so that
154 * they don't conflict with the code
155 * that follows */
156 int use_tabs; /* set true to use tabs for spacing,
157 * false uses all spaces */
158
159 /* -troff font state information */
/*
 * When troff is set, main() gives every font whose font[0] is still 0 a
 * default: bodyf "R", scomf "I", stringf "L" and keywordf "B" through
 * parsefont(); blkcomf becomes a copy of scomf with size increased by 2, and
 * boxcomf a copy of blkcomf.  Each font is then passed to writefdef().
 */
160
161 struct fstate
162 keywordf, /* keyword font */
163 stringf, /* string font */
164 boxcomf, /* Box comment font */
165 blkcomf, /* Block comment font */
166 scomf, /* Same line comment font */
167 bodyf; /* major body font */
168
169 struct parser_state ps; /* the current parser state; read and updated throughout main() */
170
171 int ifdef_level; /* NOTE(review): presumably the current #if nesting depth -- not referenced in the visible code, confirm */
172 int rparen_count; /* decremented by main() on every ')' or ']'; a ';' directly after a ')' clears ps.in_parameter_declaration only when this is 0 */
173 struct parser_state state_stack[5]; /* NOTE(review): presumably parser states saved per #if level -- not referenced in the visible code, confirm */
174 struct parser_state match_state[5]; /* NOTE(review): presumably used together with state_stack -- confirm */
175
176
/*
 * Defined elsewhere.  main() calls it after setting out_name to in_name, when
 * an input file was named, no output file was named and troff is off.
 */
177 void bakcopy(void);
178
179 int
main(int argc,char ** argv)180 main(int argc, char **argv)
181 {
182
183 extern int found_err; /* flag set in diag() on error */
184 int dec_ind; /* current indentation for declarations */
185 int di_stack[20]; /* a stack of structure indentation levels */
186 int flushed_nl; /* used when buffering up comments to remember
187 * that a newline was passed over */
188 int force_nl; /* when true, code must be broken */
189 int hd_type; /* used to store type of stmt for if (...),
190 * for (...), etc */
191 int i; /* local loop counter */
192 int scase; /* set to true when we see a case, so we will
193 * know what to do with the following colon */
194 int sp_sw; /* when true, we are in the expression of
195 * if(...), while(...), etc. */
196 int squest; /* when this is positive, we have seen a ?
197 * without the matching : in a <c>?<s>:<s>
198 * construct */
199 char *t_ptr; /* used for copying tokens */
200 int tabs_to_var; /* true if using tabs to indent to var name */
201 int type_code; /* the type of token, returned by lexi */
202
203 int last_else = 0; /* true iff last keyword was an else */
204
205 if (pledge("stdio rpath wpath cpath", NULL) == -1)
206 err(1, "pledge");
207
208 /*-----------------------------------------------*\
209 | INITIALIZATION |
210 \*-----------------------------------------------*/
211
212
213 hd_type = 0;
214 ps.p_stack[0] = stmt; /* this is the parser's stack */
215 ps.last_nl = true; /* this is true if the last thing scanned was
216 * a newline */
217 ps.last_token = semicolon;
218 combuf = malloc(bufsize);
219 labbuf = malloc(bufsize);
220 codebuf = malloc(bufsize);
221 tokenbuf = malloc(bufsize);
222 if (combuf == NULL || labbuf == NULL || codebuf == NULL ||
223 tokenbuf == NULL)
224 err(1, NULL);
225 l_com = combuf + bufsize - 5;
226 l_lab = labbuf + bufsize - 5;
227 l_code = codebuf + bufsize - 5;
228 l_token = tokenbuf + bufsize - 5;
229 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and
230 * comment buffers */
231 combuf[1] = codebuf[1] = labbuf[1] = '\0';
232 ps.else_if = 1; /* Default else-if special processing to on */
233 s_lab = e_lab = labbuf + 1;
234 s_code = e_code = codebuf + 1;
235 s_com = e_com = combuf + 1;
236 s_token = e_token = tokenbuf + 1;
237
238 in_buffer = malloc(10);
239 if (in_buffer == NULL)
240 err(1, NULL);
241 in_buffer_limit = in_buffer + 8;
242 buf_ptr = buf_end = in_buffer;
243 line_no = 1;
244 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false;
245 sp_sw = force_nl = false;
246 ps.in_or_st = false;
247 ps.bl_line = true;
248 dec_ind = 0;
249 di_stack[ps.dec_nest = 0] = 0;
250 ps.want_blank = ps.in_stmt = ps.ind_stmt = false;
251
252
253 scase = ps.pcase = false;
254 squest = 0;
255 sc_end = 0;
256 bp_save = 0;
257 be_save = 0;
258
259 output = 0;
260
261
262
263 /*--------------------------------------------------*\
264 | COMMAND LINE SCAN |
265 \*--------------------------------------------------*/
266
267 #ifdef undef
268 max_col = 78; /* -l78 */
269 lineup_to_parens = 1; /* -lp */
270 ps.ljust_decl = 0; /* -ndj */
271 ps.com_ind = 33; /* -c33 */
272 star_comment_cont = 1; /* -sc */
273 ps.ind_size = 8; /* -i8 */
274 verbose = 0;
275 ps.decl_indent = 16; /* -di16 */
276 ps.indent_parameters = 1; /* -ip */
277 ps.decl_com_ind = 0; /* if this is not set to some positive value
278 * by an arg, we will set this equal to
279 * ps.com_ind */
280 btype_2 = 1; /* -br */
281 cuddle_else = 1; /* -ce */
282 ps.unindent_displace = 0; /* -d0 */
283 ps.case_indent = 0; /* -cli0 */
284 format_col1_comments = 1; /* -fc1 */
285 procnames_start_line = 1; /* -psl */
286 proc_calls_space = 0; /* -npcs */
287 comment_delimiter_on_blankline = 1; /* -cdb */
288 ps.leave_comma = 1; /* -nbc */
289 #endif
290
291 for (i = 1; i < argc; ++i)
292 if (strcmp(argv[i], "-npro") == 0)
293 break;
294 set_defaults();
295 if (i >= argc)
296 set_profile();
297
298 for (i = 1; i < argc; ++i) {
299
300 /*
301 * look thru args (if any) for changes to defaults
302 */
303 if (argv[i][0] != '-') {/* no flag on parameter */
304 if (input == 0) { /* we must have the input file */
305 in_name = argv[i]; /* remember name of input file */
306 input = fopen(in_name, "r");
307 if (input == NULL) /* check for open error */
308 err(1, "%s", in_name);
309 continue;
310 }
311 else if (output == 0) { /* we have the output file */
312 out_name = argv[i]; /* remember name of output file */
313 if (strcmp(in_name, out_name) == 0) /* attempt to overwrite
314 * the file */
315 errx(1, "input and output files must be different");
316 output = fopen(out_name, "w");
317 if (output == NULL) /* check for create error */
318 err(1, "%s", out_name);
319 continue;
320 }
321 errx(1, "unknown parameter: %s", argv[i]);
322 }
323 else
324 set_option(argv[i]);
325 } /* end of for */
326 if (input == NULL) {
327 input = stdin;
328 }
329 if (output == NULL) {
330 if (troff || input == stdin)
331 output = stdout;
332 else {
333 out_name = in_name;
334 bakcopy();
335 }
336 }
337 if (ps.com_ind <= 1)
338 ps.com_ind = 2; /* dont put normal comments before column 2 */
339 if (troff) {
340 if (bodyf.font[0] == 0)
341 parsefont(&bodyf, "R");
342 if (scomf.font[0] == 0)
343 parsefont(&scomf, "I");
344 if (blkcomf.font[0] == 0)
345 blkcomf = scomf, blkcomf.size += 2;
346 if (boxcomf.font[0] == 0)
347 boxcomf = blkcomf;
348 if (stringf.font[0] == 0)
349 parsefont(&stringf, "L");
350 if (keywordf.font[0] == 0)
351 parsefont(&keywordf, "B");
352 writefdef(&bodyf, 'B');
353 writefdef(&scomf, 'C');
354 writefdef(&blkcomf, 'L');
355 writefdef(&boxcomf, 'X');
356 writefdef(&stringf, 'S');
357 writefdef(&keywordf, 'K');
358 }
359 if (block_comment_max_col <= 0)
360 block_comment_max_col = max_col;
361 if (ps.decl_com_ind <= 0) /* if not specified by user, set this */
362 ps.decl_com_ind = ps.ljust_decl ? (ps.com_ind <= 10 ? 2 : ps.com_ind - 8) : ps.com_ind;
363 if (continuation_indent == 0)
364 continuation_indent = ps.ind_size;
365 fill_buffer(); /* get first batch of stuff into input buffer */
366
367 parse(semicolon);
368 {
369 char *p = buf_ptr;
370 int col = 1;
371
372 while (1) {
373 if (*p == ' ')
374 col++;
375 else if (*p == '\t')
376 col = ((col - 1) & ~7) + 9;
377 else
378 break;
379 p++;
380 }
381 if (col > ps.ind_size)
382 ps.ind_level = ps.i_l_follow = col / ps.ind_size;
383 }
384 if (troff) {
385 char *p = in_name,
386 *beg = in_name;
387
388 while (*p)
389 if (*p++ == '/')
390 beg = p;
391 fprintf(output, ".Fn \"%s\"\n", beg);
392 }
393 /*
394 * START OF MAIN LOOP
395 */
396
397 while (1) { /* this is the main loop. it will go until we
398 * reach eof */
399 int is_procname;
400
401 type_code = lexi(); /* lexi reads one token. The actual
402 * characters read are stored in "token". lexi
403 * returns a code indicating the type of token */
404 is_procname = ps.procname[0];
405
406 /*
407 * The following code moves everything following an if (), while (),
408 * else, etc. up to the start of the following stmt to a buffer. This
409 * allows proper handling of both kinds of brace placement.
410 */
411
412 flushed_nl = false;
413 while (ps.search_brace) { /* if we scanned an if(), while(),
414 * etc., we might need to copy stuff
415 * into a buffer we must loop, copying
416 * stuff into save_com, until we find
417 * the start of the stmt which follows
418 * the if, or whatever */
419 switch (type_code) {
420 case newline:
421 ++line_no;
422 flushed_nl = true;
423 case form_feed:
424 break; /* form feeds and newlines found here will be
425 * ignored */
426
427 case lbrace: /* this is a brace that starts the compound
428 * stmt */
429 if (sc_end == 0) { /* ignore buffering if a comment wasnt
430 * stored up */
431 ps.search_brace = false;
432 goto check_type;
433 }
434 if (btype_2) {
435 save_com[0] = '{'; /* we either want to put the brace
436 * right after the if */
437 goto sw_buffer; /* go to common code to get out of
438 * this loop */
439 }
440 case comment: /* we have a comment, so we must copy it into
441 * the buffer */
442 if (!flushed_nl || sc_end != 0) {
443 if (sc_end == 0) { /* if this is the first comment, we
444 * must set up the buffer */
445 save_com[0] = save_com[1] = ' ';
446 sc_end = &(save_com[2]);
447 }
448 else {
449 *sc_end++ = '\n'; /* add newline between
450 * comments */
451 *sc_end++ = ' ';
452 --line_no;
453 }
454 *sc_end++ = '/'; /* copy in start of comment */
455 *sc_end++ = '*';
456
457 for (;;) { /* loop until we get to the end of the comment */
458 *sc_end = *buf_ptr++;
459 if (buf_ptr >= buf_end)
460 fill_buffer();
461
462 if (*sc_end++ == '*' && *buf_ptr == '/')
463 break; /* we are at end of comment */
464
465 if (sc_end >= &(save_com[sc_size])) { /* check for temp buffer
466 * overflow */
467 diag(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever.");
468 fflush(output);
469 exit(1);
470 }
471 }
472 *sc_end++ = '/'; /* add ending slash */
473 if (++buf_ptr >= buf_end) /* get past / in buffer */
474 fill_buffer();
475 break;
476 }
477 default: /* it is the start of a normal statement */
478 if (flushed_nl) /* if we flushed a newline, make sure it is
479 * put back */
480 force_nl = true;
481 if ((type_code == sp_paren && *token == 'i'
482 && last_else && ps.else_if) ||
483 (type_code == sp_nparen && *token == 'e'
484 && e_code != s_code && e_code[-1] == '}'))
485 force_nl = false;
486
487 if (sc_end == 0) { /* ignore buffering if comment wasnt
488 * saved up */
489 ps.search_brace = false;
490 goto check_type;
491 }
492 if (force_nl) { /* if we should insert a nl here, put it into
493 * the buffer */
494 force_nl = false;
495 --line_no; /* this will be re-increased when the nl is
496 * read from the buffer */
497 *sc_end++ = '\n';
498 *sc_end++ = ' ';
499 if (verbose && !flushed_nl) /* print error msg if the line
500 * was not already broken */
501 diag(0, "Line broken");
502 flushed_nl = false;
503 }
504 for (t_ptr = token; *t_ptr; ++t_ptr)
505 *sc_end++ = *t_ptr; /* copy token into temp buffer */
506 ps.procname[0] = 0;
507
508 sw_buffer:
509 ps.search_brace = false; /* stop looking for start of
510 * stmt */
511 bp_save = buf_ptr; /* save current input buffer */
512 be_save = buf_end;
513 buf_ptr = save_com; /* fix so that subsequent calls to
514 * lexi will take tokens out of
515 * save_com */
516 *sc_end++ = ' ';/* add trailing blank, just in case */
517 buf_end = sc_end;
518 sc_end = 0;
519 break;
520 } /* end of switch */
521 if (type_code != 0) /* we must make this check, just in case there
522 * was an unexpected EOF */
523 type_code = lexi(); /* read another token */
524 /* if (ps.search_brace) ps.procname[0] = 0; */
525 if ((is_procname = ps.procname[0]) && flushed_nl
526 && !procnames_start_line && ps.in_decl
527 && type_code == ident)
528 flushed_nl = 0;
529 } /* end of while (search_brace) */
530 last_else = 0;
531 check_type:
532 if (type_code == 0) { /* we got eof */
533 if (s_lab != e_lab || s_code != e_code
534 || s_com != e_com) /* must dump end of line */
535 dump_line();
536 if (ps.tos > 1) /* check for balanced braces */
537 diag(1, "Missing braces at end of file.");
538
539 if (verbose) {
540 printf("There were %d output lines and %d comments\n",
541 ps.out_lines, ps.out_coms);
542 printf("(Lines with comments)/(Lines with code): %6.3f\n",
543 (1.0 * ps.com_lines) / code_lines);
544 }
545 fflush(output);
546 exit(found_err);
547 }
548 if (
549 (type_code != comment) &&
550 (type_code != newline) &&
551 (type_code != preesc) &&
552 (type_code != form_feed)) {
553 if (force_nl &&
554 (type_code != semicolon) &&
555 (type_code != lbrace || !btype_2)) {
556 /* we should force a broken line here */
557 if (verbose && !flushed_nl)
558 diag(0, "Line broken");
559 flushed_nl = false;
560 dump_line();
561 ps.want_blank = false; /* dont insert blank at line start */
562 force_nl = false;
563 }
564 ps.in_stmt = true; /* turn on flag which causes an extra level of
565 * indentation. this is turned off by a ; or
566 * '}' */
567 if (s_com != e_com) { /* the turkey has embedded a comment
568 * in a line. fix it */
569 *e_code++ = ' ';
570 for (t_ptr = s_com; *t_ptr; ++t_ptr) {
571 CHECK_SIZE_CODE;
572 *e_code++ = *t_ptr;
573 }
574 *e_code++ = ' ';
575 *e_code = '\0'; /* null terminate code sect */
576 ps.want_blank = false;
577 e_com = s_com;
578 }
579 }
580 else if (type_code != comment) /* preserve force_nl thru a comment */
581 force_nl = false; /* cancel forced newline after newline, form
582 * feed, etc */
583
584
585
586 /*-----------------------------------------------------*\
587 | do switch on type of token scanned |
588 \*-----------------------------------------------------*/
589 CHECK_SIZE_CODE;
590 switch (type_code) { /* now, decide what to do with the token */
591
592 case form_feed: /* found a form feed in line */
593 ps.use_ff = true; /* a form feed is treated much like a newline */
594 dump_line();
595 ps.want_blank = false;
596 break;
597
598 case newline:
599 if (ps.last_token != comma || ps.p_l_follow > 0
600 || !ps.leave_comma || ps.block_init || !break_comma || s_com != e_com) {
601 dump_line();
602 ps.want_blank = false;
603 }
604 ++line_no; /* keep track of input line number */
605 break;
606
607 case lparen: /* got a '(' or '[' */
608 ++ps.p_l_follow; /* count parens to make Healy happy */
609 if (ps.want_blank && *token != '[' &&
610 (ps.last_token != ident || proc_calls_space
611 || (ps.its_a_keyword && (!ps.sizeof_keyword || Bill_Shannon))))
612 *e_code++ = ' ';
613 if (ps.in_decl && !ps.block_init)
614 if (troff && !ps.dumped_decl_indent && !is_procname && ps.last_token == decl) {
615 ps.dumped_decl_indent = 1;
616 snprintf(e_code, (l_code - e_code) + 5,
617 "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
618 e_code += strlen(e_code);
619 CHECK_SIZE_CODE;
620 }
621 else {
622 while ((e_code - s_code) < dec_ind) {
623 CHECK_SIZE_CODE;
624 *e_code++ = ' ';
625 }
626 *e_code++ = token[0];
627 }
628 else
629 *e_code++ = token[0];
630 ps.paren_indents[ps.p_l_follow - 1] = e_code - s_code;
631 if (sp_sw && ps.p_l_follow == 1 && extra_expression_indent
632 && ps.paren_indents[0] < 2 * ps.ind_size)
633 ps.paren_indents[0] = 2 * ps.ind_size;
634 ps.want_blank = false;
635 if (ps.in_or_st && *token == '(' && ps.tos <= 2) {
636 /*
637 * this is a kluge to make sure that declarations will be
638 * aligned right if proc decl has an explicit type on it, i.e.
639 * "int a(x) {..."
640 */
641 parse(semicolon); /* I said this was a kluge... */
642 ps.in_or_st = false; /* turn off flag for structure decl or
643 * initialization */
644 }
645 if (ps.sizeof_keyword)
646 ps.sizeof_mask |= 1 << ps.p_l_follow;
647 break;
648
649 case rparen: /* got a ')' or ']' */
650 rparen_count--;
651 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.sizeof_mask) {
652 ps.last_u_d = true;
653 ps.cast_mask &= (1 << ps.p_l_follow) - 1;
654 }
655 ps.sizeof_mask &= (1 << ps.p_l_follow) - 1;
656 if (--ps.p_l_follow < 0) {
657 ps.p_l_follow = 0;
658 diag(0, "Extra %c", *token);
659 }
660 if (e_code == s_code) /* if the paren starts the line */
661 ps.paren_level = ps.p_l_follow; /* then indent it */
662
663 *e_code++ = token[0];
664 ps.want_blank = true;
665
666 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if
667 * (...), or some such */
668 sp_sw = false;
669 force_nl = true;/* must force newline after if */
670 ps.last_u_d = true; /* inform lexi that a following
671 * operator is unary */
672 ps.in_stmt = false; /* dont use stmt continuation
673 * indentation */
674
675 parse(hd_type); /* let parser worry about if, or whatever */
676 }
677 ps.search_brace = btype_2; /* this should insure that constructs
678 * such as main(){...} and int[]{...}
679 * have their braces put in the right
680 * place */
681 break;
682
683 case unary_op: /* this could be any unary operation */
684 if (ps.want_blank)
685 *e_code++ = ' ';
686
687 if (troff && !ps.dumped_decl_indent && ps.in_decl && !is_procname) {
688 snprintf(e_code, (l_code - e_code) + 5,
689 "\n.Du %dp+\200p \"%s\"\n", dec_ind * 7, token);
690 ps.dumped_decl_indent = 1;
691 e_code += strlen(e_code);
692 CHECK_SIZE_CODE;
693 }
694 else {
695 char *res = token;
696
697 if (ps.in_decl && !ps.block_init) { /* if this is a unary op
698 * in a declaration, we
699 * should indent this
700 * token */
701 for (i = 0; token[i]; ++i); /* find length of token */
702 while ((e_code - s_code) < (dec_ind - i)) {
703 CHECK_SIZE_CODE;
704 *e_code++ = ' '; /* pad it */
705 }
706 }
707 if (troff && token[0] == '-' && token[1] == '>')
708 res = "\\(->";
709 for (t_ptr = res; *t_ptr; ++t_ptr) {
710 CHECK_SIZE_CODE;
711 *e_code++ = *t_ptr;
712 }
713 }
714 ps.want_blank = false;
715 break;
716
717 case binary_op: /* any binary operation */
718 if (ps.want_blank)
719 *e_code++ = ' ';
720 {
721 char *res = token;
722
723 if (troff)
724 switch (token[0]) {
725 case '<':
726 if (token[1] == '=')
727 res = "\\(<=";
728 break;
729 case '>':
730 if (token[1] == '=')
731 res = "\\(>=";
732 break;
733 case '!':
734 if (token[1] == '=')
735 res = "\\(!=";
736 break;
737 case '|':
738 if (token[1] == '|')
739 res = "\\(br\\(br";
740 else if (token[1] == 0)
741 res = "\\(br";
742 break;
743 }
744 for (t_ptr = res; *t_ptr; ++t_ptr) {
745 CHECK_SIZE_CODE;
746 *e_code++ = *t_ptr; /* move the operator */
747 }
748 }
749 ps.want_blank = true;
750 break;
751
752 case postop: /* got a trailing ++ or -- */
753 *e_code++ = token[0];
754 *e_code++ = token[1];
755 ps.want_blank = true;
756 break;
757
758 case question: /* got a ? */
759 squest++; /* this will be used when a later colon
760 * appears so we can distinguish the
761 * <c>?<n>:<n> construct */
762 if (ps.want_blank)
763 *e_code++ = ' ';
764 *e_code++ = '?';
765 ps.want_blank = true;
766 break;
767
768 case casestmt: /* got word 'case' or 'default' */
769 scase = true; /* so we can process the later colon properly */
770 goto copy_id;
771
772 case colon: /* got a ':' */
773 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */
774 --squest;
775 if (ps.want_blank)
776 *e_code++ = ' ';
777 *e_code++ = ':';
778 ps.want_blank = true;
779 break;
780 }
781 if (ps.in_decl) {
782 *e_code++ = ':';
783 ps.want_blank = false;
784 break;
785 }
786 ps.in_stmt = false; /* seeing a label does not imply we are in a
787 * stmt */
788 for (t_ptr = s_code; *t_ptr; ++t_ptr)
789 *e_lab++ = *t_ptr; /* turn everything so far into a label */
790 e_code = s_code;
791 *e_lab++ = ':';
792 *e_lab++ = ' ';
793 *e_lab = '\0';
794
795 force_nl = ps.pcase = scase; /* ps.pcase will be used by
796 * dump_line to decide how to
797 * indent the label. force_nl
798 * will force a case n: to be
799 * on a line by itself */
800 scase = false;
801 ps.want_blank = false;
802 break;
803
804 case semicolon: /* got a ';' */
805 ps.in_or_st = false;/* we are not in an initialization or
806 * structure declaration */
807 scase = false; /* these will only need resetting in a error */
808 squest = 0;
809 if (ps.last_token == rparen && rparen_count == 0)
810 ps.in_parameter_declaration = 0;
811 ps.cast_mask = 0;
812 ps.sizeof_mask = 0;
813 ps.block_init = 0;
814 ps.block_init_level = 0;
815 ps.just_saw_decl--;
816
817 if (ps.in_decl && s_code == e_code && !ps.block_init)
818 while ((e_code - s_code) < (dec_ind - 1)) {
819 CHECK_SIZE_CODE;
820 *e_code++ = ' ';
821 }
822
823 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level
824 * structure declaration, we
825 * arent any more */
826
827 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) {
828
829 /*
830 * This should be true iff there were unbalanced parens in the
831 * stmt. It is a bit complicated, because the semicolon might
832 * be in a for stmt
833 */
834 diag(1, "Unbalanced parens");
835 ps.p_l_follow = 0;
836 if (sp_sw) { /* this is a check for a if, while, etc. with
837 * unbalanced parens */
838 sp_sw = false;
839 parse(hd_type); /* dont lose the if, or whatever */
840 }
841 }
842 *e_code++ = ';';
843 ps.want_blank = true;
844 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the
845 * middle of a stmt */
846
847 if (!sp_sw) { /* if not if for (;;) */
848 parse(semicolon); /* let parser know about end of stmt */
849 force_nl = true;/* force newline after a end of stmt */
850 }
851 break;
852
853 case lbrace: /* got a '{' */
854 ps.in_stmt = false; /* dont indent the {} */
855 if (!ps.block_init)
856 force_nl = true;/* force other stuff on same line as '{' onto
857 * new line */
858 else if (ps.block_init_level <= 0)
859 ps.block_init_level = 1;
860 else
861 ps.block_init_level++;
862
863 if (s_code != e_code && !ps.block_init) {
864 if (!btype_2) {
865 dump_line();
866 ps.want_blank = false;
867 }
868 else if (ps.in_parameter_declaration && !ps.in_or_st) {
869 ps.i_l_follow = 0;
870 dump_line();
871 ps.want_blank = false;
872 }
873 }
874 if (ps.in_parameter_declaration)
875 prefix_blankline_requested = 0;
876
877 if (ps.p_l_follow > 0) { /* check for preceding unbalanced
878 * parens */
879 diag(1, "Unbalanced parens");
880 ps.p_l_follow = 0;
881 if (sp_sw) { /* check for unclosed if, for, etc. */
882 sp_sw = false;
883 parse(hd_type);
884 ps.ind_level = ps.i_l_follow;
885 }
886 }
887 if (s_code == e_code)
888 ps.ind_stmt = false; /* dont put extra indentation on line
889 * with '{' */
890 if (ps.in_decl && ps.in_or_st) { /* this is either a structure
891 * declaration or an init */
892 di_stack[ps.dec_nest++] = dec_ind;
893 /* ? dec_ind = 0; */
894 }
895 else {
896 ps.decl_on_line = false;
897 /* we can't be in the middle of a declaration, so don't do
898 * special indentation of comments */
899 if (blanklines_after_declarations_at_proctop
900 && ps.in_parameter_declaration)
901 postfix_blankline_requested = 1;
902 ps.in_parameter_declaration = 0;
903 }
904 dec_ind = 0;
905 parse(lbrace); /* let parser know about this */
906 if (ps.want_blank) /* put a blank before '{' if '{' is not at
907 * start of line */
908 *e_code++ = ' ';
909 ps.want_blank = false;
910 *e_code++ = '{';
911 ps.just_saw_decl = 0;
912 break;
913
914 case rbrace: /* got a '}' */
915 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be
916 * omitted in
917 * declarations */
918 parse(semicolon);
919 if (ps.p_l_follow) {/* check for unclosed if, for, else. */
920 diag(1, "Unbalanced parens");
921 ps.p_l_follow = 0;
922 sp_sw = false;
923 }
924 ps.just_saw_decl = 0;
925 ps.block_init_level--;
926 if (s_code != e_code && !ps.block_init) { /* '}' must be first on
927 * line */
928 if (verbose)
929 diag(0, "Line broken");
930 dump_line();
931 }
932 *e_code++ = '}';
933 ps.want_blank = true;
934 ps.in_stmt = ps.ind_stmt = false;
935 if (ps.dec_nest > 0) { /* we are in multi-level structure
936 * declaration */
937 dec_ind = di_stack[--ps.dec_nest];
938 if (ps.dec_nest == 0 && !ps.in_parameter_declaration)
939 ps.just_saw_decl = 2;
940 ps.in_decl = true;
941 }
942 prefix_blankline_requested = 0;
943 parse(rbrace); /* let parser know about this */
944 ps.search_brace = cuddle_else && ps.p_stack[ps.tos] == ifhead
945 && ps.il[ps.tos] >= ps.ind_level;
946 if (ps.tos <= 1 && blanklines_after_procs && ps.dec_nest <= 0)
947 postfix_blankline_requested = 1;
948 break;
949
950 case swstmt: /* got keyword "switch" */
951 sp_sw = true;
952 hd_type = swstmt; /* keep this for when we have seen the
953 * expression */
954 goto copy_id; /* go move the token into buffer */
955
956 case sp_paren: /* token is if, while, for */
957 sp_sw = true; /* the interesting stuff is done after the
958 * expression is scanned */
959 hd_type = (*token == 'i' ? ifstmt :
960 (*token == 'w' ? whilestmt : forstmt));
961
962 /*
963 * remember the type of header for later use by parser
964 */
965 goto copy_id; /* copy the token into line */
966
967 case sp_nparen: /* got else, do */
968 ps.in_stmt = false;
969 if (*token == 'e') {
970 if (e_code != s_code && (!cuddle_else || e_code[-1] != '}')) {
971 if (verbose)
972 diag(0, "Line broken");
973 dump_line();/* make sure this starts a line */
974 ps.want_blank = false;
975 }
976 force_nl = true;/* also, following stuff must go onto new line */
977 last_else = 1;
978 parse(elselit);
979 }
980 else {
981 if (e_code != s_code) { /* make sure this starts a line */
982 if (verbose)
983 diag(0, "Line broken");
984 dump_line();
985 ps.want_blank = false;
986 }
987 force_nl = true;/* also, following stuff must go onto new line */
988 last_else = 0;
989 parse(dolit);
990 }
991 goto copy_id; /* move the token into line */
992
993 case decl: /* we have a declaration type (int, register,
994 * etc.) */
995 parse(decl); /* let parser worry about indentation */
996 if (ps.last_token == rparen && ps.tos <= 1) {
997 ps.in_parameter_declaration = 1;
998 if (s_code != e_code) {
999 dump_line();
1000 ps.want_blank = 0;
1001 }
1002 }
1003 if (ps.in_parameter_declaration && ps.indent_parameters && ps.dec_nest == 0) {
1004 ps.ind_level = ps.i_l_follow = 1;
1005 ps.ind_stmt = 0;
1006 }
1007 ps.in_or_st = true; /* this might be a structure or initialization
1008 * declaration */
1009 ps.in_decl = ps.decl_on_line = true;
1010 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0)
1011 ps.just_saw_decl = 2;
1012 prefix_blankline_requested = 0;
1013 for (i = 0; token[i++];); /* get length of token */
1014
1015 /*
1016 * dec_ind = e_code - s_code + (ps.decl_indent>i ? ps.decl_indent
1017 * : i);
1018 */
1019 dec_ind = ps.decl_indent > 0 ? ps.decl_indent : i;
1020 tabs_to_var = (use_tabs ? ps.decl_indent > 0 : 0);
1021 goto copy_id;
1022
1023 case ident: /* got an identifier or constant */
1024 if (ps.in_decl) { /* if we are in a declaration, we must indent
1025 * identifier */
1026 if (ps.want_blank)
1027 *e_code++ = ' ';
1028 ps.want_blank = false;
1029 if (is_procname == 0 || !procnames_start_line) {
1030 if (!ps.block_init) {
1031 if (troff && !ps.dumped_decl_indent) {
1032 snprintf(e_code, (l_code - e_code) + 5,
1033 "\n.De %dp+\200p\n", dec_ind * 7);
1034 ps.dumped_decl_indent = 1;
1035 e_code += strlen(e_code);
1036 CHECK_SIZE_CODE;
1037 } else {
1038 int cur_dec_ind;
1039 int pos, startpos;
1040
1041 /*
1042 * in order to get the tab math right for
1043 * indentations that are not multiples of 8 we
1044 * need to modify both startpos and dec_ind
1045 * (cur_dec_ind) here by eight minus the
1046 * remainder of the current starting column
1047 * divided by eight. This seems to be a
1048 * properly working fix
1049 */
1050 startpos = e_code - s_code;
1051 cur_dec_ind = dec_ind;
1052 pos = startpos;
1053 if ((ps.ind_level * ps.ind_size) % 8 != 0) {
1054 pos += (ps.ind_level * ps.ind_size) % 8;
1055 cur_dec_ind += (ps.ind_level * ps.ind_size) % 8;
1056 }
1057
1058 if (tabs_to_var) {
1059 while ((pos & ~7) + 8 <= cur_dec_ind) {
1060 CHECK_SIZE_CODE;
1061 *e_code++ = '\t';
1062 pos = (pos & ~7) + 8;
1063 }
1064 }
1065 while (pos < cur_dec_ind) {
1066 CHECK_SIZE_CODE;
1067 *e_code++ = ' ';
1068 pos++;
1069 }
1070 if (ps.want_blank && e_code - s_code == startpos)
1071 *e_code++ = ' ';
1072 ps.want_blank = false;
1073 }
1074 }
1075 }
1076 else {
1077 if (dec_ind && s_code != e_code)
1078 dump_line();
1079 dec_ind = 0;
1080 ps.want_blank = false;
1081 }
1082 }
1083 else if (sp_sw && ps.p_l_follow == 0) {
1084 sp_sw = false;
1085 force_nl = true;
1086 ps.last_u_d = true;
1087 ps.in_stmt = false;
1088 parse(hd_type);
1089 }
1090 copy_id:
1091 if (ps.want_blank)
1092 *e_code++ = ' ';
1093 if (troff && ps.its_a_keyword) {
1094 e_code = chfont(&bodyf, &keywordf, e_code);
1095 for (t_ptr = token; *t_ptr; ++t_ptr) {
1096 CHECK_SIZE_CODE;
1097 *e_code++ = keywordf.allcaps &&
1098 islower((unsigned char)*t_ptr) ?
1099 toupper((unsigned char)*t_ptr) : *t_ptr;
1100 }
1101 e_code = chfont(&keywordf, &bodyf, e_code);
1102 }
1103 else
1104 for (t_ptr = token; *t_ptr; ++t_ptr) {
1105 CHECK_SIZE_CODE;
1106 *e_code++ = *t_ptr;
1107 }
1108 ps.want_blank = true;
1109 break;
1110
1111 case period: /* treat a period kind of like a binary
1112 * operation */
1113 *e_code++ = '.'; /* move the period into line */
1114 ps.want_blank = false; /* dont put a blank after a period */
1115 break;
1116
1117 case comma:
1118 ps.want_blank = (s_code != e_code); /* only put blank after comma
1119 * if comma does not start the
1120 * line */
1121 if (ps.in_decl && is_procname == 0 && !ps.block_init)
1122 while ((e_code - s_code) < (dec_ind - 1)) {
1123 CHECK_SIZE_CODE;
1124 *e_code++ = ' ';
1125 }
1126
1127 *e_code++ = ',';
1128 if (ps.p_l_follow == 0) {
1129 if (ps.block_init_level <= 0)
1130 ps.block_init = 0;
1131 if (break_comma && (!ps.leave_comma || compute_code_target() + (e_code - s_code) > max_col - 8))
1132 force_nl = true;
1133 }
1134 break;
1135
1136 case preesc: /* got the character '#' */
1137 if ((s_com != e_com) ||
1138 (s_lab != e_lab) ||
1139 (s_code != e_code))
1140 dump_line();
1141 *e_lab++ = '#'; /* move whole line to 'label' buffer */
1142 {
1143 int in_comment = 0;
1144 int com_start = 0;
1145 char quote = 0;
1146 int com_end = 0;
1147
1148 while (*buf_ptr == ' ' || *buf_ptr == '\t') {
1149 buf_ptr++;
1150 if (buf_ptr >= buf_end)
1151 fill_buffer();
1152 }
1153 while (*buf_ptr != '\n' || (in_comment && !had_eof)) {
1154 CHECK_SIZE_LAB;
1155 *e_lab = *buf_ptr++;
1156 if (buf_ptr >= buf_end)
1157 fill_buffer();
1158 switch (*e_lab++) {
1159 case BACKSLASH:
1160 if (troff)
1161 *e_lab++ = BACKSLASH;
1162 if (!in_comment) {
1163 *e_lab++ = *buf_ptr++;
1164 if (buf_ptr >= buf_end)
1165 fill_buffer();
1166 }
1167 break;
1168 case '/':
1169 if (*buf_ptr == '*' && !in_comment && !quote) {
1170 in_comment = 1;
1171 *e_lab++ = *buf_ptr++;
1172 com_start = e_lab - s_lab - 2;
1173 }
1174 break;
1175 case '"':
1176 if (quote == '"')
1177 quote = 0;
1178 break;
1179 case '\'':
1180 if (quote == '\'')
1181 quote = 0;
1182 break;
1183 case '*':
1184 if (*buf_ptr == '/' && in_comment) {
1185 in_comment = 0;
1186 *e_lab++ = *buf_ptr++;
1187 com_end = e_lab - s_lab;
1188 }
1189 break;
1190 }
1191 }
1192
1193 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1194 e_lab--;
1195 if (e_lab - s_lab == com_end && bp_save == 0) { /* comment on
1196 * preprocessor line */
1197 if (sc_end == 0) /* if this is the first comment, we
1198 * must set up the buffer */
1199 sc_end = &(save_com[0]);
1200 else {
1201 *sc_end++ = '\n'; /* add newline between
1202 * comments */
1203 *sc_end++ = ' ';
1204 --line_no;
1205 }
1206 bcopy(s_lab + com_start, sc_end, com_end - com_start);
1207 sc_end += com_end - com_start;
1208 if (sc_end >= &save_com[sc_size])
1209 abort();
1210 e_lab = s_lab + com_start;
1211 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t'))
1212 e_lab--;
1213 bp_save = buf_ptr; /* save current input buffer */
1214 be_save = buf_end;
1215 buf_ptr = save_com; /* fix so that subsequent calls to
1216 * lexi will take tokens out of
1217 * save_com */
1218 *sc_end++ = ' '; /* add trailing blank, just in case */
1219 buf_end = sc_end;
1220 sc_end = 0;
1221 }
1222 *e_lab = '\0'; /* null terminate line */
1223 ps.pcase = false;
1224 }
1225
1226 if (strncmp(s_lab, "#if", 3) == 0) {
1227 if (blanklines_around_conditional_compilation) {
1228 int c;
1229 prefix_blankline_requested++;
1230 while ((c = getc(input)) == '\n');
1231 ungetc(c, input);
1232 }
1233 if (ifdef_level < sizeof state_stack / sizeof state_stack[0]) {
1234 match_state[ifdef_level].tos = -1;
1235 state_stack[ifdef_level++] = ps;
1236 }
1237 else
1238 diag(1, "#if stack overflow");
1239 }
1240 else if (strncmp(s_lab, "#else", 5) == 0)
1241 if (ifdef_level <= 0)
1242 diag(1, "Unmatched #else");
1243 else {
1244 match_state[ifdef_level - 1] = ps;
1245 ps = state_stack[ifdef_level - 1];
1246 }
1247 else if (strncmp(s_lab, "#endif", 6) == 0) {
1248 if (ifdef_level <= 0)
1249 diag(1, "Unmatched #endif");
1250 else {
1251 ifdef_level--;
1252
1253 #ifdef undef
1254 /*
1255 * This match needs to be more intelligent before the
1256 * message is useful
1257 */
1258 if (match_state[ifdef_level].tos >= 0
1259 && bcmp(&ps, &match_state[ifdef_level], sizeof ps))
1260 diag(0, "Syntactically inconsistent #ifdef alternatives.");
1261 #endif
1262 }
1263 if (blanklines_around_conditional_compilation) {
1264 postfix_blankline_requested++;
1265 n_real_blanklines = 0;
1266 }
1267 }
1268 break; /* subsequent processing of the newline
1269 * character will cause the line to be printed */
1270
1271 case comment: /* we have gotten a comment this is a biggie */
1272 if (flushed_nl) { /* we should force a broken line here */
1273 flushed_nl = false;
1274 dump_line();
1275 ps.want_blank = false; /* dont insert blank at line start */
1276 force_nl = false;
1277 }
1278 pr_comment();
1279 break;
1280 } /* end of big switch stmt */
1281
1282 *e_code = '\0'; /* make sure code section is null terminated */
1283 if (type_code != comment && type_code != newline && type_code != preesc)
1284 ps.last_token = type_code;
1285 } /* end of main while (1) loop */
1286 }
1287
1288 /*
1289 * copy input file to backup file if in_name is /blah/blah/blah/file, then
1290 * backup file will be ".Bfile" then make the backup file the input and
1291 * original input file the output
1292 */
1293 void
bakcopy(void)1294 bakcopy(void)
1295 {
1296 int n,
1297 bakchn;
1298 char buff[8 * 1024];
1299 char *p;
1300
1301 /* construct file name .Bfile */
1302 for (p = in_name; *p; p++); /* skip to end of string */
1303 while (p > in_name && *p != '/') /* find last '/' */
1304 p--;
1305 if (*p == '/')
1306 p++;
1307 if (snprintf(bakfile, PATH_MAX, "%s.BAK", p) >= PATH_MAX)
1308 errc(1, ENAMETOOLONG, "%s.BAK", p);
1309
1310 /* copy in_name to backup file */
1311 bakchn = open(bakfile, O_CREAT | O_TRUNC | O_WRONLY, 0600);
1312 if (bakchn == -1)
1313 err(1, "%s", bakfile);
1314 while ((n = read(fileno(input), buff, sizeof buff)) > 0)
1315 if (write(bakchn, buff, n) != n)
1316 err(1, "%s", bakfile);
1317 if (n == -1)
1318 err(1, "%s", in_name);
1319 close(bakchn);
1320 fclose(input);
1321
1322 /* re-open backup file as the input file */
1323 input = fopen(bakfile, "r");
1324 if (input == NULL)
1325 err(1, "%s", bakfile);
1326 /* now the original input file will be the output */
1327 output = fopen(in_name, "w");
1328 if (output == NULL) {
1329 int saved_errno = errno;
1330 unlink(bakfile);
1331 errc(1, saved_errno, "%s", in_name);
1332 }
1333 }
1334