1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1985 Sun Microsystems, Inc. 5 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 6 * Copyright (c) 1980, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)indent.c 5.17 (Berkeley) 6/7/93 34 * $FreeBSD: head/usr.bin/indent/indent.c 340138 2018-11-04 19:24:49Z oshogbo $ 35 */ 36 37 #include <sys/param.h> 38 #include <err.h> 39 #include <errno.h> 40 #include <fcntl.h> 41 #include <unistd.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <ctype.h> 46 #include "indent_globs.h" 47 #include "indent_codes.h" 48 #include "indent.h" 49 50 static void bakcopy(void); 51 static void indent_declaration(int, int); 52 53 const char *in_name = "Standard Input"; /* will always point to name of input 54 * file */ 55 const char *out_name = "Standard Output"; /* will always point to name 56 * of output file */ 57 const char *simple_backup_suffix = ".BAK"; /* Suffix to use for backup 58 * files */ 59 char bakfile[MAXPATHLEN] = ""; 60 61 int 62 main(int argc, char **argv) 63 { 64 int dec_ind; /* current indentation for declarations */ 65 int di_stack[20]; /* a stack of structure indentation levels */ 66 int force_nl; /* when true, code must be broken */ 67 int hd_type = 0; /* used to store type of stmt for if (...), 68 * for (...), etc */ 69 int i; /* local loop counter */ 70 int scase; /* set to true when we see a case, so we will 71 * know what to do with the following colon */ 72 int sp_sw; /* when true, we are in the expression of 73 * if(...), while(...), etc. */ 74 int squest; /* when this is positive, we have seen a ? 75 * without the matching : in a <c>?<s>:<s> 76 * construct */ 77 const char *t_ptr; /* used for copying tokens */ 78 int tabs_to_var; /* true if using tabs to indent to var name */ 79 int type_code; /* the type of token, returned by lexi */ 80 81 int last_else = 0; /* true iff last keyword was an else */ 82 const char *profile_name = NULL; 83 const char *envval = NULL; 84 struct parser_state transient_state; /* a copy for lookup */ 85 86 /*-----------------------------------------------*\ 87 | INITIALIZATION | 88 \*-----------------------------------------------*/ 89 90 found_err = 0; 91 92 ps.p_stack[0] = stmt; /* this is the parser's stack */ 93 ps.last_nl = true; /* this is true if the last thing scanned was 94 * a newline */ 95 ps.last_token = semicolon; 96 combuf = (char *) malloc(bufsize); 97 if (combuf == NULL) 98 err(1, NULL); 99 labbuf = (char *) malloc(bufsize); 100 if (labbuf == NULL) 101 err(1, NULL); 102 codebuf = (char *) malloc(bufsize); 103 if (codebuf == NULL) 104 err(1, NULL); 105 tokenbuf = (char *) malloc(bufsize); 106 if (tokenbuf == NULL) 107 err(1, NULL); 108 alloc_typenames(); 109 init_constant_tt(); 110 l_com = combuf + bufsize - 5; 111 l_lab = labbuf + bufsize - 5; 112 l_code = codebuf + bufsize - 5; 113 l_token = tokenbuf + bufsize - 5; 114 combuf[0] = codebuf[0] = labbuf[0] = ' '; /* set up code, label, and 115 * comment buffers */ 116 combuf[1] = codebuf[1] = labbuf[1] = '\0'; 117 opt.else_if = 1; /* Default else-if special processing to on */ 118 s_lab = e_lab = labbuf + 1; 119 s_code = e_code = codebuf + 1; 120 s_com = e_com = combuf + 1; 121 s_token = e_token = tokenbuf + 1; 122 123 in_buffer = (char *) malloc(10); 124 if (in_buffer == NULL) 125 err(1, NULL); 126 in_buffer_limit = in_buffer + 8; 127 buf_ptr = buf_end = in_buffer; 128 line_no = 1; 129 had_eof = ps.in_decl = ps.decl_on_line = break_comma = false; 130 sp_sw = force_nl = false; 131 ps.in_or_st = false; 132 ps.bl_line = true; 133 dec_ind = 0; 134 di_stack[ps.dec_nest = 0] = 0; 135 ps.want_blank = ps.in_stmt = ps.ind_stmt = false; 136 137 scase = ps.pcase = false; 138 squest = 0; 139 sc_end = NULL; 140 bp_save = NULL; 141 be_save = NULL; 142 143 output = NULL; 144 tabs_to_var = 0; 145 146 envval = getenv("SIMPLE_BACKUP_SUFFIX"); 147 if (envval) 148 simple_backup_suffix = envval; 149 150 /*--------------------------------------------------*\ 151 | COMMAND LINE SCAN | 152 \*--------------------------------------------------*/ 153 154 #ifdef undef 155 max_col = 78; /* -l78 */ 156 lineup_to_parens = 1; /* -lp */ 157 lineup_to_parens_always = 0; /* -nlpl */ 158 ps.ljust_decl = 0; /* -ndj */ 159 ps.com_ind = 33; /* -c33 */ 160 star_comment_cont = 1; /* -sc */ 161 ps.ind_size = 8; /* -i8 */ 162 verbose = 0; 163 ps.decl_indent = 16; /* -di16 */ 164 ps.local_decl_indent = -1; /* if this is not set to some nonnegative value 165 * by an arg, we will set this equal to 166 * ps.decl_ind */ 167 ps.indent_parameters = 1; /* -ip */ 168 ps.decl_com_ind = 0; /* if this is not set to some positive value 169 * by an arg, we will set this equal to 170 * ps.com_ind */ 171 btype_2 = 1; /* -br */ 172 cuddle_else = 1; /* -ce */ 173 ps.unindent_displace = 0; /* -d0 */ 174 ps.case_indent = 0; /* -cli0 */ 175 format_block_comments = 1; /* -fcb */ 176 format_col1_comments = 1; /* -fc1 */ 177 procnames_start_line = 1; /* -psl */ 178 proc_calls_space = 0; /* -npcs */ 179 comment_delimiter_on_blankline = 1; /* -cdb */ 180 ps.leave_comma = 1; /* -nbc */ 181 #endif 182 183 for (i = 1; i < argc; ++i) 184 if (strcmp(argv[i], "-npro") == 0) 185 break; 186 else if (argv[i][0] == '-' && argv[i][1] == 'P' && argv[i][2] != '\0') 187 profile_name = argv[i]; /* non-empty -P (set profile) */ 188 set_defaults(); 189 if (i >= argc) 190 set_profile(profile_name); 191 192 for (i = 1; i < argc; ++i) { 193 194 /* 195 * look thru args (if any) for changes to defaults 196 */ 197 if (argv[i][0] != '-') { /* no flag on parameter */ 198 if (input == NULL) { /* we must have the input file */ 199 in_name = argv[i]; /* remember name of input file */ 200 input = fopen(in_name, "r"); 201 if (input == NULL) /* check for open error */ 202 err(1, "%s", in_name); 203 continue; 204 } 205 else if (output == NULL) { /* we have the output file */ 206 out_name = argv[i]; /* remember name of output file */ 207 if (strcmp(in_name, out_name) == 0) { /* attempt to overwrite 208 * the file */ 209 errx(1, "input and output files must be different"); 210 } 211 output = fopen(out_name, "w"); 212 if (output == NULL) /* check for create error */ 213 err(1, "%s", out_name); 214 continue; 215 } 216 errx(1, "unknown parameter: %s", argv[i]); 217 } 218 else 219 set_option(argv[i]); 220 } /* end of for */ 221 if (input == NULL) 222 input = stdin; 223 if (output == NULL) { 224 if (input == stdin) 225 output = stdout; 226 else { 227 out_name = in_name; 228 bakcopy(); 229 } 230 } 231 232 if (opt.com_ind <= 1) 233 opt.com_ind = 2; /* don't put normal comments before column 2 */ 234 if (opt.block_comment_max_col <= 0) 235 opt.block_comment_max_col = opt.max_col; 236 if (opt.local_decl_indent < 0) /* if not specified by user, set this */ 237 opt.local_decl_indent = opt.decl_indent; 238 if (opt.decl_com_ind <= 0) /* if not specified by user, set this */ 239 opt.decl_com_ind = opt.ljust_decl ? (opt.com_ind <= 10 ? 2 : opt.com_ind - 8) : opt.com_ind; 240 if (opt.continuation_indent == 0) 241 opt.continuation_indent = opt.ind_size; 242 fill_buffer(); /* get first batch of stuff into input buffer */ 243 244 parse(semicolon); 245 { 246 char *p = buf_ptr; 247 int col = 1; 248 249 while (1) { 250 if (*p == ' ') 251 col++; 252 else if (*p == '\t') 253 col = opt.tabsize * (1 + (col - 1) / opt.tabsize) + 1; 254 else 255 break; 256 p++; 257 } 258 if (col > opt.ind_size) 259 ps.ind_level = ps.i_l_follow = col / opt.ind_size; 260 } 261 262 /* 263 * START OF MAIN LOOP 264 */ 265 266 while (1) { /* this is the main loop. it will go until we 267 * reach eof */ 268 int comment_buffered = false; 269 270 type_code = lexi(&ps); /* lexi reads one token. The actual 271 * characters read are stored in "token". lexi 272 * returns a code indicating the type of token */ 273 274 /* 275 * The following code moves newlines and comments following an if (), 276 * while (), else, etc. up to the start of the following stmt to 277 * a buffer. This allows proper handling of both kinds of brace 278 * placement (-br, -bl) and cuddling "else" (-ce). 279 */ 280 281 while (ps.search_brace) { 282 switch (type_code) { 283 case newline: 284 if (sc_end == NULL) { 285 save_com = sc_buf; 286 save_com[0] = save_com[1] = ' '; 287 sc_end = &save_com[2]; 288 } 289 *sc_end++ = '\n'; 290 /* 291 * We may have inherited a force_nl == true from the previous 292 * token (like a semicolon). But once we know that a newline 293 * has been scanned in this loop, force_nl should be false. 294 * 295 * However, the force_nl == true must be preserved if newline 296 * is never scanned in this loop, so this assignment cannot be 297 * done earlier. 298 */ 299 force_nl = false; 300 case form_feed: 301 break; 302 case comment: 303 if (sc_end == NULL) { 304 /* 305 * Copy everything from the start of the line, because 306 * pr_comment() will use that to calculate original 307 * indentation of a boxed comment. 308 */ 309 memcpy(sc_buf, in_buffer, buf_ptr - in_buffer - 4); 310 save_com = sc_buf + (buf_ptr - in_buffer - 4); 311 save_com[0] = save_com[1] = ' '; 312 sc_end = &save_com[2]; 313 } 314 comment_buffered = true; 315 *sc_end++ = '/'; /* copy in start of comment */ 316 *sc_end++ = '*'; 317 for (;;) { /* loop until we get to the end of the comment */ 318 *sc_end = *buf_ptr++; 319 if (buf_ptr >= buf_end) 320 fill_buffer(); 321 if (*sc_end++ == '*' && *buf_ptr == '/') 322 break; /* we are at end of comment */ 323 if (sc_end >= &save_com[sc_size]) { /* check for temp buffer 324 * overflow */ 325 diag2(1, "Internal buffer overflow - Move big comment from right after if, while, or whatever"); 326 fflush(output); 327 exit(1); 328 } 329 } 330 /* FALLTHROUGH */ 331 *sc_end++ = '/'; /* add ending slash */ 332 if (++buf_ptr >= buf_end) /* get past / in buffer */ 333 fill_buffer(); 334 break; 335 case lbrace: 336 /* 337 * Put KNF-style lbraces before the buffered up tokens and 338 * jump out of this loop in order to avoid copying the token 339 * again under the default case of the switch below. 340 */ 341 if (sc_end != NULL && opt.btype_2) { 342 save_com[0] = '{'; 343 /* 344 * Originally the lbrace may have been alone on its own 345 * line, but it will be moved into "the else's line", so 346 * if there was a newline resulting from the "{" before, 347 * it must be scanned now and ignored. 348 */ 349 while (isspace((unsigned char)*buf_ptr)) { 350 if (++buf_ptr >= buf_end) 351 fill_buffer(); 352 if (*buf_ptr == '\n') 353 break; 354 } 355 goto sw_buffer; 356 } 357 /* FALLTHROUGH */ 358 default: /* it is the start of a normal statement */ 359 { 360 int remove_newlines; 361 362 remove_newlines = 363 /* "} else" */ 364 (type_code == sp_nparen && *token == 'e' && 365 e_code != s_code && e_code[-1] == '}') 366 /* "else if" */ 367 || (type_code == sp_paren && *token == 'i' && 368 last_else && opt.else_if); 369 if (remove_newlines) 370 force_nl = false; 371 if (sc_end == NULL) { /* ignore buffering if 372 * comment wasn't saved up */ 373 ps.search_brace = false; 374 goto check_type; 375 } 376 while (sc_end > save_com && isblank((unsigned char)sc_end[-1])) { 377 sc_end--; 378 } 379 if (opt.swallow_optional_blanklines || 380 (!comment_buffered && remove_newlines)) { 381 force_nl = !remove_newlines; 382 while (sc_end > save_com && sc_end[-1] == '\n') { 383 sc_end--; 384 } 385 } 386 if (force_nl) { /* if we should insert a nl here, put 387 * it into the buffer */ 388 force_nl = false; 389 --line_no; /* this will be re-increased when the 390 * newline is read from the buffer */ 391 *sc_end++ = '\n'; 392 *sc_end++ = ' '; 393 if (opt.verbose) /* print error msg if the line was 394 * not already broken */ 395 diag2(0, "Line broken"); 396 } 397 for (t_ptr = token; *t_ptr; ++t_ptr) 398 *sc_end++ = *t_ptr; 399 400 sw_buffer: 401 ps.search_brace = false; /* stop looking for start of 402 * stmt */ 403 bp_save = buf_ptr; /* save current input buffer */ 404 be_save = buf_end; 405 buf_ptr = save_com; /* fix so that subsequent calls to 406 * lexi will take tokens out of 407 * save_com */ 408 *sc_end++ = ' ';/* add trailing blank, just in case */ 409 buf_end = sc_end; 410 sc_end = NULL; 411 break; 412 } 413 } /* end of switch */ 414 /* 415 * We must make this check, just in case there was an unexpected 416 * EOF. 417 */ 418 if (type_code != 0) { 419 /* 420 * The only intended purpose of calling lexi() below is to 421 * categorize the next token in order to decide whether to 422 * continue buffering forthcoming tokens. Once the buffering 423 * is over, lexi() will be called again elsewhere on all of 424 * the tokens - this time for normal processing. 425 * 426 * Calling it for this purpose is a bug, because lexi() also 427 * changes the parser state and discards leading whitespace, 428 * which is needed mostly for comment-related considerations. 429 * 430 * Work around the former problem by giving lexi() a copy of 431 * the current parser state and discard it if the call turned 432 * out to be just a look ahead. 433 * 434 * Work around the latter problem by copying all whitespace 435 * characters into the buffer so that the later lexi() call 436 * will read them. 437 */ 438 if (sc_end != NULL) { 439 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 440 *sc_end++ = *buf_ptr++; 441 if (sc_end >= &save_com[sc_size]) { 442 errx(1, "input too long"); 443 } 444 } 445 if (buf_ptr >= buf_end) { 446 fill_buffer(); 447 } 448 } 449 transient_state = ps; 450 type_code = lexi(&transient_state); /* read another token */ 451 if (type_code != newline && type_code != form_feed && 452 type_code != comment && !transient_state.search_brace) { 453 ps = transient_state; 454 } 455 } 456 } /* end of while (search_brace) */ 457 last_else = 0; 458 check_type: 459 if (type_code == 0) { /* we got eof */ 460 if (s_lab != e_lab || s_code != e_code 461 || s_com != e_com) /* must dump end of line */ 462 dump_line(); 463 if (ps.tos > 1) /* check for balanced braces */ 464 diag2(1, "Stuff missing from end of file"); 465 466 if (opt.verbose) { 467 printf("There were %d output lines and %d comments\n", 468 ps.out_lines, ps.out_coms); 469 printf("(Lines with comments)/(Lines with code): %6.3f\n", 470 (1.0 * ps.com_lines) / code_lines); 471 } 472 fflush(output); 473 exit(found_err); 474 } 475 if ( 476 (type_code != comment) && 477 (type_code != newline) && 478 (type_code != preesc) && 479 (type_code != form_feed)) { 480 if (force_nl && 481 (type_code != semicolon) && 482 (type_code != lbrace || !opt.btype_2)) { 483 /* we should force a broken line here */ 484 if (opt.verbose) 485 diag2(0, "Line broken"); 486 dump_line(); 487 ps.want_blank = false; /* dont insert blank at line start */ 488 force_nl = false; 489 } 490 ps.in_stmt = true; /* turn on flag which causes an extra level of 491 * indentation. this is turned off by a ; or 492 * '}' */ 493 if (s_com != e_com) { /* the turkey has embedded a comment 494 * in a line. fix it */ 495 int len = e_com - s_com; 496 497 CHECK_SIZE_CODE(len + 3); 498 *e_code++ = ' '; 499 memcpy(e_code, s_com, len); 500 e_code += len; 501 *e_code++ = ' '; 502 *e_code = '\0'; /* null terminate code sect */ 503 ps.want_blank = false; 504 e_com = s_com; 505 } 506 } 507 else if (type_code != comment) /* preserve force_nl thru a comment */ 508 force_nl = false; /* cancel forced newline after newline, form 509 * feed, etc */ 510 511 512 513 /*-----------------------------------------------------*\ 514 | do switch on type of token scanned | 515 \*-----------------------------------------------------*/ 516 CHECK_SIZE_CODE(3); /* maximum number of increments of e_code 517 * before the next CHECK_SIZE_CODE or 518 * dump_line() is 2. After that there's the 519 * final increment for the null character. */ 520 switch (type_code) { /* now, decide what to do with the token */ 521 522 case form_feed: /* found a form feed in line */ 523 ps.use_ff = true; /* a form feed is treated much like a newline */ 524 dump_line(); 525 ps.want_blank = false; 526 break; 527 528 case newline: 529 if (ps.last_token != comma || ps.p_l_follow > 0 530 || !opt.leave_comma || ps.block_init || !break_comma || s_com != e_com) { 531 dump_line(); 532 ps.want_blank = false; 533 } 534 ++line_no; /* keep track of input line number */ 535 break; 536 537 case lparen: /* got a '(' or '[' */ 538 /* count parens to make Healy happy */ 539 if (++ps.p_l_follow == nitems(ps.paren_indents)) { 540 diag3(0, "Reached internal limit of %d unclosed parens", 541 nitems(ps.paren_indents)); 542 ps.p_l_follow--; 543 } 544 if (*token == '[') 545 /* not a function pointer declaration or a function call */; 546 else if (ps.in_decl && !ps.block_init && !ps.dumped_decl_indent && 547 ps.procname[0] == '\0' && ps.paren_level == 0) { 548 /* function pointer declarations */ 549 indent_declaration(dec_ind, tabs_to_var); 550 ps.dumped_decl_indent = true; 551 } 552 else if (ps.want_blank && 553 ((ps.last_token != ident && ps.last_token != funcname) || 554 opt.proc_calls_space || 555 /* offsetof (1) is never allowed a space; sizeof (2) gets 556 * one iff -bs; all other keywords (>2) always get a space 557 * before lparen */ 558 ps.keyword + opt.Bill_Shannon > 2)) 559 *e_code++ = ' '; 560 ps.want_blank = false; 561 *e_code++ = token[0]; 562 ps.paren_indents[ps.p_l_follow - 1] = count_spaces_until(1, s_code, e_code) - 1; 563 if (sp_sw && ps.p_l_follow == 1 && opt.extra_expression_indent 564 && ps.paren_indents[0] < 2 * opt.ind_size) 565 ps.paren_indents[0] = 2 * opt.ind_size; 566 if (ps.in_or_st && *token == '(' && ps.tos <= 2) { 567 /* 568 * this is a kluge to make sure that declarations will be 569 * aligned right if proc decl has an explicit type on it, i.e. 570 * "int a(x) {..." 571 */ 572 parse(semicolon); /* I said this was a kluge... */ 573 ps.in_or_st = false; /* turn off flag for structure decl or 574 * initialization */ 575 } 576 /* parenthesized type following sizeof or offsetof is not a cast */ 577 if (ps.keyword == 1 || ps.keyword == 2) 578 ps.not_cast_mask |= 1 << ps.p_l_follow; 579 break; 580 581 case rparen: /* got a ')' or ']' */ 582 if (ps.cast_mask & (1 << ps.p_l_follow) & ~ps.not_cast_mask) { 583 ps.last_u_d = true; 584 ps.cast_mask &= (1 << ps.p_l_follow) - 1; 585 ps.want_blank = opt.space_after_cast; 586 } else 587 ps.want_blank = true; 588 ps.not_cast_mask &= (1 << ps.p_l_follow) - 1; 589 if (--ps.p_l_follow < 0) { 590 ps.p_l_follow = 0; 591 diag3(0, "Extra %c", *token); 592 } 593 if (e_code == s_code) /* if the paren starts the line */ 594 ps.paren_level = ps.p_l_follow; /* then indent it */ 595 596 *e_code++ = token[0]; 597 598 if (sp_sw && (ps.p_l_follow == 0)) { /* check for end of if 599 * (...), or some such */ 600 sp_sw = false; 601 force_nl = true;/* must force newline after if */ 602 ps.last_u_d = true; /* inform lexi that a following 603 * operator is unary */ 604 ps.in_stmt = false; /* dont use stmt continuation 605 * indentation */ 606 607 parse(hd_type); /* let parser worry about if, or whatever */ 608 } 609 ps.search_brace = opt.btype_2; /* this should ensure that 610 * constructs such as main(){...} 611 * and int[]{...} have their braces 612 * put in the right place */ 613 break; 614 615 case unary_op: /* this could be any unary operation */ 616 if (!ps.dumped_decl_indent && ps.in_decl && !ps.block_init && 617 ps.procname[0] == '\0' && ps.paren_level == 0) { 618 /* pointer declarations */ 619 620 /* 621 * if this is a unary op in a declaration, we should indent 622 * this token 623 */ 624 for (i = 0; token[i]; ++i) 625 /* find length of token */; 626 indent_declaration(dec_ind - i, tabs_to_var); 627 ps.dumped_decl_indent = true; 628 } 629 else if (ps.want_blank) 630 *e_code++ = ' '; 631 632 { 633 int len = e_token - s_token; 634 635 CHECK_SIZE_CODE(len); 636 memcpy(e_code, token, len); 637 e_code += len; 638 } 639 ps.want_blank = false; 640 break; 641 642 case binary_op: /* any binary operation */ 643 { 644 int len = e_token - s_token; 645 646 CHECK_SIZE_CODE(len + 1); 647 if (ps.want_blank) 648 *e_code++ = ' '; 649 memcpy(e_code, token, len); 650 e_code += len; 651 } 652 ps.want_blank = true; 653 break; 654 655 case postop: /* got a trailing ++ or -- */ 656 *e_code++ = token[0]; 657 *e_code++ = token[1]; 658 ps.want_blank = true; 659 break; 660 661 case question: /* got a ? */ 662 squest++; /* this will be used when a later colon 663 * appears so we can distinguish the 664 * <c>?<n>:<n> construct */ 665 if (ps.want_blank) 666 *e_code++ = ' '; 667 *e_code++ = '?'; 668 ps.want_blank = true; 669 break; 670 671 case casestmt: /* got word 'case' or 'default' */ 672 scase = true; /* so we can process the later colon properly */ 673 goto copy_id; 674 675 case colon: /* got a ':' */ 676 if (squest > 0) { /* it is part of the <c>?<n>: <n> construct */ 677 --squest; 678 if (ps.want_blank) 679 *e_code++ = ' '; 680 *e_code++ = ':'; 681 ps.want_blank = true; 682 break; 683 } 684 if (ps.in_or_st) { 685 *e_code++ = ':'; 686 ps.want_blank = false; 687 break; 688 } 689 ps.in_stmt = false; /* seeing a label does not imply we are in a 690 * stmt */ 691 /* 692 * turn everything so far into a label 693 */ 694 { 695 int len = e_code - s_code; 696 697 CHECK_SIZE_LAB(len + 3); 698 memcpy(e_lab, s_code, len); 699 e_lab += len; 700 *e_lab++ = ':'; 701 *e_lab = '\0'; 702 e_code = s_code; 703 } 704 force_nl = ps.pcase = scase; /* ps.pcase will be used by 705 * dump_line to decide how to 706 * indent the label. force_nl 707 * will force a case n: to be 708 * on a line by itself */ 709 scase = false; 710 ps.want_blank = false; 711 break; 712 713 case semicolon: /* got a ';' */ 714 if (ps.dec_nest == 0) 715 ps.in_or_st = false;/* we are not in an initialization or 716 * structure declaration */ 717 scase = false; /* these will only need resetting in an error */ 718 squest = 0; 719 if (ps.last_token == rparen) 720 ps.in_parameter_declaration = 0; 721 ps.cast_mask = 0; 722 ps.not_cast_mask = 0; 723 ps.block_init = 0; 724 ps.block_init_level = 0; 725 ps.just_saw_decl--; 726 727 if (ps.in_decl && s_code == e_code && !ps.block_init && 728 !ps.dumped_decl_indent && ps.paren_level == 0) { 729 /* indent stray semicolons in declarations */ 730 indent_declaration(dec_ind - 1, tabs_to_var); 731 ps.dumped_decl_indent = true; 732 } 733 734 ps.in_decl = (ps.dec_nest > 0); /* if we were in a first level 735 * structure declaration, we 736 * arent any more */ 737 738 if ((!sp_sw || hd_type != forstmt) && ps.p_l_follow > 0) { 739 740 /* 741 * This should be true iff there were unbalanced parens in the 742 * stmt. It is a bit complicated, because the semicolon might 743 * be in a for stmt 744 */ 745 diag2(1, "Unbalanced parens"); 746 ps.p_l_follow = 0; 747 if (sp_sw) { /* this is a check for an if, while, etc. with 748 * unbalanced parens */ 749 sp_sw = false; 750 parse(hd_type); /* dont lose the if, or whatever */ 751 } 752 } 753 *e_code++ = ';'; 754 ps.want_blank = true; 755 ps.in_stmt = (ps.p_l_follow > 0); /* we are no longer in the 756 * middle of a stmt */ 757 758 if (!sp_sw) { /* if not if for (;;) */ 759 parse(semicolon); /* let parser know about end of stmt */ 760 force_nl = true;/* force newline after an end of stmt */ 761 } 762 break; 763 764 case lbrace: /* got a '{' */ 765 ps.in_stmt = false; /* dont indent the {} */ 766 if (!ps.block_init) 767 force_nl = true;/* force other stuff on same line as '{' onto 768 * new line */ 769 else if (ps.block_init_level <= 0) 770 ps.block_init_level = 1; 771 else 772 ps.block_init_level++; 773 774 if (s_code != e_code && !ps.block_init) { 775 if (!opt.btype_2) { 776 dump_line(); 777 ps.want_blank = false; 778 } 779 else if (ps.in_parameter_declaration && !ps.in_or_st) { 780 ps.i_l_follow = 0; 781 if (opt.function_brace_split) { /* dump the line prior 782 * to the brace ... */ 783 dump_line(); 784 ps.want_blank = false; 785 } else /* add a space between the decl and brace */ 786 ps.want_blank = true; 787 } 788 } 789 if (ps.in_parameter_declaration) 790 prefix_blankline_requested = 0; 791 792 if (ps.p_l_follow > 0) { /* check for preceding unbalanced 793 * parens */ 794 diag2(1, "Unbalanced parens"); 795 ps.p_l_follow = 0; 796 if (sp_sw) { /* check for unclosed if, for, etc. */ 797 sp_sw = false; 798 parse(hd_type); 799 ps.ind_level = ps.i_l_follow; 800 } 801 } 802 if (s_code == e_code) 803 ps.ind_stmt = false; /* dont put extra indentation on line 804 * with '{' */ 805 if (ps.in_decl && ps.in_or_st) { /* this is either a structure 806 * declaration or an init */ 807 di_stack[ps.dec_nest] = dec_ind; 808 if (++ps.dec_nest == nitems(di_stack)) { 809 diag3(0, "Reached internal limit of %d struct levels", 810 nitems(di_stack)); 811 ps.dec_nest--; 812 } 813 /* ? dec_ind = 0; */ 814 } 815 else { 816 ps.decl_on_line = false; /* we can't be in the middle of 817 * a declaration, so don't do 818 * special indentation of 819 * comments */ 820 if (opt.blanklines_after_declarations_at_proctop 821 && ps.in_parameter_declaration) 822 postfix_blankline_requested = 1; 823 ps.in_parameter_declaration = 0; 824 ps.in_decl = false; 825 } 826 dec_ind = 0; 827 parse(lbrace); /* let parser know about this */ 828 if (ps.want_blank) /* put a blank before '{' if '{' is not at 829 * start of line */ 830 *e_code++ = ' '; 831 ps.want_blank = false; 832 *e_code++ = '{'; 833 ps.just_saw_decl = 0; 834 break; 835 836 case rbrace: /* got a '}' */ 837 if (ps.p_stack[ps.tos] == decl && !ps.block_init) /* semicolons can be 838 * omitted in 839 * declarations */ 840 parse(semicolon); 841 if (ps.p_l_follow) {/* check for unclosed if, for, else. */ 842 diag2(1, "Unbalanced parens"); 843 ps.p_l_follow = 0; 844 sp_sw = false; 845 } 846 ps.just_saw_decl = 0; 847 ps.block_init_level--; 848 if (s_code != e_code && !ps.block_init) { /* '}' must be first on 849 * line */ 850 if (opt.verbose) 851 diag2(0, "Line broken"); 852 dump_line(); 853 } 854 *e_code++ = '}'; 855 ps.want_blank = true; 856 ps.in_stmt = ps.ind_stmt = false; 857 if (ps.dec_nest > 0) { /* we are in multi-level structure 858 * declaration */ 859 dec_ind = di_stack[--ps.dec_nest]; 860 if (ps.dec_nest == 0 && !ps.in_parameter_declaration) 861 ps.just_saw_decl = 2; 862 ps.in_decl = true; 863 } 864 prefix_blankline_requested = 0; 865 parse(rbrace); /* let parser know about this */ 866 ps.search_brace = opt.cuddle_else && ps.p_stack[ps.tos] == ifhead 867 && ps.il[ps.tos] >= ps.ind_level; 868 if (ps.tos <= 1 && opt.blanklines_after_procs && ps.dec_nest <= 0) 869 postfix_blankline_requested = 1; 870 break; 871 872 case swstmt: /* got keyword "switch" */ 873 sp_sw = true; 874 hd_type = swstmt; /* keep this for when we have seen the 875 * expression */ 876 goto copy_id; /* go move the token into buffer */ 877 878 case sp_paren: /* token is if, while, for */ 879 sp_sw = true; /* the interesting stuff is done after the 880 * expression is scanned */ 881 hd_type = (*token == 'i' ? ifstmt : 882 (*token == 'w' ? whilestmt : forstmt)); 883 884 /* 885 * remember the type of header for later use by parser 886 */ 887 goto copy_id; /* copy the token into line */ 888 889 case sp_nparen: /* got else, do */ 890 ps.in_stmt = false; 891 if (*token == 'e') { 892 if (e_code != s_code && (!opt.cuddle_else || e_code[-1] != '}')) { 893 if (opt.verbose) 894 diag2(0, "Line broken"); 895 dump_line();/* make sure this starts a line */ 896 ps.want_blank = false; 897 } 898 force_nl = true;/* also, following stuff must go onto new line */ 899 last_else = 1; 900 parse(elselit); 901 } 902 else { 903 if (e_code != s_code) { /* make sure this starts a line */ 904 if (opt.verbose) 905 diag2(0, "Line broken"); 906 dump_line(); 907 ps.want_blank = false; 908 } 909 force_nl = true;/* also, following stuff must go onto new line */ 910 last_else = 0; 911 parse(dolit); 912 } 913 goto copy_id; /* move the token into line */ 914 915 case type_def: 916 case storage: 917 prefix_blankline_requested = 0; 918 goto copy_id; 919 920 case structure: 921 if (ps.p_l_follow > 0) 922 goto copy_id; 923 /* FALLTHROUGH */ 924 case decl: /* we have a declaration type (int, etc.) */ 925 parse(decl); /* let parser worry about indentation */ 926 if (ps.last_token == rparen && ps.tos <= 1) { 927 if (s_code != e_code) { 928 dump_line(); 929 ps.want_blank = 0; 930 } 931 } 932 if (ps.in_parameter_declaration && opt.indent_parameters && ps.dec_nest == 0) { 933 ps.ind_level = ps.i_l_follow = 1; 934 ps.ind_stmt = 0; 935 } 936 ps.in_or_st = true; /* this might be a structure or initialization 937 * declaration */ 938 ps.in_decl = ps.decl_on_line = ps.last_token != type_def; 939 if ( /* !ps.in_or_st && */ ps.dec_nest <= 0) 940 ps.just_saw_decl = 2; 941 prefix_blankline_requested = 0; 942 for (i = 0; token[i++];); /* get length of token */ 943 944 if (ps.ind_level == 0 || ps.dec_nest > 0) { 945 /* global variable or struct member in local variable */ 946 dec_ind = opt.decl_indent > 0 ? opt.decl_indent : i; 947 tabs_to_var = (opt.use_tabs ? opt.decl_indent > 0 : 0); 948 } else { 949 /* local variable */ 950 dec_ind = opt.local_decl_indent > 0 ? opt.local_decl_indent : i; 951 tabs_to_var = (opt.use_tabs ? opt.local_decl_indent > 0 : 0); 952 } 953 goto copy_id; 954 955 case funcname: 956 case ident: /* got an identifier or constant */ 957 if (ps.in_decl) { 958 if (type_code == funcname) { 959 ps.in_decl = false; 960 if (opt.procnames_start_line && s_code != e_code) { 961 *e_code = '\0'; 962 dump_line(); 963 } 964 else if (ps.want_blank) { 965 *e_code++ = ' '; 966 } 967 ps.want_blank = false; 968 } 969 else if (!ps.block_init && !ps.dumped_decl_indent && 970 ps.paren_level == 0) { /* if we are in a declaration, we 971 * must indent identifier */ 972 indent_declaration(dec_ind, tabs_to_var); 973 ps.dumped_decl_indent = true; 974 ps.want_blank = false; 975 } 976 } 977 else if (sp_sw && ps.p_l_follow == 0) { 978 sp_sw = false; 979 force_nl = true; 980 ps.last_u_d = true; 981 ps.in_stmt = false; 982 parse(hd_type); 983 } 984 copy_id: 985 { 986 int len = e_token - s_token; 987 988 CHECK_SIZE_CODE(len + 1); 989 if (ps.want_blank) 990 *e_code++ = ' '; 991 memcpy(e_code, s_token, len); 992 e_code += len; 993 } 994 if (type_code != funcname) 995 ps.want_blank = true; 996 break; 997 998 case strpfx: 999 { 1000 int len = e_token - s_token; 1001 1002 CHECK_SIZE_CODE(len + 1); 1003 if (ps.want_blank) 1004 *e_code++ = ' '; 1005 memcpy(e_code, token, len); 1006 e_code += len; 1007 } 1008 ps.want_blank = false; 1009 break; 1010 1011 case period: /* treat a period kind of like a binary 1012 * operation */ 1013 *e_code++ = '.'; /* move the period into line */ 1014 ps.want_blank = false; /* dont put a blank after a period */ 1015 break; 1016 1017 case comma: 1018 ps.want_blank = (s_code != e_code); /* only put blank after comma 1019 * if comma does not start the 1020 * line */ 1021 if (ps.in_decl && ps.procname[0] == '\0' && !ps.block_init && 1022 !ps.dumped_decl_indent && ps.paren_level == 0) { 1023 /* indent leading commas and not the actual identifiers */ 1024 indent_declaration(dec_ind - 1, tabs_to_var); 1025 ps.dumped_decl_indent = true; 1026 } 1027 *e_code++ = ','; 1028 if (ps.p_l_follow == 0) { 1029 if (ps.block_init_level <= 0) 1030 ps.block_init = 0; 1031 if (break_comma && (!opt.leave_comma || 1032 count_spaces_until(compute_code_target(), s_code, e_code) > 1033 opt.max_col - opt.tabsize)) 1034 force_nl = true; 1035 } 1036 break; 1037 1038 case preesc: /* got the character '#' */ 1039 if ((s_com != e_com) || 1040 (s_lab != e_lab) || 1041 (s_code != e_code)) 1042 dump_line(); 1043 CHECK_SIZE_LAB(1); 1044 *e_lab++ = '#'; /* move whole line to 'label' buffer */ 1045 { 1046 int in_comment = 0; 1047 int com_start = 0; 1048 char quote = 0; 1049 int com_end = 0; 1050 1051 while (*buf_ptr == ' ' || *buf_ptr == '\t') { 1052 buf_ptr++; 1053 if (buf_ptr >= buf_end) 1054 fill_buffer(); 1055 } 1056 while (*buf_ptr != '\n' || (in_comment && !had_eof)) { 1057 CHECK_SIZE_LAB(2); 1058 *e_lab = *buf_ptr++; 1059 if (buf_ptr >= buf_end) 1060 fill_buffer(); 1061 switch (*e_lab++) { 1062 case BACKSLASH: 1063 if (!in_comment) { 1064 *e_lab++ = *buf_ptr++; 1065 if (buf_ptr >= buf_end) 1066 fill_buffer(); 1067 } 1068 break; 1069 case '/': 1070 if (*buf_ptr == '*' && !in_comment && !quote) { 1071 in_comment = 1; 1072 *e_lab++ = *buf_ptr++; 1073 com_start = e_lab - s_lab - 2; 1074 } 1075 break; 1076 case '"': 1077 if (quote == '"') 1078 quote = 0; 1079 break; 1080 case '\'': 1081 if (quote == '\'') 1082 quote = 0; 1083 break; 1084 case '*': 1085 if (*buf_ptr == '/' && in_comment) { 1086 in_comment = 0; 1087 *e_lab++ = *buf_ptr++; 1088 com_end = e_lab - s_lab; 1089 } 1090 break; 1091 } 1092 } 1093 1094 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1095 e_lab--; 1096 if (e_lab - s_lab == com_end && bp_save == NULL) { 1097 /* comment on preprocessor line */ 1098 if (sc_end == NULL) { /* if this is the first comment, 1099 * we must set up the buffer */ 1100 save_com = sc_buf; 1101 sc_end = &save_com[0]; 1102 } 1103 else { 1104 *sc_end++ = '\n'; /* add newline between 1105 * comments */ 1106 *sc_end++ = ' '; 1107 --line_no; 1108 } 1109 if (sc_end - save_com + com_end - com_start > sc_size) 1110 errx(1, "input too long"); 1111 memmove(sc_end, s_lab + com_start, com_end - com_start); 1112 sc_end += com_end - com_start; 1113 e_lab = s_lab + com_start; 1114 while (e_lab > s_lab && (e_lab[-1] == ' ' || e_lab[-1] == '\t')) 1115 e_lab--; 1116 bp_save = buf_ptr; /* save current input buffer */ 1117 be_save = buf_end; 1118 buf_ptr = save_com; /* fix so that subsequent calls to 1119 * lexi will take tokens out of 1120 * save_com */ 1121 *sc_end++ = ' '; /* add trailing blank, just in case */ 1122 buf_end = sc_end; 1123 sc_end = NULL; 1124 } 1125 CHECK_SIZE_LAB(1); 1126 *e_lab = '\0'; /* null terminate line */ 1127 ps.pcase = false; 1128 } 1129 1130 if (strncmp(s_lab, "#if", 3) == 0) { /* also ifdef, ifndef */ 1131 if ((size_t)ifdef_level < nitems(state_stack)) { 1132 match_state[ifdef_level].tos = -1; 1133 state_stack[ifdef_level++] = ps; 1134 } 1135 else 1136 diag2(1, "#if stack overflow"); 1137 } 1138 else if (strncmp(s_lab, "#el", 3) == 0) { /* else, elif */ 1139 if (ifdef_level <= 0) 1140 diag2(1, s_lab[3] == 'i' ? "Unmatched #elif" : "Unmatched #else"); 1141 else { 1142 match_state[ifdef_level - 1] = ps; 1143 ps = state_stack[ifdef_level - 1]; 1144 } 1145 } 1146 else if (strncmp(s_lab, "#endif", 6) == 0) { 1147 if (ifdef_level <= 0) 1148 diag2(1, "Unmatched #endif"); 1149 else 1150 ifdef_level--; 1151 } else { 1152 struct directives { 1153 int size; 1154 const char *string; 1155 } 1156 recognized[] = { 1157 {7, "include"}, 1158 {6, "define"}, 1159 {5, "undef"}, 1160 {4, "line"}, 1161 {5, "error"}, 1162 {6, "pragma"} 1163 }; 1164 int d = nitems(recognized); 1165 while (--d >= 0) 1166 if (strncmp(s_lab + 1, recognized[d].string, recognized[d].size) == 0) 1167 break; 1168 if (d < 0) { 1169 diag2(1, "Unrecognized cpp directive"); 1170 break; 1171 } 1172 } 1173 if (opt.blanklines_around_conditional_compilation) { 1174 postfix_blankline_requested++; 1175 n_real_blanklines = 0; 1176 } 1177 else { 1178 postfix_blankline_requested = 0; 1179 prefix_blankline_requested = 0; 1180 } 1181 break; /* subsequent processing of the newline 1182 * character will cause the line to be printed */ 1183 1184 case comment: /* we have gotten a / followed by * this is a biggie */ 1185 pr_comment(); 1186 break; 1187 } /* end of big switch stmt */ 1188 1189 *e_code = '\0'; /* make sure code section is null terminated */ 1190 if (type_code != comment && type_code != newline && type_code != preesc) 1191 ps.last_token = type_code; 1192 } /* end of main while (1) loop */ 1193 } 1194 1195 /* 1196 * copy input file to backup file if in_name is /blah/blah/blah/file, then 1197 * backup file will be ".Bfile" then make the backup file the input and 1198 * original input file the output 1199 */ 1200 static void 1201 bakcopy(void) 1202 { 1203 int n, 1204 bakchn; 1205 char buff[8 * 1024]; 1206 const char *p; 1207 1208 /* construct file name .Bfile */ 1209 for (p = in_name; *p; p++); /* skip to end of string */ 1210 while (p > in_name && *p != '/') /* find last '/' */ 1211 p--; 1212 if (*p == '/') 1213 p++; 1214 sprintf(bakfile, "%s%s", p, simple_backup_suffix); 1215 1216 /* copy in_name to backup file */ 1217 bakchn = creat(bakfile, 0600); 1218 if (bakchn < 0) 1219 err(1, "%s", bakfile); 1220 while ((n = read(fileno(input), buff, sizeof(buff))) > 0) 1221 if (write(bakchn, buff, n) != n) 1222 err(1, "%s", bakfile); 1223 if (n < 0) 1224 err(1, "%s", in_name); 1225 close(bakchn); 1226 fclose(input); 1227 1228 /* re-open backup file as the input file */ 1229 input = fopen(bakfile, "r"); 1230 if (input == NULL) 1231 err(1, "%s", bakfile); 1232 /* now the original input file will be the output */ 1233 output = fopen(in_name, "w"); 1234 if (output == NULL) { 1235 unlink(bakfile); 1236 err(1, "%s", in_name); 1237 } 1238 } 1239 1240 static void 1241 indent_declaration(int cur_dec_ind, int tabs_to_var) 1242 { 1243 int pos = e_code - s_code; 1244 char *startpos = e_code; 1245 1246 /* 1247 * get the tab math right for indentations that are not multiples of tabsize 1248 */ 1249 if ((ps.ind_level * opt.ind_size) % opt.tabsize != 0) { 1250 pos += (ps.ind_level * opt.ind_size) % opt.tabsize; 1251 cur_dec_ind += (ps.ind_level * opt.ind_size) % opt.tabsize; 1252 } 1253 if (tabs_to_var) { 1254 int tpos; 1255 1256 CHECK_SIZE_CODE(cur_dec_ind / opt.tabsize); 1257 while ((tpos = opt.tabsize * (1 + pos / opt.tabsize)) <= cur_dec_ind) { 1258 *e_code++ = '\t'; 1259 pos = tpos; 1260 } 1261 } 1262 CHECK_SIZE_CODE(cur_dec_ind - pos + 1); 1263 while (pos < cur_dec_ind) { 1264 *e_code++ = ' '; 1265 pos++; 1266 } 1267 if (e_code == startpos && ps.want_blank) { 1268 *e_code++ = ' '; 1269 ps.want_blank = false; 1270 } 1271 } 1272