1 /* Implementation of -Wmisleading-indentation 2 Copyright (C) 2015-2018 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free 8 Software Foundation; either version 3, or (at your option) any later 9 version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "tm.h" 24 #include "c-common.h" 25 #include "c-indentation.h" 26 27 extern cpp_options *cpp_opts; 28 29 /* Round up VIS_COLUMN to nearest tab stop. */ 30 31 static unsigned int 32 next_tab_stop (unsigned int vis_column) 33 { 34 const unsigned int tab_width = cpp_opts->tabstop; 35 vis_column = ((vis_column + tab_width) / tab_width) * tab_width; 36 return vis_column; 37 } 38 39 /* Convert libcpp's notion of a column (a 1-based char count) to 40 the "visual column" (0-based column, respecting tabs), by reading the 41 relevant line. 42 43 Returns true if a conversion was possible, writing the result to OUT, 44 otherwise returns false. If FIRST_NWS is not NULL, then write to it 45 the visual column corresponding to the first non-whitespace character 46 on the line. */ 47 48 static bool 49 get_visual_column (expanded_location exploc, location_t loc, 50 unsigned int *out, 51 unsigned int *first_nws) 52 { 53 /* PR c++/68819: if the column number is zero, we presumably 54 had a location_t > LINE_MAP_MAX_LOCATION_WITH_COLS, and so 55 we have no column information. 56 Act as if no conversion was possible, triggering the 57 error-handling path in the caller. */ 58 if (!exploc.column) 59 { 60 static bool issued_note = false; 61 if (!issued_note) 62 { 63 /* Notify the user the first time this happens. */ 64 issued_note = true; 65 inform (loc, 66 "-Wmisleading-indentation is disabled from this point" 67 " onwards, since column-tracking was disabled due to" 68 " the size of the code/headers"); 69 } 70 return false; 71 } 72 73 int line_len; 74 const char *line = location_get_source_line (exploc.file, exploc.line, 75 &line_len); 76 if (!line) 77 return false; 78 unsigned int vis_column = 0; 79 for (int i = 1; i < exploc.column; i++) 80 { 81 unsigned char ch = line[i - 1]; 82 83 if (first_nws != NULL && !ISSPACE (ch)) 84 { 85 *first_nws = vis_column; 86 first_nws = NULL; 87 } 88 89 if (ch == '\t') 90 vis_column = next_tab_stop (vis_column); 91 else 92 vis_column++; 93 } 94 95 if (first_nws != NULL) 96 *first_nws = vis_column; 97 98 *out = vis_column; 99 return true; 100 } 101 102 /* Attempt to determine the first non-whitespace character in line LINE_NUM 103 of source line FILE. 104 105 If this is possible, return true and write its "visual column" to 106 *FIRST_NWS. 107 Otherwise, return false, leaving *FIRST_NWS untouched. */ 108 109 static bool 110 get_first_nws_vis_column (const char *file, int line_num, 111 unsigned int *first_nws) 112 { 113 gcc_assert (first_nws); 114 115 int line_len; 116 const char *line = location_get_source_line (file, line_num, &line_len); 117 if (!line) 118 return false; 119 unsigned int vis_column = 0; 120 for (int i = 1; i < line_len; i++) 121 { 122 unsigned char ch = line[i - 1]; 123 124 if (!ISSPACE (ch)) 125 { 126 *first_nws = vis_column; 127 return true; 128 } 129 130 if (ch == '\t') 131 vis_column = next_tab_stop (vis_column); 132 else 133 vis_column++; 134 } 135 136 /* No non-whitespace characters found. */ 137 return false; 138 } 139 140 /* Determine if there is an unindent/outdent between 141 BODY_EXPLOC and NEXT_STMT_EXPLOC, to ensure that we don't 142 issue a warning for cases like the following: 143 144 (1) Preprocessor logic 145 146 if (flagA) 147 foo (); 148 ^ BODY_EXPLOC 149 #if SOME_CONDITION_THAT_DOES_NOT_HOLD 150 if (flagB) 151 #endif 152 bar (); 153 ^ NEXT_STMT_EXPLOC 154 155 "bar ();" is visually aligned below "foo ();" and 156 is (as far as the parser sees) the next token, but 157 this isn't misleading to a human reader. 158 159 (2) Empty macro with bad indentation 160 161 In the following, the 162 "if (i > 0)" 163 is poorly indented, and ought to be on the same column as 164 "engine_ref_debug(e, 0, -1)" 165 However, it is not misleadingly indented, due to the presence 166 of that macro. 167 168 #define engine_ref_debug(X, Y, Z) 169 170 if (locked) 171 i = foo (0); 172 else 173 i = foo (1); 174 engine_ref_debug(e, 0, -1) 175 if (i > 0) 176 return 1; 177 178 Return true if such an unindent/outdent is detected. */ 179 180 static bool 181 detect_intervening_unindent (const char *file, 182 int body_line, 183 int next_stmt_line, 184 unsigned int vis_column) 185 { 186 gcc_assert (file); 187 gcc_assert (next_stmt_line > body_line); 188 189 for (int line = body_line + 1; line < next_stmt_line; line++) 190 { 191 unsigned int line_vis_column; 192 if (get_first_nws_vis_column (file, line, &line_vis_column)) 193 if (line_vis_column < vis_column) 194 return true; 195 } 196 197 /* Not found. */ 198 return false; 199 } 200 201 202 /* Helper function for warn_for_misleading_indentation; see 203 description of that function below. */ 204 205 static bool 206 should_warn_for_misleading_indentation (const token_indent_info &guard_tinfo, 207 const token_indent_info &body_tinfo, 208 const token_indent_info &next_tinfo) 209 { 210 location_t guard_loc = guard_tinfo.location; 211 location_t body_loc = body_tinfo.location; 212 location_t next_stmt_loc = next_tinfo.location; 213 214 enum cpp_ttype body_type = body_tinfo.type; 215 enum cpp_ttype next_tok_type = next_tinfo.type; 216 217 /* Don't attempt to compare the indentation of BODY_LOC and NEXT_STMT_LOC 218 if either are within macros. */ 219 if (linemap_location_from_macro_expansion_p (line_table, body_loc) 220 || linemap_location_from_macro_expansion_p (line_table, next_stmt_loc)) 221 return false; 222 223 /* Don't attempt to compare indentation if #line or # 44 "file"-style 224 directives are present, suggesting generated code. 225 226 All bets are off if these are present: the file that the #line 227 directive could have an entirely different coding layout to C/C++ 228 (e.g. .md files). 229 230 To determine if a #line is present, in theory we could look for a 231 map with reason == LC_RENAME_VERBATIM. However, if there has 232 subsequently been a long line requiring a column number larger than 233 that representable by the original LC_RENAME_VERBATIM map, then 234 we'll have a map with reason LC_RENAME. 235 Rather than attempting to search all of the maps for a 236 LC_RENAME_VERBATIM, instead we have libcpp set a flag whenever one 237 is seen, and we check for the flag here. 238 */ 239 if (line_table->seen_line_directive) 240 return false; 241 242 /* We can't usefully warn about do-while and switch statements since the 243 bodies of these statements are always explicitly delimited at both ends, 244 so control flow is quite obvious. */ 245 if (guard_tinfo.keyword == RID_DO 246 || guard_tinfo.keyword == RID_SWITCH) 247 return false; 248 249 /* If the token following the body is a close brace or an "else" 250 then while indentation may be sloppy, there is not much ambiguity 251 about control flow, e.g. 252 253 if (foo) <- GUARD 254 bar (); <- BODY 255 else baz (); <- NEXT 256 257 { 258 while (foo) <- GUARD 259 bar (); <- BODY 260 } <- NEXT 261 baz (); 262 */ 263 if (next_tok_type == CPP_CLOSE_BRACE 264 || next_tinfo.keyword == RID_ELSE) 265 return false; 266 267 /* Likewise, if the body of the guard is a compound statement then control 268 flow is quite visually explicit regardless of the code's possibly poor 269 indentation, e.g. 270 271 while (foo) <- GUARD 272 { <- BODY 273 bar (); 274 } 275 baz (); <- NEXT 276 277 Things only get muddy when the body of the guard does not have 278 braces, e.g. 279 280 if (foo) <- GUARD 281 bar (); <- BODY 282 baz (); <- NEXT 283 */ 284 if (body_type == CPP_OPEN_BRACE) 285 return false; 286 287 /* Don't warn here about spurious semicolons. */ 288 if (next_tok_type == CPP_SEMICOLON) 289 return false; 290 291 expanded_location body_exploc = expand_location (body_loc); 292 expanded_location next_stmt_exploc = expand_location (next_stmt_loc); 293 expanded_location guard_exploc = expand_location (guard_loc); 294 295 /* They must be in the same file. */ 296 if (next_stmt_exploc.file != body_exploc.file) 297 return false; 298 299 /* If NEXT_STMT_LOC and BODY_LOC are on the same line, consider 300 the location of the guard. 301 302 Cases where we want to issue a warning: 303 304 if (flag) 305 foo (); bar (); 306 ^ WARN HERE 307 308 if (flag) foo (); bar (); 309 ^ WARN HERE 310 311 312 if (flag) ; { 313 ^ WARN HERE 314 315 if (flag) 316 ; { 317 ^ WARN HERE 318 319 Cases where we don't want to issue a warning: 320 321 various_code (); if (flag) foo (); bar (); more_code (); 322 ^ DON'T WARN HERE. */ 323 if (next_stmt_exploc.line == body_exploc.line) 324 { 325 if (guard_exploc.file != body_exploc.file) 326 return true; 327 if (guard_exploc.line < body_exploc.line) 328 /* The guard is on a line before a line that contains both 329 the body and the next stmt. */ 330 return true; 331 else if (guard_exploc.line == body_exploc.line) 332 { 333 /* They're all on the same line. */ 334 gcc_assert (guard_exploc.file == next_stmt_exploc.file); 335 gcc_assert (guard_exploc.line == next_stmt_exploc.line); 336 unsigned int guard_vis_column; 337 unsigned int guard_line_first_nws; 338 if (!get_visual_column (guard_exploc, guard_loc, 339 &guard_vis_column, 340 &guard_line_first_nws)) 341 return false; 342 /* Heuristic: only warn if the guard is the first thing 343 on its line. */ 344 if (guard_vis_column == guard_line_first_nws) 345 return true; 346 } 347 } 348 349 /* If NEXT_STMT_LOC is on a line after BODY_LOC, consider 350 their relative locations, and of the guard. 351 352 Cases where we want to issue a warning: 353 if (flag) 354 foo (); 355 bar (); 356 ^ WARN HERE 357 358 Cases where we don't want to issue a warning: 359 if (flag) 360 foo (); 361 bar (); 362 ^ DON'T WARN HERE (autogenerated code?) 363 364 if (flagA) 365 foo (); 366 #if SOME_CONDITION_THAT_DOES_NOT_HOLD 367 if (flagB) 368 #endif 369 bar (); 370 ^ DON'T WARN HERE 371 372 if (flag) 373 ; 374 foo (); 375 ^ DON'T WARN HERE 376 377 #define emit 378 if (flag) 379 foo (); 380 emit bar (); 381 ^ DON'T WARN HERE 382 383 */ 384 if (next_stmt_exploc.line > body_exploc.line) 385 { 386 /* Determine if GUARD_LOC and NEXT_STMT_LOC are aligned on the same 387 "visual column"... */ 388 unsigned int next_stmt_vis_column; 389 unsigned int next_stmt_line_first_nws; 390 unsigned int body_vis_column; 391 unsigned int body_line_first_nws; 392 unsigned int guard_vis_column; 393 unsigned int guard_line_first_nws; 394 /* If we can't determine it, don't issue a warning. This is sometimes 395 the case for input files containing #line directives, and these 396 are often for autogenerated sources (e.g. from .md files), where 397 it's not clear that it's meaningful to look at indentation. */ 398 if (!get_visual_column (next_stmt_exploc, next_stmt_loc, 399 &next_stmt_vis_column, 400 &next_stmt_line_first_nws)) 401 return false; 402 if (!get_visual_column (body_exploc, body_loc, 403 &body_vis_column, 404 &body_line_first_nws)) 405 return false; 406 if (!get_visual_column (guard_exploc, guard_loc, 407 &guard_vis_column, 408 &guard_line_first_nws)) 409 return false; 410 411 /* If the line where the next stmt starts has non-whitespace 412 on it before the stmt, then don't warn: 413 #define emit 414 if (flag) 415 foo (); 416 emit bar (); 417 ^ DON'T WARN HERE 418 (PR c/69122). */ 419 if (next_stmt_line_first_nws < next_stmt_vis_column) 420 return false; 421 422 if ((body_type != CPP_SEMICOLON 423 && next_stmt_vis_column == body_vis_column) 424 /* As a special case handle the case where the body is a semicolon 425 that may be hidden by a preceding comment, e.g. */ 426 427 // if (p) 428 // /* blah */; 429 // foo (1); 430 431 /* by looking instead at the column of the first non-whitespace 432 character on the body line. */ 433 || (body_type == CPP_SEMICOLON 434 && body_exploc.line > guard_exploc.line 435 && body_line_first_nws != body_vis_column 436 && next_stmt_vis_column > guard_line_first_nws)) 437 { 438 /* Don't warn if they are aligned on the same column 439 as the guard itself (suggesting autogenerated code that doesn't 440 bother indenting at all). 441 For "else" clauses, we consider the column of the first 442 non-whitespace character on the guard line instead of the column 443 of the actual guard token itself because it is more sensible. 444 Consider: 445 446 if (p) { 447 foo (1); 448 } else // GUARD 449 foo (2); // BODY 450 foo (3); // NEXT 451 452 and: 453 454 if (p) 455 foo (1); 456 } else // GUARD 457 foo (2); // BODY 458 foo (3); // NEXT 459 460 If we just used the column of the "else" token, we would warn on 461 the first example and not warn on the second. But we want the 462 exact opposite to happen: to not warn on the first example (which 463 is probably autogenerated) and to warn on the second (whose 464 indentation is misleading). Using the column of the first 465 non-whitespace character on the guard line makes that 466 happen. */ 467 unsigned int guard_column = (guard_tinfo.keyword == RID_ELSE 468 ? guard_line_first_nws 469 : guard_vis_column); 470 if (guard_column == body_vis_column) 471 return false; 472 473 /* We may have something like: 474 475 if (p) 476 { 477 foo (1); 478 } else // GUARD 479 foo (2); // BODY 480 foo (3); // NEXT 481 482 in which case the columns are not aligned but the code is not 483 misleadingly indented. If the column of the body isn't indented 484 more than the guard line then don't warn. */ 485 if (body_vis_column <= guard_line_first_nws) 486 return false; 487 488 /* Don't warn if there is an unindent between the two statements. */ 489 int vis_column = MIN (next_stmt_vis_column, body_vis_column); 490 if (detect_intervening_unindent (body_exploc.file, body_exploc.line, 491 next_stmt_exploc.line, 492 vis_column)) 493 return false; 494 495 /* Otherwise, they are visually aligned: issue a warning. */ 496 return true; 497 } 498 499 /* Also issue a warning for code having the form: 500 501 if (flag); 502 foo (); 503 504 while (flag); 505 { 506 ... 507 } 508 509 for (...); 510 { 511 ... 512 } 513 514 if (flag) 515 ; 516 else if (flag); 517 foo (); 518 519 where the semicolon at the end of each guard is most likely spurious. 520 521 But do not warn on: 522 523 for (..); 524 foo (); 525 526 where the next statement is aligned with the guard. 527 */ 528 if (body_type == CPP_SEMICOLON) 529 { 530 if (body_exploc.line == guard_exploc.line) 531 { 532 if (next_stmt_vis_column > guard_line_first_nws 533 || (next_tok_type == CPP_OPEN_BRACE 534 && next_stmt_vis_column == guard_line_first_nws)) 535 return true; 536 } 537 } 538 } 539 540 return false; 541 } 542 543 /* Return the string identifier corresponding to the given guard token. */ 544 545 const char * 546 guard_tinfo_to_string (enum rid keyword) 547 { 548 switch (keyword) 549 { 550 case RID_FOR: 551 return "for"; 552 case RID_ELSE: 553 return "else"; 554 case RID_IF: 555 return "if"; 556 case RID_WHILE: 557 return "while"; 558 case RID_DO: 559 return "do"; 560 case RID_SWITCH: 561 return "switch"; 562 default: 563 gcc_unreachable (); 564 } 565 } 566 567 /* Called by the C/C++ frontends when we have a guarding statement at 568 GUARD_LOC containing a statement at BODY_LOC, where the block wasn't 569 written using braces, like this: 570 571 if (flag) 572 foo (); 573 574 along with the location of the next token, at NEXT_STMT_LOC, 575 so that we can detect followup statements that are within 576 the same "visual block" as the guarded statement, but which 577 aren't logically grouped within the guarding statement, such 578 as: 579 580 GUARD_LOC 581 | 582 V 583 if (flag) 584 foo (); <- BODY_LOC 585 bar (); <- NEXT_STMT_LOC 586 587 In the above, "bar ();" isn't guarded by the "if", but 588 is indented to misleadingly suggest that it is in the same 589 block as "foo ();". 590 591 GUARD_KIND identifies the kind of clause e.g. "if", "else" etc. */ 592 593 void 594 warn_for_misleading_indentation (const token_indent_info &guard_tinfo, 595 const token_indent_info &body_tinfo, 596 const token_indent_info &next_tinfo) 597 { 598 /* Early reject for the case where -Wmisleading-indentation is disabled, 599 to avoid doing work only to have the warning suppressed inside the 600 diagnostic machinery. */ 601 if (!warn_misleading_indentation) 602 return; 603 604 if (should_warn_for_misleading_indentation (guard_tinfo, 605 body_tinfo, 606 next_tinfo)) 607 { 608 if (warning_at (guard_tinfo.location, OPT_Wmisleading_indentation, 609 "this %qs clause does not guard...", 610 guard_tinfo_to_string (guard_tinfo.keyword))) 611 inform (next_tinfo.location, 612 "...this statement, but the latter is misleadingly indented" 613 " as if it were guarded by the %qs", 614 guard_tinfo_to_string (guard_tinfo.keyword)); 615 } 616 } 617