1 /* Implementation of -Wmisleading-indentation
2    Copyright (C) 2015-2018 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "c-common.h"
25 #include "c-indentation.h"
26 
27 extern cpp_options *cpp_opts;
28 
29 /* Round up VIS_COLUMN to nearest tab stop. */
30 
31 static unsigned int
32 next_tab_stop (unsigned int vis_column)
33 {
34   const unsigned int tab_width = cpp_opts->tabstop;
35   vis_column = ((vis_column + tab_width) / tab_width) * tab_width;
36   return vis_column;
37 }
38 
39 /* Convert libcpp's notion of a column (a 1-based char count) to
40    the "visual column" (0-based column, respecting tabs), by reading the
41    relevant line.
42 
43    Returns true if a conversion was possible, writing the result to OUT,
44    otherwise returns false.  If FIRST_NWS is not NULL, then write to it
45    the visual column corresponding to the first non-whitespace character
46    on the line.  */
47 
48 static bool
49 get_visual_column (expanded_location exploc, location_t loc,
50 		   unsigned int *out,
51 		   unsigned int *first_nws)
52 {
53   /* PR c++/68819: if the column number is zero, we presumably
54      had a location_t > LINE_MAP_MAX_LOCATION_WITH_COLS, and so
55      we have no column information.
56      Act as if no conversion was possible, triggering the
57      error-handling path in the caller.  */
58   if (!exploc.column)
59     {
60       static bool issued_note = false;
61       if (!issued_note)
62 	{
63 	  /* Notify the user the first time this happens.  */
64 	  issued_note = true;
65 	  inform (loc,
66 		  "-Wmisleading-indentation is disabled from this point"
67 		  " onwards, since column-tracking was disabled due to"
68 		  " the size of the code/headers");
69 	}
70       return false;
71     }
72 
73   int line_len;
74   const char *line = location_get_source_line (exploc.file, exploc.line,
75 					       &line_len);
76   if (!line)
77     return false;
78   unsigned int vis_column = 0;
79   for (int i = 1; i < exploc.column; i++)
80     {
81       unsigned char ch = line[i - 1];
82 
83       if (first_nws != NULL && !ISSPACE (ch))
84 	{
85 	  *first_nws = vis_column;
86 	  first_nws = NULL;
87 	}
88 
89       if (ch == '\t')
90 	vis_column = next_tab_stop (vis_column);
91       else
92        vis_column++;
93     }
94 
95   if (first_nws != NULL)
96     *first_nws = vis_column;
97 
98   *out = vis_column;
99   return true;
100 }
101 
102 /* Attempt to determine the first non-whitespace character in line LINE_NUM
103    of source line FILE.
104 
105    If this is possible, return true and write its "visual column" to
106    *FIRST_NWS.
107    Otherwise, return false, leaving *FIRST_NWS untouched.  */
108 
109 static bool
110 get_first_nws_vis_column (const char *file, int line_num,
111 			  unsigned int *first_nws)
112 {
113   gcc_assert (first_nws);
114 
115   int line_len;
116   const char *line = location_get_source_line (file, line_num, &line_len);
117   if (!line)
118     return false;
119   unsigned int vis_column = 0;
120   for (int i = 1; i < line_len; i++)
121     {
122       unsigned char ch = line[i - 1];
123 
124       if (!ISSPACE (ch))
125 	{
126 	  *first_nws = vis_column;
127 	  return true;
128 	}
129 
130       if (ch == '\t')
131 	vis_column = next_tab_stop (vis_column);
132       else
133 	vis_column++;
134     }
135 
136   /* No non-whitespace characters found.  */
137   return false;
138 }
139 
140 /* Determine if there is an unindent/outdent between
141    BODY_EXPLOC and NEXT_STMT_EXPLOC, to ensure that we don't
142    issue a warning for cases like the following:
143 
144    (1) Preprocessor logic
145 
146 	if (flagA)
147 	  foo ();
148 	  ^ BODY_EXPLOC
149       #if SOME_CONDITION_THAT_DOES_NOT_HOLD
150 	if (flagB)
151       #endif
152 	  bar ();
153 	  ^ NEXT_STMT_EXPLOC
154 
155    "bar ();" is visually aligned below "foo ();" and
156    is (as far as the parser sees) the next token, but
157    this isn't misleading to a human reader.
158 
159    (2) Empty macro with bad indentation
160 
161    In the following, the
162      "if (i > 0)"
163    is poorly indented, and ought to be on the same column as
164       "engine_ref_debug(e, 0, -1)"
165    However, it is not misleadingly indented, due to the presence
166    of that macro.
167 
168       #define engine_ref_debug(X, Y, Z)
169 
170       if (locked)
171         i = foo (0);
172       else
173         i = foo (1);
174       engine_ref_debug(e, 0, -1)
175         if (i > 0)
176         return 1;
177 
178    Return true if such an unindent/outdent is detected.  */
179 
180 static bool
181 detect_intervening_unindent (const char *file,
182 			     int body_line,
183 			     int next_stmt_line,
184 			     unsigned int vis_column)
185 {
186   gcc_assert (file);
187   gcc_assert (next_stmt_line > body_line);
188 
189   for (int line = body_line + 1; line < next_stmt_line; line++)
190     {
191       unsigned int line_vis_column;
192       if (get_first_nws_vis_column (file, line, &line_vis_column))
193 	if (line_vis_column < vis_column)
194 	  return true;
195     }
196 
197   /* Not found.  */
198   return false;
199 }
200 
201 
202 /* Helper function for warn_for_misleading_indentation; see
203    description of that function below.  */
204 
205 static bool
206 should_warn_for_misleading_indentation (const token_indent_info &guard_tinfo,
207 					const token_indent_info &body_tinfo,
208 					const token_indent_info &next_tinfo)
209 {
210   location_t guard_loc = guard_tinfo.location;
211   location_t body_loc = body_tinfo.location;
212   location_t next_stmt_loc = next_tinfo.location;
213 
214   enum cpp_ttype body_type = body_tinfo.type;
215   enum cpp_ttype next_tok_type = next_tinfo.type;
216 
217   /* Don't attempt to compare the indentation of BODY_LOC and NEXT_STMT_LOC
218      if either are within macros.  */
219   if (linemap_location_from_macro_expansion_p (line_table, body_loc)
220       || linemap_location_from_macro_expansion_p (line_table, next_stmt_loc))
221     return false;
222 
223   /* Don't attempt to compare indentation if #line or # 44 "file"-style
224      directives are present, suggesting generated code.
225 
226      All bets are off if these are present: the file that the #line
227      directive could have an entirely different coding layout to C/C++
228      (e.g. .md files).
229 
230      To determine if a #line is present, in theory we could look for a
231      map with reason == LC_RENAME_VERBATIM.  However, if there has
232      subsequently been a long line requiring a column number larger than
233      that representable by the original LC_RENAME_VERBATIM map, then
234      we'll have a map with reason LC_RENAME.
235      Rather than attempting to search all of the maps for a
236      LC_RENAME_VERBATIM, instead we have libcpp set a flag whenever one
237      is seen, and we check for the flag here.
238   */
239   if (line_table->seen_line_directive)
240     return false;
241 
242   /* We can't usefully warn about do-while and switch statements since the
243      bodies of these statements are always explicitly delimited at both ends,
244      so control flow is quite obvious.  */
245   if (guard_tinfo.keyword == RID_DO
246       || guard_tinfo.keyword == RID_SWITCH)
247     return false;
248 
249   /* If the token following the body is a close brace or an "else"
250      then while indentation may be sloppy, there is not much ambiguity
251      about control flow, e.g.
252 
253      if (foo)       <- GUARD
254        bar ();      <- BODY
255        else baz (); <- NEXT
256 
257      {
258      while (foo)  <- GUARD
259      bar ();      <- BODY
260      }            <- NEXT
261      baz ();
262   */
263   if (next_tok_type == CPP_CLOSE_BRACE
264       || next_tinfo.keyword == RID_ELSE)
265     return false;
266 
267   /* Likewise, if the body of the guard is a compound statement then control
268      flow is quite visually explicit regardless of the code's possibly poor
269      indentation, e.g.
270 
271      while (foo)  <- GUARD
272        {          <- BODY
273        bar ();
274        }
275        baz ();    <- NEXT
276 
277     Things only get muddy when the body of the guard does not have
278     braces, e.g.
279 
280     if (foo)  <- GUARD
281       bar (); <- BODY
282       baz (); <- NEXT
283   */
284   if (body_type == CPP_OPEN_BRACE)
285     return false;
286 
287   /* Don't warn here about spurious semicolons.  */
288   if (next_tok_type == CPP_SEMICOLON)
289     return false;
290 
291   expanded_location body_exploc = expand_location (body_loc);
292   expanded_location next_stmt_exploc = expand_location (next_stmt_loc);
293   expanded_location guard_exploc = expand_location (guard_loc);
294 
295   /* They must be in the same file.  */
296   if (next_stmt_exploc.file != body_exploc.file)
297     return false;
298 
299   /* If NEXT_STMT_LOC and BODY_LOC are on the same line, consider
300      the location of the guard.
301 
302      Cases where we want to issue a warning:
303 
304        if (flag)
305          foo ();  bar ();
306                   ^ WARN HERE
307 
308        if (flag) foo (); bar ();
309                          ^ WARN HERE
310 
311 
312        if (flag) ; {
313                    ^ WARN HERE
314 
315        if (flag)
316         ; {
317           ^ WARN HERE
318 
319      Cases where we don't want to issue a warning:
320 
321        various_code (); if (flag) foo (); bar (); more_code ();
322                                           ^ DON'T WARN HERE.  */
323   if (next_stmt_exploc.line == body_exploc.line)
324     {
325       if (guard_exploc.file != body_exploc.file)
326 	return true;
327       if (guard_exploc.line < body_exploc.line)
328 	/* The guard is on a line before a line that contains both
329 	   the body and the next stmt.  */
330 	return true;
331       else if (guard_exploc.line == body_exploc.line)
332 	{
333 	  /* They're all on the same line.  */
334 	  gcc_assert (guard_exploc.file == next_stmt_exploc.file);
335 	  gcc_assert (guard_exploc.line == next_stmt_exploc.line);
336 	  unsigned int guard_vis_column;
337 	  unsigned int guard_line_first_nws;
338 	  if (!get_visual_column (guard_exploc, guard_loc,
339 				  &guard_vis_column,
340 				  &guard_line_first_nws))
341 	    return false;
342 	  /* Heuristic: only warn if the guard is the first thing
343 	     on its line.  */
344 	  if (guard_vis_column == guard_line_first_nws)
345 	    return true;
346 	}
347     }
348 
349   /* If NEXT_STMT_LOC is on a line after BODY_LOC, consider
350      their relative locations, and of the guard.
351 
352      Cases where we want to issue a warning:
353         if (flag)
354           foo ();
355           bar ();
356           ^ WARN HERE
357 
358      Cases where we don't want to issue a warning:
359         if (flag)
360         foo ();
361         bar ();
362         ^ DON'T WARN HERE (autogenerated code?)
363 
364 	if (flagA)
365 	  foo ();
366       #if SOME_CONDITION_THAT_DOES_NOT_HOLD
367 	if (flagB)
368       #endif
369 	  bar ();
370 	  ^ DON'T WARN HERE
371 
372 	if (flag)
373 	  ;
374 	  foo ();
375 	  ^ DON'T WARN HERE
376 
377 	#define emit
378 	if (flag)
379 	     foo ();
380 	emit bar ();
381 	     ^ DON'T WARN HERE
382 
383   */
384   if (next_stmt_exploc.line > body_exploc.line)
385     {
386       /* Determine if GUARD_LOC and NEXT_STMT_LOC are aligned on the same
387 	 "visual column"...  */
388       unsigned int next_stmt_vis_column;
389       unsigned int next_stmt_line_first_nws;
390       unsigned int body_vis_column;
391       unsigned int body_line_first_nws;
392       unsigned int guard_vis_column;
393       unsigned int guard_line_first_nws;
394       /* If we can't determine it, don't issue a warning.  This is sometimes
395 	 the case for input files containing #line directives, and these
396 	 are often for autogenerated sources (e.g. from .md files), where
397 	 it's not clear that it's meaningful to look at indentation.  */
398       if (!get_visual_column (next_stmt_exploc, next_stmt_loc,
399 			      &next_stmt_vis_column,
400 			      &next_stmt_line_first_nws))
401 	return false;
402       if (!get_visual_column (body_exploc, body_loc,
403 			      &body_vis_column,
404 			      &body_line_first_nws))
405 	return false;
406       if (!get_visual_column (guard_exploc, guard_loc,
407 			      &guard_vis_column,
408 			      &guard_line_first_nws))
409 	return false;
410 
411       /* If the line where the next stmt starts has non-whitespace
412 	 on it before the stmt, then don't warn:
413 	  #define emit
414 	  if (flag)
415 	       foo ();
416 	  emit bar ();
417 	       ^ DON'T WARN HERE
418 	 (PR c/69122).  */
419       if (next_stmt_line_first_nws < next_stmt_vis_column)
420 	return false;
421 
422       if ((body_type != CPP_SEMICOLON
423 	   && next_stmt_vis_column == body_vis_column)
424 	  /* As a special case handle the case where the body is a semicolon
425 	     that may be hidden by a preceding comment, e.g.  */
426 
427 	  // if (p)
428 	  //   /* blah */;
429 	  //   foo (1);
430 
431 	  /*  by looking instead at the column of the first non-whitespace
432 	      character on the body line.  */
433 	  || (body_type == CPP_SEMICOLON
434 	      && body_exploc.line > guard_exploc.line
435 	      && body_line_first_nws != body_vis_column
436 	      && next_stmt_vis_column > guard_line_first_nws))
437 	{
438           /* Don't warn if they are aligned on the same column
439 	     as the guard itself (suggesting autogenerated code that doesn't
440 	     bother indenting at all).
441 	     For "else" clauses, we consider the column of the first
442 	     non-whitespace character on the guard line instead of the column
443 	     of the actual guard token itself because it is more sensible.
444 	     Consider:
445 
446 	     if (p) {
447 	     foo (1);
448 	     } else     // GUARD
449 	     foo (2);   // BODY
450 	     foo (3);   // NEXT
451 
452 	     and:
453 
454 	     if (p)
455 	       foo (1);
456 	     } else       // GUARD
457 	       foo (2);   // BODY
458 	       foo (3);   // NEXT
459 
460 	     If we just used the column of the "else" token, we would warn on
461 	     the first example and not warn on the second.  But we want the
462 	     exact opposite to happen: to not warn on the first example (which
463 	     is probably autogenerated) and to warn on the second (whose
464 	     indentation is misleading).  Using the column of the first
465 	     non-whitespace character on the guard line makes that
466 	     happen.  */
467 	  unsigned int guard_column = (guard_tinfo.keyword == RID_ELSE
468 				       ? guard_line_first_nws
469 				       : guard_vis_column);
470 	  if (guard_column == body_vis_column)
471 	    return false;
472 
473 	  /* We may have something like:
474 
475 	     if (p)
476 	       {
477 	       foo (1);
478 	       } else  // GUARD
479 	     foo (2);  // BODY
480 	     foo (3);  // NEXT
481 
482 	     in which case the columns are not aligned but the code is not
483 	     misleadingly indented.  If the column of the body isn't indented
484 	     more than the guard line then don't warn.  */
485 	  if (body_vis_column <= guard_line_first_nws)
486 	    return false;
487 
488 	  /* Don't warn if there is an unindent between the two statements. */
489 	  int vis_column = MIN (next_stmt_vis_column, body_vis_column);
490 	  if (detect_intervening_unindent (body_exploc.file, body_exploc.line,
491 					   next_stmt_exploc.line,
492 					   vis_column))
493 	    return false;
494 
495 	  /* Otherwise, they are visually aligned: issue a warning.  */
496 	  return true;
497 	}
498 
499 	/* Also issue a warning for code having the form:
500 
501 	   if (flag);
502 	     foo ();
503 
504 	   while (flag);
505 	   {
506 	     ...
507 	   }
508 
509 	   for (...);
510 	     {
511 	       ...
512 	     }
513 
514 	   if (flag)
515 	     ;
516 	   else if (flag);
517 	     foo ();
518 
519 	   where the semicolon at the end of each guard is most likely spurious.
520 
521 	   But do not warn on:
522 
523 	   for (..);
524 	   foo ();
525 
526 	   where the next statement is aligned with the guard.
527 	*/
528 	if (body_type == CPP_SEMICOLON)
529 	  {
530 	    if (body_exploc.line == guard_exploc.line)
531 	      {
532 		if (next_stmt_vis_column > guard_line_first_nws
533 		    || (next_tok_type == CPP_OPEN_BRACE
534 			&& next_stmt_vis_column == guard_line_first_nws))
535 		  return true;
536 	      }
537 	  }
538     }
539 
540   return false;
541 }
542 
543 /* Return the string identifier corresponding to the given guard token.  */
544 
545 const char *
546 guard_tinfo_to_string (enum rid keyword)
547 {
548   switch (keyword)
549     {
550     case RID_FOR:
551       return "for";
552     case RID_ELSE:
553       return "else";
554     case RID_IF:
555       return "if";
556     case RID_WHILE:
557       return "while";
558     case RID_DO:
559       return "do";
560     case RID_SWITCH:
561       return "switch";
562     default:
563       gcc_unreachable ();
564     }
565 }
566 
567 /* Called by the C/C++ frontends when we have a guarding statement at
568    GUARD_LOC containing a statement at BODY_LOC, where the block wasn't
569    written using braces, like this:
570 
571      if (flag)
572        foo ();
573 
574    along with the location of the next token, at NEXT_STMT_LOC,
575    so that we can detect followup statements that are within
576    the same "visual block" as the guarded statement, but which
577    aren't logically grouped within the guarding statement, such
578    as:
579 
580      GUARD_LOC
581      |
582      V
583      if (flag)
584        foo (); <- BODY_LOC
585        bar (); <- NEXT_STMT_LOC
586 
587    In the above, "bar ();" isn't guarded by the "if", but
588    is indented to misleadingly suggest that it is in the same
589    block as "foo ();".
590 
591    GUARD_KIND identifies the kind of clause e.g. "if", "else" etc.  */
592 
593 void
594 warn_for_misleading_indentation (const token_indent_info &guard_tinfo,
595 				 const token_indent_info &body_tinfo,
596 				 const token_indent_info &next_tinfo)
597 {
598   /* Early reject for the case where -Wmisleading-indentation is disabled,
599      to avoid doing work only to have the warning suppressed inside the
600      diagnostic machinery.  */
601   if (!warn_misleading_indentation)
602     return;
603 
604   if (should_warn_for_misleading_indentation (guard_tinfo,
605 					      body_tinfo,
606 					      next_tinfo))
607     {
608       if (warning_at (guard_tinfo.location, OPT_Wmisleading_indentation,
609 		      "this %qs clause does not guard...",
610 		      guard_tinfo_to_string (guard_tinfo.keyword)))
611 	inform (next_tinfo.location,
612 		"...this statement, but the latter is misleadingly indented"
613 		" as if it were guarded by the %qs",
614 		guard_tinfo_to_string (guard_tinfo.keyword));
615     }
616 }
617