1#! /usr/bin/env perl
2#
3# Copyright 2020-2021 The OpenSSL Project Authors. All Rights Reserved.
4# Copyright Siemens AG 2019-2020
5#
6# Licensed under the Apache License 2.0 (the "License").
7# You may not use this file except in compliance with the License.
8# You can obtain a copy in the file LICENSE in the source distribution
9# or at https://www.openssl.org/source/license.html
10#
11# check-format.pl
12# - check formatting of C source according to OpenSSL coding style
13#
14# usage:
#   check-format.pl [-l|--sloppy-len] [-b|--sloppy-bodylen]
16#                   [-s|--sloppy-space] [-c|--sloppy-comment]
17#                   [-m|--sloppy-macro] [-h|--sloppy-hang]
18#                   [-e|--eol-comment] [-1|--1-stmt]
19#                   <files>
20#
21# run self-tests:
22#   util/check-format.pl util/check-format-test-positives.c
23#   util/check-format.pl util/check-format-test-negatives.c
24#
25# checks adherence to the formatting rules of the OpenSSL coding guidelines
26# assuming that the input files contain syntactically correct C code.
27# This pragmatic tool is incomplete and yields some false positives.
28# Still it should be useful for detecting most typical glitches.
29#
30# options:
31#  -l | --sloppy-len     increase accepted max line length from 80 to 84
#  -b | --sloppy-bodylen do not report function body length > 200
33#  -s | --sloppy-space   do not report whitespace nits
34#  -c | --sloppy-comment do not report indentation of comments
35#                        Otherwise for each multi-line comment the indentation of
36#                        its lines is checked for consistency. For each comment
37#                        that does not begin to the right of normal code its
38#                        indentation must be as for normal code, while in case it
39#                        also has no normal code to its right it is considered to
40#                        refer to the following line and may be indented equally.
41#  -m | --sloppy-macro   allow missing extra indentation of macro bodies
42#  -h | --sloppy-hang    when checking hanging indentation, do not report
43#                        * same indentation as on line before
44#                        * same indentation as non-hanging indent level
45#                        * indentation moved left (not beyond non-hanging indent)
46#                          just to fit contents within the line length limit
47#  -e | --eol-comment    report needless intermediate multiple consecutive spaces also before end-of-line comments
48#  -1 | --1-stmt         do more aggressive checks for { 1 stmt } - see below
49#
50# There are non-trivial false positives and negatives such as the following.
51#
52# * When a line contains several issues of the same kind only one is reported.
53#
54# * When a line contains more than one statement this is (correctly) reported
55#   but in some situations the indentation checks for subsequent lines go wrong.
56#
57# * There is the special OpenSSL rule not to unnecessarily use braces around
58#   single statements:
59#   {
60#       stmt;
61#   }
62#   except within if ... else constructs where some branch contains more than one
63#   statement. Since the exception is hard to recognize when such branches occur
64#   after the current position (such that false positives would be reported)
#   the tool checks for this rule by default only for do/while/for bodies.
66#   Yet with the --1-stmt option false positives are preferred over negatives.
67#   False negatives occur if the braces are more than two non-empty lines apart.
68#
69# * The presence of multiple consecutive spaces is regarded a coding style nit
70#   except when this is before end-of-line comments (unless the --eol-comment is given) and
71#   except when done in order to align certain columns over multiple lines, e.g.:
72#   # define AB  1
73#   # define CDE 22
74#   # define F   3333
75#   This pattern is recognized - and consequently extra space not reported -
76#   for a given line if in the nonempty line before or after (if existing)
77#   for each occurrence of "  \S" (where \S means non-space) in the given line
78#   there is " \S" in the other line in the respective column position.
79#   This may lead to both false negatives (in case of coincidental " \S")
80#   and false positives (in case of more complex multi-column alignment).
81#
82# * When just part of control structures depend on #if(n)(def), which can be
83#   considered bad programming style, indentation false positives occur, e.g.:
84#   #if X
85#       if (1) /* bad style */
86#   #else
87#       if (2) /* bad style resulting in false positive */
88#   #endif
89#           c; /* resulting further false positive */
90
91use strict;
92# use List::Util qw[min max];
93use POSIX;
94
use constant {
    INDENT_LEVEL    => 4,   # number of columns per indentation level
    MAX_LINE_LENGTH => 80,  # default limit for the length of a line
    MAX_BODY_LENGTH => 200, # limit for the length of a function body
};

# global variables @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# command-line options, each holding its default until an option given by the user changes it
my $max_length = MAX_LINE_LENGTH; # accepted maximal line length, raised by -l | --sloppy-len
my $sloppy_bodylen = 0;           # set by -b | --sloppy-bodylen
my $sloppy_SPC = 0;               # set by -s | --sloppy-space
my $sloppy_hang = 0;              # set by -h | --sloppy-hang
my $sloppy_cmt = 0;               # set by -c | --sloppy-comment
my $sloppy_macro = 0;             # set by -m | --sloppy-macro
my $eol_cmt = 0;                  # set by -e | --eol-comment
my $extended_1_stmt = 0;          # set by -1 | --1-stmt

# table of option patterns and the action belonging to each of them; a pattern is matched
# against the option text after its first '-', so long option names still start with one '-'
my @option_actions = (
    [ qr/^(l|-sloppy-len)$/,     sub { $max_length += INDENT_LEVEL } ],
    [ qr/^(b|-sloppy-bodylen)$/, sub { $sloppy_bodylen = 1 } ],
    [ qr/^(s|-sloppy-space)$/,   sub { $sloppy_SPC = 1 } ],
    [ qr/^(c|-sloppy-comment)$/, sub { $sloppy_cmt = 1 } ],
    [ qr/^(m|-sloppy-macro)$/,   sub { $sloppy_macro = 1 } ],
    [ qr/^(h|-sloppy-hang)$/,    sub { $sloppy_hang = 1 } ],
    [ qr/^(e|-eol-comment)$/,    sub { $eol_cmt = 1 } ],
    [ qr/^(1|-1-stmt)$/,         sub { $extended_1_stmt = 1 } ],
);

# consume all leading arguments that look like an option; the first other argument ends the loop
while ($ARGV[0] =~ m/^-(\w|-[\w\-]+)$/) {
    my $option = $1;
    shift; # outside any sub this removes the first element of @ARGV

    # look up the first table entry matching the option
    my $action;
    for my $entry (@option_actions) {
        my ($pattern, $handler) = @$entry;
        next unless $option =~ $pattern;
        $action = $handler;
        last;
    }
    die("unknown option: -$option") unless defined $action;
    $action->();
}
133
# status variables

# input file and line bookkeeping
my $self_test;             # current input file is regarded to contain (positive/negative) self-tests
my $line;                  # number of the current line
my $line_before;           # number of the previous line that is not essentially empty,
                           # where essentially empty means containing at most whitespace and '\'
my $line_before2;          # number of the not essentially empty line preceding $line_before
my $contents;              # current line as read, i.e., without blinding
#  $_                      # current line with comments etc. blinded
my $contents_before;       # unblinded contents of $line_before, if $line_before > 0
my $contents_before_;      # contents of $line_before with comments etc. blinded, if $line_before > 0
my $contents_before2;      # unblinded contents of $line_before2, if $line_before2 > 0
my $contents_before_2;     # contents of $line_before2 with comments etc. blinded, if $line_before2 > 0
my $in_multiline_string;   # current line begins inside a multi-line string literal

# actual indentation of the current line and local adjustments
my $count;                 # number of leading whitespace characters (except newline) in the current line, or -1;
                           # expected to equal $block_indent + $hanging_offset + $local_offset or $expr_indent
my $count_before;          # number of leading whitespace characters (except line ending chars) in $contents_before
my $has_label;             # there is a label on the current line
my $local_offset;          # extra indent of the current line due to a label, switch case/default,
                           # or leading closing brace(s)

# functions, braces, and header-file specifics
my $line_body_start;       # line number where the last function body started, or 0
my $line_function_start;   # line number where the last function definition started, used if $line_body_start != 0
my $last_function_header;  # header with the name of the last function defined, used if $line_function_start != 0
my $line_opening_brace;    # number of the previous line with an opening brace after do/while/for,
                           # optionally also after if/else
my $keyword_opening_brace; # name of the previous keyword, used if $line_opening_brace != 0
my $ifdef__cplusplus;      # the line before contained '#ifdef __cplusplus' (as used in header files)

# indentation at block/statement level
my $block_indent;          # normal indentation currently required at block/statement level
my $hanging_offset;        # extra indent, possibly nested, for just one hanging statement or expr or typedef
my @in_do_hanging_offsets; # stack of hanging offsets for nested 'do' ... 'while'
my @in_if_hanging_offsets; # stack of hanging offsets for nested 'if' (but not its potential 'else' branch)
my $if_maybe_terminated;   # an 'if' ends here: $hanging_offset should be reset unless the next line starts with 'else'
my @nested_block_indents;  # stack of block/statement level indentations, needed due to hanging statements
my @nested_hanging_offsets;# stack of $hanging_offset values, kept in parallel to @nested_block_indents
my @nested_in_typedecl;    # stack of $in_typedecl values, partly in parallel to @nested_block_indents

# indentation within expressions and type declarations
my @nested_indents;        # stack of hanging indents due to parentheses, braces, brackets, or conditionals
my @nested_symbols;        # stack of hanging symbols '(', '{', '[', or '?', kept in parallel to @nested_indents
my @nested_conds_indents;  # stack of hanging indents due to conditionals ('?' ... ':')
my $expr_indent;           # hanging indent resulting within (multi-line) expressions including type exprs, else 0
my $hanging_symbol;        # the character ('(', '{', '[', not: '?') causing $expr_indent, if $expr_indent != 0
my $in_expr;               # within an expression after if/while/for/switch/return/enum/LHS of assignment
my $in_paren_expr;         # within a parenthesized if/while/for condition or switch expression, if $expr_indent != 0
my $in_typedecl;           # nesting level of typedef/struct/union/enum

# preprocessor directives
my $in_directive;          # number of lines seen so far of the current preprocessor directive, e.g., macro definition
my $directive_nesting;     # indentation currently required for preprocessor directives according to #if(n)(def)
my $directive_offset;      # indent offset within a multi-line preprocessor directive, if $in_directive > 0
my $in_macro_header;       # number of open parentheses + 1 in a (multi-line) #define header, if $in_directive > 0

# comments
my $in_comment;            # number of lines seen so far of the current multi-line comment,
                           # or < 0 when the comment ends on the current line
my $leading_comment;       # no code precedes the beginning delimiter of the multi-line comment
my $formatted_comment;     # the multi-line comment begins with "/*-", which indicates/allows special formatting
my $comment_indent;        # indentation of the comment, if $in_comment != 0

# statistics, all starting at zero
my (
    $num_reports_line,     # issues found on the current line
    $num_reports,          # all issues found
    $num_indent_reports,   # indentation issues found
    $num_nesting_issues,   # directive nesting issues found
    $num_syntax_issues,    # syntax issues found during sanity checks
    $num_SPC_reports,      # whitespace issues found
    $num_length_reports,   # line length issues found
) = (0) x 7;
189
sub reset_file_state {
    # Put the per-file status variables back to their initial values:
    # all line numbers, indents, offsets, flags, and nesting levels become 0
    # and all stacks become empty.

    # line bookkeeping
    $line = $line_before = $line_before2 = 0;

    # stacks tracking nesting
    @$_ = () for (\@nested_block_indents, \@nested_hanging_offsets, \@nested_in_typedecl,
                  \@nested_symbols, \@nested_indents, \@nested_conds_indents,
                  \@in_do_hanging_offsets, \@in_if_hanging_offsets);

    # expression and statement level state
    $expr_indent = $in_paren_expr = $in_expr = 0;
    $hanging_offset = $if_maybe_terminated = $block_indent = 0;

    # remaining flags, line numbers, and nesting counters
    $ifdef__cplusplus = $in_multiline_string = 0;
    $line_body_start = $line_opening_brace = 0;
    $in_typedecl = $in_directive = $directive_nesting = $in_comment = 0;
}
217
218# auxiliary submodules @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
219
sub report_flexibly {
    # Report one issue and update the statistics.
    # Parameters:
    #   $line     - number of the line the issue refers to
    #   $msg      - description of the issue; its wording also selects the counters to increment
    #   $contents - contents of that line, printed as part of the report
    # Whitespace issues, i.e., those with "space" in $msg, are dropped entirely if $sloppy_SPC is set.
    # Nothing is printed while processing self-test input, but the issue is counted nevertheless.
    my ($line, $msg, $contents) = @_;

    my $report_SPC = $msg =~ /space/;
    if ($report_SPC && $sloppy_SPC) {
        return;
    }

    unless ($self_test) {
        print "$ARGV:$line:$msg:$contents";
    }

    # counters applying to any issue
    $num_reports_line++;
    $num_reports++;

    # counters per kind of issue, where the kind is derived from the message text
    if ($msg =~ m/indent/) {
        $num_indent_reports++;
    }
    if ($msg =~ m/directive nesting/) {
        $num_nesting_issues++;
    }
    if ($msg =~ m/unclosed|unexpected/) {
        $num_syntax_issues++;
    }
    if ($report_SPC) {
        $num_SPC_reports++;
    }
    $num_length_reports++ if $msg =~ m/length/;
}
236
sub report {
    # Report the issue described by the given message for the current line,
    # i.e., with the line number $line and the line contents $contents.
    my ($msg) = @_;
    return report_flexibly($line, $msg, $contents);
}
241
sub parens_balance { # count balance of opening parentheses - closing parentheses
    # The result is positive if the given string has more '(' than ')',
    # negative if it has more ')' than '(', and 0 if both occur equally often.
    my ($str) = @_;
    my $num_opening = ($str =~ tr/(//); # tr with empty replacement just counts
    my $num_closing = ($str =~ tr/)//);
    return $num_opening - $num_closing;
}
246
sub blind_nonspace { # blind non-space text of comment as @, preserving length and spaces
    # '@' serves as the blinding character since it cannot occur in normal program code.
    # Space characters are kept as they are such that extra SPC can be checked also in comments.
    my ($comment_text) = @_;

    # one extra SPC is allowed after '.', '?', or '!' in comments: turning the first of two
    # whitespace characters following such a character into '.' hides it from extra SPC checks
    $comment_text =~ s/([\.\?\!])\s\s/$1. /g;

    # replace each character other than ' ' by '@', working on the local copy
    $comment_text =~ tr/ /@/c;
    return $comment_text;
}
254
255# submodule for indentation checking/reporting @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
256
sub check_indent { # used for lines outside multi-line string literals
    # Compare the actual indentation $count of the current line with the indentation required
    # by the current nesting state and report any mismatch. Lines that are (part of) comments
    # get special treatment, which may also modify the blinded line $_ and may
    # report for the line before an issue that can be decided only now.

    # indentation required at normal statement level, which is not allowed to be negative
    my $normal_indent = $block_indent + $hanging_offset + $local_offset;
    $normal_indent = 0 if $normal_indent < 0; # TODO maybe give warning/error

    # what the current line is regarded as, for use in the final report
    my $line_kind;
    if ($contents =~ m/^\s*\/\*/) {
        $line_kind = "intra-line comment";
    } elsif ($has_label) {
        $line_kind = "label";
    } elsif ($hanging_offset != 0) {
        $line_kind = "hanging stmt/expr";
    } else {
        $line_kind = "stmt/decl"; # $in_typedecl is not fully to the point here
    }

    # reference indent: the hanging expression indent if there is one, else the normal indent
    my ($ref_desc, $ref_indent) = ($line_kind, $normal_indent);
    if ($expr_indent != 0) {
        ($ref_desc, $ref_indent) = ("hanging '$hanging_symbol'", $expr_indent);
    }

    # alternative indent that is accepted as well; so far there is none
    my ($alt_desc, $alt_indent) = ("", $ref_indent);

    # a label may also be placed at indent 1 - this cannot happen for leading ':'
    if ($expr_indent == 0 && $has_label) {
        ($alt_desc, $alt_indent) = ("outermost position", 1);
    }

    # leading ':' within stmt/expr/decl - this cannot happen for labels, leading '&&', or leading '||'
    # it may be placed at the level of the corresponding "?"
    if (@nested_conds_indents != 0 && substr($_, $count, 1) eq ":") {
        ($alt_desc, $alt_indent) = ("leading ':'", $nested_conds_indents[-1]);
    }

    # leading '&&' or '||' may be indented one more level - this cannot happen for leading ":"
    if ($contents =~ m/^[\s@]*(\&\&|\|\|)/) {
        ($alt_desc, $alt_indent) = ("leading '$1'", $ref_indent + INDENT_LEVEL);
    }

    if ($expr_indent < 0) { # implies @nested_symbols != 0 && @nested_symbols[0] eq "{" && @nested_indents[-1] < 0
        # hanging initializer/enum expressions after trailing '{' may use the normal stmt indentation level
        # this cannot happen for labels and overrides special treatment of ':', '&&' and '||' for this line
        ($alt_desc, $alt_indent) = ("lines after '{'", $normal_indent);
        # settle on one of the two options according to the actual indentation, preventing forth and back
        if ($count == $normal_indent) {
            $nested_indents[-1] = $normal_indent; # allow $normal_indent
        } else {
            $nested_indents[-1] = -$nested_indents[-1];
        }
        $expr_indent = $nested_indents[-1];
        $ref_indent = $expr_indent;
    }

    # consistency of indentation within multi-line comment (i.e., between its first, inner, and last lines)
    if ($in_comment != 0 && $in_comment != 1) { # in multi-line comment but not on its first line
        my $at_comment_end = $in_comment < 0;
        unless ($sloppy_cmt) {
            if ($at_comment_end) {
                my $adjusted_count = $count + ($in_comment == -2 ? 1 : 0);
                report("indent = ".$adjusted_count." != $comment_indent at end of multi-line comment")
                    if $adjusted_count != $comment_indent;
            } else {
                report("indent = $count != $comment_indent within multi-line comment")
                    if $count != $comment_indent;
            }
        }
        return unless $at_comment_end; # inner lines of the comment need no further checks
        # the indentation of the last line of a non-leading multi-line comment is not checked
        unless ($leading_comment) {
            s/^(\s*)@/$1*/; # blind first '@' as '*' to prevent below delayed check for the line before
            return;
        }
        # $comment_indent will be checked by the below checks for end of multi-line comment
    }

    # else check indentation of entire-line comment or entire-line end of multi-line comment
    # ... w.r.t. indent of the following line by delayed check for the line before
    if (($in_comment == 0 || $in_comment == 1) # no comment, intra-line comment, or begin of multi-line comment
        && $line_before > 0 # there is a line before
        && $contents_before_ =~ m/^(\s*)@[\s@]*$/) { # line before begins with '@', no code follows (except '\')
        if (!$sloppy_cmt && $count_before != $count) {
            report_flexibly($line_before,
                            "entire-line comment indent = $count_before != $count (of following line)",
                            $contents_before);
        }
    }
    # ... but allow normal indentation for the current line, else above check will be done for the line before
    if ($in_comment <= 0 # (no comment,) intra-line comment or end of multi-line comment
        && m/^(\s*)@[\s@]*$/) { # line begins with '@', no code follows (except '\')
        if ($count == $ref_indent) { # indentation is like for (normal) code in this line
            s/^(\s*)@/$1*/; # blind first '@' as '*' to prevent above delayed check for the line before
            return;
        }
        return unless eof; # defer check of entire-line comment to next line
    }

    # else check indentation of leading intra-line comment or end of multi-line comment
    if (m/^(\s*)@/) { # line begins with '@', i.e., any (remaining type of) comment
        if (!$sloppy_cmt && $count != $ref_indent) {
            report("intra-line comment indent = $count != $ref_indent") if $in_comment == 0;
            report("multi-line comment indent = $count != $ref_indent") if $in_comment < 0;
        }
        return;
    }

    if ($sloppy_hang && ($hanging_offset != 0 || $expr_indent != 0)) {
        my $tolerated =
            # same indentation as on the line before (potentially due to same violations)
            ($line_before > 0 && $count == $count_before)
            # normal indentation level while hanging expression indent would be required
            || ($expr_indent != 0 && $count == $normal_indent)
            # contents shifted left of nested expr indent (but not as far as stmt indent),
            # apparently aligned to the right in order to fit within line length limit
            || ($normal_indent < $count && $count < $expr_indent &&
                length($contents) == MAX_LINE_LENGTH + length("\n"));
        return if $tolerated;
    }

    # mention the alternative only if there is one and its value differs from the reference indent
    my $alt_hint = ($alt_desc eq "" || $alt_indent == $ref_indent) ? "" : " or $alt_indent for $alt_desc";
    report("indent = $count != $ref_indent for $ref_desc".$alt_hint)
        if $count != $ref_indent && $count != $alt_indent;
}
356
357# submodules handling indentation within expressions @@@@@@@@@@@@@@@@@@@@@@@@@@@
358
sub update_nested_indents { # may reset $in_paren_expr and in this case also resets $in_expr
    # Scan $str for the structural characters {}()?:;[] and keep the nesting state up to date:
    # the stacks @nested_block_indents, @nested_hanging_offsets, @nested_in_typedecl,
    # @nested_indents, @nested_symbols, and @nested_conds_indents
    # as well as $block_indent, $hanging_offset, and $in_typedecl.
    # Closing characters that do not match the innermost open symbol are reported as unexpected.
    # Parameters:
    #   $str   - line contents to scan
    #   $start - position in $str at which scanning begins, defaults to 0
    # Returns the position of the first ';' found outside a parenthesized expression
    # such as the header of 'for(..;..;..)', or -1 if there is no such terminator.
    my ($str, $start) = @_;
    $start //= 0;
    for (my $i = $start; $i < length($str); $i++) {
        # match from position $i the first {}()?:;[]
        last unless substr($str, $i) =~ m/^(.*?)([{}()?:;\[\]])(.*)$/;
        my $c = $2;
        my ($head, $tail) = (substr($str, 0, $i).$1, $3); # all text before and all text after $c
        $i += length($1) + length($2) - 1; # now $i is the position of $c

        # stop at terminator outside 'for(..;..;..)', assuming that 'for' is followed by '('
        return $i if $c eq ";" && (!$in_paren_expr || @nested_indents == 0);

        my $in_stmt = $in_expr || @nested_symbols != 0; # not: || $in_typedecl != 0
        if ($c =~ m/[{([?]/) { # $c is '{', '(', '[', or '?'
            if ($c eq "{") { # '{' in any context
                # cancel newly hanging_offset if opening brace '{' is after non-whitespace non-comment:
                $hanging_offset -= INDENT_LEVEL if $hanging_offset > 0 && $head =~ m/[^\s\@]/;
                push @nested_block_indents, $block_indent;
                push @nested_hanging_offsets, $in_expr ? $hanging_offset : 0;
                push @nested_in_typedecl, $in_typedecl if $in_typedecl != 0;
                $block_indent += INDENT_LEVEL + $hanging_offset;
                $hanging_offset = 0;
            }
            if ($c ne "{" || $in_stmt) { # for '{' inside stmt/expr (not: decl), for '(', '[', or '?' anywhere
                $tail =~ m/^([\s@]*)([^\s\@])/;
                push @nested_indents, defined $2
                    ? $i + 1 + length($1) # actual indentation of following non-space non-comment
                    : $c ne "{" ? +($i + 1)  # just after '(' or '[' if only whitespace thereafter
                                : -($i + 1); # allow also $stmt_indent if '{' with only whitespace thereafter
                push @nested_symbols, $c; # done also for '?' to be able to check correct nesting
                push @nested_conds_indents, $i if $c eq "?"; # remember special alternative indent for ':'
            }
        } elsif ($c =~ m/[})\]:]/) { # $c is '}', ')', ']', or ':'
            # map the closing character to its opening counterpart; the replacement list must be
            # as long as the search list because tr/// otherwise reuses its last replacement
            # character, which would map ':' to '[' rather than to '?'
            my $opening_c = ($c =~ tr/})]:/{([?/r);
            # ignore ':' outside stmt/expr/decl
            # in the presence of ':', one could add a sanity check excluding ':' after an initial
            # label/case/default and ':' before a bitfield length within an unsigned type decl,
            # but the needed matching would not work for multi-line expr after 'case'
            # and would need improvement for bitfields
            if ($c ne ":" || $in_stmt) {
                if ($c ne "}" || $in_stmt) { # for '}' inside stmt/expr/decl, ')', ']', or ':'
                    # the symbols are strings, so they must be compared using 'eq';
                    # a numeric '==' would regard any two of them as equal
                    if (@nested_symbols != 0 &&
                        $nested_symbols[-1] eq $opening_c) { # for $c there was a corresponding $opening_c
                        pop @nested_indents;
                        pop @nested_symbols;
                        pop @nested_conds_indents if $opening_c eq "?";
                    } else {
                        report("unexpected '$c' @ ".($in_paren_expr ? "(expr)" : "expr"));
                        next;
                    }
                }
                if ($c eq "}") { # '}' at block level but also inside stmt/expr/decl
                    if (@nested_block_indents == 0) {
                        report("unexpected '}'");
                    } else {
                        $block_indent = pop @nested_block_indents;
                        $hanging_offset = pop @nested_hanging_offsets;
                        $in_typedecl = pop @nested_in_typedecl if @nested_in_typedecl != 0;
                    }
                }
                if ($in_paren_expr && !grep(/\(/, @nested_symbols)) { # end of (expr)
                    check_nested_nonblock_indents("(expr)");
                    $in_paren_expr = $in_expr = 0;
                    report("code after (expr)")
                        if $tail =~ m/^([^{]*)/ && $1 =~ m/[^\s\@;]/; # non-space non-';' before any '{'
                }
            }
        }
    }
    return -1;
}
439
sub check_nested_nonblock_indents {
    # Report each symbol still pending on @nested_symbols as unclosed, innermost first,
    # and clear the stacks used for hanging indents within expressions.
    # The parameter names the kind of position, e.g., "(expr)", to be mentioned in the reports.
    my ($position) = @_;

    # take all pending symbols off their stack at once, then handle them from top to bottom
    for my $symbol (reverse splice(@nested_symbols)) {
        report("unclosed '$symbol' in $position");
        next if $symbol ne "{";
        # repair stack of blocks
        $block_indent = pop @nested_block_indents;
        $hanging_offset = pop @nested_hanging_offsets;
        $in_typedecl = pop @nested_in_typedecl if @nested_in_typedecl != 0;
    }

    @nested_indents = ();
    @nested_conds_indents = ();
}
454
455# start of main program @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
456
457reset_file_state();
458
459while (<>) { # loop over all lines of all input files
460    $self_test = $ARGV =~ m/check-format-test/;
461    $line++;
462    s/\r$//; # strip any trailing CR '\r' (which are typical on Windows systems)
463    $contents = $_;
464
465    # check for illegal characters
466    if (m/(.*?)([\x00-\x09\x0B-\x1F\x7F-\xFF])/) {
467        my $col = length($1);
468        report(($2 eq "\x09" ? "TAB" : $2 eq "\x0D" ? "CR " : $2 =~ m/[\x00-\x1F]/ ? "non-printable"
469                : "non-7bit char") . " at column $col") ;
470    }
471
472    # check for whitespace at EOL
473    report("trailing whitespace at EOL") if m/\s\n$/;
474
475    # assign to $count the actual indentation level of the current line
476    chomp; # remove trailing NL '\n'
477    m/^(\s*)/;
478    $count = length($1); # actual indentation
479    $has_label = 0;
480    $local_offset = 0;
481
482    # character/string literals @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
483
484    s/\\["']/@@/g; # blind all '"' and "'" escaped by '\' (typically within character literals or string literals)
485
486    # handle multi-line string literals to avoid confusion on starting/ending '"' and trailing '\'
487    if ($in_multiline_string) {
488        if (s#^([^"]*)"#($1 =~ tr/"/@/cr).'@'#e) { # string literal terminated by '"'
489            # string contents and its terminating '"' have been blinded as '@'
490            $count = -1; # do not check indentation
491        } else {
492            report("multi-line string literal not terminated by '\"' and trailing '\' is missing")
493                unless s#^([^\\]*)\s*\\\s*$#$1#; # strip trailing '\' plus any whitespace around
494            goto LINE_FINISHED;
495        }
496    }
497
498    # blind contents of character and string literals as @, preserving length (but not spaces)
499    # this prevents confusing any of the matching below, e.g., of whitespace and comment delimiters
500    s#('[^']*')#$1 =~ tr/'/@/cr#eg; # handle all intra-line character literals
501    s#("[^"]*")#$1 =~ tr/"/@/cr#eg; # handle all intra-line string literals
502    $in_multiline_string =          # handle trailing string literal terminated by '\'
503        s#^(([^"]*"[^"]*")*[^"]*)("[^"]*)\\(\s*)$#$1.($3 =~ tr/"/@/cr).'"'.$4#e;
504        # its contents have been blinded and the trailing '\' replaced by '"'
505
506    # strip any other trailing '\' along with any whitespace around it such that it does not interfere with various
507    # matching below; the later handling of multi-line macro definitions uses $contents where it is not stripped
508    s#^(.*?)\s*\\\s*$#$1#; # trailing '\' possibly preceded and/or followed by whitespace
509
510    # comments @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
511
512    # do/prepare checks within multi-line comments
513    my $self_test_exception = $self_test ? "@" : "";
514    if ($in_comment > 0) { # this still includes the last line of multi-line commment
515        my ($head, $any_symbol, $cmt_text) = m/^(\s*)(.?)(.*)$/;
516        if ($any_symbol eq "*") {
517            report("no space after leading '*' in multi-line comment") if $cmt_text =~ m|^[^/\s$self_test_exception]|;
518        } else {
519            report("no leading '*' in multi-line comment");
520        }
521        $in_comment++;
522    }
523
524    # detect end of comment, must be within multi-line comment, check if it is preceded by non-whitespace text
525    if ((my ($head, $tail) = m|^(.*?)\*/(.*)$|) && $1 ne '/') { # ending comment: '*/'
526        report("neither space nor '*' before '*/'") if $head =~ m/[^*\s]$/;
527        report("no space after '*/'") if $tail =~ m/^[^\s,;)}\]]/; # no space or ,;)}] after '*/'
528        if (!($head =~ m|/\*|)) { # not begin of comment '/*', which is is handled below
529            if ($in_comment == 0) {
530                report("unexpected '*/' outside comment");
531                $_ = "$head@@".$tail; # blind the "*/"
532            } else {
533                report("text before '*/' in multi-line comment") if ($head =~ m/\S/); # non-SPC before '*/'
534                $in_comment = -1; # indicate that multi-line comment ends on current line
535                if ($count > 0) {
536                    # make indentation of end of multi-line comment appear like of leading intra-line comment
537                    $head =~ s/^(\s*)\s/$1@/; # replace the last leading space by '@'
538                    $count--;
539                    $in_comment = -2; # indicate that multi-line comment ends on current line, with tweak
540                }
541                my $cmt_text = $head;
542                $_ = blind_nonspace($cmt_text)."@@".$tail;
543            }
544        }
545    }
546
547    # detect begin of comment, check if it is followed by non-space text
548  MATCH_COMMENT:
549    if (my ($head, $opt_minus, $tail) = m|^(.*?)/\*(-?)(.*)$|) { # begin of comment: '/*'
550        report("no space before '/*'")
551            if $head =~ m/[^\s(\*]$/; # not space, '(', or or '*' (needed to allow '*/') before comment delimiter
552        report("neither space nor '*' after '/*' or '/*-'") if $tail =~ m/^[^\s*$self_test_exception]/;
553        my $cmt_text = $opt_minus.$tail; # preliminary
554        if ($in_comment > 0) {
555            report("unexpected '/*' inside multi-line comment");
556        } elsif ($tail =~ m|^(.*?)\*/(.*)$|) { # comment end: */ on same line
557            report("unexpected '/*' inside intra-line comment") if $1 =~ /\/\*/;
558            # blind comment text, preserving length and spaces
559            ($cmt_text, my $rest) = ($opt_minus.$1, $2);
560            $_ = "$head@@".blind_nonspace($cmt_text)."@@".$rest;
561            goto MATCH_COMMENT;
562        } else { # begin of multi-line comment
563            my $self_test_exception = $self_test ? "(@\d?)?" : "";
564            report("text after '/*' in multi-line comment")
565                unless $tail =~ m/^$self_test_exception.?\s*$/;
566            # tail not essentially empty, first char already checked
567            # adapt to actual indentation of first line
568            $comment_indent = length($head) + 1;
569            $_ = "$head@@".blind_nonspace($cmt_text);
570            $in_comment = 1;
571            $leading_comment = $head =~ m/^\s*$/; # there is code before beginning delimiter
572            $formatted_comment = $opt_minus eq "-";
573        }
574    }
575
576    if ($in_comment > 1) { # still inside multi-line comment (not at its begin or end)
577        m/^(\s*)\*?(\s*)(.*)$/;
578        $_ = $1."@".$2.blind_nonspace($3);
579    }
580
581    # handle special case of line after '#ifdef __cplusplus' (which typically appears in header files)
582    if ($ifdef__cplusplus) {
583        $ifdef__cplusplus = 0;
584        $_ = "$1 $2" if $contents =~ m/^(\s*extern\s*"C"\s*)\{(\s*)$/; # ignore opening brace in 'extern "C" {'
585        goto LINE_FINISHED if m/^\s*\}\s*$/; # ignore closing brace '}'
586    }
587
588    # check for over-long lines,
589    # while allowing trailing (also multi-line) string literals to go past $max_length
590    my $len = length; # total line length (without trailing '\n')
591    if ($len > $max_length &&
592        !(m/^(.*)"[^"]*"\s*[\)\}\]]*[,;]?\s*$/ # string literal terminated by '"' (or '\'), then maybe )}],;
593          && length($1) < $max_length)
594        # this allows over-long trailing string literals with beginning col before $max_length
595        ) {
596        report("line length = $len > ".MAX_LINE_LENGTH);
597    }
598
599    # handle C++ / C99 - style end-of-line comments
600    if (my ($head, $cmt_text) = m|^(.*?)//(.*$)|) {
601        report("'//' end-of-line comment");  # the '//' comment style is not allowed for C90
602        # blind comment text, preserving length and spaces
603        $_ = "$head@@".blind_nonspace($cmt_text);
604    }
605
606    # at this point all non-space portions of any types of comments have been blinded as @
607
608    goto LINE_FINISHED if m/^\s*$/; # essentially empty line: just whitespace (and maybe a trailing '\')
609
610    # intra-line whitespace nits @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
611
612    my $in_multiline_comment = ($in_comment > 1 || $in_comment < 0); # $in_multiline_comment refers to line before
613    if (!$sloppy_SPC && !($in_multiline_comment && $formatted_comment)) {
614        sub extra_SPC {
615            my $intra_line = shift;
616            return "extra space".($intra_line =~ m/@\s\s/ ?
617                                  $in_comment != 0 ? " in multi-line comment"
618                                                   : " in intra-line comment" : "");
619        }
620        sub split_line_head { # split line contents into header containing leading spaces and the first non-space char, and the rest of the line
621            my $comment_symbol =
622                $in_comment != 0 ? "@" : ""; # '@' will match the blinded leading '*' in multi-line comment
623                                             # $in_comment may pertain to the following line due to delayed check
624            # do not check for extra SPC in leading spaces including any '#' (or '*' within multi-line comment)
625            shift =~ m/^(\s*([#$comment_symbol]\s*)?)(.*?)\s*$/;
626            return ($1, $3);
627        }
628        my ($head , $intra_line ) = split_line_head($_);
629        my ($head1, $intra_line1) = split_line_head($contents_before_ ) if $line_before > 0;
630        my ($head2, $intra_line2) = split_line_head($contents_before_2) if $line_before2 > 0;
631        if ($line_before > 0) { # check with one line delay, such that at least $contents_before is available
632            sub column_alignments_only { # return 1 if the given line has multiple consecutive spaces only at columns that match the reference line
633                # all parameter strings are assumed to contain contents after blinding comments etc.
634                my $head = shift;     # leading spaces and the first non-space char
635                my $intra = shift;    # the rest of the line contents
636                my $contents = shift; # reference line
637                # check if all extra SPC in $intra is used only for multi-line column alignment with $contents
638                my $offset = length($head);
639                for (my $col = 0; $col < length($intra) - 2; $col++) {
640                    my $substr = substr($intra, $col);
641                    next unless $substr =~ m/^\s\s\S/; # extra SPC (but not in leading spaces of the line)
642                    next if !$eol_cmt && $substr =~ m/^[@\s]+$/; # end-of-line comment
643                    return 0 unless substr($contents, $col + $offset + 1, 2) =~ m/\s\S/; # reference line contents do not match
644                }
645                return 1;
646            }
647            report_flexibly($line_before, extra_SPC($intra_line1), $contents_before) if $intra_line1 =~ m/\s\s\S/ &&
648               !(    column_alignments_only($head1, $intra_line1, $_                )    # compare with $line
649                 || ($line_before2 > 0 &&
650                     column_alignments_only($head1, $intra_line1, $contents_before_2))); # compare w/ $line_before2
651            report(extra_SPC($intra_line)) if $intra_line  =~ m/\s\s\S/ && eof
652                && ! column_alignments_only($head , $intra_line , $contents_before_ )  ; # compare w/ $line_before
653        } elsif (eof) { # special case: just one line exists
654            report(extra_SPC($intra_line)) if $intra_line  =~ m/\s\s\S/;
655        }
656        # ignore paths in #include
657        $intra_line =~ s/^(include\s*)(".*?"|<.*?>)/$1/e if $head =~ m/#/;
658        # treat op= and comparison operators as simple '=', simplifying matching below
659        $intra_line =~ s/([\+\-\*\/\/%\&\|\^\!<>=]|<<|>>)=/=/g;
660        # treat (type) variables within macro, indicated by trailing '\', as 'int' simplifying matching below
661        $intra_line =~ s/[A-Z_]+/int/g if $contents =~ m/^(.*?)\s*\\\s*$/;
662        # treat double &&, ||, <<, and >> as single ones, simplifying matching below
663        $intra_line =~ s/(&&|\|\||<<|>>)/substr($1, 0, 1)/eg;
664        # remove blinded comments etc. directly after [{(
665        while ($intra_line =~ s/([\[\{\(])@+\s?/$1/e) {} # /g does not work here
666        # remove blinded comments etc. directly before ,;)}]
667        while ($intra_line =~ s/\s?@+([,;\)\}\]])/$1/e) {} # /g does not work here
668        # treat remaining blinded comments and string literal contents as (single) space during matching below
669        $intra_line =~ s/@+/ /g;                     # note that extra SPC has already been handled above
670        $intra_line =~ s/\s+$//;                     # strip any (resulting) space at EOL
671        $intra_line =~ s/(for\s*\([^;]*);;(\))/"$1$2"/eg; # strip trailing ';;' in for (;;)
672        $intra_line =~ s/(for\s*\([^;]+;[^;]+);(\))/"$1$2"/eg; # strip trailing ';' in for (;;)
673        $intra_line =~ s/(=\s*)\{ /"$1@ "/eg;        # do not report {SPC in initializers such as ' = { 0, };'
674        $intra_line =~ s/, \};/, @;/g;               # do not report SPC} in initializers such as ' = { 0, };'
675        report("space before '$1'") if $intra_line =~ m/[\w)\]]\s+(\+\+|--)/;  # postfix ++/-- with preceding space
676        report("space after '$1'")  if $intra_line =~ m/(\+\+|--)\s+[a-zA-Z_(]/; # prefix ++/-- with following space
677        $intra_line =~ s/\.\.\./@/g;                 # blind '...'
678        report("space before '$1'") if $intra_line =~ m/\s(\.|->)/;            # '.' or '->' with preceding space
679        report("space after '$1'")  if $intra_line =~ m/(\.|->)\s/;            # '.' or '->' with following space
680        $intra_line =~ s/\-\>|\+\+|\-\-/@/g;         # blind '->,', '++', and '--'
681        report("space before '$2'")     if $intra_line =~ m/[^:]\s+(;)/;       # space before ';' but not after ':'
682        report("space before '$1'")     if $intra_line =~ m/\s([,)\]])/;       # space before ,)]
683        report("space after '$1'")      if $intra_line =~ m/([(\[~!])\s/;      # space after ([~!
684        report("space after '$1'")      if $intra_line =~ m/(defined)\s/;      # space after 'defined'
685        report("no space before '=' or '<op>='") if $intra_line =~ m/\S(=)/;   # '=' etc. without preceding space
686        report("no space before '$1'")  if $intra_line =~ m/\S([|\/%<>^\?])/;  # |/%<>^? without preceding space
687        # TODO ternary ':' without preceding SPC, while allowing no SPC before ':' after 'case'
688        report("no space before binary '$1'")  if $intra_line =~ m/[^\s{()\[]([+\-])/;# +/- without preceding space or {()[
689                                                                             # or ')' (which is used f type casts)
690        report("no space before binary '$1'")  if $intra_line =~ m/[^\s{()\[*!]([*])/; # '*' without preceding space or {()[*!
691        report("no space before binary '$1'")  if $intra_line =~ m/[^\s{()\[]([&])/;  # '&' without preceding space or {()[
692        report("no space after ternary '$1'") if $intra_line =~ m/(:)[^\s\d]/; # ':' without following space or digit
693        report("no space after '$1'")   if $intra_line =~ m/([,;=|\/%<>^\?])\S/; # ,;=|/%<>^? without following space
694        report("no space after binary '$1'") if $intra_line=~m/[^{(\[]([*])[^\sa-zA-Z_(),*]/;# '*' w/o space or \w(),* after
695        # TODO unary '*' must not be followed by SPC
696        report("no space after binary '$1'") if $intra_line=~m/([&])[^\sa-zA-Z_(]/;  # '&' w/o following space or \w(
697        # TODO unary '&' must not be followed by SPC
698        report("no space after binary '$1'") if $intra_line=~m/[^{(\[]([+\-])[^\s\d(]/;  # +/- w/o following space or \d(
699        # TODO unary '+' and '-' must not be followed by SPC
700        report("no space after '$2'")   if $intra_line =~ m/(^|\W)(if|while|for|switch|case)[^\w\s]/; # kw w/o SPC
701        report("no space after '$2'")   if $intra_line =~ m/(^|\W)(return)[^\w\s;]/;  # return w/o SPC or ';'
702        report("space after function/macro name")
703                                      if $intra_line =~ m/(\w+)\s+\(/        # fn/macro name with space before '('
704       && !($1 =~ m/^(if|while|for|switch|return|typedef|void|char|unsigned|int|long|float|double)$/) # not keyword
705                                    && !(m/^\s*#\s*define\s/); # we skip macro definitions here because macros
706                                    # without parameters but with body beginning with '(', e.g., '#define X (1)',
707                                    # would lead to false positives - TODO also check for macros with parameters
708        report("no space before '{'")   if $intra_line =~ m/[^\s{(\[]\{/;      # '{' without preceding space or {([
709        report("no space after '}'")    if $intra_line =~ m/\}[^\s,;\])}]/;    # '}' without following space or ,;])}
710    }
711
712    # preprocessor directives @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
713
714    # handle preprocessor directives
715    if (m/^\s*#(\s*)(\w+)/) { # line beginning with '#'
716        my $space_count = length($1); # maybe could also use indentation before '#'
717        my $directive = $2;
718        report("indent = $count != 0 for '#'") if $count != 0;
719        $directive_nesting-- if $directive =~ m/^(else|elif|endif)$/;
720        if ($directive_nesting < 0) {
721            $directive_nesting = 0;
722            report("unexpected '#$directive'");
723        }
724        report("'#' directive nesting = $space_count != $directive_nesting") if $space_count != $directive_nesting;
725        $directive_nesting++ if $directive =~ m/^if|ifdef|ifndef|else|elif$/;
726        $ifdef__cplusplus = m/^\s*#\s*ifdef\s+__cplusplus\s*$/;
727        goto POSTPROCESS_DIRECTIVE unless $directive =~ m/^define$/; # skip normal code handling except for #define
728        # TODO improve handling of indents of preprocessor directives ('\', $in_directive != 0) vs. normal C code
729        $count = -1; # do not check indentation of #define
730    }
731
732    # adapt required indentation @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
733
734    s/(\w*ASN1_[A-Z_]+END\w*([^(]|\(.*?\)|$))/$1;/g; # treat *ASN1_*END*(..) macro calls as if followed by ';'
735
736    my $nested_indents_position = 0;
737
738    # update indents according to leading closing brace(s) '}' or label or switch case
739    my $in_stmt = $in_expr || @nested_symbols != 0 || $in_typedecl != 0;
740    if ($in_stmt) { # expr/stmt/type decl/var def/fn hdr, i.e., not at block level
741        if (m/^([\s@]*\})/) { # leading '}', any preceding blinded comment must not be matched
742            my $head = $1;
743            update_nested_indents($head);
744            $nested_indents_position = length($head);
745            if (@nested_symbols >= 1) {
746                $hanging_symbol = @nested_symbols[-1];
747                $expr_indent = @nested_indents[-1];
748            } else { # typically end of initialiizer expr or enum
749                $expr_indent = 0;
750            }
751        } elsif (m/^([\s@]*)(static_)?ASN1_ITEM_TEMPLATE_END(\W|$)/) { # workaround for ASN1 macro indented as '}'
752            $local_offset = -INDENT_LEVEL;
753            $expr_indent = 0;
754        } elsif (m/;.*?\}/) { # expr ends with ';' before '}'
755            report("code before '}'");
756        }
757    }
758    if (@in_do_hanging_offsets != 0 && # note there is nothing like "unexpected 'while'"
759        m/^[\s@]*while(\W|$)/) { # leading 'while'
760        $hanging_offset = pop @in_do_hanging_offsets;
761    }
762    if ($if_maybe_terminated) {
763        if (m/(^|\W)else(\W|$)/) { # (not necessarily leading) 'else'
764            if (@in_if_hanging_offsets == 0) {
765                report("unexpected 'else'");
766            } else {
767                $hanging_offset = pop @in_if_hanging_offsets;
768            }
769        } else {
770            @in_if_hanging_offsets = (); # note there is nothing like "unclosed 'if'"
771            $hanging_offset = 0;
772        }
773    }
774    if (!$in_stmt) { # at block level, i.e., outside expr/stmt/type decl/var def/fn hdr
775        $if_maybe_terminated = 0;
776        if (my ($head, $before, $tail) = m/^([\s@]*([^{}]*)\})[\s@]*(.*)$/) { # leading closing '}', but possibly
777                                                                              # with non-whitespace non-'{' before
778            report("code after '}'") unless $tail eq "" || $tail =~ m/(else|while|OSSL_TRACE_END)(\W|$)/;
779            my $outermost_level = @nested_block_indents == 1 && @nested_block_indents[0] == 0;
780            if (!$sloppy_bodylen && $outermost_level && $line_body_start != 0) {
781                my $body_len = $line - $line_body_start - 1;
782                report_flexibly($line_function_start, "function body length = $body_len > ".MAX_BODY_LENGTH." lines",
783                    $last_function_header) if $body_len > MAX_BODY_LENGTH;
784                $line_body_start = 0;
785            }
786            if ($before ne "") { # non-whitespace non-'{' before '}'
787                report("code before '}'");
788            } else { # leading '}', any preceding blinded comment must not be matched
789                $local_offset = $block_indent + $hanging_offset - INDENT_LEVEL;
790                update_nested_indents($head);
791                $nested_indents_position = length($head);
792                $local_offset -= ($block_indent + $hanging_offset);
793                # in effect $local_offset = -INDENT_LEVEL relative to $block_indent + $hanging_offset values before
794            }
795        }
796
797        # handle opening brace '{' after if/else/while/for/switch/do on line before
798        if ($hanging_offset > 0 && m/^[\s@]*{/ && # leading opening '{'
799            $line_before > 0 &&
800            $contents_before_ =~ m/(^|^.*\W)(if|else|while|for|switch|do)(\W.*$|$)/) {
801            $keyword_opening_brace = $1;
802            $hanging_offset -= INDENT_LEVEL; # cancel newly hanging_offset
803        }
804
805        if (m/^[\s@]*(case|default)(\W.*$|$)/) { # leading 'case' or 'default'
806            my $keyword = $1;
807            report("code after $keyword: ") if $2 =~ /:.*[^\s@].*$/;
808            $local_offset = -INDENT_LEVEL;
809        } else {
810            if (m/^([\s@]*)(\w+):/) { # (leading) label, cannot be "default"
811                $local_offset = -INDENT_LEVEL;
812                $has_label = 1;
813            }
814        }
815    }
816
817    # potential adaptations of indent in first line of macro body in multi-line macro definition
818    if ($in_directive > 0 && $in_macro_header > 0) {
819        if ($in_macro_header > 1) { # still in macro definition header
820            $in_macro_header += parens_balance($_);
821        } else { # begin of macro body
822            $in_macro_header = 0;
823            if ($count == $block_indent - $directive_offset # body began with same indentation as preceding code
824                && $sloppy_macro) { # workaround for this situation is enabled
825                $block_indent -= $directive_offset;
826                $directive_offset = 0;
827            }
828        }
829    }
830
831    # check required indentation @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
832
833    check_indent() if $count >= 0; # not for #define and not if multi-line string literal is continued
834
835    $in_comment = 0 if $in_comment < 0; # multi-line comment has ended
836
837    # do some further checks @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
838
839    my $outermost_level = $block_indent == 0 + ($in_directive > 0 ? $directive_offset : 0);
840
841    report("more than one stmt") if !m/(^|\W)for(\W.*|$)/ && # no 'for' - TODO improve matching
842        m/;.*;/; # two or more terminators ';', so more than one statement
843
844    # check for code block containing a single line/statement
845    if ($line_before2 > 0 && !$outermost_level && # within function body
846        $in_typedecl == 0 && @nested_indents == 0 && # neither within type declaration nor inside stmt/expr
847        m/^[\s@]*\}/) { # leading closing brace '}', any preceding blinded comment must not be matched
848        # TODO extend detection from single-line to potentially multi-line statement
849        if ($line_opening_brace > 0 &&
850            ($line_opening_brace == $line_before2 ||
851             $line_opening_brace == $line_before)
852            && $contents_before =~ m/;/) { # there is at least one terminator ';', so there is some stmt
853            # TODO do not report cases where a further else branch
854            # follows with a block containg more than one line/statement
855            report_flexibly($line_before, "'$keyword_opening_brace' { 1 stmt }", $contents_before);
856        }
857    }
858
859    report("single-letter name '$2'") if (m/(^|.*\W)([IO])(\W.*|$)/); # single-letter name 'I' or 'O' # maybe re-add 'l'?
860    # constant on LHS of comparison or assignment, e.g., NULL != x or 'a' < c, but not a + 1 == b
861    report("constant on LHS of '$2'")
862        if (m/(['"]|([\+\-\*\/\/%\&\|\^<>]\s*)?\W[0-9]+L?|NULL)\s*([\!<>=]=|[<=>][^<>])/ && $2 eq "");
863
864    # TODO report #if 0 and #if 1
865
866    # TODO report empty line within local variable definitions
867
868    # TODO report missing empty line after local variable definitions
869
870    # TODO report needless use of parentheses, while
871    #      macro parameters should always be in parens (except when passed on), e.g., '#define ID(x) (x)'
872
873    # adapt required indentation for following lines @@@@@@@@@@@@@@@@@@@@@@@@@@@
874
    # set $in_expr, $in_paren_expr, and $hanging_offset for if/while/for/switch, return/enum, and assignment RHS
    # At most one of the three flags below gets set, in the order of the if/elsif chain.
    my $paren_expr_start = 0;
    my $return_enum_start = 0;
    my $assignment_start = 0;
    my $tmp = $_;
    $tmp =~ s/[\!<>=]=/@@/g; # blind (in-)equality symbols like '<=' as '@@' to prevent matching them as '=' below
    if      (m/^((^|.*\W)(if|while|for|switch))(\W.*|$)$/) { # (last) if/for/while/switch
        $paren_expr_start = 1;
    } elsif (m/^((^|.*\W)(return|enum))(\W.*|$)/             # (last) return/enum
        && !$in_expr && @nested_indents == 0 && parens_balance($1) == 0) { # not nested enum
        $return_enum_start = 1;
    } elsif ($tmp =~ m/^(([^=]*)(=))(.*)$/                   # (last) '=', i.e., assignment
        && !$in_expr && @nested_indents == 0 && parens_balance($1) == 0) { # not nested assignment
        $assignment_start = 1;
    }
    if ($paren_expr_start || $return_enum_start || $assignment_start)
    {
        # $1, $3, and $4 stem from whichever of the three patterns above matched last:
        # text up to and including the keyword or '=', the keyword or '=' itself, and the rest of the line
        my ($head, $mid, $tail) = ($1, $3, $4);
        $keyword_opening_brace = $mid if $mid ne "=";
        # to cope with multi-line expressions, do this also if !($tail =~ m/\{/)
        push @in_if_hanging_offsets, $hanging_offset if $mid eq "if";

        # already handle $head, i.e., anything before expression
        update_nested_indents($head, $nested_indents_position);
        $nested_indents_position = length($head);
        # now can set $in_expr and $in_paren_expr
        $in_expr = 1;
        $in_paren_expr = 1 if $paren_expr_start;
        if ($mid eq "while" && @in_do_hanging_offsets != 0) {
            # 'while' of a pending 'do': take back the hanging offset saved for that 'do'
            $hanging_offset = pop @in_do_hanging_offsets;
        } else {
            $hanging_offset += INDENT_LEVEL; # tentatively set hanging_offset, may be canceled by following '{'
        }
    }
909
    # set $hanging_offset and $keyword_opening_brace for do/else
    # also reports code on the same line before or after 'else'/'do' (except for 'else if')
    if (my ($head, $mid, $tail) = m/(^|^.*\W)(else|do)(\W.*|$)$/) { # last else/do, where 'do' is preferred
        my $code_before = $head =~ m/[^\s\@}]/; # leading non-whitespace non-comment non-'}'
        report("code before '$mid'") if $code_before;
        report("code after '$mid'" ) if $tail =~ m/[^\s\@{]/# trailing non-whitespace non-comment non-'{' (non-'\')
                                                    && !($mid eq "else" && $tail =~ m/[\s@]*if(\W|$)/);
        if ($mid eq "do") { # workarounds for code before 'do'
            if ($head =~ m/(^|^.*\W)(else)(\W.*$|$)/) { # 'else' ... 'do'
                $hanging_offset += INDENT_LEVEL; # tentatively set hanging_offset, may be canceled by following '{'
            }
            if ($head =~ m/;/) { # terminator ';' ... 'do'
                @in_if_hanging_offsets = (); # note there is nothing like "unclosed 'if'"
                $hanging_offset = 0;
            }
        }
        # remember the current hanging offset; it is taken back on the 'while' belonging to this 'do'
        push @in_do_hanging_offsets, $hanging_offset if $mid eq "do";
        if ($code_before && $mid eq "do") {
            # make the hanging offset relative to the column at which 'do' begins
            $hanging_offset = length($head) - $block_indent;
        }
        if (!$in_paren_expr) {
            $keyword_opening_brace = $mid if $tail =~ m/\{/;
            $hanging_offset += INDENT_LEVEL;
        }
    }
934
    # set $in_typedecl and potentially $hanging_offset for type declaration
    # In the last condition, $1 tested against "{" is the capture of the inner match on $3,
    # i.e., the first character that follows the name and any whitespace after it.
    if (!$in_expr && @nested_indents == 0 # not in expression
        && m/(^|^.*\W)(typedef|struct|union|enum)(\W.*|$)$/
        && parens_balance($1) == 0 # not in newly started expression or function arg list
        && ($2 eq "typedef" || !($3 =~ m/\s*\w++\s*(.)/ && $1 ne "{")) # 'struct'/'union'/'enum' <name> not followed by '{'
        # not needed: && $keyword_opening_brace = $2 if $3 =~ m/\{/;
        ) {
        $in_typedecl++;
        $hanging_offset += INDENT_LEVEL if m/\*.*\(/; # '*' followed by '(' - seems consistent with Emacs C mode
    }
945
    # Process the nesting symbols of the (rest of the) current line and handle a statement
    # terminator: $terminator_position >= 0 is used below as the indication that one was found.
    my $bak_in_expr = $in_expr; # value of $in_expr before it is potentially reset below
    my $terminator_position = update_nested_indents($_, $nested_indents_position);

    if ($bak_in_expr) {
        # on end of non-if/while/for/switch (multi-line) expression (i.e., return/enum/assignment) and
        # on end of statement/type declaration/variable definition/function header
        if ($terminator_position >= 0 && ($in_typedecl == 0 || @nested_indents == 0)) {
            check_nested_nonblock_indents("expr");
            $in_expr = 0;
        }
    } else {
        check_nested_nonblock_indents($in_typedecl == 0 ? "stmt" : "decl") if $terminator_position >= 0;
    }

    # on ';', which terminates the current statement/type declaration/variable definition/function declaration
    if ($terminator_position >= 0) {
        my $tail = substr($_, $terminator_position + 1); # anything after the terminator
        if (@in_if_hanging_offsets != 0) {
            if ($tail =~ m/\s*else(\W|$)/) {
                pop @in_if_hanging_offsets;
                $hanging_offset -= INDENT_LEVEL;
            } elsif ($tail =~ m/[^\s@]/) { # code (not just comment) follows
                @in_if_hanging_offsets = (); # note there is nothing like "unclosed 'if'"
                $hanging_offset = 0;
            } else {
                # whether the pending 'if' is finished depends on an 'else' on the following line
                $if_maybe_terminated = 1;
            }
        } elsif ($tail =~ m/^[\s@]*$/) { # ';' has been trailing, i.e. there is nothing but whitespace and comments
            $hanging_offset = 0; # reset in case of terminated assignment ('=') etc.
        }
        $in_typedecl-- if $in_typedecl != 0 && @nested_in_typedecl == 0; # TODO handle multiple type decls per line
        m/(;[^;]*)$/; # match last ';'
        $terminator_position = length($_) - length($1) if $1;
        # new $terminator_position value may be after the earlier one in case multiple terminators on current line
        # TODO check treatment in case of multiple terminators on current line
        update_nested_indents($_, $terminator_position + 1);
    }
983
984    # set hanging expression indent according to nested indents - TODO maybe do better in update_nested_indents()
985    # also if $in_expr is 0: in statement/type declaration/variable definition/function header
986    $expr_indent = 0;
987    for (my $i = -1; $i >= -@nested_symbols; $i--) {
988        if (@nested_symbols[$i] ne "?") { # conditionals '?' ... ':' are treated specially in check_indent()
989            $hanging_symbol = @nested_symbols[$i];
990            $expr_indent = $nested_indents[$i];
991            # $expr_indent is guaranteed to be != 0 unless @nested_indents contains just outer conditionals
992            last;
993        }
994    }
995
996    # remember line number and header containing name of last function defined for reports w.r.t. MAX_BODY_LENGTH
997    if ($outermost_level && m/(\w+)\s*\(/ && $1 ne "STACK_OF") {
998        $line_function_start = $line;
999        $last_function_header = $contents;
1000    }
1001
    # special checks for last, typically trailing opening brace '{' in line
    # At outermost level this records where a function body starts ($line_body_start);
    # otherwise it records the line of a brace opened after a keyword ($line_opening_brace),
    # which is used by the check for blocks containing a single statement.
    if (my ($head, $tail) = m/^(.*)\{(.*)$/) { # match last ... '{'
        if ($in_directive == 0 && !$in_expr && $in_typedecl == 0) {
            if ($outermost_level) {
                if (!$assignment_start && !$bak_in_expr) {
                    # at end of function definition header (or stmt or var definition)
                    report("'{' not at beginning") if $head ne "";
                    # a line containing 'LONG BODY' switches off the body length check for this function
                    $line_body_start = $contents =~ m/LONG BODY/ ? 0 : $line;
                }
            } else {
                $line_opening_brace = $line if $keyword_opening_brace =~ m/do|while|for/;
                # using, not assigning, $keyword_opening_brace here because it could be on an earlier line
                $line_opening_brace = $line if $keyword_opening_brace =~ m/if|else/ && $extended_1_stmt &&
                # TODO prevent false positives for if/else where braces around single-statement branches
                # should be avoided but only if all branches have just single statements
                # The following helps detecting the exception when handling multiple 'if ... else' branches:
                    !($keyword_opening_brace eq "else" && $line_opening_brace < $line_before2);
            }
            report("code after '{'") if $tail=~ m/[^\s\@]/ && # trailing non-whitespace non-comment (non-'\')
                                      !($tail=~ m/\}/);  # no '}' after last '{'
        }
    }
1024
    # check for opening brace after if/while/for/switch/do not on same line
    # note that "no '{' on same line after '} else'" is handled further below
    if (/^[\s@]*{/ && # leading '{'
        $line_before > 0 && !($contents_before_ =~ m/^\s*#/) && # not preprocessor directive '#if
        (my ($head, $mid, $tail) = ($contents_before_ =~ m/(^|^.*\W)(if|while|for|switch|do)(\W.*$|$)/))) {
        my $brace_after  = $tail =~ /^[\s@]*{/; # any whitespace or comments then '{'
        report("'{' not on same line as preceding '$mid'") if !$brace_after;
    }
    # check for closing brace on line before 'else' not followed by leading '{'
    elsif (my ($head, $tail) = m/(^|^.*\W)else(\W.*$|$)/) {
        if (parens_balance($tail) == 0 &&  # avoid false positive due to unfinished expr on current line
            !($tail =~ m/{/) && # after 'else' no '{' on same line
            !($head =~ m/}[\s@]*$/) && # not: '}' then any whitespace or comments before 'else'
            $line_before > 0 && $contents_before_ =~ /}[\s@]*$/) { # trailing '}' on line before
            report("no '{' after '} else'");
        }
    }

    # check for closing brace before 'while' not on same line
    if (my ($head, $tail) = m/(^|^.*\W)while(\W.*$|$)/) {
        my $brace_before = $head =~ m/}[\s@]*$/; # '}' then any whitespace or comments
        # possibly 'if (...)' (with potentially inner '(' and ')') then any whitespace or comments then '{'
        if (!$brace_before &&
            # does not work here: @in_do_hanging_offsets != 0 && #'while' terminates loop
            parens_balance($tail) == 0 &&  # avoid false positive due to unfinished expr on current line
            $tail =~ /;/ && # 'while' terminates loop (by ';')
            $line_before > 0 &&
            $contents_before_ =~ /}[\s@]*$/) { # on line before: '}' then any whitespace or comments
                report("'while' not on same line as preceding '}'");
            }
    }

    # check for missing brace on same line before or after 'else'
    if (my ($head, $tail) = m/(^|^.*\W)else(\W.*$|$)/) {
        my $brace_before = $head =~ /}[\s@]*$/; # '}' then any whitespace or comments
        # NOTE(review): the second alternative of the pattern below is not anchored, such that
        # $brace_after is true for a '{' anywhere in $tail - confirm that this is intended
        my $brace_after  = $tail =~ /^[\s@]*if[\s@]*\(.*\)[\s@]*{|[\s@]*{/;
        # possibly 'if (...)' (with potentially inner '(' and ')') then any whitespace or comments then '{'
        if (!$brace_before) {
            if ($line_before > 0 && $contents_before_ =~ /}[\s@]*$/) {
                report("'else' not on same line as preceding '}'");
            } elsif (parens_balance($tail) == 0) { # avoid false positive due to unfinished expr on current line
                report("no '}' on same line before 'else ... {'") if $brace_after;
            }
        } elsif (parens_balance($tail) == 0) { # avoid false positive due to unfinished expr on current line
            report("no '{' on same line after '} else'") if $brace_before && !$brace_after;
        }
    }
1072
  POSTPROCESS_DIRECTIVE:
    # on begin of multi-line preprocessor directive, adapt indent
    # need to use original line contents because trailing '\' may have been stripped above
    # $in_directive counts the lines of the current directive that end with '\'
    if ($contents =~ m/^(.*?)[\s@]*\\[\s@]*$/) { # trailing '\' (which is not stripped from $contents),
        # typically used in macro definitions (or other preprocessor directives)
        if ($in_directive == 0) {
            # for '#define', $in_macro_header is 1 plus the balance of parentheses after 'define'; otherwise it is 0
            $in_macro_header = m/^\s*#\s*define(\W|$)?(.*)/ ? 1 + parens_balance($2) : 0; # '#define' is beginning
            $directive_offset = INDENT_LEVEL;
            $block_indent += $directive_offset;
        }
        $in_directive += 1;
    }
1085
1086    # post-processing at end of line @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1087
  LINE_FINISHED:
    # on end of multi-line preprocessor directive, adapt indent
    # i.e., take back the extra block indent that was added when the directive began
    if ($in_directive > 0 &&
        # need to use original line contents because trailing \ may have been stripped
        !($contents =~ m/^(.*?)[\s@]*\\[\s@]*$/)) { # no trailing '\'
        $block_indent -= $directive_offset;
        $in_directive = 0;
        # macro body typically does not include terminating ';'
        $hanging_offset = 0; # compensate for this in case macro ends, e.g., as 'while (0)'
    }
1098
1099    if (m/^\s*$/) { # at begin of file essentially empty line: just whitespace (and maybe a '\')
1100            report("leading ".($1 eq "" ? "empty" :"whitespace")." line") if $line == 1 && !$sloppy_SPC;
1101    } else {
1102        if ($line_before > 0) {
1103            my $linediff = $line - $line_before - 1;
1104            report("$linediff empty lines before") if $linediff > 1 && !$sloppy_SPC;
1105        }
1106        $line_before2      = $line_before;
1107        $contents_before2  = $contents_before;
1108        $contents_before_2 = $contents_before_;
1109        $line_before       = $line;
1110        $contents_before   = $contents;
1111        $contents_before_  = $_;
1112        $count_before      = $count;
1113    }
1114
1115    if ($self_test) { # debugging
1116        my $should_report = $contents =~ m/\*@(\d)?/ ? 1 : 0;
1117        $should_report = +$1 if $should_report != 0 && defined $1;
1118        print("$ARGV:$line:$num_reports_line reports on:$contents")
1119            if $num_reports_line != $should_report;
1120    }
1121    $num_reports_line = 0;
1122
1123    # post-processing at end of file @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1124
1125    if (eof) {
1126        # check for essentially empty line (which may include a '\') just before EOF
1127        report(($1 eq "\n" ? "empty line" : $2 ne "" ? "'\\'" : "whitespace")." at EOF")
1128            if $contents =~ m/^(\s*(\\?)\s*)$/ && !$sloppy_SPC;
1129
1130        # report unclosed expression-level nesting
1131        check_nested_nonblock_indents("expr at EOF"); # also adapts @nested_block_indents
1132
1133        # sanity-check balance of block-level { ... } via final $block_indent at end of file
1134        report_flexibly($line, +@nested_block_indents." unclosed '{'", "(EOF)\n") if @nested_block_indents != 0;
1135
1136        # sanity-check balance of #if ... #endif via final preprocessor directive indent at end of file
1137        report_flexibly($line, "$directive_nesting unclosed '#if'", "(EOF)\n") if $directive_nesting != 0;
1138
1139        reset_file_state();
1140    }
1141}
1142
1143# final summary report @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1144
# Reports that belong to none of the specifically counted categories are summarized as "other".
my $num_classified_reports = $num_indent_reports + $num_nesting_issues
    + $num_syntax_issues + $num_SPC_reports + $num_length_reports;
my $num_other_reports = $num_reports - $num_classified_reports;

# The summary is printed only if there is anything to report and not when running self-tests.
if ($num_reports != 0 && !$self_test) {
    print "$num_reports ($num_indent_reports indentation, $num_nesting_issues directive nesting, ".
        "$num_syntax_issues syntax, $num_SPC_reports whitespace, $num_length_reports length, $num_other_reports other)".
        " issues have been found by $0\n";
}
1150