1# Copyright (C) 2012 by the Massachusetts Institute of Technology.
2# All rights reserved.
3#
4# Export of this software from the United States of America may
5#   require a specific license from the United States Government.
6#   It is the responsibility of any person or organization contemplating
7#   export to obtain such a license before exporting.
8#
9# WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
10# distribute this software and its documentation for any purpose and
11# without fee is hereby granted, provided that the above copyright
12# notice appear in all copies and that both that copyright notice and
13# this permission notice appear in supporting documentation, and that
14# the name of M.I.T. not be used in advertising or publicity pertaining
15# to distribution of the software without specific, written prior
16# permission.  Furthermore if you modify this software you must label
17# your software as modified software and not distribute it in such a
18# fashion that it might be confused with the original M.I.T. software.
19# M.I.T. makes no representations about the suitability of
20# this software for any purpose.  It is provided "as is" without express
21# or implied warranty.
22
23# This program checks for some kinds of MIT krb5 coding style
24# violations in a single file.  Checked violations include:
25#
26#   Line is too long
27#   Tabs violations
28#   Trailing whitespace and final blank lines
29#   Comment formatting errors
30#   Preprocessor statements in function bodies
31#   Misplaced braces
32#   Space before paren in function call, or no space after if/for/while
33#   Parenthesized return expression
34#   Space after cast operator, or no space before * in cast operator
35#   Line broken before binary operator
36#   Lack of spaces around binary operator (sometimes)
37#   Assignment at the beginning of an if conditional
38#   Use of prohibited string functions
39#   Lack of braces around 2+ line flow control body
40#   Incorrect indentation as determined by emacs c-mode (if possible)
41#
42# This program does not check for the following:
43#
44#   Anything outside of a function body except line length/whitespace
45#   Anything non-syntactic (proper cleanup flow control, naming, etc.)
46#   UTF-8 violations
47#   Implicit tests against NULL or '\0'
48#   Inner-scope variable declarations
49#   Over- or under-parenthesization
50#   Long or deeply nested function bodies
51#   Syntax of function calls through pointers
52
53import os
54import re
55import sys
56from subprocess import call
57from tempfile import NamedTemporaryFile
58
def warn(ln, msg):
    """Print a style warning for line number ln to standard output.

    The line number is right-aligned in a five-character column and is
    followed by two spaces and the message text.
    """
    report = '%5d  %s' % (ln, msg)
    print(report)
61
62
63# If lines[0] indicates the krb5 C style, try to use emacs to reindent
64# a copy of lines.  Return None if the file does not use the krb5 C
65# style or if the emacs batch reindent is unsuccessful.
def emacs_reindent(lines):
    """Return a copy of lines reindented by emacs, or None.

    lines is the list of source lines (with line terminators).  None is
    returned when lines is empty, when the first line does not carry
    the krb5 C style settings, when emacs cannot be started, or when
    emacs exits with a nonzero status.

    The elisp helpers krb5-c-style.el and krb5-batch-reindent.el are
    looked up in the directory containing this script (sys.argv[0]).
    """
    # An empty file has no first line to declare the style in.
    if not lines:
        return None
    if 'c-basic-offset: 4; indent-tabs-mode: nil' not in lines[0]:
        return None

    util_dir = os.path.dirname(sys.argv[0])
    cstyle_el = os.path.join(util_dir, 'krb5-c-style.el')
    reindent_el = os.path.join(util_dir, 'krb5-batch-reindent.el')
    with NamedTemporaryFile(suffix='.c', mode='w+') as f:
        # Flush so that emacs sees the full contents when it opens the
        # file by name.
        f.write(''.join(lines))
        f.flush()
        args = ['emacs', '-q', '-batch', '-l', cstyle_el, '-l', reindent_el,
                f.name]
        with open(os.devnull, 'w') as devnull:
            try:
                st = call(args, stdin=devnull, stdout=devnull, stderr=devnull)
            except OSError:
                # Fail gracefully if emacs isn't installed.
                return None
        if st != 0:
            return None
        # Read back whatever emacs left in the temporary file; the
        # with statement takes care of closing it.
        f.seek(0)
        return f.readlines()
90
91
def check_length(line, ln):
    """Warn if line is longer than 79 characters.

    Lines beginning with ' * Copyright' are exempt from the limit.
    """
    if line.startswith(' * Copyright'):
        return
    if len(line) > 79:
        warn(ln, 'Length exceeds 79 characters')
95
96
def check_tabs(line, ln, allow_tabs, seen_tab):
    """Warn about tab usage that conflicts with the file's tab policy.

    If allow_tabs is false, any tab character is reported.  Otherwise a
    tab directly after a space is reported, and a run of eight spaces
    is reported once a tab has been seen in the file (seen_tab).
    """
    if not allow_tabs:
        if '\t' in line:
            warn(ln, 'Tab character in file which does not allow tabs')
        return

    if ' \t' in line:
        warn(ln, 'Tab character immediately following space')
    if seen_tab and '        ' in line:
        warn(ln, '8+ spaces in file which uses tabs')
106
107
def check_trailing_whitespace(line, ln):
    """Warn if line ends with a space or a tab character."""
    if line.endswith((' ', '\t')):
        warn(ln, 'Trailing whitespace')
111
112
def check_comment(lines, ln):
    """Check the formatting of one multi-line comment.

    lines holds the comment's lines, the first of which must contain
    the comment opener; ln is the line number of that first line.
    """
    first, last = lines[0], lines[-1]
    # Column where the * of each continuation line is expected.
    star_col = first.index('/*') + 1

    if not first.lstrip().startswith('/*'):
        warn(ln, 'Multi-line comment begins after code')

    for offset, text in enumerate(lines[1:], 1):
        cur = ln + offset
        if len(text) <= star_col or text[star_col] != '*':
            warn(cur, 'Comment line does not have * aligned with top')
        elif text[:star_col].lstrip() != '':
            warn(cur, 'Garbage before * in comment line')

    # The remaining warnings are reported against the comment's last line.
    end_ln = ln + len(lines) - 1
    if not last.rstrip().endswith('*/'):
        warn(end_ln, 'Code after end of multi-line comment')
    if len(lines) > 2:
        opens_alone = first.strip() in ('/*', '/**')
        closes_alone = last.strip() == '*/'
        if not (opens_alone and closes_alone):
            warn(end_ln,
                 'Comment is 3+ lines but is not formatted as block comment')
128
129
def check_preprocessor(line, ln):
    """Warn if line has a '#' in its first column."""
    if line[:1] == '#':
        warn(ln, 'Preprocessor statement in function body')
133
134
def check_braces(line, ln):
    """Warn about misplaced braces on line.

    Reports an open brace that starts a line after leading whitespace,
    code following an open brace, and code preceding a close brace.
    """
    # One-line initializer expressions are removed before checking.
    code = re.sub(r'=\s*{.*}', '', line)

    if code[:1] != '{' and code.lstrip()[:1] == '{':
        warn(ln, 'Un-cuddled open brace')
    if re.search(r'{\s*\S', code) is not None:
        warn(ln, 'Code on line after open brace')
    if re.search(r'\S.*}', code) is not None:
        warn(ln, 'Code on line before close brace')
144
145
146# This test gives false positives on some function pointer type
147# declarations or casts.  Avoid this by using typedefs.
def check_space_before_paren(line, ln):
    """Warn about spacing errors around parentheses on line.

    Flow control keywords must be followed by whitespace before the
    open paren; other identifiers (apart from a few basic type names
    and 'return') must not be.  A space before a close paren is also
    reported.
    """
    type_words = ('void', 'char', 'int', 'long', 'unsigned')
    flow_words = ('if', 'for', 'while', 'switch')

    for m in re.finditer(r'([\w]+)(\s*)\(', line):
        word, gap = m.group(1), m.group(2)
        if word in type_words or word == 'return':
            continue
        if word in flow_words:
            if not gap:
                warn(ln, 'No space after flow control keyword')
        elif gap:
            warn(ln, 'Space before parenthesis in function call')

    if ' )' in line:
        warn(ln, 'Space before close parenthesis')
162
163
def check_parenthesized_return(line, ln):
    """Warn if line contains a return whose expression is in parens."""
    pattern = re.compile(r'return\s*\(.*\);')
    if pattern.search(line) is not None:
        warn(ln, 'Parenthesized return expression')
167
168
def check_cast(line, ln):
    """Warn about likely cast operator spacing errors on line.

    Without a real C parser, casts are recognized heuristically: a
    parenthesized group with no nested open paren, followed (after
    optional whitespace) by an identifier character or an open paren.
    A cast followed by an expression starting with '*' is not
    detected, since that looks like multiplication.  "(*fp) (args)"
    and "if (condition) statement" are reported as well, but both of
    those are style errors anyway.
    """
    for m in re.finditer(r'\(([^(]+)\)(\s*)[a-zA-Z_(]', line):
        inner, gap = m.group(1), m.group(2)
        if gap:
            warn(ln, 'Space after cast operator (or inline if/while body)')
        # A cast such as (char*) should have a space before the *.
        if re.search(r'[^\s\*]\*+$', inner) is not None:
            warn(ln, 'No space before * in cast operator')
187
188
def check_binary_operator(line, ln):
    """Warn about binary operator placement and spacing on line.

    A line that starts with a binary operator is reported against the
    previous line number (ln - 1).  Operators lacking whitespace on
    one or both sides are reported against ln; missing space after
    '-', '*' and '&' alone is tolerated.
    """
    binop = r'(\+|-|\*|/|%|\^|==|=|!=|<=|<|>=|>|&&|&|\|\||\|)'

    if re.match(r'\s*' + binop + r'\s', line) is not None:
        warn(ln - 1, 'Line broken before binary operator')

    for m in re.finditer(r'(\s|\w)' + binop + r'(\s|\w)', line):
        before, op, after = m.groups()
        tight_left = not before.isspace()
        tight_right = not after.isspace()
        if tight_left and tight_right:
            warn(ln, 'No space before or after binary operator')
        elif tight_left:
            warn(ln, 'No space before binary operator')
        elif tight_right and op not in ('-', '*', '&'):
            warn(ln, 'No space after binary operator')
201
202
def check_assignment_in_conditional(line, ln):
    """Warn if an if conditional on line begins with an assignment.

    Only if statements are examined; assignments in loop expressions
    are allowed.
    """
    pattern = re.compile(r'if\s*\(+\w+\s*=[^=]')
    if pattern.search(line) is not None:
        warn(ln, 'Assignment in if conditional')
208
209
def indent(line):
    """Return the width of line's leading whitespace.

    Tabs in the leading whitespace are expanded (to the default tab
    stops of str.expandtabs) before the width is measured.
    """
    # A raw string keeps the regex escape from being treated as an
    # (invalid) string escape sequence.
    leading = re.match(r'\s*', line).group(0)
    return len(leading.expandtabs())
212
213
def check_unbraced_flow_body(line, ln, lines):
    """Warn about flow control statements that are missing braces.

    line is the current line with comments and string contents
    already removed, ln is its 1-based line number, and lines is the
    full list of original file lines, used to look ahead at the body.

    Reports a bare 'do', an if/else where only one arm is braced, and
    an unbraced if/else/for/while whose next two lines are both
    indented more deeply than the statement itself.
    """
    if re.match(r'\s*do$', line):
        warn(ln, 'do statement without braces')
        return

    m = re.match(r'\s*(})?\s*else(\s*if\s*\(.*\))?\s*({)?\s*$', line)
    if m and (m.group(1) is None) != (m.group(3) is None):
        warn(ln, 'One arm of if/else statement braced but not the other')

    if not (re.match(r'\s*(if|else if|for|while)\s*\(.*\)$', line) or
            re.match(r'\s*else$', line)):
        return

    # ln is 1-based, so lines[ln] and lines[ln + 1] are the next two
    # lines.  If fewer than two lines follow, the body cannot span two
    # lines, and indexing would run past the end of the file.
    if ln + 1 >= len(lines):
        return

    base = indent(line)
    if indent(lines[ln]) > base and indent(lines[ln + 1]) > base:
        warn(ln, 'Body is 2+ lines but has no braces')
229
230
def check_bad_string_fn(line, ln):
    """Warn if line appears to call a prohibited string function.

    The match is deliberately fuzzy: any identifier ending in 'scanf'
    is caught, along with strcpy, strcat and sprintf.  The name must
    have a non-word character on both sides.
    """
    banned = re.compile(r'\W(strcpy|strcat|sprintf|\w*scanf)\W')
    if banned.search(line) is not None:
        warn(ln, 'Prohibited string function')
235
236
def check_indentation(line, indented_lines, ln):
    """Compare line against the corresponding reindented line.

    indented_lines is the reindented copy of the file, or a false
    value when no reindented copy is available (in which case nothing
    is checked).  ln is the 1-based number of line.
    """
    if not indented_lines:
        return

    idx = ln - 1
    if idx < len(indented_lines):
        if indented_lines[idx].rstrip('\r\n') != line:
            warn(ln, 'Indentation may be incorrect')
        return

    # Running past the end of the reindented copy should only happen
    # when the emacs reindent removed blank lines from the input file,
    # but check.
    if not line.strip():
        warn(ln, 'Trailing blank line')
250
251
def check_file(lines):
    """Run all style checks over a file and print any warnings.

    lines is the list of the file's lines, including line terminators
    (as returned by readlines()).  Line-structure checks are applied
    to every line; the syntactic checks are applied only to lines
    inside function bodies and outside of comments.
    """
    if not lines:
        return

    # Check if this file allows tabs.
    allow_tabs = 'indent-tabs-mode: nil' not in lines[0]
    seen_tab = False
    indented_lines = emacs_reindent(lines)

    in_function = False
    comment = []
    for ln, line in enumerate(lines, 1):
        line = line.rstrip('\r\n')
        seen_tab = seen_tab or ('\t' in line)

        # Check line structure issues before altering the line.
        check_indentation(line, indented_lines, ln)
        check_length(line, ln)
        check_tabs(line, ln, allow_tabs, seen_tab)
        check_trailing_whitespace(line, ln)

        # Strip out single-line comments and the contents of string
        # literals (unless we are inside a multi-line comment).
        if not comment:
            line = re.sub(r'/\*.*?\*/', '', line)
            line = re.sub(r'"(\\.|[^"])*"', '""', line)

        # Parse out and check multi-line comments.  (Ignore code on
        # the first or last line; check_comment will warn about it.)
        if comment or '/*' in line:
            comment.append(line)
            if '*/' in line:
                check_comment(comment, ln - len(comment) + 1)
                comment = []
            continue

        # Warn if we see a // comment and ignore anything following.
        if '//' in line:
            warn(ln, '// comment')
            line = re.sub(r'//.*', '', line)

        # A brace in the first column opens or closes a function body.
        if line.startswith('{'):
            in_function = True
        elif line.startswith('}'):
            in_function = False

        if in_function:
            check_preprocessor(line, ln)
            check_braces(line, ln)
            check_space_before_paren(line, ln)
            check_parenthesized_return(line, ln)
            check_cast(line, ln)
            check_binary_operator(line, ln)
            check_assignment_in_conditional(line, ln)
            check_unbraced_flow_body(line, ln, lines)
            check_bad_string_fn(line, ln)

    # The entries of lines still carry their line terminators, so
    # strip them before testing whether the final line is blank.
    if lines[-1].rstrip('\r\n') == '':
        warn(ln, 'Blank line at end of file')
311
312
# Read the source to check from the file named on the command line,
# or from standard input if no file name is given.
if len(sys.argv) == 1:
    lines = sys.stdin.readlines()
elif len(sys.argv) == 2:
    # The with statement closes the file even if reading it fails.
    with open(sys.argv[1]) as f:
        lines = f.readlines()
else:
    sys.stderr.write('Usage: cstyle-file [filename]\n')
    sys.exit(1)

check_file(lines)
324