# Copyright (C) 2012 by the Massachusetts Institute of Technology.
# All rights reserved.
#
# Export of this software from the United States of America may
# require a specific license from the United States Government.
# It is the responsibility of any person or organization contemplating
# export to obtain such a license before exporting.
#
# WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
# distribute this software and its documentation for any purpose and
# without fee is hereby granted, provided that the above copyright
# notice appear in all copies and that both that copyright notice and
# this permission notice appear in supporting documentation, and that
# the name of M.I.T. not be used in advertising or publicity pertaining
# to distribution of the software without specific, written prior
# permission. Furthermore if you modify this software you must label
# your software as modified software and not distribute it in such a
# fashion that it might be confused with the original M.I.T. software.
# M.I.T. makes no representations about the suitability of
# this software for any purpose. It is provided "as is" without express
# or implied warranty.

# This program checks for some kinds of MIT krb5 coding style
# violations in a single file. Checked violations include:
#
#   Line is too long
#   Tabs violations
#   Trailing whitespace and final blank lines
#   Comment formatting errors
#   Preprocessor statements in function bodies
#   Misplaced braces
#   Space before paren in function call, or no space after if/for/while
#   Parenthesized return expression
#   Space after cast operator, or no space before * in cast operator
#   Line broken before binary operator
#   Lack of spaces around binary operator (sometimes)
#   Assignment at the beginning of an if conditional
#   Use of prohibited string functions
#   Lack of braces around 2+ line flow control body
#   Incorrect indentation as determined by emacs c-mode (if possible)
#
# This program does not check for the following:
#
#   Anything outside of a function body except line length/whitespace
#   Anything non-syntactic (proper cleanup flow control, naming, etc.)
#   UTF-8 violations
#   Implicit tests against NULL or '\0'
#   Inner-scope variable declarations
#   Over- or under-parenthesization
#   Long or deeply nested function bodies
#   Syntax of function calls through pointers

import os
import re
import sys
from subprocess import call
from tempfile import NamedTemporaryFile


# Print a warning message msg for the 1-based line number ln on
# standard output.
def warn(ln, msg):
    print('%5d %s' % (ln, msg))


# If lines[0] indicates the krb5 C style, try to use emacs to reindent
# a copy of lines. Return the reindented lines on success. Return
# None if the file does not use the krb5 C style or if the emacs batch
# reindent is unsuccessful.
def emacs_reindent(lines):
    if 'c-basic-offset: 4; indent-tabs-mode: nil' not in lines[0]:
        return None

    # The elisp helper files are looked up next to this script.
    util_dir = os.path.dirname(sys.argv[0])
    cstyle_el = os.path.join(util_dir, 'krb5-c-style.el')
    reindent_el = os.path.join(util_dir, 'krb5-batch-reindent.el')
    with NamedTemporaryFile(suffix='.c', mode='w+') as f:
        f.write(''.join(lines))
        f.flush()
        args = ['emacs', '-q', '-batch', '-l', cstyle_el, '-l', reindent_el,
                f.name]
        with open(os.devnull, 'w') as devnull:
            try:
                st = call(args, stdin=devnull, stdout=devnull, stderr=devnull)
            except OSError:
                # Fail gracefully if emacs isn't installed.
                return None
        if st != 0:
            return None
        # Read back whatever emacs left in the temporary file.
        f.seek(0)
        return f.readlines()


# Warn if line is longer than 79 characters. Lines beginning with
# ' * Copyright' are exempt.
def check_length(line, ln):
    if len(line) > 79 and not line.startswith(' * Copyright'):
        warn(ln, 'Length exceeds 79 characters')


# Check the use of tab characters in line. If allow_tabs is false,
# any tab is a violation. Otherwise, warn about a tab directly after
# a space, and (once a tab has been seen in the file, as indicated by
# seen_tab) about runs of eight or more spaces.
def check_tabs(line, ln, allow_tabs, seen_tab):
    if not allow_tabs:
        if '\t' in line:
            warn(ln, 'Tab character in file which does not allow tabs')
    else:
        if ' \t' in line:
            warn(ln, 'Tab character immediately following space')
        # Spell out the run length so it cannot be silently altered by
        # whitespace normalization of this source file.
        if ' ' * 8 in line and seen_tab:
            warn(ln, '8+ spaces in file which uses tabs')


# Warn if line (with its line ending already removed) ends with a
# space or tab.
def check_trailing_whitespace(line, ln):
    if line and line[-1] in ' \t':
        warn(ln, 'Trailing whitespace')


# Check the formatting of a multi-line comment. lines holds the
# comment's lines, from the one containing '/*' through the one
# containing '*/'; ln is the line number of lines[0].
def check_comment(lines, ln):
    # Column where the '*' of each continuation line must appear.
    align = lines[0].index('/*') + 1
    if not lines[0].lstrip().startswith('/*'):
        warn(ln, 'Multi-line comment begins after code')
    for line in lines[1:]:
        ln += 1
        if len(line) <= align or line[align] != '*':
            warn(ln, 'Comment line does not have * aligned with top')
        elif line[:align].lstrip() != '':
            warn(ln, 'Garbage before * in comment line')
    # From here on ln is the line number of the comment's last line.
    if not lines[-1].rstrip().endswith('*/'):
        warn(ln, 'Code after end of multi-line comment')
    if len(lines) > 2 and (lines[0].strip() not in ('/*', '/**') or
                           lines[-1].strip() != '*/'):
        warn(ln, 'Comment is 3+ lines but is not formatted as block comment')


# Warn if line starts with '#'. Only called for lines inside a
# function body.
def check_preprocessor(line, ln):
    if line.startswith('#'):
        warn(ln, 'Preprocessor statement in function body')


# Warn about braces which do not follow the krb5 placement rules.
def check_braces(line, ln):
    # Strip out one-line initializer expressions.
    line = re.sub(r'=\s*{.*}', '', line)
    if line.lstrip().startswith('{') and not line.startswith('{'):
        warn(ln, 'Un-cuddled open brace')
    if re.search(r'{\s*\S', line):
        warn(ln, 'Code on line after open brace')
    if re.search(r'\S.*}', line):
        warn(ln, 'Code on line before close brace')


# Check spacing around parentheses: flow control keywords must be
# followed by a space, function calls must not be, and a close
# parenthesis must not be preceded by a space.
#
# This test gives false positives on some function pointer type
# declarations or casts. Avoid this by using typedefs.
def check_space_before_paren(line, ln):
    for m in re.finditer(r'([\w]+)(\s*)\(', line):
        ident, ws = m.groups()
        if ident in ('void', 'char', 'int', 'long', 'unsigned'):
            # A type name before a paren is not a function call.
            pass
        elif ident in ('if', 'for', 'while', 'switch'):
            if not ws:
                warn(ln, 'No space after flow control keyword')
        elif ident != 'return':
            if ws:
                warn(ln, 'Space before parenthesis in function call')

    if re.search(r' \)', line):
        warn(ln, 'Space before close parenthesis')


# Warn about a return statement whose whole expression is wrapped in
# parentheses.
def check_parenthesized_return(line, ln):
    if re.search(r'return\s*\(.*\);', line):
        warn(ln, 'Parenthesized return expression')


# Warn about a space after a cast operator, or a missing space before
# the '*' inside one.
def check_cast(line, ln):
    # We can't reliably distinguish cast operators from parenthesized
    # expressions or function call parameters without a real C parser,
    # so we use some heuristics. A cast operator is followed by an
    # expression, which usually begins with an identifier or an open
    # paren. A function call or parenthesized expression is never
    # followed by an identifier and only rarely by an open paren. We
    # won't detect a cast operator when it's followed by an expression
    # beginning with '*', since it's hard to distinguish that from a
    # multiplication operator. We will get false positives from
    # "(*fp) (args)" and "if (condition) statement", but both of those
    # are erroneous anyway.
    for m in re.finditer(r'\(([^(]+)\)(\s*)[a-zA-Z_(]', line):
        if m.group(2):
            warn(ln, 'Space after cast operator (or inline if/while body)')
        # Check for casts like (char*) which should have a space.
        if re.search(r'[^\s\*]\*+$', m.group(1)):
            warn(ln, 'No space before * in cast operator')


# Warn about a line which begins with a binary operator (reported
# against the previous line) and about binary operators which are not
# surrounded by spaces.
def check_binary_operator(line, ln):
    binop = r'(\+|-|\*|/|%|\^|==|=|!=|<=|<|>=|>|&&|&|\|\||\|)'
    if re.match(r'\s*' + binop + r'\s', line):
        warn(ln - 1, 'Line broken before binary operator')
    for m in re.finditer(r'(\s|\w)' + binop + r'(\s|\w)', line):
        before, op, after = m.groups()
        if not before.isspace() and not after.isspace():
            warn(ln, 'No space before or after binary operator')
        elif not before.isspace():
            warn(ln, 'No space before binary operator')
        elif op not in ('-', '*', '&') and not after.isspace():
            # '-', '*' and '&' are skipped here because they may be
            # unary operators when preceded by a space.
            warn(ln, 'No space after binary operator')


# Warn about an assignment at the beginning of an if conditional.
def check_assignment_in_conditional(line, ln):
    # Check specifically for if statements; we allow assignments in
    # loop expressions.
    if re.search(r'if\s*\(+\w+\s*=[^=]', line):
        warn(ln, 'Assignment in if conditional')


# Return the width of the leading whitespace of line, with tabs
# expanded.
def indent(line):
    return len(re.match(r'\s*', line).group(0).expandtabs())


# Warn about flow control statements which need braces but lack them.
# line is the current line (comments and line ending removed), ln is
# its 1-based line number, and lines is the whole unmodified file.
def check_unbraced_flow_body(line, ln, lines):
    if re.match(r'\s*do$', line):
        warn(ln, 'do statement without braces')
        return

    m = re.match(r'\s*(})?\s*else(\s*if\s*\(.*\))?\s*({)?\s*$', line)
    if m and (m.group(1) is None) != (m.group(3) is None):
        warn(ln, 'One arm of if/else statement braced but not the other')

    if (re.match(r'\s*(if|else if|for|while)\s*\(.*\)$', line) or
            re.match(r'\s*else$', line)):
        base = indent(line)
        # Look at the next two lines (ln is 1-based so lines[ln] is
        # next). If the file ends before then, there cannot be a
        # two-line body, and we must not index past the end.
        if (ln + 1 < len(lines) and indent(lines[ln]) > base and
                indent(lines[ln + 1]) > base):
            warn(ln, 'Body is 2+ lines but has no braces')


# Warn about calls to prohibited string functions.
def check_bad_string_fn(line, ln):
    # This is intentionally pretty fuzzy so that we catch the whole
    # scanf family.
    if re.search(r'\W(strcpy|strcat|sprintf|\w*scanf)\W', line):
        warn(ln, 'Prohibited string function')


# Compare line against the corresponding line of indented_lines (the
# result of emacs_reindent) and warn if they differ. Do nothing if
# indented_lines is None or empty.
def check_indentation(line, indented_lines, ln):
    if not indented_lines:
        return

    if ln - 1 >= len(indented_lines):
        # This should only happen when the emacs reindent removed
        # blank lines from the input file, but check.
        if line.strip() == '':
            warn(ln, 'Trailing blank line')
        return

    if line != indented_lines[ln - 1].rstrip('\r\n'):
        warn(ln, 'Indentation may be incorrect')


# Run every check over lines (a list of lines with their line endings
# still attached) and print a warning for each violation found.
def check_file(lines):
    if not lines:
        return
    # Check if this file allows tabs.
    allow_tabs = 'indent-tabs-mode: nil' not in lines[0]
    seen_tab = False
    indented_lines = emacs_reindent(lines)

    in_function = False
    comment = []
    for ln, line in enumerate(lines, 1):
        line = line.rstrip('\r\n')
        seen_tab = seen_tab or ('\t' in line)

        # Check line structure issues before altering the line.
        check_indentation(line, indented_lines, ln)
        check_length(line, ln)
        check_tabs(line, ln, allow_tabs, seen_tab)
        check_trailing_whitespace(line, ln)

        # Strip out single-line comments and the contents of string
        # literals.
        if not comment:
            line = re.sub(r'/\*.*?\*/', '', line)
            line = re.sub(r'"(\\.|[^"])*"', '""', line)

        # Parse out and check multi-line comments. (Ignore code on
        # the first or last line; check_comment will warn about it.)
        if comment or '/*' in line:
            comment.append(line)
            if '*/' in line:
                check_comment(comment, ln - len(comment) + 1)
                comment = []
            continue

        # Warn if we see a // comment and ignore anything following.
        if '//' in line:
            warn(ln, '// comment')
            line = re.sub(r'//.*', '', line)

        # A brace in the first column opens or closes a function body.
        if line.startswith('{'):
            in_function = True
        elif line.startswith('}'):
            in_function = False

        if in_function:
            check_preprocessor(line, ln)
            check_braces(line, ln)
            check_space_before_paren(line, ln)
            check_parenthesized_return(line, ln)
            check_cast(line, ln)
            check_binary_operator(line, ln)
            check_assignment_in_conditional(line, ln)
            check_unbraced_flow_body(line, ln, lines)
            check_bad_string_fn(line, ln)

    # The entries of lines still carry their line endings, so strip
    # them before testing whether the final line is blank.
    if lines[-1].rstrip('\r\n') == '':
        warn(len(lines), 'Blank line at end of file')


# Read the file named on the command line (or standard input if no
# file is named) and check it. Exit with status 1 on a usage error.
def main():
    if len(sys.argv) == 1:
        lines = sys.stdin.readlines()
    elif len(sys.argv) == 2:
        with open(sys.argv[1]) as f:
            lines = f.readlines()
    else:
        sys.stderr.write('Usage: cstyle-file [filename]\n')
        sys.exit(1)

    check_file(lines)


if __name__ == '__main__':
    main()