1# vim: set ts=8 sts=4 et sw=4 tw=99: 2# This Source Code Form is subject to the terms of the Mozilla Public 3# License, v. 2.0. If a copy of the MPL was not distributed with this 4# file, You can obtain one at http://mozilla.org/MPL/2.0/. 5 6#---------------------------------------------------------------------------- 7# This script checks various aspects of SpiderMonkey code style. The current checks are as 8# follows. 9# 10# We check the following things in headers. 11# 12# - No cyclic dependencies. 13# 14# - No normal header should #include a inlines.h/-inl.h file. 15# 16# - #ifndef wrappers should have the right form. (XXX: not yet implemented) 17# - Every header file should have one. 18# - The guard name used should be appropriate for the filename. 19# 20# We check the following things in all files. 21# 22# - #includes should have full paths, e.g. "jit/Ion.h", not "Ion.h". 23# 24# - #includes should use the appropriate form for system headers (<...>) and 25# local headers ("..."). 26# 27# - #includes should be ordered correctly. 28# - Each one should be in the correct section. 29# - Alphabetical order should be used within sections. 30# - Sections should be in the right order. 31# Note that the presence of #if/#endif blocks complicates things, to the 32# point that it's not always clear where a conditionally-compiled #include 33# statement should go, even to a human. Therefore, we check the #include 34# statements within each #if/#endif block (including nested ones) in 35# isolation, but don't try to do any order checking between such blocks. 36#---------------------------------------------------------------------------- 37 38from __future__ import print_function 39 40import difflib 41import os 42import re 43import sys 44 45from mozversioncontrol import get_repository_from_env 46 47 48# We don't bother checking files in these directories, because they're (a) auxiliary or (b) 49# imported code that doesn't follow our coding style. 50ignored_js_src_dirs = [ 51 'js/src/config/', # auxiliary stuff 52 'js/src/ctypes/libffi/', # imported code 53 'js/src/devtools/', # auxiliary stuff 54 'js/src/editline/', # imported code 55 'js/src/gdb/', # auxiliary stuff 56 'js/src/vtune/' # imported code 57] 58 59# We ignore #includes of these files, because they don't follow the usual rules. 60included_inclnames_to_ignore = set([ 61 'ffi.h', # generated in ctypes/libffi/ 62 'devtools/sharkctl.h', # we ignore devtools/ in general 63 'devtools/Instruments.h', # we ignore devtools/ in general 64 'double-conversion.h', # strange MFBT case 65 'javascript-trace.h', # generated in $OBJDIR if HAVE_DTRACE is defined 66 'jsautokw.h', # generated in $OBJDIR 67 'jscustomallocator.h', # provided by embedders; allowed to be missing 68 'js-config.h', # generated in $OBJDIR 69 'fdlibm.h', # fdlibm 70 'pratom.h', # NSPR 71 'prcvar.h', # NSPR 72 'prerror.h', # NSPR 73 'prinit.h', # NSPR 74 'prio.h', # NSPR 75 'private/pprio.h', # NSPR 76 'prlink.h', # NSPR 77 'prlock.h', # NSPR 78 'prprf.h', # NSPR 79 'prthread.h', # NSPR 80 'prtypes.h', # NSPR 81 'selfhosted.out.h', # generated in $OBJDIR 82 'shellmoduleloader.out.h', # generated in $OBJDIR 83 'unicode/timezone.h', # ICU 84 'unicode/ucal.h', # ICU 85 'unicode/uclean.h', # ICU 86 'unicode/ucol.h', # ICU 87 'unicode/udat.h', # ICU 88 'unicode/udatpg.h', # ICU 89 'unicode/uenum.h', # ICU 90 'unicode/unorm.h', # ICU 91 'unicode/unum.h', # ICU 92 'unicode/unumsys.h', # ICU 93 'unicode/ustring.h', # ICU 94 'unicode/utypes.h', # ICU 95 'vtune/VTuneWrapper.h' # VTune 96]) 97 98# These files have additional constraints on where they are #included, so we 99# ignore #includes of them when checking #include ordering. 100oddly_ordered_inclnames = set([ 101 'ctypes/typedefs.h', # Included multiple times in the body of ctypes/CTypes.h 102 'jsautokw.h', # Included in the body of frontend/TokenStream.h 103 'jswin.h', # Must be #included before <psapi.h> 104 'machine/endian.h', # Must be included after <sys/types.h> on BSD 105 'winbase.h', # Must precede other system headers(?) 106 'windef.h' # Must precede other system headers(?) 107]) 108 109# The files in tests/style/ contain code that fails this checking in various 110# ways. Here is the output we expect. If the actual output differs from 111# this, one of the following must have happened. 112# - New SpiderMonkey code violates one of the checked rules. 113# - The tests/style/ files have changed without expected_output being changed 114# accordingly. 115# - This script has been broken somehow. 116# 117expected_output = '''\ 118js/src/tests/style/BadIncludes2.h:1: error: 119 vanilla header includes an inline-header file "tests/style/BadIncludes2-inl.h" 120 121js/src/tests/style/BadIncludes.h:3: error: 122 the file includes itself 123 124js/src/tests/style/BadIncludes.h:6: error: 125 "BadIncludes2.h" is included using the wrong path; 126 did you forget a prefix, or is the file not yet committed? 127 128js/src/tests/style/BadIncludes.h:8: error: 129 <tests/style/BadIncludes2.h> should be included using 130 the #include "..." form 131 132js/src/tests/style/BadIncludes.h:10: error: 133 "stdio.h" is included using the wrong path; 134 did you forget a prefix, or is the file not yet committed? 135 136js/src/tests/style/BadIncludesOrder-inl.h:5:6: error: 137 "vm/Interpreter-inl.h" should be included after "jsscriptinlines.h" 138 139js/src/tests/style/BadIncludesOrder-inl.h:6:7: error: 140 "jsscriptinlines.h" should be included after "js/Value.h" 141 142js/src/tests/style/BadIncludesOrder-inl.h:7:8: error: 143 "js/Value.h" should be included after "ds/LifoAlloc.h" 144 145js/src/tests/style/BadIncludesOrder-inl.h:8:9: error: 146 "ds/LifoAlloc.h" should be included after "jsapi.h" 147 148js/src/tests/style/BadIncludesOrder-inl.h:9:10: error: 149 "jsapi.h" should be included after <stdio.h> 150 151js/src/tests/style/BadIncludesOrder-inl.h:10:11: error: 152 <stdio.h> should be included after "mozilla/HashFunctions.h" 153 154js/src/tests/style/BadIncludesOrder-inl.h:27:28: error: 155 "jsobj.h" should be included after "jsfun.h" 156 157(multiple files): error: 158 header files form one or more cycles 159 160 tests/style/HeaderCycleA1.h 161 -> tests/style/HeaderCycleA2.h 162 -> tests/style/HeaderCycleA3.h 163 -> tests/style/HeaderCycleA1.h 164 165 tests/style/HeaderCycleB1-inl.h 166 -> tests/style/HeaderCycleB2-inl.h 167 -> tests/style/HeaderCycleB3-inl.h 168 -> tests/style/HeaderCycleB4-inl.h 169 -> tests/style/HeaderCycleB1-inl.h 170 -> tests/style/jsheadercycleB5inlines.h 171 -> tests/style/HeaderCycleB1-inl.h 172 -> tests/style/HeaderCycleB4-inl.h 173 174'''.splitlines(True) 175 176actual_output = [] 177 178 179def out(*lines): 180 for line in lines: 181 actual_output.append(line + '\n') 182 183 184def error(filename, linenum, *lines): 185 location = filename 186 if linenum is not None: 187 location += ':' + str(linenum) 188 out(location + ': error:') 189 for line in (lines): 190 out(' ' + line) 191 out('') 192 193 194class FileKind(object): 195 C = 1 196 CPP = 2 197 INL_H = 3 198 H = 4 199 TBL = 5 200 MSG = 6 201 202 @staticmethod 203 def get(filename): 204 if filename.endswith('.c'): 205 return FileKind.C 206 207 if filename.endswith('.cpp'): 208 return FileKind.CPP 209 210 if filename.endswith(('inlines.h', '-inl.h')): 211 return FileKind.INL_H 212 213 if filename.endswith('.h'): 214 return FileKind.H 215 216 if filename.endswith('.tbl'): 217 return FileKind.TBL 218 219 if filename.endswith('.msg'): 220 return FileKind.MSG 221 222 error(filename, None, 'unknown file kind') 223 224 225def check_style(): 226 # We deal with two kinds of name. 227 # - A "filename" is a full path to a file from the repository root. 228 # - An "inclname" is how a file is referred to in a #include statement. 229 # 230 # Examples (filename -> inclname) 231 # - "mfbt/Attributes.h" -> "mozilla/Attributes.h" 232 # - "mfbt/decimal/Decimal.h -> "mozilla/Decimal.h" 233 # - "mozglue/misc/TimeStamp.h -> "mozilla/TimeStamp.h" 234 # - "memory/mozalloc/mozalloc.h -> "mozilla/mozalloc.h" 235 # - "js/public/Vector.h" -> "js/Vector.h" 236 # - "js/src/vm/String.h" -> "vm/String.h" 237 238 non_js_dirnames = ('mfbt/', 239 'memory/mozalloc/', 240 'mozglue/') # type: tuple(str) 241 non_js_inclnames = set() # type: set(inclname) 242 js_names = dict() # type: dict(filename, inclname) 243 244 repo = get_repository_from_env() 245 246 # Select the appropriate files. 247 for filename in repo.get_files_in_working_directory(): 248 for non_js_dir in non_js_dirnames: 249 if filename.startswith(non_js_dir) and filename.endswith('.h'): 250 inclname = 'mozilla/' + filename.split('/')[-1] 251 non_js_inclnames.add(inclname) 252 253 if filename.startswith('js/public/') and filename.endswith('.h'): 254 inclname = 'js/' + filename[len('js/public/'):] 255 js_names[filename] = inclname 256 257 if filename.startswith('js/src/') and \ 258 not filename.startswith(tuple(ignored_js_src_dirs)) and \ 259 filename.endswith(('.c', '.cpp', '.h', '.tbl', '.msg')): 260 inclname = filename[len('js/src/'):] 261 js_names[filename] = inclname 262 263 all_inclnames = non_js_inclnames | set(js_names.values()) 264 265 edges = dict() # type: dict(inclname, set(inclname)) 266 267 # We don't care what's inside the MFBT and MOZALLOC files, but because they 268 # are #included from JS files we have to add them to the inclusion graph. 269 for inclname in non_js_inclnames: 270 edges[inclname] = set() 271 272 # Process all the JS files. 273 for filename in js_names.keys(): 274 inclname = js_names[filename] 275 file_kind = FileKind.get(filename) 276 if file_kind == FileKind.C or file_kind == FileKind.CPP or \ 277 file_kind == FileKind.H or file_kind == FileKind.INL_H: 278 included_h_inclnames = set() # type: set(inclname) 279 280 # This script is run in js/src/, so prepend '../../' to get to the root of the Mozilla 281 # source tree. 282 with open(os.path.join(repo.path, filename)) as f: 283 do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames) 284 285 edges[inclname] = included_h_inclnames 286 287 find_cycles(all_inclnames, edges) 288 289 # Compare expected and actual output. 290 difflines = difflib.unified_diff(expected_output, actual_output, 291 fromfile='check_spider_monkey_style.py expected output', 292 tofile='check_spider_monkey_style.py actual output') 293 ok = True 294 for diffline in difflines: 295 ok = False 296 print(diffline, end='') 297 298 return ok 299 300 301def module_name(name): 302 '''Strip the trailing .cpp, .h, inlines.h or -inl.h from a filename.''' 303 304 return name.replace('inlines.h', '').replace('-inl.h', '').replace('.h', '').replace('.cpp', '') 305 306 307def is_module_header(enclosing_inclname, header_inclname): 308 '''Determine if an included name is the "module header", i.e. should be 309 first in the file.''' 310 311 module = module_name(enclosing_inclname) 312 313 # Normal case, e.g. module == "foo/Bar", header_inclname == "foo/Bar.h". 314 if module == module_name(header_inclname): 315 return True 316 317 # A public header, e.g. module == "foo/Bar", header_inclname == "js/Bar.h". 318 m = re.match(r'js\/(.*)\.h', header_inclname) 319 if m is not None and module.endswith('/' + m.group(1)): 320 return True 321 322 return False 323 324 325class Include(object): 326 '''Important information for a single #include statement.''' 327 328 def __init__(self, inclname, linenum, is_system): 329 self.inclname = inclname 330 self.linenum = linenum 331 self.is_system = is_system 332 333 def isLeaf(self): 334 return True 335 336 def section(self, enclosing_inclname): 337 '''Identify which section inclname belongs to. 338 339 The section numbers are as follows. 340 0. Module header (e.g. jsfoo.h or jsfooinlines.h within jsfoo.cpp) 341 1. mozilla/Foo.h 342 2. <foo.h> or <foo> 343 3. jsfoo.h, prmjtime.h, etc 344 4. foo/Bar.h 345 5. jsfooinlines.h 346 6. foo/Bar-inl.h 347 7. non-.h, e.g. *.tbl, *.msg 348 ''' 349 350 if self.is_system: 351 return 2 352 353 if not self.inclname.endswith('.h'): 354 return 7 355 356 # A couple of modules have the .h file in js/ and the .cpp file elsewhere and so need 357 # special handling. 358 if is_module_header(enclosing_inclname, self.inclname): 359 return 0 360 361 if '/' in self.inclname: 362 if self.inclname.startswith('mozilla/'): 363 return 1 364 365 if self.inclname.endswith('-inl.h'): 366 return 6 367 368 return 4 369 370 if self.inclname.endswith('inlines.h'): 371 return 5 372 373 return 3 374 375 def quote(self): 376 if self.is_system: 377 return '<' + self.inclname + '>' 378 else: 379 return '"' + self.inclname + '"' 380 381 382class HashIfBlock(object): 383 '''Important information about a #if/#endif block. 384 385 A #if/#endif block is the contents of a #if/#endif (or similar) section. 386 The top-level block, which is not within a #if/#endif pair, is also 387 considered a block. 388 389 Each leaf is either an Include (representing a #include), or another 390 nested HashIfBlock.''' 391 def __init__(self): 392 self.kids = [] 393 394 def isLeaf(self): 395 return False 396 397 398def do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames): 399 block_stack = [HashIfBlock()] 400 401 # Extract the #include statements as a tree of IBlocks and IIncludes. 402 for linenum, line in enumerate(f, start=1): 403 # We're only interested in lines that contain a '#'. 404 if not '#' in line: 405 continue 406 407 # Look for a |#include "..."| line. 408 m = re.match(r'\s*#\s*include\s+"([^"]*)"', line) 409 if m is not None: 410 block_stack[-1].kids.append(Include(m.group(1), linenum, False)) 411 412 # Look for a |#include <...>| line. 413 m = re.match(r'\s*#\s*include\s+<([^>]*)>', line) 414 if m is not None: 415 block_stack[-1].kids.append(Include(m.group(1), linenum, True)) 416 417 # Look for a |#{if,ifdef,ifndef}| line. 418 m = re.match(r'\s*#\s*(if|ifdef|ifndef)\b', line) 419 if m is not None: 420 # Open a new block. 421 new_block = HashIfBlock() 422 block_stack[-1].kids.append(new_block) 423 block_stack.append(new_block) 424 425 # Look for a |#{elif,else}| line. 426 m = re.match(r'\s*#\s*(elif|else)\b', line) 427 if m is not None: 428 # Close the current block, and open an adjacent one. 429 block_stack.pop() 430 new_block = HashIfBlock() 431 block_stack[-1].kids.append(new_block) 432 block_stack.append(new_block) 433 434 # Look for a |#endif| line. 435 m = re.match(r'\s*#\s*endif\b', line) 436 if m is not None: 437 # Close the current block. 438 block_stack.pop() 439 440 def check_include_statement(include): 441 '''Check the style of a single #include statement.''' 442 443 if include.is_system: 444 # Check it is not a known local file (in which case it's probably a system header). 445 if include.inclname in included_inclnames_to_ignore or \ 446 include.inclname in all_inclnames: 447 error(filename, include.linenum, 448 include.quote() + ' should be included using', 449 'the #include "..." form') 450 451 else: 452 if include.inclname not in included_inclnames_to_ignore: 453 included_kind = FileKind.get(include.inclname) 454 455 # Check the #include path has the correct form. 456 if include.inclname not in all_inclnames: 457 error(filename, include.linenum, 458 include.quote() + ' is included using the wrong path;', 459 'did you forget a prefix, or is the file not yet committed?') 460 461 # Record inclusions of .h files for cycle detection later. 462 # (Exclude .tbl and .msg files.) 463 elif included_kind == FileKind.H or included_kind == FileKind.INL_H: 464 included_h_inclnames.add(include.inclname) 465 466 # Check a H file doesn't #include an INL_H file. 467 if file_kind == FileKind.H and included_kind == FileKind.INL_H: 468 error(filename, include.linenum, 469 'vanilla header includes an inline-header file ' + include.quote()) 470 471 # Check a file doesn't #include itself. (We do this here because the cycle 472 # detection below doesn't detect this case.) 473 if inclname == include.inclname: 474 error(filename, include.linenum, 'the file includes itself') 475 476 def check_includes_order(include1, include2): 477 '''Check the ordering of two #include statements.''' 478 479 if include1.inclname in oddly_ordered_inclnames or \ 480 include2.inclname in oddly_ordered_inclnames: 481 return 482 483 section1 = include1.section(inclname) 484 section2 = include2.section(inclname) 485 if (section1 > section2) or \ 486 ((section1 == section2) and (include1.inclname.lower() > include2.inclname.lower())): 487 error(filename, str(include1.linenum) + ':' + str(include2.linenum), 488 include1.quote() + ' should be included after ' + include2.quote()) 489 490 # Check the extracted #include statements, both individually, and the ordering of 491 # adjacent pairs that live in the same block. 492 def pair_traverse(prev, this): 493 if this.isLeaf(): 494 check_include_statement(this) 495 if prev is not None and prev.isLeaf(): 496 check_includes_order(prev, this) 497 else: 498 for prev2, this2 in zip([None] + this.kids[0:-1], this.kids): 499 pair_traverse(prev2, this2) 500 501 pair_traverse(None, block_stack[-1]) 502 503 504def find_cycles(all_inclnames, edges): 505 '''Find and draw any cycles.''' 506 507 SCCs = tarjan(all_inclnames, edges) 508 509 # The various sorted() calls below ensure the output is deterministic. 510 511 def draw_SCC(c): 512 cset = set(c) 513 drawn = set() 514 def draw(v, indent): 515 out(' ' * indent + ('-> ' if indent else ' ') + v) 516 if v in drawn: 517 return 518 drawn.add(v) 519 for succ in sorted(edges[v]): 520 if succ in cset: 521 draw(succ, indent + 1) 522 draw(sorted(c)[0], 0) 523 out('') 524 525 have_drawn_an_SCC = False 526 for scc in sorted(SCCs): 527 if len(scc) != 1: 528 if not have_drawn_an_SCC: 529 error('(multiple files)', None, 'header files form one or more cycles') 530 have_drawn_an_SCC = True 531 532 draw_SCC(scc) 533 534 535# Tarjan's algorithm for finding the strongly connected components (SCCs) of a graph. 536# https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm 537def tarjan(V, E): 538 vertex_index = {} 539 vertex_lowlink = {} 540 index = 0 541 S = [] 542 all_SCCs = [] 543 544 def strongconnect(v, index): 545 # Set the depth index for v to the smallest unused index 546 vertex_index[v] = index 547 vertex_lowlink[v] = index 548 index += 1 549 S.append(v) 550 551 # Consider successors of v 552 for w in E[v]: 553 if w not in vertex_index: 554 # Successor w has not yet been visited; recurse on it 555 index = strongconnect(w, index) 556 vertex_lowlink[v] = min(vertex_lowlink[v], vertex_lowlink[w]) 557 elif w in S: 558 # Successor w is in stack S and hence in the current SCC 559 vertex_lowlink[v] = min(vertex_lowlink[v], vertex_index[w]) 560 561 # If v is a root node, pop the stack and generate an SCC 562 if vertex_lowlink[v] == vertex_index[v]: 563 i = S.index(v) 564 scc = S[i:] 565 del S[i:] 566 all_SCCs.append(scc) 567 568 return index 569 570 for v in V: 571 if v not in vertex_index: 572 index = strongconnect(v, index) 573 574 return all_SCCs 575 576 577def main(): 578 ok = check_style() 579 580 if ok: 581 print('TEST-PASS | check_spidermonkey_style.py | ok') 582 else: 583 print('TEST-UNEXPECTED-FAIL | check_spidermonkey_style.py | actual output does not match expected output; diff is above') 584 585 sys.exit(0 if ok else 1) 586 587 588if __name__ == '__main__': 589 main() 590