1# vim: set ts=8 sts=4 et sw=4 tw=99:
2# This Source Code Form is subject to the terms of the Mozilla Public
3# License, v. 2.0. If a copy of the MPL was not distributed with this
4# file, You can obtain one at http://mozilla.org/MPL/2.0/.
5
6#----------------------------------------------------------------------------
7# This script checks various aspects of SpiderMonkey code style.  The current checks are as
8# follows.
9#
10# We check the following things in headers.
11#
12# - No cyclic dependencies.
13#
14# - No normal header should #include a inlines.h/-inl.h file.
15#
16# - #ifndef wrappers should have the right form. (XXX: not yet implemented)
17#   - Every header file should have one.
18#   - The guard name used should be appropriate for the filename.
19#
20# We check the following things in all files.
21#
22# - #includes should have full paths, e.g. "jit/Ion.h", not "Ion.h".
23#
24# - #includes should use the appropriate form for system headers (<...>) and
25#   local headers ("...").
26#
27# - #includes should be ordered correctly.
28#   - Each one should be in the correct section.
29#   - Alphabetical order should be used within sections.
30#   - Sections should be in the right order.
31#   Note that the presence of #if/#endif blocks complicates things, to the
32#   point that it's not always clear where a conditionally-compiled #include
33#   statement should go, even to a human.  Therefore, we check the #include
34#   statements within each #if/#endif block (including nested ones) in
35#   isolation, but don't try to do any order checking between such blocks.
36#----------------------------------------------------------------------------
37
38from __future__ import print_function
39
40import difflib
41import os
42import re
43import sys
44
45from mozversioncontrol import get_repository_from_env
46
47
48# We don't bother checking files in these directories, because they're (a) auxiliary or (b)
49# imported code that doesn't follow our coding style.
50ignored_js_src_dirs = [
51   'js/src/config/',            # auxiliary stuff
52   'js/src/ctypes/libffi/',     # imported code
53   'js/src/devtools/',          # auxiliary stuff
54   'js/src/editline/',          # imported code
55   'js/src/gdb/',               # auxiliary stuff
56   'js/src/vtune/'              # imported code
57]
58
59# We ignore #includes of these files, because they don't follow the usual rules.
60included_inclnames_to_ignore = set([
61    'ffi.h',                    # generated in ctypes/libffi/
62    'devtools/sharkctl.h',      # we ignore devtools/ in general
63    'devtools/Instruments.h',   # we ignore devtools/ in general
64    'double-conversion.h',      # strange MFBT case
65    'javascript-trace.h',       # generated in $OBJDIR if HAVE_DTRACE is defined
66    'jsautokw.h',               # generated in $OBJDIR
67    'jscustomallocator.h',      # provided by embedders;  allowed to be missing
68    'js-config.h',              # generated in $OBJDIR
69    'fdlibm.h',                 # fdlibm
70    'pratom.h',                 # NSPR
71    'prcvar.h',                 # NSPR
72    'prerror.h',                # NSPR
73    'prinit.h',                 # NSPR
74    'prio.h',                   # NSPR
75    'private/pprio.h',          # NSPR
76    'prlink.h',                 # NSPR
77    'prlock.h',                 # NSPR
78    'prprf.h',                  # NSPR
79    'prthread.h',               # NSPR
80    'prtypes.h',                # NSPR
81    'selfhosted.out.h',         # generated in $OBJDIR
82    'shellmoduleloader.out.h',  # generated in $OBJDIR
83    'unicode/timezone.h',       # ICU
84    'unicode/ucal.h',           # ICU
85    'unicode/uclean.h',         # ICU
86    'unicode/ucol.h',           # ICU
87    'unicode/udat.h',           # ICU
88    'unicode/udatpg.h',         # ICU
89    'unicode/uenum.h',          # ICU
90    'unicode/unorm.h',          # ICU
91    'unicode/unum.h',           # ICU
92    'unicode/unumsys.h',        # ICU
93    'unicode/ustring.h',        # ICU
94    'unicode/utypes.h',         # ICU
95    'vtune/VTuneWrapper.h'      # VTune
96])
97
98# These files have additional constraints on where they are #included, so we
99# ignore #includes of them when checking #include ordering.
100oddly_ordered_inclnames = set([
101    'ctypes/typedefs.h',        # Included multiple times in the body of ctypes/CTypes.h
102    'jsautokw.h',               # Included in the body of frontend/TokenStream.h
103    'jswin.h',                  # Must be #included before <psapi.h>
104    'machine/endian.h',         # Must be included after <sys/types.h> on BSD
105    'winbase.h',                # Must precede other system headers(?)
106    'windef.h'                  # Must precede other system headers(?)
107])
108
109# The files in tests/style/ contain code that fails this checking in various
110# ways.  Here is the output we expect.  If the actual output differs from
111# this, one of the following must have happened.
112# - New SpiderMonkey code violates one of the checked rules.
113# - The tests/style/ files have changed without expected_output being changed
114#   accordingly.
115# - This script has been broken somehow.
116#
117expected_output = '''\
118js/src/tests/style/BadIncludes2.h:1: error:
119    vanilla header includes an inline-header file "tests/style/BadIncludes2-inl.h"
120
121js/src/tests/style/BadIncludes.h:3: error:
122    the file includes itself
123
124js/src/tests/style/BadIncludes.h:6: error:
125    "BadIncludes2.h" is included using the wrong path;
126    did you forget a prefix, or is the file not yet committed?
127
128js/src/tests/style/BadIncludes.h:8: error:
129    <tests/style/BadIncludes2.h> should be included using
130    the #include "..." form
131
132js/src/tests/style/BadIncludes.h:10: error:
133    "stdio.h" is included using the wrong path;
134    did you forget a prefix, or is the file not yet committed?
135
136js/src/tests/style/BadIncludesOrder-inl.h:5:6: error:
137    "vm/Interpreter-inl.h" should be included after "jsscriptinlines.h"
138
139js/src/tests/style/BadIncludesOrder-inl.h:6:7: error:
140    "jsscriptinlines.h" should be included after "js/Value.h"
141
142js/src/tests/style/BadIncludesOrder-inl.h:7:8: error:
143    "js/Value.h" should be included after "ds/LifoAlloc.h"
144
145js/src/tests/style/BadIncludesOrder-inl.h:8:9: error:
146    "ds/LifoAlloc.h" should be included after "jsapi.h"
147
148js/src/tests/style/BadIncludesOrder-inl.h:9:10: error:
149    "jsapi.h" should be included after <stdio.h>
150
151js/src/tests/style/BadIncludesOrder-inl.h:10:11: error:
152    <stdio.h> should be included after "mozilla/HashFunctions.h"
153
154js/src/tests/style/BadIncludesOrder-inl.h:27:28: error:
155    "jsobj.h" should be included after "jsfun.h"
156
157(multiple files): error:
158    header files form one or more cycles
159
160   tests/style/HeaderCycleA1.h
161   -> tests/style/HeaderCycleA2.h
162      -> tests/style/HeaderCycleA3.h
163         -> tests/style/HeaderCycleA1.h
164
165   tests/style/HeaderCycleB1-inl.h
166   -> tests/style/HeaderCycleB2-inl.h
167      -> tests/style/HeaderCycleB3-inl.h
168         -> tests/style/HeaderCycleB4-inl.h
169            -> tests/style/HeaderCycleB1-inl.h
170            -> tests/style/jsheadercycleB5inlines.h
171               -> tests/style/HeaderCycleB1-inl.h
172      -> tests/style/HeaderCycleB4-inl.h
173
174'''.splitlines(True)
175
176actual_output = []
177
178
179def out(*lines):
180    for line in lines:
181        actual_output.append(line + '\n')
182
183
184def error(filename, linenum, *lines):
185    location = filename
186    if linenum is not None:
187        location += ':' + str(linenum)
188    out(location + ': error:')
189    for line in (lines):
190        out('    ' + line)
191    out('')
192
193
194class FileKind(object):
195    C = 1
196    CPP = 2
197    INL_H = 3
198    H = 4
199    TBL = 5
200    MSG = 6
201
202    @staticmethod
203    def get(filename):
204        if filename.endswith('.c'):
205            return FileKind.C
206
207        if filename.endswith('.cpp'):
208            return FileKind.CPP
209
210        if filename.endswith(('inlines.h', '-inl.h')):
211            return FileKind.INL_H
212
213        if filename.endswith('.h'):
214            return FileKind.H
215
216        if filename.endswith('.tbl'):
217            return FileKind.TBL
218
219        if filename.endswith('.msg'):
220            return FileKind.MSG
221
222        error(filename, None, 'unknown file kind')
223
224
225def check_style():
226    # We deal with two kinds of name.
227    # - A "filename" is a full path to a file from the repository root.
228    # - An "inclname" is how a file is referred to in a #include statement.
229    #
230    # Examples (filename -> inclname)
231    # - "mfbt/Attributes.h"         -> "mozilla/Attributes.h"
232    # - "mfbt/decimal/Decimal.h     -> "mozilla/Decimal.h"
233    # - "mozglue/misc/TimeStamp.h   -> "mozilla/TimeStamp.h"
234    # - "memory/mozalloc/mozalloc.h -> "mozilla/mozalloc.h"
235    # - "js/public/Vector.h"        -> "js/Vector.h"
236    # - "js/src/vm/String.h"        -> "vm/String.h"
237
238    non_js_dirnames = ('mfbt/',
239                       'memory/mozalloc/',
240                       'mozglue/')  # type: tuple(str)
241    non_js_inclnames = set()        # type: set(inclname)
242    js_names = dict()               # type: dict(filename, inclname)
243
244    repo = get_repository_from_env()
245
246    # Select the appropriate files.
247    for filename in repo.get_files_in_working_directory():
248        for non_js_dir in non_js_dirnames:
249            if filename.startswith(non_js_dir) and filename.endswith('.h'):
250                inclname = 'mozilla/' + filename.split('/')[-1]
251                non_js_inclnames.add(inclname)
252
253        if filename.startswith('js/public/') and filename.endswith('.h'):
254            inclname = 'js/' + filename[len('js/public/'):]
255            js_names[filename] = inclname
256
257        if filename.startswith('js/src/') and \
258           not filename.startswith(tuple(ignored_js_src_dirs)) and \
259           filename.endswith(('.c', '.cpp', '.h', '.tbl', '.msg')):
260            inclname = filename[len('js/src/'):]
261            js_names[filename] = inclname
262
263    all_inclnames = non_js_inclnames | set(js_names.values())
264
265    edges = dict()      # type: dict(inclname, set(inclname))
266
267    # We don't care what's inside the MFBT and MOZALLOC files, but because they
268    # are #included from JS files we have to add them to the inclusion graph.
269    for inclname in non_js_inclnames:
270        edges[inclname] = set()
271
272    # Process all the JS files.
273    for filename in js_names.keys():
274        inclname = js_names[filename]
275        file_kind = FileKind.get(filename)
276        if file_kind == FileKind.C or file_kind == FileKind.CPP or \
277           file_kind == FileKind.H or file_kind == FileKind.INL_H:
278            included_h_inclnames = set()    # type: set(inclname)
279
280            # This script is run in js/src/, so prepend '../../' to get to the root of the Mozilla
281            # source tree.
282            with open(os.path.join(repo.path, filename)) as f:
283                do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames)
284
285        edges[inclname] = included_h_inclnames
286
287    find_cycles(all_inclnames, edges)
288
289    # Compare expected and actual output.
290    difflines = difflib.unified_diff(expected_output, actual_output,
291                                     fromfile='check_spider_monkey_style.py expected output',
292                                       tofile='check_spider_monkey_style.py actual output')
293    ok = True
294    for diffline in difflines:
295        ok = False
296        print(diffline, end='')
297
298    return ok
299
300
301def module_name(name):
302    '''Strip the trailing .cpp, .h, inlines.h or -inl.h from a filename.'''
303
304    return name.replace('inlines.h', '').replace('-inl.h', '').replace('.h', '').replace('.cpp', '')
305
306
307def is_module_header(enclosing_inclname, header_inclname):
308    '''Determine if an included name is the "module header", i.e. should be
309    first in the file.'''
310
311    module = module_name(enclosing_inclname)
312
313    # Normal case, e.g. module == "foo/Bar", header_inclname == "foo/Bar.h".
314    if module == module_name(header_inclname):
315        return True
316
317    # A public header, e.g. module == "foo/Bar", header_inclname == "js/Bar.h".
318    m = re.match(r'js\/(.*)\.h', header_inclname)
319    if m is not None and module.endswith('/' + m.group(1)):
320        return True
321
322    return False
323
324
325class Include(object):
326    '''Important information for a single #include statement.'''
327
328    def __init__(self, inclname, linenum, is_system):
329        self.inclname = inclname
330        self.linenum = linenum
331        self.is_system = is_system
332
333    def isLeaf(self):
334        return True
335
336    def section(self, enclosing_inclname):
337        '''Identify which section inclname belongs to.
338
339        The section numbers are as follows.
340          0. Module header (e.g. jsfoo.h or jsfooinlines.h within jsfoo.cpp)
341          1. mozilla/Foo.h
342          2. <foo.h> or <foo>
343          3. jsfoo.h, prmjtime.h, etc
344          4. foo/Bar.h
345          5. jsfooinlines.h
346          6. foo/Bar-inl.h
347          7. non-.h, e.g. *.tbl, *.msg
348        '''
349
350        if self.is_system:
351            return 2
352
353        if not self.inclname.endswith('.h'):
354            return 7
355
356        # A couple of modules have the .h file in js/ and the .cpp file elsewhere and so need
357        # special handling.
358        if is_module_header(enclosing_inclname, self.inclname):
359            return 0
360
361        if '/' in self.inclname:
362            if self.inclname.startswith('mozilla/'):
363                return 1
364
365            if self.inclname.endswith('-inl.h'):
366                return 6
367
368            return 4
369
370        if self.inclname.endswith('inlines.h'):
371            return 5
372
373        return 3
374
375    def quote(self):
376        if self.is_system:
377            return '<' + self.inclname + '>'
378        else:
379            return '"' + self.inclname + '"'
380
381
382class HashIfBlock(object):
383    '''Important information about a #if/#endif block.
384
385    A #if/#endif block is the contents of a #if/#endif (or similar) section.
386    The top-level block, which is not within a #if/#endif pair, is also
387    considered a block.
388
389    Each leaf is either an Include (representing a #include), or another
390    nested HashIfBlock.'''
391    def __init__(self):
392        self.kids = []
393
394    def isLeaf(self):
395        return False
396
397
398def do_file(filename, inclname, file_kind, f, all_inclnames, included_h_inclnames):
399    block_stack = [HashIfBlock()]
400
401    # Extract the #include statements as a tree of IBlocks and IIncludes.
402    for linenum, line in enumerate(f, start=1):
403        # We're only interested in lines that contain a '#'.
404        if not '#' in line:
405            continue
406
407        # Look for a |#include "..."| line.
408        m = re.match(r'\s*#\s*include\s+"([^"]*)"', line)
409        if m is not None:
410            block_stack[-1].kids.append(Include(m.group(1), linenum, False))
411
412        # Look for a |#include <...>| line.
413        m = re.match(r'\s*#\s*include\s+<([^>]*)>', line)
414        if m is not None:
415            block_stack[-1].kids.append(Include(m.group(1), linenum, True))
416
417        # Look for a |#{if,ifdef,ifndef}| line.
418        m = re.match(r'\s*#\s*(if|ifdef|ifndef)\b', line)
419        if m is not None:
420            # Open a new block.
421            new_block = HashIfBlock()
422            block_stack[-1].kids.append(new_block)
423            block_stack.append(new_block)
424
425        # Look for a |#{elif,else}| line.
426        m = re.match(r'\s*#\s*(elif|else)\b', line)
427        if m is not None:
428            # Close the current block, and open an adjacent one.
429            block_stack.pop()
430            new_block = HashIfBlock()
431            block_stack[-1].kids.append(new_block)
432            block_stack.append(new_block)
433
434        # Look for a |#endif| line.
435        m = re.match(r'\s*#\s*endif\b', line)
436        if m is not None:
437            # Close the current block.
438            block_stack.pop()
439
440    def check_include_statement(include):
441        '''Check the style of a single #include statement.'''
442
443        if include.is_system:
444            # Check it is not a known local file (in which case it's probably a system header).
445            if include.inclname in included_inclnames_to_ignore or \
446               include.inclname in all_inclnames:
447                error(filename, include.linenum,
448                      include.quote() + ' should be included using',
449                      'the #include "..." form')
450
451        else:
452            if include.inclname not in included_inclnames_to_ignore:
453                included_kind = FileKind.get(include.inclname)
454
455                # Check the #include path has the correct form.
456                if include.inclname not in all_inclnames:
457                    error(filename, include.linenum,
458                          include.quote() + ' is included using the wrong path;',
459                          'did you forget a prefix, or is the file not yet committed?')
460
461                # Record inclusions of .h files for cycle detection later.
462                # (Exclude .tbl and .msg files.)
463                elif included_kind == FileKind.H or included_kind == FileKind.INL_H:
464                    included_h_inclnames.add(include.inclname)
465
466                # Check a H file doesn't #include an INL_H file.
467                if file_kind == FileKind.H and included_kind == FileKind.INL_H:
468                    error(filename, include.linenum,
469                          'vanilla header includes an inline-header file ' + include.quote())
470
471                # Check a file doesn't #include itself.  (We do this here because the cycle
472                # detection below doesn't detect this case.)
473                if inclname == include.inclname:
474                    error(filename, include.linenum, 'the file includes itself')
475
476    def check_includes_order(include1, include2):
477        '''Check the ordering of two #include statements.'''
478
479        if include1.inclname in oddly_ordered_inclnames or \
480           include2.inclname in oddly_ordered_inclnames:
481            return
482
483        section1 = include1.section(inclname)
484        section2 = include2.section(inclname)
485        if (section1 > section2) or \
486           ((section1 == section2) and (include1.inclname.lower() > include2.inclname.lower())):
487            error(filename, str(include1.linenum) + ':' + str(include2.linenum),
488                  include1.quote() + ' should be included after ' + include2.quote())
489
490    # Check the extracted #include statements, both individually, and the ordering of
491    # adjacent pairs that live in the same block.
492    def pair_traverse(prev, this):
493        if this.isLeaf():
494            check_include_statement(this)
495            if prev is not None and prev.isLeaf():
496                check_includes_order(prev, this)
497        else:
498            for prev2, this2 in zip([None] + this.kids[0:-1], this.kids):
499                pair_traverse(prev2, this2)
500
501    pair_traverse(None, block_stack[-1])
502
503
504def find_cycles(all_inclnames, edges):
505    '''Find and draw any cycles.'''
506
507    SCCs = tarjan(all_inclnames, edges)
508
509    # The various sorted() calls below ensure the output is deterministic.
510
511    def draw_SCC(c):
512        cset = set(c)
513        drawn = set()
514        def draw(v, indent):
515            out('   ' * indent + ('-> ' if indent else '   ') + v)
516            if v in drawn:
517                return
518            drawn.add(v)
519            for succ in sorted(edges[v]):
520                if succ in cset:
521                    draw(succ, indent + 1)
522        draw(sorted(c)[0], 0)
523        out('')
524
525    have_drawn_an_SCC = False
526    for scc in sorted(SCCs):
527        if len(scc) != 1:
528            if not have_drawn_an_SCC:
529                error('(multiple files)', None, 'header files form one or more cycles')
530                have_drawn_an_SCC = True
531
532            draw_SCC(scc)
533
534
535# Tarjan's algorithm for finding the strongly connected components (SCCs) of a graph.
536# https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
537def tarjan(V, E):
538    vertex_index = {}
539    vertex_lowlink = {}
540    index = 0
541    S = []
542    all_SCCs = []
543
544    def strongconnect(v, index):
545        # Set the depth index for v to the smallest unused index
546        vertex_index[v] = index
547        vertex_lowlink[v] = index
548        index += 1
549        S.append(v)
550
551        # Consider successors of v
552        for w in E[v]:
553            if w not in vertex_index:
554                # Successor w has not yet been visited; recurse on it
555                index = strongconnect(w, index)
556                vertex_lowlink[v] = min(vertex_lowlink[v], vertex_lowlink[w])
557            elif w in S:
558                # Successor w is in stack S and hence in the current SCC
559                vertex_lowlink[v] = min(vertex_lowlink[v], vertex_index[w])
560
561        # If v is a root node, pop the stack and generate an SCC
562        if vertex_lowlink[v] == vertex_index[v]:
563            i = S.index(v)
564            scc = S[i:]
565            del S[i:]
566            all_SCCs.append(scc)
567
568        return index
569
570    for v in V:
571        if v not in vertex_index:
572            index = strongconnect(v, index)
573
574    return all_SCCs
575
576
577def main():
578    ok = check_style()
579
580    if ok:
581        print('TEST-PASS | check_spidermonkey_style.py | ok')
582    else:
583        print('TEST-UNEXPECTED-FAIL | check_spidermonkey_style.py | actual output does not match expected output;  diff is above')
584
585    sys.exit(0 if ok else 1)
586
587
588if __name__ == '__main__':
589    main()
590