1#!/usr/bin/env python
2# Copyright 2018 The Tor Project, Inc.  See LICENSE file for licensing info.
3
4"""This script looks through all the directories for files matching *.c or
5   *.h, and checks their #include directives to make sure that only "permitted"
6   headers are included.
7
8   Any #include directives with angle brackets (like #include <stdio.h>) are
9   ignored -- only directives with quotes (like #include "foo.h") are
10   considered.
11
12   To decide what includes are permitted, this script looks at a .may_include
13   file in each directory.  This file contains empty lines, #-prefixed
14   comments, filenames (like "lib/foo/bar.h") and file globs (like lib/*/*.h)
15   for files that are permitted.
16
17   The script exits with an error if any non-permitted includes are found.
18   .may_include files that contain "!advisory" are considered advisory.
19   Advisory .may_include files only result in warnings, rather than errors.
20"""
21
22# Future imports for Python 2.7, mandatory in 3.0
23from __future__ import division
24from __future__ import print_function
25from __future__ import unicode_literals
26
27import fnmatch
28import os
29import re
30import sys
31
32if sys.version_info[0] <= 2:
33    def open_file(fname):
34        return open(fname, 'r')
35else:
36    def open_file(fname):
37        return open(fname, 'r', encoding='utf-8')
38
39def warn(msg):
40    print(msg, file=sys.stderr)
41
42def fname_is_c(fname):
43    """
44    Return true if 'fname' is the name of a file that we should
45    search for possibly disallowed #include directives.
46    """
47    if fname.endswith((".c", ".h")):
48        bname = os.path.basename(fname)
49        return not bname.startswith((".", "#"))
50    else:
51        return False
52
53INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')
54RULES_FNAME = ".may_include"
55
56ALLOWED_PATTERNS = [
57    re.compile(r'^.*\*\.(h|inc)$'),
58    re.compile(r'^.*/.*\.h$'),
59    re.compile(r'^ext/.*\.c$'),
60    re.compile(r'^orconfig.h$'),
61    re.compile(r'^micro-revision.i$'),
62]
63
64TOPDIR = "src"
65
66def pattern_is_normal(s):
67    for p in ALLOWED_PATTERNS:
68        if p.match(s):
69            return True
70    return False
71
72class Error(object):
73    def __init__(self, location, msg, is_advisory=False):
74        self.location = location
75        self.msg = msg
76        self.is_advisory = is_advisory
77
78    def __str__(self):
79        return "{} at {}".format(self.msg, self.location)
80
81class Rules(object):
82    """ A 'Rules' object is the parsed version of a .may_include file. """
83    def __init__(self, dirpath):
84        self.dirpath = dirpath
85        if dirpath.startswith("src/"):
86            self.incpath = dirpath[4:]
87        else:
88            self.incpath = dirpath
89        self.patterns = []
90        self.usedPatterns = set()
91        self.is_advisory = False
92
93    def addPattern(self, pattern):
94        if pattern == "!advisory":
95            self.is_advisory = True
96            return
97        if not pattern_is_normal(pattern):
98            warn("Unusual pattern {} in {}".format(pattern, self.dirpath))
99        self.patterns.append(pattern)
100
101    def includeOk(self, path):
102        for pattern in self.patterns:
103            if fnmatch.fnmatchcase(path, pattern):
104                self.usedPatterns.add(pattern)
105                return True
106        return False
107
108    def applyToLines(self, lines, loc_prefix=""):
109        lineno = 0
110        for line in lines:
111            lineno += 1
112            m = INCLUDE_PATTERN.match(line)
113            if m:
114                include = m.group(1)
115                if not self.includeOk(include):
116                    yield Error("{}{}".format(loc_prefix,str(lineno)),
117                                "Forbidden include of {}".format(include),
118                                is_advisory=self.is_advisory)
119
120    def applyToFile(self, fname, f):
121        for error in self.applyToLines(iter(f), "{}:".format(fname)):
122            yield error
123
124    def noteUnusedRules(self):
125        for p in self.patterns:
126            if p not in self.usedPatterns:
127                warn("Pattern {} in {} was never used.".format(p, self.dirpath))
128
129    def getAllowedDirectories(self):
130        allowed = []
131        for p in self.patterns:
132            m = re.match(r'^(.*)/\*\.(h|inc)$', p)
133            if m:
134                allowed.append(m.group(1))
135                continue
136            m = re.match(r'^(.*)/[^/]*$', p)
137            if m:
138                allowed.append(m.group(1))
139                continue
140
141        return allowed
142
143
144def normalize_srcdir(fname):
145    """given the name of a source directory or file, return its name
146        relative to `src` in a unix-like format.
147    """
148    orig = fname
149    dirname, dirfile = os.path.split(fname)
150    if re.match(r'.*\.[ch]$', dirfile):
151        fname = dirname
152
153    # Now we have a directory.
154    dirname, result = os.path.split(fname)
155    for _ in range(100):
156        # prevent excess looping in case I missed a tricky case
157        dirname, dirpart = os.path.split(dirname)
158        if dirpart == 'src' or dirname == "":
159            #print(orig,"=>",result)
160            return result
161        result = "{}/{}".format(dirpart,result)
162
163    print("No progress!")
164    assert False
165
166include_rules_cache = {}
167
168def load_include_rules(fname):
169    """ Read a rules file from 'fname', and return it as a Rules object.
170        Return 'None' if fname does not exist.
171    """
172    if fname in include_rules_cache:
173        return include_rules_cache[fname]
174    if not os.path.exists(fname):
175        include_rules_cache[fname] = None
176        return None
177    result = Rules(os.path.split(fname)[0])
178    with open_file(fname) as f:
179        for line in f:
180            line = line.strip()
181            if line.startswith("#") or not line:
182                continue
183            result.addPattern(line)
184    include_rules_cache[fname] = result
185    return result
186
187def get_all_include_rules():
188    """Return a list of all the Rules objects we have loaded so far,
189       sorted by their directory names."""
190    return [ rules for (fname,rules) in
191             sorted(include_rules_cache.items())
192             if rules is not None ]
193
194def remove_self_edges(graph):
195    """Takes a directed graph in as an adjacency mapping (a mapping from
196       node to a list of the nodes to which it connects).
197
198       Remove all edges from a node to itself."""
199
200    for k in list(graph):
201        graph[k] = [ d for d in graph[k] if d != k ]
202
203def closure(graph):
204    """Takes a directed graph in as an adjacency mapping (a mapping from
205       node to a list of the nodes to which it connects), and completes
206       its closure.
207    """
208    graph = graph.copy()
209    changed = False
210    for k in graph.keys():
211        graph[k] = set(graph[k])
212    while True:
213        for k in graph.keys():
214            sz = len(graph[k])
215            for v in list(graph[k]):
216                graph[k].update(graph.get(v, []))
217            if sz != len(graph[k]):
218                changed = True
219
220        if not changed:
221            return graph
222        changed = False
223
224def toposort(graph, limit=100):
225    """Takes a directed graph in as an adjacency mapping (a mapping from
226       node to a list of the nodes to which it connects).  Tries to
227       perform a topological sort on the graph, arranging the nodes into
228       "levels", such that every member of each level is only reachable
229       by members of later levels.
230
231       Returns a list of the members of each level.
232
233       Modifies the input graph, removing every member that could be
234       sorted.  If the graph does not become empty, then it contains a
235       cycle.
236
237       "limit" is the max depth of the graph after which we give up trying
238       to sort it and conclude we have a cycle.
239    """
240    all_levels = []
241
242    n = 0
243    while graph:
244        n += 0
245        cur_level = []
246        all_levels.append(cur_level)
247        for k in list(graph):
248            graph[k] = [ d for d in graph[k] if d in graph ]
249            if graph[k] == []:
250                cur_level.append(k)
251        for k in cur_level:
252            del graph[k]
253        n += 1
254        if n > limit:
255            break
256
257    return all_levels
258
259def consider_include_rules(fname, f):
260    dirpath = os.path.split(fname)[0]
261    rules_fname = os.path.join(dirpath, RULES_FNAME)
262    rules = load_include_rules(os.path.join(dirpath, RULES_FNAME))
263    if rules is None:
264        return
265
266    for err in rules.applyToFile(fname, f):
267        yield err
268
269    list_unused = False
270    log_sorted_levels = False
271
272def walk_c_files(topdir="src"):
273    """Run through all .c and .h files under topdir, looking for
274       include-rule violations. Yield those violations."""
275
276    for dirpath, dirnames, fnames in os.walk(topdir):
277        for fname in fnames:
278            if fname_is_c(fname):
279                fullpath = os.path.join(dirpath,fname)
280                with open(fullpath) as f:
281                    for err in consider_include_rules(fullpath, f):
282                        yield err
283
284def open_or_stdin(fname):
285    if fname == '-':
286        return sys.stdin
287    else:
288        return open(fname)
289
290def check_subsys_file(fname, uses_dirs):
291    if not uses_dirs:
292        # We're doing a distcheck build, or for some other reason there are
293        # no .may_include files.
294        print("SKIPPING")
295        return False
296
297    uses_dirs = { normalize_srcdir(k) : { normalize_srcdir(d) for d in v }
298                  for (k,v) in uses_dirs.items() }
299    uses_closure = closure(uses_dirs)
300    ok = True
301    previous_subsystems = []
302
303    with open_or_stdin(fname) as f:
304        for line in f:
305            _, name, fname = line.split()
306            fname = normalize_srcdir(fname)
307            for prev in previous_subsystems:
308                if fname in uses_closure[prev]:
309                    print("INVERSION: {} uses {}".format(prev,fname))
310                    ok = False
311            previous_subsystems.append(fname)
312    return not ok
313
314def run_check_includes(topdir, list_unused=False, log_sorted_levels=False,
315                       list_advisories=False, check_subsystem_order=None):
316    trouble = False
317
318    for err in walk_c_files(topdir):
319        if err.is_advisory and not list_advisories:
320            continue
321        print(err, file=sys.stderr)
322        if not err.is_advisory:
323            trouble = True
324
325    if trouble:
326        warn(
327    """To change which includes are allowed in a C file, edit the {}
328    files in its enclosing directory.""".format(RULES_FNAME))
329        sys.exit(1)
330
331    if list_unused:
332        for rules in get_all_include_rules():
333            rules.noteUnusedRules()
334
335    uses_dirs = { }
336    for rules in get_all_include_rules():
337        uses_dirs[rules.incpath] = rules.getAllowedDirectories()
338
339    remove_self_edges(uses_dirs)
340
341    if check_subsystem_order:
342        if check_subsys_file(check_subsystem_order, uses_dirs):
343            sys.exit(1)
344
345    all_levels = toposort(uses_dirs)
346
347    if log_sorted_levels:
348        for (n, cur_level) in enumerate(all_levels):
349            if cur_level:
350                print(n, cur_level)
351
352    if uses_dirs:
353        print("There are circular .may_include dependencies in here somewhere:",
354              uses_dirs)
355        sys.exit(1)
356
357def main(argv):
358    import argparse
359
360    progname = argv[0]
361    parser = argparse.ArgumentParser(prog=progname)
362    parser.add_argument("--toposort", action="store_true",
363                        help="Print a topologically sorted list of modules")
364    parser.add_argument("--list-unused", action="store_true",
365                        help="List unused lines in .may_include files.")
366    parser.add_argument("--list-advisories", action="store_true",
367                        help="List advisories as well as forbidden includes")
368    parser.add_argument("--check-subsystem-order", action="store",
369                        help="Check a list of subsystems for ordering")
370    parser.add_argument("topdir", default="src", nargs="?",
371                        help="Top-level directory for the tor source")
372    args = parser.parse_args(argv[1:])
373
374    global TOPDIR
375    TOPDIR = args.topdir
376    run_check_includes(topdir=args.topdir,
377                       log_sorted_levels=args.toposort,
378                       list_unused=args.list_unused,
379                       list_advisories=args.list_advisories,
380                       check_subsystem_order=args.check_subsystem_order)
381
382if __name__ == '__main__':
383    main(sys.argv)
384