#!/usr/bin/env python
# Copyright 2018 The Tor Project, Inc. See LICENSE file for licensing info.

"""This script looks through all the directories for files matching *.c or
   *.h, and checks their #include directives to make sure that only "permitted"
   headers are included.

   Any #include directives with angle brackets (like #include <stdio.h>) are
   ignored -- only directives with quotes (like #include "foo.h") are
   considered.

   To decide what includes are permitted, this script looks at a .may_include
   file in each directory.  This file contains empty lines, #-prefixed
   comments, filenames (like "lib/foo/bar.h") and file globs (like lib/*/*.h)
   for files that are permitted.

   The script exits with an error if any non-permitted includes are found.
   .may_include files that contain "!advisory" are considered advisory.
   Advisory .may_include files only result in warnings, rather than errors.
"""

# Future imports for Python 2.7, mandatory in 3.0
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import fnmatch
import os
import re
import sys

if sys.version_info[0] <= 2:
    def open_file(fname):
        return open(fname, 'r')
else:
    def open_file(fname):
        # Force UTF-8 so the result does not depend on the user's locale.
        return open(fname, 'r', encoding='utf-8')

def warn(msg):
    """Print a warning message to stderr."""
    print(msg, file=sys.stderr)

def fname_is_c(fname):
    """
    Return true if 'fname' is the name of a file that we should
    search for possibly disallowed #include directives.
    """
    if fname.endswith((".c", ".h")):
        bname = os.path.basename(fname)
        # Skip hidden files and editor backup/lock files (e.g. "#foo.c#").
        return not bname.startswith((".", "#"))
    else:
        return False

# Matches an #include directive with a double-quoted header name; the
# header path is captured in group 1.
INCLUDE_PATTERN = re.compile(r'\s*#\s*include\s+"([^"]*)"')
RULES_FNAME = ".may_include"

# Patterns that we expect to find in a .may_include file; anything else
# provokes an "Unusual pattern" warning.
ALLOWED_PATTERNS = [
    re.compile(r'^.*\*\.(h|inc)$'),
    re.compile(r'^.*/.*\.h$'),
    re.compile(r'^ext/.*\.c$'),
    re.compile(r'^orconfig.h$'),
    re.compile(r'^micro-revision.i$'),
]

TOPDIR = "src"

def pattern_is_normal(s):
    """Return true iff 's' looks like one of the pattern shapes we expect
       to find in a .may_include file (see ALLOWED_PATTERNS)."""
    for p in ALLOWED_PATTERNS:
        if p.match(s):
            return True
    return False

class Error(object):
    """A single forbidden-include violation.

       'location' is a human-readable position (typically "fname:lineno"),
       'msg' describes the problem, and 'is_advisory' is true when the
       violation came from an advisory .may_include file (warn, don't fail).
    """
    def __init__(self, location, msg, is_advisory=False):
        self.location = location
        self.msg = msg
        self.is_advisory = is_advisory

    def __str__(self):
        return "{} at {}".format(self.msg, self.location)

class Rules(object):
    """ A 'Rules' object is the parsed version of a .may_include file. """
    def __init__(self, dirpath):
        self.dirpath = dirpath
        # incpath is the directory as written in #include directives,
        # i.e. relative to src/.
        if dirpath.startswith("src/"):
            self.incpath = dirpath[4:]
        else:
            self.incpath = dirpath
        self.patterns = []
        self.usedPatterns = set()
        self.is_advisory = False

    def addPattern(self, pattern):
        """Add a single pattern line from a .may_include file.  The special
           line "!advisory" marks the whole file as advisory instead."""
        if pattern == "!advisory":
            self.is_advisory = True
            return
        if not pattern_is_normal(pattern):
            warn("Unusual pattern {} in {}".format(pattern, self.dirpath))
        self.patterns.append(pattern)

    def includeOk(self, path):
        """Return true iff 'path' is permitted by some pattern, and record
           which pattern matched (for unused-rule reporting)."""
        for pattern in self.patterns:
            if fnmatch.fnmatchcase(path, pattern):
                self.usedPatterns.add(pattern)
                return True
        return False

    def applyToLines(self, lines, loc_prefix=""):
        """Scan an iterable of source lines; yield an Error for every
           quoted #include whose target is not permitted."""
        lineno = 0
        for line in lines:
            lineno += 1
            m = INCLUDE_PATTERN.match(line)
            if m:
                include = m.group(1)
                if not self.includeOk(include):
                    yield Error("{}{}".format(loc_prefix, str(lineno)),
                                "Forbidden include of {}".format(include),
                                is_advisory=self.is_advisory)

    def applyToFile(self, fname, f):
        """As applyToLines, for an open file 'f' named 'fname'."""
        for error in self.applyToLines(iter(f), "{}:".format(fname)):
            yield error

    def noteUnusedRules(self):
        """Warn about patterns that never matched any include."""
        for p in self.patterns:
            if p not in self.usedPatterns:
                warn("Pattern {} in {} was never used.".format(p, self.dirpath))

    def getAllowedDirectories(self):
        """Return a list of the directories that these patterns permit
           including from (used to build the inter-module dependency graph)."""
        allowed = []
        for p in self.patterns:
            # "some/dir/*.h" or "some/dir/*.inc" allows "some/dir".
            m = re.match(r'^(.*)/\*\.(h|inc)$', p)
            if m:
                allowed.append(m.group(1))
                continue
            # "some/dir/file" allows "some/dir".
            m = re.match(r'^(.*)/[^/]*$', p)
            if m:
                allowed.append(m.group(1))
                continue

        return allowed


def normalize_srcdir(fname):
    """given the name of a source directory or file, return its name
       relative to `src` in a unix-like format.
    """
    orig = fname
    dirname, dirfile = os.path.split(fname)
    if re.match(r'.*\.[ch]$', dirfile):
        fname = dirname

    # Now we have a directory.
    dirname, result = os.path.split(fname)
    for _ in range(100):
        # prevent excess looping in case I missed a tricky case
        dirname, dirpart = os.path.split(dirname)
        if dirpart == 'src' or dirname == "":
            #print(orig,"=>",result)
            return result
        result = "{}/{}".format(dirpart, result)

    print("No progress!")
    assert False

# Cache of rules files we have loaded, keyed by rules filename; a None
# value records that the file does not exist.
include_rules_cache = {}

def load_include_rules(fname):
    """ Read a rules file from 'fname', and return it as a Rules object.
        Return 'None' if fname does not exist.
    """
    if fname in include_rules_cache:
        return include_rules_cache[fname]
    if not os.path.exists(fname):
        include_rules_cache[fname] = None
        return None
    result = Rules(os.path.split(fname)[0])
    with open_file(fname) as f:
        for line in f:
            line = line.strip()
            # Skip comments and blank lines.
            if line.startswith("#") or not line:
                continue
            result.addPattern(line)
    include_rules_cache[fname] = result
    return result

def get_all_include_rules():
    """Return a list of all the Rules objects we have loaded so far,
       sorted by their directory names."""
    return [ rules for (fname, rules) in
             sorted(include_rules_cache.items())
             if rules is not None ]

def remove_self_edges(graph):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).

       Remove all edges from a node to itself."""

    for k in list(graph):
        graph[k] = [ d for d in graph[k] if d != k ]

def closure(graph):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects), and completes
       its transitive closure.  Returns a new mapping from node to a set
       of reachable nodes; the input mapping's lists are not modified.
    """
    graph = graph.copy()
    changed = False
    for k in graph.keys():
        graph[k] = set(graph[k])
    while True:
        # Repeatedly propagate reachability until a full pass adds nothing.
        for k in graph.keys():
            sz = len(graph[k])
            for v in list(graph[k]):
                graph[k].update(graph.get(v, []))
            if sz != len(graph[k]):
                changed = True

        if not changed:
            return graph
        changed = False

def toposort(graph, limit=100):
    """Takes a directed graph in as an adjacency mapping (a mapping from
       node to a list of the nodes to which it connects).  Tries to
       perform a topological sort on the graph, arranging the nodes into
       "levels", such that every member of each level is only reachable
       by members of later levels.

       Returns a list of the members of each level.

       Modifies the input graph, removing every member that could be
       sorted.  If the graph does not become empty, then it contains a
       cycle.

       "limit" is the max depth of the graph after which we give up trying
       to sort it and conclude we have a cycle.
    """
    all_levels = []

    n = 0
    while graph:
        # BUGFIX: the original had a dead no-op "n += 0" here (likely a
        # typo for "n += 1"); the real per-pass increment happens below,
        # before the limit check, so the no-op is simply dropped.
        cur_level = []
        all_levels.append(cur_level)
        for k in list(graph):
            # Prune edges to already-removed nodes; a node with no
            # remaining edges belongs to the current level.
            graph[k] = [ d for d in graph[k] if d in graph ]
            if graph[k] == []:
                cur_level.append(k)
        for k in cur_level:
            del graph[k]
        n += 1
        if n > limit:
            # Too many passes: assume a cycle and give up.
            break

    return all_levels

def consider_include_rules(fname, f):
    """Find the .may_include rules for the directory containing 'fname',
       and yield an Error for each forbidden include in the open file 'f'.
       Yields nothing if the directory has no rules file."""
    dirpath = os.path.split(fname)[0]
    rules_fname = os.path.join(dirpath, RULES_FNAME)
    rules = load_include_rules(rules_fname)
    if rules is None:
        return

    for err in rules.applyToFile(fname, f):
        yield err

def walk_c_files(topdir="src"):
    """Run through all .c and .h files under topdir, looking for
       include-rule violations. Yield those violations."""

    for dirpath, dirnames, fnames in os.walk(topdir):
        for fname in fnames:
            if fname_is_c(fname):
                fullpath = os.path.join(dirpath, fname)
                # Use open_file() so the encoding does not depend on the
                # user's locale on Python 3.
                with open_file(fullpath) as f:
                    for err in consider_include_rules(fullpath, f):
                        yield err

def open_or_stdin(fname):
    """Return an open file for reading 'fname', or sys.stdin if fname
       is '-'."""
    if fname == '-':
        return sys.stdin
    else:
        return open_file(fname)

def check_subsys_file(fname, uses_dirs):
    """Check the subsystem-ordering file 'fname' against the dependency
       mapping 'uses_dirs' (directory -> directories it may include from).
       Each line of the file has three whitespace-separated fields, the
       third of which is a source directory; subsystems must be listed so
       that no earlier subsystem depends on a later one.

       Return True if an ordering inversion was found, False otherwise."""
    if not uses_dirs:
        # We're doing a distcheck build, or for some other reason there are
        # no .may_include files.
        print("SKIPPING")
        return False

    uses_dirs = { normalize_srcdir(k) : { normalize_srcdir(d) for d in v }
                  for (k, v) in uses_dirs.items() }
    uses_closure = closure(uses_dirs)
    ok = True
    previous_subsystems = []

    with open_or_stdin(fname) as f:
        for line in f:
            _, name, fname = line.split()
            fname = normalize_srcdir(fname)
            for prev in previous_subsystems:
                if fname in uses_closure[prev]:
                    print("INVERSION: {} uses {}".format(prev, fname))
                    ok = False
            previous_subsystems.append(fname)
    return not ok

def run_check_includes(topdir, list_unused=False, log_sorted_levels=False,
                       list_advisories=False, check_subsystem_order=None):
    """Top-level driver: scan 'topdir' for include-rule violations and
       report them, then (optionally) report unused rules, check subsystem
       ordering, and topologically sort the module dependency graph.
       Exits the process with status 1 on any hard failure."""
    trouble = False

    for err in walk_c_files(topdir):
        if err.is_advisory and not list_advisories:
            continue
        print(err, file=sys.stderr)
        if not err.is_advisory:
            trouble = True

    if trouble:
        warn(
    """To change which includes are allowed in a C file, edit the {}
    files in its enclosing directory.""".format(RULES_FNAME))
        sys.exit(1)

    if list_unused:
        for rules in get_all_include_rules():
            rules.noteUnusedRules()

    # Build the directory-level dependency graph from the loaded rules.
    uses_dirs = { }
    for rules in get_all_include_rules():
        uses_dirs[rules.incpath] = rules.getAllowedDirectories()

    remove_self_edges(uses_dirs)

    if check_subsystem_order:
        if check_subsys_file(check_subsystem_order, uses_dirs):
            sys.exit(1)

    all_levels = toposort(uses_dirs)

    if log_sorted_levels:
        for (n, cur_level) in enumerate(all_levels):
            if cur_level:
                print(n, cur_level)

    # toposort() empties the graph unless it contains a cycle.
    if uses_dirs:
        print("There are circular .may_include dependencies in here somewhere:",
              uses_dirs)
        sys.exit(1)

def main(argv):
    """Parse command-line arguments and run the include checker."""
    import argparse

    progname = argv[0]
    parser = argparse.ArgumentParser(prog=progname)
    parser.add_argument("--toposort", action="store_true",
                        help="Print a topologically sorted list of modules")
    parser.add_argument("--list-unused", action="store_true",
                        help="List unused lines in .may_include files.")
    parser.add_argument("--list-advisories", action="store_true",
                        help="List advisories as well as forbidden includes")
    parser.add_argument("--check-subsystem-order", action="store",
                        help="Check a list of subsystems for ordering")
    parser.add_argument("topdir", default="src", nargs="?",
                        help="Top-level directory for the tor source")
    args = parser.parse_args(argv[1:])

    global TOPDIR
    TOPDIR = args.topdir
    run_check_includes(topdir=args.topdir,
                       log_sorted_levels=args.toposort,
                       list_unused=args.list_unused,
                       list_advisories=args.list_advisories,
                       check_subsystem_order=args.check_subsystem_order)

if __name__ == '__main__':
    main(sys.argv)