1"""Find modules used by a script, using introspection.""" 2 3import dis 4import importlib._bootstrap_external 5import importlib.machinery 6import marshal 7import os 8import io 9import sys 10import types 11import warnings 12 13 14LOAD_CONST = dis.opmap['LOAD_CONST'] 15IMPORT_NAME = dis.opmap['IMPORT_NAME'] 16STORE_NAME = dis.opmap['STORE_NAME'] 17STORE_GLOBAL = dis.opmap['STORE_GLOBAL'] 18STORE_OPS = STORE_NAME, STORE_GLOBAL 19EXTENDED_ARG = dis.EXTENDED_ARG 20 21# Old imp constants: 22 23_SEARCH_ERROR = 0 24_PY_SOURCE = 1 25_PY_COMPILED = 2 26_C_EXTENSION = 3 27_PKG_DIRECTORY = 5 28_C_BUILTIN = 6 29_PY_FROZEN = 7 30 31# Modulefinder does a good job at simulating Python's, but it can not 32# handle __path__ modifications packages make at runtime. Therefore there 33# is a mechanism whereby you can register extra paths in this map for a 34# package, and it will be honored. 35 36# Note this is a mapping is lists of paths. 37packagePathMap = {} 38 39# A Public interface 40def AddPackagePath(packagename, path): 41 packagePathMap.setdefault(packagename, []).append(path) 42 43replacePackageMap = {} 44 45# This ReplacePackage mechanism allows modulefinder to work around 46# situations in which a package injects itself under the name 47# of another package into sys.modules at runtime by calling 48# ReplacePackage("real_package_name", "faked_package_name") 49# before running ModuleFinder. 50 51def ReplacePackage(oldname, newname): 52 replacePackageMap[oldname] = newname 53 54 55def _find_module(name, path=None): 56 """An importlib reimplementation of imp.find_module (for our purposes).""" 57 58 # It's necessary to clear the caches for our Finder first, in case any 59 # modules are being added/deleted/modified at runtime. In particular, 60 # test_modulefinder.py changes file tree contents in a cache-breaking way: 61 62 importlib.machinery.PathFinder.invalidate_caches() 63 64 spec = importlib.machinery.PathFinder.find_spec(name, path) 65 66 if spec is None: 67 raise ImportError("No module named {name!r}".format(name=name), name=name) 68 69 # Some special cases: 70 71 if spec.loader is importlib.machinery.BuiltinImporter: 72 return None, None, ("", "", _C_BUILTIN) 73 74 if spec.loader is importlib.machinery.FrozenImporter: 75 return None, None, ("", "", _PY_FROZEN) 76 77 file_path = spec.origin 78 79 if spec.loader.is_package(name): 80 return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY) 81 82 if isinstance(spec.loader, importlib.machinery.SourceFileLoader): 83 kind = _PY_SOURCE 84 85 elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader): 86 kind = _C_EXTENSION 87 88 elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader): 89 kind = _PY_COMPILED 90 91 else: # Should never happen. 92 return None, None, ("", "", _SEARCH_ERROR) 93 94 file = io.open_code(file_path) 95 suffix = os.path.splitext(file_path)[-1] 96 97 return file, file_path, (suffix, "rb", kind) 98 99 100class Module: 101 102 def __init__(self, name, file=None, path=None): 103 self.__name__ = name 104 self.__file__ = file 105 self.__path__ = path 106 self.__code__ = None 107 # The set of global names that are assigned to in the module. 108 # This includes those names imported through starimports of 109 # Python modules. 110 self.globalnames = {} 111 # The set of starimports this module did that could not be 112 # resolved, ie. a starimport from a non-Python module. 113 self.starimports = {} 114 115 def __repr__(self): 116 s = "Module(%r" % (self.__name__,) 117 if self.__file__ is not None: 118 s = s + ", %r" % (self.__file__,) 119 if self.__path__ is not None: 120 s = s + ", %r" % (self.__path__,) 121 s = s + ")" 122 return s 123 124class ModuleFinder: 125 126 def __init__(self, path=None, debug=0, excludes=None, replace_paths=None): 127 if path is None: 128 path = sys.path 129 self.path = path 130 self.modules = {} 131 self.badmodules = {} 132 self.debug = debug 133 self.indent = 0 134 self.excludes = excludes if excludes is not None else [] 135 self.replace_paths = replace_paths if replace_paths is not None else [] 136 self.processed_paths = [] # Used in debugging only 137 138 def msg(self, level, str, *args): 139 if level <= self.debug: 140 for i in range(self.indent): 141 print(" ", end=' ') 142 print(str, end=' ') 143 for arg in args: 144 print(repr(arg), end=' ') 145 print() 146 147 def msgin(self, *args): 148 level = args[0] 149 if level <= self.debug: 150 self.indent = self.indent + 1 151 self.msg(*args) 152 153 def msgout(self, *args): 154 level = args[0] 155 if level <= self.debug: 156 self.indent = self.indent - 1 157 self.msg(*args) 158 159 def run_script(self, pathname): 160 self.msg(2, "run_script", pathname) 161 with io.open_code(pathname) as fp: 162 stuff = ("", "rb", _PY_SOURCE) 163 self.load_module('__main__', fp, pathname, stuff) 164 165 def load_file(self, pathname): 166 dir, name = os.path.split(pathname) 167 name, ext = os.path.splitext(name) 168 with io.open_code(pathname) as fp: 169 stuff = (ext, "rb", _PY_SOURCE) 170 self.load_module(name, fp, pathname, stuff) 171 172 def import_hook(self, name, caller=None, fromlist=None, level=-1): 173 self.msg(3, "import_hook", name, caller, fromlist, level) 174 parent = self.determine_parent(caller, level=level) 175 q, tail = self.find_head_package(parent, name) 176 m = self.load_tail(q, tail) 177 if not fromlist: 178 return q 179 if m.__path__: 180 self.ensure_fromlist(m, fromlist) 181 return None 182 183 def determine_parent(self, caller, level=-1): 184 self.msgin(4, "determine_parent", caller, level) 185 if not caller or level == 0: 186 self.msgout(4, "determine_parent -> None") 187 return None 188 pname = caller.__name__ 189 if level >= 1: # relative import 190 if caller.__path__: 191 level -= 1 192 if level == 0: 193 parent = self.modules[pname] 194 assert parent is caller 195 self.msgout(4, "determine_parent ->", parent) 196 return parent 197 if pname.count(".") < level: 198 raise ImportError("relative importpath too deep") 199 pname = ".".join(pname.split(".")[:-level]) 200 parent = self.modules[pname] 201 self.msgout(4, "determine_parent ->", parent) 202 return parent 203 if caller.__path__: 204 parent = self.modules[pname] 205 assert caller is parent 206 self.msgout(4, "determine_parent ->", parent) 207 return parent 208 if '.' in pname: 209 i = pname.rfind('.') 210 pname = pname[:i] 211 parent = self.modules[pname] 212 assert parent.__name__ == pname 213 self.msgout(4, "determine_parent ->", parent) 214 return parent 215 self.msgout(4, "determine_parent -> None") 216 return None 217 218 def find_head_package(self, parent, name): 219 self.msgin(4, "find_head_package", parent, name) 220 if '.' in name: 221 i = name.find('.') 222 head = name[:i] 223 tail = name[i+1:] 224 else: 225 head = name 226 tail = "" 227 if parent: 228 qname = "%s.%s" % (parent.__name__, head) 229 else: 230 qname = head 231 q = self.import_module(head, qname, parent) 232 if q: 233 self.msgout(4, "find_head_package ->", (q, tail)) 234 return q, tail 235 if parent: 236 qname = head 237 parent = None 238 q = self.import_module(head, qname, parent) 239 if q: 240 self.msgout(4, "find_head_package ->", (q, tail)) 241 return q, tail 242 self.msgout(4, "raise ImportError: No module named", qname) 243 raise ImportError("No module named " + qname) 244 245 def load_tail(self, q, tail): 246 self.msgin(4, "load_tail", q, tail) 247 m = q 248 while tail: 249 i = tail.find('.') 250 if i < 0: i = len(tail) 251 head, tail = tail[:i], tail[i+1:] 252 mname = "%s.%s" % (m.__name__, head) 253 m = self.import_module(head, mname, m) 254 if not m: 255 self.msgout(4, "raise ImportError: No module named", mname) 256 raise ImportError("No module named " + mname) 257 self.msgout(4, "load_tail ->", m) 258 return m 259 260 def ensure_fromlist(self, m, fromlist, recursive=0): 261 self.msg(4, "ensure_fromlist", m, fromlist, recursive) 262 for sub in fromlist: 263 if sub == "*": 264 if not recursive: 265 all = self.find_all_submodules(m) 266 if all: 267 self.ensure_fromlist(m, all, 1) 268 elif not hasattr(m, sub): 269 subname = "%s.%s" % (m.__name__, sub) 270 submod = self.import_module(sub, subname, m) 271 if not submod: 272 raise ImportError("No module named " + subname) 273 274 def find_all_submodules(self, m): 275 if not m.__path__: 276 return 277 modules = {} 278 # 'suffixes' used to be a list hardcoded to [".py", ".pyc"]. 279 # But we must also collect Python extension modules - although 280 # we cannot separate normal dlls from Python extensions. 281 suffixes = [] 282 suffixes += importlib.machinery.EXTENSION_SUFFIXES[:] 283 suffixes += importlib.machinery.SOURCE_SUFFIXES[:] 284 suffixes += importlib.machinery.BYTECODE_SUFFIXES[:] 285 for dir in m.__path__: 286 try: 287 names = os.listdir(dir) 288 except OSError: 289 self.msg(2, "can't list directory", dir) 290 continue 291 for name in names: 292 mod = None 293 for suff in suffixes: 294 n = len(suff) 295 if name[-n:] == suff: 296 mod = name[:-n] 297 break 298 if mod and mod != "__init__": 299 modules[mod] = mod 300 return modules.keys() 301 302 def import_module(self, partname, fqname, parent): 303 self.msgin(3, "import_module", partname, fqname, parent) 304 try: 305 m = self.modules[fqname] 306 except KeyError: 307 pass 308 else: 309 self.msgout(3, "import_module ->", m) 310 return m 311 if fqname in self.badmodules: 312 self.msgout(3, "import_module -> None") 313 return None 314 if parent and parent.__path__ is None: 315 self.msgout(3, "import_module -> None") 316 return None 317 try: 318 fp, pathname, stuff = self.find_module(partname, 319 parent and parent.__path__, parent) 320 except ImportError: 321 self.msgout(3, "import_module ->", None) 322 return None 323 324 try: 325 m = self.load_module(fqname, fp, pathname, stuff) 326 finally: 327 if fp: 328 fp.close() 329 if parent: 330 setattr(parent, partname, m) 331 self.msgout(3, "import_module ->", m) 332 return m 333 334 def load_module(self, fqname, fp, pathname, file_info): 335 suffix, mode, type = file_info 336 self.msgin(2, "load_module", fqname, fp and "fp", pathname) 337 if type == _PKG_DIRECTORY: 338 m = self.load_package(fqname, pathname) 339 self.msgout(2, "load_module ->", m) 340 return m 341 if type == _PY_SOURCE: 342 co = compile(fp.read(), pathname, 'exec') 343 elif type == _PY_COMPILED: 344 try: 345 data = fp.read() 346 importlib._bootstrap_external._classify_pyc(data, fqname, {}) 347 except ImportError as exc: 348 self.msgout(2, "raise ImportError: " + str(exc), pathname) 349 raise 350 co = marshal.loads(memoryview(data)[16:]) 351 else: 352 co = None 353 m = self.add_module(fqname) 354 m.__file__ = pathname 355 if co: 356 if self.replace_paths: 357 co = self.replace_paths_in_code(co) 358 m.__code__ = co 359 self.scan_code(co, m) 360 self.msgout(2, "load_module ->", m) 361 return m 362 363 def _add_badmodule(self, name, caller): 364 if name not in self.badmodules: 365 self.badmodules[name] = {} 366 if caller: 367 self.badmodules[name][caller.__name__] = 1 368 else: 369 self.badmodules[name]["-"] = 1 370 371 def _safe_import_hook(self, name, caller, fromlist, level=-1): 372 # wrapper for self.import_hook() that won't raise ImportError 373 if name in self.badmodules: 374 self._add_badmodule(name, caller) 375 return 376 try: 377 self.import_hook(name, caller, level=level) 378 except ImportError as msg: 379 self.msg(2, "ImportError:", str(msg)) 380 self._add_badmodule(name, caller) 381 except SyntaxError as msg: 382 self.msg(2, "SyntaxError:", str(msg)) 383 self._add_badmodule(name, caller) 384 else: 385 if fromlist: 386 for sub in fromlist: 387 fullname = name + "." + sub 388 if fullname in self.badmodules: 389 self._add_badmodule(fullname, caller) 390 continue 391 try: 392 self.import_hook(name, caller, [sub], level=level) 393 except ImportError as msg: 394 self.msg(2, "ImportError:", str(msg)) 395 self._add_badmodule(fullname, caller) 396 397 def scan_opcodes(self, co): 398 # Scan the code, and yield 'interesting' opcode combinations 399 code = co.co_code 400 names = co.co_names 401 consts = co.co_consts 402 opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code) 403 if op != EXTENDED_ARG] 404 for i, (op, oparg) in enumerate(opargs): 405 if op in STORE_OPS: 406 yield "store", (names[oparg],) 407 continue 408 if (op == IMPORT_NAME and i >= 2 409 and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST): 410 level = consts[opargs[i-2][1]] 411 fromlist = consts[opargs[i-1][1]] 412 if level == 0: # absolute import 413 yield "absolute_import", (fromlist, names[oparg]) 414 else: # relative import 415 yield "relative_import", (level, fromlist, names[oparg]) 416 continue 417 418 def scan_code(self, co, m): 419 code = co.co_code 420 scanner = self.scan_opcodes 421 for what, args in scanner(co): 422 if what == "store": 423 name, = args 424 m.globalnames[name] = 1 425 elif what == "absolute_import": 426 fromlist, name = args 427 have_star = 0 428 if fromlist is not None: 429 if "*" in fromlist: 430 have_star = 1 431 fromlist = [f for f in fromlist if f != "*"] 432 self._safe_import_hook(name, m, fromlist, level=0) 433 if have_star: 434 # We've encountered an "import *". If it is a Python module, 435 # the code has already been parsed and we can suck out the 436 # global names. 437 mm = None 438 if m.__path__: 439 # At this point we don't know whether 'name' is a 440 # submodule of 'm' or a global module. Let's just try 441 # the full name first. 442 mm = self.modules.get(m.__name__ + "." + name) 443 if mm is None: 444 mm = self.modules.get(name) 445 if mm is not None: 446 m.globalnames.update(mm.globalnames) 447 m.starimports.update(mm.starimports) 448 if mm.__code__ is None: 449 m.starimports[name] = 1 450 else: 451 m.starimports[name] = 1 452 elif what == "relative_import": 453 level, fromlist, name = args 454 if name: 455 self._safe_import_hook(name, m, fromlist, level=level) 456 else: 457 parent = self.determine_parent(m, level=level) 458 self._safe_import_hook(parent.__name__, None, fromlist, level=0) 459 else: 460 # We don't expect anything else from the generator. 461 raise RuntimeError(what) 462 463 for c in co.co_consts: 464 if isinstance(c, type(co)): 465 self.scan_code(c, m) 466 467 def load_package(self, fqname, pathname): 468 self.msgin(2, "load_package", fqname, pathname) 469 newname = replacePackageMap.get(fqname) 470 if newname: 471 fqname = newname 472 m = self.add_module(fqname) 473 m.__file__ = pathname 474 m.__path__ = [pathname] 475 476 # As per comment at top of file, simulate runtime __path__ additions. 477 m.__path__ = m.__path__ + packagePathMap.get(fqname, []) 478 479 fp, buf, stuff = self.find_module("__init__", m.__path__) 480 try: 481 self.load_module(fqname, fp, buf, stuff) 482 self.msgout(2, "load_package ->", m) 483 return m 484 finally: 485 if fp: 486 fp.close() 487 488 def add_module(self, fqname): 489 if fqname in self.modules: 490 return self.modules[fqname] 491 self.modules[fqname] = m = Module(fqname) 492 return m 493 494 def find_module(self, name, path, parent=None): 495 if parent is not None: 496 # assert path is not None 497 fullname = parent.__name__+'.'+name 498 else: 499 fullname = name 500 if fullname in self.excludes: 501 self.msgout(3, "find_module -> Excluded", fullname) 502 raise ImportError(name) 503 504 if path is None: 505 if name in sys.builtin_module_names: 506 return (None, None, ("", "", _C_BUILTIN)) 507 508 path = self.path 509 510 return _find_module(name, path) 511 512 def report(self): 513 """Print a report to stdout, listing the found modules with their 514 paths, as well as modules that are missing, or seem to be missing. 515 """ 516 print() 517 print(" %-25s %s" % ("Name", "File")) 518 print(" %-25s %s" % ("----", "----")) 519 # Print modules found 520 keys = sorted(self.modules.keys()) 521 for key in keys: 522 m = self.modules[key] 523 if m.__path__: 524 print("P", end=' ') 525 else: 526 print("m", end=' ') 527 print("%-25s" % key, m.__file__ or "") 528 529 # Print missing modules 530 missing, maybe = self.any_missing_maybe() 531 if missing: 532 print() 533 print("Missing modules:") 534 for name in missing: 535 mods = sorted(self.badmodules[name].keys()) 536 print("?", name, "imported from", ', '.join(mods)) 537 # Print modules that may be missing, but then again, maybe not... 538 if maybe: 539 print() 540 print("Submodules that appear to be missing, but could also be", end=' ') 541 print("global names in the parent package:") 542 for name in maybe: 543 mods = sorted(self.badmodules[name].keys()) 544 print("?", name, "imported from", ', '.join(mods)) 545 546 def any_missing(self): 547 """Return a list of modules that appear to be missing. Use 548 any_missing_maybe() if you want to know which modules are 549 certain to be missing, and which *may* be missing. 550 """ 551 missing, maybe = self.any_missing_maybe() 552 return missing + maybe 553 554 def any_missing_maybe(self): 555 """Return two lists, one with modules that are certainly missing 556 and one with modules that *may* be missing. The latter names could 557 either be submodules *or* just global names in the package. 558 559 The reason it can't always be determined is that it's impossible to 560 tell which names are imported when "from module import *" is done 561 with an extension module, short of actually importing it. 562 """ 563 missing = [] 564 maybe = [] 565 for name in self.badmodules: 566 if name in self.excludes: 567 continue 568 i = name.rfind(".") 569 if i < 0: 570 missing.append(name) 571 continue 572 subname = name[i+1:] 573 pkgname = name[:i] 574 pkg = self.modules.get(pkgname) 575 if pkg is not None: 576 if pkgname in self.badmodules[name]: 577 # The package tried to import this module itself and 578 # failed. It's definitely missing. 579 missing.append(name) 580 elif subname in pkg.globalnames: 581 # It's a global in the package: definitely not missing. 582 pass 583 elif pkg.starimports: 584 # It could be missing, but the package did an "import *" 585 # from a non-Python module, so we simply can't be sure. 586 maybe.append(name) 587 else: 588 # It's not a global in the package, the package didn't 589 # do funny star imports, it's very likely to be missing. 590 # The symbol could be inserted into the package from the 591 # outside, but since that's not good style we simply list 592 # it missing. 593 missing.append(name) 594 else: 595 missing.append(name) 596 missing.sort() 597 maybe.sort() 598 return missing, maybe 599 600 def replace_paths_in_code(self, co): 601 new_filename = original_filename = os.path.normpath(co.co_filename) 602 for f, r in self.replace_paths: 603 if original_filename.startswith(f): 604 new_filename = r + original_filename[len(f):] 605 break 606 607 if self.debug and original_filename not in self.processed_paths: 608 if new_filename != original_filename: 609 self.msgout(2, "co_filename %r changed to %r" \ 610 % (original_filename,new_filename,)) 611 else: 612 self.msgout(2, "co_filename %r remains unchanged" \ 613 % (original_filename,)) 614 self.processed_paths.append(original_filename) 615 616 consts = list(co.co_consts) 617 for i in range(len(consts)): 618 if isinstance(consts[i], type(co)): 619 consts[i] = self.replace_paths_in_code(consts[i]) 620 621 return co.replace(co_consts=tuple(consts), co_filename=new_filename) 622 623 624def test(): 625 # Parse command line 626 import getopt 627 try: 628 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") 629 except getopt.error as msg: 630 print(msg) 631 return 632 633 # Process options 634 debug = 1 635 domods = 0 636 addpath = [] 637 exclude = [] 638 for o, a in opts: 639 if o == '-d': 640 debug = debug + 1 641 if o == '-m': 642 domods = 1 643 if o == '-p': 644 addpath = addpath + a.split(os.pathsep) 645 if o == '-q': 646 debug = 0 647 if o == '-x': 648 exclude.append(a) 649 650 # Provide default arguments 651 if not args: 652 script = "hello.py" 653 else: 654 script = args[0] 655 656 # Set the path based on sys.path and the script directory 657 path = sys.path[:] 658 path[0] = os.path.dirname(script) 659 path = addpath + path 660 if debug > 1: 661 print("path:") 662 for item in path: 663 print(" ", repr(item)) 664 665 # Create the module finder and turn its crank 666 mf = ModuleFinder(path, debug, exclude) 667 for arg in args[1:]: 668 if arg == '-m': 669 domods = 1 670 continue 671 if domods: 672 if arg[-2:] == '.*': 673 mf.import_hook(arg[:-2], None, ["*"]) 674 else: 675 mf.import_hook(arg) 676 else: 677 mf.load_file(arg) 678 mf.run_script(script) 679 mf.report() 680 return mf # for -i debugging 681 682 683if __name__ == '__main__': 684 try: 685 mf = test() 686 except KeyboardInterrupt: 687 print("\n[interrupted]") 688