1"""Find modules used by a script, using introspection."""
2
3import dis
4import importlib._bootstrap_external
5import importlib.machinery
6import marshal
7import os
8import io
9import sys
10import types
11import warnings
12
13
# Opcode numbers looked up once from dis.opmap; the scanner compares raw
# opcode values against these.
LOAD_CONST, IMPORT_NAME, STORE_NAME, STORE_GLOBAL = (
    dis.opmap[opname]
    for opname in ('LOAD_CONST', 'IMPORT_NAME', 'STORE_NAME', 'STORE_GLOBAL')
)
# Opcodes that bind a name at module (global) scope.
STORE_OPS = (STORE_NAME, STORE_GLOBAL)
EXTENDED_ARG = dis.EXTENDED_ARG

# Old imp constants (module kinds carried in the third element of the
# tuples returned by _find_module); the numeric values are kept as they were.
_SEARCH_ERROR, _PY_SOURCE, _PY_COMPILED, _C_EXTENSION = 0, 1, 2, 3
_PKG_DIRECTORY, _C_BUILTIN, _PY_FROZEN = 5, 6, 7
30
# Modulefinder does a good job at simulating Python's import machinery, but
# it cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.

# Note that this is a mapping from package names to lists of paths.
packagePathMap = {}


# A Public interface
def AddPackagePath(packagename, path):
    """Register *path* as an additional __path__ entry for *packagename*.

    Paths accumulate in registration order; the list for a package is
    created the first time that package is seen.
    """
    extra_paths = packagePathMap.get(packagename)
    if extra_paths is None:
        extra_paths = packagePathMap[packagename] = []
    extra_paths.append(path)
42
# Maps a package's real name to the name it should be recorded under.
replacePackageMap = {}


def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be treated as *newname*.

    This mechanism allows modulefinder to work around situations in which
    a package injects itself under the name of another package into
    sys.modules at runtime.  Call
    ReplacePackage("real_package_name", "faked_package_name")
    before running ModuleFinder.  A later call for the same *oldname*
    overwrites the earlier entry.
    """
    replacePackageMap.update({oldname: newname})
53
54
def _find_module(name, path=None):
    """An importlib reimplementation of imp.find_module (for our purposes).

    Args:
        name: module name passed to PathFinder.find_spec().
        path: list of directories to search, or None to let PathFinder
            use its default search path.

    Returns:
        A ``(file, pathname, (suffix, mode, kind))`` tuple where *kind* is
        one of the old imp constants.  *file* is an open binary file for
        source, compiled and extension modules (the caller must close it)
        and None for packages, built-in and frozen modules.  For packages
        *pathname* is the package directory.

    Raises:
        ImportError: if no spec is found, or if the spec describes a
            namespace package (no loader or no origin), which this
            module cannot analyse.
    """

    # It's necessary to clear the caches for our Finder first, in case any
    # modules are being added/deleted/modified at runtime. In particular,
    # test_modulefinder.py changes file tree contents in a cache-breaking way:

    importlib.machinery.PathFinder.invalidate_caches()

    spec = importlib.machinery.PathFinder.find_spec(name, path)

    if spec is None:
        raise ImportError("No module named {name!r}".format(name=name), name=name)

    loader = spec.loader

    # Some special cases:

    if loader is importlib.machinery.BuiltinImporter:
        return None, None, ("", "", _C_BUILTIN)

    if loader is importlib.machinery.FrozenImporter:
        return None, None, ("", "", _PY_FROZEN)

    file_path = spec.origin

    # A namespace package has no file to inspect: depending on the Python
    # version its spec has no loader, or a loader but no origin.  Without
    # this check the code below fails with AttributeError/TypeError, which
    # no caller handles; ImportError is what callers already expect.
    if loader is None or file_path is None:
        raise ImportError(
            "{name!r} is a namespace package, which is not supported".format(
                name=name),
            name=name)

    if loader.is_package(name):
        return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)

    if isinstance(loader, importlib.machinery.SourceFileLoader):
        kind = _PY_SOURCE

    elif isinstance(loader, importlib.machinery.ExtensionFileLoader):
        kind = _C_EXTENSION

    elif isinstance(loader, importlib.machinery.SourcelessFileLoader):
        kind = _PY_COMPILED

    else:  # Should never happen.
        return None, None, ("", "", _SEARCH_ERROR)

    file = io.open_code(file_path)
    suffix = os.path.splitext(file_path)[-1]

    return file, file_path, (suffix, "rb", kind)
98
99
class Module:
    """Lightweight record describing one module seen by the finder."""

    def __init__(self, name, file=None, path=None):
        """Store the module's name plus optional file and package path."""
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # Global names assigned to in the module, including names pulled
        # in through star-imports of Python modules (dict used as a set).
        self.globalnames = {}
        # Star-imports this module performed that could not be resolved,
        # i.e. a star-import from a non-Python module (dict used as a set).
        self.starimports = {}

    def __repr__(self):
        """Return ``Module(name[, file][, path])``, omitting None fields."""
        shown = [self.__name__]
        shown.extend(
            value for value in (self.__file__, self.__path__)
            if value is not None
        )
        return "Module(%s)" % ", ".join(map(repr, shown))
123
class ModuleFinder:
    """Find the modules a script imports by scanning its bytecode.

    Nothing is executed: source is compiled (or a .pyc is unmarshalled)
    and the resulting code objects are scanned for import and global
    store opcodes.

    Attributes:
        path: search path for top-level imports (defaults to sys.path).
        modules: maps fully qualified name -> Module for everything found.
        badmodules: maps a name that could not be imported -> dict keyed
            by the names of the modules that tried to import it ("-" when
            there was no caller).
        debug: verbosity threshold used by msg()/msgin()/msgout().
        indent: current nesting depth of the debug trace.
        excludes: fully qualified names find_module() refuses to find.
        replace_paths: (old_prefix, new_prefix) pairs applied to
            co_filename by replace_paths_in_code().
        processed_paths: filenames already reported by
            replace_paths_in_code(); used in debugging only.
    """

    def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
        if path is None:
            path = sys.path
        self.path = path
        self.modules = {}
        self.badmodules = {}
        self.debug = debug
        self.indent = 0
        self.excludes = excludes if excludes is not None else []
        self.replace_paths = replace_paths if replace_paths is not None else []
        self.processed_paths = []   # Used in debugging only

    def msg(self, level, str, *args):
        """Print *str* and the repr of *args* if *level* <= self.debug.

        The line is prefixed according to the current indent; the indent
        itself is not changed.
        """
        if level <= self.debug:
            for i in range(self.indent):
                print("   ", end=' ')
            print(str, end=' ')
            for arg in args:
                print(repr(arg), end=' ')
            print()

    def msgin(self, *args):
        """Like msg(), but first increase the indent (entering a call)."""
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent + 1
            self.msg(*args)

    def msgout(self, *args):
        """Like msg(), but first decrease the indent (leaving a call).

        Must only be used to balance an earlier msgin() at the same level.
        """
        level = args[0]
        if level <= self.debug:
            self.indent = self.indent - 1
            self.msg(*args)

    def run_script(self, pathname):
        """Analyse the file *pathname* as the ``__main__`` module."""
        self.msg(2, "run_script", pathname)
        with io.open_code(pathname) as fp:
            stuff = ("", "rb", _PY_SOURCE)
            self.load_module('__main__', fp, pathname, stuff)

    def load_file(self, pathname):
        """Analyse the source file *pathname* under its base name."""
        name, ext = os.path.splitext(os.path.basename(pathname))
        with io.open_code(pathname) as fp:
            stuff = (ext, "rb", _PY_SOURCE)
            self.load_module(name, fp, pathname, stuff)

    def import_hook(self, name, caller=None, fromlist=None, level=-1):
        """Simulate ``import name`` / ``from name import fromlist``.

        Returns the head package when *fromlist* is empty, otherwise None.
        Raises ImportError if any component cannot be found.
        """
        self.msg(3, "import_hook", name, caller, fromlist, level)
        parent = self.determine_parent(caller, level=level)
        q, tail = self.find_head_package(parent, name)
        m = self.load_tail(q, tail)
        if not fromlist:
            return q
        if m.__path__:
            self.ensure_fromlist(m, fromlist)
        return None

    def determine_parent(self, caller, level=-1):
        """Return the package an import made by *caller* is relative to.

        Returns None when there is no caller or *level* is 0 (absolute
        import).  For *level* >= 1 the parent is found by stripping
        components from the caller's name; ImportError is raised when the
        name has too few components.  For a negative *level* the caller's
        own package (if any) is returned.
        """
        self.msgin(4, "determine_parent", caller, level)
        if not caller or level == 0:
            self.msgout(4, "determine_parent -> None")
            return None
        pname = caller.__name__
        if level >= 1: # relative import
            if caller.__path__:
                # A package is its own first-level parent.
                level -= 1
            if level == 0:
                parent = self.modules[pname]
                assert parent is caller
                self.msgout(4, "determine_parent ->", parent)
                return parent
            if pname.count(".") < level:
                raise ImportError("relative importpath too deep")
            pname = ".".join(pname.split(".")[:-level])
            parent = self.modules[pname]
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if caller.__path__:
            parent = self.modules[pname]
            assert caller is parent
            self.msgout(4, "determine_parent ->", parent)
            return parent
        if '.' in pname:
            pname = pname[:pname.rfind('.')]
            parent = self.modules[pname]
            assert parent.__name__ == pname
            self.msgout(4, "determine_parent ->", parent)
            return parent
        self.msgout(4, "determine_parent -> None")
        return None

    def find_head_package(self, parent, name):
        """Import the first component of *name*.

        The component is tried relative to *parent* first and then as a
        top-level module.  Returns ``(module, remaining_dotted_tail)``;
        raises ImportError if neither attempt succeeds.
        """
        self.msgin(4, "find_head_package", parent, name)
        head, _, tail = name.partition('.')
        if parent:
            qname = "%s.%s" % (parent.__name__, head)
        else:
            qname = head
        q = self.import_module(head, qname, parent)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
        if parent:
            # Not found inside the parent package: retry as top-level.
            qname = head
            parent = None
            q = self.import_module(head, qname, parent)
            if q:
                self.msgout(4, "find_head_package ->", (q, tail))
                return q, tail
        self.msgout(4, "raise ImportError: No module named", qname)
        raise ImportError("No module named " + qname)

    def load_tail(self, q, tail):
        """Import each remaining component of *tail* below module *q*.

        Returns the innermost module; raises ImportError on the first
        component that cannot be imported.
        """
        self.msgin(4, "load_tail", q, tail)
        m = q
        while tail:
            head, _, tail = tail.partition('.')
            mname = "%s.%s" % (m.__name__, head)
            m = self.import_module(head, mname, m)
            if not m:
                self.msgout(4, "raise ImportError: No module named", mname)
                raise ImportError("No module named " + mname)
        self.msgout(4, "load_tail ->", m)
        return m

    def ensure_fromlist(self, m, fromlist, recursive=0):
        """Import every name in *fromlist* that is not yet set on *m*.

        "*" expands (once, guarded by *recursive*) to the submodules found
        by find_all_submodules().  Raises ImportError for a name that
        cannot be imported as a submodule.
        """
        self.msg(4, "ensure_fromlist", m, fromlist, recursive)
        for sub in fromlist:
            if sub == "*":
                if not recursive:
                    submodules = self.find_all_submodules(m)
                    if submodules:
                        self.ensure_fromlist(m, submodules, 1)
            elif not hasattr(m, sub):
                subname = "%s.%s" % (m.__name__, sub)
                submod = self.import_module(sub, subname, m)
                if not submod:
                    raise ImportError("No module named " + subname)

    def find_all_submodules(self, m):
        """Return the names of candidate submodules in *m*'s directories.

        Returns None if *m* has no __path__; otherwise a dict keys view of
        file names (minus suffix) that end in an extension, source or
        bytecode suffix, excluding ``__init__``.
        """
        if not m.__path__:
            return
        modules = {}
        # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
        # But we must also collect Python extension modules - although
        # we cannot separate normal dlls from Python extensions.
        suffixes = []
        suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
        suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
        suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
        for directory in m.__path__:
            try:
                names = os.listdir(directory)
            except OSError:
                self.msg(2, "can't list directory", directory)
                continue
            for name in names:
                mod = None
                for suff in suffixes:
                    if name.endswith(suff):
                        mod = name[:-len(suff)]
                        break
                if mod and mod != "__init__":
                    modules[mod] = mod
        return modules.keys()

    def import_module(self, partname, fqname, parent):
        """Return the Module for *fqname*, loading it if needed.

        Returns None (without raising) when the module is already known
        to be bad, when *parent* is not a package, or when it cannot be
        found.  On success the module is also set as an attribute of
        *parent*.
        """
        self.msgin(3, "import_module", partname, fqname, parent)
        try:
            m = self.modules[fqname]
        except KeyError:
            pass
        else:
            self.msgout(3, "import_module ->", m)
            return m
        if fqname in self.badmodules:
            self.msgout(3, "import_module -> None")
            return None
        if parent and parent.__path__ is None:
            self.msgout(3, "import_module -> None")
            return None
        try:
            fp, pathname, stuff = self.find_module(partname,
                                                   parent and parent.__path__, parent)
        except ImportError:
            self.msgout(3, "import_module ->", None)
            return None

        try:
            m = self.load_module(fqname, fp, pathname, stuff)
        finally:
            if fp:
                fp.close()
        if parent:
            setattr(parent, partname, m)
        self.msgout(3, "import_module ->", m)
        return m

    def load_module(self, fqname, fp, pathname, file_info):
        """Register *fqname* and scan its code for further imports.

        *file_info* is the ``(suffix, mode, kind)`` tuple produced by
        find_module().  Packages are delegated to load_package(); source
        is compiled, compiled files are validated and unmarshalled, and
        any other kind is registered without code.
        """
        suffix, mode, kind = file_info
        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
        if kind == _PKG_DIRECTORY:
            m = self.load_package(fqname, pathname)
            self.msgout(2, "load_module ->", m)
            return m
        if kind == _PY_SOURCE:
            co = compile(fp.read(), pathname, 'exec')
        elif kind == _PY_COMPILED:
            try:
                data = fp.read()
                importlib._bootstrap_external._classify_pyc(data, fqname, {})
            except ImportError as exc:
                self.msgout(2, "raise ImportError: " + str(exc), pathname)
                raise
            # Skip the 16-byte .pyc header before unmarshalling.
            co = marshal.loads(memoryview(data)[16:])
        else:
            co = None
        m = self.add_module(fqname)
        m.__file__ = pathname
        if co:
            if self.replace_paths:
                co = self.replace_paths_in_code(co)
            m.__code__ = co
            self.scan_code(co, m)
        self.msgout(2, "load_module ->", m)
        return m

    def _add_badmodule(self, name, caller):
        """Record that *caller* (or "-" if none) failed to import *name*."""
        importers = self.badmodules.setdefault(name, {})
        if caller:
            importers[caller.__name__] = 1
        else:
            importers["-"] = 1

    def _safe_import_hook(self, name, caller, fromlist, level=-1):
        """Wrapper for import_hook() that records failures instead of raising.

        ImportError and SyntaxError from the module itself, or from any
        entry of *fromlist*, are logged and stored in self.badmodules.
        """
        if name in self.badmodules:
            self._add_badmodule(name, caller)
            return
        try:
            self.import_hook(name, caller, level=level)
        except ImportError as msg:
            self.msg(2, "ImportError:", str(msg))
            self._add_badmodule(name, caller)
        except SyntaxError as msg:
            self.msg(2, "SyntaxError:", str(msg))
            self._add_badmodule(name, caller)
        else:
            if fromlist:
                for sub in fromlist:
                    fullname = name + "." + sub
                    if fullname in self.badmodules:
                        self._add_badmodule(fullname, caller)
                        continue
                    try:
                        self.import_hook(name, caller, [sub], level=level)
                    except ImportError as msg:
                        self.msg(2, "ImportError:", str(msg))
                        self._add_badmodule(fullname, caller)
                    except SyntaxError as msg:
                        # A submodule that fails to compile must not abort
                        # the whole scan; treat it like the head import.
                        self.msg(2, "SyntaxError:", str(msg))
                        self._add_badmodule(fullname, caller)

    def scan_opcodes(self, co):
        """Yield ``(kind, args)`` for the interesting opcodes of *co*.

        Kinds are "store" ``(name,)``, "absolute_import"
        ``(fromlist, name)`` and "relative_import"
        ``(level, fromlist, name)``.  An import is recognised as two
        LOAD_CONSTs directly followed by IMPORT_NAME.
        NOTE(review): relies on the private dis._unpack_opargs() yielding
        3-tuples - confirm against the targeted Python version.
        """
        # Scan the code, and yield 'interesting' opcode combinations
        code = co.co_code
        names = co.co_names
        consts = co.co_consts
        opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
                  if op != EXTENDED_ARG]
        for i, (op, oparg) in enumerate(opargs):
            if op in STORE_OPS:
                yield "store", (names[oparg],)
                continue
            if (op == IMPORT_NAME and i >= 2
                    and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
                level = consts[opargs[i-2][1]]
                fromlist = consts[opargs[i-1][1]]
                if level == 0: # absolute import
                    yield "absolute_import", (fromlist, names[oparg])
                else: # relative import
                    yield "relative_import", (level, fromlist, names[oparg])
                continue

    def scan_code(self, co, m):
        """Process the imports and global stores of *co* on behalf of *m*.

        Nested code objects found in co.co_consts are scanned recursively.
        """
        for what, args in self.scan_opcodes(co):
            if what == "store":
                name, = args
                m.globalnames[name] = 1
            elif what == "absolute_import":
                fromlist, name = args
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist, level=0)
                if have_star:
                    # We've encountered an "import *". If it is a Python module,
                    # the code has already been parsed and we can suck out the
                    # global names.
                    mm = None
                    if m.__path__:
                        # At this point we don't know whether 'name' is a
                        # submodule of 'm' or a global module. Let's just try
                        # the full name first.
                        mm = self.modules.get(m.__name__ + "." + name)
                    if mm is None:
                        mm = self.modules.get(name)
                    if mm is not None:
                        m.globalnames.update(mm.globalnames)
                        m.starimports.update(mm.starimports)
                        if mm.__code__ is None:
                            m.starimports[name] = 1
                    else:
                        m.starimports[name] = 1
            elif what == "relative_import":
                level, fromlist, name = args
                if name:
                    self._safe_import_hook(name, m, fromlist, level=level)
                else:
                    # "from . import x": import from the parent package.
                    parent = self.determine_parent(m, level=level)
                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
            else:
                # We don't expect anything else from the generator.
                raise RuntimeError(what)

        for c in co.co_consts:
            if isinstance(c, type(co)):
                self.scan_code(c, m)

    def load_package(self, fqname, pathname):
        """Register the package in directory *pathname* and load its __init__.

        The name is first translated through replacePackageMap and the
        package __path__ is extended with any packagePathMap entries.
        """
        self.msgin(2, "load_package", fqname, pathname)
        newname = replacePackageMap.get(fqname)
        if newname:
            fqname = newname
        m = self.add_module(fqname)
        m.__file__ = pathname
        m.__path__ = [pathname]

        # As per comment at top of file, simulate runtime __path__ additions.
        m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

        fp, buf, stuff = self.find_module("__init__", m.__path__)
        try:
            self.load_module(fqname, fp, buf, stuff)
            self.msgout(2, "load_package ->", m)
            return m
        finally:
            if fp:
                fp.close()

    def add_module(self, fqname):
        """Return the Module registered as *fqname*, creating it if needed."""
        if fqname in self.modules:
            return self.modules[fqname]
        self.modules[fqname] = m = Module(fqname)
        return m

    def find_module(self, name, path, parent=None):
        """Locate *name* and return ``(file, pathname, file_info)``.

        Raises ImportError if the fully qualified name is listed in
        self.excludes or the module cannot be found.  With *path* None,
        built-in module names are recognised directly and self.path is
        searched otherwise.
        """
        if parent is not None:
            # assert path is not None
            fullname = parent.__name__+'.'+name
        else:
            fullname = name
        if fullname in self.excludes:
            # Plain msg(): there is no msgin() here to balance, so msgout()
            # would wrongly decrease the trace indent.
            self.msg(3, "find_module -> Excluded", fullname)
            raise ImportError(name)

        if path is None:
            if name in sys.builtin_module_names:
                return (None, None, ("", "", _C_BUILTIN))

            path = self.path

        return _find_module(name, path)

    def report(self):
        """Print a report to stdout, listing the found modules with their
        paths, as well as modules that are missing, or seem to be missing.
        """
        print()
        print("  %-25s %s" % ("Name", "File"))
        print("  %-25s %s" % ("----", "----"))
        # Print modules found
        for key in sorted(self.modules):
            m = self.modules[key]
            if m.__path__:
                print("P", end=' ')
            else:
                print("m", end=' ')
            print("%-25s" % key, m.__file__ or "")

        # Print missing modules
        missing, maybe = self.any_missing_maybe()
        if missing:
            print()
            print("Missing modules:")
            for name in missing:
                mods = sorted(self.badmodules[name])
                print("?", name, "imported from", ', '.join(mods))
        # Print modules that may be missing, but then again, maybe not...
        if maybe:
            print()
            print("Submodules that appear to be missing, but could also be", end=' ')
            print("global names in the parent package:")
            for name in maybe:
                mods = sorted(self.badmodules[name])
                print("?", name, "imported from", ', '.join(mods))

    def any_missing(self):
        """Return a list of modules that appear to be missing. Use
        any_missing_maybe() if you want to know which modules are
        certain to be missing, and which *may* be missing.
        """
        missing, maybe = self.any_missing_maybe()
        return missing + maybe

    def any_missing_maybe(self):
        """Return two lists, one with modules that are certainly missing
        and one with modules that *may* be missing. The latter names could
        either be submodules *or* just global names in the package.

        The reason it can't always be determined is that it's impossible to
        tell which names are imported when "from module import *" is done
        with an extension module, short of actually importing it.
        """
        missing = []
        maybe = []
        for name in self.badmodules:
            if name in self.excludes:
                continue
            i = name.rfind(".")
            if i < 0:
                missing.append(name)
                continue
            subname = name[i+1:]
            pkgname = name[:i]
            pkg = self.modules.get(pkgname)
            if pkg is not None:
                if pkgname in self.badmodules[name]:
                    # The package tried to import this module itself and
                    # failed. It's definitely missing.
                    missing.append(name)
                elif subname in pkg.globalnames:
                    # It's a global in the package: definitely not missing.
                    pass
                elif pkg.starimports:
                    # It could be missing, but the package did an "import *"
                    # from a non-Python module, so we simply can't be sure.
                    maybe.append(name)
                else:
                    # It's not a global in the package, the package didn't
                    # do funny star imports, it's very likely to be missing.
                    # The symbol could be inserted into the package from the
                    # outside, but since that's not good style we simply list
                    # it missing.
                    missing.append(name)
            else:
                missing.append(name)
        missing.sort()
        maybe.sort()
        return missing, maybe

    def replace_paths_in_code(self, co):
        """Return a copy of *co* with co_filename prefixes rewritten.

        The first matching ``(old, new)`` pair of self.replace_paths is
        applied to the normalised filename; nested code objects in
        co_consts are rewritten recursively.
        """
        new_filename = original_filename = os.path.normpath(co.co_filename)
        for f, r in self.replace_paths:
            if original_filename.startswith(f):
                new_filename = r + original_filename[len(f):]
                break

        if self.debug and original_filename not in self.processed_paths:
            # Plain msg(): this is a one-off note, not the end of a
            # msgin() scope, so the trace indent must stay untouched.
            if new_filename != original_filename:
                self.msg(2, "co_filename %r changed to %r"
                            % (original_filename, new_filename,))
            else:
                self.msg(2, "co_filename %r remains unchanged"
                            % (original_filename,))
            self.processed_paths.append(original_filename)

        consts = tuple(
            self.replace_paths_in_code(const)
            if isinstance(const, type(co)) else const
            for const in co.co_consts
        )

        return co.replace(co_consts=consts, co_filename=new_filename)
622
623
def test():
    """Command-line driver: analyse a script and print the module report.

    Options read from sys.argv: -d (more debug output), -q (no debug
    output), -m (treat the remaining extra arguments as module names
    instead of files), -p PATH (extra search directories, separated by
    os.pathsep) and -x NAME (exclude a module).  The first positional
    argument is the script; it defaults to "hello.py".

    Returns the ModuleFinder, or None if the options could not be parsed.
    """
    # Parse command line
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
    except getopt.error as err:
        print(err)
        return

    # Process options
    debug = 1
    domods = 0
    addpath = []
    exclude = []
    for flag, value in opts:
        if flag == '-d':
            debug += 1
        elif flag == '-m':
            domods = 1
        elif flag == '-p':
            addpath.extend(value.split(os.pathsep))
        elif flag == '-q':
            debug = 0
        elif flag == '-x':
            exclude.append(value)

    # Provide default arguments
    script = args[0] if args else "hello.py"

    # Set the path based on sys.path and the script directory
    path = sys.path[:]
    path[0] = os.path.dirname(script)
    path = addpath + path
    if debug > 1:
        print("path:")
        for entry in path:
            print("   ", repr(entry))

    # Create the module finder and turn its crank
    mf = ModuleFinder(path, debug, exclude)
    for extra in args[1:]:
        if extra == '-m':
            domods = 1
        elif not domods:
            mf.load_file(extra)
        elif extra.endswith('.*'):
            mf.import_hook(extra[:-2], None, ["*"])
        else:
            mf.import_hook(extra)
    mf.run_script(script)
    mf.report()
    return mf  # for -i debugging
681
682
# Script entry point.  The finder is kept in a module-level name so that
# it can be inspected afterwards when running with "python -i".
if __name__ == "__main__":
    try:
        mf = test()
    except KeyboardInterrupt:
        print("\n[interrupted]")
688