1"""Find modules used by a script, using introspection."""
2
3from __future__ import generators
4import dis
5import imp
6import marshal
7import os
8import sys
9import types
10import struct
11
# Source files are opened in universal-newline mode when the running
# interpreter's file objects expose a "newlines" attribute; plain "r" is the
# fallback kept for Python < 2.3 and is no longer strictly required.
READ_MODE = "U" if hasattr(sys.__stdout__, "newlines") else "r"

# Opcode numbers the bytecode scanners look for, taken from the dis module
# of the running interpreter.
(LOAD_CONST, IMPORT_NAME,
 STORE_NAME, STORE_GLOBAL) = map(dis.opmap.__getitem__,
                                 ("LOAD_CONST", "IMPORT_NAME",
                                  "STORE_NAME", "STORE_GLOBAL"))
STORE_OPS = (STORE_NAME, STORE_GLOBAL)
HAVE_ARGUMENT, EXTENDED_ARG = dis.HAVE_ARGUMENT, dis.EXTENDED_ARG
25
def _unpack_opargs(code):
    """Yield an ``(offset, op, arg)`` triple for each instruction in *code*.

    *code* is indexed one character at a time and each character is passed
    to ``ord()``.  An opcode numbered ``HAVE_ARGUMENT`` or above is followed
    by a two-byte little-endian argument; for any other opcode ``arg`` is
    ``None``.  The argument of an ``EXTENDED_ARG`` instruction is multiplied
    by 65536 and added to the argument of the next instruction that takes
    one.  The ``EXTENDED_ARG`` instruction itself is yielded as well.
    """
    carried = 0     # high bits contributed by a preceding EXTENDED_ARG
    end = len(code)
    pos = 0
    # A manual cursor is needed: an instruction occupies one or three bytes.
    while pos < end:
        offset = pos
        op = ord(code[pos])
        pos += 1
        if op < HAVE_ARGUMENT:
            yield (offset, op, None)
            continue
        low, high = ord(code[pos]), ord(code[pos + 1])
        arg = low + high * 256 + carried
        pos += 2
        carried = arg * 65536 if op == EXTENDED_ARG else 0
        yield (offset, op, arg)
44
# Modulefinder does a good job at simulating Python's import machinery, but it
# cannot handle __path__ modifications packages make at runtime.  Therefore
# there is a mechanism whereby you can register extra paths in this map for a
# package, and they will be honored.

# Note that this is a mapping from package names to lists of paths.
packagePathMap = {}


# A Public interface
def AddPackagePath(packagename, path):
    """Register *path* as an extra search directory for *packagename*.

    The path is appended to the list kept for that package in
    ``packagePathMap``; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
58
# Maps the name a package is found under to the name it is recorded as.
replacePackageMap = {}


def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be stored under *newname*.

    This mechanism allows modulefinder to work around the way the _xmlplus
    package injects itself under the name "xml" into sys.modules at
    runtime: call ReplacePackage("_xmlplus", "xml") before running
    ModuleFinder.  A later call with the same *oldname* overwrites the
    earlier entry.
    """
    replacePackageMap.update({oldname: newname})
68
69
class Module:
    """Plain record describing one module or package.

    The ``__name__``, ``__file__`` and ``__path__`` attributes are taken
    from the constructor arguments; ``__code__`` starts out as ``None``.
    """

    def __init__(self, name, file=None, path=None):
        self.__name__, self.__file__, self.__path__ = name, file, path
        self.__code__ = None
        # Dict used as a set: global names assigned to in the module,
        # including names imported through starimports of Python modules.
        self.globalnames = {}
        # Dict used as a set: starimports this module did that could not
        # be resolved, ie. a starimport from a non-Python module.
        self.starimports = {}

    def __repr__(self):
        # The name is always shown; file and path only when they are set.
        shown = [self.__name__]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                shown.append(optional)
        return "Module(%s)" % ", ".join([repr(value) for value in shown])
93
94class ModuleFinder:
95
96    def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
97        if path is None:
98            path = sys.path
99        self.path = path
100        self.modules = {}
101        self.badmodules = {}
102        self.debug = debug
103        self.indent = 0
104        self.excludes = excludes
105        self.replace_paths = replace_paths
106        self.processed_paths = []   # Used in debugging only
107
108    def msg(self, level, str, *args):
109        if level <= self.debug:
110            for i in range(self.indent):
111                print "   ",
112            print str,
113            for arg in args:
114                print repr(arg),
115            print
116
117    def msgin(self, *args):
118        level = args[0]
119        if level <= self.debug:
120            self.indent = self.indent + 1
121            self.msg(*args)
122
123    def msgout(self, *args):
124        level = args[0]
125        if level <= self.debug:
126            self.indent = self.indent - 1
127            self.msg(*args)
128
129    def run_script(self, pathname):
130        self.msg(2, "run_script", pathname)
131        with open(pathname, READ_MODE) as fp:
132            stuff = ("", "r", imp.PY_SOURCE)
133            self.load_module('__main__', fp, pathname, stuff)
134
135    def load_file(self, pathname):
136        dir, name = os.path.split(pathname)
137        name, ext = os.path.splitext(name)
138        with open(pathname, READ_MODE) as fp:
139            stuff = (ext, "r", imp.PY_SOURCE)
140            self.load_module(name, fp, pathname, stuff)
141
142    def import_hook(self, name, caller=None, fromlist=None, level=-1):
143        self.msg(3, "import_hook", name, caller, fromlist, level)
144        parent = self.determine_parent(caller, level=level)
145        q, tail = self.find_head_package(parent, name)
146        m = self.load_tail(q, tail)
147        if not fromlist:
148            return q
149        if m.__path__:
150            self.ensure_fromlist(m, fromlist)
151        return None
152
153    def determine_parent(self, caller, level=-1):
154        self.msgin(4, "determine_parent", caller, level)
155        if not caller or level == 0:
156            self.msgout(4, "determine_parent -> None")
157            return None
158        pname = caller.__name__
159        if level >= 1: # relative import
160            if caller.__path__:
161                level -= 1
162            if level == 0:
163                parent = self.modules[pname]
164                assert parent is caller
165                self.msgout(4, "determine_parent ->", parent)
166                return parent
167            if pname.count(".") < level:
168                raise ImportError, "relative importpath too deep"
169            pname = ".".join(pname.split(".")[:-level])
170            parent = self.modules[pname]
171            self.msgout(4, "determine_parent ->", parent)
172            return parent
173        if caller.__path__:
174            parent = self.modules[pname]
175            assert caller is parent
176            self.msgout(4, "determine_parent ->", parent)
177            return parent
178        if '.' in pname:
179            i = pname.rfind('.')
180            pname = pname[:i]
181            parent = self.modules[pname]
182            assert parent.__name__ == pname
183            self.msgout(4, "determine_parent ->", parent)
184            return parent
185        self.msgout(4, "determine_parent -> None")
186        return None
187
188    def find_head_package(self, parent, name):
189        self.msgin(4, "find_head_package", parent, name)
190        if '.' in name:
191            i = name.find('.')
192            head = name[:i]
193            tail = name[i+1:]
194        else:
195            head = name
196            tail = ""
197        if parent:
198            qname = "%s.%s" % (parent.__name__, head)
199        else:
200            qname = head
201        q = self.import_module(head, qname, parent)
202        if q:
203            self.msgout(4, "find_head_package ->", (q, tail))
204            return q, tail
205        if parent:
206            qname = head
207            parent = None
208            q = self.import_module(head, qname, parent)
209            if q:
210                self.msgout(4, "find_head_package ->", (q, tail))
211                return q, tail
212        self.msgout(4, "raise ImportError: No module named", qname)
213        raise ImportError, "No module named " + qname
214
215    def load_tail(self, q, tail):
216        self.msgin(4, "load_tail", q, tail)
217        m = q
218        while tail:
219            i = tail.find('.')
220            if i < 0: i = len(tail)
221            head, tail = tail[:i], tail[i+1:]
222            mname = "%s.%s" % (m.__name__, head)
223            m = self.import_module(head, mname, m)
224            if not m:
225                self.msgout(4, "raise ImportError: No module named", mname)
226                raise ImportError, "No module named " + mname
227        self.msgout(4, "load_tail ->", m)
228        return m
229
230    def ensure_fromlist(self, m, fromlist, recursive=0):
231        self.msg(4, "ensure_fromlist", m, fromlist, recursive)
232        for sub in fromlist:
233            if sub == "*":
234                if not recursive:
235                    all = self.find_all_submodules(m)
236                    if all:
237                        self.ensure_fromlist(m, all, 1)
238            elif not hasattr(m, sub):
239                subname = "%s.%s" % (m.__name__, sub)
240                submod = self.import_module(sub, subname, m)
241                if not submod:
242                    raise ImportError, "No module named " + subname
243
244    def find_all_submodules(self, m):
245        if not m.__path__:
246            return
247        modules = {}
248        # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
249        # But we must also collect Python extension modules - although
250        # we cannot separate normal dlls from Python extensions.
251        suffixes = []
252        for triple in imp.get_suffixes():
253            suffixes.append(triple[0])
254        for dir in m.__path__:
255            try:
256                names = os.listdir(dir)
257            except os.error:
258                self.msg(2, "can't list directory", dir)
259                continue
260            for name in names:
261                mod = None
262                for suff in suffixes:
263                    n = len(suff)
264                    if name[-n:] == suff:
265                        mod = name[:-n]
266                        break
267                if mod and mod != "__init__":
268                    modules[mod] = mod
269        return modules.keys()
270
271    def import_module(self, partname, fqname, parent):
272        self.msgin(3, "import_module", partname, fqname, parent)
273        try:
274            m = self.modules[fqname]
275        except KeyError:
276            pass
277        else:
278            self.msgout(3, "import_module ->", m)
279            return m
280        if fqname in self.badmodules:
281            self.msgout(3, "import_module -> None")
282            return None
283        if parent and parent.__path__ is None:
284            self.msgout(3, "import_module -> None")
285            return None
286        try:
287            fp, pathname, stuff = self.find_module(partname,
288                                                   parent and parent.__path__, parent)
289        except ImportError:
290            self.msgout(3, "import_module ->", None)
291            return None
292        try:
293            m = self.load_module(fqname, fp, pathname, stuff)
294        finally:
295            if fp: fp.close()
296        if parent:
297            setattr(parent, partname, m)
298        self.msgout(3, "import_module ->", m)
299        return m
300
301    def load_module(self, fqname, fp, pathname, file_info):
302        suffix, mode, type = file_info
303        self.msgin(2, "load_module", fqname, fp and "fp", pathname)
304        if type == imp.PKG_DIRECTORY:
305            m = self.load_package(fqname, pathname)
306            self.msgout(2, "load_module ->", m)
307            return m
308        if type == imp.PY_SOURCE:
309            co = compile(fp.read()+'\n', pathname, 'exec')
310        elif type == imp.PY_COMPILED:
311            if fp.read(4) != imp.get_magic():
312                self.msgout(2, "raise ImportError: Bad magic number", pathname)
313                raise ImportError, "Bad magic number in %s" % pathname
314            fp.read(4)
315            co = marshal.load(fp)
316        else:
317            co = None
318        m = self.add_module(fqname)
319        m.__file__ = pathname
320        if co:
321            if self.replace_paths:
322                co = self.replace_paths_in_code(co)
323            m.__code__ = co
324            self.scan_code(co, m)
325        self.msgout(2, "load_module ->", m)
326        return m
327
328    def _add_badmodule(self, name, caller):
329        if name not in self.badmodules:
330            self.badmodules[name] = {}
331        if caller:
332            self.badmodules[name][caller.__name__] = 1
333        else:
334            self.badmodules[name]["-"] = 1
335
336    def _safe_import_hook(self, name, caller, fromlist, level=-1):
337        # wrapper for self.import_hook() that won't raise ImportError
338        if name in self.badmodules:
339            self._add_badmodule(name, caller)
340            return
341        try:
342            self.import_hook(name, caller, level=level)
343        except ImportError, msg:
344            self.msg(2, "ImportError:", str(msg))
345            self._add_badmodule(name, caller)
346        else:
347            if fromlist:
348                for sub in fromlist:
349                    if sub in self.badmodules:
350                        self._add_badmodule(sub, caller)
351                        continue
352                    try:
353                        self.import_hook(name, caller, [sub], level=level)
354                    except ImportError, msg:
355                        self.msg(2, "ImportError:", str(msg))
356                        fullname = name + "." + sub
357                        self._add_badmodule(fullname, caller)
358
359    def scan_opcodes(self, co,
360                     unpack = struct.unpack):
361        # Scan the code, and yield 'interesting' opcode combinations
362        # Version for Python 2.4 and older
363        code = co.co_code
364        names = co.co_names
365        consts = co.co_consts
366        opargs = [(op, arg) for _, op, arg in _unpack_opargs(code)
367                  if op != EXTENDED_ARG]
368        for i, (op, oparg) in enumerate(opargs):
369            if c in STORE_OPS:
370                yield "store", (names[oparg],)
371                continue
372            if (op == IMPORT_NAME and i >= 1
373                    and opargs[i-1][0] == LOAD_CONST):
374                fromlist = consts[opargs[i-1][1]]
375                yield "import", (fromlist, names[oparg])
376                continue
377
378    def scan_opcodes_25(self, co):
379        # Scan the code, and yield 'interesting' opcode combinations
380        code = co.co_code
381        names = co.co_names
382        consts = co.co_consts
383        opargs = [(op, arg) for _, op, arg in _unpack_opargs(code)
384                  if op != EXTENDED_ARG]
385        for i, (op, oparg) in enumerate(opargs):
386            if op in STORE_OPS:
387                yield "store", (names[oparg],)
388                continue
389            if (op == IMPORT_NAME and i >= 2
390                    and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
391                level = consts[opargs[i-2][1]]
392                fromlist = consts[opargs[i-1][1]]
393                if level == -1: # normal import
394                    yield "import", (fromlist, names[oparg])
395                elif level == 0: # absolute import
396                    yield "absolute_import", (fromlist, names[oparg])
397                else: # relative import
398                    yield "relative_import", (level, fromlist, names[oparg])
399                continue
400
401    def scan_code(self, co, m):
402        code = co.co_code
403        if sys.version_info >= (2, 5):
404            scanner = self.scan_opcodes_25
405        else:
406            scanner = self.scan_opcodes
407        for what, args in scanner(co):
408            if what == "store":
409                name, = args
410                m.globalnames[name] = 1
411            elif what in ("import", "absolute_import"):
412                fromlist, name = args
413                have_star = 0
414                if fromlist is not None:
415                    if "*" in fromlist:
416                        have_star = 1
417                    fromlist = [f for f in fromlist if f != "*"]
418                if what == "absolute_import": level = 0
419                else: level = -1
420                self._safe_import_hook(name, m, fromlist, level=level)
421                if have_star:
422                    # We've encountered an "import *". If it is a Python module,
423                    # the code has already been parsed and we can suck out the
424                    # global names.
425                    mm = None
426                    if m.__path__:
427                        # At this point we don't know whether 'name' is a
428                        # submodule of 'm' or a global module. Let's just try
429                        # the full name first.
430                        mm = self.modules.get(m.__name__ + "." + name)
431                    if mm is None:
432                        mm = self.modules.get(name)
433                    if mm is not None:
434                        m.globalnames.update(mm.globalnames)
435                        m.starimports.update(mm.starimports)
436                        if mm.__code__ is None:
437                            m.starimports[name] = 1
438                    else:
439                        m.starimports[name] = 1
440            elif what == "relative_import":
441                level, fromlist, name = args
442                if name:
443                    self._safe_import_hook(name, m, fromlist, level=level)
444                else:
445                    parent = self.determine_parent(m, level=level)
446                    self._safe_import_hook(parent.__name__, None, fromlist, level=0)
447            else:
448                # We don't expect anything else from the generator.
449                raise RuntimeError(what)
450
451        for c in co.co_consts:
452            if isinstance(c, type(co)):
453                self.scan_code(c, m)
454
455    def load_package(self, fqname, pathname):
456        self.msgin(2, "load_package", fqname, pathname)
457        newname = replacePackageMap.get(fqname)
458        if newname:
459            fqname = newname
460        m = self.add_module(fqname)
461        m.__file__ = pathname
462        m.__path__ = [pathname]
463
464        # As per comment at top of file, simulate runtime __path__ additions.
465        m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
466
467        fp, buf, stuff = self.find_module("__init__", m.__path__)
468        self.load_module(fqname, fp, buf, stuff)
469        self.msgout(2, "load_package ->", m)
470        if fp:
471            fp.close()
472        return m
473
474    def add_module(self, fqname):
475        if fqname in self.modules:
476            return self.modules[fqname]
477        self.modules[fqname] = m = Module(fqname)
478        return m
479
480    def find_module(self, name, path, parent=None):
481        if parent is not None:
482            # assert path is not None
483            fullname = parent.__name__+'.'+name
484        else:
485            fullname = name
486        if fullname in self.excludes:
487            self.msgout(3, "find_module -> Excluded", fullname)
488            raise ImportError, name
489
490        if path is None:
491            if name in sys.builtin_module_names:
492                return (None, None, ("", "", imp.C_BUILTIN))
493
494            path = self.path
495        return imp.find_module(name, path)
496
497    def report(self):
498        """Print a report to stdout, listing the found modules with their
499        paths, as well as modules that are missing, or seem to be missing.
500        """
501        print
502        print "  %-25s %s" % ("Name", "File")
503        print "  %-25s %s" % ("----", "----")
504        # Print modules found
505        keys = self.modules.keys()
506        keys.sort()
507        for key in keys:
508            m = self.modules[key]
509            if m.__path__:
510                print "P",
511            else:
512                print "m",
513            print "%-25s" % key, m.__file__ or ""
514
515        # Print missing modules
516        missing, maybe = self.any_missing_maybe()
517        if missing:
518            print
519            print "Missing modules:"
520            for name in missing:
521                mods = self.badmodules[name].keys()
522                mods.sort()
523                print "?", name, "imported from", ', '.join(mods)
524        # Print modules that may be missing, but then again, maybe not...
525        if maybe:
526            print
527            print "Submodules that appear to be missing, but could also be",
528            print "global names in the parent package:"
529            for name in maybe:
530                mods = self.badmodules[name].keys()
531                mods.sort()
532                print "?", name, "imported from", ', '.join(mods)
533
534    def any_missing(self):
535        """Return a list of modules that appear to be missing. Use
536        any_missing_maybe() if you want to know which modules are
537        certain to be missing, and which *may* be missing.
538        """
539        missing, maybe = self.any_missing_maybe()
540        return missing + maybe
541
542    def any_missing_maybe(self):
543        """Return two lists, one with modules that are certainly missing
544        and one with modules that *may* be missing. The latter names could
545        either be submodules *or* just global names in the package.
546
547        The reason it can't always be determined is that it's impossible to
548        tell which names are imported when "from module import *" is done
549        with an extension module, short of actually importing it.
550        """
551        missing = []
552        maybe = []
553        for name in self.badmodules:
554            if name in self.excludes:
555                continue
556            i = name.rfind(".")
557            if i < 0:
558                missing.append(name)
559                continue
560            subname = name[i+1:]
561            pkgname = name[:i]
562            pkg = self.modules.get(pkgname)
563            if pkg is not None:
564                if pkgname in self.badmodules[name]:
565                    # The package tried to import this module itself and
566                    # failed. It's definitely missing.
567                    missing.append(name)
568                elif subname in pkg.globalnames:
569                    # It's a global in the package: definitely not missing.
570                    pass
571                elif pkg.starimports:
572                    # It could be missing, but the package did an "import *"
573                    # from a non-Python module, so we simply can't be sure.
574                    maybe.append(name)
575                else:
576                    # It's not a global in the package, the package didn't
577                    # do funny star imports, it's very likely to be missing.
578                    # The symbol could be inserted into the package from the
579                    # outside, but since that's not good style we simply list
580                    # it missing.
581                    missing.append(name)
582            else:
583                missing.append(name)
584        missing.sort()
585        maybe.sort()
586        return missing, maybe
587
588    def replace_paths_in_code(self, co):
589        new_filename = original_filename = os.path.normpath(co.co_filename)
590        for f, r in self.replace_paths:
591            if original_filename.startswith(f):
592                new_filename = r + original_filename[len(f):]
593                break
594
595        if self.debug and original_filename not in self.processed_paths:
596            if new_filename != original_filename:
597                self.msgout(2, "co_filename %r changed to %r" \
598                                    % (original_filename,new_filename,))
599            else:
600                self.msgout(2, "co_filename %r remains unchanged" \
601                                    % (original_filename,))
602            self.processed_paths.append(original_filename)
603
604        consts = list(co.co_consts)
605        for i in range(len(consts)):
606            if isinstance(consts[i], type(co)):
607                consts[i] = self.replace_paths_in_code(consts[i])
608
609        return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,
610                         co.co_flags, co.co_code, tuple(consts), co.co_names,
611                         co.co_varnames, new_filename, co.co_name,
612                         co.co_firstlineno, co.co_lnotab,
613                         co.co_freevars, co.co_cellvars)
614
615
616def test():
617    # Parse command line
618    import getopt
619    try:
620        opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
621    except getopt.error, msg:
622        print msg
623        return
624
625    # Process options
626    debug = 1
627    domods = 0
628    addpath = []
629    exclude = []
630    for o, a in opts:
631        if o == '-d':
632            debug = debug + 1
633        if o == '-m':
634            domods = 1
635        if o == '-p':
636            addpath = addpath + a.split(os.pathsep)
637        if o == '-q':
638            debug = 0
639        if o == '-x':
640            exclude.append(a)
641
642    # Provide default arguments
643    if not args:
644        script = "hello.py"
645    else:
646        script = args[0]
647
648    # Set the path based on sys.path and the script directory
649    path = sys.path[:]
650    path[0] = os.path.dirname(script)
651    path = addpath + path
652    if debug > 1:
653        print "path:"
654        for item in path:
655            print "   ", repr(item)
656
657    # Create the module finder and turn its crank
658    mf = ModuleFinder(path, debug, exclude)
659    for arg in args[1:]:
660        if arg == '-m':
661            domods = 1
662            continue
663        if domods:
664            if arg[-2:] == '.*':
665                mf.import_hook(arg[:-2], None, ["*"])
666            else:
667                mf.import_hook(arg)
668        else:
669            mf.load_file(arg)
670    mf.run_script(script)
671    mf.report()
672    return mf  # for -i debugging
673
674
# Script entry point: run the command-line driver.  The finder it returns is
# bound to the module-level name mf.
if __name__ == '__main__':
    try:
        mf = test()
    except KeyboardInterrupt:
        # Report an interrupted run with a short notice.
        print "\n[interrupt]"
680