1# epydoc -- Source code parsing
2#
3# Copyright (C) 2005 Edward Loper
4# Author: Edward Loper <edloper@loper.org>
5# URL: <http://epydoc.sf.net>
6#
7# $Id: docparser.py 1673 2008-01-29 05:42:58Z edloper $
8
9"""
10Extract API documentation about python objects by parsing their source
11code.
12
13The function L{parse_docs()}, which provides the main interface
14of this module, reads and parses the Python source code for a
15module, and uses it to create an L{APIDoc} object containing
16the API documentation for the variables and values defined in
that module.
18
19Currently, C{parse_docs()} extracts documentation from the following
20source code constructions:
21
22  - module docstring
23  - import statements
24  - class definition blocks
25  - function definition blocks
26  - assignment statements
27    - simple assignment statements
28    - assignment statements with multiple C{'='}s
29    - assignment statements with unpacked left-hand sides
30    - assignment statements that wrap a function in classmethod
31      or staticmethod.
32    - assignment to special variables __path__, __all__, and
33      __docformat__.
34  - delete statements
35
36C{parse_docs()} does not yet support the following source code
37constructions:
38
39  - assignment statements that create properties
40
By default, C{parse_docs()} will explore the contents of top-level
42C{try} and C{if} blocks.  If desired, C{parse_docs()} can also
43be configured to explore the contents of C{while} and C{for} blocks.
44(See the configuration constants, below.)
45
46@todo: Make it possible to extend the functionality of C{parse_docs()},
47       by replacing process_line with a dispatch table that can be
48       customized (similarly to C{docintrospector.register_introspector()}).
49"""
50__docformat__ = 'epytext en'
51
52######################################################################
53## Imports
54######################################################################
55
56# Python source code parsing:
57import token, tokenize
58# Finding modules:
59import imp
60# File services:
61import os, os.path, sys
62# Unicode:
63import codecs
64# API documentation encoding:
65from epydoc.apidoc import *
66# For looking up the docs of builtins:
67import __builtin__, exceptions
68import epydoc.docintrospecter
69# Misc utility functions:
70from epydoc.util import *
71# Backwards compatibility
72from epydoc.compat import *
73
74######################################################################
75## Doc Parser
76######################################################################
77
class ParseError(Exception):
    """
    Raised by C{docparser} to report that the Python source file it
    was processing contains syntactically invalid code.
    """
84
# Filled in by parse_docs(): every newly created ModuleDoc is stored
# here under its source filename *before* the file is processed, and
# later calls for the same filename return the cached object instead
# of parsing the file again.
_moduledoc_cache = {}
"""A cache of C{ModuleDoc}s that we've already created.
C{_moduledoc_cache} is a dictionary mapping from filenames to
C{ValueDoc} objects.
@type: C{dict}"""
90
91#////////////////////////////////////////////////////////////
92# Configuration Constants
93#////////////////////////////////////////////////////////////
94
95#{ Configuration Constants: Control Flow
# One boolean flag per kind of control-flow block; the string literal
# after each flag documents it.  With the defaults below, the bodies
# of try/except/finally and if/elif/else blocks are examined, while
# the bodies of while and for loops are not.
PARSE_TRY_BLOCKS = True
"""Should the contents of C{try} blocks be examined?"""
PARSE_EXCEPT_BLOCKS = True
"""Should the contents of C{except} blocks be examined?"""
PARSE_FINALLY_BLOCKS = True
"""Should the contents of C{finally} blocks be examined?"""
PARSE_IF_BLOCKS = True
"""Should the contents of C{if} blocks be examined?"""
PARSE_ELSE_BLOCKS = True
"""Should the contents of C{else} and C{elif} blocks be examined?"""
PARSE_WHILE_BLOCKS = False
"""Should the contents of C{while} blocks be examined?"""
PARSE_FOR_BLOCKS = False
"""Should the contents of C{for} blocks be examined?"""
110
111#{ Configuration Constants: Imports
# Each setting below is a plain string; the accepted values are listed
# in the string literal that follows the assignment.
IMPORT_HANDLING = 'link'
"""What should C{docparser} do when it encounters an import
statement?
  - C{'link'}: Create variabledoc objects with imported_from pointers
    to the source object.
  - C{'parse'}: Parse the imported file, to find the actual
    documentation for the imported object.  (This will fall back
    to the 'link' behavior if the imported file can't be parsed,
    e.g., if it's a builtin.)
"""

# NOTE(review): "expored" in the text below is presumably a typo for
# "exported"; the fallback wording of the 'introspect' entry also
# speaks of the file not being parseable, which looks copied from the
# 'parse' entry -- confirm against the import handling code.
IMPORT_STAR_HANDLING = 'parse'
"""When C{docparser} encounters a C{'from M{m} import *'}
statement, and is unable to parse C{M{m}} (either because
L{IMPORT_HANDLING}=C{'link'}, or because parsing failed), how
should it determine the list of identifiers expored by C{M{m}}?
  - C{'ignore'}: ignore the import statement, and don't create
    any new variables.
  - C{'parse'}: parse it to find a list of the identifiers that it
    exports.  (This will fall back to the 'ignore' behavior if the
    imported file can't be parsed, e.g., if it's a builtin.)
  - C{'introspect'}: import the module and introspect it (using C{dir})
    to find a list of the identifiers that it exports.  (This will
    fall back to the 'ignore' behavior if the imported file can't
    be parsed, e.g., if it's a builtin.)
"""

DEFAULT_DECORATOR_BEHAVIOR = 'transparent'
"""When C{DocParse} encounters an unknown decorator, what should
it do to the documentation of the decorated function?
  - C{'transparent'}: leave the function's documentation as-is.
  - C{'opaque'}: replace the function's documentation with an
    empty C{ValueDoc} object, reflecting the fact that we have no
    knowledge about what value the decorator returns.
"""

# NOTE(review): the trailing #'link' looks like the alternative value
# kept around for quick switching -- confirm before removing it.
BASE_HANDLING = 'parse'#'link'
"""What should C{docparser} do when it encounters a base class that
was imported from another module?
  - C{'link'}: Create a valuedoc with a C{proxy_for} pointer to the
    base class.
  - C{'parse'}: Parse the file containing the base class, to find
    the actual documentation for it.  (This will fall back to the
    'link' behavior if the imported file can't be parsed, e.g., if
    it's a builtin.)
"""
158
159#{ Configuration Constants: Comment docstrings
# process_file() strips this prefix, trailing whitespace, and one
# leading space (if present) from every comment that starts with it,
# and collects the result as a docstring comment line.
COMMENT_DOCSTRING_MARKER = '#:'
"""The prefix used to mark comments that contain attribute
docstrings for variables."""

#{ Configuration Constants: Grouping
# process_file() uses the rest of the comment, stripped of surrounding
# whitespace, as the group name.
START_GROUP_MARKER = '#{'
"""The prefix used to mark a comment that starts a group.  This marker
should be followed (on the same line) by the name of the group.
Following a start-group comment, all variables defined at the same
indentation level will be assigned to this group name, until the
parser reaches the end of the file, a matching end-group comment, or
another start-group comment at the same indentation level.
"""

# process_file() clears the innermost group that is currently set when
# it sees this marker, and logs a warning if no group is set.
END_GROUP_MARKER = '#}'
"""The prefix used to mark a comment that ends a group.  See
L{START_GROUP_MARKER}."""
177
178#/////////////////////////////////////////////////////////////////
179#{ Module parser
180#/////////////////////////////////////////////////////////////////
181
182def parse_docs(filename=None, name=None, context=None, is_script=False):
183    """
184    Generate the API documentation for a specified object by
185    parsing Python source files, and return it as a L{ValueDoc}.
186    The object to generate documentation for may be specified
187    using the C{filename} parameter I{or} the C{name} parameter.
188    (It is an error to specify both a filename and a name; or to
189    specify neither a filename nor a name).
190
191    @param filename: The name of the file that contains the python
192        source code for a package, module, or script.  If
193        C{filename} is specified, then C{parse} will return a
194        C{ModuleDoc} describing its contents.
195    @param name: The fully-qualified python dotted name of any
196        value (including packages, modules, classes, and
197        functions).  C{parse_docs()} will automatically figure out
198        which module(s) it needs to parse in order to find the
199        documentation for the specified object.
200    @param context: The API documentation for the package that
201        contains C{filename}.  If no context is given, then
202        C{filename} is assumed to contain a top-level module or
203        package.  It is an error to specify a C{context} if the
204        C{name} argument is used.
205    @rtype: L{ValueDoc}
206    """
207    # Always introspect __builtins__ & exceptions (e.g., in case
208    # they're used as base classes.)
209    epydoc.docintrospecter.introspect_docs(__builtin__)
210    epydoc.docintrospecter.introspect_docs(exceptions)
211
212    # If our input is a python object name, then delegate to
213    # _find().
214    if filename is None and name is not None:
215        if context:
216            raise ValueError("context should only be specified together "
217                             "with filename, not with name.")
218        name = DottedName(name)
219        val_doc = _find(name)
220        if val_doc.canonical_name is UNKNOWN:
221            val_doc.canonical_name = name
222        return val_doc
223
224    # If our input is a filename, then create a ModuleDoc for it,
225    # and use process_file() to populate its attributes.
226    elif filename is not None and name is None:
227        # Use a python source version, if possible.
228        if not is_script:
229            try: filename = py_src_filename(filename)
230            except ValueError, e: raise ImportError('%s' % e)
231
232        # Check the cache, first.
233        if filename in _moduledoc_cache:
234            return _moduledoc_cache[filename]
235
236        log.info("Parsing %s" % filename)
237
238        # If the context wasn't provided, then check if the file is in
239        # a package directory.  If so, then update basedir & name to
240        # contain the topmost package's directory and the fully
241        # qualified name for this file.  (This update assume the
242        # default value of __path__ for the parent packages; if the
243        # parent packages override their __path__s, then this can
244        # cause us not to find the value.)
245        if context is None and not is_script:
246            basedir = os.path.split(filename)[0]
247            name = os.path.splitext(os.path.split(filename)[1])[0]
248            if name == '__init__':
249                basedir, name = os.path.split(basedir)
250            context = _parse_package(basedir)
251
252        # Figure out the canonical name of the module we're parsing.
253        if not is_script:
254            module_name, is_pkg = _get_module_name(filename, context)
255        else:
256            module_name = DottedName(munge_script_name(filename))
257            is_pkg = False
258
259        # Create a new ModuleDoc for the module, & add it to the cache.
260        module_doc = ModuleDoc(canonical_name=module_name, variables={},
261                               sort_spec=[], imports=[],
262                               filename=filename, package=context,
263                               is_package=is_pkg, submodules=[],
264                               docs_extracted_by='parser')
265        module_doc.defining_module = module_doc
266        _moduledoc_cache[filename] = module_doc
267
268        # Set the module's __path__ to its default value.
269        if is_pkg:
270            module_doc.path = [os.path.split(module_doc.filename)[0]]
271
272        # Add this module to the parent package's list of submodules.
273        if context is not None:
274            context.submodules.append(module_doc)
275
276        # Tokenize & process the contents of the module's source file.
277        try:
278            process_file(module_doc)
279        except tokenize.TokenError, e:
280            msg, (srow, scol) = e.args
281            raise ParseError('Error during parsing: %s '
282                             '(%s, line %d, char %d)' %
283                             (msg, module_doc.filename, srow, scol))
284        except IndentationError, e:
285            raise ParseError('Error during parsing: %s (%s)' %
286                             (e, module_doc.filename))
287
288        # Handle any special variables (__path__, __docformat__, etc.)
289        handle_special_module_vars(module_doc)
290
291        # Return the completed ModuleDoc
292        return module_doc
293    else:
294        raise ValueError("Expected exactly one of the following "
295                         "arguments: name, filename")
296
def _parse_package(package_dir):
    """
    Return the C{ModuleDoc} for the package stored in
    C{package_dir}, or C{None} if C{is_package_dir()} says that the
    directory is not a package.  Ancestor packages are parsed first,
    so that each package is parsed with its parent's documentation
    as context.
    """
    if is_package_dir(package_dir):
        parent_doc = _parse_package(os.path.split(package_dir)[0])
        init_file = os.path.join(package_dir, '__init__')
        return parse_docs(filename=init_file, context=parent_doc)
    return None
309
310# Special vars:
311# C{__docformat__}, C{__all__}, and C{__path__}.
def handle_special_module_vars(module_doc):
    """
    Interpret the special module variables C{__docformat__},
    C{__all__}, and C{__path__}.  Each variable for which
    C{_module_var_toktree()} finds a token tree is applied to
    C{module_doc} and then removed from C{module_doc.variables}:

      - C{__docformat__}: the parsed string is stored in
        C{module_doc.docformat}.  If it can't be parsed, the
        docformat is left unchanged.
      - C{__all__}: every variable named in the list is marked
        public (and marked as not imported, unless its doc is a
        C{ModuleDoc}); every other variable is marked non-public.
        If the list can't be parsed, C{is_imported} is instead set
        to C{UNKNOWN} for every non-C{ModuleDoc} variable.
      - C{__path__} (packages only): the parsed list is stored in
        C{module_doc.path}.  If it can't be parsed, the path is left
        unchanged.

    @param module_doc: The C{ModuleDoc} to update in place.
    """
    # If __docformat__ is defined, parse its value.
    toktree = _module_var_toktree(module_doc, '__docformat__')
    if toktree is not None:
        try:
            module_doc.docformat = parse_string(toktree)
        except KeyboardInterrupt:
            # Never swallow a user interrupt.
            raise
        except Exception:
            # Best effort: an unparseable __docformat__ is ignored.
            pass
        del module_doc.variables['__docformat__']

    # If __all__ is defined, parse its value.
    toktree = _module_var_toktree(module_doc, '__all__')
    if toktree is not None:
        try:
            public_names = set(parse_string_list(toktree))
            for name, var_doc in module_doc.variables.items():
                if name in public_names:
                    var_doc.is_public = True
                    if not isinstance(var_doc, ModuleDoc):
                        var_doc.is_imported = False
                else:
                    var_doc.is_public = False
        except ParseError:
            # If we couldn't parse the list, give precedence to introspection.
            for name, var_doc in module_doc.variables.items():
                if not isinstance(var_doc, ModuleDoc):
                    var_doc.is_imported = UNKNOWN
        del module_doc.variables['__all__']

    # If __path__ is defined, then extract its value (pkgs only)
    if module_doc.is_package:
        toktree = _module_var_toktree(module_doc, '__path__')
        if toktree is not None:
            try:
                module_doc.path = parse_string_list(toktree)
            except ParseError:
                pass # [xx]
            del module_doc.variables['__path__']
348
349def _module_var_toktree(module_doc, name):
350    var_doc = module_doc.variables.get(name)
351    if (var_doc is None or var_doc.value in (None, UNKNOWN) or
352        var_doc.value.toktree is UNKNOWN):
353        return None
354    else:
355        return var_doc.value.toktree
356
357#////////////////////////////////////////////////////////////
358#{ Module Lookup
359#////////////////////////////////////////////////////////////
360
def _find(name, package_doc=None):
    """
    Return the API documentation for the object called C{name}.
    If C{package_doc} is given, it is the API documentation for the
    package that contains the named object.

    @raise ImportError: If the object can not be found.
    """
    # Work out where to look for the leftmost identifier: anywhere
    # if we're not inside a package, otherwise on the package's path
    # (or in its directory, if its path is unknown).
    if package_doc is None:
        search_path = None
    elif package_doc.path is UNKNOWN:
        search_path = [os.path.split(package_doc.filename)[0]]
    else:
        search_path = package_doc.path

    # The leftmost identifier must be a module or package; parse it.
    module_file = _get_filename(name[0], search_path)
    module_doc = parse_docs(module_file, context=package_doc)

    # A single identifier means the module itself was asked for.
    if len(name) == 1:
        return module_doc

    # Otherwise, try the module's variables first -- but skip
    # variables that merely hold an imported submodule.
    if not _is_submodule_import_var(module_doc, name[1]):
        try:
            return _find_in_namespace(name[1:], module_doc)
        except ImportError:
            pass

    # Not a variable, so it can only live in a subpackage.
    if not module_doc.is_package:
        raise ImportError('Could not find value')
    return _find(name[1:], module_doc)
398
def _is_submodule_import_var(module_doc, var_name):
    """
    Tell whether C{var_name} names a variable of C{module_doc} whose
    C{imported_from} link points at a submodule with that same name,
    i.e. whether the variable exists only because a package imported
    one of its own submodules.
    """
    var_doc = module_doc.variables.get(var_name)
    submodule_name = DottedName(module_doc.canonical_name, var_name)
    if var_doc is None:
        return False
    return var_doc.imported_from == submodule_name
410
411def _find_in_namespace(name, namespace_doc):
412    if name[0] not in namespace_doc.variables:
413        raise ImportError('Could not find value')
414
415    # Look up the variable in the namespace.
416    var_doc = namespace_doc.variables[name[0]]
417    if var_doc.value is UNKNOWN:
418        raise ImportError('Could not find value')
419    val_doc = var_doc.value
420
421    # If the variable's value was imported, then follow its
422    # alias link.
423    if var_doc.imported_from not in (None, UNKNOWN):
424        return _find(var_doc.imported_from+name[1:])
425
426    # Otherwise, if the name has one identifier, then this is the
427    # value we're looking for; return it.
428    elif len(name) == 1:
429        return val_doc
430
431    # Otherwise, if this value is a namespace, look inside it.
432    elif isinstance(val_doc, NamespaceDoc):
433        return _find_in_namespace(name[1:], val_doc)
434
435    # Otherwise, we ran into a dead end.
436    else:
437        raise ImportError('Could not find value')
438
def _get_filename(identifier, path=None):
    """
    Return the name of the Python source file for the module or
    package called C{identifier}, as located by C{imp.find_module()}.

    @param identifier: The (undotted) name of the module or package.
    @param path: The list of directories to search, or C{None} (or
        C{UNKNOWN}) to let C{imp.find_module()} use its default
        search.
    @return: The source file's name.  For a compiled module this is
        the corresponding C{.py} file; for a package it is the
        package's C{__init__.py} (or C{__init__.pyw}) file.
    @raise ImportError: If the module can't be found, or if no
        Python source file exists for it (e.g., builtin modules and
        c extensions).
    """
    if path is UNKNOWN: path = None
    try:
        fp, filename, (_suffix, _mode, typ) = imp.find_module(identifier,
                                                              path)
        # We only want the file's name, not its contents.
        if fp is not None: fp.close()
    except ImportError:
        raise ImportError('No Python source file found.')

    if typ == imp.PY_SOURCE:
        return filename

    if typ == imp.PY_COMPILED:
        # See if we can find a corresponding non-compiled version.
        filename = re.sub(r'\.py\w$', '.py', filename)
        if not os.path.exists(filename):
            raise ImportError('No Python source file found.')
        return filename

    if typ == imp.PKG_DIRECTORY:
        # Build each candidate from the package directory itself, so
        # that the .pyw fallback isn't joined onto the .py path.
        for init_name in ('__init__.py', '__init__.pyw'):
            init_file = os.path.join(filename, init_name)
            if os.path.exists(init_file):
                return init_file
        raise ImportError('No package file found.')

    if typ == imp.C_BUILTIN:
        raise ImportError('No Python source file for builtin modules.')
    if typ == imp.C_EXTENSION:
        raise ImportError('No Python source file for c extensions.')
    raise ImportError('No Python source file found.')
468
469#/////////////////////////////////////////////////////////////////
470#{ File tokenization loop
471#/////////////////////////////////////////////////////////////////
472
473def process_file(module_doc):
474    """
475    Read the given C{ModuleDoc}'s file, and add variables
476    corresponding to any objects defined in that file.  In
477    particular, read and tokenize C{module_doc.filename}, and
478    process each logical line using L{process_line()}.
479    """
480    # Keep track of the current line number:
481    lineno = None
482
483    # Use this list to collect the tokens on a single logical line:
484    line_toks = []
485
486    # This list contains one APIDoc for each indentation level.
487    # The first element is the APIDoc for the module, and each
488    # subsequent element is the APIDoc for the object at that
489    # indentation level.  The final element of the list is the
490    # C{APIDoc} for the entity that we're currently processing.
491    parent_docs = [module_doc]
492
493    # The APIDoc for the object that was defined by the previous
494    # line, if any; or None otherwise.  This is used to update
495    # parent_docs when we encounter an indent; and to decide what
496    # object (if any) is described by a docstring.
497    prev_line_doc = module_doc
498
499    # A list of comments that occur before or on the current
500    # logical line, used to build the comment docstring.  Each
501    # element is a tuple (comment_text, comment_lineno).
502    comments = []
503
504    # A list of decorator lines that occur before the current
505    # logical line.  This is used so we can process a function
506    # declaration line and its decorators all at once.
507    decorators = []
508
509    # A list of group names, one for each indentation level.  This is
510    # used to keep track groups that are defined by comment markers
511    # START_GROUP_MARKER and END_GROUP_MARKER.
512    groups = [None]
513
514    # When we encounter a comment start group marker, set this to the
515    # name of the group; but wait until we're ready to process the
516    # next line before we actually set groups[-1] to this value.  This
517    # is necessary because at the top of a block, the tokenizer gives
518    # us comments before the INDENT token; but if we encounter a group
519    # start marker at the top of a block, then we want it to apply
520    # inside that block, not outside it.
521    start_group = None
522
523    # Check if the source file declares an encoding.
524    encoding = get_module_encoding(module_doc.filename)
525
526    # The token-eating loop:
527    try:
528        module_file = codecs.open(module_doc.filename, 'rU', encoding)
529    except LookupError:
530        log.warning("Unknown encoding %r for %s; using the default"
531                    "encoding instead (iso-8859-1)" %
532                    (encoding, module_doc.filename))
533        encoding = 'iso-8859-1'
534        module_file = codecs.open(module_doc.filename, 'rU', encoding)
535    tok_iter = tokenize.generate_tokens(module_file.readline)
536    for toktype, toktext, (srow,scol), (erow,ecol), line_str in tok_iter:
537        # BOM encoding marker: ignore.
538        if (toktype == token.ERRORTOKEN and
539            (toktext == u'\ufeff' or
540             toktext.encode(encoding) == '\xef\xbb\xbf')):
541            pass
542
543        # Error token: abort
544        elif toktype == token.ERRORTOKEN:
545            raise ParseError('Error during parsing: invalid syntax '
546                             '(%s, line %d, char %d: %r)' %
547                             (module_doc.filename, srow, scol, toktext))
548
549        # Indent token: update the parent_doc stack.
550        elif toktype == token.INDENT:
551            if prev_line_doc is None:
552                parent_docs.append(parent_docs[-1])
553            else:
554                parent_docs.append(prev_line_doc)
555            groups.append(None)
556
557        # Dedent token: update the parent_doc stack.
558        elif toktype == token.DEDENT:
559            if line_toks == []:
560                parent_docs.pop()
561                groups.pop()
562            else:
563                # This *should* only happen if the file ends on an
564                # indented line, with no final newline.
565                # (otherwise, this is the wrong thing to do.)
566                pass
567
568        # Line-internal newline token: if we're still at the start of
569        # the logical line, and we've seen one or more comment lines,
570        # then discard them: blank lines are not allowed between a
571        # comment block and the thing it describes.
572        elif toktype == tokenize.NL:
573            if comments and not line_toks:
574                log.warning('Ignoring docstring comment block followed by '
575                            'a blank line in %r on line %r' %
576                            (module_doc.filename, srow-1))
577                comments = []
578
579        # Comment token: add to comments if appropriate.
580        elif toktype == tokenize.COMMENT:
581            if toktext.startswith(COMMENT_DOCSTRING_MARKER):
582                comment_line = toktext[len(COMMENT_DOCSTRING_MARKER):].rstrip()
583                if comment_line.startswith(" "):
584                    comment_line = comment_line[1:]
585                comments.append( [comment_line, srow])
586            elif toktext.startswith(START_GROUP_MARKER):
587                start_group = toktext[len(START_GROUP_MARKER):].strip()
588            elif toktext.startswith(END_GROUP_MARKER):
589                for i in range(len(groups)-1, -1, -1):
590                    if groups[i]:
591                        groups[i] = None
592                        break
593                else:
594                    log.warning("Got group end marker without a corresponding "
595                                "start marker in %r on line %r" %
596                                (module_doc.filename, srow))
597
598        # Normal token: Add it to line_toks.  (If it's a non-unicode
599        # string literal, then we need to re-encode using the file's
600        # encoding, to get back to the original 8-bit data; and then
601        # convert that string with 8-bit data to a 7-bit ascii
602        # representation.)
603        elif toktype != token.NEWLINE and toktype != token.ENDMARKER:
604            if lineno is None: lineno = srow
605            if toktype == token.STRING:
606                str_prefixes = re.match('[^\'"]*', toktext).group()
607                if 'u' not in str_prefixes:
608                    s = toktext.encode(encoding)
609                    toktext = decode_with_backslashreplace(s)
610            line_toks.append( (toktype, toktext) )
611
612        # Decorator line: add it to the decorators list.
613        elif line_toks and line_toks[0] == (token.OP, '@'):
614            decorators.append(shallow_parse(line_toks))
615            line_toks = []
616
617        # End of line token, but nothing to do.
618        elif line_toks == []:
619            pass
620
621        # End of line token: parse the logical line & process it.
622        else:
623            if start_group:
624                groups[-1] = start_group
625                start_group = None
626
627            if parent_docs[-1] != 'skip_block':
628                try:
629                    prev_line_doc = process_line(
630                        shallow_parse(line_toks), parent_docs, prev_line_doc,
631                        lineno, comments, decorators, encoding)
632                except ParseError, e:
633                    raise ParseError('Error during parsing: invalid '
634                                     'syntax (%s, line %d) -- %s' %
635                                     (module_doc.filename, lineno, e))
636                except KeyboardInterrupt, e: raise
637                except Exception, e:
638                    log.error('Internal error during parsing (%s, line '
639                              '%s):\n%s' % (module_doc.filename, lineno, e))
640                    raise
641
642                # grouping...
643                if groups[-1] and prev_line_doc not in (None, 'skip_block'):
644                    if isinstance(prev_line_doc, VariableDoc):
645                        # prev_line_doc's container will only be
646                        # UNKNOWN if it's an instance variable that
647                        # didn't have a doc-comment, but might still
648                        # be followed by a docstring.  Since we
649                        # tokenize in order, we can't do lookahead to
650                        # see if the variable will have a comment; but
651                        # it should only be added to the container if
652                        # it does.  So we defer the grouping of that
653                        # to be handled by process_docstring instead.
654                        if prev_line_doc.container is not UNKNOWN:
655                            add_to_group(prev_line_doc.container,
656                                         prev_line_doc, groups[-1])
657                    elif isinstance(parent_docs[-1], NamespaceDoc):
658                        add_to_group(parent_docs[-1], prev_line_doc,
659                                     groups[-1])
660            else:
661                prev_line_doc = None
662
663            # Reset line contents.
664            line_toks = []
665            lineno = None
666            comments = []
667            decorators = []
668
def add_to_group(container, api_doc, group_name):
    """
    Record in C{container.group_specs} that C{api_doc} belongs to
    the group called C{group_name}.

    C{group_specs} is a list of C{(group_name, [var_name, ...])}
    tuples; it is created if it is still C{UNKNOWN}.  The name that
    gets recorded is C{api_doc.name} for a C{VariableDoc}, and the
    last identifier of C{api_doc.canonical_name} for anything else.
    """
    if container.group_specs is UNKNOWN:
        container.group_specs = []

    # Work out the name under which api_doc is listed.
    if isinstance(api_doc, VariableDoc):
        member_name = api_doc.name
    else:
        if api_doc.canonical_name is UNKNOWN:
            log.debug('ouch', repr(api_doc))
        member_name = api_doc.canonical_name[-1]

    # Extend the group's entry if there is one...
    for spec_name, members in container.group_specs:
        if spec_name == group_name:
            members.append(member_name)
            return

    # ...and start a new entry otherwise.
    container.group_specs.append( (group_name, [member_name]) )
685
def script_guard(line):
    """
    Tell whether C{line} is the idiomatic script guard
    C{if __name__ == "__main__":}.

    @param line: A sequence of C{(toktype, toktext)} tokens.
    @rtype: C{bool}
    """
    if len(line) != 5:
        return False
    keyword, lhs, operator, rhs, colon = line
    # The test on __name__ comes first, since it is the most selective.
    return (lhs[1] == '__name__'
        and keyword[1] == 'if'
        and operator[1] == '=='
        and colon[1] == ':'
        # Drop the quote characters around the string literal.
        and rhs[1][1:-1] == '__main__')
694
695#/////////////////////////////////////////////////////////////////
696#{ Shallow parser
697#/////////////////////////////////////////////////////////////////
698
def shallow_parse(line_toks):
    """
    Turn a flat list of C{(toktype, toktext)} tokens into a nested
    list (called a X{token tree}).  The leaves of the tree are the
    original tokens, in their original order; every parenthesized,
    bracketed or braced group becomes a sub-list that starts with
    its opening token and ends with its closing token.

    @raise ParseError: If the parentheses, brackets and braces do
        not match, or are not balanced.
    """
    closer_for = {'(': ')', '[': ']', '{': '}'}
    root = []
    # The groups that are currently open, innermost last; the root
    # of the tree sits at the bottom and is never closed.
    open_groups = [root]
    for tok in line_toks:
        _, text = tok
        if text in closer_for:
            open_groups.append([tok])
        elif text in (')', ']', '}'):
            if len(open_groups) == 1:
                raise ParseError('Unbalanced parens')
            group = open_groups.pop()
            if closer_for[group[0][1]] != text:
                raise ParseError('Mismatched parens')
            group.append(tok)
            open_groups[-1].append(group)
        else:
            open_groups[-1].append(tok)
    if len(open_groups) != 1:
        raise ParseError('Unbalanced parens')
    return root
731
732#/////////////////////////////////////////////////////////////////
733#{ Line processing
734#/////////////////////////////////////////////////////////////////
735# The methods process_*() are used to handle lines.
736
def process_line(line, parent_docs, prev_line_doc, lineno,
                 comments, decorators, encoding):
    """
    Pick the C{process_*()} handler that matches the logical line
    C{line} (a token tree), call it with all of this function's
    arguments, and return its result.  The first matching rule
    wins, so the order of the checks below matters.

    @return: Whatever the selected handler returns; or C{None} for
        a blank line, or for a line that no handler matches.
    """
    args = (line, parent_docs, prev_line_doc, lineno,
            comments, decorators, encoding)

    # Blank line.
    if not line:
        return None

    # Compound lines: a block written on one line, or several
    # statements separated by semicolons.
    if (token.OP, ':') in line[:-1]:
        return process_one_line_block(*args)
    if (token.OP, ';') in line:
        return process_multi_stmt(*args)

    # Lines that are recognized by their first token.
    first = line[0]
    if first in ((token.NAME, 'def'), (token.OP, '@')):
        return process_funcdef(*args)
    if first == (token.NAME, 'class'):
        return process_classdef(*args)
    if first == (token.NAME, 'import'):
        return process_import(*args)
    if first == (token.NAME, 'from'):
        return process_from_import(*args)
    if first == (token.NAME, 'del'):
        return process_del(*args)

    # A lone string literal is a docstring.
    if len(line) == 1 and first[0] == token.STRING:
        return process_docstring(*args)
    if (token.OP, '=') in line:
        return process_assignment(*args)
    if first[0] == token.NAME and first[1] in CONTROL_FLOW_KEYWORDS:
        return process_control_flow_line(*args)

    # [xx] do something with control structures like for/if?
    return None
773
774#/////////////////////////////////////////////////////////////////
775# Line handler: control flow
776#/////////////////////////////////////////////////////////////////
777
CONTROL_FLOW_KEYWORDS = [
    #: The control flow keywords.  C{process_line} hands a line to
    #: C{process_control_flow_line} when the line's first token is
    #: one of these keywords (and no earlier test in
    #: C{process_line} has claimed the line).
    'if', 'elif', 'else', 'while', 'for', 'try', 'except', 'finally']
783
def process_control_flow_line(line, parent_docs, prev_line_doc,
                              lineno, comments, decorators, encoding):
    """
    The line handler for lines that begin with one of the
    L{CONTROL_FLOW_KEYWORDS}.

    If the line opens a C{for} block, and C{PARSE_FOR_BLOCKS} is
    true, then a variable is created for the loop variable (when a
    parent can be found for it).

    @return: C{None} to indicate that the contents of the block
        should be processed in the context that we were already
        in; or the string C{'skip_block'} to indicate that the
        contents of the block should be ignored.  The choice is
        made by the C{PARSE_*_BLOCKS} setting for the keyword
        (C{elif} shares C{PARSE_ELSE_BLOCKS} with C{else}).  An
        C{if} line for which C{script_guard} is true is always
        skipped.
    """
    keyword = line[0][1]

    if keyword == 'if':
        explore = PARSE_IF_BLOCKS and not script_guard(line)
    elif keyword == 'elif' or keyword == 'else':
        explore = PARSE_ELSE_BLOCKS
    elif keyword == 'while':
        explore = PARSE_WHILE_BLOCKS
    elif keyword == 'for':
        explore = PARSE_FOR_BLOCKS
        if explore:
            # Create the loop variable.
            target_toks = split_on(line[1:], (token.NAME, 'in'))[0]
            loopvar_name = parse_dotted_name(target_toks)
            owner = get_lhs_parent(loopvar_name, parent_docs)
            if owner is not None:
                loopvar_doc = VariableDoc(name=loopvar_name[-1],
                                          is_alias=False,
                                          is_imported=False,
                                          is_instvar=False,
                                          docs_extracted_by='parser')
                set_variable(owner, loopvar_doc)
    elif keyword == 'try':
        explore = PARSE_TRY_BLOCKS
    elif keyword == 'except':
        explore = PARSE_EXCEPT_BLOCKS
    elif keyword == 'finally':
        explore = PARSE_FINALLY_BLOCKS
    else:
        explore = False

    if explore:
        # Process the block in the context that we are already in.
        return None
    # Ignore the contents of the block.
    return 'skip_block'
814
815#/////////////////////////////////////////////////////////////////
816# Line handler: imports
817#/////////////////////////////////////////////////////////////////
818# [xx] I could optionally add ValueDoc's for the imported
819# variables with proxy_for set to the imported source; but
820# I don't think I gain much of anything by doing so.
821
def process_import(line, parent_docs, prev_line_doc, lineno,
                   comments, decorators, encoding):
    """
    The line handler for plain import statements, such as:

        >>> import os.path, sys as system

    Each comma-separated clause is handed to L{_import_var} (for
    C{import <name>}) or to L{_import_var_as} (for
    C{import <name> as <alias>}).  If the current context
    (C{parent_docs[-1]}) is not a namespace, then the statement is
    ignored.

    @return: C{None}
    @raise ParseError: If a clause does not split into one or two
        pieces around C{as}, or if C{as} is not followed by
        exactly one token.
    """
    if not isinstance(parent_docs[-1], NamespaceDoc):
        return

    for clause in split_on(line[1:], (token.OP, ',')):
        pieces = split_on(clause, (token.NAME, 'as'))
        n_pieces = len(pieces)

        if n_pieces == 2:
            # import <module> as <alias>
            module_toks, alias_toks = pieces
            if len(alias_toks) != 1:
                raise ParseError('Expected identifier after "as"')
            src_name = parse_dotted_name(module_toks)
            var_name = parse_name(alias_toks[0])
            _import_var_as(src_name, var_name, parent_docs)
        elif n_pieces == 1:
            # import <module>
            _import_var(parse_dotted_name(pieces[0]), parent_docs)
        else:
            raise ParseError('Multiple "as" tokens in import')
841
def process_from_import(line, parent_docs, prev_line_doc, lineno,
                        comments, decorators, encoding):
    """
    The line handler for from-import statements, such as:

        >>> from os.path import join, split as s
        >>> from sys import *

    Imports from C{__future__} are ignored.  A star-import is
    handed to L{_process_fromstar_import}.  For any other
    from-import, L{_import_var_as} is called once for each imported
    name.  The list of imported names may be enclosed in
    parentheses, as specified by PEP 328.

    If the current context (C{parent_docs[-1]}) is not a namespace,
    then the statement is ignored.

    @return: C{None}
    @raise ParseError: If the statement does not have the form
        C{from <module> import <names>}; or if one of the imported
        names has neither the form C{<name>} nor the form
        C{<name> as <alias>}.
    """
    if not isinstance(parent_docs[-1], NamespaceDoc): return

    pieces = split_on(line[1:], (token.NAME, 'import'))
    if len(pieces) != 2 or not pieces[0] or not pieces[1]:
        raise ParseError("Bad from-import")
    lhs, rhs = pieces

    # The RHS might be parenthesized, as specified by PEP 328:
    # http://www.python.org/peps/pep-0328.html
    if (len(rhs) == 1 and isinstance(rhs[0], list) and
        rhs[0][0] == (token.OP, '(') and rhs[0][-1] == (token.OP, ')')):
        rhs = rhs[0][1:-1]

    # >>> from __future__ import nested_scopes
    if lhs == [(token.NAME, '__future__')]:
        return

    # >>> from sys import *
    elif rhs == [(token.OP, '*')]:
        src_name = parse_dotted_name(lhs)
        _process_fromstar_import(src_name, parent_docs)

    # >>> from os.path import join, split
    else:
        # Allow relative imports in this case, as per PEP 328
        src_name = parse_dotted_name(lhs,
            parent_name=parent_docs[-1].canonical_name)
        parts = split_on(rhs, (token.OP, ','))
        for part in parts:
            # from m import x
            if len(part) == 1:
                var_name = parse_name(part[0])
                _import_var_as(DottedName(src_name, var_name),
                                    var_name, parent_docs)

            # from m import x as y
            elif len(part) == 3 and part[1] == (token.NAME, 'as'):
                orig_name = parse_name(part[0])
                var_name = parse_name(part[2])
                _import_var_as(DottedName(src_name, orig_name),
                                    var_name, parent_docs)

            else:
                # The exception used to be constructed here but
                # never raised, so a malformed part was silently
                # dropped.
                raise ParseError("Bad from-import")
888
def _process_fromstar_import(src, parent_docs):
    """
    Handle a statement of the form:
        >>> from <src> import *

    If L{IMPORT_HANDLING} or L{IMPORT_STAR_HANDLING} is C{'parse'},
    then first try to find the docs for the module C{M{<src>}} by
    parsing; and if that succeeds, create a variable in
    C{parent_docs[-1]} for each of its public variables (other than
    C{__double_underscore__} names).  When L{IMPORT_HANDLING} is
    C{'parse'}, the values of those variables are copied as well.

    Otherwise (i.e., if parsing was not attempted, or if it did not
    produce a module), and if L{IMPORT_STAR_HANDLING} is
    C{'introspect'}, then the list of exports is found by importing
    and introspecting C{M{<src>}}; and a new variable is created
    for each export.

    @return: C{None}
    """
    # This is redundant: already checked by caller.
    if not isinstance(parent_docs[-1], NamespaceDoc): return

    # If src is package-local, then convert it to a global name.
    src = _global_name(src, parent_docs)

    # Record the import
    parent_docs[0].imports.append(src) # mark that it's .*??

    # [xx] add check for if we already have the source docs in our
    # cache??

    if (IMPORT_HANDLING == 'parse' or
        IMPORT_STAR_HANDLING == 'parse'): # [xx] is this ok?
        try: module_doc = _find(src)
        except ImportError: module_doc = None
        if isinstance(module_doc, ModuleDoc):
            for name, imp_var in module_doc.variables.items():
                # [xx] this is not exactly correct, but close.  It
                # does the wrong thing if a __var__ is explicitly
                # listed in __all__.
                if (imp_var.is_public and
                    not (name.startswith('__') and name.endswith('__'))):
                    var_doc = _add_import_var(DottedName(src, name), name,
                                              parent_docs[-1])
                    if IMPORT_HANDLING == 'parse':
                        var_doc.value = imp_var.value

            # Parsing succeeded, so we're done.  Introspection is
            # only a fallback; falling through to it would add the
            # same variables a second time, this time without the
            # values that were just copied.
            return

    # If we got here, then either we did not try to parse the
    # `src` module, or we failed to parse it.
    if IMPORT_STAR_HANDLING == 'introspect':
        # Importing runs arbitrary module code, which may raise
        # anything; treat any failure as "couldn't import it".  But
        # don't swallow a user interrupt.
        try: module = __import__(str(src), {}, {}, [0])
        except KeyboardInterrupt: raise
        except: return # We couldn't import it.
        if module is None: return # We couldn't import it.
        if hasattr(module, '__all__'):
            names = list(module.__all__)
        else:
            names = [n for n in dir(module) if not n.startswith('_')]
        for name in names:
            _add_import_var(DottedName(src, name), name, parent_docs[-1])
945
def _import_var(name, parent_docs):
    """
    Handle a statement of the form:
        >>> import <name>

    If L{IMPORT_HANDLING} is C{'parse'}, then first try to find
    the value by parsing; and if it is found, create a variable in
    C{parent_docs[-1]} for the first identifier of C{name}.

    Otherwise, add a variable for the imported variable.  (More than
    one variable may be created for cases like C{'import a.b'}, where
    we need to create a variable C{'a'} in C{parent_docs[-1]}
    containing a proxy module; and a variable C{'b'} in the proxy
    module.)  A proxy module that an earlier import already created
    under the same identifier is reused, rather than replaced.

    @return: C{None}
    """
    # This is redundant: already checked by caller.
    if not isinstance(parent_docs[-1], NamespaceDoc): return

    # If name is package-local, then convert it to a global name.
    src = _global_name(name, parent_docs)
    src_prefix = src[:len(src)-len(name)]

    # Record the import
    # NOTE(review): this records the name as written, whereas
    # _import_var_as and _process_fromstar_import record the
    # globalized `src` -- confirm that this is intended.
    parent_docs[0].imports.append(name)

    # [xx] add check for if we already have the source docs in our
    # cache??

    if IMPORT_HANDLING == 'parse':
        # Check to make sure that we can actually find the value.
        try: val_doc = _find(src)
        except ImportError: val_doc = None
        if val_doc is not None:
            # We found it; but it's not the value itself we want to
            # import, but the module containing it; so import that
            # module (=top_mod) and create a variable for it.
            top_mod = src_prefix+name[0]
            var_doc = _add_import_var(top_mod, name[0], parent_docs[-1])
            var_doc.value = _find(DottedName(name[0]))
            return

    # If we got here, then either IMPORT_HANDLING='link', or we
    # did not successfully find the value's docs by parsing; use
    # a variable with an UNKNOWN value.

    # Create any necessary intermediate proxy module values.
    container = parent_docs[-1]
    for i, identifier in enumerate(name[:-1]):
        # `container.variables` maps names to variables, and the
        # module (if any) is the variable's *value*; so that is what
        # must be tested.  (Testing the variable itself never
        # matches, which replaced the proxy module on every import,
        # discarding whatever had already been added to it.)
        if (identifier not in container.variables or
            not isinstance(container.variables[identifier].value,
                           ModuleDoc)):
            var_doc = _add_import_var(name[:i+1], identifier, container)
            var_doc.value = ModuleDoc(variables={}, sort_spec=[],
                                      proxy_for=src_prefix+name[:i+1],
                                      submodules={},
                                      docs_extracted_by='parser')
        container = container.variables[identifier].value

    # Add the variable to the container.
    _add_import_var(src, name[-1], container)
1004
def _import_var_as(src, name, parent_docs):
    """
    Handle a statement of the form:
        >>> import src as name

    The import is recorded in the C{imports} list of
    C{parent_docs[0]}.  If L{IMPORT_HANDLING} is C{'parse'}, and
    the docs for C{src} can be found by parsing, then a variable
    named C{name} is created in C{parent_docs[-1]} with those docs
    as its value.  In every other case, the variable is created by
    L{_add_import_var}, so that it only records (in its
    C{imported_from} attribute) where it was imported from.

    @return: C{None}
    """
    namespace = parent_docs[-1]

    # This is redundant: already checked by caller.
    if not isinstance(namespace, NamespaceDoc):
        return

    # A package-local source name is converted to a global name.
    src = _global_name(src, parent_docs)

    # Record the import.
    parent_docs[0].imports.append(src)

    # Try to find the value's docs, if we're configured to do so.
    found = None
    if IMPORT_HANDLING == 'parse':
        try:
            found = _find(src)
        except ImportError:
            found = None

    if found is None:
        # Either IMPORT_HANDLING='link', or parsing did not find
        # the value's docs: fall back on a variable that has no
        # value of its own.
        _add_import_var(src, name, namespace)
        return

    set_variable(namespace,
                 VariableDoc(name=name, value=found,
                             is_imported=True, is_alias=False,
                             imported_from=src,
                             docs_extracted_by='parser'))
1042
def _add_import_var(src, name, container):
    """
    Create a new imported variable named C{name}, whose
    C{imported_from} attribute is C{src}; and add it to
    C{container} with C{set_variable}.

    @return: The new C{VariableDoc}.
    """
    imported_var = VariableDoc(name=name,
                               imported_from=src,
                               is_imported=True,
                               is_alias=False,
                               docs_extracted_by='parser')
    set_variable(container, imported_var)
    return imported_var
1052
def _global_name(name, parent_docs):
    """
    Convert a package-local name to a global name.

    Starting from the package that contains the current module (or
    from the module itself, if it is a package), each enclosing
    package is searched with C{imp.find_module} for a module named
    C{name[0]}.  If one is found, then C{name} is taken to be
    relative to that package.

    @return: The canonical name of the first package in which
        C{name[0]} was found, followed by C{name}; or C{name}
        itself, if no enclosing package contains it.
    """
    # Find the innermost package to search.
    module_doc = parent_docs[0]
    if module_doc.is_package:
        package = module_doc
    else:
        package = module_doc.package

    # Walk outwards, from the closest package to the furthest.
    while package not in (None, UNKNOWN):
        try:
            handle = imp.find_module(name[0], package.path)[0]
        except ImportError:
            # Not a submodule of this package; try the next one up.
            package = package.package
        else:
            # find_module may hand back an open file; we only
            # wanted to know whether the module exists.
            if handle is not None:
                handle.close()
            return package.canonical_name + name

    # No enclosing package contains `name`; return it as-is.
    return name
1082
1083#/////////////////////////////////////////////////////////////////
1084# Line handler: assignment
1085#/////////////////////////////////////////////////////////////////
1086
def process_assignment(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for assignment statements, such as:

        >>> x = y = 12
        >>> self.name = name

    The line is split on C{'='}; the last piece is the right-hand
    side, and every other piece is a left-hand side.  The statement
    is only processed if the current context (C{parent_docs[-1]})
    is a namespace, or if L{lhs_is_instvar} recognizes it as an
    instance variable assignment; any other assignment is treated
    as a local variable, and ignored.

    A C{VariableDoc} is created for each left-hand side that is a
    simple dotted name (except for C{__slots__}).  The rightmost
    left-hand side is processed first; all of the others are marked
    as aliases.  The value of an instance variable is not recorded
    (it is C{UNKNOWN}).

    @return: The new C{VariableDoc}, if the statement has a single
        left-hand side that is either an undotted name or an
        instance variable, and a variable was created for it;
        otherwise, C{None}.
    """
    # Divide the assignment statement into its pieces.
    pieces = split_on(line, (token.OP, '='))

    lhs_pieces = pieces[:-1]
    rhs = pieces[-1]

    # Decide whether the variable is an instance variable or not.
    # If it's an instance var, then discard the value.
    is_instvar = lhs_is_instvar(lhs_pieces, parent_docs)

    # if it's not an instance var, and we're not in a namespace,
    # then it's just a local var -- so ignore it.
    if not (is_instvar or isinstance(parent_docs[-1], NamespaceDoc)):
        return None

    # Evaluate the right hand side.
    if not is_instvar:
        rhs_val, is_alias = rhs_to_valuedoc(rhs, parent_docs)
    else:
        rhs_val, is_alias = UNKNOWN, False

    # Assign the right hand side value to each left hand side.
    # (Do the rightmost assignment first)
    lhs_pieces.reverse()
    for lhs in lhs_pieces:
        # Try treating the LHS as a simple dotted name.
        # NOTE(review): the bare except treats *any* failure as
        # "not a simple dotted name"; rhs_to_valuedoc catches only
        # ParseError around the same call -- confirm whether the
        # wider net is needed here.
        try: lhs_name = parse_dotted_name(lhs)
        except: lhs_name = None
        if lhs_name is not None:
            lhs_parent = get_lhs_parent(lhs_name, parent_docs)
            if lhs_parent is None: continue

            # Skip a special class variable.
            if lhs_name[-1] == '__slots__':
                continue

            # Create the VariableDoc.
            var_doc = VariableDoc(name=lhs_name[-1], value=rhs_val,
                                  is_imported=False, is_alias=is_alias,
                                  is_instvar=is_instvar,
                                  docs_extracted_by='parser')
            # Extract a docstring from the comments, when present,
            # but only if there's a single LHS.
            if len(lhs_pieces) == 1:
                add_docstring_from_comments(var_doc, comments)

            # Assign the variable to the containing namespace,
            # *unless* the variable is an instance variable
            # without a comment docstring.  In that case, we'll
            # only want to add it if we later discover that it's
            # followed by a variable docstring.  If it is, then
            # process_docstring will take care of adding it to the
            # containing class.  (This is a little hackish, but
            # unfortunately is necessary because we won't know if
            # this assignment line is followed by a docstring
            # until later.)
            if (not is_instvar) or comments:
                set_variable(lhs_parent, var_doc, True)

            # If it's the only var, then return the VarDoc for use
            # as the new `prev_line_doc`.
            if (len(lhs_pieces) == 1 and
                (len(lhs_name) == 1 or is_instvar)):
                return var_doc

        # Otherwise, the LHS must be a complex expression; use
        # dotted_names_in() to decide what variables it contains,
        # and create VariableDoc's for all of them (with UNKNOWN
        # value).
        else:
            # NOTE(review): this scans *all* of the left-hand sides
            # (lhs_pieces), not just the current one (lhs) --
            # confirm against dotted_names_in() that this is the
            # intended argument.
            for lhs_name in dotted_names_in(lhs_pieces):
                lhs_parent = get_lhs_parent(lhs_name, parent_docs)
                if lhs_parent is None: continue
                var_doc = VariableDoc(name=lhs_name[-1],
                                      is_imported=False,
                                      is_alias=is_alias,
                                      is_instvar=is_instvar,
                                      docs_extracted_by='parser')
                set_variable(lhs_parent, var_doc, True)

        # If we have multiple left-hand-sides, then all but the
        # rightmost one are considered aliases.
        is_alias = True
1172
1173
def lhs_is_instvar(lhs_pieces, parent_docs):
    """
    Decide whether the left-hand side of an assignment assigns to
    an instance variable.

    @return: C{True} if the current context (C{parent_docs[-1]})
        is a routine whose positional arguments are known; there is
        exactly one left-hand side, of the form C{<self>.<name>},
        where C{<self>} is the routine's first positional argument
        and C{<name>} is a simple name; and the routine is directly
        contained in a class.  C{False} otherwise.
    """
    routine = parent_docs[-1]
    if not isinstance(routine, RoutineDoc):
        return False

    posargs = routine.posargs
    if posargs is UNKNOWN:
        return False

    # We need a single LHS, and a first argument to compare it to.
    if len(lhs_pieces) != 1 or len(posargs) == 0:
        return False

    # The LHS must consist of exactly: <self> '.' <name>
    target = lhs_pieces[0]
    if len(target) != 3:
        return False
    self_tok, dot_tok, attr_tok = target
    if not (self_tok == (token.NAME, posargs[0]) and
            dot_tok == (token.OP, '.') and
            attr_tok[0] == token.NAME):
        return False

    # Make sure we're in an instance method, and not a module-level
    # function: walking outwards (and stepping over entries that
    # are equal to the routine itself), the first different context
    # must be a class.
    for ancestor in parent_docs[::-1]:
        if isinstance(ancestor, ClassDoc):
            return True
        if ancestor != routine:
            return False
    return False
1196
def rhs_to_valuedoc(rhs, parent_docs):
    """
    Create a value doc for the right-hand side of an assignment.

    @param rhs: The token tree for the right-hand side.
    @return: A pair C{(value, is_alias)}.  If C{rhs} is a dotted
        name that C{lookup_value} resolves to a known value, then
        that value is returned, with C{is_alias=True}.  If C{rhs}
        has the form C{<name>(<arg>)}, and C{<arg>} evaluates to a
        C{RoutineDoc}, then the result of L{apply_decorator} is
        returned.  Otherwise, a new C{GenericValueDoc} is returned,
        whose C{parse_repr} is the source of C{rhs}.  In the last
        two cases, C{is_alias} is C{False}.
    """
    # Dotted variable:
    try:
        resolved = lookup_value(parse_dotted_name(rhs), parent_docs)
    except ParseError:
        resolved = None
    if resolved is not None and resolved is not UNKNOWN:
        return resolved, True

    # Decorators:
    looks_like_call = (len(rhs) == 2 and rhs[0][0] == token.NAME and
                       isinstance(rhs[1], list))
    if looks_like_call:
        # Evaluate what's between the parentheses.
        wrapped, _ = rhs_to_valuedoc(rhs[1][1:-1], parent_docs)
        if isinstance(wrapped, RoutineDoc):
            decorated = apply_decorator(DottedName(rhs[0][1]), wrapped)
            decorated.canonical_name = UNKNOWN
            decorated.parse_repr = pp_toktree(rhs)
            return decorated, False

    # Nothing else to do: make a val with the source as its repr.
    generic = GenericValueDoc(parse_repr=pp_toktree(rhs), toktree=rhs,
                              defining_module=parent_docs[0],
                              docs_extracted_by='parser')
    return generic, False
1221
def get_lhs_parent(lhs_name, parent_docs):
    """
    Find the doc that should contain the variable named by
    C{lhs_name}.

    @param lhs_name: The C{DottedName} of the assignment target.
    @return: If the current context (C{parent_docs[-1]}) is a
        routine, the innermost enclosing class.  Otherwise, the
        current context if C{lhs_name} is a single identifier; or
        else whatever C{lookup_value} returns for the container of
        C{lhs_name}.
    @raise ValueError: If the current context is a routine that is
        not enclosed by any class.
    """
    assert isinstance(lhs_name, DottedName)

    context = parent_docs[-1]

    # For instance vars inside an __init__ method:
    if isinstance(context, RoutineDoc):
        for ancestor in parent_docs[::-1]:
            if isinstance(ancestor, ClassDoc):
                return ancestor
        raise ValueError("%r is not a namespace or method" %
                         parent_docs[-1])

    # For local variables:
    if len(lhs_name) == 1:
        return context

    # For non-local variables:
    return lookup_value(lhs_name.container(), parent_docs)
1240
1241#/////////////////////////////////////////////////////////////////
1242# Line handler: single-line blocks
1243#/////////////////////////////////////////////////////////////////
1244
def process_one_line_block(line, parent_docs, prev_line_doc, lineno,
                           comments, decorators, encoding):
    """
    The line handler for single-line blocks, such as:

        >>> def f(x): return x*2

    This handler calls L{process_line} up to twice: once for the
    tokens up to and including the first colon (the block's
    header), and once for the remaining tokens (the block's body).
    The comment docstring and the decorators are applied to the
    header only.

    What the header's handler returns decides how the body is
    processed:
      - If it returns C{'skip_block'}, then the body is ignored.
      - If it returns C{None}, then the body is processed in the
        context that we were already in.
      - Otherwise, the returned doc becomes the context for the
        body.

    @return: Whatever the handler for the block's header returned.
    """
    i = line.index((token.OP, ':'))
    doc1 = process_line(line[:i+1], parent_docs, prev_line_doc,
                        lineno, comments, decorators, encoding)

    # The header's handler asked for the block to be ignored.
    if doc1 == 'skip_block':
        return doc1

    # A header that created no doc of its own (e.g., an `if` line)
    # keeps its body in the enclosing context.  (Pushing None onto
    # the context stack would make every handler ignore the body.)
    if doc1 is None:
        body_parent_docs = parent_docs
    else:
        body_parent_docs = parent_docs+[doc1]

    process_line(line[i+1:], body_parent_docs,
                 doc1, lineno, None, [], encoding)
    return doc1
1264
1265#/////////////////////////////////////////////////////////////////
1266# Line handler: semicolon-separated statements
1267#/////////////////////////////////////////////////////////////////
1268
def process_multi_stmt(line, parent_docs, prev_line_doc, lineno,
                       comments, decorators, encoding):
    """
    The line handler for semicolon-separated statements, such as:

        >>> x=1; y=2; z=3

    This handler calls L{process_line} once for each non-empty
    statement, in order.  Each statement sees the doc returned for
    the statement before it as its C{prev_line_doc}.  The
    decorators are passed on to the first statement only; and the
    comment docstring is not passed on to any of the statements.
    @return: C{None}
    """
    statements = [stmt for stmt in split_on(line, (token.OP, ';'))
                  if stmt]
    for stmt in statements:
        prev_line_doc = process_line(stmt, parent_docs, prev_line_doc,
                                     lineno, None, decorators, encoding)
        # Only the first statement can be decorated.
        decorators = []
    return None
1288
1289#/////////////////////////////////////////////////////////////////
1290# Line handler: delete statements
1291#/////////////////////////////////////////////////////////////////
1292
def process_del(line, parent_docs, prev_line_doc, lineno,
                comments, decorators, encoding):
    """
    The line handler for delete statements, such as:

        >>> del x, y.z

    The tokens after C{del} are split on commas, and
    C{del_variable} is called on the current context
    (C{parent_docs[-1]}) for every name that C{dotted_names_in}
    finds in the resulting list.  If the current context is not a
    namespace, then the statement is ignored.
    @return: C{None}
    """
    namespace = parent_docs[-1]

    # Only namespaces have variables that we can delete.
    if isinstance(namespace, NamespaceDoc):
        targets = split_on(line[1:], (token.OP, ','))
        for dotted_name in dotted_names_in(targets):
            del_variable(namespace, dotted_name)

    return None
1314
1315#/////////////////////////////////////////////////////////////////
1316# Line handler: docstrings
1317#/////////////////////////////////////////////////////////////////
1318
def process_docstring(line, parent_docs, prev_line_doc, lineno,
                      comments, decorators, encoding):
    """
    The line handler for bare string literals.  If
    C{prev_line_doc} is C{None}, then the string literal is
    ignored.  Otherwise, it becomes the docstring of that
    C{APIDoc}.  If the C{APIDoc} already has a docstring (from
    comment docstrings), then a warning is logged, and the old
    docstring is replaced by the new one.

    @return: C{prev_line_doc}, if it is an instance variable that
        was added to its class by this call; otherwise, C{None}.
    """
    if prev_line_doc is None:
        return
    text = parse_string(line)

    # If the docstring is a str, then convert it to unicode.
    # According to a strict reading of PEP 263, this might not be the
    # right thing to do; but it will almost always be what the
    # module's author intended.
    if isinstance(text, str):
        try:
            text = text.decode(encoding)
        except UnicodeDecodeError:
            # Decoding failed; fall back on using
            # decode_with_backslashreplace, which will map e.g.
            # "\xe9" -> u"\\xe9".
            text = decode_with_backslashreplace(text)
            log.warning("While parsing %s: docstring is not a unicode "
                        "string, but it contains non-ascii data." %
                        prev_line_doc.canonical_name)

    # An instance variable is only added to its class once we know
    # that it has a docstring.  process_assignment() has already
    # added it if it had a comment docstring; if it had none, then
    # it is still waiting to be added, and this is where that
    # happens.  (For more info, see the comment above the
    # set_variable() call in process_assignment().)
    registered = False
    pending_instvar = (isinstance(prev_line_doc, VariableDoc) and
                       prev_line_doc.is_instvar and
                       prev_line_doc.docstring in (None, UNKNOWN))
    if pending_instvar:
        # Add it to the innermost enclosing class.
        for ancestor in parent_docs[::-1]:
            if isinstance(ancestor, ClassDoc):
                set_variable(ancestor, prev_line_doc, True)
                registered = True
                break

    if prev_line_doc.docstring not in (None, UNKNOWN):
        log.warning("%s has both a comment-docstring and a normal "
                    "(string) docstring; ignoring the comment-"
                    "docstring." % prev_line_doc.canonical_name)

    prev_line_doc.docstring = text
    prev_line_doc.docstring_lineno = lineno

    # If we added an instance variable to its class here, then it
    # still needs to be grouped too; so return it for use as the
    # new "prev_line_doc."
    if not registered:
        return None
    return prev_line_doc
1377
1378
1379#/////////////////////////////////////////////////////////////////
1380# Line handler: function declarations
1381#/////////////////////////////////////////////////////////////////
1382
def process_funcdef(line, parent_docs, prev_line_doc, lineno,
                    comments, decorators, encoding):
    """
    The line handler for function declaration lines, such as:

        >>> def f(a, b=22, (c,d)):

    This handler creates and initializes a new C{RoutineDoc};
    applies each of the decorators to it, starting with the last
    one in C{decorators}; wraps the result in a new C{VariableDoc};
    and adds that C{VariableDoc} to the containing namespace.  If
    the current context (C{parent_docs[-1]}) is not a namespace,
    then the declaration is ignored.

    Note that the C{decorators} list is reversed in place.

    @return: The value doc for the function (i.e., the
        C{RoutineDoc}, after the decorators have been applied to
        it); or C{None} if the declaration was ignored.
    @raise ParseError: If the line does not consist of exactly
        four elements, the last of which is a colon.
    """
    # Check syntax: 'def' <name> <arglist> ':'
    if not (len(line) == 4 and line[3] == (token.OP, ':')):
        raise ParseError("Bad function definition line")

    # If we're not in a namespace, then ignore it.
    namespace = parent_docs[-1]
    if not isinstance(namespace, NamespaceDoc):
        return

    name_tok, arglist = line[1], line[2]

    # Create the function's RoutineDoc.
    func_name = parse_name(name_tok)
    func_doc = RoutineDoc(
        canonical_name=DottedName(namespace.canonical_name, func_name),
        defining_module=parent_docs[0],
        lineno=lineno, docs_extracted_by='parser')

    # Process the signature.
    init_arglist(func_doc, arglist)

    # If the preceding comment includes a docstring, then add it.
    add_docstring_from_comments(func_doc, comments)

    # Record the decorators in source order; then apply them from
    # the innermost (last) one outwards.
    func_doc.decorators = [pp_toktree(deco[1:]) for deco in decorators]
    decorators.reverse()
    for decorator in decorators:
        deco_toks = decorator[1:]
        try:
            deco_name = parse_dotted_name(deco_toks)
        except ParseError:
            deco_name = None

        # Decide how the decorated value should be displayed.
        if func_doc.canonical_name is not UNKNOWN:
            deco_repr = '%s(%s)' % (pp_toktree(deco_toks),
                                    func_doc.canonical_name)
        elif func_doc.parse_repr not in (None, UNKNOWN):
            # [xx] this case should be improved.. when will func_doc
            # have a known parse_repr??
            deco_repr = '%s(%s)' % (pp_toktree(deco_toks),
                                    func_doc.parse_repr)
        else:
            deco_repr = UNKNOWN

        func_doc = apply_decorator(deco_name, func_doc)
        func_doc.parse_repr = deco_repr
        # [XX] Is there a reason the following should be done?  It
        # causes the grouping code to break.  Presumably the canonical
        # name should remain valid if we're just applying a standard
        # decorator.
        #func_doc.canonical_name = UNKNOWN

    # Add a variable to the containing namespace.
    set_variable(namespace,
                 VariableDoc(name=func_name, value=func_doc,
                             is_imported=False, is_alias=False,
                             docs_extracted_by='parser'))

    # Return the new ValueDoc.
    return func_doc
1451
def apply_decorator(decorator_name, func_doc):
    """
    Create the value doc that results from applying a decorator to
    C{func_doc}.

    @param decorator_name: The name of the decorator; or C{None} if
        the decorator is not a simple dotted name.
    @return: A new C{StaticMethodDoc} or C{ClassMethodDoc} with the
        attributes of C{func_doc}, if the decorator is
        C{staticmethod} or C{classmethod}.  For any other
        decorator, the result depends on
        C{DEFAULT_DECORATOR_BEHAVIOR}: a copy of C{func_doc} if it
        is C{'transparent'}; or a new, empty C{GenericValueDoc} if
        it is C{'opaque'}.
    @raise ValueError: If the decorator is neither C{staticmethod}
        nor C{classmethod}, and C{DEFAULT_DECORATOR_BEHAVIOR} is
        neither C{'transparent'} nor C{'opaque'}.
    """
    # [xx] what if func_doc is not a RoutineDoc?
    if decorator_name == DottedName('staticmethod'):
        return StaticMethodDoc(**func_doc.__dict__)
    elif decorator_name == DottedName('classmethod'):
        return ClassMethodDoc(**func_doc.__dict__)
    elif DEFAULT_DECORATOR_BEHAVIOR == 'transparent':
        return func_doc.__class__(**func_doc.__dict__) # make a copy.
    elif DEFAULT_DECORATOR_BEHAVIOR == 'opaque':
        return GenericValueDoc(docs_extracted_by='parser')
    else:
        # Use the call form of raise, like the rest of this module.
        raise ValueError('Bad value for DEFAULT_DECORATOR_BEHAVIOR')
1464
def init_arglist(func_doc, arglist):
    """
    Initialize the signature attributes of C{func_doc} (C{posargs},
    C{posarg_defaults}, C{vararg}, and C{kwarg}) from the token
    tree for a function's argument list.

    A positional argument that has no default gets C{None} as its
    entry in C{posarg_defaults}; one that has a default gets a
    C{GenericValueDoc}, whose C{parse_repr} is the source of the
    default value.

    @param arglist: The parenthesized group of tokens that follows
        the function's name.
    @raise ParseError: If C{arglist} is not a group that starts
        with C{'('}; if C{*} or C{**} is not followed by exactly
        one name; or if a positional argument has more than one
        token, but is not of the form C{<arg>=<default>}.
    """
    if not isinstance(arglist, list) or arglist[0] != (token.OP, '('):
        raise ParseError("Bad argument list")

    # Initialize to defaults.
    func_doc.posargs = []
    func_doc.posarg_defaults = []
    func_doc.vararg = None
    func_doc.kwarg = None

    # Divide the arglist into individual args.
    args = split_on(arglist[1:-1], (token.OP, ','))

    # The keyword argument (**) and the vararg argument (*) can
    # only come at the end, in that order when read from the right.
    star_args = (
        ('**', 'kwarg', "Expected name after ** in argument list"),
        ('*', 'vararg', "Expected name after * in argument list"),
        )
    for marker, attr, complaint in star_args:
        if args and args[-1][0] == (token.OP, marker):
            star_arg = args[-1]
            if len(star_arg) != 2 or star_arg[1][0] != token.NAME:
                raise ParseError(complaint)
            setattr(func_doc, attr, star_arg[1][1])
            args.pop()

    # Positional arguments.
    for arg in args:
        func_doc.posargs.append(parse_funcdef_arg(arg[0]))
        if len(arg) == 1:
            # No default value.
            default_val = None
        elif arg[1] != (token.OP, '=') or len(arg) == 2:
            raise ParseError("Bad argument list")
        else:
            default_val = GenericValueDoc(
                parse_repr=pp_toktree(arg[2:], 'tight'),
                docs_extracted_by='parser')
        func_doc.posarg_defaults.append(default_val)
1504
1505#/////////////////////////////////////////////////////////////////
1506# Line handler: class declarations
1507#/////////////////////////////////////////////////////////////////
1508
1509def process_classdef(line, parent_docs, prev_line_doc, lineno,
1510                     comments, decorators, encoding):
1511    """
1512    The line handler for class declaration lines, such as:
1513
1514        >>> class Foo(Bar, Baz):
1515
1516    This handler creates and initializes a new C{VariableDoc}
1517    containing a C{ClassDoc}, adds the C{VariableDoc} to the
1518    containing namespace, and returns the C{ClassDoc}.
1519    """
1520    # Check syntax
1521    if len(line)<3 or len(line)>4 or line[-1] != (token.OP, ':'):
1522        raise ParseError("Bad class definition line")
1523
1524    # If we're not in a namespace, then ignore it.
1525    parent_doc = parent_docs[-1]
1526    if not isinstance(parent_doc, NamespaceDoc): return
1527
1528    # Get the class's name
1529    class_name = parse_name(line[1])
1530    canonical_name = DottedName(parent_doc.canonical_name, class_name)
1531
1532    # Create the class's ClassDoc & VariableDoc.
1533    class_doc = ClassDoc(variables={}, sort_spec=[],
1534                         bases=[], subclasses=[],
1535                         canonical_name=canonical_name,
1536                         defining_module=parent_docs[0],
1537                         docs_extracted_by='parser')
1538    var_doc = VariableDoc(name=class_name, value=class_doc,
1539                          is_imported=False, is_alias=False,
1540                          docs_extracted_by='parser')
1541
1542    # Add the bases.
1543    if len(line) == 4:
1544        if (not isinstance(line[2], list) or
1545            line[2][0] != (token.OP, '(')):
1546            raise ParseError("Expected base list")
1547        try:
1548            for base_name in parse_classdef_bases(line[2]):
1549                class_doc.bases.append(find_base(base_name, parent_docs))
1550        except ParseError, e:
1551            log.warning("Unable to extract the base list for %s: %s" %
1552                        (canonical_name, e))
1553            class_doc.bases = UNKNOWN
1554    else:
1555        class_doc.bases = []
1556
1557    # Register ourselves as a subclass to our bases.
1558    if class_doc.bases is not UNKNOWN:
1559        for basedoc in class_doc.bases:
1560            if isinstance(basedoc, ClassDoc):
1561                # This test avoids that a subclass gets listed twice when
1562                # both introspection and parsing.
1563                # [XXX] This check only works because currently parsing is
1564                # always performed just after introspection of the same
1565                # class. A more complete fix shuld be independent from
1566                # calling order; probably the subclasses list should be
1567                # replaced by a ClassDoc set or a {name: ClassDoc} mapping.
1568                if (basedoc.subclasses
1569                    and basedoc.subclasses[-1].canonical_name
1570                        != class_doc.canonical_name):
1571                    basedoc.subclasses.append(class_doc)
1572
1573    # If the preceeding comment includes a docstring, then add it.
1574    add_docstring_from_comments(class_doc, comments)
1575
1576    # Add the VariableDoc to our container.
1577    set_variable(parent_doc, var_doc)
1578
1579    return class_doc
1580
def _proxy_base(**attribs):
    """
    Build a placeholder C{ClassDoc} for a base class whose real
    documentation is not available.  The placeholder has no
    variables, bases or subclasses; any extra keyword arguments are
    passed through to the C{ClassDoc} constructor.
    """
    return ClassDoc(
        variables={},
        sort_spec=[],
        bases=[],
        subclasses=[],
        docs_extracted_by='parser',
        **attribs)
1584
def find_base(name, parent_docs):
    """
    Return documentation for the base class called C{name}, as seen
    from the namespaces in C{parent_docs}.

    If the name resolves to a variable with a known value, that
    value is returned.  If the base appears to be imported and
    C{BASE_HANDLING} is C{'parse'}, the source it was imported from
    is parsed; when that fails, or for any other C{BASE_HANDLING}
    setting, a proxy C{ClassDoc} is returned instead.  A proxy is
    also returned when nothing is known about where the name came
    from.

    @type name: L{DottedName}
    @param parent_docs: The stack of enclosing documentation
        objects; the first entry supplies the filename whose
        directory is temporarily prepended to C{sys.path}.
    """
    assert isinstance(name, DottedName)

    base_var = lookup_variable(name, parent_docs)

    if base_var is None:
        # Not directly visible, so it must have been imported.  See
        # whether its first component is a known imported variable;
        # if so, the base lives somewhere under that import.
        if len(name) > 1:
            src = lookup_name(name[0], parent_docs)
            if (src is not None and
                src.imported_from not in (None, UNKNOWN)):
                base_src = DottedName(src.imported_from, name[1:])
                base_var = VariableDoc(name=name[-1], is_imported=True,
                                       is_alias=False,
                                       imported_from=base_src,
                                       docs_extracted_by='parser')

        # Still nothing: it came from "import *" or from some magic
        # (e.g. direct manipulation of the module's dictionary), so
        # an empty proxy is the best we can do.
        if base_var is None:
            return _proxy_base(parse_repr=str(name))
            #raise ParseError("Could not find %s" % name)

    # A variable with a known value: that value is the base.
    if base_var.value is not UNKNOWN:
        return base_var.value

    # No value and no import source: fall back on a named proxy.
    if base_var.imported_from in (None, UNKNOWN):
        return _proxy_base(parse_repr=str(name))

    if BASE_HANDLING == 'parse':
        # Parse the base's source, searching the directory of the
        # referring module first; sys.path is restored afterwards.
        saved_sys_path = sys.path
        try:
            module_dir = os.path.dirname(parent_docs[0].filename)
            sys.path = [module_dir] + sys.path
            try:
                return parse_docs(name=str(base_var.imported_from))
            except ParseError:
                log.info('Unable to parse base', base_var.imported_from)
            except ImportError:
                log.info('Unable to find base', base_var.imported_from)
        finally:
            sys.path = saved_sys_path

    # Either BASE_HANDLING is not 'parse', or parsing the base failed.
    return _proxy_base(proxy_for=base_var.imported_from)
1637
1638#/////////////////////////////////////////////////////////////////
1639#{ Parsing
1640#/////////////////////////////////////////////////////////////////
1641
def dotted_names_in(elt_list):
    """
    Collect every simple dotted name found in the given list of
    expressions, descending into bracketed groups and skipping any
    expression that is not a plain dotted name.

    Expressions are consumed from the end of C{elt_list}, which is
    left empty when this function returns.

    @return: A list of the dotted names that were found.
    """
    found = []
    while elt_list:
        expr = elt_list.pop()
        is_group = (len(expr) == 1 and isinstance(expr[0], list))
        if is_group:
            # Queue up the comma-separated contents of the group.
            inner = expr[0][1:-1]
            elt_list.extend(split_on(inner, (token.OP, ',')))
            continue
        try:
            dotted = parse_dotted_name(expr)
        except ParseError:
            continue # complex expression -- ignore
        found.append(dotted)
    return found
1659
def parse_name(elt, strip_parens=False):
    """
    Return the identifier held by a name token.

    @param elt: A token tree element.
    @param strip_parens: If true, then a single name enclosed in any
        number of nested parentheses is unwrapped before it is
        checked.
    @return: The name, as a string.
    @raise ParseError: If C{elt} (after any unwrapping) is not a
        name token.
    """
    if strip_parens:
        lparen = (token.OP, '(')
        rparen = (token.OP, ')')
        # Peel off "( x )" layers one at a time.
        while (isinstance(elt, list) and len(elt) == 3 and
               elt[0] == lparen and elt[-1] == rparen):
            elt = elt[1]

    if not isinstance(elt, list) and elt[0] == token.NAME:
        return elt[1]
    raise ParseError("Bad name")
1675
def parse_dotted_name(elt_list, strip_parens=True, parent_name=None):
    """
    Convert a list of token tree elements spelling out a dotted name
    (such as C{x.y.z}) into the corresponding name object.

    Leading parenthesized groups are unwrapped in place, so
    C{elt_list} may be modified by this call.

    @param elt_list: The token tree elements to parse.
    @param strip_parens: Accepted, but not consulted by this
        function.
    @param parent_name: canonical name of referring module, to resolve
        relative imports.
    @type parent_name: L{DottedName}
    @raise ParseError: If the elements do not form a dotted name, or
        if a relative import reaches beyond the top-level package.
    @bug: does not handle 'x.(y).z'
    """
    if len(elt_list) == 0:
        raise ParseError("Bad dotted name")

    # Handle ((x.y).z).  (If the contents of the parens include
    # anything other than dotted names, such as (x,y), then we'll
    # catch it below and raise a ParseError.)
    while True:
        head = elt_list[0]
        if not (isinstance(head, list) and
                len(head) >= 3 and
                head[0] == (token.OP, '(') and
                head[-1] == (token.OP, ')')):
            break
        elt_list[:1] = head[1:-1]

    # Convert a relative import into an absolute name.
    prefix_name = None
    if parent_name is not None and elt_list[0][-1] == '.':
        # Count the leading dots.
        dots = 1
        while dots < len(elt_list) and elt_list[dots][-1] == '.':
            dots += 1

        elt_list = elt_list[dots:]
        prefix_name = parent_name[:-dots]

        # >>> from . import foo
        if not elt_list:
            if prefix_name == []:
                raise ParseError("Attempted relative import in non-package, "
                                 "or beyond toplevel package")
            return prefix_name

    # What remains must alternate name, dot, name, ..., name.
    if len(elt_list) % 2 != 1:
        raise ParseError("Bad dotted name")

    name = DottedName(parse_name(elt_list[0], True))
    if prefix_name is not None:
        name = prefix_name + name

    for dot, identifier in zip(elt_list[1::2], elt_list[2::2]):
        if dot != (token.OP, '.'):
            raise ParseError("Bad dotted name")
        name = DottedName(name, parse_name(identifier, True))
    return name
1722
def split_on(elt_list, split_tok):
    """
    Split C{elt_list} into sub-lists at every occurrence of
    C{split_tok}; the separator itself is dropped.  A trailing
    separator is tolerated.

    @return: A list of non-empty lists.
    @raise ParseError: If a separator is found at the very start, or
        immediately after another separator.
    """
    # [xx] add code to guarantee each elt is non-empty.
    groups = []
    current = []
    for elt in elt_list:
        if elt == split_tok:
            if not current:
                raise ParseError("Empty element from split")
            groups.append(current)
            current = []
        else:
            current.append(elt)
    if current:
        groups.append(current)
    return groups
1734
def parse_funcdef_arg(elt):
    """
    Translate one argument of a function definition into its name.

    @param elt: A token tree element: either a name token, or a
        parenthesized (possibly nested) group of name tokens.
    @return: The argument's name as a string, or a (possibly nested)
        list of names for a parenthesized group with more than one
        element.
    @raise ParseError: If C{elt} is neither a name token nor a
        parenthesized group.
    """
    if not isinstance(elt, list):
        # A plain token: it has to be a name.
        if elt[0] == token.NAME:
            return elt[1]
        raise ParseError("Bad argument -- expected name or tuple")

    if elt[0] != (token.OP, '('):
        raise ParseError("Bad argument -- expected name or tuple")

    # "(x)" is just x.
    if len(elt) == 3:
        return parse_funcdef_arg(elt[1])

    # "(x, y, ...)": translate each member, skipping the commas.
    members = []
    for member in elt[1:-1]:
        if member != (token.OP, ','):
            members.append(parse_funcdef_arg(member))
    return members
1757
def parse_classdef_bases(elt):
    """
    Parse the parenthesized base list of a class definition.

    @param elt: The token tree for the base list, which must be a
        list starting with the C{'('} token and containing only
        comma-separated dotted names.
    @return: A list with one L{DottedName} per base.
    @raise ParseError: If C{elt} is not such a base list.

    @bug: Does not handle either of::
        - class A( (base.in.parens) ): pass
        - class B( (lambda:calculated.base)() ): pass
    """
    if not (isinstance(elt, list) and elt[0] == (token.OP, '(')):
        raise ParseError("Bad base list")

    base_names = []
    for base_elts in split_on(elt[1:-1], (token.OP, ',')):
        base_names.append(parse_dotted_name(base_elts))
    return base_names
1774
1775# Used by: base list; 'del'; ...
def parse_dotted_name_list(elt_list):
    """
    Parse a comma-separated list of dotted names.

    @param elt_list: A flat list of tree token elements.
    @return: A list with one L{DottedName} per entry; empty if
        C{elt_list} is empty.
    @raise ParseError: If the elements do not form a comma-separated
        list of dotted names.
    """
    names = []

    # What the next element is allowed to be:
    #   'name' -- a name starting a new entry (or end of list)
    #   'sep'  -- a comma or a period (or end of list)
    #   'part' -- a name continuing the current dotted name
    expecting = 'name'
    for elt in elt_list:
        if expecting == 'sep':
            if elt == (token.OP, '.'):
                expecting = 'part'
            elif elt == (token.OP, ','):
                expecting = 'name'
            else:
                raise ParseError("Expected '.' or ',' or end of list")
            continue

        # Both remaining states require a name token.
        if not (isinstance(elt, tuple) and elt[0] == token.NAME):
            raise ParseError("Expected a name")
        if expecting == 'name':
            names.append(DottedName(elt[1]))
        else:
            names[-1] = DottedName(names[-1], elt[1])
        expecting = 'sep'

    # A list may not end on a dangling period.
    if expecting == 'part':
        raise ParseError("Expected a name")
    return names
1813
def parse_string(elt_list):
    """
    Return the value of a single string literal.

    @param elt_list: A list that must contain exactly one string
        token.
    @raise ParseError: If C{elt_list} is anything else.
    """
    is_lone_string = (len(elt_list) == 1 and
                      elt_list[0][0] == token.STRING)
    if not is_lone_string:
        raise ParseError("Expected a string")

    # [xx] use something safer here?  But it needs to deal with
    # any string type (eg r"foo\bar" etc).
    return eval(elt_list[0][1])
1821
1822# ['1', 'b', 'c']
def parse_string_list(elt_list):
    """
    Parse a comma-separated sequence of string literals, optionally
    wrapped in a single pair of parentheses or square brackets (such
    as C{['a', 'b', 'c']}), and return the list of string values.

    @param elt_list: A list of tree token elements.
    @return: A list containing the value of each string literal.
    @raise ParseError: If any entry is not a single string literal,
        or if the sequence contains an empty entry.
    """
    # Unwrap one enclosing bracketed group.  The type check has to
    # look at the lone element rather than at elt_list itself:
    # otherwise a bare string token reaches the double subscript
    # below and fails with a TypeError instead of being parsed.
    if (len(elt_list) == 1 and isinstance(elt_list[0], list) and
        elt_list[0][0][1] in ('(', '[')):
        elt_list = elt_list[0][1:-1]

    string_list = []
    for string_elt in split_on(elt_list, (token.OP, ',')):
        string_list.append(parse_string(string_elt))

    return string_list
1833
1834#/////////////////////////////////////////////////////////////////
1835#{ Variable Manipulation
1836#/////////////////////////////////////////////////////////////////
1837
def set_variable(namespace, var_doc, preserve_docstring=False):
    """
    Register C{var_doc} in C{namespace} under its own name,
    replacing any variable already stored under that name.  Nothing
    happens if C{namespace} is not a C{NamespaceDoc}.

    @param namespace: The namespace that should receive the variable.
    @param var_doc: The variable to add; its C{container} must still
        be C{UNKNOWN}.
    @param preserve_docstring: If true, and C{var_doc} has no
        docstring of its own, then the docstring (and docstring line
        number) of the variable being replaced is carried over.
    """
    if not isinstance(namespace, NamespaceDoc):
        return

    # This happens when the class definition has not been parsed, e.g. in
    # sf bug #1693253 on ``Exception.x = y``
    if namespace.sort_spec is UNKNOWN:
        namespace.sort_spec = namespace.variables.keys()

    var_name = var_doc.name

    if var_name in namespace.variables:
        # Retire the variable being replaced: drop it from the
        # ordering, and withdraw the canonical name from its value
        # unless it was only an alias.
        namespace.sort_spec.remove(var_name)
        replaced = namespace.variables[var_name]
        if (replaced.is_alias == False and
            replaced.value is not UNKNOWN):
            replaced.value.canonical_name = UNKNOWN

        inherit_docstring = (
            preserve_docstring and
            var_doc.docstring in (None, UNKNOWN) and
            replaced.docstring not in (None, UNKNOWN))
        if inherit_docstring:
            var_doc.docstring = replaced.docstring
            var_doc.docstring_lineno = replaced.docstring_lineno

    # Store the new variable, at the end of the ordering.
    namespace.variables[var_name] = var_doc
    namespace.sort_spec.append(var_name)
    assert var_doc.container is UNKNOWN
    var_doc.container = namespace
1872
def del_variable(namespace, name):
    """
    Remove the variable identified by the dotted name C{name} from
    C{namespace}.  For a multi-part name, the lookup descends through
    the value of each intermediate variable.  Nothing happens if
    C{namespace} is not a C{NamespaceDoc} or if the first component
    of C{name} is not one of its variables.
    """
    if not isinstance(namespace, NamespaceDoc):
        return

    first = name[0]
    if first not in namespace.variables:
        return

    if len(name) != 1:
        # Descend into the value of the first component.
        del_variable(namespace.variables[first].value, name[1:])
        return

    removed = namespace.variables[first]
    namespace.sort_spec.remove(first)
    del namespace.variables[first]
    # The value loses its canonical name, unless the variable was
    # just an alias for it.
    if not removed.is_alias and removed.value is not UNKNOWN:
        removed.value.canonical_name = UNKNOWN
1886
1887#/////////////////////////////////////////////////////////////////
1888#{ Name Lookup
1889#/////////////////////////////////////////////////////////////////
1890
def lookup_name(identifier, parent_docs):
    """
    Look up the variable that C{identifier} refers to, searching the
    innermost enclosing namespace, then the containing module, then
    the builtins.

    @param identifier: A simple (undotted) name.
    @param parent_docs: The stack of enclosing documentation
        objects.
    @rtype: L{VariableDoc} or C{None}
    @raise TypeError: If C{identifier} is not a string.
    """
    # Only 3 namespaces need to be checked: locals, globals, and
    # builtins.  This holds even for versions of python with nested
    # scopes, because nested scope lookup does not apply to nested
    # class definitions, and variables in nested functions are of no
    # interest here.
    if not isinstance(identifier, basestring):
        raise TypeError('identifier must be a string')

    # Locals first, then globals (aka the containing module).
    for scope in (parent_docs[-1], parent_docs[0]):
        if (isinstance(scope, NamespaceDoc) and
            identifier in scope.variables):
            return scope.variables[identifier]

    # Builtins
    builtins = epydoc.docintrospecter.introspect_docs(__builtin__)
    if (isinstance(builtins, NamespaceDoc) and
        identifier in builtins.variables):
        return builtins.variables[identifier]

    # We didn't find it; return None.
    return None
1924
def lookup_variable(dotted_name, parent_docs):
    """
    Look up the variable with the given (possibly dotted) name.

    @type dotted_name: L{DottedName}
    @param parent_docs: The stack of enclosing documentation
        objects.
    @return: The variable's documentation, or C{None} if it could
        not be found.
    """
    assert isinstance(dotted_name, DottedName)

    # A simple identifier is handled entirely by lookup_name.
    if len(dotted_name) == 1:
        return lookup_name(dotted_name[0], parent_docs)

    # Otherwise resolve everything but the last piece to a value,
    # which has to be a namespace holding the last piece.
    container = lookup_value(dotted_name[:-1], parent_docs)
    if not isinstance(container, NamespaceDoc):
        return None # var not found.

    leaf = dotted_name[-1]
    if leaf not in container.variables:
        return None # var not found.
    return container.variables[leaf]
1941
def lookup_value(dotted_name, parent_docs):
    """
    Resolve C{dotted_name} in the current namespace and return the
    documentation for the value it is bound to.

    If the walk reaches a variable whose value is unknown but which
    was imported, a C{GenericValueDoc} proxy for the corresponding
    name in the import source is returned.

    @type dotted_name: L{DottedName}
    @return: The value's documentation, or C{None} if the name could
        not be resolved.
    """
    assert isinstance(dotted_name, DottedName)
    var_doc = lookup_name(dotted_name[0], parent_docs)

    for i in range(1, len(dotted_name)):
        if var_doc is None:
            return None

        value = var_doc.value
        if isinstance(value, NamespaceDoc):
            # Step into the namespace and pick up the next piece.
            var_doc = value.variables.get(dotted_name[i])
            continue

        if (value is UNKNOWN and
            var_doc.imported_from not in (None, UNKNOWN)):
            src_name = var_doc.imported_from + dotted_name[i:]
            # [xx] do I want to create a proxy here??
            return GenericValueDoc(proxy_for=src_name,
                                   parse_repr=str(dotted_name),
                                   docs_extracted_by='parser')

        # Neither a namespace nor an import: dead end.
        return None

    if var_doc is None:
        return None
    return var_doc.value
1969
1970#/////////////////////////////////////////////////////////////////
1971#{ Docstring Comments
1972#/////////////////////////////////////////////////////////////////
1973
def add_docstring_from_comments(api_doc, comments):
    """
    Use a run of comment lines as the docstring of C{api_doc}.

    @param api_doc: The documentation object to update, or C{None}.
    @param comments: A list of C{(text, lineno)} pairs.  The texts
        are joined with newlines to form the docstring, and the
        first pair's line number becomes the docstring line number.
        If the list is empty (or C{api_doc} is C{None}), nothing is
        changed.
    """
    if api_doc is None or not comments:
        return

    texts = []
    for (text, _lineno) in comments:
        texts.append(text)
    api_doc.docstring = '\n'.join(texts)
    api_doc.docstring_lineno = comments[0][1]
1978
1979#/////////////////////////////////////////////////////////////////
1980#{ Tree tokens
1981#/////////////////////////////////////////////////////////////////
1982
def _join_toktree(s1, s2, spacing='normal'):
    """
    Concatenate two adjacent pieces of pretty-printed source text,
    inserting a single space between them unless the surrounding
    punctuation makes a space inappropriate.

    @param s1: The text on the left side.
    @param s2: The text on the right side.
    @param spacing: If C{'tight'}, then no space is inserted next to
        the operator characters C{+ - * / = ,}.  With any other
        value, those operators are separated from their neighbours
        by a space.
    @return: The joined string.
    """
    # Cases where the pieces are always glued together: an empty
    # side, a lone '-' or backquote on the left, closing punctuation
    # or ':' on the right, a leading '.' or ',', an opening bracket
    # (or whitespace) at the end of the left side, and a call-style
    # '(' that does not follow ',' or '='.
    if (s2=='' or s1=='' or
        s1 in ('-','`') or s2 in ('}',']',')','`',':') or
        s2[0] in ('.',',') or s1[-1] in ('(','[','{','.','\n',' ') or
        (s2[0] == '(' and s1[-1] not in (',','='))):
        return '%s%s' % (s1,s2)
    # The parentheses matter: "and" binds tighter than "or", so
    # without them the right-hand operator test would apply in every
    # spacing mode.
    elif (spacing=='tight' and
          (s1[-1] in '+-*/=,' or s2[0] in '+-*/=,')):
        return '%s%s' % (s1, s2)
    else:
        return '%s %s' % (s1, s2)
1995
def _pp_toktree_add_piece(spacing, pieces, piece):
    """
    Append C{piece} to the list C{pieces}, preceded by a separate
    C{' '} entry unless the punctuation on either side makes a space
    inappropriate.

    @param spacing: If C{'tight'}, then no space is inserted next to
        the operator characters C{+ - * / = ,}.  With any other
        value, those operators are separated from their neighbours
        by a space.
    @param pieces: The non-empty list of text pieces produced so
        far; modified in place.
    @param piece: The text to add.
    """
    s1 = pieces[-1]
    s2 = piece

    # Cases where the pieces are always glued together: an empty
    # side, a lone '-' or backquote on the left, closing punctuation
    # or ':' on the right, a leading '.' or ',', an opening bracket
    # (or whitespace) at the end of the left side, and a call-style
    # '(' that does not follow ',' or '='.
    if (s2=='' or s1=='' or
        s1 in ('-','`') or s2 in ('}',']',')','`',':') or
        s2[0] in ('.',',') or s1[-1] in ('(','[','{','.','\n',' ') or
        (s2[0] == '(' and s1[-1] not in (',','='))):
        pass
    # The parentheses matter: "and" binds tighter than "or", so
    # without them a piece starting with an operator would lose its
    # leading space in every spacing mode (giving e.g. "a+ b").
    elif (spacing=='tight' and
          (s1[-1] in '+-*/=,' or s2[0] in '+-*/=,')):
        pass
    else:
        pieces.append(' ')

    pieces.append(piece)
2012
def pp_toktree(elts, spacing='normal', indent=0):
    """
    Pretty-print a token tree as source text.

    @param elts: The token tree elements to render.
    @param spacing: The spacing mode, passed through to the
        piece-joining code (C{'normal'} or C{'tight'}).
    @param indent: The starting indentation level.
    @return: The rendered text, as a single string.
    """
    fragments = ['']
    _pp_toktree(elts, spacing, indent, fragments)
    rendered = ''.join(fragments)
    return rendered
2017
def _pp_toktree(elts, spacing, indent, pieces):
    """
    Render the token tree C{elts} by appending text pieces to
    C{pieces}, recursing into nested lists.

    @param elts: The token tree elements to render: token tuples and
        nested lists of elements.
    @param spacing: The spacing mode, passed to
        C{_pp_toktree_add_piece} with every piece.
    @param indent: The current indentation level (four spaces per
        level).  C{INDENT} and C{DEDENT} tokens adjust it for the
        remainder of this call.
    @param pieces: The list of text pieces; modified in place.
    """
    add_piece = _pp_toktree_add_piece

    for elt in elts:
        # Start class & def statements on a fresh, indented line.
        if elt == (token.NAME, 'class') or elt == (token.NAME, 'def'):
            add_piece(spacing, pieces, '\n%s' % ('    '*indent))

        if isinstance(elt, tuple):
            if elt[0] == token.NEWLINE:
                add_piece(spacing, pieces, '    '+elt[1])
                add_piece(spacing, pieces, '\n%s' % ('    '*indent))
            elif elt[0] == token.INDENT:
                add_piece(spacing, pieces, '    ')
                indent += 1
            elif elt[0] == token.DEDENT:
                # Take back the indentation emitted for this level.
                assert pieces[-1] == '    '
                pieces.pop()
                indent -= 1
            elif elt[0] == tokenize.COMMENT:
                add_piece(spacing, pieces, elt[1].rstrip() + '\n')
                # add_piece needs the spacing mode and the piece
                # list as well as the text; calling it with the text
                # alone raises a TypeError.
                add_piece(spacing, pieces, '    '*indent)
            else:
                add_piece(spacing, pieces, elt[1])
        else:
            _pp_toktree(elt, spacing, indent, pieces)
2044
2045#/////////////////////////////////////////////////////////////////
2046#{ Helper Functions
2047#/////////////////////////////////////////////////////////////////
2048
def get_module_encoding(filename):
    """
    Determine the source encoding of the file C{filename} from its
    first two lines.

    @return: C{'utf-8'} if the first line starts with a UTF-8 byte
        order mark; otherwise the encoding named by the first
        C{coding:} or C{coding=} declaration found on either line;
        otherwise C{'iso-8859-1'}.
    @see: U{PEP 263<http://www.python.org/peps/pep-0263.html>}
    """
    module_file = open(filename, 'rU')
    try:
        first_line = module_file.readline()
        second_line = module_file.readline()

        if first_line.startswith('\xef\xbb\xbf'):
            return 'utf-8'

        for line in (first_line, second_line):
            m = re.search("coding[:=]\s*([-\w.]+)", line)
            if m is not None:
                return m.group(1)

        # Fall back on Python's default encoding.
        return 'iso-8859-1' # aka 'latin-1'
    finally:
        module_file.close()
2067
def _get_module_name(filename, package_doc):
    """
    Work out the dotted name of the module stored in C{filename}.

    The module's own name is the file's base name with a trailing
    C{.py} extension (optionally followed by one more word
    character, as in C{.pyc}) removed; for an C{__init__} file, it
    is the name of the containing directory instead.

    @param filename: The path of the module's file.
    @param package_doc: The documentation of the containing package,
        or C{None} if the module is not inside a package.
    @return: A tuple C{(dotted_name, is_package)}, where
        C{is_package} is true if the file is an C{__init__} file.
    """
    # The dot is escaped so that only a real extension is stripped;
    # an unescaped dot would also eat the tail of an extensionless
    # name that merely ends in "py" (turning "copy" into "c").
    name = re.sub(r'\.py\w?$', '', os.path.split(filename)[1])
    if name == '__init__':
        is_package = True
        name = os.path.split(os.path.split(filename)[0])[1]
    else:
        is_package = False

    # [XX] if the module contains a script, then `name` may not
    # necessarily be a valid identifier -- which will cause
    # DottedName to raise an exception.  Is that what I want?
    if package_doc is None:
        dotted_name = DottedName(name)
    else:
        dotted_name = DottedName(package_doc.canonical_name, name)

    # Check if the module looks like it's shadowed by a variable.
    # If so, then add a "'" to the end of its canonical name, to
    # distinguish it from the variable.
    if package_doc is not None and name in package_doc.variables:
        vardoc = package_doc.variables[name]
        if (vardoc.value not in (None, UNKNOWN) and
            vardoc.imported_from != dotted_name):
            log.warning("Module %s might be shadowed by a variable with "
                        "the same name." % dotted_name)
            dotted_name = DottedName(str(dotted_name)+"'")

    return dotted_name, is_package
2099
def flatten(lst, out=None):
    """
    Collect the leaves of a nested list, in order.

    @param lst: The nested list that should be flattened.  Both
        lists and tuples found inside it are descended into.
    @param out: An optional list to which the leaves are appended;
        a new list is created if it is omitted.
    @return: The list holding the leaves (C{out}, if it was given).
    """
    if out is None:
        out = []
    for item in lst:
        if not isinstance(item, (list, tuple)):
            out.append(item)
            continue
        flatten(item, out)
    return out
2113
2114