1#
2# epytext.py: epydoc formatted docstring parsing
3# Edward Loper
4#
5# Created [04/10/01 12:00 AM]
6# $Id: epytext.py 1652 2007-09-26 04:45:34Z edloper $
7#
8
9"""
10Parser for epytext strings.  Epytext is a lightweight markup whose
11primary intended application is Python documentation strings.  This
12parser converts Epytext strings to a simple DOM-like representation
13(encoded as a tree of L{Element} objects and strings).  Epytext
14strings can contain the following X{structural blocks}:
15
16    - X{epytext}: The top-level element of the DOM tree.
17    - X{para}: A paragraph of text.  Paragraphs contain no newlines,
18      and all spaces are soft.
19    - X{section}: A section or subsection.
20    - X{field}: A tagged field.  These fields provide information
21      about specific aspects of a Python object, such as the
22      description of a function's parameter, or the author of a
23      module.
24    - X{literalblock}: A block of literal text.  This text should be
25      displayed as it would be displayed in plaintext.  The
26      parser removes the appropriate amount of leading whitespace
27      from each line in the literal block.
    - X{doctestblock}: A block containing sample Python code,
29      formatted according to the specifications of the C{doctest}
30      module.
31    - X{ulist}: An unordered list.
32    - X{olist}: An ordered list.
33    - X{li}: A list item.  This tag is used both for unordered list
34      items and for ordered list items.
35
36Additionally, the following X{inline regions} may be used within
37C{para} blocks:
38
39    - X{code}:   Source code and identifiers.
40    - X{math}:   Mathematical expressions.
41    - X{index}:  A term which should be included in an index, if one
42                 is generated.
43    - X{italic}: Italicized text.
44    - X{bold}:   Bold-faced text.
    - X{uri}:    A Uniform Resource Identifier (URI) or Uniform
                 Resource Locator (URL).
47    - X{link}:   A Python identifier which should be hyperlinked to
48                 the named object's documentation, when possible.
49
The returned DOM tree will conform to the following Document Type
Definition::
52
53   <!ENTITY % colorized '(code | math | index | italic |
54                          bold | uri | link | symbol)*'>
55
56   <!ELEMENT epytext ((para | literalblock | doctestblock |
57                      section | ulist | olist)*, fieldlist?)>
58
59   <!ELEMENT para (#PCDATA | %colorized;)*>
60
   <!ELEMENT section (para | literalblock | doctestblock |
                      section | ulist | olist)+>
63
64   <!ELEMENT fieldlist (field+)>
   <!ELEMENT field (tag, arg?, (para | literalblock | doctestblock |
                                ulist | olist)+)>
67   <!ELEMENT tag (#PCDATA)>
68   <!ELEMENT arg (#PCDATA)>
69
70   <!ELEMENT literalblock (#PCDATA | %colorized;)*>
71   <!ELEMENT doctestblock (#PCDATA)>
72
73   <!ELEMENT ulist (li+)>
74   <!ELEMENT olist (li+)>
75   <!ELEMENT li (para | literalblock | doctestblock | ulist | olist)+>
76   <!ATTLIST li bullet NMTOKEN #IMPLIED>
77   <!ATTLIST olist start NMTOKEN #IMPLIED>
78
79   <!ELEMENT uri     (name, target)>
80   <!ELEMENT link    (name, target)>
81   <!ELEMENT name    (#PCDATA | %colorized;)*>
82   <!ELEMENT target  (#PCDATA)>
83
84   <!ELEMENT code    (#PCDATA | %colorized;)*>
85   <!ELEMENT math    (#PCDATA | %colorized;)*>
86   <!ELEMENT italic  (#PCDATA | %colorized;)*>
87   <!ELEMENT bold    (#PCDATA | %colorized;)*>
   <!ELEMENT indexed (#PCDATA | %colorized;)*>
89   <!ATTLIST code style CDATA #IMPLIED>
90
91   <!ELEMENT symbol (#PCDATA)>
92
@var SYMBOLS: A list of the escape symbols that are supported
94      by epydoc.  Currently the following symbols are supported:
95<<<SYMBOLS>>>
96"""
97# Note: the symbol list is appended to the docstring automatically,
98# below.
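
# For orientation, here is a small (hypothetical) docstring written in
# epytext, using several of the constructs described above.  Parsing it
# with parse() should yield an 'epytext' element whose children are two
# 'para' elements, a 'literalblock', and a 'fieldlist' containing the
# two fields:
#
#     Return the sum of the values in C{lst}.
#
#     Example usage::
#
#         total = add_all([1, 2, 3])
#
#     @param lst: The values to add.
#     @return: The sum of the values.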
99
100__docformat__ = 'epytext en'
101
102# Code organization..
103#   1. parse()
104#   2. tokenize()
105#   3. colorize()
106#   4. helpers
107#   5. testing
108
109import re, string, types, sys, os.path
110from epydoc.markup import *
111from epydoc.util import wordwrap, plaintext_to_html, plaintext_to_latex
112from epydoc.markup.doctest import doctest_to_html, doctest_to_latex
113
114##################################################
115## DOM-Like Encoding
116##################################################
117
118class Element:
119    """
120    A very simple DOM-like representation for parsed epytext
121    documents.  Each epytext document is encoded as a tree whose nodes
122    are L{Element} objects, and whose leaves are C{string}s.  Each
123    node is marked by a I{tag} and zero or more I{attributes}.  Each
124    attribute is a mapping from a string key to a string value.
125    """
126    def __init__(self, tag, *children, **attribs):
127        self.tag = tag
128        """A string tag indicating the type of this element.
129        @type: C{string}"""
130
131        self.children = list(children)
132        """A list of the children of this element.
133        @type: C{list} of (C{string} or C{Element})"""
134
135        self.attribs = attribs
136        """A dictionary mapping attribute names to attribute values
137        for this element.
138        @type: C{dict} from C{string} to C{string}"""
139
140    def __str__(self):
141        """
142        Return a string representation of this element, using XML
143        notation.
144        @bug: Doesn't escape '<' or '&' or '>'.
145        """
146        attribs = ''.join([' %s=%r' % t for t in self.attribs.items()])
147        return ('<%s%s>' % (self.tag, attribs) +
148                ''.join([str(child) for child in self.children]) +
149                '</%s>' % self.tag)
150
151    def __repr__(self):
152        attribs = ''.join([', %s=%r' % t for t in self.attribs.items()])
153        args = ''.join([', %r' % c for c in self.children])
154        return 'Element(%s%s%s)' % (self.tag, args, attribs)
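
# A minimal usage sketch for Element: build a small tree by hand and
# serialize it with str().  (Attribute values, when present, are rendered
# with %r, so they appear quoted.)
#
#     tree = Element('para', 'Hello ', Element('bold', 'world'))
#     str(tree)   # -> '<para>Hello <bold>world</bold></para>'
#     repr(tree)  # -> "Element(para, 'Hello ', Element(bold, 'world'))"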
155
156##################################################
157## Constants
158##################################################
159
160# The possible heading underline characters, listed in order of
161# heading depth.
162_HEADING_CHARS = "=-~"
163
164# Escape codes.  These should be needed very rarely.
165_ESCAPES = {'lb':'{', 'rb': '}'}
166
167# Symbols.  These can be generated via S{...} escapes.
168SYMBOLS = [
169    # Arrows
170    '<-', '->', '^', 'v',
171
172    # Greek letters
173    'alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta',
174    'eta', 'theta', 'iota', 'kappa', 'lambda', 'mu',
175    'nu', 'xi', 'omicron', 'pi', 'rho', 'sigma',
176    'tau', 'upsilon', 'phi', 'chi', 'psi', 'omega',
177    'Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon', 'Zeta',
178    'Eta', 'Theta', 'Iota', 'Kappa', 'Lambda', 'Mu',
179    'Nu', 'Xi', 'Omicron', 'Pi', 'Rho', 'Sigma',
180    'Tau', 'Upsilon', 'Phi', 'Chi', 'Psi', 'Omega',
181
182    # HTML character entities
183    'larr', 'rarr', 'uarr', 'darr', 'harr', 'crarr',
184    'lArr', 'rArr', 'uArr', 'dArr', 'hArr',
185    'copy', 'times', 'forall', 'exist', 'part',
186    'empty', 'isin', 'notin', 'ni', 'prod', 'sum',
187    'prop', 'infin', 'ang', 'and', 'or', 'cap', 'cup',
188    'int', 'there4', 'sim', 'cong', 'asymp', 'ne',
189    'equiv', 'le', 'ge', 'sub', 'sup', 'nsub',
190    'sube', 'supe', 'oplus', 'otimes', 'perp',
191
192    # Alternate (long) names
193    'infinity', 'integral', 'product',
194    '>=', '<=',
195    ]
196# Convert to a dictionary, for quick lookup
197_SYMBOLS = {}
198for symbol in SYMBOLS: _SYMBOLS[symbol] = 1
199
200# Add symbols to the docstring.
201symblist = '      '
202symblist += ';\n      '.join([' - C{E{S}{%s}}=S{%s}' % (symbol, symbol)
203                              for symbol in SYMBOLS])
204__doc__ = __doc__.replace('<<<SYMBOLS>>>', symblist)
205del symbol, symblist
206
207# Tags for colorizing text.
208_COLORIZING_TAGS = {
209    'C': 'code',
210    'M': 'math',
211    'X': 'indexed',
212    'I': 'italic',
213    'B': 'bold',
214    'U': 'uri',
215    'L': 'link',       # A Python identifier that should be linked to
216    'E': 'escape',     # escapes characters or creates symbols
217    'S': 'symbol',
218    'G': 'graph',
219    }
220
221# Which tags can use "link syntax" (e.g., U{Python<www.python.org>})?
222_LINK_COLORIZING_TAGS = ['link', 'uri']
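
# A few concrete examples of how these constants are used in markup
# (illustrative only):
#
#     E{lb}, E{rb}               ->  literal '{' and '}' (via _ESCAPES)
#     S{rarr}, S{<-}             ->  'symbol' elements (looked up in _SYMBOLS)
#     C{x}, B{x}, I{x}           ->  'code', 'bold', 'italic' elements
#     U{Python<www.python.org>}  ->  a 'uri' element with separate name/target
#     L{epydoc.markup}           ->  a 'link' element naming a Python object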
223
224##################################################
225## Structuring (Top Level)
226##################################################
227
228def parse(str, errors = None):
229    """
230    Return a DOM tree encoding the contents of an epytext string.  Any
231    errors generated during parsing will be stored in C{errors}.
232
233    @param str: The epytext string to parse.
234    @type str: C{string}
235    @param errors: A list where any errors generated during parsing
236        will be stored.  If no list is specified, then fatal errors
237        will generate exceptions, and non-fatal errors will be
238        ignored.
239    @type errors: C{list} of L{ParseError}
240    @return: a DOM tree encoding the contents of an epytext string.
241    @rtype: C{Element}
242    @raise ParseError: If C{errors} is C{None} and an error is
243        encountered while parsing.
244    """
245    # Initialize errors list.
246    if errors == None:
247        errors = []
248        raise_on_error = 1
249    else:
250        raise_on_error = 0
251
252    # Preprocess the string.
253    str = re.sub('\015\012', '\012', str)
254    str = string.expandtabs(str)
255
256    # Tokenize the input string.
257    tokens = _tokenize(str, errors)
258
259    # Have we encountered a field yet?
260    encountered_field = 0
261
    # Create a document to hold the epytext.
263    doc = Element('epytext')
264
265    # Maintain two parallel stacks: one contains DOM elements, and
266    # gives the ancestors of the current block.  The other contains
267    # indentation values, and gives the indentation of the
268    # corresponding DOM elements.  An indentation of "None" reflects
269    # an unknown indentation.  However, the indentation must be
270    # greater than, or greater than or equal to, the indentation of
271    # the prior element (depending on what type of DOM element it
    # corresponds to).  No two consecutive indent_stack values will
    # ever be "None."  Use initial dummy elements in the stack, so we
274    # don't have to worry about bounds checking.
275    stack = [None, doc]
276    indent_stack = [-1, None]
277
278    for token in tokens:
279        # Uncomment this for debugging:
280        #print ('%s: %s\n%s: %s\n' %
281        #       (''.join(['%-11s' % (t and t.tag) for t in stack]),
282        #        token.tag, ''.join(['%-11s' % i for i in indent_stack]),
283        #        token.indent))
284
285        # Pop any completed blocks off the stack.
286        _pop_completed_blocks(token, stack, indent_stack)
287
288        # If Token has type PARA, colorize and add the new paragraph
289        if token.tag == Token.PARA:
290            _add_para(doc, token, stack, indent_stack, errors)
291
292        # If Token has type HEADING, add the new section
293        elif token.tag == Token.HEADING:
294            _add_section(doc, token, stack, indent_stack, errors)
295
296        # If Token has type LBLOCK, add the new literal block
297        elif token.tag == Token.LBLOCK:
298            stack[-1].children.append(token.to_dom(doc))
299
300        # If Token has type DTBLOCK, add the new doctest block
301        elif token.tag == Token.DTBLOCK:
302            stack[-1].children.append(token.to_dom(doc))
303
304        # If Token has type BULLET, add the new list/list item/field
305        elif token.tag == Token.BULLET:
306            _add_list(doc, token, stack, indent_stack, errors)
307        else:
308            assert 0, 'Unknown token type: '+token.tag
309
310        # Check if the DOM element we just added was a field..
311        if stack[-1].tag == 'field':
312            encountered_field = 1
313        elif encountered_field == 1:
314            if len(stack) <= 3:
315                estr = ("Fields must be the final elements in an "+
316                        "epytext string.")
317                errors.append(StructuringError(estr, token.startline))
318
319    # Graphs use inline markup (G{...}) but are really block-level
320    # elements; so "raise" any graphs we generated.  This is a bit of
321    # a hack, but the alternative is to define a new markup for
322    # block-level elements, which I'd rather not do.  (See sourceforge
323    # bug #1673017.)
324    for child in doc.children:
325        _raise_graphs(child, doc)
326
327    # If there was an error, then signal it!
328    if len([e for e in errors if e.is_fatal()]) > 0:
329        if raise_on_error:
330            raise errors[0]
331        else:
332            return None
333
334    # Return the top-level epytext DOM element.
335    return doc
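
# A minimal usage sketch for parse(): pass an explicit error list to collect
# problems instead of raising, then inspect the resulting tree.
#
#     errors = []
#     tree = parse("A simple paragraph.", errors)
#     # tree is None only if a fatal error occurred; otherwise:
#     str(tree)   # -> '<epytext><para>A simple paragraph.</para></epytext>'
#     for e in errors:
#         print e   # any non-fatal warnings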
336
337def _raise_graphs(tree, parent):
338    # Recurse to children.
339    have_graph_child = False
340    for elt in tree.children:
341        if isinstance(elt, Element):
342            _raise_graphs(elt, tree)
343            if elt.tag == 'graph': have_graph_child = True
344
345    block = ('section', 'fieldlist', 'field', 'ulist', 'olist', 'li')
346    if have_graph_child and tree.tag not in block:
347        child_index = 0
348        for elt in tree.children:
349            if isinstance(elt, Element) and elt.tag == 'graph':
350                # We found a graph: splice it into the parent.
351                parent_index = parent.children.index(tree)
352                left = tree.children[:child_index]
353                right = tree.children[child_index+1:]
354                parent.children[parent_index:parent_index+1] = [
355                    Element(tree.tag, *left, **tree.attribs),
356                    elt,
357                    Element(tree.tag, *right, **tree.attribs)]
358                child_index = 0
359                parent_index += 2
360            else:
361                child_index += 1
362
363def _pop_completed_blocks(token, stack, indent_stack):
364    """
365    Pop any completed blocks off the stack.  This includes any
366    blocks that we have dedented past, as well as any list item
367    blocks that we've dedented to.  The top element on the stack
368    should only be a list if we're about to start a new list
369    item (i.e., if the next token is a bullet).
370    """
371    indent = token.indent
372    if indent != None:
373        while (len(stack) > 2):
374            pop = 0
375
376            # Dedent past a block
377            if indent_stack[-1]!=None and indent<indent_stack[-1]: pop=1
378            elif indent_stack[-1]==None and indent<indent_stack[-2]: pop=1
379
            # Dedent to a list item, if it is followed by another list
381            # item with the same indentation.
382            elif (token.tag == 'bullet' and indent==indent_stack[-2] and
383                  stack[-1].tag in ('li', 'field')): pop=1
384
385            # End of a list (no more list items available)
386            elif (stack[-1].tag in ('ulist', 'olist') and
387                  (token.tag != 'bullet' or token.contents[-1] == ':')):
388                pop=1
389
390            # Pop the block, if it's complete.  Otherwise, we're done.
391            if pop == 0: return
392            stack.pop()
393            indent_stack.pop()
394
395def _add_para(doc, para_token, stack, indent_stack, errors):
396    """Colorize the given paragraph, and add it to the DOM tree."""
397    # Check indentation, and update the parent's indentation
398    # when appropriate.
399    if indent_stack[-1] == None:
400        indent_stack[-1] = para_token.indent
401    if para_token.indent == indent_stack[-1]:
402        # Colorize the paragraph and add it.
403        para = _colorize(doc, para_token, errors)
404        if para_token.inline:
405            para.attribs['inline'] = True
406        stack[-1].children.append(para)
407    else:
408        estr = "Improper paragraph indentation."
409        errors.append(StructuringError(estr, para_token.startline))
410
411def _add_section(doc, heading_token, stack, indent_stack, errors):
412    """Add a new section to the DOM tree, with the given heading."""
413    if indent_stack[-1] == None:
414        indent_stack[-1] = heading_token.indent
415    elif indent_stack[-1] != heading_token.indent:
416        estr = "Improper heading indentation."
417        errors.append(StructuringError(estr, heading_token.startline))
418
419    # Check for errors.
420    for tok in stack[2:]:
421        if tok.tag != "section":
422            estr = "Headings must occur at the top level."
423            errors.append(StructuringError(estr, heading_token.startline))
424            break
425    if (heading_token.level+2) > len(stack):
426        estr = "Wrong underline character for heading."
427        errors.append(StructuringError(estr, heading_token.startline))
428
429    # Pop the appropriate number of headings so we're at the
430    # correct level.
431    stack[heading_token.level+2:] = []
432    indent_stack[heading_token.level+2:] = []
433
434    # Colorize the heading
435    head = _colorize(doc, heading_token, errors, 'heading')
436
437    # Add the section's and heading's DOM elements.
438    sec = Element("section")
439    stack[-1].children.append(sec)
440    stack.append(sec)
441    sec.children.append(head)
442    indent_stack.append(None)
443
444def _add_list(doc, bullet_token, stack, indent_stack, errors):
445    """
446    Add a new list item or field to the DOM tree, with the given
447    bullet or field tag.  When necessary, create the associated
448    list.
449    """
450    # Determine what type of bullet it is.
451    if bullet_token.contents[-1] == '-':
452        list_type = 'ulist'
453    elif bullet_token.contents[-1] == '.':
454        list_type = 'olist'
455    elif bullet_token.contents[-1] == ':':
456        list_type = 'fieldlist'
457    else:
458        raise AssertionError('Bad Bullet: %r' % bullet_token.contents)
459
460    # Is this a new list?
461    newlist = 0
462    if stack[-1].tag != list_type:
463        newlist = 1
464    elif list_type == 'olist' and stack[-1].tag == 'olist':
465        old_listitem = stack[-1].children[-1]
466        old_bullet = old_listitem.attribs.get("bullet").split('.')[:-1]
467        new_bullet = bullet_token.contents.split('.')[:-1]
468        if (new_bullet[:-1] != old_bullet[:-1] or
469            int(new_bullet[-1]) != int(old_bullet[-1])+1):
470            newlist = 1
471
472    # Create the new list.
473    if newlist:
        if stack[-1].tag == 'fieldlist':
475            # The new list item is not a field list item (since this
476            # is a new list); but it's indented the same as the field
477            # list.  This either means that they forgot to indent the
478            # list, or they are trying to put something after the
479            # field list.  The first one seems more likely, so we'll
480            # just warn about that (to avoid confusion).
481            estr = "Lists must be indented."
482            errors.append(StructuringError(estr, bullet_token.startline))
483        if stack[-1].tag in ('ulist', 'olist', 'fieldlist'):
484            stack.pop()
485            indent_stack.pop()
486
487        if (list_type != 'fieldlist' and indent_stack[-1] is not None and
488            bullet_token.indent == indent_stack[-1]):
489            # Ignore this error if there's text on the same line as
            # the docstring's opening quotes -- epydoc can't reliably
491            # determine the indentation for that line.
492            if bullet_token.startline != 1 or bullet_token.indent != 0:
493                estr = "Lists must be indented."
494                errors.append(StructuringError(estr, bullet_token.startline))
495
496        if list_type == 'fieldlist':
497            # Fieldlist should be at the top-level.
498            for tok in stack[2:]:
499                if tok.tag != "section":
500                    estr = "Fields must be at the top level."
501                    errors.append(
502                        StructuringError(estr, bullet_token.startline))
503                    break
504            stack[2:] = []
505            indent_stack[2:] = []
506
507        # Add the new list.
508        lst = Element(list_type)
509        stack[-1].children.append(lst)
510        stack.append(lst)
511        indent_stack.append(bullet_token.indent)
512        if list_type == 'olist':
            start = bullet_token.contents.split('.')[:-1]
            if start[-1] != '1':
                lst.attribs["start"] = start[-1]
516
    # Fields are treated somewhat specially: a "fieldlist" node is
    # created to make the parsing simpler, and each field is added as
    # a child of that "fieldlist" node (which is itself a child of the
    # top-level "epytext" node).
521    if list_type == 'fieldlist':
522        li = Element("field")
523        token_words = bullet_token.contents[1:-1].split(None, 1)
524        tag_elt = Element("tag")
525        tag_elt.children.append(token_words[0])
526        li.children.append(tag_elt)
527
528        if len(token_words) > 1:
529            arg_elt = Element("arg")
530            arg_elt.children.append(token_words[1])
531            li.children.append(arg_elt)
532    else:
533        li = Element("li")
534        if list_type == 'olist':
535            li.attribs["bullet"] = bullet_token.contents
536
537    # Add the bullet.
538    stack[-1].children.append(li)
539    stack.append(li)
540    indent_stack.append(None)
541
542##################################################
543## Tokenization
544##################################################
545
546class Token:
547    """
548    C{Token}s are an intermediate data structure used while
    constructing the DOM tree for a formatted docstring.
550    There are five types of C{Token}:
551
552        - Paragraphs
553        - Literal blocks
554        - Doctest blocks
555        - Headings
556        - Bullets
557
558    The text contained in each C{Token} is stored in the
559    C{contents} variable.  The string in this variable has been
560    normalized.  For paragraphs, this means that it has been converted
561    into a single line of text, with newline/indentation replaced by
562    single spaces.  For literal blocks and doctest blocks, this means
563    that the appropriate amount of leading whitespace has been removed
564    from each line.
565
566    Each C{Token} has an indentation level associated with it,
567    stored in the C{indent} variable.  This indentation level is used
568    by the structuring procedure to assemble hierarchical blocks.
569
570    @type tag: C{string}
571    @ivar tag: This C{Token}'s type.  Possible values are C{Token.PARA}
572        (paragraph), C{Token.LBLOCK} (literal block), C{Token.DTBLOCK}
        (doctest block), C{Token.HEADING} (heading), and
        C{Token.BULLET} (bullet).
574
575    @type startline: C{int}
576    @ivar startline: The line on which this C{Token} begins.  This
577        line number is only used for issuing errors.
578
579    @type contents: C{string}
580    @ivar contents: The normalized text contained in this C{Token}.
581
582    @type indent: C{int} or C{None}
583    @ivar indent: The indentation level of this C{Token} (in
584        number of leading spaces).  A value of C{None} indicates an
585        unknown indentation; this is used for list items and fields
586        that begin with one-line paragraphs.
587
588    @type level: C{int} or C{None}
589    @ivar level: The heading-level of this C{Token} if it is a
590        heading; C{None}, otherwise.  Valid heading levels are 0, 1,
591        and 2.
592
593    @type inline: C{bool}
594    @ivar inline: If True, the element is an inline level element, comparable
595        to an HTML C{<span>} tag. Else, it is a block level element, comparable
596        to an HTML C{<div>}.
597
598    @type PARA: C{string}
599    @cvar PARA: The C{tag} value for paragraph C{Token}s.
600    @type LBLOCK: C{string}
601    @cvar LBLOCK: The C{tag} value for literal C{Token}s.
602    @type DTBLOCK: C{string}
603    @cvar DTBLOCK: The C{tag} value for doctest C{Token}s.
604    @type HEADING: C{string}
605    @cvar HEADING: The C{tag} value for heading C{Token}s.
606    @type BULLET: C{string}
607    @cvar BULLET: The C{tag} value for bullet C{Token}s.  This C{tag}
608        value is also used for field tag C{Token}s, since fields
609        function syntactically the same as list items.
610    """
611    # The possible token types.
612    PARA = "para"
613    LBLOCK = "literalblock"
614    DTBLOCK = "doctestblock"
615    HEADING = "heading"
616    BULLET = "bullet"
617
618    def __init__(self, tag, startline, contents, indent, level=None,
619                 inline=False):
620        """
621        Create a new C{Token}.
622
623        @param tag: The type of the new C{Token}.
624        @type tag: C{string}
625        @param startline: The line on which the new C{Token} begins.
626        @type startline: C{int}
627        @param contents: The normalized contents of the new C{Token}.
628        @type contents: C{string}
629        @param indent: The indentation of the new C{Token} (in number
630            of leading spaces).  A value of C{None} indicates an
631            unknown indentation.
632        @type indent: C{int} or C{None}
633        @param level: The heading-level of this C{Token} if it is a
634            heading; C{None}, otherwise.
635        @type level: C{int} or C{None}
        @param inline: Is this C{Token} inline as a C{<span>}?
637        @type inline: C{bool}
638        """
639        self.tag = tag
640        self.startline = startline
641        self.contents = contents
642        self.indent = indent
643        self.level = level
644        self.inline = inline
645
646    def __repr__(self):
647        """
648        @rtype: C{string}
649        @return: the formal representation of this C{Token}.
            C{Token}s have formal representations of the form::
651                <Token: para at line 12>
652        """
653        return '<Token: %s at line %s>' % (self.tag, self.startline)
654
655    def to_dom(self, doc):
656        """
657        @return: a DOM representation of this C{Token}.
658        @rtype: L{Element}
659        """
660        e = Element(self.tag)
661        e.children.append(self.contents)
662        return e
663
664# Construct regular expressions for recognizing bullets.  These are
665# global so they don't have to be reconstructed each time we tokenize
666# a docstring.
667_ULIST_BULLET = '[-]( +|$)'
668_OLIST_BULLET = '(\d+[.])+( +|$)'
669_FIELD_BULLET = '@\w+( [^{}:\n]+)?:'
670_BULLET_RE = re.compile(_ULIST_BULLET + '|' +
671                        _OLIST_BULLET + '|' +
672                        _FIELD_BULLET)
673_LIST_BULLET_RE = re.compile(_ULIST_BULLET + '|' + _OLIST_BULLET)
674_FIELD_BULLET_RE = re.compile(_FIELD_BULLET)
675del _ULIST_BULLET, _OLIST_BULLET, _FIELD_BULLET
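
# Examples of lines whose leading text matches these patterns (illustrative):
#
#     _LIST_BULLET_RE:   "- an unordered item",  "1. first",  "1.2. nested"
#     _FIELD_BULLET_RE:  "@return:",  "@param x:",  "@see: some.module"
#
# _BULLET_RE matches any of the three bullet forms.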
676
677def _tokenize_doctest(lines, start, block_indent, tokens, errors):
678    """
679    Construct a L{Token} containing the doctest block starting at
680    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
681    should be the indentation of the doctest block.  Any errors
682    generated while tokenizing the doctest block will be appended to
683    C{errors}.
684
685    @param lines: The list of lines to be tokenized
686    @param start: The index into C{lines} of the first line of the
687        doctest block to be tokenized.
688    @param block_indent: The indentation of C{lines[start]}.  This is
689        the indentation of the doctest block.
690    @param errors: A list where any errors generated during parsing
691        will be stored.  If no list is specified, then errors will
692        generate exceptions.
693    @return: The line number of the first line following the doctest
694        block.
695
696    @type lines: C{list} of C{string}
697    @type start: C{int}
698    @type block_indent: C{int}
699    @type tokens: C{list} of L{Token}
700    @type errors: C{list} of L{ParseError}
701    @rtype: C{int}
702    """
703    # If they dedent past block_indent, keep track of the minimum
704    # indentation.  This is used when removing leading indentation
705    # from the lines of the doctest block.
706    min_indent = block_indent
707
708    linenum = start + 1
709    while linenum < len(lines):
710        # Find the indentation of this line.
711        line = lines[linenum]
712        indent = len(line) - len(line.lstrip())
713
714        # A blank line ends doctest block.
715        if indent == len(line): break
716
717        # A Dedent past block_indent is an error.
718        if indent < block_indent:
719            min_indent = min(min_indent, indent)
720            estr = 'Improper doctest block indentation.'
721            errors.append(TokenizationError(estr, linenum))
722
723        # Go on to the next line.
724        linenum += 1
725
726    # Add the token, and return the linenum after the token ends.
727    contents = [line[min_indent:] for line in lines[start:linenum]]
728    contents = '\n'.join(contents)
729    tokens.append(Token(Token.DTBLOCK, start, contents, block_indent))
730    return linenum
731
732def _tokenize_literal(lines, start, block_indent, tokens, errors):
733    """
734    Construct a L{Token} containing the literal block starting at
735    C{lines[start]}, and append it to C{tokens}.  C{block_indent}
736    should be the indentation of the literal block.  Any errors
737    generated while tokenizing the literal block will be appended to
738    C{errors}.
739
740    @param lines: The list of lines to be tokenized
741    @param start: The index into C{lines} of the first line of the
742        literal block to be tokenized.
743    @param block_indent: The indentation of C{lines[start]}.  This is
744        the indentation of the literal block.
745    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this literal block
747        will be appended to this list.
748    @return: The line number of the first line following the literal
749        block.
750
751    @type lines: C{list} of C{string}
752    @type start: C{int}
753    @type block_indent: C{int}
754    @type tokens: C{list} of L{Token}
755    @type errors: C{list} of L{ParseError}
756    @rtype: C{int}
757    """
758    linenum = start + 1
759    while linenum < len(lines):
760        # Find the indentation of this line.
761        line = lines[linenum]
762        indent = len(line) - len(line.lstrip())
763
764        # A Dedent to block_indent ends the literal block.
        # (Ignore blank lines, though.)
766        if len(line) != indent and indent <= block_indent:
767            break
768
769        # Go on to the next line.
770        linenum += 1
771
772    # Add the token, and return the linenum after the token ends.
773    contents = [line[block_indent+1:] for line in lines[start:linenum]]
774    contents = '\n'.join(contents)
775    contents = re.sub('(\A[ \n]*\n)|(\n[ \n]*\Z)', '', contents)
776    tokens.append(Token(Token.LBLOCK, start, contents, block_indent))
777    return linenum
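
# In practice, a literal block is introduced by a paragraph that ends with
# '::'; the indented text that follows becomes a single LBLOCK token.  For
# example (a hypothetical docstring fragment):
#
#     Example output::
#
#         $ epydoc --html epytext.py
#
# Here "Example output:" becomes a PARA token (one trailing ':' is kept)
# and the indented line becomes the LBLOCK token's contents.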
778
779def _tokenize_listart(lines, start, bullet_indent, tokens, errors):
780    """
781    Construct L{Token}s for the bullet and the first paragraph of the
782    list item (or field) starting at C{lines[start]}, and append them
783    to C{tokens}.  C{bullet_indent} should be the indentation of the
784    list item.  Any errors generated while tokenizing will be
785    appended to C{errors}.
786
787    @param lines: The list of lines to be tokenized
788    @param start: The index into C{lines} of the first line of the
789        list item to be tokenized.
790    @param bullet_indent: The indentation of C{lines[start]}.  This is
791        the indentation of the list item.
792    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this list item
794        will be appended to this list.
795    @return: The line number of the first line following the list
796        item's first paragraph.
797
798    @type lines: C{list} of C{string}
799    @type start: C{int}
800    @type bullet_indent: C{int}
801    @type tokens: C{list} of L{Token}
802    @type errors: C{list} of L{ParseError}
803    @rtype: C{int}
804    """
805    linenum = start + 1
806    para_indent = None
807    doublecolon = lines[start].rstrip()[-2:] == '::'
808
809    # Get the contents of the bullet.
810    para_start = _BULLET_RE.match(lines[start], bullet_indent).end()
811    bcontents = lines[start][bullet_indent:para_start].strip()
812
813    while linenum < len(lines):
814        # Find the indentation of this line.
815        line = lines[linenum]
816        indent = len(line) - len(line.lstrip())
817
818        # "::" markers end paragraphs.
819        if doublecolon: break
820        if line.rstrip()[-2:] == '::': doublecolon = 1
821
822        # A blank line ends the token
823        if indent == len(line): break
824
825        # Dedenting past bullet_indent ends the list item.
826        if indent < bullet_indent: break
827
828        # A line beginning with a bullet ends the token.
829        if _BULLET_RE.match(line, indent): break
830
831        # If this is the second line, set the paragraph indentation, or
832        # end the token, as appropriate.
833        if para_indent == None: para_indent = indent
834
835        # A change in indentation ends the token
836        if indent != para_indent: break
837
838        # Go on to the next line.
839        linenum += 1
840
841    # Add the bullet token.
842    tokens.append(Token(Token.BULLET, start, bcontents, bullet_indent,
843                        inline=True))
844
845    # Add the paragraph token.
846    pcontents = ([lines[start][para_start:].strip()] +
847                 [line.strip() for line in lines[start+1:linenum]])
848    pcontents = ' '.join(pcontents).strip()
849    if pcontents:
850        tokens.append(Token(Token.PARA, start, pcontents, para_indent,
851                            inline=True))
852
853    # Return the linenum after the paragraph token ends.
854    return linenum
855
856def _tokenize_para(lines, start, para_indent, tokens, errors):
857    """
858    Construct a L{Token} containing the paragraph starting at
859    C{lines[start]}, and append it to C{tokens}.  C{para_indent}
    should be the indentation of the paragraph.  Any errors
861    generated while tokenizing the paragraph will be appended to
862    C{errors}.
863
864    @param lines: The list of lines to be tokenized
865    @param start: The index into C{lines} of the first line of the
866        paragraph to be tokenized.
867    @param para_indent: The indentation of C{lines[start]}.  This is
868        the indentation of the paragraph.
869    @param errors: A list of the errors generated by parsing.  Any
        new errors generated while tokenizing this paragraph
871        will be appended to this list.
872    @return: The line number of the first line following the
873        paragraph.
874
875    @type lines: C{list} of C{string}
876    @type start: C{int}
877    @type para_indent: C{int}
878    @type tokens: C{list} of L{Token}
879    @type errors: C{list} of L{ParseError}
880    @rtype: C{int}
881    """
882    linenum = start + 1
883    doublecolon = 0
884    while linenum < len(lines):
885        # Find the indentation of this line.
886        line = lines[linenum]
887        indent = len(line) - len(line.lstrip())
888
889        # "::" markers end paragraphs.
890        if doublecolon: break
891        if line.rstrip()[-2:] == '::': doublecolon = 1
892
893        # Blank lines end paragraphs
894        if indent == len(line): break
895
896        # Indentation changes end paragraphs
897        if indent != para_indent: break
898
899        # List bullets end paragraphs
900        if _BULLET_RE.match(line, indent): break
901
902        # Check for mal-formatted field items.
903        if line[indent] == '@':
904            estr = "Possible mal-formatted field item."
905            errors.append(TokenizationError(estr, linenum, is_fatal=0))
906
907        # Go on to the next line.
908        linenum += 1
909
910    contents = [line.strip() for line in lines[start:linenum]]
911
912    # Does this token look like a heading?
913    if ((len(contents) < 2) or
914        (contents[1][0] not in _HEADING_CHARS) or
915        (abs(len(contents[0])-len(contents[1])) > 5)):
916        looks_like_heading = 0
917    else:
918        looks_like_heading = 1
919        for char in contents[1]:
920            if char != contents[1][0]:
921                looks_like_heading = 0
922                break
923
924    if looks_like_heading:
925        if len(contents[0]) != len(contents[1]):
926            estr = ("Possible heading typo: the number of "+
927                    "underline characters must match the "+
928                    "number of heading characters.")
929            errors.append(TokenizationError(estr, start, is_fatal=0))
930        else:
931            level = _HEADING_CHARS.index(contents[1][0])
932            tokens.append(Token(Token.HEADING, start,
933                                contents[0], para_indent, level))
934            return start+2
935
936    # Add the paragraph token, and return the linenum after it ends.
937    contents = ' '.join(contents)
938    tokens.append(Token(Token.PARA, start, contents, para_indent))
939    return linenum
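
# A heading is recognized as a one-line paragraph followed by a line of
# matching underline characters of the same length ('=', '-', or '~',
# corresponding to heading levels 0-2).  For example, this yields a
# HEADING token at level 0:
#
#     Introduction
#     ============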
940
941def _tokenize(str, errors):
942    """
943    Split a given formatted docstring into an ordered list of
944    C{Token}s, according to the epytext markup rules.
945
946    @param str: The epytext string
947    @type str: C{string}
948    @param errors: A list where any errors generated during parsing
949        will be stored.  If no list is specified, then errors will
950        generate exceptions.
951    @type errors: C{list} of L{ParseError}
952    @return: a list of the C{Token}s that make up the given string.
953    @rtype: C{list} of L{Token}
954    """
955    tokens = []
956    lines = str.split('\n')
957
    # Scan through the lines, determining what type of token we're
959    # dealing with, and tokenizing it, as appropriate.
960    linenum = 0
961    while linenum < len(lines):
962        # Get the current line and its indentation.
963        line = lines[linenum]
964        indent = len(line)-len(line.lstrip())
965
966        if indent == len(line):
967            # Ignore blank lines.
968            linenum += 1
969            continue
970        elif line[indent:indent+4] == '>>> ':
971            # blocks starting with ">>> " are doctest block tokens.
972            linenum = _tokenize_doctest(lines, linenum, indent,
973                                        tokens, errors)
974        elif _BULLET_RE.match(line, indent):
975            # blocks starting with a bullet are LI start tokens.
976            linenum = _tokenize_listart(lines, linenum, indent,
977                                        tokens, errors)
978            if tokens[-1].indent != None:
979                indent = tokens[-1].indent
980        else:
981            # Check for mal-formatted field items.
982            if line[indent] == '@':
983                estr = "Possible mal-formatted field item."
984                errors.append(TokenizationError(estr, linenum, is_fatal=0))
985
986            # anything else is either a paragraph or a heading.
987            linenum = _tokenize_para(lines, linenum, indent, tokens, errors)
988
989        # Paragraph tokens ending in '::' initiate literal blocks.
990        if (tokens[-1].tag == Token.PARA and
991            tokens[-1].contents[-2:] == '::'):
992            tokens[-1].contents = tokens[-1].contents[:-1]
993            linenum = _tokenize_literal(lines, linenum, indent, tokens, errors)
994
995    return tokens
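
# A small illustration of the token stream this produces:
#
#     toks = _tokenize("Overview.\n\n  - first item", [])
#     [repr(t) for t in toks]
#     # -> ['<Token: para at line 0>',
#     #     '<Token: bullet at line 2>',
#     #     '<Token: para at line 2>']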
996
997
998##################################################
999## Inline markup ("colorizing")
1000##################################################
1001
1002# Assorted regular expressions used for colorizing.
1003_BRACE_RE = re.compile('{|}')
1004_TARGET_RE = re.compile('^(.*?)\s*<(?:URI:|URL:)?([^<>]+)>$')
1005
1006def _colorize(doc, token, errors, tagName='para'):
1007    """
1008    Given a string containing the contents of a paragraph, produce a
1009    DOM C{Element} encoding that paragraph.  Colorized regions are
1010    represented using DOM C{Element}s, and text is represented using
1011    DOM C{Text}s.
1012
1013    @param errors: A list of errors.  Any newly generated errors will
1014        be appended to this list.
1015    @type errors: C{list} of C{string}
1016
1017    @param tagName: The element tag for the DOM C{Element} that should
1018        be generated.
1019    @type tagName: C{string}
1020
1021    @return: a DOM C{Element} encoding the given paragraph.
1022    @returntype: C{Element}
1023    """
1024    str = token.contents
1025    linenum = 0
1026
1027    # Maintain a stack of DOM elements, containing the ancestors of
1028    # the text currently being analyzed.  New elements are pushed when
1029    # "{" is encountered, and old elements are popped when "}" is
1030    # encountered.
1031    stack = [Element(tagName)]
1032
1033    # This is just used to make error-reporting friendlier.  It's a
1034    # stack parallel to "stack" containing the index of each element's
1035    # open brace.
1036    openbrace_stack = [0]
1037
1038    # Process the string, scanning for '{' and '}'s.  start is the
1039    # index of the first unprocessed character.  Each time through the
1040    # loop, we process the text from the first unprocessed character
1041    # to the next open or close brace.
1042    start = 0
1043    while 1:
1044        match = _BRACE_RE.search(str, start)
1045        if match == None: break
1046        end = match.start()
1047
        # Open braces start new colorizing elements.  When preceded
1049        # by a capital letter, they specify a colored region, as
1050        # defined by the _COLORIZING_TAGS dictionary.  Otherwise,
1051        # use a special "literal braces" element (with tag "litbrace"),
1052        # and convert them to literal braces once we find the matching
1053        # close-brace.
1054        if match.group() == '{':
1055            if (end>0) and 'A' <= str[end-1] <= 'Z':
1056                if (end-1) > start:
1057                    stack[-1].children.append(str[start:end-1])
1058                if str[end-1] not in _COLORIZING_TAGS:
1059                    estr = "Unknown inline markup tag."
1060                    errors.append(ColorizingError(estr, token, end-1))
1061                    stack.append(Element('unknown'))
1062                else:
1063                    tag = _COLORIZING_TAGS[str[end-1]]
1064                    stack.append(Element(tag))
1065            else:
1066                if end > start:
1067                    stack[-1].children.append(str[start:end])
1068                stack.append(Element('litbrace'))
1069            openbrace_stack.append(end)
1070            stack[-2].children.append(stack[-1])
1071
1072        # Close braces end colorizing elements.
1073        elif match.group() == '}':
1074            # Check for (and ignore) unbalanced braces.
1075            if len(stack) <= 1:
1076                estr = "Unbalanced '}'."
1077                errors.append(ColorizingError(estr, token, end))
1078                start = end + 1
1079                continue
1080
1081            # Add any remaining text.
1082            if end > start:
1083                stack[-1].children.append(str[start:end])
1084
1085            # Special handling for symbols:
1086            if stack[-1].tag == 'symbol':
1087                if (len(stack[-1].children) != 1 or
1088                    not isinstance(stack[-1].children[0], basestring)):
1089                    estr = "Invalid symbol code."
1090                    errors.append(ColorizingError(estr, token, end))
1091                else:
1092                    symb = stack[-1].children[0]
1093                    if symb in _SYMBOLS:
1094                        # It's a symbol
1095                        stack[-2].children[-1] = Element('symbol', symb)
1096                    else:
1097                        estr = "Invalid symbol code."
1098                        errors.append(ColorizingError(estr, token, end))
1099
1100            # Special handling for escape elements:
1101            if stack[-1].tag == 'escape':
1102                if (len(stack[-1].children) != 1 or
1103                    not isinstance(stack[-1].children[0], basestring)):
1104                    estr = "Invalid escape code."
1105                    errors.append(ColorizingError(estr, token, end))
1106                else:
1107                    escp = stack[-1].children[0]
1108                    if escp in _ESCAPES:
                        # It's an escape from _ESCAPES
1110                        stack[-2].children[-1] = _ESCAPES[escp]
1111                    elif len(escp) == 1:
                        # It's a single-character escape (e.g., E{.})
1113                        stack[-2].children[-1] = escp
1114                    else:
1115                        estr = "Invalid escape code."
1116                        errors.append(ColorizingError(estr, token, end))
1117
1118            # Special handling for literal braces elements:
1119            if stack[-1].tag == 'litbrace':
1120                stack[-2].children[-1:] = ['{'] + stack[-1].children + ['}']
1121
1122            # Special handling for graphs:
1123            if stack[-1].tag == 'graph':
1124                _colorize_graph(doc, stack[-1], token, end, errors)
1125
1126            # Special handling for link-type elements:
1127            if stack[-1].tag in _LINK_COLORIZING_TAGS:
1128                _colorize_link(doc, stack[-1], token, end, errors)
1129
1130            # Pop the completed element.
1131            openbrace_stack.pop()
1132            stack.pop()
1133
1134        start = end+1
1135
1136    # Add any final text.
1137    if start < len(str):
1138        stack[-1].children.append(str[start:])
1139
1140    if len(stack) != 1:
1141        estr = "Unbalanced '{'."
1142        errors.append(ColorizingError(estr, token, openbrace_stack[-1]))
1143
1144    return stack[0]
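
# For example, a paragraph token whose contents are "B{Bold} and C{code}."
# colorizes to an element that serializes (via str()) as:
#
#     <para><bold>Bold</bold> and <code>code</code>.</para>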
1145
1146GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']
1147
1148def _colorize_graph(doc, graph, token, end, errors):
1149    """
    E.g.::
1151      G{classtree}
1152      G{classtree x, y, z}
1153      G{importgraph}
1154    """
1155    bad_graph_spec = False
1156
1157    children = graph.children[:]
1158    graph.children = []
1159
1160    if len(children) != 1 or not isinstance(children[0], basestring):
1161        bad_graph_spec = "Bad graph specification"
1162    else:
1163        pieces = children[0].split(None, 1)
1164        graphtype = pieces[0].replace(':','').strip().lower()
1165        if graphtype in GRAPH_TYPES:
1166            if len(pieces) == 2:
1167                if re.match(r'\s*:?\s*([\w\.]+\s*,?\s*)*', pieces[1]):
1168                    args = pieces[1].replace(',', ' ').replace(':','').split()
1169                else:
1170                    bad_graph_spec = "Bad graph arg list"
1171            else:
1172                args = []
1173        else:
1174            bad_graph_spec = ("Bad graph type %s -- use one of %s" %
1175                              (pieces[0], ', '.join(GRAPH_TYPES)))
1176
1177    if bad_graph_spec:
1178        errors.append(ColorizingError(bad_graph_spec, token, end))
1179        graph.children.append('none')
1180        graph.children.append('')
1181        return
1182
1183    graph.children.append(graphtype)
1184    for arg in args:
1185        graph.children.append(arg)
1186
1187def _colorize_link(doc, link, token, end, errors):
1188    variables = link.children[:]
1189
1190    # If the last child isn't text, we know it's bad.
1191    if len(variables)==0 or not isinstance(variables[-1], basestring):
1192        estr = "Bad %s target." % link.tag
1193        errors.append(ColorizingError(estr, token, end))
1194        return
1195
1196    # Did they provide an explicit target?
1197    match2 = _TARGET_RE.match(variables[-1])
1198    if match2:
1199        (text, target) = match2.groups()
1200        variables[-1] = text
1201    # Can we extract an implicit target?
1202    elif len(variables) == 1:
1203        target = variables[0]
1204    else:
1205        estr = "Bad %s target." % link.tag
1206        errors.append(ColorizingError(estr, token, end))
1207        return
1208
1209    # Construct the name element.
1210    name_elt = Element('name', *variables)
1211
1212    # Clean up the target.  For URIs, assume http or mailto if they
1213    # don't specify (no relative urls)
1214    target = re.sub(r'\s', '', target)
1215    if link.tag=='uri':
1216        if not re.match(r'\w+:', target):
1217            if re.match(r'\w+@(\w+)(\.\w+)*', target):
1218                target = 'mailto:' + target
1219            else:
1220                target = 'http://'+target
1221    elif link.tag=='link':
1222        # Remove arg lists for functions (e.g., L{_colorize_link()})
1223        target = re.sub(r'\(.*\)$', '', target)
1224        if not re.match(r'^[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)*$', target):
1225            estr = "Bad link target."
1226            errors.append(ColorizingError(estr, token, end))
1227            return
1228
1229    # Construct the target element.
1230    target_elt = Element('target', target)
1231
1232    # Add them to the link element.
1233    link.children = [name_elt, target_elt]
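
# Examples of how targets are resolved (illustrative; the identifiers and
# addresses are made up):
#
#     U{www.python.org}             ->  target 'http://www.python.org'
#     U{mail me<jdoe@example.com>}  ->  target 'mailto:jdoe@example.com'
#     L{Element.children}           ->  target 'Element.children'
#     L{parse()}                    ->  target 'parse' (arg list stripped)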
1234
1235##################################################
1236## Formatters
1237##################################################
1238
1239def to_epytext(tree, indent=0, seclevel=0):
1240    """
1241    Convert a DOM document encoding epytext back to an epytext string.
1242    This is the inverse operation from L{parse}.  I.e., assuming there
1243    are no errors, the following is true:
1244        - C{parse(to_epytext(tree)) == tree}
1245
    The converse is also true, except that whitespace, line wrapping,
    and character escaping may be done differently.
1248        - C{to_epytext(parse(str)) == str} (approximately)
1249
1250    @param tree: A DOM document encoding of an epytext string.
1251    @type tree: C{Element}
1252    @param indent: The indentation for the string representation of
1253        C{tree}.  Each line of the returned string will begin with
1254        C{indent} space characters.
1255    @type indent: C{int}
1256    @param seclevel: The section level that C{tree} appears at.  This
1257        is used to generate section headings.
1258    @type seclevel: C{int}
1259    @return: The epytext string corresponding to C{tree}.
1260    @rtype: C{string}
1261    """
1262    if isinstance(tree, basestring):
1263        str = re.sub(r'\{', '\0', tree)
1264        str = re.sub(r'\}', '\1', str)
1265        return str
1266
1267    if tree.tag == 'epytext': indent -= 2
1268    if tree.tag == 'section': seclevel += 1
1269    variables = [to_epytext(c, indent+2, seclevel) for c in tree.children]
1270    childstr = ''.join(variables)
1271
1272    # Clean up for literal blocks (add the double "::" back)
1273    childstr = re.sub(':(\s*)\2', '::\\1', childstr)
1274
1275    if tree.tag == 'para':
1276        str = wordwrap(childstr, indent)+'\n'
1277        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
1278        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
1279        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
1280        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
1281        str = re.sub('\0', 'E{lb}', str)
1282        str = re.sub('\1', 'E{rb}', str)
1283        return str
1284    elif tree.tag == 'li':
1285        bullet = tree.attribs.get('bullet') or '-'
1286        return indent*' '+ bullet + ' ' + childstr.lstrip()
1287    elif tree.tag == 'heading':
1288        str = re.sub('\0', 'E{lb}',childstr)
1289        str = re.sub('\1', 'E{rb}', str)
1290        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
1291        return (indent-2)*' ' + str + '\n' + (indent-2)*' '+uline+'\n'
1292    elif tree.tag == 'doctestblock':
1293        str = re.sub('\0', '{', childstr)
1294        str = re.sub('\1', '}', str)
1295        lines = ['  '+indent*' '+line for line in str.split('\n')]
1296        return '\n'.join(lines) + '\n\n'
1297    elif tree.tag == 'literalblock':
1298        str = re.sub('\0', '{', childstr)
1299        str = re.sub('\1', '}', str)
1300        lines = [(indent+1)*' '+line for line in str.split('\n')]
1301        return '\2' + '\n'.join(lines) + '\n\n'
1302    elif tree.tag == 'field':
1303        numargs = 0
1304        while tree.children[numargs+1].tag == 'arg': numargs += 1
1305        tag = variables[0]
1306        args = variables[1:1+numargs]
1307        body = variables[1+numargs:]
1308        str = (indent)*' '+'@'+variables[0]
1309        if args: str += '(' + ', '.join(args) + ')'
1310        return str + ':\n' + ''.join(body)
1311    elif tree.tag == 'target':
1312        return '<%s>' % childstr
1313    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
1314                          'section', 'olist', 'ulist', 'name'):
1315        return childstr
1316    elif tree.tag == 'symbol':
1317        return 'E{%s}' % childstr
1318    elif tree.tag == 'graph':
1319        return 'G{%s}' % ' '.join(variables)
1320    else:
1321        for (tag, name) in _COLORIZING_TAGS.items():
1322            if name == tree.tag:
1323                return '%s{%s}' % (tag, childstr)
1324    raise ValueError('Unknown DOM element %r' % tree.tag)
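
# A round-trip sketch: regenerate epytext source from a parsed tree.  (No
# exact output is asserted here, since whitespace, wrapping, and character
# escaping may differ from the original input.)
#
#     tree = parse("A B{bold} word.")
#     print to_epytext(tree)   # epytext source describing the same tree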
1325
1326SYMBOL_TO_PLAINTEXT = {
1327    'crarr': '\\',
1328    }
1329
1330def to_plaintext(tree, indent=0, seclevel=0):
1331    """
1332    Convert a DOM document encoding epytext to a string representation.
1333    This representation is similar to the string generated by
1334    C{to_epytext}, but C{to_plaintext} removes inline markup, prints
1335    escaped characters in unescaped form, etc.
1336
1337    @param tree: A DOM document encoding of an epytext string.
1338    @type tree: C{Element}
1339    @param indent: The indentation for the string representation of
1340        C{tree}.  Each line of the returned string will begin with
1341        C{indent} space characters.
1342    @type indent: C{int}
1343    @param seclevel: The section level that C{tree} appears at.  This
1344        is used to generate section headings.
1345    @type seclevel: C{int}
1346    @return: The epytext string corresponding to C{tree}.
1347    @rtype: C{string}
1348    """
1349    if isinstance(tree, basestring): return tree
1350
1351    if tree.tag == 'section': seclevel += 1
1352
1353    # Figure out the child indent level.
1354    if tree.tag == 'epytext': cindent = indent
1355    elif tree.tag == 'li' and tree.attribs.get('bullet'):
1356        cindent = indent + 1 + len(tree.attribs.get('bullet'))
1357    else:
1358        cindent = indent + 2
1359    variables = [to_plaintext(c, cindent, seclevel) for c in tree.children]
1360    childstr = ''.join(variables)
1361
1362    if tree.tag == 'para':
1363        return wordwrap(childstr, indent)+'\n'
1364    elif tree.tag == 'li':
        # Use the list item's explicit bullet if it has one; otherwise
        # fall back on a default '-' bullet.
1367        bullet = tree.attribs.get('bullet') or '-'
1368        return indent*' ' + bullet + ' ' + childstr.lstrip()
1369    elif tree.tag == 'heading':
1370        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
1371        return ((indent-2)*' ' + childstr + '\n' +
1372                (indent-2)*' ' + uline + '\n')
1373    elif tree.tag == 'doctestblock':
1374        lines = [(indent+2)*' '+line for line in childstr.split('\n')]
1375        return '\n'.join(lines) + '\n\n'
1376    elif tree.tag == 'literalblock':
1377        lines = [(indent+1)*' '+line for line in childstr.split('\n')]
1378        return '\n'.join(lines) + '\n\n'
1379    elif tree.tag == 'fieldlist':
1380        return childstr
1381    elif tree.tag == 'field':
1382        numargs = 0
1383        while tree.children[numargs+1].tag == 'arg': numargs += 1
1384        tag = variables[0]
1385        args = variables[1:1+numargs]
1386        body = variables[1+numargs:]
1387        str = (indent)*' '+'@'+variables[0]
1388        if args: str += '(' + ', '.join(args) + ')'
1389        return str + ':\n' + ''.join(body)
1390    elif tree.tag == 'uri':
        if len(variables) != 2: raise ValueError('Bad URI')
1392        elif variables[0] == variables[1]: return '<%s>' % variables[1]
1393        else: return '%r<%s>' % (variables[0], variables[1])
1394    elif tree.tag == 'link':
1395        if len(variables) != 2: raise ValueError('Bad Link')
1396        return '%s' % variables[0]
1397    elif tree.tag in ('olist', 'ulist'):
1398        # [xx] always use condensed lists.
1399        ## Use a condensed list if each list item is 1 line long.
1400        #for child in variables:
1401        #    if child.count('\n') > 2: return childstr
1402        return childstr.replace('\n\n', '\n')+'\n'
1403    elif tree.tag == 'symbol':
1404        return '%s' % SYMBOL_TO_PLAINTEXT.get(childstr, childstr)
1405    elif tree.tag == 'graph':
1406        return '<<%s graph: %s>>' % (variables[0], ', '.join(variables[1:]))
1407    else:
1408        # Assume that anything else can be passed through.
1409        return childstr
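
# --- Illustrative example (added for exposition; not part of the
# --- original module).  A minimal sketch of feeding parse() output to
# --- to_plaintext(); the sample string is made up.
def _example_to_plaintext():
    """
    Parse a small epytext string and render it as plain text.  Any
    parse errors are collected but ignored here.
    """
    sample = 'This is B{bold} text.\n\nA second paragraph.\n'
    errors = []
    tree = parse(sample, errors)
    if tree is None:
        # parse() may give up on badly-formed input; fall back to the
        # raw string in that case.
        return sample
    # to_plaintext() drops the inline markup, so the result reads like
    # the input minus the B{...} colorization.
    return to_plaintext(tree)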
1410
1411def to_debug(tree, indent=4, seclevel=0):
1412    """
1413    Convert a DOM document encoding epytext back to an epytext string,
1414    annotated with extra debugging information.  This function is
1415    similar to L{to_epytext}, but it adds explicit information about
1416    where different blocks begin, along the left margin.
1417
1418    @param tree: A DOM document encoding of an epytext string.
1419    @type tree: C{Element}
1420    @param indent: The indentation for the string representation of
1421        C{tree}.  Each line of the returned string will begin with
1422        C{indent} space characters.
1423    @type indent: C{int}
1424    @param seclevel: The section level that C{tree} appears at.  This
1425        is used to generate section headings.
1426    @type seclevel: C{int}
    @return: The annotated epytext string corresponding to C{tree}.
1428    @rtype: C{string}
1429    """
1430    if isinstance(tree, basestring):
1431        str = re.sub(r'\{', '\0', tree)
1432        str = re.sub(r'\}', '\1', str)
1433        return str
1434
1435    if tree.tag == 'section': seclevel += 1
1436    variables = [to_debug(c, indent+2, seclevel) for c in tree.children]
1437    childstr = ''.join(variables)
1438
1439    # Clean up for literal blocks (add the double "::" back)
1440    childstr = re.sub(':( *\n     \|\n)\2', '::\\1', childstr)
1441
1442    if tree.tag == 'para':
1443        str = wordwrap(childstr, indent-6, 69)+'\n'
1444        str = re.sub(r'((^|\n)\s*\d+)\.', r'\1E{.}', str)
1445        str = re.sub(r'((^|\n)\s*)-', r'\1E{-}', str)
1446        str = re.sub(r'((^|\n)\s*)@', r'\1E{@}', str)
1447        str = re.sub(r'::(\s*($|\n))', r'E{:}E{:}\1', str)
1448        str = re.sub('\0', 'E{lb}', str)
1449        str = re.sub('\1', 'E{rb}', str)
1450        lines = str.rstrip().split('\n')
1451        lines[0] = '   P>|' + lines[0]
1452        lines[1:] = ['     |'+l for l in lines[1:]]
1453        return '\n'.join(lines)+'\n     |\n'
1454    elif tree.tag == 'li':
1455        bullet = tree.attribs.get('bullet') or '-'
1456        return '  LI>|'+ (indent-6)*' '+ bullet + ' ' + childstr[6:].lstrip()
1457    elif tree.tag in ('olist', 'ulist'):
1458        return 'LIST>|'+(indent-4)*' '+childstr[indent+2:]
1459    elif tree.tag == 'heading':
1460        str = re.sub('\0', 'E{lb}', childstr)
1461        str = re.sub('\1', 'E{rb}', str)
1462        uline = len(childstr)*_HEADING_CHARS[seclevel-1]
        return ('SEC%d>|' % seclevel + (indent-8)*' ' + str + '\n' +
                '     |'+(indent-8)*' ' + uline + '\n')
1465    elif tree.tag == 'doctestblock':
1466        str = re.sub('\0', '{', childstr)
1467        str = re.sub('\1', '}', str)
1468        lines = ['     |'+(indent-4)*' '+line for line in str.split('\n')]
1469        lines[0] = 'DTST>'+lines[0][5:]
1470        return '\n'.join(lines) + '\n     |\n'
1471    elif tree.tag == 'literalblock':
1472        str = re.sub('\0', '{', childstr)
1473        str = re.sub('\1', '}', str)
1474        lines = ['     |'+(indent-5)*' '+line for line in str.split('\n')]
1475        lines[0] = ' LIT>'+lines[0][5:]
1476        return '\2' + '\n'.join(lines) + '\n     |\n'
1477    elif tree.tag == 'field':
1478        numargs = 0
1479        while tree.children[numargs+1].tag == 'arg': numargs += 1
1480        tag = variables[0]
1481        args = variables[1:1+numargs]
1482        body = variables[1+numargs:]
1483        str = ' FLD>|'+(indent-6)*' '+'@'+variables[0]
1484        if args: str += '(' + ', '.join(args) + ')'
1485        return str + ':\n' + ''.join(body)
1486    elif tree.tag == 'target':
1487        return '<%s>' % childstr
1488    elif tree.tag in ('fieldlist', 'tag', 'arg', 'epytext',
1489                          'section', 'olist', 'ulist', 'name'):
1490        return childstr
1491    elif tree.tag == 'symbol':
1492        return 'E{%s}' % childstr
1493    elif tree.tag == 'graph':
1494        return 'G{%s}' % ' '.join(variables)
1495    else:
1496        for (tag, name) in _COLORIZING_TAGS.items():
1497            if name == tree.tag:
1498                return '%s{%s}' % (tag, childstr)
1499    raise ValueError('Unknown DOM element %r' % tree.tag)
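
# For reference, to_debug() tags the first line of each block in a
# six-character left margin: '   P>|' for paragraphs, '  LI>|' for list
# items, 'LIST>|' for lists, 'SECn>|' for level-n headings, 'DTST>|'
# for doctest blocks, ' LIT>|' for literal blocks, and ' FLD>|' for
# fields; continuation lines are prefixed with '     |'.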
1500
1501##################################################
1502## Top-Level Wrapper function
1503##################################################
1504def pparse(str, show_warnings=1, show_errors=1, stream=sys.stderr):
1505    """
1506    Pretty-parse the string.  This parses the string, and catches any
1507    warnings or errors produced.  Any warnings and errors are
1508    displayed, and the resulting DOM parse structure is returned.
1509
1510    @param str: The string to parse.
1511    @type str: C{string}
1512    @param show_warnings: Whether or not to display non-fatal errors
1513        generated by parsing C{str}.
1514    @type show_warnings: C{boolean}
1515    @param show_errors: Whether or not to display fatal errors
1516        generated by parsing C{str}.
1517    @type show_errors: C{boolean}
1518    @param stream: The stream that warnings and errors should be
1519        written to.
1520    @type stream: C{stream}
1521    @return: a DOM document encoding the contents of C{str}.
1522    @rtype: C{Element}
1523    @raise SyntaxError: If any fatal errors were encountered.
1524    """
    errors = []
    warnings = []
    confused = 0
    try:
        val = parse(str, errors)
        warnings = [e for e in errors if not e.is_fatal()]
        errors = [e for e in errors if e.is_fatal()]
    except:
        # parse() raised an unexpected exception; re-raise it below.
        confused = 1
1533
1534    if not show_warnings: warnings = []
1535    warnings.sort()
1536    errors.sort()
1537    if warnings:
1538        print >>stream, '='*SCRWIDTH
1539        print >>stream, "WARNINGS"
1540        print >>stream, '-'*SCRWIDTH
1541        for warning in warnings:
1542            print >>stream, warning.as_warning()
1543        print >>stream, '='*SCRWIDTH
1544    if errors and show_errors:
1545        if not warnings: print >>stream, '='*SCRWIDTH
1546        print >>stream, "ERRORS"
1547        print >>stream, '-'*SCRWIDTH
1548        for error in errors:
1549            print >>stream, error
1550        print >>stream, '='*SCRWIDTH
1551
1552    if confused: raise
1553    elif errors: raise SyntaxError('Encountered Errors')
1554    else: return val
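
# --- Illustrative example (not part of the original module).  A
# --- minimal sketch of calling pparse(); the sample string and the use
# --- of StringIO to capture diagnostics are assumptions.
def _example_pparse():
    """
    Parse a small epytext string with pparse(), sending warning and
    error output to an in-memory buffer instead of stderr.
    """
    from StringIO import StringIO
    buf = StringIO()
    sample = 'Summary line.\n\n  - first item\n  - second item\n'
    try:
        tree = pparse(sample, stream=buf)
    except SyntaxError:
        # pparse() raises SyntaxError if any fatal errors were found.
        return None, buf.getvalue()
    return tree, buf.getvalue()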
1555
1556##################################################
1557## Parse Errors
1558##################################################
1559
1560class TokenizationError(ParseError):
1561    """
1562    An error generated while tokenizing a formatted documentation
1563    string.
1564    """
1565
1566class StructuringError(ParseError):
1567    """
1568    An error generated while structuring a formatted documentation
1569    string.
1570    """
1571
1572class ColorizingError(ParseError):
1573    """
1574    An error generated while colorizing a paragraph.
1575    """
1576    def __init__(self, descr, token, charnum, is_fatal=1):
1577        """
1578        Construct a new colorizing exception.
1579
1580        @param descr: A short description of the error.
1581        @type descr: C{string}
        @param token: The token where the error occurred.
        @type token: L{Token}
        @param charnum: The character index of the position in
            C{token} where the error occurred.
        @type charnum: C{int}
        @param is_fatal: True if this is a fatal error.
        @type is_fatal: C{boolean}
        """
1588        ParseError.__init__(self, descr, token.startline, is_fatal)
1589        self.token = token
1590        self.charnum = charnum
1591
1592    CONTEXT_RANGE = 20
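    # descr() shows the error description followed by up to
    # CONTEXT_RANGE characters of the token's contents on either side
    # of the error position, using '...' to mark truncation and a '^'
    # to point at the offending character.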
1593    def descr(self):
1594        RANGE = self.CONTEXT_RANGE
1595        if self.charnum <= RANGE:
1596            left = self.token.contents[0:self.charnum]
1597        else:
1598            left = '...'+self.token.contents[self.charnum-RANGE:self.charnum]
1599        if (len(self.token.contents)-self.charnum) <= RANGE:
1600            right = self.token.contents[self.charnum:]
1601        else:
1602            right = (self.token.contents[self.charnum:self.charnum+RANGE]
1603                     + '...')
1604        return ('%s\n\n%s%s\n%s^' % (self._descr, left, right, ' '*len(left)))
1605
1606##################################################
1607## Convenience parsers
1608##################################################
1609
1610def parse_as_literal(str):
1611    """
1612    Return a DOM document matching the epytext DTD, containing a
1613    single literal block.  That literal block will include the
    contents of the given string.  This function is typically used as a
1615    fall-back when the parser fails.
1616
1617    @param str: The string which should be enclosed in a literal
1618        block.
1619    @type str: C{string}
1620
1621    @return: A DOM document containing C{str} in a single literal
1622        block.
1623    @rtype: C{Element}
1624    """
1625    return Element('epytext', Element('literalblock', str))
1626
1627def parse_as_para(str):
1628    """
1629    Return a DOM document matching the epytext DTD, containing a
1630    single paragraph.  That paragraph will include the contents of the
1631    given string.  This can be used to wrap some forms of
1632    automatically generated information (such as type names) in
1633    paragraphs.
1634
1635    @param str: The string which should be enclosed in a paragraph.
1636    @type str: C{string}
1637
1638    @return: A DOM document containing C{str} in a single paragraph.
1639    @rtype: C{Element}
1640    """
1641    return Element('epytext', Element('para', str))
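
# --- Illustrative example (not part of the original module).  A small
# --- sketch of what the convenience parsers build; the sample strings
# --- are made up.
def _example_convenience_parsers():
    """
    Wrap raw strings with parse_as_literal() and parse_as_para(), and
    render both trees back to plain text.
    """
    literal_tree = parse_as_literal('x = f(a,\n      b)')
    para_tree = parse_as_para('a list of strings')
    # Both are Element('epytext', ...) trees, so they can be handed to
    # the same renderers as the output of parse().
    return to_plaintext(literal_tree), to_plaintext(para_tree)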
1642
1643#################################################################
1644##                    SUPPORT FOR EPYDOC
1645#################################################################
1646
1647def parse_docstring(docstring, errors, **options):
1648    """
    Parse the given docstring, which is formatted using epytext, and
    return a C{ParsedDocstring} representation of its contents.

    @param docstring: The docstring to parse.
    @type docstring: C{string}
    @param errors: A list where any errors generated during parsing
        will be stored.
    @type errors: C{list} of L{ParseError}
    @param options: Extra options.  Unknown options are ignored.
        Currently, the only extra option that is used is C{inline},
        which marks the docstring's top-level children as inline.
1658    @rtype: L{ParsedDocstring}
1659    """
1660    return ParsedEpytextDocstring(parse(docstring, errors), **options)
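
# --- Illustrative example (not part of the original module).  A
# --- minimal sketch of the epydoc-facing entry point; the sample
# --- docstring is made up.
def _example_parse_docstring():
    """
    Parse an epytext docstring the way epydoc does, then split off its
    fields, build a one-sentence summary, and render it as plain text.
    """
    sample = ('Return the length of C{s}.  Raises an error for None.\n\n'
              '@param s: The input string.\n'
              '@type s: C{string}\n')
    errors = []
    parsed = parse_docstring(sample, errors)
    # Separate the @param/@type fields from the main description.
    body, fields = parsed.split_fields()
    # summary() returns (summary, is_long): the first sentence plus a
    # flag telling whether anything else follows it.
    summary_doc, is_long = parsed.summary()
    # The plaintext renderer ignores the linker argument, so None is
    # acceptable here.
    text = summary_doc.to_plaintext(None)
    return body, fields, text, is_long, errors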
1661
1662class ParsedEpytextDocstring(ParsedDocstring):
1663    SYMBOL_TO_HTML = {
1664        # Symbols
1665        '<-': '&larr;', '->': '&rarr;', '^': '&uarr;', 'v': '&darr;',
1666
1667        # Greek letters
1668        'alpha': '&alpha;', 'beta': '&beta;', 'gamma': '&gamma;',
1669        'delta': '&delta;', 'epsilon': '&epsilon;', 'zeta': '&zeta;',
1670        'eta': '&eta;', 'theta': '&theta;', 'iota': '&iota;',
1671        'kappa': '&kappa;', 'lambda': '&lambda;', 'mu': '&mu;',
1672        'nu': '&nu;', 'xi': '&xi;', 'omicron': '&omicron;',
1673        'pi': '&pi;', 'rho': '&rho;', 'sigma': '&sigma;',
1674        'tau': '&tau;', 'upsilon': '&upsilon;', 'phi': '&phi;',
1675        'chi': '&chi;', 'psi': '&psi;', 'omega': '&omega;',
1676        'Alpha': '&Alpha;', 'Beta': '&Beta;', 'Gamma': '&Gamma;',
1677        'Delta': '&Delta;', 'Epsilon': '&Epsilon;', 'Zeta': '&Zeta;',
1678        'Eta': '&Eta;', 'Theta': '&Theta;', 'Iota': '&Iota;',
1679        'Kappa': '&Kappa;', 'Lambda': '&Lambda;', 'Mu': '&Mu;',
1680        'Nu': '&Nu;', 'Xi': '&Xi;', 'Omicron': '&Omicron;',
1681        'Pi': '&Pi;', 'Rho': '&Rho;', 'Sigma': '&Sigma;',
1682        'Tau': '&Tau;', 'Upsilon': '&Upsilon;', 'Phi': '&Phi;',
1683        'Chi': '&Chi;', 'Psi': '&Psi;', 'Omega': '&Omega;',
1684
1685        # HTML character entities
1686        'larr': '&larr;', 'rarr': '&rarr;', 'uarr': '&uarr;',
1687        'darr': '&darr;', 'harr': '&harr;', 'crarr': '&crarr;',
1688        'lArr': '&lArr;', 'rArr': '&rArr;', 'uArr': '&uArr;',
1689        'dArr': '&dArr;', 'hArr': '&hArr;',
1690        'copy': '&copy;', 'times': '&times;', 'forall': '&forall;',
1691        'exist': '&exist;', 'part': '&part;',
1692        'empty': '&empty;', 'isin': '&isin;', 'notin': '&notin;',
1693        'ni': '&ni;', 'prod': '&prod;', 'sum': '&sum;',
1694        'prop': '&prop;', 'infin': '&infin;', 'ang': '&ang;',
1695        'and': '&and;', 'or': '&or;', 'cap': '&cap;', 'cup': '&cup;',
1696        'int': '&int;', 'there4': '&there4;', 'sim': '&sim;',
1697        'cong': '&cong;', 'asymp': '&asymp;', 'ne': '&ne;',
1698        'equiv': '&equiv;', 'le': '&le;', 'ge': '&ge;',
1699        'sub': '&sub;', 'sup': '&sup;', 'nsub': '&nsub;',
1700        'sube': '&sube;', 'supe': '&supe;', 'oplus': '&oplus;',
1701        'otimes': '&otimes;', 'perp': '&perp;',
1702
1703        # Alternate (long) names
1704        'infinity': '&infin;', 'integral': '&int;', 'product': '&prod;',
1705        '<=': '&le;', '>=': '&ge;',
1706        }
1707
1708    SYMBOL_TO_LATEX = {
1709        # Symbols
1710        '<-': r'\(\leftarrow\)', '->': r'\(\rightarrow\)',
1711        '^': r'\(\uparrow\)', 'v': r'\(\downarrow\)',
1712
1713        # Greek letters (use lower case when upcase not available)
1714
1715        'alpha': r'\(\alpha\)', 'beta': r'\(\beta\)', 'gamma':
1716        r'\(\gamma\)', 'delta': r'\(\delta\)', 'epsilon':
1717        r'\(\epsilon\)', 'zeta': r'\(\zeta\)', 'eta': r'\(\eta\)',
1718        'theta': r'\(\theta\)', 'iota': r'\(\iota\)', 'kappa':
1719        r'\(\kappa\)', 'lambda': r'\(\lambda\)', 'mu': r'\(\mu\)',
1720        'nu': r'\(\nu\)', 'xi': r'\(\xi\)', 'omicron': r'\(o\)', 'pi':
1721        r'\(\pi\)', 'rho': r'\(\rho\)', 'sigma': r'\(\sigma\)', 'tau':
1722        r'\(\tau\)', 'upsilon': r'\(\upsilon\)', 'phi': r'\(\phi\)',
1723        'chi': r'\(\chi\)', 'psi': r'\(\psi\)', 'omega':
1724        r'\(\omega\)',
1725
1726        'Alpha': r'\(\alpha\)', 'Beta': r'\(\beta\)', 'Gamma':
1727        r'\(\Gamma\)', 'Delta': r'\(\Delta\)', 'Epsilon':
1728        r'\(\epsilon\)', 'Zeta': r'\(\zeta\)', 'Eta': r'\(\eta\)',
1729        'Theta': r'\(\Theta\)', 'Iota': r'\(\iota\)', 'Kappa':
1730        r'\(\kappa\)', 'Lambda': r'\(\Lambda\)', 'Mu': r'\(\mu\)',
1731        'Nu': r'\(\nu\)', 'Xi': r'\(\Xi\)', 'Omicron': r'\(o\)', 'Pi':
        r'\(\Pi\)', 'Rho': r'\(\rho\)', 'Sigma': r'\(\Sigma\)', 'Tau':
1733        r'\(\tau\)', 'Upsilon': r'\(\Upsilon\)', 'Phi': r'\(\Phi\)',
1734        'Chi': r'\(\chi\)', 'Psi': r'\(\Psi\)', 'Omega':
1735        r'\(\Omega\)',
1736
1737        # HTML character entities
1738        'larr': r'\(\leftarrow\)', 'rarr': r'\(\rightarrow\)', 'uarr':
1739        r'\(\uparrow\)', 'darr': r'\(\downarrow\)', 'harr':
1740        r'\(\leftrightarrow\)', 'crarr': r'\(\hookleftarrow\)',
1741        'lArr': r'\(\Leftarrow\)', 'rArr': r'\(\Rightarrow\)', 'uArr':
1742        r'\(\Uparrow\)', 'dArr': r'\(\Downarrow\)', 'hArr':
1743        r'\(\Leftrightarrow\)', 'copy': r'{\textcopyright}',
1744        'times': r'\(\times\)', 'forall': r'\(\forall\)', 'exist':
1745        r'\(\exists\)', 'part': r'\(\partial\)', 'empty':
1746        r'\(\emptyset\)', 'isin': r'\(\in\)', 'notin': r'\(\notin\)',
1747        'ni': r'\(\ni\)', 'prod': r'\(\prod\)', 'sum': r'\(\sum\)',
1748        'prop': r'\(\propto\)', 'infin': r'\(\infty\)', 'ang':
1749        r'\(\angle\)', 'and': r'\(\wedge\)', 'or': r'\(\vee\)', 'cap':
1750        r'\(\cap\)', 'cup': r'\(\cup\)', 'int': r'\(\int\)', 'there4':
1751        r'\(\therefore\)', 'sim': r'\(\sim\)', 'cong': r'\(\cong\)',
1752        'asymp': r'\(\approx\)', 'ne': r'\(\ne\)', 'equiv':
1753        r'\(\equiv\)', 'le': r'\(\le\)', 'ge': r'\(\ge\)', 'sub':
        r'\(\subset\)', 'sup': r'\(\supset\)', 'nsub': r'\(\not\subset\)',
1755        'sube': r'\(\subseteq\)', 'supe': r'\(\supseteq\)', 'oplus':
1756        r'\(\oplus\)', 'otimes': r'\(\otimes\)', 'perp': r'\(\perp\)',
1757
1758        # Alternate (long) names
1759        'infinity': r'\(\infty\)', 'integral': r'\(\int\)', 'product':
1760        r'\(\prod\)', '<=': r'\(\le\)', '>=': r'\(\ge\)',
1761        }
1762
1763    def __init__(self, dom_tree, **options):
1764        self._tree = dom_tree
1765        # Caching:
1766        self._html = self._latex = self._plaintext = None
1767        self._terms = None
1768        # inline option -- mark top-level children as inline.
1769        if options.get('inline') and self._tree is not None:
1770            for elt in self._tree.children:
1771                elt.attribs['inline'] = True
1772
1773    def __str__(self):
1774        return str(self._tree)
1775
1776    def to_html(self, docstring_linker, directory=None, docindex=None,
1777                context=None, **options):
1778        if self._html is not None: return self._html
1779        if self._tree is None: return ''
1780        indent = options.get('indent', 0)
1781        self._html = self._to_html(self._tree, docstring_linker, directory,
1782                                   docindex, context, indent)
1783        return self._html
1784
1785    def to_latex(self, docstring_linker, **options):
1786        if self._latex is not None: return self._latex
1787        if self._tree is None: return ''
1788        indent = options.get('indent', 0)
1789        self._hyperref = options.get('hyperref', 1)
1790        self._latex = self._to_latex(self._tree, docstring_linker, indent)
1791        return self._latex
1792
1793    def to_plaintext(self, docstring_linker, **options):
1794        # [XX] don't cache -- different options might be used!!
1795        #if self._plaintext is not None: return self._plaintext
1796        if self._tree is None: return ''
1797        if 'indent' in options:
1798            self._plaintext = to_plaintext(self._tree,
1799                                           indent=options['indent'])
1800        else:
1801            self._plaintext = to_plaintext(self._tree)
1802        return self._plaintext
1803
1804    def _index_term_key(self, tree):
1805        str = to_plaintext(tree)
1806        str = re.sub(r'\s\s+', '-', str)
1807        return "index-"+re.sub("[^a-zA-Z0-9]", "_", str)
1808
1809    def _to_html(self, tree, linker, directory, docindex, context,
1810                 indent=0, seclevel=0):
1811        if isinstance(tree, basestring):
1812            return plaintext_to_html(tree)
1813
1814        if tree.tag == 'epytext': indent -= 2
1815        if tree.tag == 'section': seclevel += 1
1816
1817        # Process the variables first.
1818        variables = [self._to_html(c, linker, directory, docindex, context,
1819                                   indent+2, seclevel)
1820                    for c in tree.children]
1821
1822        # Construct the HTML string for the variables.
1823        childstr = ''.join(variables)
1824
        # Perform the appropriate action for the DOM tree type.
1826        if tree.tag == 'para':
1827            return wordwrap(
1828                (tree.attribs.get('inline') and '%s' or '<p>%s</p>') % childstr,
1829                indent)
1830        elif tree.tag == 'code':
1831            style = tree.attribs.get('style')
1832            if style:
1833                return '<code class="%s">%s</code>' % (style, childstr)
1834            else:
1835                return '<code>%s</code>' % childstr
1836        elif tree.tag == 'uri':
1837            return ('<a href="%s" target="_top">%s</a>' %
1838                    (variables[1], variables[0]))
1839        elif tree.tag == 'link':
1840            return linker.translate_identifier_xref(variables[1], variables[0])
1841        elif tree.tag == 'italic':
1842            return '<i>%s</i>' % childstr
1843        elif tree.tag == 'math':
1844            return '<i class="math">%s</i>' % childstr
1845        elif tree.tag == 'indexed':
1846            term = Element('epytext', *tree.children, **tree.attribs)
1847            return linker.translate_indexterm(ParsedEpytextDocstring(term))
1848            #term_key = self._index_term_key(tree)
1849            #return linker.translate_indexterm(childstr, term_key)
1850        elif tree.tag == 'bold':
1851            return '<b>%s</b>' % childstr
1852        elif tree.tag == 'ulist':
1853            return '%s<ul>\n%s%s</ul>\n' % (indent*' ', childstr, indent*' ')
1854        elif tree.tag == 'olist':
1855            start = tree.attribs.get('start') or ''
1856            return ('%s<ol start="%s">\n%s%s</ol>\n' %
1857                    (indent*' ', start, childstr, indent*' '))
1858        elif tree.tag == 'li':
1859            return indent*' '+'<li>\n%s%s</li>\n' % (childstr, indent*' ')
1860        elif tree.tag == 'heading':
1861            return ('%s<h%s class="heading">%s</h%s>\n' %
1862                    ((indent-2)*' ', seclevel, childstr, seclevel))
1863        elif tree.tag == 'literalblock':
1864            return '<pre class="literalblock">\n%s\n</pre>\n' % childstr
1865        elif tree.tag == 'doctestblock':
1866            return doctest_to_html(tree.children[0].strip())
1867        elif tree.tag == 'fieldlist':
1868            raise AssertionError("There should not be any field lists left")
1869        elif tree.tag in ('epytext', 'section', 'tag', 'arg',
1870                              'name', 'target', 'html'):
1871            return childstr
1872        elif tree.tag == 'symbol':
1873            symbol = tree.children[0]
1874            return self.SYMBOL_TO_HTML.get(symbol, '[%s]' % symbol)
1875        elif tree.tag == 'graph':
1876            # Generate the graph.
1877            graph = self._build_graph(variables[0], variables[1:], linker,
1878                                      docindex, context)
1879            if not graph: return ''
1880            # Write the graph.
1881            image_url = '%s.gif' % graph.uid
1882            image_file = os.path.join(directory, image_url)
1883            return graph.to_html(image_file, image_url)
1884        else:
1885            raise ValueError('Unknown epytext DOM element %r' % tree.tag)
1886
    #GRAPH_TYPES = ['classtree', 'packagetree', 'importgraph', 'callgraph']
1888    def _build_graph(self, graph_type, graph_args, linker,
1889                     docindex, context):
1890        # Generate the graph
1891        if graph_type == 'classtree':
1892            from epydoc.apidoc import ClassDoc
1893            if graph_args:
1894                bases = [docindex.find(name, context)
1895                         for name in graph_args]
1896            elif isinstance(context, ClassDoc):
1897                bases = [context]
1898            else:
1899                log.warning("Could not construct class tree: you must "
1900                            "specify one or more base classes.")
1901                return None
1902            from epydoc.docwriter.dotgraph import class_tree_graph
1903            return class_tree_graph(bases, linker, context)
1904        elif graph_type == 'packagetree':
1905            from epydoc.apidoc import ModuleDoc
1906            if graph_args:
1907                packages = [docindex.find(name, context)
1908                            for name in graph_args]
1909            elif isinstance(context, ModuleDoc):
1910                packages = [context]
1911            else:
1912                log.warning("Could not construct package tree: you must "
1913                            "specify one or more root packages.")
1914                return None
1915            from epydoc.docwriter.dotgraph import package_tree_graph
1916            return package_tree_graph(packages, linker, context)
1917        elif graph_type == 'importgraph':
1918            from epydoc.apidoc import ModuleDoc
1919            modules = [d for d in docindex.root if isinstance(d, ModuleDoc)]
1920            from epydoc.docwriter.dotgraph import import_graph
1921            return import_graph(modules, docindex, linker, context)
1922
1923        elif graph_type == 'callgraph':
1924            if graph_args:
1925                docs = [docindex.find(name, context) for name in graph_args]
1926                docs = [doc for doc in docs if doc is not None]
1927            else:
1928                docs = [context]
1929            from epydoc.docwriter.dotgraph import call_graph
1930            return call_graph(docs, docindex, linker, context)
1931        else:
1932            log.warning("Unknown graph type %s" % graph_type)
1933
1934
1935    def _to_latex(self, tree, linker, indent=0, seclevel=0, breakany=0):
1936        if isinstance(tree, basestring):
1937            return plaintext_to_latex(tree, breakany=breakany)
1938
1939        if tree.tag == 'section': seclevel += 1
1940
1941        # Figure out the child indent level.
1942        if tree.tag == 'epytext': cindent = indent
1943        else: cindent = indent + 2
1944        variables = [self._to_latex(c, linker, cindent, seclevel, breakany)
1945                    for c in tree.children]
1946        childstr = ''.join(variables)
1947
1948        if tree.tag == 'para':
1949            return wordwrap(childstr, indent)+'\n'
1950        elif tree.tag == 'code':
1951            return '\\texttt{%s}' % childstr
1952        elif tree.tag == 'uri':
            if len(variables) != 2: raise ValueError('Bad URI')
1954            if self._hyperref:
1955                # ~ and # should not be escaped in the URI.
1956                uri = tree.children[1].children[0]
1957                uri = uri.replace('{\\textasciitilde}', '~')
1958                uri = uri.replace('\\#', '#')
1959                if variables[0] == variables[1]:
1960                    return '\\href{%s}{\\textit{%s}}' % (uri, variables[1])
1961                else:
1962                    return ('%s\\footnote{\\href{%s}{%s}}' %
1963                            (variables[0], uri, variables[1]))
1964            else:
1965                if variables[0] == variables[1]:
1966                    return '\\textit{%s}' % variables[1]
1967                else:
1968                    return '%s\\footnote{%s}' % (variables[0], variables[1])
1969        elif tree.tag == 'link':
1970            if len(variables) != 2: raise ValueError('Bad Link')
1971            return linker.translate_identifier_xref(variables[1], variables[0])
1972        elif tree.tag == 'italic':
1973            return '\\textit{%s}' % childstr
1974        elif tree.tag == 'math':
1975            return '\\textit{%s}' % childstr
1976        elif tree.tag == 'indexed':
1977            term = Element('epytext', *tree.children, **tree.attribs)
1978            return linker.translate_indexterm(ParsedEpytextDocstring(term))
1979        elif tree.tag == 'bold':
1980            return '\\textbf{%s}' % childstr
1981        elif tree.tag == 'li':
1982            return indent*' ' + '\\item ' + childstr.lstrip()
1983        elif tree.tag == 'heading':
1984            return ' '*(indent-2) + '(section) %s\n\n' % childstr
1985        elif tree.tag == 'doctestblock':
1986            return doctest_to_latex(tree.children[0].strip())
1987        elif tree.tag == 'literalblock':
1988            return '\\begin{alltt}\n%s\\end{alltt}\n\n' % childstr
1989        elif tree.tag == 'fieldlist':
1990            return indent*' '+'{omitted fieldlist}\n'
1991        elif tree.tag == 'olist':
1992            return (' '*indent + '\\begin{enumerate}\n\n' +
1993                    ' '*indent + '\\setlength{\\parskip}{0.5ex}\n' +
1994                    childstr +
1995                    ' '*indent + '\\end{enumerate}\n\n')
1996        elif tree.tag == 'ulist':
1997            return (' '*indent + '\\begin{itemize}\n' +
1998                    ' '*indent + '\\setlength{\\parskip}{0.6ex}\n' +
1999                    childstr +
2000                    ' '*indent + '\\end{itemize}\n\n')
2001        elif tree.tag == 'symbol':
2002            symbol = tree.children[0]
2003            return self.SYMBOL_TO_LATEX.get(symbol, '[%s]' % symbol)
2004        elif tree.tag == 'graph':
2005            return '(GRAPH)'
2006            #raise ValueError, 'graph not implemented yet for latex'
2007        else:
2008            # Assume that anything else can be passed through.
2009            return childstr
2010
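    # Regexp used by summary(): it matches the shortest leading chunk
    # of text (newlines included) that ends with a period followed by
    # whitespace or the end of the string; group 1 is that first
    # sentence.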
2011    _SUMMARY_RE = re.compile(r'(\s*[\w\W]*?\.)(\s|$)')
2012
2013    def summary(self):
2014        if self._tree is None: return self, False
2015        tree = self._tree
2016        doc = Element('epytext')
2017
2018        # Find the first paragraph.
2019        variables = tree.children
2020        while (len(variables) > 0) and (variables[0].tag != 'para'):
2021            if variables[0].tag in ('section', 'ulist', 'olist', 'li'):
2022                variables = variables[0].children
2023            else:
2024                variables = variables[1:]
2025
2026        # Special case: if the docstring contains a single literal block,
2027        # then try extracting the summary from it.
2028        if (len(variables) == 0 and len(tree.children) == 1 and
2029            tree.children[0].tag == 'literalblock'):
2030            str = re.split(r'\n\s*(\n|$).*',
2031                           tree.children[0].children[0], 1)[0]
2032            variables = [Element('para')]
2033            variables[0].children.append(str)
2034
2035        # If we didn't find a paragraph, return an empty epytext.
2036        if len(variables) == 0: return ParsedEpytextDocstring(doc), False
2037
        # Is there anything else (excluding the field list) after the
        # first paragraph?
2039        long_docs = False
2040        for var in variables[1:]:
2041            if isinstance(var, Element) and var.tag == 'fieldlist':
2042                continue
2043            long_docs = True
2044            break
2045
2046        # Extract the first sentence.
2047        parachildren = variables[0].children
2048        para = Element('para', inline=True)
2049        doc.children.append(para)
2050        for parachild in parachildren:
2051            if isinstance(parachild, basestring):
2052                m = self._SUMMARY_RE.match(parachild)
2053                if m:
2054                    para.children.append(m.group(1))
2055                    long_docs |= parachild is not parachildren[-1]
2056                    if not long_docs:
2057                        other = parachild[m.end():]
2058                        if other and not other.isspace():
2059                            long_docs = True
2060                    return ParsedEpytextDocstring(doc), long_docs
2061            para.children.append(parachild)
2062
2063        return ParsedEpytextDocstring(doc), long_docs
2064
2065    def split_fields(self, errors=None):
2066        if self._tree is None: return (self, ())
2067        tree = Element(self._tree.tag, *self._tree.children,
2068                       **self._tree.attribs)
2069        fields = []
2070
2071        if (tree.children and
2072            tree.children[-1].tag == 'fieldlist' and
2073            tree.children[-1].children):
2074            field_nodes = tree.children[-1].children
2075            del tree.children[-1]
2076
2077            for field in field_nodes:
2078                # Get the tag
2079                tag = field.children[0].children[0].lower()
2080                del field.children[0]
2081
2082                # Get the argument.
2083                if field.children and field.children[0].tag == 'arg':
2084                    arg = field.children[0].children[0]
2085                    del field.children[0]
2086                else:
2087                    arg = None
2088
2089                # Process the field.
2090                field.tag = 'epytext'
2091                fields.append(Field(tag, arg, ParsedEpytextDocstring(field)))
2092
        # Save the remaining docstring as the description.
2094        if tree.children and tree.children[0].children:
2095            return ParsedEpytextDocstring(tree), fields
2096        else:
2097            return None, fields
2098
2099
2100    def index_terms(self):
2101        if self._terms is None:
2102            self._terms = []
2103            self._index_terms(self._tree, self._terms)
2104        return self._terms
2105
2106    def _index_terms(self, tree, terms):
2107        if tree is None or isinstance(tree, basestring):
2108            return
2109
2110        if tree.tag == 'indexed':
2111            term = Element('epytext', *tree.children, **tree.attribs)
2112            terms.append(ParsedEpytextDocstring(term))
2113
2114        # Look for index items in child nodes.
2115        for child in tree.children:
2116            self._index_terms(child, terms)
2117