1# Copyright The IETF Trust 2018, All Rights Reserved
2# -*- coding: utf-8 -*-
3from __future__ import unicode_literals, print_function, division
4
5import copy
6import datetime
7import inspect
8import re
9import sys
10import six
11import textwrap
12
13from codecs import open
14from collections import namedtuple
15from kitchen.text.display import textual_width as displength
16from lxml import etree
17
18try:
19    from xml2rfc import debug
20    debug.debug = True
21except ImportError:
22    debug = None
23    pass
24
25
26from xml2rfc import strings
27from xml2rfc.writers.base import default_options, BaseV3Writer, RfcWriterError
28from xml2rfc import utils
29from xml2rfc.uniscripts import is_script
30from xml2rfc.util.date import extract_date, augment_date, get_expiry_date, format_date
31from xml2rfc.util.name import short_author_name, short_author_ascii_name, short_author_name_parts, short_org_name_set
32
33from xml2rfc.util.name import full_author_name_set
34from xml2rfc.util.num import ol_style_formatter, num_width
35from xml2rfc.util.unicode import expand_unicode_element, textwidth
36from xml2rfc.util.postal import get_normalized_address_info, get_address_format_rules, address_field_mapping
37from xml2rfc.utils import justify_inline, clean_text
38
39
IndexItem   = namedtuple('indexitem', ['item', 'subitem', 'anchor', 'page', ])
Joiner      = namedtuple('joiner', ['join', 'indent', 'hang', 'overlap', 'do_outdent'])
# Joiner parts:
#   join        string used to join a rendered element to preceding text or lines
#   indent      indentation of rendered element
#   hang        additional indentation of second and following lines
#   overlap     Join the last preceding and the first new line on one line, rather
#               than simply appending new lines (when processing lines).
#               Used to handle <dl newline="false"/"true"> and multiple emails
#   do_outdent  If necessary to fit content within width, use a smaller indent than
#               indicated, in ljoin().  Used for <artwork>.
51
52# We don't use namedtuple for Line, because the resulting objects would be immutable:
class Line(object):
    """One line of rendered output plus the bookkeeping attached to it.

    Instances are deliberately mutable: the text and the pagination
    attributes are modified after creation.
    """
    def __init__(self, text, elem):
        assert isinstance(text, six.text_type)
        # Pagination state; not known at construction time.
        self.page, self.block = None, None
        # True if this line should be kept with the previous one.
        self.keep = False
        # The rendered text and the element it was rendered from (or None).
        self.text, self.elem = text, elem
61
62# a couple of factory functions.  We may modify the resulting lines later,
63# which is why we can't just use static instances.
def blankline():
    "Return a fresh one-element list holding an empty Line with no element."
    empty = Line('', None)
    return [ empty ]
def pagefeed():
    "Return a fresh one-element list holding a form-feed Line with no element."
    feed = Line('\f', None)
    return [ feed ]
def striplines(lines):
    """Return lines without leading and trailing blank lines.

    The input sequence is not modified; it is returned as-is when there is
    nothing to strip, otherwise a slice of it is returned.
    """
    total = len(lines)
    first, last = 0, total
    # Skip blank lines at the start ...
    while first < last and lines[first].text.strip(stripspace) == '':
        first += 1
    # ... and at the end.
    while last > first and lines[last-1].text.strip(stripspace) == '':
        last -= 1
    if first == 0 and last == total:
        return lines
    return lines[first:last]
74
class Block(object):
    """Holds the line block information needed for pagination.

    Blocks form a doubly linked list through .prev and .next; .beg and .end
    are the line indexes where the block begins and ends.
    """
    def __init__(self, elem, prev, next=None, beg=None, end=None):
        # The element this block was created for.
        self.elem = elem
        # Links to the neighbouring blocks.
        self.prev, self.next = prev, next
        # Beginning and ending line of the block.
        self.beg, self.end = beg, end
83
# Shared text wrapper, used by fill() and center() below.
wrapper = utils.TextWrapper(width=72)
splitter = utils.TextSplitter(width=67)
# Tags for which TextWriter.render() has already warned about a missing renderer.
seen = set()

# This is not a complete list of whitespace characters, and isn't intended to be.  It's
# intended to be whitespace characters commonly occurring in XML input text which should be
# ignored at the beginning and end of text blocks:
stripspace = " \t\n\r\f\v"

# Default joiners.  The None entry is the fallback used by tjoin() and ljoin()
# for tags without an entry of their own; comments and processing instructions
# are joined with an empty string.
base_joiners = {
            None:           Joiner('\n\n', 0, 0, False, False),
            etree.Comment:  Joiner('', 0, 0, False, False),
            etree.PI:       Joiner('', 0, 0, False, False),
        }
98
def set_joiners(kwargs, update):
    "Store a copy of base_joiners, overlaid with update, in kwargs['joiners']."
    merged = copy.copy(base_joiners)
    merged.update(update)
    kwargs['joiners'] = merged
102
def indent(text, indent=3, hang=0):
    """Indent a text block.

    The first line is prefixed with `indent` spaces, every later line with
    `indent`+`hang` spaces.  Whitespace-only lines become empty lines.
    U+2028 (line separator) is treated as a newline.
    """
    first_pad = ' ' * indent
    later_pad = ' ' * (indent + hang)
    out = []
    for pos, row in enumerate(text.replace('\u2028', '\n').split('\n')):
        if not row.strip(stripspace):
            out.append('')
        elif pos == 0:
            out.append(first_pad + row)
        else:
            out.append(later_pad + row)
    return '\n'.join(out)
115
def lindent(lines, indent=3, hang=0):
    """Indent a list of Line objects in place and return the list.

    The first line is prefixed with `indent`+`hang` spaces, the following
    lines with `indent` spaces.  Whitespace-only lines are left untouched.
    """
    # NOTE(review): here `hang` is applied to the first line, while indent()
    # applies it to the continuation lines -- confirm this is intended.
    head_pad = ' ' * (indent + hang)
    tail_pad = ' ' * indent
    for pos, line in enumerate(lines):
        if not line.text.strip(stripspace):
            continue
        line.text = (head_pad if pos == 0 else tail_pad) + line.text
    return lines
124
def fill(text, **kwargs):
    """Wrap text with the shared wrapper and return the filled string.

    Recognised keyword arguments (removed before calling the wrapper):
      indent    base indentation of all lines
      hang      additional indentation of continuation lines
      first     additional indentation of the first line
      keep_url  if true, pass the text through utils.urlkeep() first
    'joiners' and 'prev' are discarded; everything else is passed on to
    wrapper.fill().
    """
    for ignored in ('joiners', 'prev'):
        kwargs.pop(ignored, None)
    #
    base = kwargs.pop('indent', 0)
    hanging = kwargs.pop('hang', 0)
    lead = kwargs.pop('first', 0)
    if kwargs.pop('keep_url', False):
        text = utils.urlkeep(text, max=kwargs['width'])
    return wrapper.fill(text,
                        initial=' '*(lead+base),
                        subsequent_indent=' '*(base+hanging),
                        **kwargs)
139
def center(text, width, **kwargs):
    "Fold and center the given text"
    # Wrap to a narrower width, to avoid centered text extending all the way
    # to the margins.
    kwargs['width'] = width-4
    text = text.replace('\u2028', '\n')
    rows = text.split('\n')
    if any(len(row) > width for row in rows):
        # at least one line is too long: need to reflow
        rows = wrapper.wrap(text, **kwargs)
    centered = [ row.center(width).rstrip(stripspace) for row in rows ]
    # Non-breaking spaces have served their purpose once wrapping is done
    return '\n'.join(centered).replace('\u00A0', ' ')
153
def align(lines, how, width):
    "Align the given text block left, center, or right, as a block"
    if not lines or how == 'left':
        return lines
    widest = max( len(line.text) for line in lines )
    if widest >= width:
        # no room to move the block
        return lines
    slack = width - widest
    if how == 'center':
        pad = ' '*(slack//2)
    elif how == 'right':
        pad = ' '*(slack)
    else:
        # XXX TODO: Raise exception, catch in TextWriter, and emit error
        return lines
    # Shift all non-blank lines by the same amount, so the block keeps its shape
    for line in lines:
        if line.text.strip(stripspace):
            line.text = pad + line.text
    return lines
176
def mklines(arg, e):
    """Return arg as a list of Line objects tied to element e.

    A text string is split into lines; anything else is returned unchanged.
    """
    if not isinstance(arg, six.text_type):
        return arg
    # \u2028 and \u2029 are eliminated here, through splitlines()
    return [ Line(row, e) for row in arg.splitlines() ]
184
def mktextblock(arg):
    """Return arg as one text string.

    A text string is returned unchanged; otherwise the .text of each item
    is joined with U+2028 (line separator).
    """
    if isinstance(arg, six.text_type):
        return arg
    return '\u2028'.join( item.text for item in arg )
191
def mktext(arg):
    """Return arg as one text string.

    A text string is returned unchanged; otherwise the .text of each item
    is joined with newlines.
    """
    if isinstance(arg, six.text_type):
        return arg
    return '\n'.join( item.text for item in arg )
198
def minwidth(arg):
    """Return the minimum over the word lengths in arg and 0.

    arg may be a text string or a list of lines (see mktext()).
    """
    # NOTE(review): because 0 is always among the candidates, this returns 0
    # for every input.  It looks like the longest word (max) may have been
    # intended -- confirm against the caller in tjoin() before changing.
    lengths = [0]
    lengths.extend( len(word) for word in mktext(arg).split() )
    return min(lengths)
203
def stripl(l):
    """Remove leading and trailing blank lines from the list l, in place.

    Returns the same (modified) list.
    """
    def isblank(line):
        return line.text.strip(stripspace) == ''
    while l and isblank(l[0]):
        l.pop(0)
    while l and isblank(l[-1]):
        l.pop()
    return l
210
def findblocks(lines):
    """Iterate through all lines, adding block beg/end and back/fwd links.

    Each line that has an element gets its .block attribute set to the Block
    it belongs to.  A new Block is started whenever the element changes,
    unless the previous element asked to be kept with the next one.

    Returns the same list of lines.  An empty list, or a list in which no
    block could be created, is returned unchanged instead of raising.
    """
    elem = None                         # last seen element
    prev = None                         # previous block
    keep = False                        # True if previous keepWithNext was true
    block = None
    last = None                         # index of the last line visited
    for n, l in enumerate(lines):
        last = n
        if l.elem is None:
            # A line without element (e.g. a blank line) ends the current block
            if block is not None and not keep and not block.end:
                block.end = n
        elif l.elem != elem:
            elem = l.elem
            if elem.tag not in ['t', 'dl', 'dt', 'figure', 'ol', 'table', 'ul', ]:
                keep = l.keep
            if not keep:
                block = Block(elem, prev, beg=n)
                if prev is not None:
                    prev.next = block
                    if not prev.end:
                        prev.end = n
                prev = block
            # Decide whether the *next* element must stay in this block
            nxt = elem.getnext()
            keep = (elem.get('keepWithNext') == 'true'
                    or (nxt is not None and nxt.get('keepWithPrevious') == 'true')
                    or elem.tag == 'section')
            l.block = block
        else:
            l.block = block
    # Close the final block, if there is one
    if block is not None:
        block.end = last
    return lines
240
def expand_ellipsis(text, width):
    """Expand a trailing '<ellipsis><digits>' marker into a dot leader.

    If text ends with U+2026 followed by digits (a page number), the marker
    is replaced with a ' .' leader and the number, right-aligned in 4
    columns with leading zeros removed, so that the last line of the text
    is `width` characters long.  The placeholder '0000' is kept as is.
    When there is no room for a leader, none is inserted.  Text without
    such a marker is returned unchanged.
    """
    if re.search(r'\u2026\d+$', text):
        head, tail = text.rsplit('\u2026', 1)   # split on ellipsis
        head += ' '
        if tail != '0000':
            tail = '%4s' % tail.lstrip('0')     # strip leading zeros
        last = head.split('\n')[-1]
        lack = width - (len(last) + len(tail))
        # Guard lack <= 0: [-0:] would yield the whole leader string, and a
        # negative value would slice from the front instead of the end.
        elip = (' .'*40)[-lack:] if lack > 0 else ''
        text = head + elip + tail
    return text
252
253# ------------------------------------------------------------------------------
254# Address formatting functions, based on i18naddress functions, but rewritten to
255# suit the text output format.
256
def _format_address_line(line_format, address, rules):
    """Fill in one line of an address format with values from address.

    Returns '' if none of the fields referenced by line_format has a value.
    The `rules` argument is accepted but not used here.
    """
    def lookup(field):
        value = address.get(field, '')
        if field == 'name':
            # A role, if present, is appended to the name in parentheses
            role = address.get('role', '')
            if role:
                value += ' (%s)' % role
        return value

    replacements = {}
    for code, field_name in address_field_mapping.items():
        replacements['%%%s' % code] = lookup(field_name)

    parts = re.split('(%.)', line_format)
    for part in parts:
        if part.startswith('%') and part != '%%' and replacements.get(part):
            break
    else:
        # no referenced field carries any content
        return ''
    filled = ''.join( replacements.get(part, part) for part in parts )
    return filled.strip(stripspace).lstrip(', ')
276
def format_address(address, latin=False, normalize=False):
    """Format address as a newline-separated text block.

    The address format is obtained from get_address_format_rules(); lines
    which contain no word character after formatting are dropped.
    """
    address_format, rules = get_address_format_rules(address, latin, normalize)
    kept = []
    for line_format in address_format.split('%n'):
        line = _format_address_line(line_format, address, rules)
        if re.search(r'\w', line, re.U) is not None:
            kept.append(line)
    return '\n'.join(kept)
287
288class TextWriter(BaseV3Writer):
289
290    def __init__(self, xmlrfc, quiet=None, options=default_options, date=datetime.date.today()):
291        super(TextWriter, self).__init__(xmlrfc, quiet=quiet, options=options, date=date)
292        self.options.min_section_start_lines = 5
293        self.refname_mapping = self.get_refname_mapping()
294        self.rendered = None
295
296    def process(self):
297        if not self.rendered:
298            joiners = base_joiners
299            if self.options.pagination:
300                self.add_pageno_placeholders()
301            lines = self.render(self.root, width=72, joiners=joiners)
302
303            if self.options.pagination:
304                lines = findblocks(lines)
305                lines = self.paginate(lines)
306                lines = self.update_toc(lines)
307            if self.options.debug:
308                for i, l in enumerate(lines):
309                    tag  = l.elem.tag  if l.elem!=None else '-'
310                    page = l.elem.page if l.elem!=None else '-'
311                    if l.block:
312                        if six.PY2:
313                            sys.stderr.write(("%3d %10s %3d-%3d [%4s] %s\n" % (i, tag, l.block.beg, l.block.end, page, l.text)).encode('utf8'))
314                        else:
315                            sys.stderr.write(("%3d %10s %3d-%3d [%4s] %s\n" % (i, tag, l.block.beg, l.block.end, page, l.text)))
316                    else:
317                        if six.PY2:
318                            sys.stderr.write(("%3d %10s         [%4s] %s\n" % (i, tag,                           page, l.text)).encode('utf8'))
319                        else:
320                            sys.stderr.write(("%3d %10s         [%4s] %s\n" % (i, tag,                           page, l.text)))
321            for i, l in enumerate(lines):
322                length = len(l.text)
323                if length > 72:
324                    self.warn(l.elem, "Too long line found (L%s), %s characters longer than 72 characters: \n%s" %(i+1, length-72, l.text))
325
326            text = ('\n'.join( l.text for l in lines )).rstrip(stripspace) + '\n'
327
328            # Replace some code points whose utility has ended
329            text = text.replace(u'\u00A0', u' ')
330            text = text.replace(u'\u2011', u'-')
331            text = text.replace(u'\u200B', u'')
332            text = text.replace(u'\u2060', u'')
333            assert text == text.replace(u'\u2028', u' ')
334            assert text == text.replace(u'\uE060', u'')
335
336            self.rendered = text
337
338        return self.rendered
339
340    def write(self, filename):
341        """Write the document to a file """
342
343        text = self.process()
344
345        if self.errors:
346            raise RfcWriterError("Not creating output file due to errors (see above)")
347
348        encoding = 'utf-8-sig' if self.options.bom else 'utf-8'
349        with open(filename, 'w', encoding=encoding) as file:
350            file.write(text)
351
352        if not self.options.quiet:
353            self.log(' Created file %s' % filename)
354
355    def render(self, e, width, **kw):
356        if e.tag in (etree.PI, etree.Comment):
357            return e.tail.lstrip(stripspace) if (e.tail and e.tail.strip(stripspace)) else ''
358        kwargs = copy.deepcopy(kw)
359        func_name = "render_%s" % (e.tag.lower(),)
360        func = getattr(self, func_name, self.default_renderer)
361        if func == self.default_renderer:
362            if e.tag in self.__class__.deprecated_element_tags:
363                self.warn(e, "Was asked to render a deprecated element: <%s>" % (e.tag, ))
364            elif not e.tag in seen:
365                self.warn(e, "No renderer for <%s> found" % (e.tag, ))
366                seen.add(e.tag)
367        res = func(e, width, **kwargs)
368        return res
369
370    def add_pageno_placeholders(self):
371        toc = self.root.find('./front/toc/section')
372        for e in toc.xpath('.//xref[2]'):
373            e.set('pageno', '0000')
374
    def paginate(self, lines):
        """
        The maximum length of page text is 48 lines.  Above this there are 4 lines of
        top header, or 4 blank lines on the first page, below this there are 5 lines
        of footer, with ^L on the last line and the footer on the next-to-last line.

        Takes the rendered lines, with block information already set by
        findblocks(), and returns a new list of lines with page headers, footers
        and padding lines inserted.  Each line in the result gets its .page
        attribute set, and so does the element of each line (except for comments
        and processing instructions).
        """
        header = justify_inline(self.page_top_left(),
                                self.page_top_center(),
                                self.page_top_right())
        start_lineno = 0                # start of unbroken text
        break_target = 4+48             # projected next break
        page = 1
        textlen = len(lines)
        paginated = []
        # One iteration per output page
        while start_lineno < textlen:
            footer = justify_inline(self.page_bottom_left(),
                                    self.page_bottom_center(),
                                    "[Page %s]" % page)
            # if the current block ends 1 after break_target, we'll have a widow line on
            # the next page.  If the current block starts 1 before break_target, we'll
            # have an orphan line on this page.  In either case, we insert the
            # page break one line earlier, at break_target-1, and add a filler line.
            break_lineno = break_target
            pad = 0                     # number of filler lines to add before the footer
            if break_lineno >= textlen:
                # The remaining text fits on the next page, this is the last page break
                pad = break_lineno - textlen
                break_target = textlen - 1                 # last line
            else:
                # See if we need to adjust break point to avoid break right after a section
                # heading, and avoid creating orphans or widows
                block = lines[break_target].block
                if block is None:
                    # check backwards for section start.  If we find one, check
                    # again for another, in case it's a subsection.
                    found = None
                    i = break_target
                    while i > break_target-12:
                        # look at the 3 lines preceding line i
                        for j in range(1,4):
                            k = i - j
                            if lines[k].elem != None and lines[k].elem.tag == 'section':
                                found = True
                                i = k
                                break       # break for loop
                        else:
                            break           # break while loop
                    if found:
                        # break before the (outermost) section line found
                        pad = break_target - i
                        break_lineno = i
                else:
                    # Look for orphan and widow cases
                    olen = break_target - block.beg # number of lines left at the end of this page
                    wlen = block.end - break_target # number of lines at the start of next page
                    blen = block.end - block.beg    # complete block length
                    elem = lines[block.beg].elem
                    if elem.tag == 'section':
                        # count the lines of this block before the break which don't
                        # belong to a <section> element
                        tcount = 0
                        for r in range(block.beg, break_target):
                            if lines[r].elem!=None and lines[r].elem.tag != 'section':
                                tcount += 1
                        if wlen == 1 or tcount <= self.options.min_section_start_lines:
                            # move the whole block to the next page
                            adj = break_lineno - block.beg
                            pad += adj
                            break_lineno -= adj
                    elif elem.tag in ['artset', 'artwork', 'figure', 'sourcecode', 'table', ]:
                        if blen < 48 or olen <= self.options.min_section_start_lines:
                            # move the whole block to the next page
                            adj = break_lineno - block.beg
                            pad += adj
                            break_lineno -= adj
                        else:
                            pass
                    elif ( (olen in range(1, self.options.orphans+1) and blen > olen)
                        or (wlen in range(1, self.options.widows+1) and blen > wlen)):
                        # break at the start of the block instead
                        break_lineno -= olen
                        pad += olen
                    else:
                        pass
            # Transfer lines to next page
            pagestart = len(paginated)
            if page > 1:
                paginated += pagefeed() + mklines(header, None) + blankline()*2
            paginated += lines[start_lineno:break_lineno]
            paginated += blankline() * pad
            paginated += blankline() * 3 + mklines(footer, None)
            # make note of each line's page
            for i in range(pagestart, len(paginated)):
                paginated[i].page = page
                if paginated[i].elem != None and not isinstance(paginated[i].elem, (etree._ProcessingInstruction, etree._Comment)):
                    paginated[i].elem.page = page
            # Set the next page start
            start_lineno = break_lineno
            # discard blank lines at the top of the next page, if any
            while start_lineno < textlen and lines[start_lineno].text.strip(stripspace) == '':
                start_lineno += 1
            # advance page end to the next potential page break
            break_target = start_lineno + 48
            page += 1

        return paginated
474
475    def update_toc(self, lines):
476        if self.root.get('tocInclude') != 'true':
477            return lines
478        toc = self.root.find('./front/toc/section')
479        in_toc = False
480        toc_start = None
481        toc_end = None
482        for i, l in enumerate(lines):
483            if l.elem is None:
484                continue
485            elif l.elem == toc:
486                in_toc = True
487                toc_start = i
488            elif in_toc and l.elem.tag == 'section':
489                # end of toc
490                in_toc = False
491                toc_end = i
492                break
493            elif in_toc and l.elem.tag in ['li', 't']:
494                xref = l.elem.find('.//xref[2]')
495                if xref!= None:
496                    id = xref.get('target')
497                    target = self.get_element_from_id(id)
498                    page = self.get_element_page(target)
499                    xref.set('pageno', '%s'%page )
500            elif in_toc and l.elem!=None:
501                self.error(l.elem, "Unexpected condition. <%s> in toc" % (l.elem.tag))
502            else:
503                pass
504        # new toc, to be used to replace the old one
505        toclines = self.render(toc, width=72, joiners=base_joiners)
506        if toc_start and toc_end:
507            j = 2
508            for i in range(toc_start+2, toc_end):
509                old = lines[i]
510                if old.elem is None:
511                    continue
512                new = toclines[j]
513                lines[i].text = new.text
514                j += 1
515        return lines
516
517    def tjoin(self, text, e, width, **kwargs):
518        '''
519        Render element e, then format and join it to text using the
520        appropriate settings in joiners.
521        '''
522        assert isinstance(text, six.text_type)
523        joiners = kwargs['joiners']
524        j = joiners[e.tag] if e.tag in joiners else joiners[None]
525        width -= j.indent + j.hang
526        if width < minwidth(text):
527            self.die(e, "Trying to render text in a too narrow column: width: %s, text: '%s'" % (width, text))
528        kwargs['hang'] = j.hang
529        etext = self.render(e, width, **kwargs)
530        itext = indent(etext, j.indent, j.hang)
531        if text:
532            if '\n' in j.join:
533                text += j.join + itext
534            elif j.join.strip(stripspace) and not itext.strip(stripspace):
535                # don't use non-empty joiners with empty content
536                pass
537            else:
538                text += j.join + itext.lstrip(stripspace)
539        else:
540            text  = itext
541        return text
542
543    def ljoin(self, lines, e, width, **kwargs):
544        '''
545        Render element e, then format and join it to preceding text using the
546        appropriate settings in joiners.
547        '''
548        assert isinstance(lines, list)
549        assert not lines or isinstance(lines[0], Line)
550        joiners = kwargs['joiners']
551        j = joiners[e.tag] if e.tag in joiners else joiners[None]
552        width -= j.indent
553        kwargs['hang'] = j.hang
554        res = mklines(self.render(e, width, **kwargs), e)
555        if lines:
556            for i in range(j.join.count('\n')-1):
557                lines += blankline()
558        reswidth = max(len(l.text) for l in res) if res else 0
559        indent = j.indent
560        residue = 0
561        if (hasattr(e, 'outdent') and e.outdent) or (j.do_outdent and reswidth > width):
562            outdent = e.outdent if e.outdent else reswidth-width
563            residue = max(0, outdent - indent)
564            if residue:
565                e.getparent().outdent = residue
566            indent -= min(indent, outdent)
567            self.warn(e, "%s too wide, reducing indentation from %s to %s" % (e.tag.capitalize(), j.indent, indent))
568        nlines = lindent(res, indent, j.hang)
569        if j.overlap and nlines:
570            firstline = nlines[0]
571            nlines = nlines[1:]
572            if firstline.text.strip(stripspace):
573                lines[-1].text += j.join + firstline.text.lstrip(stripspace)
574        lines += nlines
575        return lines
576
577
578    def element(self, tag, line=None, **attribs):
579        e = self.root.makeelement(tag, attrib=attribs)
580        if line:
581            e.sourceline = line
582        return e
583
584    def get_initials(self, author):
585        """author is an rfc2629 author element.  Return the author initials,
586        fixed up according to current flavour and policy."""
587        initials = author.attrib.get('initials', '')
588
589        initials_list = re.split("[. ]+", initials)
590        try:
591            initials_list.remove('')
592        except:
593            pass
594        if len(initials_list) > 0:
595            # preserve spacing, but make sure all parts have a trailing
596            # period
597            initials = initials.strip(stripspace)
598            initials += '.' if not initials.endswith('.') else ''
599            initials = re.sub('([^.]) ', r'\g<1>. ', initials)
600        return initials
601
602    # --- fallback rendering functions ------------------------------------------
603
604    def default_renderer(self, e, width, **kwargs):
605        # This is a fallback when a more specific function doesn't exist
606        text = "<%s>:%s" % (e.tag, e.text or '')
607        for c in e.getchildren():
608            ctext = self.render(c, width, **kwargs)
609            if isinstance(ctext, list):
610                ctext = "\n\n".join(ctext)
611            if ctext is None and debug:
612                debug.show('e')
613                debug.show('c')
614            text += '\n' + ctext
615        text += e.tail or ''
616        return text
617
618#     def parts_renderer(self, e, width, **kwargs):
619#         text = ""
620#         for c in e.getchildren():
621#             text = self.tjoin(text, c, width, **kwargs)
622#         return text
623
624    def inner_text_renderer(self, e, width=None, **kwargs):
625        text = e.text or ''
626        for c in e.getchildren():
627            try:
628                text += self.render(c, width, **kwargs)
629            except TypeError:
630                debug.show('c')
631                raise
632        return text.strip(stripspace)
633
634#     def text_renderer(self, e, width, **kwargs):
635#         text = self.inner_text_renderer(e, **kwargs)
636#         text += ' '+e.tail if e.tail else ''
637#         return text
638
639    def text_or_block_renderer(self, e, width, **kw):
640        # This handles the case where the element has two alternative content
641        # models, either text or block-level children; deal with them
642        # separately.  Return text and whether this was plain text.
643        kwargs = copy.deepcopy(kw)
644        if utils.hastext(e):
645            _tag = e.tag; e.tag = 't'
646            text = mktext(self.ljoin([], e, width, **kwargs))
647            e.tag = _tag
648            return text, True
649        else:
650            lines = []
651            for c in e.getchildren():
652                lines = self.ljoin(lines, c, width, **kwargs)
653                kwargs.pop('first', None)
654            return lines, False
655
656
657    def quote_renderer(self, e, width, prefix, by, cite, **kwargs):
658        set_joiners(kwargs, {
659            None:      Joiner('\n', 0, 0, False, False),
660            't':       Joiner('\n\n', 0, 0, False, False),
661            'artset':   Joiner('\n\n', 0, 0, False, False),
662            'artwork':  Joiner('\n\n', 3, 0, False, True),
663            'sourcecode':  Joiner('\n\n', 3, 0, False, False),
664        })
665        width = width if width else 69
666        text, plain = self.text_or_block_renderer(e, width-3, **kwargs)
667        if plain:
668            text = fill(text, width=width-3, **kwargs)
669        lines = mklines(text, e)
670        if by  or cite:
671            lines += [ Line('', e) ]
672        if by:
673            lines += mklines("-- %s" % fill(by, width=width-6, hang=3), e)
674        if cite:
675            lines += mklines("   %s\n" % fill(cite, width=width-6, hang=3), e)
676        for i, l in enumerate(lines):
677            lines[i].text = prefix + '  '+l.text
678        lines = lindent(lines, indent=kwargs.get('indent', 0))
679        return lines
680
681    def null_renderer(self, e, width, **kwargs):
682        self.die(e, "Did not expect to be asked to render <%s> while in %s//%s" % (e.tag, self.part, e.getparent().tag))
683        return None
684
685    # --- element rendering functions ------------------------------------------
686
687    # 2.1.  <abstract>
688    #
689    #    Contains the Abstract of the document.  See [RFC7322] for more
690    #    information on restrictions for the Abstract.
691    #
692    #    This element appears as a child element of <front> (Section 2.26).
693    #
694    # ...
695    #
696    # 2.1.1.  "anchor" Attribute
697    #
698    #    Document-wide unique identifier for the Abstract.
699    def render_abstract(self, e, width, **kwargs):
700        kwargs['joiners'].update({ None:       Joiner('\n\n', 3, 0, False, False), })
701        lines = [ Line("Abstract", e) ]
702        for c in e.getchildren():
703            lines = self.ljoin(lines, c, width, **kwargs)
704        return lines
705
706    # 2.2.  <address>
707    #
708    #    Provides address information for the author.
709    #
710    #    This element appears as a child element of <author> (Section 2.7).
711    def render_address(self, e, width, **kwargs):
712        set_joiners(kwargs, {
713            None:       Joiner('\n', 0, 0, False, False),
714            'email':    Joiner('', 0, 0, True, False),
715        })
716        lines = []
717        for c in e.getchildren():
718            lines = self.ljoin(lines, c, width, **kwargs)
719        return lines
720
721    # 2.3.  <annotation>
722    #
723    #    Provides additional prose augmenting a bibliographic reference.  This
724    #    text is intended to be shown after the rest of the generated
725    #    reference text.
726    #
727    #    This element appears as a child element of <reference>
728    #    (Section 2.40).
729    def render_annotation(self, e, width, **kwargs):
730        text = fill(self.inner_text_renderer(e), width=width, **kwargs)
731        return text
732
733    # 2.4.  <area>
734    #
735    #    Provides information about the IETF area to which this document
736    #    relates (currently not used when generating documents).
737    #
738    #    The value ought to be either the full name or the abbreviation of one
739    #    of the IETF areas as listed on <http://www.ietf.org/iesg/area.html>.
740    #    A list of full names and abbreviations will be kept by the RFC Series
741    #    Editor.
742    #
743    #    This element appears as a child element of <front> (Section 2.26).
744
745
746    # 2.5.  <artwork>
747    #
748    #    This element allows the inclusion of "artwork" in the document.
749    #    <artwork> provides full control of horizontal whitespace and line
750    #    breaks; thus, it is used for a variety of things, such as diagrams
751    #    ("line art") and protocol unit diagrams.  Tab characters (U+0009)
752    #    inside of this element are prohibited.
753    #
754    #    Alternatively, the "src" attribute allows referencing an external
755    #    graphics file, such as a vector drawing in SVG or a bitmap graphic
756    #    file, using a URI.  In this case, the textual content acts as a
757    #    fallback for output representations that do not support graphics;
758    #    thus, it ought to contain either (1) a "line art" variant of the
759    #    graphics or (2) prose that describes the included image in sufficient
760    #    detail.
761    #
762    #    In [RFC7749], the <artwork> element was also used for source code and
763    #    formal languages; in v3, this is now done with <sourcecode>.
764    #
765    #    There are at least five ways to include SVG in artwork in
766    #    Internet-Drafts:
767    #
768    #    o  Inline, by including all of the SVG in the content of the element,
769    #       such as: <artwork type="svg"><svg xmlns="http://www.w3.org/2000/
770    #       svg...">
771    #
772    #    o  Inline, but using XInclude (see Appendix B.1), such as: <artwork
773    #       type="svg"><xi:include href=...>
774    #
775    #    o  As a data: URI, such as: <artwork type="svg" src="data:image/
776    #       svg+xml,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3...">
777    #
778    #    o  As a URI to an external entity, such as: <artwork type="svg"
779    #       src="http://www.example.com/...">
780    #
781    #    o  As a local file, such as: <artwork type="svg" src="diagram12.svg">
782    #
783    #    The use of SVG in Internet-Drafts and RFCs is covered in much more
784    #    detail in [RFC7996].
785    #
786    #    The above methods for inclusion of SVG art can also be used for
787    #    including text artwork, but using a data: URI is probably confusing
788    #    for text artwork.
789    #
790    #    Formatters that do pagination should attempt to keep artwork on a
791    #    single page.  This is to prevent artwork that is split across pages
792    #    from looking like two separate pieces of artwork.
793    #
794    #    See Section 5 for a description of how to deal with issues of using
795    #    "&" and "<" characters in artwork.
796
797    def render_artset(self, e, width, **kwargs):
798        preflist = ['ascii-art', ]
799        lines = []
800        for t in preflist:
801            for a in e.xpath('./artwork[@type="%s"]' % t):
802                lines = self.ljoin(lines, a, width, **kwargs)
803                return lines
804        else:
805            a = e[0]
806            if e.text and e.text.strip(stripspace):
807                lines = self.ljoin(lines, a, width, **kwargs)
808            else:
809                self.err(a, "Expected ascii-art text, but found none.")
810        return lines
811
812    def render_artwork(self, e, width, **kwargs):
813        msg  = ( "(Artwork only available as %s: %s)"
814                    % ( e.get('type', '(unknown type)'),
815                        e.get('originalSrc') or e.get('src') or 'No external link available, see %s.html for artwork.'%self.root.get('docName')))
816        msg  = fill(msg, width=width, **kwargs)
817#        text = (e.text.strip(stripspace) and e.text.expandtabs()) or msg
818#         text = text.strip('\n')
819#         text = '\n'.join( [ l.rstrip(stripspace) for l in text.split('\n') ] )
820        # We need this in order to deal with xml comments inside artwork:
821        text = (e.text or '') + ''.join([ c.tail for c in e.getchildren() ])
822        text = text.strip('\n')
823        text = (text.strip(stripspace) and text.expandtabs()) or msg
824        text = '\n'.join( [ l.rstrip(stripspace) for l in text.split('\n') ] )
825        #
826        lines = [ Line(t, e) for t in text.splitlines() ]
827        lines = align(lines, e.get('align', 'left'), width)
828        return lines
829
830    # 2.5.1.  "align" Attribute
831    #
832    #    Controls whether the artwork appears left justified (default),
833    #    centered, or right justified.  Artwork is aligned relative to the
834    #    left margin of the document.
835    #
836    #    Allowed values:
837    #
838    #    o  "left" (default)
839    #
840    #    o  "center"
841    #
842    #    o  "right"
843
844
845    # 2.5.2.  "alt" Attribute
846    #
847    #    Alternative text description of the artwork (which is more than just
848    #    a summary or caption).  When the art comes from the "src" attribute
849    #    and the format of that artwork supports alternate text, the
850    #    alternative text comes from the text of the artwork itself, not from
851    #    this attribute.  The contents of this attribute are important to
852    #    readers who are visually impaired, as well as those reading on
853    #    devices that cannot show the artwork well, or at all.
854
855
856    # 2.5.3.  "anchor" Attribute
857    #
858    #    Document-wide unique identifier for this artwork.
859
860    # 2.5.5.  "name" Attribute
861    #
862    #    A filename suitable for the contents (such as for extraction to a
863    #    local file).  This attribute can be helpful for other kinds of tools
864    #    (such as automated syntax checkers, which work by extracting the
865    #    artwork).  Note that the "name" attribute does not need to be unique
866    #    for <artwork> elements in a document.  If multiple <artwork> elements
867    #    have the same "name" attribute, a processing tool might assume that
868    #    the elements are all fragments of a single file, and the tool can
869    #    collect those fragments for later processing.  See Section 7 for a
870    #    discussion of possible problems with the value of this attribute.
871
872    # 2.5.6.  "src" Attribute
873    #
874    #    The URI reference of a graphics file [RFC3986], or the name of a file
875    #    on the local disk.  This can be a "data" URI [RFC2397] that contains
876    #    the contents of the graphics file.  Note that the inclusion of art
877    #    with the "src" attribute depends on the capabilities of the
878    #    processing tool reading the XML document.  Tools need to be able to
879    #    handle the file: URI, and they should be able to handle http: and
880    #    https: URIs as well.  The prep tool will be able to handle reading
881    #    the "src" attribute.
882    #
883    #    If no URI scheme is given in the attribute, the attribute is
884    #    considered to be a local filename relative to the current directory.
885    #    Processing tools must be careful to not accept dangerous values for
886    #    the filename, particularly those that contain absolute references
887    #    outside the current directory.  Document creators should think hard
888    #    before using relative URIs due to possible later problems if files
889    #    move around on the disk.  Also, documents should most likely use
890    #    explicit URI schemes wherever possible.
891    #
892    #    In some cases, the prep tool may remove the "src" attribute after
893    #    processing its value.  See [RFC7998] for a description of this.
894    #
895    #    It is an error to have both a "src" attribute and content in the
896    #    <artwork> element.
897
898    # 2.5.7.  "type" Attribute
899    #
900    #    Specifies the type of the artwork.  The value of this attribute is
901    #    free text with certain values designated as preferred.
902    #
903    #    The preferred values for <artwork> types are:
904    #
905    #    o  ascii-art
906    #
907    #    o  binary-art
908    #
909    #    o  call-flow
910    #
911    #    o  hex-dump
912    #
913    #    o  svg
914    #
915    #    The RFC Series Editor will maintain a complete list of the preferred
916    #    values on the RFC Editor web site, and that list is expected to be
917    #    updated over time.  Thus, a consumer of v3 XML should not cause a
918    #    failure when it encounters an unexpected type or no type is
919    #    specified.  The table will also indicate which type of art can appear
920    #    in plain-text output (for example, type="svg" cannot).
921
922
923
924    # 2.6.  <aside>
925    #
926    #    This element is a container for content that is semantically less
927    #    important or tangential to the content that surrounds it.
928    #
929    #    This element appears as a child element of <section> (Section 2.46).
930    #
931    # 2.6.1.  "anchor" Attribute
932    #
933    #    Document-wide unique identifier for this aside.
934    def render_aside(self, e, width, **kwargs):
935        kwargs['joiners'].update({ 't':       Joiner('\n\n', 0, 0, False, False), })
936        prefix = '   |  '
937        width -= len(prefix)
938        text, plain = self.text_or_block_renderer(e, width, **kwargs)
939        if plain:
940            text = fill(text, width=width, **kwargs)
941        lines = mklines(text, e)
942        for i, l in enumerate(lines):
943            lines[i].text = prefix + l.text
944        lines = lindent(lines, indent=kwargs.get('indent', 0))
945        return lines
946
947
948    # 2.7.  <author>
949    #
950    #    Provides information about a document's author.  This is used both
951    #    for the document itself (at the beginning of the document) and for
952    #    referenced documents.
953    #
954    #    The <author> elements contained within the document's <front> element
955    #    are used to fill the boilerplate and also to generate the "Author's
956    #    Address" section (see [RFC7322]).
957    #
958    #    Note that an "author" can also be just an organization (by not
959    #    specifying any of the "name" attributes, but adding the
960    #    <organization> child element).
961    #
962    #    Furthermore, the "role" attribute can be used to mark an author as
963    #    "editor".  This is reflected both on the front page and in the
964    #    "Author's Address" section, as well as in bibliographic references.
965    #    Note that this specification does not define a precise meaning for
966    #    the term "editor".
967    #
968    #    This element appears as a child element of <front> (Section 2.26).
969    #
970    # ...
971    #
972    # 2.7.1.  "asciiFullname" Attribute
973    #
974    #    The ASCII equivalent of the author's full name.
975    #
976    # 2.7.2.  "asciiInitials" Attribute
977    #
978    #    The ASCII equivalent of the author's initials, to be used in
979    #    conjunction with the separately specified asciiSurname.
980    #
981    # 2.7.3.  "asciiSurname" Attribute
982    #
983    #    The ASCII equivalent of the author's surname, to be used in
984    #    conjunction with the separately specified asciiInitials.
985    #
986    # 2.7.4.  "fullname" Attribute
987    #
988    #    The full name (used in the automatically generated "Author's Address"
989    #    section).  Although this attribute is optional, if one or more of the
990    #    "asciiFullname", "asciiInitials", or "asciiSurname" attributes have
991    #    values, the "fullname" attribute is required.
992    #
993    # 2.7.5.  "initials" Attribute
994    #
995    #    An abbreviated variant of the given name(s), to be used in
996    #    conjunction with the separately specified surname.  It usually
997    #    appears on the front page, in footers, and in references.
998    #
999    #    Some processors will post-process the value -- for instance, when it
1000    #    only contains a single letter (in which case they might add a
1001    #    trailing dot).  Relying on this kind of post-processing can lead to
1002    #    results varying across formatters and thus ought to be avoided.
1003    #
1004    # 2.7.6.  "role" Attribute
1005    #
1006    #    Specifies the role the author had in creating the document.
1007    #
1008    #    Allowed value:
1009    #
1010    #    o  "editor"
1011    #
1012    # 2.7.7.  "surname" Attribute
1013    #
1014    #    The author's surname, to be used in conjunction with the separately
1015    #    specified initials.  It usually appears on the front page, in
1016    #    footers, and in references.
1017    def render_author(self, e, width, **kwargs):
1018        """
1019        Render one author entry for the Authors' Addresses section.
1020        """
1021        set_joiners(kwargs, {
1022            None:       Joiner('\n', 0, 0, False, False),  # default
1023        })
1024        lines = []
1025        address = e.find('./address')
1026        if address is None:
1027            address = etree.Element('address')
1028            e.append(address)
1029        postal = e.find('./address/postal')
1030        if postal is None:
1031            # We render author name as part of postal, so make sure it's there
1032            address.insert(0, etree.Element('postal'))
1033        # ascii will be set only if name has codepoints not in the Latin script blocks
1034        name, ascii  = full_author_name_set(e)
1035        if ascii:
1036            for c in e.iterchildren('address'):
1037                lines = self.ljoin(lines, c, width, latin=True, **kwargs)
1038                lines = striplines(lines)
1039                lines += blankline()
1040                lines += [ Line( 'Additional contact information:', address) ]
1041                lines += blankline()
1042                lines += lindent(self.ljoin([], c, width, latin=False, **kwargs))
1043                lines = striplines(lines)
1044        else:
1045            for c in e.iterchildren('address'):
1046                lines = self.ljoin(lines, c, width, **kwargs)
1047                lines = striplines(lines)
1048        lines += blankline()
1049        return lines
1050
1051    def render_author_name(self, e, width, **kwargs):
1052        text = ''
1053        organization = self.render_organization(e.find('organization'), width, **kwargs)
1054        fullname = e.attrib.get('fullname', '')
1055        if not fullname:
1056            surname = e.attrib.get('surname', '')
1057            if surname:
1058                initials = self.get_initials(e)
1059                fullname = '%s %s' % (initials, fullname)
1060        if fullname:
1061            text = fullname
1062            if e.attrib.get('role', '') == 'editor':
1063                text += ' (editor)'
1064            if organization:
1065                text += '\n'+ organization
1066        elif organization:
1067            # Use organization instead of name
1068            text = organization
1069        else:
1070            text = ''
1071        return text
1072
1073    def render_contact(self, e, width, **kwargs):
1074        p = e.getparent()
1075        if   p.tag == 't':
1076            name, ascii = full_author_name_set(e)
1077            if ascii:
1078                contact = "%s (%s)" % (name, ascii)
1079            else:
1080                contact = name
1081            # Avoid sentence end space doubling
1082            contact = contact.replace('. ', '.\u00a0')
1083            return contact + (e.tail or '')
1084        elif p.tag == 'section':
1085            return self.render_author(e, width, **kwargs)
1086        else:
1087            return self.null_renderer(e, width, **kwargs)
1088
1089    def render_author_front(self, e, **kwargs):
1090        name = short_author_name(e)
1091        if not is_script(name, 'Latin'):
1092            aname = short_author_ascii_name(e)
1093            name = '%s (%s)' % (name, aname)
1094        #
1095        o = e.find('./organization')
1096
1097        if o != None:
1098            if o.get('showOnFrontPage') == 'true':
1099                organization = self.render_front_organization(o, **kwargs)
1100            else:
1101                organization = None
1102        else:
1103            organization = ''
1104        #
1105        if organization and not name:
1106            name = organization
1107            organization = None
1108        #
1109        if e.get('role') == 'editor':
1110            name += ', Ed.'
1111        return name, organization
1112
1113    def render_authors(self, e, width, **kwargs):
1114        """
1115        Render authors for reference display.  This has to take into
1116        consideration the particular presentation of surnames and initials
1117        used by the RFC Editor.
1118        """
1119        buf = []
1120        authors = list(e.iterdescendants('author'))
1121        for i, author in enumerate(authors):
1122            if i == len(authors) - 1 and len(authors) > 1:
1123                buf.append('and ')
1124            organization = author.find('organization')
1125            initials, surname = short_author_name_parts(author)
1126            if surname:
1127                initials = initials or ''
1128                if i == len(authors) - 1 and len(authors) > 1:
1129                    # Last author is rendered in reverse
1130                    if len(initials) > 0:
1131                        buf.append(initials + ' ' + \
1132                                     surname)
1133                    else:
1134                        buf.append(surname)
1135                elif len(initials) > 0:
1136                    buf.append(surname + ', ' + initials)
1137                else:
1138                    buf.append(surname)
1139                if author.attrib.get('role', '') == 'editor':
1140                    buf.append(', Ed.')
1141            elif organization is not None and organization.text:
1142                # Use organization instead of name
1143                buf.append(organization.text.strip(stripspace))
1144            else:
1145                continue
1146            if len(authors) == 2 and i == 0:
1147                buf.append(' ')
1148            elif i < len(authors) - 1:
1149                buf.append(', ')
1150        return ''.join(buf)
1151
1152    # 2.8.  <back>
1153    #
1154    #    Contains the "back" part of the document: the references and
1155    #    appendices.  In <back>, <section> elements indicate appendices.
1156    #
1157    #    This element appears as a child element of <rfc> (Section 2.45).
1158    def render_back(self, e, width, **kwargs):
1159        lines = []
1160        for c in e.getchildren():
1161            lines = self.ljoin(lines, c, width, **kwargs)
1162        return lines
1163
1164
1165    # 2.9.  <bcp14>
1166    #
1167    #    Marks text that are phrases defined in [BCP14] such as "MUST",
1168    #    "SHOULD NOT", and so on.  When shown in some of the output
1169    #    representations, the text in this element might be highlighted.  The
1170    #    use of this element is optional.
1171    #
1172    #    This element is only to be used around the actual phrase from BCP 14,
1173    #    not the full definition of a requirement.  For example, it is correct
1174    #    to say "The packet <bcp14>MUST</bcp14> be dropped.", but it is not
1175    #    correct to say "<bcp14>The packet MUST be dropped.</bcp14>".
1176    #
1177    #    This element appears as a child element of <annotation>
1178    #    (Section 2.3), <blockquote> (Section 2.10), <dd> (Section 2.18), <dt>
1179    #    (Section 2.21), <em> (Section 2.22), <li> (Section 2.29), <preamble>
1180    #    (Section 3.6), <refcontent> (Section 2.39), <strong> (Section 2.50),
1181    #    <sub> (Section 2.51), <sup> (Section 2.52), <t> (Section 2.53), <td>
1182    #    (Section 2.56), <th> (Section 2.58), and <tt> (Section 2.62).
1183    #
1184    #    Content model: only text content.
1185    def render_bcp14(self, e, width, **kwargs):
1186        return (e.text or '') + (e.tail or '')
1187
1188    # 2.10.  <blockquote>
1189    #
1190    #    Specifies that a block of text is a quotation.
1191    #
1192    #    This element appears as a child element of <section> (Section 2.46).
1193    #
1194    # 2.10.1.  "anchor" Attribute
1195    #
1196    #    Document-wide unique identifier for this quotation.
1197    #
1198    # 2.10.2.  "cite" Attribute
1199    #
1200    #    The source of the citation.  This must be a URI.  If the "quotedFrom"
1201    #    attribute is given, this URI will be used by processing tools as the
1202    #    link for the text of that attribute.
1203    #
1204    # 2.10.3.  "quotedFrom" Attribute
1205    #
1206    #    Name of person or document the text in this element is quoted from.
1207    #    A formatter should render this as visible text at the end of the
1208    #    quotation.
1209    def render_blockquote(self, e, width, **kwargs):
1210        by  = e.get('quotedFrom')
1211        cite = e.get('cite')
1212        return self.quote_renderer(e, width, '|', by, cite, **kwargs)
1213
1214    # 2.11.  <boilerplate>
1215    #
1216    #    Holds the boilerplate text for the document.  This element is filled
1217    #    in by the prep tool.
1218    #
1219    #    This element contains <section> elements.  Every <section> element in
1220    #    this element must have the "numbered" attribute set to "false".
1221    #
1222    #    This element appears as a child element of <front> (Section 2.26).
1223    def render_boilerplate(self, e, width, **kwargs):
1224        lines = []
1225        for c in e.getchildren():
1226            numbered = c.get('numbered')
1227            if not numbered == 'false':
1228                self.err(c, "Expected boilerplate section to have numbered='false', but found '%s'" % (numbered, ))
1229            keep_url = True if self.options.rfc else False
1230            lines = self.ljoin(lines, c, width, keep_url=keep_url, **kwargs)
1231        return lines
1232
1233    # 2.12.  <br>
1234    #
1235    #    Indicates that a line break should be inserted in the generated
1236    #    output by a formatting tool.  Multiple successive instances of this
1237    #    element are ignored.
1238    #
1239    #    This element appears as a child element of <td> (Section 2.56) and
1240    #    <th> (Section 2.58).
1241    def render_br(self, e, width, **kwargs):
1242        return '\u2028' + (e.tail or '')
1243
1244    # 2.13.  <city>
1245    #
1246    #    Gives the city name in a postal address.
1247    #
1248    #    This element appears as a child element of <postal> (Section 2.37).
1249    #
1250    # 2.13.1.  "ascii" Attribute
1251    #
1252    #    The ASCII equivalent of the city name.
    # No dedicated renderer for <city>: the tag is mapped to null_renderer.
    render_city = null_renderer         # handled in render_address
1254
1255    # 2.14.  <code>
1256    #
1257    #    Gives the postal region code.
1258    #
1259    #    This element appears as a child element of <postal> (Section 2.37).
1260    #
1261    # 2.14.1.  "ascii" Attribute
1262    #
1263    #    The ASCII equivalent of the postal code.
    # No dedicated renderer for <code>: the tag is mapped to null_renderer.
    render_code = null_renderer         # handled in render_address
1265
1266    # 2.15.  <country>
1267    #
1268    #    Gives the country name or code in a postal address.
1269    #
1270    #    This element appears as a child element of <postal> (Section 2.37).
1271    #
1272    # 2.15.1.  "ascii" Attribute
1273    #
1274    #    The ASCII equivalent of the country name.
    # No dedicated renderer for <country>: the tag is mapped to null_renderer.
    render_country = null_renderer      # handled in render_address
1276
1277    # 2.16.  <cref>
1278    #
1279    #    Represents a comment.
1280    #
1281    #    Comments can be used in a document while it is work in progress.
1282    #    They might appear either inline and visually highlighted, at the end
1283    #    of the document, or not at all, depending on the formatting tool.
1284    #
1285    #    This element appears as a child element of <annotation>
1286    #    (Section 2.3), <blockquote> (Section 2.10), <c> (Section 3.1), <dd>
1287    #    (Section 2.18), <dt> (Section 2.21), <em> (Section 2.22), <li>
1288    #    (Section 2.29), <name> (Section 2.32), <postamble> (Section 3.5),
1289    #    <preamble> (Section 3.6), <strong> (Section 2.50), <sub>
1290    #    (Section 2.51), <sup> (Section 2.52), <t> (Section 2.53), <td>
1291    #    (Section 2.56), <th> (Section 2.58), <tt> (Section 2.62), and <ttcol>
1292    #    (Section 3.9).
1293    #
1294    # 2.16.1.  "anchor" Attribute
1295    #
1296    #    Document-wide unique identifier for this comment.
1297    #
1298    # 2.16.2.  "display" Attribute
1299    #
1300    #    Suggests whether or not the comment should be displayed by formatting
1301    #    tools.  This might be set to "false" if you want to keep a comment in
1302    #    a document after the contents of the comment have already been dealt
1303    #    with.
1304    #
1305    #    Allowed values:
1306    #
1307    #    o  "true" (default)
1308    #
1309    #    o  "false"
1310    #
1311    # 2.16.3.  "source" Attribute
1312    #
1313    #    Holds the "source" of a comment, such as the name or the initials of
1314    #    the person who made the comment.
1315    def render_cref(self, e, width, **kwargs):
1316        display = e.get('display') == 'true'
1317        source = e.get('source')
1318        if display:
1319            text = '\u2028' + mktextblock(self.quote_renderer(e, width, '//', source, None, **kwargs))
1320            return text
1321        else:
1322            return ''
1323
1324    # 2.17.  <date>
1325    #
1326    #    Provides information about the publication date.  This element is
1327    #    used for two cases: the boilerplate of the document being produced,
1328    #    and inside bibliographic references that use the <front> element.
1329    #
1330    #    Boilerplate for Internet-Drafts and RFCs:  This element defines the
1331    #       date of publication for the current document (Internet-Draft or
1332    #       RFC).  When producing Internet-Drafts, the prep tool uses this
1333    #       date to compute the expiration date (see [IDGUIDE]).  When one or
1334    #       more of "year", "month", or "day" are left out, the prep tool will
1335    #       attempt to use the current system date if the attributes that are
1336    #       present are consistent with that date.
1337    #
1338    #       In dates in <rfc> elements, the month must be a number or a month
1339    #       in English.  The prep tool will silently change text month names
1340    #       to numbers.  Similarly, the year must be a four-digit number.
1341    #
1342    #       When the prep tool is used to create Internet-Drafts, it will
1343    #       reject a submitted Internet-Draft that has a <date> element in the
1344    #       boilerplate for itself that is anything other than today.  That
1345    #       is, the tool will not allow a submitter to specify a date other
1346    #       than the day of submission.  To avoid this problem, authors might
1347    #       simply not include a <date> element in the boilerplate.
1348    #
1349    #    Bibliographic references:  In dates in <reference> elements, the date
1350    #       information can have prose text for the month or year.  For
1351    #       example, vague dates (year="ca. 2000"), date ranges
1352    #       (year="2012-2013"), non-specific months (month="Second quarter"),
1353    #       and so on are allowed.
1354    #
1355    #    This element appears as a child element of <front> (Section 2.26).
1356    #
1357    # 2.17.1.  "day" Attribute
1358    #
1359    #    The day of publication.
1360    #
1361    # 2.17.2.  "month" Attribute
1362    #
1363    #    The month or months of publication.
1364    #
1365    # 2.17.3.  "year" Attribute
1366    #
1367    #    The year or years of publication.
1368    def render_date(self, e, width, **kwargs):
1369        #pp = e.getparent().getparent()
1370        #if pp.tag == 'rfc':
1371        have_date = e.get('day') or e.get('month') or e.get('year')
1372        year, month, day = extract_date(e, self.date)
1373        p = e.getparent()
1374        if p==None or p.getparent().tag != 'reference':
1375            # don't touch the given date if we're rendering a reference
1376            year, month, day = augment_date(year, month, day, self.date)
1377        date = format_date(year, month, day, self.options.legacy_date_format)
1378        if e.text and have_date:
1379            date = "%s (%s)" % (e.text, date)
1380        elif e.text:
1381            date = e.text
1382        else:
1383            # date = date
1384            pass
1385        return date
1386
1387    # 2.18.  <dd>
1388    #
1389    #    The definition part of an entry in a definition list.
1390    #
1391    #    This element appears as a child element of <dl> (Section 2.20).
1392    #
1393    # 2.18.1.  "anchor" Attribute
1394    #
1395    #    Document-wide unique identifier for this definition.
1396    def render_dd(self, e, width, **kwargs):
1397        dtwidth = kwargs.pop('dtwidth')
1398        j = kwargs['joiners']['dd']
1399        kwargs['first'] = dtwidth + len(j.join) - j.indent if j.overlap else 0
1400        r, foldable = self.text_or_block_renderer(e, width, **kwargs)
1401        lines = mklines(r, e) if foldable else r
1402        if lines:
1403            lines[0].keep = True            # keep first line of dd with preceding dt
1404        return lines
1405
1406    # 2.19.  <displayreference>
1407    #
1408    #    This element gives a mapping between the anchor of a reference and a
1409    #    name that will be displayed instead.  This allows authors to display
1410    #    more mnemonic anchor names for automatically included references.
1411    #    The mapping in this element only applies to <xref> elements whose
1412    #    format is "default".  For example, if the reference uses the anchor
1413    #    "RFC6949", the following would cause that anchor in the body of
1414    #    displayed documents to be "RFC-dev":
1415    #
1416    #    <displayreference target="RFC6949" to="RFC-dev"/>
1417    #
1418    #    If a reference section is sorted, this element changes the sort
1419    #    order.
1420    #
1421    #    It is expected that this element will only be valid in input
1422    #    documents.  It will likely be removed by prep tools when preparing a
1423    #    final version after those tools have replaced all of the associated
1424    #    anchors, targets, and "derivedContent" attributes.
1425    #
1426    #    This element appears as a child element of <back> (Section 2.8).
1427    #
1428    # 2.19.1.  "target" Attribute (Mandatory)
1429    #
1430    #    This attribute must be the name of an anchor in a <reference> or
1431    #    <referencegroup> element.
1432    #
1433    # 2.19.2.  "to" Attribute (Mandatory)
1434    #
1435    #    This attribute is a name that will be displayed as the anchor instead
1436    #    of the anchor that is given in the <reference> element.  The string
1437    #    given must start with one of the following characters: 0-9, a-z, or
1438    #    A-Z.  The other characters in the string must be 0-9, a-z, A-Z, "-",
1439    #    ".", or "_".
    def render_displayreference(self, e, width, **kwargs):
        """
        Return an empty string: a <displayreference> element contributes no
        text of its own at the place where it occurs.
        """
        return ''
1442
1443
1444    # 2.20.  <dl>
1445    #
1446    #    A definition list.  Each entry has a pair of elements: a term (<dt>)
1447    #    and a definition (<dd>).  (This is slightly different and simpler
1448    #    than the model used in HTML, which allows for multiple terms for a
1449    #    single definition.)
1450    #
1451    #    This element appears as a child element of <abstract> (Section 2.1),
1452    #    <aside> (Section 2.6), <blockquote> (Section 2.10), <dd>
1453    #    (Section 2.18), <li> (Section 2.29), <note> (Section 2.33), <section>
1454    #    (Section 2.46), <td> (Section 2.56), and <th> (Section 2.58).
1455    #
1456    # 2.20.1.  "anchor" Attribute
1457    #
1458    #    Document-wide unique identifier for the list.
1459    #
1460    # 2.20.2.  "hanging" Attribute
1461    #
1462    #    The "hanging" attribute defines whether or not the term appears on
1463    #    the same line as the definition.  hanging="true" indicates that the
1464    #    term is to the left of the definition, while hanging="false"
1465    #    indicates that the term will be on a separate line.
1466    #
1467    #    Allowed values:
1468    #
1469    #    o  "false"
1470    #
1471    #    o  "true" (default)
1472    #
1473    # 2.20.3.  "spacing" Attribute
1474    #
1475    #    Defines whether or not there is a blank line between entries.
1476    #    spacing="normal" indicates a single blank line, while
1477    #    spacing="compact" indicates no space between.
1478    #
1479    #    Allowed values:
1480    #
1481    #    o  "normal" (default)
1482    #
1483    #    o  "compact"
1484    def render_dl(self, e, width, **kwargs):
1485        newline = e.get('newline') == 'true'
1486        compact = e.get('spacing') == 'compact'
1487        tjoin  = '\n' if compact else '\n\n'
1488        #
1489        indent = int(e.get('indent') or '3')
1490        nljoin = Joiner('\n', indent, 0, False, False)
1491        spjoin = Joiner('  ', indent, 0, True, False)
1492        ddjoin  = nljoin if newline else spjoin
1493        set_joiners(kwargs, {
1494            None:       Joiner(tjoin, 0, 0, False, False),
1495            'dt':       Joiner(tjoin, 0, 0, False, False),
1496            'dd':       ddjoin,
1497        })
1498        # child tags which should always render with newline=True
1499        newline_tags = set([
1500                'artset',
1501                'artwork',
1502                'aside',
1503                'figure',
1504                'ol',
1505                'sourcecode',
1506                'table',
1507                'ul',
1508            ])
1509        # rendering
1510        lines = []
1511        text = ''
1512        dtwidth = indent
1513        for c in e.getchildren():
1514            if ((not newline and c.tag == 'dd' and c.text and c.text.strip(stripspace)
1515                 and (width - len('  ') - len(text)) < len(c.text.split(None, 1)[0]))
1516                or (len(c) and c[0].tag in newline_tags)):
1517                # Add a newline if first word of dd text won't fit to the right of dt
1518                kwargs['joiners']['dd'] = nljoin
1519            else:
1520                kwargs['joiners']['dd'] = ddjoin
1521            #
1522            lines = self.ljoin(lines, c, width, dtwidth=dtwidth, **kwargs)
1523            #
1524            if c.tag == 'dt':
1525                dtwidth = len(lines[-1].text)
1526        return lines
1527
1528
1529    # 2.21.  <dt>
1530    #
1531    #    The term being defined in a definition list.
1532    #
1533    #    This element appears as a child element of <dl> (Section 2.20).
1534    #
1535    # 2.21.1.  "anchor" Attribute
1536    #
1537    #    Document-wide unique identifier for this term.
1538    def render_dt(self, e, width, **kwargs):
1539        kwargs.pop('dtwidth')
1540        indent = kwargs['joiners']['dd'].indent
1541        join   = len(kwargs['joiners']['dd'].join)
1542        text = fill(self.inner_text_renderer(e), width=width-3, **kwargs)
1543        if len(text) < indent:
1544            text = text+' '*max(0, indent-join-len(text))
1545        return mklines(text, e)
1546
1547
1548    # 2.22.  <em>
1549    #
1550    #    Indicates text that is semantically emphasized.  Text enclosed within
1551    #    this element will be displayed as italic after processing.  This
1552    #    element can be combined with other character formatting elements, and
1553    #    the formatting will be additive.
1554    def render_em(self, e, width, **kwargs):
1555        # Render text with leading and trailing '_'
1556        text = '_%s_' % self.inner_text_renderer(e)
1557        text += e.tail or ''
1558        return text
1559
1560    # 2.23.  <email>
1561    #
1562    #    Provides an email address.
1563    #
1564    #    The value is expected to be the addr-spec defined in Section 2 of
1565    #    [RFC6068].
1566    #
1567    #    This element appears as a child element of <address> (Section 2.2).
1568    #
1569    #    Content model: only text content.
1570    #
1571    # 2.23.1.  "ascii" Attribute
1572    #
1573    #    The ASCII equivalent of the author's email address.  This is only
1574    #    used if the email address has any internationalized components.
1575    def render_email(self, e, width, **kwargs):
1576        latin = kwargs.pop('latin', None)
1577        prev = e.getprevious()
1578        if prev!=None and prev.tag==e.tag:
1579            text = fill(", %s"%e.text, width=width, **kwargs) if e.text and latin!=False else ''
1580        else:
1581            text = '\n'+fill("Email: %s"%e.text, width=width, **kwargs) if e.text and latin!=False else ''
1582        return text
1583
1584    # 2.24.  <eref>
1585    #
1586    #    Represents an "external" link (as specified in the "target"
1587    #    attribute).  This is useful for embedding URIs in the body of a
1588    #    document.
1589    #
1590    #    If the <eref> element has non-empty text content, formatters should
1591    #    use the content as the displayed text that is linked.  Otherwise, the
1592    #    formatter should use the value of the "target" attribute as the
1593    #    displayed text.  Formatters will link the displayed text to the value
1594    #    of the "target" attribute in a manner appropriate for the output
1595    #    format.
1596    #
1597    #    For example, with an input of:
1598    #
1599    #          This is described at
1600    #          <eref target="http://www.example.com/reports/r12.html"/>.
1601    #
1602    #    An HTML formatter might generate:
1603    #
1604    #          This is described at
1605    #          <a href="http://www.example.com/reports/r12.html">
1606    #          http://www.example.com/reports/r12.html</a>.
1607    #
1608    #    With an input of:
1609    #
1610    #          This is described
1611    #          <eref target="http://www.example.com/reports/r12.html">
1612    #          in this interesting report</eref>.
1613    #
1614    #    An HTML formatter might generate:
1615    #
1616    #          This is described
1617    #          <a href="http://www.example.com/reports/r12.html">
1618    #          in this interesting report</a>.
1619    #
1620    #    This element appears as a child element of <annotation>
1621    #    (Section 2.3), <blockquote> (Section 2.10), <c> (Section 3.1), <cref>
1622    #    (Section 2.16), <dd> (Section 2.18), <dt> (Section 2.21), <em>
1623    #    (Section 2.22), <li> (Section 2.29), <name> (Section 2.32),
1624    #    <postamble> (Section 3.5), <preamble> (Section 3.6), <strong>
1625    #    (Section 2.50), <sub> (Section 2.51), <sup> (Section 2.52), <t>
1626    #    (Section 2.53), <td> (Section 2.56), <th> (Section 2.58), <tt>
1627    #    (Section 2.62), and <ttcol> (Section 3.9).
1628    #
1629    #    Content model: only text content.
1630    #
1631    # 2.24.1.  "target" Attribute (Mandatory)
1632    #
1633    #    URI of the link target [RFC3986].  This must begin with a scheme name
1634    #    (such as "https://") and thus not be relative to the URL of the
1635    #    current document.
1636    def render_eref(self, e, width, **kwargs):
1637        target = e.get('target', '')
1638        brackets = e.get('brackets', self.attribute_defaults[e.tag]['brackets'])
1639        if not target:
1640            self.warn(e, "Expected the 'target' attribute to have a value, but found %s" % (etree.tostring(e), ))
1641        if   brackets == 'none':
1642            if e.text and target:
1643                target = "(%s)" % target
1644        elif brackets == 'angle':
1645            target = "<%s>" % target
1646        else:
1647            self.warn(e, 'Unexpected attribute value in <eref>: brackets="%s"' % brackets)
1648        text = ' '.join([ t for t in [e.text, target] if t ])
1649        text += e.tail or ''
1650        return text
1651
1652
1653    # 2.25.  <figure>
1654    #
1655    #    Contains a figure with a caption with the figure number.  If the
1656    #    element contains a <name> element, the caption will also show that
1657    #    name.
1658    #
1659    #    This element appears as a child element of <aside> (Section 2.6),
1660    #    <blockquote> (Section 2.10), <dd> (Section 2.18), <li>
1661    #    (Section 2.29), <section> (Section 2.46), <td> (Section 2.56), and
1662    #    <th> (Section 2.58).
1663    #
1664    #    Content model:
1665    #
1666    #    In this order:
1667    #
1668    #    1.  One optional <name> element (Section 2.32)
1669    #
1670    #    2.  Optional <iref> elements (Section 2.27)
1671    #
1672    #    3.  One optional <preamble> element (Section 3.6)
1673    #
1674    #    4.  In any order, but at least one of:
1675    #
1676    #        *  <artwork> elements (Section 2.5)
1677    #
1678    #        *  <sourcecode> elements (Section 2.48)
1679    #
1680    #    5.  One optional <postamble> element (Section 3.5)
1681    #
1682    # 2.25.1.  "align" Attribute
1683    #
1684    #    Deprecated.
1685    #
1686    #    Note: does not affect title or <artwork> alignment.
1687    #
1688    #    Allowed values:
1689    #
1690    #    o  "left" (default)
1691    #
1692    #    o  "center"
1693    #
1694    #    o  "right"
1695    #
1696    # 2.25.2.  "alt" Attribute
1697    #
1698    #    Deprecated.  If the goal is to provide a single URI for a reference,
1699    #    use the "target" attribute in <reference> instead.
1700    #
1701    # 2.25.3.  "anchor" Attribute
1702    #
1703    #    Document-wide unique identifier for this figure.
1704    #
1705    # 2.25.4.  "height" Attribute
1706    #
1707    #    Deprecated.
1708    #
1709    # 2.25.5.  "src" Attribute
1710    #
1711    #    Deprecated.
1712    #
1713    # 2.25.6.  "suppress-title" Attribute
1714    #
1715    #    Deprecated.
1716    #
1717    #    Allowed values:
1718    #
1719    #    o  "true"
1720    #
1721    #    o  "false" (default)
1722    #
1723    # 2.25.7.  "title" Attribute
1724    #
1725    #    Deprecated.  Use <name> instead.
1726    #
1727    # 2.25.8.  "width" Attribute
1728    #
1729    #    Deprecated.
1730    def render_figure(self, e, width, **kwargs):
1731        kwargs['joiners'].update({
1732            'name':         Joiner(': ', 0, 0, False, False),
1733            'artset':       Joiner('', 0, 0, False, False),
1734            'artwork':      Joiner('', 0, 0, False, True),
1735            'sourcecode':   Joiner('', 0, 0, False, False),
1736        })
1737        #
1738        pn = e.get('pn')
1739        num = pn.split('-')[1].capitalize()
1740        children = e.getchildren()
1741        title = "Figure %s" % (num, )
1742        if len(children) and children[0].tag == 'name':
1743            name = children[0]
1744            children = children[1:]
1745            title = self.tjoin(title, name, width, **kwargs)
1746        lines = []
1747        for c in children:
1748            lines = self.ljoin(lines, c, width, **kwargs)
1749        title = '\n'+center(title, width).rstrip(stripspace)
1750        lines += mklines(title, e)
1751        return lines
1752
1753    # 2.26.  <front>
1754    #
1755    #    Represents the "front matter": metadata (such as author information),
1756    #    the Abstract, and additional notes.
1757    #
1758    #    A <front> element may have more than one <seriesInfo> element.  A
1759    #    <seriesInfo> element determines the document number (for RFCs) or
1760    #    name (for Internet-Drafts).  Another <seriesInfo> element determines
1761    #    the "maturity level" (defined in [RFC2026]), using values of "std"
1762    #    for "Standards Track", "bcp" for "BCP", "info" for "Informational",
1763    #    "exp" for "Experimental", and "historic" for "Historic".  The "name"
1764    #    attributes of those multiple <seriesInfo> elements interact as
1765    #    described in Section 2.47.
1766    #
1767    #    This element appears as a child element of <reference> (Section 2.40)
1768    #    and <rfc> (Section 2.45).
1769    #
1770    #    Content model:
1771    #
1772    #    In this order:
1773    #
1774    # ...
1775    #
1776    def render_front(self, e, width, **kwargs):
1777        if e.getparent().tag == 'reference':
1778            return self.render_reference_front(e, width, **kwargs)
1779        else:
1780            text = '\n\n\n\n' + self.render_first_page_top(e, width, **kwargs) + '\n'
1781            lines = mklines(text, e)
1782            for c in e.getchildren():
1783                if c.tag in ['title', 'seriesInfo', 'author', 'date', 'area', 'workgroup', 'keyword', etree.PI, etree.Comment, ]:
1784                    # handled in render_first_page_top() or discarded
1785                    continue
1786                res = self.render(c, width, **kwargs)
1787                lines += blankline() + res
1788            return lines
1789
    def render_first_page_top(self, e, width, **kwargs):
        """Build the text for the top of the first page.

        The result is the two-column header (left column from get_left(),
        right column from get_right()) joined by join_cols(), two newlines,
        and then the title as rendered by self.render_title_front().

        *e* is the <front> element; *width* and *kwargs* are passed on to
        the date, author and title renderers.
        """
        def join_cols(left, right):
            "Join left and right columns of page top into page top text"
            # Pad the shorter list (in place) so both have the same length
            l = max(len(left), len(right))
            left  += ['']*(l-len(left))
            right += ['']*(l-len(right))
            lines = []
            t = len(left)
            for i in range(t):
                l = left[i]
                r = right[i]
                #assert displength(l)+displength(r)<70
                # Fill the gap with spaces so the row is 72 display columns
                w = 72-displength(l)-displength(r)
                lines.append(l+' '*w+r)
            return '\n'.join(lines).rstrip(stripspace)+'\n'
        #
        def wrap(label, items, left, right, suffix=''):
            """Wrap label+items+suffix into a list of left-column lines.

            The wrap width is 48, or less if the right column has an entry
            at index len(left); in that case 4 columns plus the length of
            that entry are deducted from 72.  Continuation lines are indented
            by len(label) spaces.
            """
            line = '%s%s%s' % (label, items, suffix)
            ll = len(left)
            lr = len(right)
            # Local width; shadows the enclosing method's 'width' parameter
            width = 48 if ll >= lr else min(48, 72-4-len(right[ll]))
            wrapper = textwrap.TextWrapper(width=width, subsequent_indent=' '*len(label))
            return wrapper.wrap(line)
        #
        def normalize(t):
            "Replace each comma and following whitespace with ', '; strip commas and spaces at both ends"
            return re.sub(r',\s*', ', ', t).strip(', ')

        def get_left(front, right):
            "Get front page top left column"
            #left_parts = ['source', 'seriesInfo', 'obsoletes', 'updates', 'category', 'issn', 'expires', ]
            left = []
            if self.root.get('ipr') == 'none':
                # With ipr="none", the left column holds only the non-blank
                # workgroup names
                for group in front.xpath('./workgroup'):
                    if group.text and group.text.strip(stripspace):
                        # NOTE(review): 'found' is assigned here but never
                        # read in this branch.
                        found = True
                        left.append(group.text.strip(stripspace))
                return left
            if self.options.rfc:
                #
                #    There is a set of additional information that is needed at the front
                #    of the RFC.  Historically, this has been presented with the
                #    information below in a left hand column, and the author-related
                #    information described above in the right.
                #
                #    <document source>  This describes the area where the work originates.
                #       Historically, all RFCs were labeled "Network Working Group".
                #       Network Working Group refers to the original version of today's
                #       IETF when people from the original set of ARPANET sites and
                #       whomever else was interested -- the meetings were open -- got
                #       together to discuss, design, and document proposed protocols
                #       [RFC3].  Here, we obsolete the term "Network Working Group" in
                #       order to indicate the originating stream.
                #
                #       The <document source> is the name of the RFC stream, as defined in
                #       [RFC4844] and its successors.  At the time of this publication,
                #       the streams, and therefore the possible entries are:
                #
                #       *  Internet Engineering Task Force
                #       *  Internet Architecture Board
                #       *  Internet Research Task Force
                #       *  Independent Submission
                stream = self.root.get('submissionType')
                left.append(strings.stream_name[stream])
                #
                #    Request for Comments:  <RFC number>  This indicates the RFC number,
                #       assigned by the RFC Editor upon publication of the document.  This
                #       element is unchanged.
                for item in front.iter('seriesInfo'):
                    name  = item.get('name')
                    value = item.get('value')
                    if name == 'RFC':
                        name = 'Request for Comments'
                    left.append("%s: %s" % (name, value))
                #    <subseries ID> <subseries number>  Some document categories are also
                #       labeled as a subseries of RFCs.  These elements appear as
                #       appropriate for such categories, indicating the subseries and the
                #       documents number within that series.  Currently, there are
                #       subseries for BCPs [RFC2026] and STDs [RFC1311].  These subseries
                #       numbers may appear in several RFCs.  For example, when a new RFC
                #       obsoletes or updates an old one, the same subseries number is
                #       used.  Also, several RFCs may be assigned the same subseries
                #       number: a single STD, for example, may be composed of several
                #       RFCs, each of which will bear the same STD number.  This element
                #       is unchanged.
                category = self.root.get('category', '')
                #
                ## The following code duplicates series info also generated from <seriesInfo>
                ## entries.  Commented out.
                #series_no = self.root.get('seriesNo')
                #if category and category in strings.series_name and series_no:
                #    left.append('%s: %s' % (strings.series_name[category], series_no))
                #else:
                #    pass
                #    [<RFC relation>:<RFC number[s]>]  Some relations between RFCs in the
                #       series are explicitly noted in the RFC header.  For example, a new
                #       RFC may update one or more earlier RFCs.  Currently two
                #       relationships are defined: "Updates" and "Obsoletes" [RFC7322].
                #       Variants like "Obsoleted by" are also used (e.g, in [RFC5143]).
                #       Other types of relationships may be defined by the RFC Editor and
                #       may appear in future RFCs.
                obsoletes = self.root.get('obsoletes')
                if obsoletes:
                    left += wrap('Obsoletes: ', normalize(obsoletes), left, right)
                updates = self.root.get('updates')
                if updates:
                    left += wrap('Updates: ', normalize(updates), left, right)

                #    Category: <category>  This indicates the initial RFC document
                #       category of the publication.  These are defined in [RFC2026].
                #       Currently, this is always one of: Standards Track, Best Current
                #       Practice, Experimental, Informational, or Historic.  This element
                #       is unchanged.
                if category:
                    if category in strings.category_name:
                        left.append('Category: %s' % (strings.category_name[category], ))
                    else:
                        self.warn(self.root, "Expected a known category, one of %s, but found '%s'" % (','.join(strings.category_name.keys()), category, ))
                else:
                    self.warn(self.root, "Expected a category, one of %s, but found none" % (','.join(strings.category_name.keys()), ))
                #
                left.append('ISSN: 2070-1721')
                #
            else:
                # Internet-Draft
                found = False
                for group in front.xpath('./workgroup'):
                    if group.text and group.text.strip(stripspace):
                        found = True
                        left.append(group.text.strip(stripspace))
                # Fall back to the historic label when no workgroup is given
                if not found:
                    left.append('Network Working Group')
                left.append('Internet-Draft')
                #
                category = self.root.get('category', '')
                #
                ## The following code duplicates series info also generated from <seriesInfo>
                ## entries.  Commented out.
                #series_no = self.root.get('seriesNo')
                #if category and series_no and category in strings.series_name:
                #    left.append('%s: %s (if approved)' % (strings.series_name[category], series_no))
                #else:
                #    pass
                #
                obsoletes = self.root.get('obsoletes')
                if obsoletes:
                    left += wrap('Obsoletes: ', normalize(obsoletes), left, right, suffix=' (if approved)')
                updates = self.root.get('updates')
                if updates:
                    left += wrap('Updates: ', normalize(updates), left, right, suffix=' (if approved)')
                #
                if category:
                    if category in strings.category_name:
                        left.append('Intended status: %s' % (strings.category_name[category], ))
                    else:
                        self.warn(self.root, "Expected a known category, one of %s, but found '%s'" % (','.join(strings.category_name.keys()), category, ))
                else:
                    self.warn(self.root, "Expected a category, one of %s, but found none" % (','.join(strings.category_name.keys()), ))
                #
                # NOTE(review): the ipr == 'none' case has already returned
                # at the top of get_left(), so this test looks always true
                # here -- confirm.
                if self.root.get('ipr') != 'none':
                    exp = get_expiry_date(self.root, self.date)
                    left.append('Expires: %s' % format_date(exp.year, exp.month, exp.day, self.options.legacy_date_format))
            return left
        #
        def get_right(front):
            "Get front page top right column"
            # RFC 7841           RFC Streams, Headers, Boilerplates           May 2016
            #
            # 3.1.  The Title Page Header
            #
            #    The information at the front of the RFC includes the name and
            #    affiliation of the authors as well as the RFC publication month and
            #    year.
            #
            #-------------------------------------------------------------------------
            #
            # RFC 7322                     RFC Style Guide              September 2014
            #
            # 4.1.2.  Organization
            #
            #    The author's organization is indicated on the line following the
            #    author's name.
            #
            #    For multiple authors, each author name appears on its own line,
            #    followed by that author's organization.  When more than one author is
            #    affiliated with the same organization, the organization can be
            #    "factored out," appearing only once following the corresponding
            #    Author lines.  However, such factoring is inappropriate when it would
            #    force an unacceptable reordering of author names.
            right = []
            auth = namedtuple('author', ['name', 'org'])
            prev = auth(None, '')
            authors = front.xpath('./author')
            for a in authors:
                this = auth(*self.render_author_front(a, **kwargs))
                if right and this.name and this.org and this.org == prev.org:
                    # Same organization as the previous author: overwrite the
                    # previous organization line with this author's name, and
                    # add the shared organization once after it
                    right[-1] = this.name
                    right.append(this.org or '')
                else:
                    if this.name:
                        right.append(this.name)
                    if this.org!=None:
                        right.append(this.org)
                prev = this
            # We don't need to show a trailing blank line if the last author has a blank organization
            if prev.org == '':
                right = right[:-1]
            # The date is always the last line of the right column
            right.append(self.render_date(front.find('./date'), width, **kwargs))
            return right
        #
        # get right first, in order to limit the width of left lines as needed
        right = get_right(e)
        left  = get_left(e, right)
        #
        first_page_header = join_cols(left, right)
        first_page_header += '\n\n'
        first_page_header += self.render_title_front(e.find('./title'), width, **kwargs)
        return first_page_header
2007
2008    def render_reference_front(self, e, width, **kwargs):
2009        return self.default_renderer(e, width, **kwargs)
2010
2011    # 2.27.  <iref>
2012    #
2013    #    Provides terms for the document's index.
2014    #
2015    #    Index entries can be either regular entries (when just the "item"
2016    #    attribute is given) or nested entries (by specifying "subitem" as
2017    #    well), grouped under a regular entry.
2018    #
2019    #    Index entries generally refer to the exact place where the <iref>
2020    #    element occurred.  An exception is the occurrence as a child element
2021    #    of <section>, in which case the whole section is considered to be
2022    #    relevant for that index entry.  In some formats, index entries of
2023    #    this type might be displayed as ranges.
2024    #
2025    #    When the prep tool is creating index content, it collects the items
2026    #    in a case-sensitive fashion for both the item and subitem level.
2027    #
2028    #    This element appears as a child element of <annotation>
2029    #    (Section 2.3), <aside> (Section 2.6), <blockquote> (Section 2.10),
2030    #    <c> (Section 3.1), <dd> (Section 2.18), <dt> (Section 2.21), <em>
2031    #    (Section 2.22), <figure> (Section 2.25), <li> (Section 2.29),
2032    #    <postamble> (Section 3.5), <preamble> (Section 3.6), <section>
2033    #    (Section 2.46), <strong> (Section 2.50), <sub> (Section 2.51), <sup>
2034    #    (Section 2.52), <t> (Section 2.53), <table> (Section 2.54), <td>
2035    #    (Section 2.56), <th> (Section 2.58), <tt> (Section 2.62), and <ttcol>
2036    #    (Section 3.9).
2037    #
2038    #    Content model: this element does not have any contents.
2039    def render_iref(self, e, width, **kwargs):
2040        p = e.getparent()
2041        self.index_items.append(IndexItem(e.get('item'), e.get('subitem'), p.get('pn'), None))
2042        return '' if p.tag in ['section', 'figure', 'table', ]  else e.tail or ''
2043
2044    # 2.27.1.  "item" Attribute (Mandatory)
2045    #
2046    #    The item to include.
2047    #
2048    # 2.27.2.  "primary" Attribute
2049    #
2050    #    Setting this to "true" declares the occurrence as "primary", which
2051    #    might cause it to be highlighted in the index.  There is no
2052    #    restriction on the number of occurrences that can be "primary".
2053    #
2054    #    Allowed values:
2055    #
2056    #    o  "true"
2057    #
2058    #    o  "false" (default)
2059    #
2060    # 2.27.3.  "subitem" Attribute
2061    #
2062    #    The subitem to include.
2063
2064
2065    # 2.28.  <keyword>
2066    #
2067    #    Specifies a keyword applicable to the document.
2068    #
2069    #    Note that each element should only contain a single keyword; for
2070    #    multiple keywords, the element can simply be repeated.
2071    #
2072    #    Keywords are used both in the RFC Index and in the metadata of
2073    #    generated document representations.
2074    #
2075    #    This element appears as a child element of <front> (Section 2.26).
2076    #
2077    #    Content model: only text content.
2078
2079
2080    # 2.29.  <li>
2081    #
2082    #    A list element, used in <ol> and <ul>.
2083    #
2084    #    This element appears as a child element of <ol> (Section 2.34) and
2085    #    <ul> (Section 2.63).
2086    #
2087    #    Content model:
2088    #
2089    #    Either:
2090    #
2091    #       In any order, but at least one of:
2092    #
2093    #       *  <artwork> elements (Section 2.5)
2094    #
2095    #       *  <dl> elements (Section 2.20)
2096    #
2097    #       *  <figure> elements (Section 2.25)
2098    #
2099    #       *  <ol> elements (Section 2.34)
2100    #
2101    #       *  <sourcecode> elements (Section 2.48)
2102    #
2103    #       *  <t> elements (Section 2.53)
2104    #
2105    #       *  <ul> elements (Section 2.63)
2106    #
2107    #    Or:
2108    #
2109    #       In any order, but at least one of:
2110    #
2111    #       *  Text
2112    #
2113    #       *  <bcp14> elements (Section 2.9)
2114    #
2115    #       *  <cref> elements (Section 2.16)
2116    #
2117    #       *  <em> elements (Section 2.22)
2118    #
2119    #       *  <eref> elements (Section 2.24)
2120    #
2121    #       *  <iref> elements (Section 2.27)
2122    #
2123    #       *  <relref> elements (Section 2.44)
2124    #
2125    #       *  <strong> elements (Section 2.50)
2126    #
2127    #       *  <sub> elements (Section 2.51)
2128    #
2129    #       *  <sup> elements (Section 2.52)
2130    #
2131    #       *  <tt> elements (Section 2.62)
2132    #
2133    #       *  <xref> elements (Section 2.66)
2134    #
2135    # 2.29.1.  "anchor" Attribute
2136    #
2137    #    Document-wide unique identifier for this list item.
2138    # Text
2139    def render_li(self, e, width, **kwargs):
2140        p = e.getparent()
2141        text = p._initial_text(e, p)
2142        tt, __ = self.text_or_block_renderer(e, width, **kwargs)
2143        if isinstance(tt, list):
2144            lines = stripl(tt)
2145            if lines and lines[0].elem.tag not in ['artwork', 'figure', 'sourcecode', 'li', ]:
2146                lines[0].text = text + lines[0].text.lstrip(stripspace)
2147        else:
2148            text += tt.lstrip(stripspace)
2149            lines = mklines(text, e)
2150        if self.options.pagination:
2151            for i, l in enumerate(lines):
2152                if '\u2026' in l.text:
2153                    lines[i].text = expand_ellipsis(l.text, width)
2154        return lines
2155
2156    def get_ol_li_initial_text(self, e, p):
2157        text = p._format % p._int2str(p._counter)
2158        text += ' '*(p._padding-len(text))
2159        p._counter += 1
2160        return text
2161
2162    def get_ul_li_initial_text(self, e, p):
2163        if p._bare:
2164            text = ''
2165        else:
2166            text = p._symbol
2167            text += ' '*(p._padding-len(text))
2168        return text
2169
2170    # 2.30.  <link>
2171    #
2172    #    A link to an external document that is related to the RFC.
2173    #
2174    #    The following are the supported types of external documents that can
2175    #    be pointed to in a <link> element:
2176    #
2177    #    o  The current International Standard Serial Number (ISSN) for the
2178    #       RFC Series.  The value for the "rel" attribute is "item".  The
2179    #       link should use the form "urn:issn:".
2180    #
2181    #    o  The Digital Object Identifier (DOI) for this document.  The value
2182    #       for the "rel" attribute is "describedBy".  The link should use the
2183    #       form specified in [RFC7669]; this is expected to change in the
2184    #       future.
2185    #
2186    #    o  The Internet-Draft that was submitted to the RFC Editor to become
2187    #       the published RFC.  The value for the "rel" attribute is
2188    #       "convertedFrom".  The link should be to an IETF-controlled web
2189    #       site that retains copies of Internet-Drafts.
2190    #
2191    #    o  A representation of the document offered by the document author.
2192    #       The value for the "rel" attribute is "alternate".  The link can be
2193    #       to a personally run web site.
2194    #
2195    #    In RFC production mode, the prep tool needs to check the values for
2196    #    <link> before an RFC is published.  In draft production mode, the
2197    #    prep tool might remove some <link> elements during the draft
2198    #    submission process.
2199    #
2200    #    This element appears as a child element of <rfc> (Section 2.45).
2201    #
2202    #    Content model: this element does not have any contents.
2203    def render_link(self, e, width, **kwargs):
2204        return ''
2205
2206    # 2.30.1.  "href" Attribute (Mandatory)
2207    #
2208    #    The URI of the external document.
2209    #
2210    # 2.30.2.  "rel" Attribute
2211    #
2212    #    The relationship of the external document to this one.  The
2213    #    relationships are taken from the "Link Relations" registry maintained
2214    #    by IANA [LINKRELATIONS].
2215
2216
2217    # 2.31.  <middle>
2218    #
2219    #    Represents the main content of the document.
2220    #
2221    #    This element appears as a child element of <rfc> (Section 2.45).
2222    #
2223    #    Content model:
2224    #
2225    #    One or more <section> elements (Section 2.46)
2226    def render_middle(self, e, width, **kwargs):
2227        kwargs['joiners'] = base_joiners
2228        lines = []
2229        for c in e.getchildren():
2230            lines = self.ljoin(lines, c, width, **kwargs)
2231        return lines
2232
2233    # 2.32.  <name>
2234    #
2235    #    The name of the section, note, figure, or texttable.  This name can
2236    #    indicate markup of flowing text (for example, including references or
2237    #    making some characters use a fixed-width font).
2238    #
2239    #    This element appears as a child element of <figure> (Section 2.25),
2240    #    <note> (Section 2.33), <references> (Section 2.42), <section>
2241    #    (Section 2.46), <table> (Section 2.54), and <texttable>
2242    #    (Section 3.8).
2243    #
2244    #    Content model:
2245    #
2246    #    In any order:
2247    #
2248    #    o  Text
2249    #
2250    #    o  <cref> elements (Section 2.16)
2251    #
2252    #    o  <eref> elements (Section 2.24)
2253    #
2254    #    o  <relref> elements (Section 2.44)
2255    #
2256    #    o  <tt> elements (Section 2.62)
2257    #
2258    #    o  <xref> elements (Section 2.66)
2259    def render_name(self, e, width, **kwargs):
2260        hang=kwargs['joiners'][e.tag].hang
2261        return fill(self.inner_text_renderer(e).strip(stripspace), width=width, hang=hang)
2262
2263    # 2.33.  <note>
2264    #
2265    #    Creates an unnumbered, titled block of text that appears after the
2266    #    Abstract.
2267    #
2268    #    It is usually used for additional information to reviewers (Working
2269    #    Group information, mailing list, ...) or for additional publication
2270    #    information such as "IESG Notes".
2271    #
2272    #    This element appears as a child element of <front> (Section 2.26).
2273    #
2274    #    Content model:
2275    #
2276    #    In this order:
2277    #
2278    #    1.  One optional <name> element (Section 2.32)
2279    #
2280    #    2.  In any order, but at least one of:
2281    #
2282    #        *  <dl> elements (Section 2.20)
2283    #
2284    #        *  <ol> elements (Section 2.34)
2285    #
2286    #        *  <t> elements (Section 2.53)
2287    #
2288    #        *  <ul> elements (Section 2.63)
2289    def render_note(self, e, width, **kwargs):
2290        kwargs['joiners'].update(
2291            {
2292                None:       Joiner('\n\n', 3, 0, False, False),
2293                'name':     Joiner(': ', 0, 0, False, False),
2294            }
2295        )
2296        lines = []
2297        if e[0].tag != 'name':
2298            lines.append(Line("Note", e))
2299        for c in e.getchildren():
2300            lines = self.ljoin(lines, c, width, **kwargs)
2301        return lines
2302
2303    # 2.33.1.  "removeInRFC" Attribute
2304    #
2305    #    If set to "true", this note is marked in the prep tool with text
2306    #    indicating that it should be removed before the document is published
2307    #    as an RFC.  That text will be "This note is to be removed before
2308    #    publishing as an RFC."
2309    #
2310    #    Allowed values:
2311    #
2312    #    o  "true"
2313    #
2314    #    o  "false" (default)
2315    #
2316    # 2.33.2.  "title" Attribute
2317    #
2318    #    Deprecated.  Use <name> instead.
2319
2320
2321    # 2.34.  <ol>
2322    #
2323    #    An ordered list.  The labels on the items will be either a number or
2324    #    a letter, depending on the value of the style attribute.
2325    #
2326    #    This element appears as a child element of <abstract> (Section 2.1),
2327    #    <aside> (Section 2.6), <blockquote> (Section 2.10), <dd>
2328    #    (Section 2.18), <li> (Section 2.29), <note> (Section 2.33), <section>
2329    #    (Section 2.46), <td> (Section 2.56), and <th> (Section 2.58).
2330    #
2331    #    Content model:
2332    #
2333    #    One or more <li> elements (Section 2.29)
2334    #
2335    # 2.34.1.  "anchor" Attribute
2336    #
2337    #    Document-wide unique identifier for the list.
2338    #
2339    # 2.34.2.  "group" Attribute
2340    #
2341    #    When the prep tool sees an <ol> element with a "group" attribute that
2342    #    has already been seen, it continues the numbering of the list from
2343    #    where the previous list with the same group name left off.  If an
2344    #    <ol> element has both a "group" attribute and a "start" attribute,
2345    #    the group's numbering is reset to the given start value.
2346    #
2347    # 2.34.3.  "spacing" Attribute
2348    #
2349    #    Defines whether or not there is a blank line between entries.
2350    #    spacing="normal" indicates a single blank line, while
2351    #    spacing="compact" indicates no space between.
2352    #
2353    #    Allowed values:
2354    #
2355    #    o  "normal" (default)
2356    #
2357    #    o  "compact"
2358    #
2359    # 2.34.4.  "start" Attribute
2360    #
2361    #    The ordinal value at which to start the list.  This defaults to "1"
2362    #    and must be an integer of 0 or greater.
2363    #
2364    # 2.34.5.  "type" Attribute
2365    #
2366    #    The type of the labels on list items.  If the length of the type
2367    #    value is 1, the meaning is the same as it is for HTML:
2368    #
2369    #    a  Lowercase letters (a, b, c, ...)
2370    #
2371    #    A  Uppercase letters (A, B, C, ...)
2372    #
2373    #    1  Decimal numbers (1, 2, 3, ...)
2374    #
2375    #    i  Lowercase Roman numerals (i, ii, iii, ...)
2376    #
2377    #    I  Uppercase Roman numerals (I, II, III, ...)
2378    #
2379    #    For types "a" and "A", after the 26th entry, the numbering starts at
2380    #    "aa"/"AA", then "ab"/"AB", and so on.
2381    #
2382    #    If the length of the type value is greater than 1, the value must
2383    #    contain a percent-encoded indicator and other text.  The value is a
2384    #    free-form text that allows counter values to be inserted using a
2385    #    "percent-letter" format.  For instance, "[REQ%d]" generates labels of
2386    #    the form "[REQ1]", where "%d" inserts the item number as a decimal
2387    #    number.
2388    #
2389    #    The following formats are supported:
2390    #
2391    #    %c Lowercase letters (a, b, c, ...)
2392    #
2393    #    %C Uppercase letters (A, B, C, ...)
2394    #
2395    #    %d Decimal numbers (1, 2, 3, ...)
2396    #
2397    #    %i Lowercase Roman numerals (i, ii, iii, ...)
2398    #
2399    #    %I Uppercase Roman numerals (I, II, III, ...)
2400    #
2401    #    %% Represents a percent sign
2402    #
2403    #    Other formats are reserved for future use.  Only one percent encoding
2404    #    other than "%%" is allowed in a type string.
2405    #
2406    #    It is an error for the type string to be empty.  For bulleted lists,
2407    #    use the <ul> element.  For lists that have neither bullets nor
2408    #    numbers, use the <ul> element with the 'empty="true"' attribute.
2409    #
2410    #    If no type attribute is given, the default type is the same as
2411    #    "type='%d.'".
2412    def render_ol(self, e, width, **kwargs):
2413        # setup and validation
2414        start = e.get('start')
2415        if not start.isdigit():
2416            self.warn(e, "Expected a numeric value for the 'start' attribute, but found %s" % (etree.tostring(e), ))
2417            start = '1'
2418        e._counter = int(start)
2419        #
2420        type = e.get('type')
2421        if not type:
2422            self.warn(e, "Expected the 'type' attribute to have a string value, but found %s" % (etree.tostring(e), ))
2423            type = '1'
2424        if '%p' in type:
2425            for p in e.iterancestors('li'):
2426                pcounter = p.get('derivedCounter')
2427                type = type.replace('%p', pcounter )
2428                break
2429        e._type = type
2430        if len(type) > 1:
2431            formspec = re.search('%([cCdiIoOxX])', type)
2432            if formspec:
2433                fchar = formspec.group(1)
2434                fspec = formspec.group(0)
2435                e._format = type.replace(fspec, '%s')
2436            else:
2437                self.err(e, "Expected an <ol> format specification of '%%' followed by upper- or lower-case letter, of one of c,d,i,o,x; but found '%s'" % (type, ))
2438                fchar = 'd'
2439                e._format = '%s'
2440        else:
2441            fchar = type
2442            e._format = '%s.'
2443        e._int2str = ol_style_formatter[fchar]
2444        e._initial_text = self.get_ol_li_initial_text
2445        #
2446        compact = e.get('spacing') == 'compact'
2447        ljoin  = '\n' if compact else '\n\n'
2448        #
2449        adaptive_indent = len(e._format % (' '*num_width(fchar, len(list(e))))) + len('  ')
2450        indent_attrib = e.get('indent') or '3'
2451        indent = int(indent_attrib) if indent_attrib.isdigit() else adaptive_indent
2452        e._padding = indent
2453        kwargs['joiners'].update({
2454            None:   Joiner(ljoin, indent, 0, False, False),
2455            'li':   Joiner(ljoin, 0, 0, False, False),
2456            't':    Joiner(ljoin, indent, 0, False, False),
2457        })
2458        # rendering
2459        lines = []
2460        for c in e.getchildren():
2461            lines = self.ljoin(lines, c, width, **kwargs)
2462        return lines
2463
2464    # 2.35.  <organization>
2465    #
2466    #    Specifies the affiliation [RFC7322] of an author.
2467    #
2468    #    This information appears both in the "Author's Address" section and
2469    #    on the front page (see [RFC7322] for more information).  If the value
2470    #    is long, an abbreviated variant can be specified in the "abbrev"
2471    #    attribute.
2472    #
2473    #    This element appears as a child element of <author> (Section 2.7).
2474    #
2475    #    Content model: only text content.
2476    #
2477    # 2.35.1.  "abbrev" Attribute
2478    #
2479    #    Abbreviated variant.
2480    #
2481    # 2.35.2.  "ascii" Attribute
2482    #
2483    #    The ASCII equivalent of the organization's name.
2484    def render_front_organization(self, e, **kwargs):
2485        author = e.getparent()
2486        org, ascii = short_org_name_set(author)
2487        if ascii:
2488            org += ' (%s)' % ascii.strip(stripspace)
2489        return org
2490
2491    def render_organization(self, e, width, **kwargs):
2492        text = ''
2493        if e != None:
2494            org = e.text or ''
2495            org = org.strip(stripspace)
2496            if org and not is_script(org, 'Latin'):
2497                ascii = e.get('ascii')
2498                if ascii and ascii != org:
2499                    org += ' (%s)' % ascii.strip(stripspace)
2500            text = fill(org, width=width, **kwargs)
2501        return text
2502
2503    # 2.36.  <phone>
2504    #
2505    #    Represents a phone number.
2506    #
2507    #    The value is expected to be the scheme-specific part of a "tel" URI
2508    #    (and so does not include the prefix "tel:"), using the
2509    #    "global-number-digits" syntax.  See Section 3 of [RFC3966] for
2510    #    details.
2511    #
2512    #    This element appears as a child element of <address> (Section 2.2).
2513    #
2514    #    Content model: only text content.
2515    def render_phone(self, e, width, **kwargs):
2516        latin = kwargs.pop('latin', None)
2517        text = fill("Phone: %s"%e.text, width=width, **kwargs) if e.text and latin!=False else ''
2518        return text
2519
2520    # 2.37.  <postal>
2521    #
2522    #    Contains optional child elements providing postal information.  These
2523    #    elements will be displayed in an order that is specific to
2524    #    formatters.  A postal address can contain only a set of <street>,
2525    #    <city>, <region>, <code>, and <country> elements, or only an ordered
2526    #    set of <postalLine> elements, but not both.
2527    #
2528    #    This element appears as a child element of <address> (Section 2.2).
2529    #
2530    #    Content model:
2531    #
2532    #    Either:
2533    #
2534    #       In any order:
2535    #
2536    #       *  <city> elements (Section 2.13)
2537    #
2538    #       *  <code> elements (Section 2.14)
2539    #
2540    #       *  <country> elements (Section 2.15)
2541    #
2542    #       *  <region> elements (Section 2.43)
2543    #
2544    #       *  <street> elements (Section 2.49)
2545    #
2546    #    Or:
2547    #
2548    #       One or more <postalLine> elements (Section 2.38)
2549    def render_postal(self, e, width, **kwargs):
2550        latin = kwargs.pop('latin', False)
2551        adr = get_normalized_address_info(self, e, latin=latin)
2552        for k in adr:
2553            if isinstance(adr[k], list):
2554                adr[k] = '\n'.join(adr[k])
2555        set_joiners(kwargs, { None: Joiner('\n', 0, 0, False, False), })
2556        if adr:
2557            if all(is_script(v, 'Latin') for v in adr.values() if v):
2558                latin = True
2559            try:
2560                text = format_address(adr, latin=latin)
2561                text = text.strip(stripspace)+'\n\n'
2562                return mklines(text, e)
2563            except:
2564                debug.pprint('adr')
2565                raise
2566        else:
2567            author = e.getparent().getparent()
2568            text = self.render_author_name(author, width, **kwargs)
2569            if e.find('./postalLine') != None:
2570                lines = []
2571                for c in e.getchildren():
2572                    lines = self.ljoin(lines, c, width, **kwargs)
2573            else:
2574                lines = []
2575                for street in e.findall('street'):
2576                    if street.text:
2577                        lines.append(Line(street.text, street))
2578                cityline = []
2579                city = e.find('city')
2580                if city is not None and city.text:
2581                    cityline.append(city.text)
2582                region = e.find('region')
2583                if region is not None and region.text:
2584                    if len(cityline) > 0: cityline.append(', ');
2585                    cityline.append(region.text)
2586                code = e.find('code')
2587                if code is not None and code.text:
2588                    if len(cityline) > 0: cityline.append('  ');
2589                    cityline.append(code.text)
2590                if len(cityline) > 0:
2591                    lines.append(Line(''.join(cityline), e))
2592                country = e.find('country')
2593                if country is not None and country.text:
2594                    lines.append(Line(country.text, country))
2595            lines.append( Line('', e) )
2596            return lines
2597
2598    # 2.38.  <postalLine>
2599    #
2600    #    Represents one line of a postal address.  When more than one
2601    #    <postalLine> is given, the prep tool emits them in the order given.
2602    #
2603    #    This element appears as a child element of <postal> (Section 2.37).
2604    #
2605    #    Content model: only text content.
2606    #
2607    # 2.38.1.  "ascii" Attribute
2608    #
2609    #    The ASCII equivalent of the text in the address line.
2610    def render_postalline(self, e, width, **kwargs):
2611        text = fill(self.inner_text_renderer(e), width=width, **kwargs)
2612        return text
2613
2614    # 2.39.  <refcontent>
2615    #
2616    #    Text that should appear between the title and the date of a
2617    #    reference.  The purpose of this element is to prevent the need to
2618    #    abuse <seriesInfo> to get such text in a reference.
2619    #
2620    #    For example:
2621    #
2622    #    <reference anchor="April1">
2623    #      <front>
2624    #        <title>On Being A Fool</title>
2625    #        <author initials="K." surname="Phunny" fullname="Knot Phunny"/>
2626    #        <date year="2000" month="April"/>
2627    #      </front>
2628    #      <refcontent>Self-published pamphlet</refcontent>
2629    #    </reference>
2630    #
2631    #    would render as:
2632    #
2633    #       [April1]     Phunny, K., "On Being A Fool", Self-published
2634    #                    pamphlet, April 2000.
2635    #
2636    #    This element appears as a child element of <reference>
2637    #    (Section 2.40).
2638    #
2639    #    Content model:
2640    #
2641    #    In any order:
2642    #
2643    #    o  Text
2644    #
2645    #    o  <bcp14> elements (Section 2.9)
2646    #
2647    #    o  <em> elements (Section 2.22)
2648    #
2649    #    o  <strong> elements (Section 2.50)
2650    #
2651    #    o  <sub> elements (Section 2.51)
2652    #
2653    #    o  <sup> elements (Section 2.52)
2654    #
2655    #    o  <tt> elements (Section 2.62)
2656    def render_refcontent(self, e, width, **kwargs):
2657        text = self.inner_text_renderer(e)
2658        return text
2659
2660    # 2.40.  <reference>
2661    #
2662    #    Represents a bibliographic reference.
2663    #
2664    #    This element appears as a child element of <referencegroup>
2665    #    (Section 2.41) and <references> (Section 2.42).
2666    #
2667    #    Content model:
2668    #
2669    #    In this order:
2670    #
2671    #    1.  One <front> element (Section 2.26)
2672    #
2673    #    2.  In any order:
2674    #
2675    #        *  <annotation> elements (Section 2.3)
2676    #
2677    #        *  <format> elements (Section 3.3)
2678    #
2679    #        *  <refcontent> elements (Section 2.39)
2680    #
2681    #        *  <seriesInfo> elements (Section 2.47; deprecated in this
2682    #           context)
2683    #
2684    # 2.40.1.  "anchor" Attribute (Mandatory)
2685    #
2686    #    Document-wide unique identifier for this reference.  Usually, this
2687    #    will be used both to "label" the reference in the "References"
2688    #    section and as an identifier in links to this reference entry.
2689    #
2690    # 2.40.2.  "quoteTitle" Attribute
2691    #
2692    #    Specifies whether or not the title in the reference should be quoted.
2693    #    This can be used to prevent quoting, such as on errata.
2694    #
2695    #    Allowed values:
2696    #
2697    #    o  "true" (default)
2698    #
2699    #    o  "false"
2700    #
2701    # 2.40.3.  "target" Attribute
2702    #
2703    #    Holds the URI for the reference.
2704    def render_reference(self, e, width, **kwargs):
2705        # rendering order: authors, title, seriesInfo, date, target, annotation
2706        p = e.getparent()
2707        if p.tag == 'referencegroup':
2708            label = ''
2709        else:
2710            label = self.refname_mapping[e.get('anchor')]
2711            label = ('[%s]' % label).ljust(11)
2712        # ensure the desired ordering
2713        elements = []
2714        for ctag in ('title', 'refcontent', 'stream', 'seriesInfo', 'date',):
2715            for c in e.iterdescendants(ctag):
2716                if p.tag == 'referencegroup' and c.tag == 'seriesInfo' and c.get('name') == 'DOI':
2717                    # Don't render DOI within a reference group
2718                    continue
2719                elements.append(c)
2720        if p.tag != 'referencegroup':
2721            target = e.get('target')
2722            if target:
2723                url = self.element('refcontent')
2724                url.text = '<%s>' % target
2725                elements.append(url)
2726        set_joiners(kwargs, {
2727            None:           Joiner(', ', 0, 0, False, False),
2728            'annotation':   Joiner('  ', 0, 0, False, False),
2729        })
2730        width = width-11
2731        text = self.render_authors(e, width, **kwargs)
2732        for c in elements:
2733            text = self.tjoin(text, c, width, **kwargs)
2734        text += '.'
2735        for ctag in ('annotation', ):
2736            for c in e.iterdescendants(ctag):
2737                text = self.tjoin(text, c, width, **kwargs)
2738        text = fill(text, width=width, fix_sentence_endings=False, keep_url=True, **kwargs).lstrip(stripspace)
2739
2740        text = indent(text, 11, 0)
2741        if p.tag != 'referencegroup':
2742            if len(label.strip(stripspace)) > 10:
2743                label += '\n'
2744            else:
2745                text = text.lstrip(stripspace)
2746        text = label + text
2747        lines = mklines(text, e)
2748        return lines
2749
2750
2751
2752    # 2.41.  <referencegroup>
2753    #
2754    #    Represents a list of bibliographic references that will be
2755    #    represented as a single reference.  This is most often used to
2756    #    reference STDs and BCPs, where a single reference (such as "BCP 9")
2757    #    may encompass more than one RFC.
2758    #
2759    #    This element appears as a child element of <references>
2760    #    (Section 2.42).
2761    #
2762    #    Content model:
2763    #
2764    #    One or more <reference> elements (Section 2.40)
2765    #
2766    # 2.41.1.  "anchor" Attribute (Mandatory)
2767    #
2768    #    Document-wide unique identifier for this reference group.  Usually,
2769    #    this will be used both to "label" the reference group in the
2770    #    "References" section and as an identifier in links to this reference
2771    #    entry.
2772    def render_referencegroup(self, e, width, **kwargs):
2773        kwargs['joiners'].update({
2774            'reference':    Joiner('\n\n', 0, 0, False, False),
2775            't':            Joiner('\n\n', 11, 0, False, False),
2776        })
2777        label = self.refname_mapping[e.get('anchor')]
2778        label = ('[%s]' % label).ljust(11)
2779        lines = []
2780        for c in e.getchildren():
2781            lines = self.ljoin(lines, c, width, **kwargs)
2782        target = e.get('target')
2783        if target:
2784            t = self.element('t')
2785            t.text = '<%s>' % target
2786            lines = self.ljoin(lines, t, width, **kwargs)
2787        if len(label.strip(stripspace)) > 10:
2788            lines = [ Line(label, e) ] + lines
2789        else:
2790            lines[0].text = label + lines[0].text.lstrip(stripspace)
2791        return lines
2792
2793    # 2.42.  <references>
2794    #
2795    #    Contains a set of bibliographic references.
2796    #
2797    #    In the early days of the RFC Series, there was only one "References"
2798    #    section per RFC.  This convention was later changed to group
2799    #    references into two sets, "Normative" and "Informative", as described
2800    #    in [RFC7322].  This vocabulary supports the split with the <name>
2801    #    child element.  In general, the title should be either "Normative
2802    #    References" or "Informative References".
2803    #
2804    #    This element appears as a child element of <back> (Section 2.8).
2805    #
2806    #    Content model:
2807    #
2808    #    In this order:
2809    #
2810    #    1.  One optional <name> element (Section 2.32)
2811    #
2812    #    2.  In any order:
2813    #
2814    #        *  <reference> elements (Section 2.40)
2815    #
2816    #        *  <referencegroup> elements (Section 2.41)
2817    #
2818    # 2.42.1.  "anchor" Attribute
2819    #
2820    #    An optional user-supplied identifier for this set of references.
2821    #
2822    # 2.42.2.  "title" Attribute
2823    #
2824    #    Deprecated.  Use <name> instead.
2825    def render_references(self, e, width, **kwargs):
2826        self.part = e.tag
2827        kwargs['joiners'].update({
2828            None:           Joiner('\n\n', 3, 0, False, False),
2829            'name':         Joiner('  '  , 0, 0, False, False),
2830            'reference':    Joiner('\n\n', 3, 0, False, False),
2831            'references':   Joiner('\n\n', 0, 0, False, False),
2832        })
2833        lines = []
2834        if e.find('name') != None:
2835            if self.check_refs_numbered():
2836                pn = e.get('pn')
2837                text = pn.split('-',1)[1].replace('-', ' ').title() +'.'
2838            else:
2839                text = ''
2840            lines += mklines(self.tjoin(text, e[0], width, **kwargs), e)
2841        for c in e[1:]:
2842            lines = self.ljoin(lines, c, width, **kwargs)
2843        return lines
2844
2845
2846    # 2.43.  <region>
2847    #
2848    #    Provides the region name in a postal address.
2849    #
2850    #    This element appears as a child element of <postal> (Section 2.37).
2851    #
2852    #    Content model: only text content.
2853    #
2854    # 2.43.1.  "ascii" Attribute
2855    #
2856    #    The ASCII equivalent of the region name.
    # NOTE(review): render_postal() in this class reads the <region> child
    # itself when building the city line; null_renderer is defined elsewhere
    # in this file and presumably emits nothing -- confirm.
    render_region = null_renderer       # handled in render_address
2858
2859    # 2.44.  <relref>
2860    #
2861    #    Represents a link to a specific part of a document that appears in a
2862    #    <reference> element.  Formatters that have links (such as HTML and
2863    #    PDF) render <relref> elements as external hyperlinks to the specified
2864    #    part of the reference, creating the link target by combining the base
2865    #    URI from the <reference> element with the "relative" attribute from
2866    #    this element.  The "target" attribute is required, and it must be the
2867    #    anchor of a <reference> element.
2868    #
2869    #    The "section" attribute is required, and the "relative" attribute is
2870    #    optional.  If the reference is not an RFC or Internet-Draft that is
2871    #    in the v3 format, the element needs to have a "relative" attribute;
2872    #    in this case, the value of the "section" attribute is ignored.
2873    #
2874    #    An example of the <relref> element with text content might be:
2875    #
2876    #          See
2877    #          <relref section="2.3" target="RFC9999" displayFormat="bare">
2878    #          the protocol overview</relref>
2879    #          for more information.
2880    #
2881    #    An HTML formatter might generate:
2882    #
2883    #          See
2884    #          <a href="http://www.rfc-editor.org/rfc/rfc9999.html#s-2.3">
2885    #          the protocol overview</a>
2886    #          for more information.
2887    #
2888    #    Note that the URL in the above example might be different when the
2889    #    RFC Editor deploys the v3 format.
2890    #
2891    #    This element appears as a child element of <annotation>
2892    #    (Section 2.3), <blockquote> (Section 2.10), <cref> (Section 2.16),
2893    #    <dd> (Section 2.18), <dt> (Section 2.21), <em> (Section 2.22), <li>
2894    #    (Section 2.29), <name> (Section 2.32), <preamble> (Section 3.6),
2895    #    <strong> (Section 2.50), <sub> (Section 2.51), <sup> (Section 2.52),
2896    #    <t> (Section 2.53), <td> (Section 2.56), <th> (Section 2.58), and
2897    #    <tt> (Section 2.62).
2898    #
2899    #    Content model: only text content.
2900    #
2901    # 2.44.1.  "displayFormat" Attribute
2902    #
2903    #    This attribute is used to signal formatters what the desired format
2904    #    of the relative reference should be.  Formatters for document types
2905    #    that have linking capability should wrap each part of the displayed
2906    #    text in hyperlinks.  If there is content in the <relref> element,
2907    #    formatters will ignore the value of this attribute.
2908    #
2909    #    "of"
2910    #
2911    #       A formatter should display the relative reference as the word
2912    #       "Section" followed by a space, the contents of the "section"
2913    #       attribute followed by a space, the word "of", another space, and
2914    #       the value from the "target" attribute enclosed in square brackets.
2915    #
2916    #       For example, with an input of:
2917    #
2918    #          See
2919    #          <relref section="2.3" target="RFC9999" displayFormat="of"/>
2920    #          for an overview.
2921    #
2922    #       An HTML formatter might generate:
2923    #
2924    #          See
2925    #          <a href="http://www.rfc-editor.org/info/rfc9999#s-2.3">
2926    #          Section 2.3</a> of
2927    #          [<a href="#RFC9999">RFC9999</a>]
2928    #          for an overview.
2929    #
2930    #       Note that "displayFormat='of'" is the default for <relref>, so it
2931    #       does not need to be given in a <relref> element if that format is
2932    #       desired.
2933    #
2934    #    "comma"
2935    #
2936    #       A formatter should display the relative reference as the value
2937    #       from the "target" attribute enclosed in square brackets, a comma,
2938    #       a space, the word "Section" followed by a space, and the "section"
2939    #       attribute.
2940    #
2941    #       For example, with an input of:
2942    #
2943    #          See
2944    #          <relref section="2.3" target="RFC9999" displayFormat="comma"/>,
2945    #          for an overview.
2946    #
2947    #       An HTML formatter might generate:
2948    #
2949    #          See
2950    #          [<a href="#RFC9999">RFC9999</a>],
2951    #          <a href="http://www.rfc-editor.org/info/rfc9999#s-2.3">
2952    #          Section 2.3</a>, for an overview.
2953    #
2954    #    "parens"
2955    #
2956    #       A formatter should display the relative reference as the value
2957    #       from the "target" attribute enclosed in square brackets, a space,
2958    #       a left parenthesis, the word "Section" followed by a space, the
2959    #       "section" attribute, and a right parenthesis.
2960    #
2961    #       For example, with an input of:
2962    #
2963    #          See
2964    #          <relref section="2.3" target="RFC9999" displayFormat="parens"/>
2965    #          for an overview.
2966    #
2967    #       An HTML formatter might generate:
2968    #
2969    #          See
2970    #          [<a href="#RFC9999">RFC9999</a>]
2971    #          (<a href="http://www.rfc-editor.org/info/rfc9999#s-2.3">
2972    #          Section 2.3</a>)
2973    #          for an overview.
2974    #
2975    #    "bare"
2976    #
2977    #       A formatter should display the relative reference as the contents
2978    #       of the "section" attribute and nothing else.  This is useful when
2979    #       there are multiple relative references to a single base reference.
2980    #
2981    #       For example:
2982    #
2983    #          See Sections
2984    #          <relref section="2.3" target="RFC9999" displayFormat="bare"/>
2985    #          and
2986    #          <relref section="2.4" target="RFC9999" displayFormat="of"/>
2987    #          for an overview.
2988    #
2989    #       An HTML formatter might generate:
2990    #
2991    #          See Sections
2992    #          <a href="http://www.rfc-editor.org/info/rfc9999#s-2.3">
2993    #          2.3</a>
2994    #          and
2995    #          <a href="http://www.rfc-editor.org/info/rfc9999#s-2.4">
2996    #          Section 2.4</a> of
2997    #          [<a href="#RFC9999">RFC9999</a>]
2998    #          for an overview.
2999    #
3000    #    Allowed values:
3001    #
3002    #    o  "of" (default)
3003    #
3004    #    o  "comma"
3005    #
3006    #    o  "parens"
3007    #
3008    #    o  "bare"
3009    #
3010    # 2.44.2.  "relative" Attribute
3011    #
3012    #    Specifies a relative reference from the URI in the target reference.
3013    #    This value must include whatever leading character is needed to
3014    #    create the relative reference; typically, this is "#" for HTML
3015    #    documents.
3016    #
3017    # 2.44.3.  "section" Attribute (Mandatory)
3018    #
3019    #    Specifies a section of the target reference.  If the reference is not
3020    #    an RFC or Internet-Draft in the v3 format, it is an error.
3021    #
3022    # 2.44.4.  "target" Attribute (Mandatory)
3023    #
3024    #    The anchor of the reference for this element.  If this value is not
3025    #    an anchor to a <reference> or <referencegroup> element, it is an
3026    #    error.  If the reference at the target has no URI, it is an error.
3027    def render_relref(self, e, width, **kwargs):
3028        return self.render_xref(e, width, **kwargs)
3029
3030    # 2.45.  <rfc>
3031    #
3032    #    This is the root element of the xml2rfc vocabulary.
3033    #
3034    #    Content model:
3035    #
3036    #    In this order:
3037    #
3038    #    1.  Optional <link> elements (Section 2.30)
3039    #
3040    #    2.  One <front> element (Section 2.26)
3041    #
3042    #    3.  One <middle> element (Section 2.31)
3043    #
3044    #    4.  One optional <back> element (Section 2.8)
3045    def render_rfc(self, e, width, **kwargs):
3046        self.part = e.tag
3047        lines = []
3048        for c in e.getchildren():
3049            if c.tag in (etree.PI, etree.Comment):
3050                continue
3051            self.part = c.tag
3052            lines = self.ljoin(lines, c, width, **kwargs)
3053        return lines
3054
3055    # 2.45.1.  "category" Attribute
3056    #
3057    #    Deprecated; instead, use the "name" attribute in <seriesInfo>.
3058    #
3059    # 2.45.2.  "consensus" Attribute
3060    #
3061    #    Affects the generated boilerplate.  Note that the values of "no" and
3062    #    "yes" are deprecated and are replaced by "false" (the default) and
3063    #    "true".
3064    #
3065    #    See [RFC7841] for more information.
3066    #
3067    #    Allowed values:
3068    #
3069    #    o  "no"
3070    #
3071    #    o  "yes"
3072    #
3073    #    o  "false" (default)
3074    #
3075    #    o  "true"
3076    #
3077    # 2.45.3.  "docName" Attribute
3078    #
3079    #    Deprecated; instead, use the "value" attribute in <seriesInfo>.
3080    #
3081    # 2.45.4.  "indexInclude" Attribute
3082    #
3083    #    Specifies whether or not a formatter is requested to include an index
3084    #    in generated files.  If the source file has no <iref> elements, an
3085    #    index is never generated.  This option is useful for generating
3086    #    documents where the source document has <iref> elements but the
3087    #    author no longer wants an index.
3088    #
3089    #    Allowed values:
3090    #
3091    #    o  "true" (default)
3092    #
3093    #    o  "false"
3094    #
3095    # 2.45.5.  "ipr" Attribute
3096    #
3097    #    Represents the Intellectual Property status of the document.  See
3098    #    Appendix A.1 for details.
3099    #
3100    # 2.45.6.  "iprExtract" Attribute
3101    #
3102    #    Identifies a single section within the document for which extraction
3103    #    "as is" is explicitly allowed (only relevant for historic values of
3104    #    the "ipr" attribute).
3105    #
3106    # 2.45.7.  "number" Attribute
3107    #
3108    #    Deprecated; instead, use the "value" attribute in <seriesInfo>.
3109    #
3110    # 2.45.8.  "obsoletes" Attribute
3111    #
3112    #    A comma-separated list of RFC numbers or Internet-Draft names.
3113    #
3114    #    The prep tool will parse the attribute value so that incorrect
3115    #    references can be detected.
3116    #
3117    # 2.45.9.  "prepTime" Attribute
3118    #
3119    #    The date that the XML was processed by a prep tool.  This is included
3120    #    in the XML file just before it is saved to disk.  The value is
3121    #    formatted using the "date-time" format defined in Section 5.6 of
3122    #    [RFC3339].  The "time-offset" should be "Z".
3123    #
3124    # 2.45.10.  "seriesNo" Attribute
3125    #
3126    #    Deprecated; instead, use the "value" attribute in <seriesInfo>.
3127    #
3128    # 2.45.11.  "sortRefs" Attribute
3129    #
3130    #    Specifies whether or not the prep tool will sort the references in
3131    #    each reference section.
3132    #
3133    #    Allowed values:
3134    #
3135    #    o  "true"
3136    #
3137    #    o  "false" (default)
3138    #
3139    # 2.45.12.  "submissionType" Attribute
3140    #
3141    #    The document stream, as described in [RFC7841].  (The RFC Series
3142    #    Editor may change the list of allowed values in the future.)
3143    #
3144    #    Allowed values:
3145    #
3146    #    o  "IETF" (default)
3147    #
3148    #    o  "IAB"
3149    #
3150    #    o  "IRTF"
3151    #
3152    #    o  "independent"
3153    #
3154    # 2.45.13.  "symRefs" Attribute
3155    #
3156    #    Specifies whether or not a formatter is requested to use symbolic
3157    #    references (such as "[RFC2119]").  If the value for this is "false",
3158    #    the references come out as numbers (such as "[3]").
3159    #
3160    #    Allowed values:
3161    #
3162    #    o  "true" (default)
3163    #
3164    #    o  "false"
3165    #
3166    # 2.45.14.  "tocDepth" Attribute
3167    #
3168    #    Specifies the number of levels of headings that a formatter is
3169    #    requested to include in the table of contents; the default is "3".
3170    #
3171    # 2.45.15.  "tocInclude" Attribute
3172    #
3173    #    Specifies whether or not a formatter is requested to include a table
3174    #    of contents in generated files.
3175    #
3176    #    Allowed values:
3177    #
3178    #    o  "true" (default)
3179    #
3180    #    o  "false"
3181    #
3182    # 2.45.16.  "updates" Attribute
3183    #
3184    #    A comma-separated list of RFC numbers or Internet-Draft names.
3185    #
3186    #    The prep tool will parse the attribute value so that incorrect
3187    #    references can be detected.
3188    #
3189    # 2.45.17.  "version" Attribute
3190    #
3191    #    Specifies the version of xml2rfc syntax used in this document.  The
3192    #    only expected value (for now) is "3".
3193
3194
3195    # 2.46.  <section>
3196    #
3197    #    Represents a section (when inside a <middle> element) or an appendix
3198    #    (when inside a <back> element).
3199    #
3200    #    Subsections are created by nesting <section> elements inside
3201    #    <section> elements.  Sections are allowed to be empty.
3202    #
3203    #    This element appears as a child element of <back> (Section 2.8),
3204    #    <boilerplate> (Section 2.11), <middle> (Section 2.31), and <section>
3205    #    (Section 2.46).
3206    #
3207    #    Content model:
3208    #
3209    #    In this order:
3210    #
3211    #    1.  One optional <name> element (Section 2.32)
3212    #
3213    #    2.  In any order:
3214    #
3215    #        *  <artwork> elements (Section 2.5)
3216    #
3217    #        *  <aside> elements (Section 2.6)
3218    #
3219    #        *  <blockquote> elements (Section 2.10)
3220    #
3221    #        *  <dl> elements (Section 2.20)
3222    #
3223    #        *  <figure> elements (Section 2.25)
3224    #
3225    #        *  <iref> elements (Section 2.27)
3226    #
3227    #        *  <ol> elements (Section 2.34)
3228    #
3229    #        *  <sourcecode> elements (Section 2.48)
3230    #
3231    #        *  <t> elements (Section 2.53)
3232    #
3233    #        *  <table> elements (Section 2.54)
3234    #
3235    #        *  <texttable> elements (Section 3.8)
3236    #
3237    #        *  <ul> elements (Section 2.63)
3238    #
3239    #    3.  Optional <section> elements (Section 2.46)
3240    #
3241    # 2.46.1.  "anchor" Attribute
3242    #
3243    #    Document-wide unique identifier for this section.
3244    #
3245    # 2.46.2.  "numbered" Attribute
3246    #
3247    #    If set to "false", the formatter is requested to not display a
3248    #    section number.  The prep tool will verify that such a section is not
3249    #    followed by a numbered section in this part of the document and will
3250    #    verify that the section is a top-level section.
3251    #
3252    #    Allowed values:
3253    #
3254    #    o  "true" (default)
3255    #
3256    #    o  "false"
3257    #
3258    # 2.46.3.  "removeInRFC" Attribute
3259    #
3260    #    If set to "true", this note is marked in the prep tool with text
3261    #    indicating that it should be removed before the document is published
3262    #    as an RFC.  That text will be "This note is to be removed before
3263    #    publishing as an RFC."
3264    #
3265    #    Allowed values:
3266    #
3267    #    o  "true"
3268    #
3269    #    o  "false" (default)
3270    #
3271    # 2.46.4.  "title" Attribute
3272    #
3273    #    Deprecated.  Use <name> instead.
3274    #
3275    # 2.46.5.  "toc" Attribute
3276    #
3277    #    Indicates to a formatter whether or not the section is to be included
3278    #    in a table of contents, if such a table of contents is produced.
3279    #    This only takes effect if the level of the section would have
3280    #    appeared in the table of contents based on the "tocDepth" attribute
3281    #    of the <rfc> element, and of course only if the table of contents is
3282    #    being created based on the "tocInclude" attribute of the <rfc>
3283    #    element.  If this is set to "exclude", any section below this one
3284    #    will be excluded as well.  The "default" value indicates inclusion of
3285    #    the section if it would be included by the tocDepth attribute of the
3286    #    <rfc> element.
3287    #
3288    #    Allowed values:
3289    #
3290    #    o  "include"
3291    #
3292    #    o  "exclude"
3293    #
3294    #    o  "default" (default)
3295    def render_section(self, e, width, **kwargs):
3296        kwargs['joiners'].update({
3297            None:       Joiner('\n\n', 3, 0, False, False), # default
3298            't':        Joiner('\n\n', 3, 0, False, False),
3299            'name':     Joiner('  ', 0, 0, False, False),
3300            'iref':     Joiner('  ', 0, 0, False, False),
3301            'section':  Joiner('\n\n', 0, 0, False, False),
3302            'artset':   Joiner('\n\n', 0, 0, False, False),
3303            'artwork':  Joiner('\n\n', 3, 0, False, True),
3304            'sourcecode':  Joiner('\n\n', 3, 0, False, False),
3305        })
3306        text = ''
3307        pn = e.get('pn', 'unknown-unknown')
3308        if e.get('numbered') == 'true':
3309            text = pn.split('-',1)[1].replace('-', ' ').title() +'.'
3310            if text.startswith('Appendix'):
3311                text = text.replace('.', ' ', 1)
3312            kwargs['joiners'].update({
3313                'name':     Joiner('  ', len(text)+2, 0, False, False),
3314            })
3315        lines = []
3316        name = e.find('name')
3317        if name != None:
3318            lines += mklines(self.tjoin(text, name, width, **kwargs), e)
3319        for c in e.getchildren():
3320            if c.tag == 'name':
3321                continue
3322            lines = self.ljoin(lines, c, width, **kwargs)
3323        return lines
3324
3325    # 2.47.  <seriesInfo>
3326    #
3327    #    Specifies the document series in which this document appears, and
3328    #    also specifies an identifier within that series.
3329    #
3330    #    A processing tool determines whether it is working on an RFC or an
3331    #    Internet-Draft by inspecting the "name" attribute of a <seriesInfo>
3332    #    element inside the <front> element inside the <rfc> element, looking
3333    #    for "RFC" or "Internet-Draft".  (Specifying neither value in any of
3334    #    the <seriesInfo> elements can be useful for producing other types of
3335    #    documents but is out of scope for this specification.)
3336    #
3337    #    It is invalid to have multiple <seriesInfo> elements inside the same
3338    #    <front> element containing the same "name" value.  Some combinations
3339    #    of <seriesInfo> "name" attribute values make no sense, such as having
3340    #    both <seriesInfo name="rfc"/> and <seriesInfo name="Internet-Draft"/>
3341    #    in the same <front> element.
3342    #
3343    #    This element appears as a child element of <front> (Section 2.26) and
3344    #    <reference> (Section 2.40; deprecated in this context).
3345    #
3346    #    Content model: this element does not have any contents.
3347    #
3348    # 2.47.1.  "asciiName" Attribute
3349    #
3350    #    The ASCII equivalent of the name field.
3351    #
3352    # 2.47.2.  "asciiValue" Attribute
3353    #
3354    #    The ASCII equivalent of the value field.
3355    #
3356    # 2.47.3.  "name" Attribute (Mandatory)
3357    #
3358    #    The name of the series.  The currently known values are "RFC",
3359    #    "Internet-Draft", and "DOI".  The RFC Series Editor may change this
3360    #    list in the future.
3361    #
3362    #    Some of the values for "name" interact as follows:
3363    #
3364    #    o  If a <front> element contains a <seriesInfo> element with a name
3365    #       of "Internet-Draft", it can also have at most one additional
3366    #       <seriesInfo> element with a "status" attribute whose value is of
3367    #       "standard", "full-standard", "bcp", "fyi", "informational",
3368    #       "experimental", or "historic" to indicate the intended status of
3369    #       this Internet-Draft, if it were to be later published as an RFC.
3370    #       If such an additional <seriesInfo> element has one of those
3371    #       statuses, the name needs to be "".
3372    #
3373    #    o  If a <front> element contains a <seriesInfo> element with a name
3374    #       of "RFC", it can also have at most one additional <seriesInfo>
3375    #       element with a "status" attribute whose value is of
3376    #       "full-standard", "bcp", or "fyi" to indicate the current status of
3377    #       this RFC.  If such an additional <seriesInfo> element has one of
3378    #       those statuses, the "value" attribute for that name needs to be
3379    #       the number within that series.  That <front> element might also
3380    #       contain an additional <seriesInfo> element with the status of
3381    #       "info", "exp", or "historic" and a name of "" to indicate the
3382    #       status of the RFC.
3383    #
3384    #    o  A <front> element that has a <seriesInfo> element that has the
3385    #       name "Internet-Draft" cannot also have a <seriesInfo> element that
3386    #       has the name "RFC".
3387    #
3388    #    o  The <seriesInfo> element can contain the DOI for the referenced
3389    #       document.  This cannot be used when the <seriesInfo> element is an
3390    #       eventual child element of an <rfc> element -- only as an eventual
3391    #       child of a <reference> element.  The "value" attribute should use
3392    #       the form specified in [RFC7669].
3393    #
3394    # 2.47.4.  "status" Attribute
3395    #
3396    #    The status of this document.  The currently known values are
3397    #    "standard", "informational", "experimental", "bcp", "fyi", and
3398    #    "full-standard".  The RFC Series Editor may change this list in the
3399    #    future.
3400    #
3401    # 2.47.5.  "stream" Attribute
3402    #
3403    #    The stream (as described in [RFC7841]) that originated the document.
3404    #    (The RFC Series Editor may change this list in the future.)
3405    #
3406    #    Allowed values:
3407    #
3408    #    o  "IETF" (default)
3409    #
3410    #    o  "IAB"
3411    #
3412    #    o  "IRTF"
3413    #
3414    #    o  "independent"
3415    #
3416    # 2.47.6.  "value" Attribute (Mandatory)
3417    #
3418    #    The identifier within the series specified by the "name" attribute.
3419    #
3420    #    For BCPs, FYIs, RFCs, and STDs, this is the number within the series.
3421    #    For Internet-Drafts, it is the full draft name (ending with the
3422    #    two-digit version number).  For DOIs, the value is given, such as
3423    #    "10.17487/rfc1149", as described in [RFC7669].
3424    #
3425    #    The name in the value should be the document name without any file
3426    #    extension.  For Internet-Drafts, the value for this attribute should
3427    #    be "draft-ietf-somewg-someprotocol-07", not
3428    #    "draft-ietf-somewg-someprotocol-07.txt".
3429    def render_seriesinfo(self, e, width, **kwargs):
3430        name = e.get('name')
3431        value = e.get('value')
3432        if name == 'Internet-Draft':
3433            return name + ', ' + value
3434        else:
3435            return name + '\u00A0' + value.replace('/', '/' + '\uE060')
3436
3437    # 2.48.  <sourcecode>
3438    #
3439    #    This element allows the inclusion of source code into the document.
3440    #
3441    #    When rendered, source code is always shown in a monospace font.  When
3442    #    <sourcecode> is a child of <figure> or <section>, it provides full
3443    #    control of horizontal whitespace and line breaks.  When formatted, it
3444    #    is indented relative to the left margin of the enclosing element.  It
3445    #    is thus useful for source code and formal languages (such as ABNF
3446    #    [RFC5234] or the RNC notation used in this document).  (When
3447    #    <sourcecode> is a child of other elements, it flows with the text
3448    #    that surrounds it.)  Tab characters (U+0009) inside of this element
3449    #    are prohibited.
3450    #
3451    #    For artwork such as character-based art, diagrams of message layouts,
3452    #    and so on, use the <artwork> element instead.
3453    #
3454    #    Output formatters that do pagination should attempt to keep source
3455    #    code on a single page.  This is to prevent source code that is split
3456    #    across pages from looking like two separate pieces of code.
3457    #
3458    #    See Section 5 for a description of how to deal with issues of using
3459    #    "&" and "<" characters in source code.
3460    #
3461    #    This element appears as a child element of <blockquote>
3462    #    (Section 2.10), <dd> (Section 2.18), <figure> (Section 2.25), <li>
3463    #    (Section 2.29), <section> (Section 2.46), <td> (Section 2.56), and
3464    #    <th> (Section 2.58).
3465    #
3466    #    Content model: only text content.
3467    #
3468    # 2.48.1.  "anchor" Attribute
3469    #
3470    #    Document-wide unique identifier for this source code.
3471    #
3472    # 2.48.2.  "name" Attribute
3473    #
3474    #    A filename suitable for the contents (such as for extraction to a
3475    #    local file).  This attribute can be helpful for other kinds of tools
3476    #    (such as automated syntax checkers, which work by extracting the
3477    #    source code).  Note that the "name" attribute does not need to be
3478    #    unique for <artwork> elements in a document.  If multiple
3479    #    <sourcecode> elements have the same "name" attribute, a formatter
3480    #    might assume that the elements are all fragments of a single file,
3481    #    and such a formatter can collect those fragments for later
3482    #    processing.
3483    #
3484    # 2.48.3.  "src" Attribute
3485    #
3486    #    The URI reference of a source file [RFC3986].
3487    #
3488    #    It is an error to have both a "src" attribute and content in the
3489    #    <sourcecode> element.
3490    #
3491    # 2.48.4.  "type" Attribute
3492    #
3493    #    Specifies the type of the source code.  The value of this attribute
3494    #    is free text with certain values designated as preferred.
3495    #
3496    #    The preferred values for <sourcecode> types are:
3497    #
3498    #    o  abnf
3499    #
3500    #    o  asn.1
3501    #
3502    #    o  bash
3503    #
3504    #    o  c++
3505    #
3506    #    o  c
3507    #
3508    #    o  cbor
3509    #
3510    #    o  dtd
3511    #
3512    #    o  java
3513    #
3514    #    o  javascript
3515    #
3516    #    o  json
3517    #
3518    #    o  mib
3519    #
3520    #    o  perl
3521    #
3522    #    o  pseudocode
3523    #
3524    #    o  python
3525    #
3526    #    o  rnc
3527    #
3528    #    o  xml
3529    #
3530    #    o  yang
3531    #
3532    #    The RFC Series Editor will maintain a complete list of the preferred
3533    #    values on the RFC Editor web site, and that list is expected to be
3534    #    updated over time.  Thus, a consumer of v3 XML should not cause a
3535    #    failure when it encounters an unexpected type or no type is
3536    #    specified.
3537    def render_sourcecode(self, e, width, **kwargs):
3538        markers = e.get('markers')
3539        lines = self.render_artwork(e, width, **kwargs)
3540        if markers == 'true':
3541            text = '<CODE BEGINS>'
3542            file = e.get('name')
3543            marker_lines = [ Line(text, e) ]
3544            if file:
3545                filetext = 'file "%s"' % file
3546                if len(filetext) > (width - len(text)):
3547                    marker_lines += [ Line('  ' + filetext, e) ]
3548                else:
3549                    marker_lines = [ Line(text + ' ' + filetext, e) ]
3550            lines = marker_lines + lines + [ Line('<CODE ENDS>', e) ]
3551        return lines
3552
3553
3554    def render_stream(self, e, width, **kwargs):
3555        text = e.text
3556        return text
3557
3558
3559    # 2.49.  <street>
3560    #
3561    #    Provides a street address.
3562    #
3563    #    This element appears as a child element of <postal> (Section 2.37).
3564    #
3565    #    Content model: only text content.
3566    #
3567    # 2.49.1.  "ascii" Attribute
3568    #
3569    #    The ASCII equivalent of the street address.
    # <street> gets no rendering of its own: its renderer slot is bound to
    # null_renderer (defined elsewhere in this file; presumably produces no
    # output -- confirm), because street lines are emitted by render_address.
    render_street = null_renderer       # handled in render_address
3571
3572    # 2.50.  <strong>
3573    #
3574    #    Indicates text that is semantically strong.  Text enclosed within
3575    #    this element will be displayed as bold after processing.  This
3576    #    element can be combined with other character formatting elements, and
3577    #    the formatting will be additive.
3578    def render_strong(self, e, width, **kwargs):
3579        text = '*%s*' % self.inner_text_renderer(e)
3580        text += e.tail or ''
3581        return text
3582
3583
3584    # 2.51.  <sub>
3585    #
3586    #    Causes the text to be displayed as subscript, approximately half a
3587    #    letter-height lower than normal text.  This element can be combined
3588    #    with other character formatting elements, and the formatting will be
3589    #    additive.
3590    def render_sub(self, e, width, **kwargs):
3591        text = '_(%s)' % self.inner_text_renderer(e)
3592        text += e.tail or ''
3593        return text
3594
3595
3596    # 2.52.  <sup>
3597    #
3598    #    Causes the text to be displayed as superscript, approximately half a
3599    #    letter-height higher than normal text.  This element can be combined
3600    #    with other character formatting elements, and the formatting will be
3601    #    additive.
3602    def render_sup(self, e, width, **kwargs):
3603        text = '^(%s)' % self.inner_text_renderer(e)
3604        text += e.tail or ''
3605        return text
3606
3607
3608    # 2.53.  <t>
3609    #
3610    #    Contains a paragraph of text.
3611    #
3612    #    This element appears as a child element of <abstract> (Section 2.1),
3613    #    <aside> (Section 2.6), <blockquote> (Section 2.10), <dd>
3614    #    (Section 2.18), <li> (Section 2.29), <list> (Section 3.4), <note>
3615    #    (Section 2.33), <section> (Section 2.46), <td> (Section 2.56), and
3616    #    <th> (Section 2.58).
3617    #
3618    #    Content model:
3619    #
3620    #    In any order:
3621    #
3622    #    o  Text
3623    #
3624    #    o  <bcp14> elements (Section 2.9)
3625    #
3626    #    o  <cref> elements (Section 2.16)
3627    #
3628    #    o  <em> elements (Section 2.22)
3629    #
3630    #    o  <eref> elements (Section 2.24)
3631    #
3632    #    o  <iref> elements (Section 2.27)
3633    #
3634    #    o  <list> elements (Section 3.4)
3635    #
3636    #    o  <relref> elements (Section 2.44)
3637    #
3638    #    o  <spanx> elements (Section 3.7)
3639    #
3640    #    o  <strong> elements (Section 2.50)
3641    #
3642    #    o  <sub> elements (Section 2.51)
3643    #
3644    #    o  <sup> elements (Section 2.52)
3645    #
3646    #    o  <tt> elements (Section 2.62)
3647    #
3648    #    o  <vspace> elements (Section 3.10)
3649    #
3650    #    o  <xref> elements (Section 2.66)
3651    #
3652    # 2.53.1.  "anchor" Attribute
3653    #
3654    #    Document-wide unique identifier for this paragraph.
3655    #
3656    # 2.53.2.  "hangText" Attribute
3657    #
3658    #    Deprecated.  Instead, use <dd> inside of a definition list (<dl>).
3659    #
3660    # 2.53.3.  "keepWithNext" Attribute
3661    #
3662    #    Acts as a hint to the output formatters that do pagination to do a
3663    #    best-effort attempt to keep the paragraph with the next element,
3664    #    whatever that happens to be.  For example, the HTML output @media
3665    #    print CSS ("CSS" refers to Cascading Style Sheets) might translate
3666    #    this to page-break-after: avoid.  For PDF, the paginator could
3667    #    attempt to keep the paragraph with the next element.  Note: this
3668    #    attribute is strictly a hint and not always actionable.
3669    #
3670    #    Allowed values:
3671    #
3672    #    o  "false" (default)
3673    #
3674    #    o  "true"
3675    #
3676    # 2.53.4.  "keepWithPrevious" Attribute
3677    #
3678    #    Acts as a hint to the output formatters that do pagination to do a
3679    #    best-effort attempt to keep the paragraph with the previous element,
3680    #    whatever that happens to be.  For example, the HTML output @media
3681    #    print CSS might translate this to page-break-before: avoid.  For PDF,
3682    #    the paginator could attempt to keep the paragraph with the previous
3683    #    element.  Note: this attribute is strictly a hint and not always
3684    #    actionable.
3685    #
3686    #    Allowed values:
3687    #
3688    #    o  "false" (default)
3689    #
3690    #    o  "true"
3691    def render_t(self, e, width, **kwargs):
3692        def rreplace(s, old, new, max):
3693            lst = s.rsplit(old, max)
3694            return new.join(lst)
3695        indent = e.get('indent', None) or '0'
3696        if indent:
3697            kwargs['indent'] = int(indent)
3698        text = self.inner_text_renderer(e)
3699        if kwargs.pop('fill', True):
3700            text = fill(text, width=width, **kwargs)
3701            lines = mklines(text, e)
3702        else:
3703            if isinstance(text, six.binary_type):
3704                text = text.decode('utf-8')
3705            lines = [ Line(text, e) ]
3706        return lines
3707
3708
3709    # 2.54.  <table>
3710    #
3711    #    Contains a table with a caption with the table number.  If the
3712    #    element contains a <name> element, the caption will also show that
3713    #    name.
3714    #
3715    #    Inside the <table> element is, optionally, a <thead> element to
3716    #    contain the rows that will be the table's heading and, optionally, a
3717    #    <tfoot> element to contain the rows of the table's footer.  If the
3718    #    XML is converted to a representation that has page breaks (such as
3719    #    PDFs or printed HTML), the header and footer are meant to appear on
3720    #    each page.
3721    #
3722    #    This element appears as a child element of <aside> (Section 2.6) and
3723    #    <section> (Section 2.46).
3724    #
3725    #    Content model:
3726    #
3727    #    In this order:
3728    #
3729    #    1.  One optional <name> element (Section 2.32)
3730    #
3731    #    2.  Optional <iref> elements (Section 2.27)
3732    #
3733    #    3.  One optional <thead> element (Section 2.59)
3734    #
3735    #    4.  One or more <tbody> elements (Section 2.55)
3736    #
3737    #    5.  One optional <tfoot> element (Section 2.57)
3738    #
3739    # 2.54.1.  "anchor" Attribute
3740    #
3741    #    Document-wide unique identifier for this table.
3742    def build_table(self, e, width, **kwargs):
3743        # variations on border characters for table styles
3744        style = self.get_relevant_pi(e, 'table_borders') or self.options.table_borders
3745        bchar_sets = {
3746                'full': { '=': '=',
3747                          '-': '-',
3748                          '+': '+',
3749                          '|': '|',},
3750                'light':{ '=': '-',
3751                          '-': None,
3752                          '+': '+',
3753                          '|': '|',},
3754                'min':  { '=': '-',
3755                          '-': None,
3756                          '+': ' ',
3757                          '|': ' ',},
3758            }
3759        bchar_sets['minimal'] = bchar_sets['min']
3760        bchar = bchar_sets[style]
3761        class Cell(object):
3762            type    = b'None'
3763            text    = None
3764            wrapped = []
3765            colspan = 1
3766            rowspan = 1
3767            width   = None
3768            minwidth= None
3769            height  = None
3770            element = None
3771            padding = 0
3772            foldable= True
3773            top     = ''
3774            bot     = ''
3775
3776        def show(cells, attr='', note=''):
3777            debug.say('')
3778            debug.say('%s %s:' % (attr, note))
3779            for i in range(len(cells)):
3780                row = [ (c.type[1], getattr(c, attr)) if attr else c for c in cells[i] ]
3781                debug.say(str(row).replace('\u2028', '\u00a4'))
3782
3783        def array(rows, cols, init):
3784            a = []
3785            for i in range(rows):
3786                a.append([])
3787                for j in range(cols):
3788                    if inspect.isclass(init):
3789                        a[i].append(init())
3790                    else:
3791                        a[i].append(init)
3792            return a
3793
3794        def intattr(e, name):
3795            attr = e.get(name)
3796            if attr.isdigit():
3797                attr = int(attr)
3798            else:
3799                attr = 1
3800            return attr
3801
3802        def get_dimensions(e):
3803            cols = 0
3804            rows = 0
3805            # Find the dimensions of the table
3806            for p in e.iterchildren(['thead', 'tbody', 'tfoot']):
3807                for r in p.iterchildren('tr'):
3808                    ccols = 0
3809                    crows = 0
3810                    extrarows = 0
3811                    for c in r.iterchildren('td', 'th'):
3812                        colspan = intattr(c, 'colspan')
3813                        ccols += colspan
3814                        rowspan = intattr(c, 'rowspan')
3815                        crows = max(crows, rowspan)
3816                    cols = max(cols, ccols)
3817                    extrarows = max(extrarows, crows)
3818                    extrarows -=1
3819                    rows += 1
3820            if extrarows > 0:
3821                rows += extrarows
3822            return rows, cols
3823
3824        def justify(cell, line, minpad):
3825            align = cell.element.get('align')
3826            if align == 'center':
3827                padding = 0
3828                width = cell.colwidth
3829            else:
3830                padding = cell.colwidth - textwidth(line)
3831                width = cell.colwidth - min(2, padding)
3832            if   align == 'left':
3833                text = line.ljust(width)
3834            elif align == 'center':
3835                text = line.center(width)
3836            elif align == 'right':
3837                text = line.rjust(width)
3838            if   align == 'right':
3839                if padding > 1:
3840                    text = text + ' ' if minpad > 1 else ' ' + text
3841                if padding > 0:
3842                    text = ' ' + text
3843            elif align == 'left':
3844                if padding > 1:
3845                    text = ' ' + text if minpad > 1 else text + ' '
3846                if padding > 0:
3847                    text = text + ' '
3848            else:
3849                pass
3850            return text
3851
3852        def merge_border(c, d):
3853            border = {
3854                '=': { '=':'=', '-':'=', '+':'+', },
3855                '-': { '=':'=', '-':'-', '+':'+', },
3856                '+': { '=':'+', '-':'+', '+':'+', '|':'+', },
3857                '|': { '+':'+', '|':'|', },
3858            }
3859            if c in border and d in border[c]:
3860                return border[c][d]
3861            return c
3862
3863        def build_line(cells, i, cols, next=True):
3864            def table(e):
3865                return list(e.iterancestors('table'))[0]
3866            line = ''
3867            e = cells[i][0].element
3868            for j in range(cols):
3869                k, l = cells[i][j].origin
3870                # skip colspan cells
3871                if k==i and l<j:
3872                    continue
3873                cell = cells[k][l]
3874                part = cell.wrapped[cell.m]
3875                if next:
3876                    cell.m += 1
3877                if line:
3878                    if bchar['|']:
3879                        line = line[:-1] + merge_border(line[-1], part[0]) + part[1:]
3880                    else:
3881                        line = line + part
3882                else:
3883                    line = part
3884            return Line(line, table(e))
3885
3886        def find_minwidths(e, cells, hyphen_split=False):
3887            """
3888            Find the minimum column widths of regular cells
3889            """
3890            i = 0
3891            splitter = utils.TextSplitter(width=67, hyphen_split=hyphen_split)
3892            for p in e.iterchildren(['thead', 'tbody', 'tfoot']):
3893                for r in list(p.iterchildren('tr')):
3894                    j = 0
3895                    for c in r.iterchildren('td', 'th'):
3896                        # skip over cells belonging to an earlier row or column
3897                        while j < len(cells[i]) and cells[i][j].element != c:
3898                            j += 1
3899                        #
3900                        cell = cells[i][j]
3901                        if cell.foldable:
3902                            cell.text = cell.text.strip(stripspace)
3903                            cell.minwidth = max([0]+[ len(word.strip(stripspace)) for word in splitter._split(cell.text) ]) if cell.text else 0
3904                        else:
3905                            cell.minwidth = max([0]+[ len(word.strip(stripspace)) for line in cell.text.splitlines() for word in splitter._split(line) ])
3906                    i += 1
3907
3908        def set_colwidths(cells, rows, cols):
3909            """
3910            Compute the adjusted cell widths; the same for all rows of each column
3911            """
3912            for j in range(cols):
3913                colmax = 0
3914                for i in range(rows):
3915                    cell = cells[i][j]
3916                    if cell.minwidth:
3917                        cw = cell.minwidth // cell.colspan
3918                        if cw > colmax:
3919                            colmax = cw
3920                for i in range(rows):
3921                    cells[i][j].colwidth = colmax
3922
3923        # ----------------------------------------------------------------------
3924        rows, cols = get_dimensions(e)
3925        cells = array(rows, cols, Cell)
3926
3927        # ----------------------------------------------------------------------
3928        # Iterate through tr and th/td elements, and annotate the cells array
3929        # with rowspan, colspan, and owning element and its origin
3930        i = 0
3931        for p in e.iterchildren(['thead', 'tbody', 'tfoot']):
3932            for r in list(p.iterchildren('tr')):
3933                j = 0
3934                for c in r.iterchildren('td', 'th'):
3935                    # skip over cells belonging to an earlier row or column
3936                    while j < len(cells[i]) and cells[i][j].element != None:
3937                        j += 1
3938                    #
3939                    cell = cells[i][j]
3940                    cell.colspan = intattr(c, 'colspan')
3941                    cell.rowspan = intattr(c, 'rowspan')
3942                    if len(c) == 1 and c[0].tag == 't':
3943                        cell.text, cell.foldable = self.text_or_block_renderer(c[0], width, fill=False, **kwargs) or ('', True)
3944                    else:
3945                        cell.text, cell.foldable = self.text_or_block_renderer(c, width, fill=False, **kwargs) or ('', True)
3946                    cell.text = mktextblock(cell.text)
3947                    if cell.foldable:
3948                        cell.text = cell.text.strip(stripspace)
3949                        cell.minwidth = max([0]+[ len(word) for word in splitter._split(cell.text) ]) if cell.text else 0
3950                    else:
3951                        cell.minwidth = max([0]+[ len(word) for line in cell.text.splitlines() for word in splitter._split(line) ])
3952                    cell.type = p.tag
3953                    if c.tag == 'th':
3954                        cell.top = bchar['=']
3955                        cell.bot = bchar['=']
3956                    else:
3957                        cell.top = bchar['-'] if not cell.top else cell.top
3958                        cell.bot = bchar['-'] if not cell.bot else cell.bot
3959                    for k in range(i, i+cell.rowspan):
3960                        for l in range(j, j+cell.colspan):
3961                            cells[k][l].element = c
3962                            cells[k][l].origin  = (i, j)
3963                i += 1
3964        # Ensure we have top and bottom borders
3965        for j in range(len(cells[0])):
3966            if hasattr(cells[0][j], 'origin'):
3967                k, l = cells[0][j].origin
3968                if not cells[k][l].top:
3969                    cells[k][l].top = bchar['=']
3970        for j in range(len(cells[-1])):
3971            if hasattr(cells[-1][j], 'origin'):
3972                k, l = cells[-1][j].origin
3973                if not cells[k][l].bot:
3974                    cells[k][l].bot = bchar['=']
3975            del k, l
3976        #show(cells, 'origin')
3977
3978        # ----------------------------------------------------------------------
3979        # Find the minimum column widths of regular cells, and total width
3980        # per row.
3981        find_minwidths(e, cells, hyphen_split=self.options.table_hyphen_breaks)
3982        #show(cells, 'minwidth')
3983        #debug.pprint('totwidth')
3984
3985        # ----------------------------------------------------------------------
3986        # Compute the adjusted cell widths; the same for all rows of each column
3987        set_colwidths(cells, rows, cols)
3988        reqwidth = sum([ c.colwidth for c in cells[0] ]) + cols + 1
3989        if reqwidth > width:
3990            # Try again, splitting cell content on hyphens this time
3991            find_minwidths(e, cells, hyphen_split=True)
3992            set_colwidths(cells, rows, cols)
3993        #show(cells, 'colwidth', 'after aligned cell widths')
3994
3995        # ----------------------------------------------------------------------
3996        # Add padding if possible. Pad widest first.
3997        reqwidth = sum([ c.colwidth for c in cells[0] ]) + (cols + 1)*len(bchar['|'])
3998        if reqwidth > width:
3999            self.warn(e, "Total table width (%s) exceeds available width (%s)" % (reqwidth, width))
4000        excess = width - reqwidth
4001        #
4002        if excess > 0:
4003            widths = [ (c.colwidth, ic) for ic, c in enumerate(cells[0]) ]
4004            widths.sort()
4005            widths.reverse()
4006            for j in [ k for w, k in widths ]:
4007                pad = min(2, excess)
4008                excess -= pad
4009                for i in range(rows):
4010                    cells[i][j].colwidth += pad
4011                    cells[i][j].padding   = pad
4012        #show(cells, 'colwidth', 'after padding')
4013
4014        # ----------------------------------------------------------------------
4015        # Set up initial cell.wrapped values
4016        for i in range(rows):
4017            for j in range(cols):
4018                cell = cells[i][j]
4019                if cell.text:
4020                    if cell.foldable:
4021                        cell.wrapped = fill(cell.text, width=cell.colwidth, fix_sentence_endings=True).splitlines()
4022                    else:
4023                        cell.wrapped = cell.text.splitlines()
4024
4025        # ----------------------------------------------------------------------
4026        # Make columns wider, if possible
4027        while excess > 0:
4028            maxpos = (None, None)
4029            maxrows = 0
4030            for i in range(rows):
4031                for j in range(cols):
4032                    cell = cells[i][j]
4033                    if hasattr(cell, 'origin'):
4034                        if cell.origin == (i,j):
4035                            w = sum([ cells[i][k].colwidth for k in range(j, j+cell.colspan)])+ cell.colspan-1 - cell.padding
4036                            r = cell.rowspan
4037                            # this is simplified, and doesn't always account for the
4038                            # extra line from the missing border line in a rowspan cell:
4039                            if cell.text:
4040                                if cell.foldable:
4041                                    cell.wrapped = fill(cell.text, width=w, fix_sentence_endings=True).splitlines()
4042                                else:
4043                                    cell.wrapped = [ l.text for l in self.text_or_block_renderer(cell.element, width=w, fill=True, **kwargs)[0] ]
4044                                cell.height = len(cell.wrapped)
4045                                if maxrows < cell.height and cell.height > 1:
4046                                    maxrows = cell.height
4047                                    maxpos = (i, j)
4048                    else:
4049                        self.die(e, "Inconsistent table width: Found different row lengths in this table")
4050
4051            # calculate a better width for the cell with the largest number
4052            # of text rows
4053            if maxpos != (None, None):
4054                i, j = maxpos
4055                cell = cells[i][j]
4056                w = sum([ cells[i][k].colwidth for k in range(j, j+cell.colspan)])+ cell.colspan-1 - cell.padding
4057                r = cell.rowspan
4058                h = cell.height
4059                for l in range(1, excess+1):
4060                    lines = fill(cell.text, width=w+l, fix_sentence_endings=True).splitlines()
4061                    if len(lines) < h:
4062                        cell.height = lines
4063                        excess -= l
4064                        c = h//r
4065                        for k in range(rows):
4066                            cells[k][j].colwidth += l
4067                        break
4068                else:
4069                    break
4070            else:
4071                break
4072
4073        #show(cells, 'colwidth', 'after widening wide cells and re-wrapping lines')
4074        #show(cells, 'height')
4075        #show(cells, 'origin')
4076
4077        # ----------------------------------------------------------------------
4078        # Normalize cell height and lines lists
4079        #show(cells, 'wrapped', 'before height normalization')
4080        #show(cells, 'rowspan', 'before height normalization')
4081        for i in range(rows):
4082            minspan = sys.maxsize
4083            for j in range(cols):
4084                cell = cells[i][j]
4085                k, l = cell.origin
4086                hspan = cell.rowspan+k-i if cell.rowspan else minspan
4087                if hspan > 0 and hspan < minspan:
4088                    minspan = hspan
4089            maxlines = 0
4090            for j in range(cols):
4091                cell = cells[i][j]
4092                k, l = cell.origin
4093                hspan = cell.rowspan+k-i if cell.rowspan else minspan
4094                lines = len(cell.wrapped) if cell.wrapped else 0
4095                if hspan == minspan and lines > maxlines:
4096                    maxlines = lines
4097            for j in range(cols):
4098                cells[i][j].lines = maxlines
4099
4100        # ----------------------------------------------------------------------
4101        # Calculate total height for rowspan cells
4102        for i in range(rows):
4103            for j in range(cols):
4104                cells[i][j].m = None
4105                cells[i][j].height = None
4106                k, l = cells[i][j].origin
4107                cell = cells[k][l]
4108                if cell.m is None:
4109                    cell.m = 0
4110                    cell.height = sum([ cells[n][l].lines for n in range(k, k+cell.rowspan)]) + cell.rowspan-1
4111
4112        # ----------------------------------------------------------------------
4113        # Calculate total width for colspan cells
4114        for i in range(rows):
4115            for j in range(cols):
4116                k, l = cells[i][j].origin
4117                cell = cells[k][l]
4118                if cell.origin == (i,j):
4119                    cell.colwidth = sum([ cells[i][n].colwidth for n in range(j, j+cell.colspan)]) + cell.colspan-1
4120
4121        # ----------------------------------------------------------------------
4122        # Calculate minimum padding per table column
4123        minpad = [width,]*cols
4124        for i in range(rows):
4125            for j in range(cols):
4126                cell = cells[i][j]
4127                if cell.origin == (i, j):
4128                    padding = min([width] + [(cell.colwidth - textwidth(line)) for line in cell.wrapped])
4129                    if padding < minpad[j]:
4130                        minpad[j] = padding
4131
4132        # ----------------------------------------------------------------------
4133        # Add cell borders
4134        x = bchar['+']
4135        l = bchar['|']
4136        for i in range(rows):
4137            for j in range(cols):
4138                cell = cells[i][j]
4139                if cell.origin == (i, j):
4140                    wrapped = (cell.wrapped + ['']*cell.height)[:cell.height]
4141                    lines = (  ([ x + cell.top*cell.colwidth + x ] if cell.top else [])
4142                             + ([ l + justify(cell, line, minpad[j]) + l for line in wrapped ])
4143                             + ([ x + cell.bot*cell.colwidth + x ] if cell.bot else []) )
4144                    cell.wrapped = lines
4145
4146        #show(cells, 'lines', 'before assembly')
4147        # ----------------------------------------------------------------------
4148        # Emit combined cell content, line by line
4149        lines = []
4150        prev_bottom_border_line = None
4151        for i in range(rows):
4152            # For each table row, render the top cell border (if any) and content.  The bottom
4153            # border will be merged with the next row's top border when processing that row.
4154            has_top_border = any( c.top for c in cells[i] if c.wrapped)
4155            has_bot_border = any( c.bot for c in cells[i] if c.wrapped)
4156            for n in range(min(len(c.wrapped) for c in cells[i] if c.wrapped)-int(has_bot_border) ):
4157                line = build_line(cells, i, cols)
4158                lines.append(line)
4159                if prev_bottom_border_line:
4160                    if has_top_border:
4161                        line = lines[-1]
4162                        lines[-1] = Line(''.join(merge_border(prev_bottom_border_line.text[c], line.text[c]) for c in range(len(line.text))), line.elem)
4163                    else:
4164                        line = lines[-1]
4165                        lines[-1] = prev_bottom_border_line
4166                        lines.append(line)
4167                prev_bottom_border_line = None
4168            # Get the next line, which will contain the bottom border for completed cells,
4169            # without incrementing the line count (we might have rowspan cells which might
4170            # not have been completely consumed yet):
4171            prev_bottom_border_line = build_line(cells, i, cols, next=False) if has_bot_border else None
4172        lines.append(prev_bottom_border_line)
4173        return lines
4174
4175    def render_table(self, e, width, **kwargs):
4176        kwargs['joiners'].update({
4177            'name':     Joiner(': ', 0, 0, False, False),
4178            'dl':       Joiner('\n\n', 0, 0, False, False),
4179            'ol':       Joiner('\n\n', 0, 0, False, False),
4180            't':        Joiner('\n\n', 0, 0, False, False),
4181            'ul':       Joiner('\n\n', 0, 0, False, False),
4182        })
4183        #
4184        pn = e.get('pn')
4185        num = pn.split('-')[1].capitalize()
4186        children = e.getchildren()
4187        title = "Table %s" % (num, )
4188        if len(children) and children[0].tag == 'name':
4189            name = children[0]
4190            children = children[1:]
4191            title = self.tjoin(title, name, width, **kwargs)
4192        lines = self.build_table(e, width, **kwargs)
4193        table_width = min([ width, max( len(l.text) for l in lines ) ])
4194        min_title_width = min([ 26, len(title) ])
4195        if table_width < min_title_width:
4196            table_width = min_title_width
4197            lines = align(lines, 'center', table_width)
4198        title = '\n'+center(title, table_width).rstrip(stripspace)
4199        lines += mklines(title, e)
4200        lines = align(lines, e.get('align', 'center'), width)
4201        return lines
4202
4203
4204    # 2.55.  <tbody>
4205    #
4206    #    A container for a set of body rows for a table.
4207    #
4208    #    This element appears as a child element of <table> (Section 2.54).
4209    #
4210    #    Content model:
4211    #
4212    #    One or more <tr> elements (Section 2.61)
4213    #
4214    # 2.55.1.  "anchor" Attribute
4215    #
4216    #    Document-wide unique identifier for the tbody.
    #
    # Implementation note: render_table() passes the whole <table> to
    # build_table(), which is where (per the comment below) <tbody> is dealt
    # with.  null_renderer is defined outside this part of the file;
    # presumably it renders nothing -- TODO confirm.
    render_tbody = null_renderer        # handled in build_table
4218
4219    # 2.56.  <td>
4220    #
4221    #    A cell in a table row.
4222    #
4223    #    This element appears as a child element of <tr> (Section 2.61).
4224    #
4225    #    Content model:
4226    #
4227    #    Either:
4228    #
4229    #       In any order, but at least one of:
4230    #
4231    #       *  <artwork> elements (Section 2.5)
4232    #
4233    #       *  <dl> elements (Section 2.20)
4234    #
4235    #       *  <figure> elements (Section 2.25)
4236    #
4237    #       *  <ol> elements (Section 2.34)
4238    #
4239    #       *  <sourcecode> elements (Section 2.48)
4240    #
4241    #       *  <t> elements (Section 2.53)
4242    #
4243    #       *  <ul> elements (Section 2.63)
4244    #
4245    #    Or:
4246    #
4247    #       In any order:
4248    #
4249    #       *  Text
4250    #
4251    #       *  <bcp14> elements (Section 2.9)
4252    #
4253    #       *  <br> elements (Section 2.12)
4254    #
4255    #       *  <cref> elements (Section 2.16)
4256    #
4257    #       *  <em> elements (Section 2.22)
4258    #
4259    #       *  <eref> elements (Section 2.24)
4260    #
4261    #       *  <iref> elements (Section 2.27)
4262    #
4263    #       *  <relref> elements (Section 2.44)
4264    #
4265    #       *  <strong> elements (Section 2.50)
4266    #
4267    #       *  <sub> elements (Section 2.51)
4268    #
4269    #       *  <sup> elements (Section 2.52)
4270    #
4271    #       *  <tt> elements (Section 2.62)
4272    #
4273    #       *  <xref> elements (Section 2.66)
4274    #
4275    # 2.56.1.  "align" Attribute
4276    #
4277    #    Controls whether the content of the cell appears left justified
4278    #    (default), centered, or right justified.  Note that "center" or
4279    #    "right" will probably only work well in cells with plain text; any
4280    #    other elements might make the contents render badly.
4281    #
4282    #    Allowed values:
4283    #
4284    #    o  "left" (default)
4285    #
4286    #    o  "center"
4287    #
4288    #    o  "right"
4289    #
4290    # 2.56.2.  "anchor" Attribute
4291    #
4292    #    Document-wide unique identifier for the cell.
4293    #
4294    # 2.56.3.  "colspan" Attribute
4295    #
4296    #    The number of columns that the cell is to span.  For example, setting
4297    #    "colspan='3'" indicates that the cell occupies the same horizontal
4298    #    space as three cells of a row without any "colspan" attributes.
4299    #
4300    # 2.56.4.  "rowspan" Attribute
4301    #
4302    #    The number of rows that the cell is to span.  For example, setting
4303    #    "rowspan='3'" indicates that the cell occupies the same vertical
4304    #    space as three rows.
    #
    # Implementation note: render_table() passes the whole <table> to
    # build_table(), which is where (per the comment below) <td> cells are
    # dealt with.  null_renderer is defined outside this part of the file;
    # presumably it renders nothing -- TODO confirm.
    render_td = null_renderer           # handled in build_table
4306
4307
4308    # 2.57.  <tfoot>
4309    #
4310    #    A container for a set of footer rows for a table.
4311    #
4312    #    This element appears as a child element of <table> (Section 2.54).
4313    #
4314    #    Content model:
4315    #
4316    #    One or more <tr> elements (Section 2.61)
4317    #
4318    # 2.57.1.  "anchor" Attribute
4319    #
4320    #    Document-wide unique identifier for the tfoot.
    #
    # Implementation note: render_table() passes the whole <table> to
    # build_table(), which is where (per the comment below) <tfoot> is dealt
    # with.  null_renderer is defined outside this part of the file;
    # presumably it renders nothing -- TODO confirm.
    render_tfoot = null_renderer        # handled in build_table
4322
4323
4324    # 2.58.  <th>
4325    #
4326    #    A cell in a table row.  When rendered, this will normally come out in
4327    #    boldface; other than that, there is no difference between this and
4328    #    the <td> element.
4329    #
4330    #    This element appears as a child element of <tr> (Section 2.61).
4331    #
4332    #    Content model:
4333    #
4334    #    Either:
4335    #
4336    #       In any order, but at least one of:
4337    #
4338    #       *  <artwork> elements (Section 2.5)
4339    #
4340    #       *  <dl> elements (Section 2.20)
4341    #
4342    #       *  <figure> elements (Section 2.25)
4343    #
4344    #       *  <ol> elements (Section 2.34)
4345    #
4346    #       *  <sourcecode> elements (Section 2.48)
4347    #
4348    #       *  <t> elements (Section 2.53)
4349    #
4350    #       *  <ul> elements (Section 2.63)
4351    #
4352    #    Or:
4353    #
4354    #       In any order:
4355    #
4356    #       *  Text
4357    #
4358    #       *  <bcp14> elements (Section 2.9)
4359    #
4360    #       *  <br> elements (Section 2.12)
4361    #
4362    #       *  <cref> elements (Section 2.16)
4363    #
4364    #       *  <em> elements (Section 2.22)
4365    #
4366    #       *  <eref> elements (Section 2.24)
4367    #
4368    #       *  <iref> elements (Section 2.27)
4369    #
4370    #       *  <relref> elements (Section 2.44)
4371    #
4372    #       *  <strong> elements (Section 2.50)
4373    #
4374    #       *  <sub> elements (Section 2.51)
4375    #
4376    #       *  <sup> elements (Section 2.52)
4377    #
4378    #       *  <tt> elements (Section 2.62)
4379    #
4380    #       *  <xref> elements (Section 2.66)
4381    #
4382    # 2.58.1.  "align" Attribute
4383    #
4384    #    Controls whether the content of the cell appears left justified
4385    #    (default), centered, or right justified.  Note that "center" or
4386    #    "right" will probably only work well in cells with plain text; any
4387    #    other elements might make the contents render badly.
4388    #
4389    #    Allowed values:
4390    #
4391    #    o  "left" (default)
4392    #
4393    #    o  "center"
4394    #
4395    #    o  "right"
4396    #
4397    # 2.58.2.  "anchor" Attribute
4398    #
4399    #    Document-wide unique identifier for the row.
4400    #
4401    # 2.58.3.  "colspan" Attribute
4402    #
4403    #    The number of columns that the cell is to span.  For example, setting
4404    #    "colspan='3'" indicates that the cell occupies the same horizontal
4405    #    space as three cells of a row without any "colspan" attributes.
4406    #
4407    # 2.58.4.  "rowspan" Attribute
4408    #
4409    #    The number of rows that the cell is to span.  For example, setting
4410    #    "rowspan='3'" indicates that the cell occupies the same vertical
4411    #    space as three rows.
    #
    # Implementation note: render_table() passes the whole <table> to
    # build_table(), which is where (per the comment below) <th> cells are
    # dealt with.  null_renderer is defined outside this part of the file;
    # presumably it renders nothing -- TODO confirm.
    render_th = null_renderer           # handled in build_table
4413
4414
4415    # 2.59.  <thead>
4416    #
4417    #    A container for a set of header rows for a table.
4418    #
4419    #    This element appears as a child element of <table> (Section 2.54).
4420    #
4421    #    Content model:
4422    #
4423    #    One or more <tr> elements (Section 2.61)
4424    #
4425    # 2.59.1.  "anchor" Attribute
4426    #
4427    #    Document-wide unique identifier for the thead.
    #
    # Implementation note: render_table() passes the whole <table> to
    # build_table(), which is where (per the comment below) <thead> is dealt
    # with.  null_renderer is defined outside this part of the file;
    # presumably it renders nothing -- TODO confirm.
    render_thead = null_renderer        # handled in build_table
4429
4430
4431    # 2.60.  <title>
4432    #
4433    #    Represents the document title.
4434    #
4435    #    When this element appears in the <front> element of the current
4436    #    document, the title might also appear in page headers or footers.  If
4437    #    it is long (~40 characters), the "abbrev" attribute can be used to
4438    #    specify an abbreviated variant.
4439    #
4440    #    This element appears as a child element of <front> (Section 2.26).
4441    #
4442    #    Content model: only text content.
4443    def render_title(self, e, width, **kwargs):
4444        r = e.getparent().getparent()   # <reference>
4445        title = clean_text(' '.join(e.itertext()).strip(stripspace))
4446        quote_title = r.get('quoteTitle')
4447        if quote_title == 'true':
4448            title = '"%s"' % title
4449        return title
4450
4451    def render_title_front(self, e, width, **kwargs):
4452        pp = e.getparent().getparent()
4453        title = '\u2028'.join(e.itertext()).strip(stripspace)
4454        title = fill(title, width=width, **kwargs)
4455        title = center(title, width)
4456        if self.options.rfc:
4457            return title
4458        else:
4459            if pp.tag == 'rfc':
4460                doc_name = self.root.get('docName')
4461                if doc_name:
4462                    title += '\n'+doc_name.strip(stripspace).center(width).rstrip(stripspace)
4463            return title
4464
4465    # 2.60.1.  "abbrev" Attribute
4466    #
4467    #    Specifies an abbreviated variant of the document title.
4468    #
4469    # 2.60.2.  "ascii" Attribute
4470    #
4471    #    The ASCII equivalent of the title.
4472
4473
4474    # 2.61.  <tr>
4475    #
4476    #    A row of a table.
4477    #
4478    #    This element appears as a child element of <tbody> (Section 2.55),
4479    #    <tfoot> (Section 2.57), and <thead> (Section 2.59).
4480    #
4481    #    Content model:
4482    #
4483    #    In any order, but at least one of:
4484    #
4485    #    o  <td> elements (Section 2.56)
4486    #
4487    #    o  <th> elements (Section 2.58)
4488    #
4489    # 2.61.1.  "anchor" Attribute
4490    #
4491    #    Document-wide unique identifier for the row.
    #
    # Implementation note: render_table() passes the whole <table> to
    # build_table(), which is where (per the comment below) <tr> rows are
    # dealt with.  null_renderer is defined outside this part of the file;
    # presumably it renders nothing -- TODO confirm.
    render_tr = null_renderer           # handled in build_table
4493
4494    # <toc>
4495    def render_toc(self, e, width, **kwargs):
4496        lines = []
4497        for c in e.getchildren():
4498            numbered = c.get('numbered')
4499            if not numbered == 'false':
4500                self.err(c, "Expected toc section to have numbered='false', but found '%s'" % (numbered, ))
4501            keep_url = True if self.options.rfc else False
4502            lines = self.ljoin(lines, c, width, keep_url=keep_url, **kwargs)
4503        return lines
4504
4505
4506    # 2.62.  <tt>
4507    #
4508    #    Causes the text to be displayed in a constant-width font.  This
4509    #    element can be combined with other character formatting elements, and
4510    #    the formatting will be additive.
4511    def render_tt(self, e, width, **kwargs):
4512        p = e.getparent()
4513        render_plain = list(p.iterancestors('table')) and not utils.hastext(p, ignore=['tt'])
4514        if render_plain:
4515            text = '%s' % self.inner_text_renderer(e)
4516        else:
4517            text = '"%s"' % self.inner_text_renderer(e)
4518        text += e.tail or ''
4519        return text
4520
4521
4522    # 2.63.  <ul>
4523    #
4524    #    An unordered list.  The labels on the items will be symbols picked by
4525    #    the formatter.
4526    #
4527    #    This element appears as a child element of <abstract> (Section 2.1),
4528    #    <aside> (Section 2.6), <blockquote> (Section 2.10), <dd>
4529    #    (Section 2.18), <li> (Section 2.29), <note> (Section 2.33), <section>
4530    #    (Section 2.46), <td> (Section 2.56), and <th> (Section 2.58).
4531    #
4532    #    Content model:
4533    #
4534    #    One or more <li> elements (Section 2.29)
4535    #
4536    # 2.63.1.  "anchor" Attribute
4537    #
4538    #    Document-wide unique identifier for the list.
4539    #
4540    # 2.63.2.  "empty" Attribute
4541    #
4542    #    Defines whether or not the label is empty.  empty="true" indicates
4543    #    that no label will be shown.
4544    #
4545    #    Allowed values:
4546    #
4547    #    o  "false" (default)
4548    #
4549    #    o  "true"
4550    #
4551    # 2.63.3.  "spacing" Attribute
4552    #
4553    #    Defines whether or not there is a blank line between entries.
4554    #    spacing="normal" indicates a single blank line, while
4555    #    spacing="compact" indicates no space between.
4556    #
4557    #    Allowed values:
4558    #
4559    #    o  "normal" (default)
4560    #
4561    #    o  "compact"
4562    def render_ul(self, e, width, **kwargs):
4563        # setup and validation
4564        empty = e.get('empty') == 'true'
4565        e._bare = empty and e.get('bare') == 'true'
4566        e._initial_text = self.get_ul_li_initial_text
4567        #
4568        compact = e.get('spacing') == 'compact'
4569        ljoin  = '\n' if compact else '\n\n'
4570        #
4571        depth = len([ a for a in e.iterancestors(e.tag) ])
4572        symbols = self.options.list_symbols
4573        e._symbol = ' ' if empty else symbols[depth%len(symbols)]
4574
4575        #
4576        indent = len(e._symbol)+2
4577        if e._bare:
4578            first = mktextblock(self.render(e[-1], width, **kwargs))
4579            if first:
4580                indent = min(8, len(first.split()[0])+2)
4581        padding = indent
4582        indent = int( e.get('indent') or indent )
4583        hang = max(padding, indent) - indent
4584        e._padding = indent
4585        #
4586        kwargs['joiners'].update({
4587            None:   Joiner(ljoin, indent, 0, False, False),
4588            'li':   Joiner(ljoin, 0, 0, False, False),
4589            't':    Joiner(ljoin, indent, hang, False, False),
4590        })
4591        # rendering
4592        lines = []
4593        for c in e.getchildren():
4594            lines = self.ljoin(lines, c, width, **kwargs)
4595        return lines
4596
4597
4598    def render_u(self, e, width, **kwargs):
4599        try:
4600            text = expand_unicode_element(e)
4601        except (RuntimeError, ValueError) as exception:
4602            text = ''
4603            self.err(e, exception)
4604        anchor = e.get('anchor')
4605        xref = self.root.find('.//xref[@target="%s"]'%anchor) if anchor else None
4606        if xref != None:
4607            # render only literal here
4608            text = e.text
4609        text += e.tail or ''
4610        return text
4611
4612    # 2.64.  <uri>
4613    #
4614    #    Contains a web address associated with the author.
4615    #
4616    #    The contents should be a valid URI; this most likely will be an
4617    #    "http:" or "https:" URI.
4618    #
4619    #    This element appears as a child element of <address> (Section 2.2).
4620    #
4621    #    Content model: only text content.
4622    def render_uri(self, e, width, **kwargs):
4623        latin = kwargs.pop('latin', None)
4624        text = fill("URI:\u00a0\u00a0 %s"%e.text, width=width, **kwargs) if e.text and latin!=False else ''
4625        return text
4626
4627    # 2.65.  <workgroup>
4628    #
4629    #    This element is used to specify the Working Group (IETF) or Research
4630    #    Group (IRTF) from which the document originates, if any.  The
4631    #    recommended format is the official name of the Working Group (with
4632    #    some capitalization).
4633    #
4634    #    In Internet-Drafts, this is used in the upper left corner of the
4635    #    boilerplate, replacing the "Network Working Group" string.
4636    #    Formatting software can append the words "Working Group" or "Research
4637    #    Group", depending on the "submissionType" property of the <rfc>
4638    #    element (Section 2.45.12).
4639    #
4640    #    This element appears as a child element of <front> (Section 2.26).
4641    #
4642    #    Content model: only text content.
4643
4644
4645    # 2.66.  <xref>
4646    #
4647    #    A reference to an anchor in this document.  Formatters that have
4648    #    links (such as HTML and PDF) are likely to render <xref> elements as
4649    #    internal hyperlinks.  This element is useful for referring to
4650    #    references in the "References" section, to specific sections of this
4651    #    document, to specific figures, and so on.  The "target" attribute is
4652    #    required.
4653    #
4654    #    This element appears as a child element of <annotation>
4655    #    (Section 2.3), <blockquote> (Section 2.10), <c> (Section 3.1), <cref>
4656    #    (Section 2.16), <dd> (Section 2.18), <dt> (Section 2.21), <em>
4657    #    (Section 2.22), <li> (Section 2.29), <name> (Section 2.32),
4658    #    <postamble> (Section 3.5), <preamble> (Section 3.6), <strong>
4659    #    (Section 2.50), <sub> (Section 2.51), <sup> (Section 2.52), <t>
4660    #    (Section 2.53), <td> (Section 2.56), <th> (Section 2.58), <tt>
4661    #    (Section 2.62), and <ttcol> (Section 3.9).
4662    #
4663    #    Content model: only text content.
4664    def render_xref(self, e, width, **kwargs):
4665        target = e.get('target')
4666        section = e.get('section')
4667        format = e.get('format')
4668        reftext = e.get('derivedContent').strip(stripspace)
4669        exptext = self.inner_text_renderer(e, width, **kwargs)
4670        if exptext:
4671            # for later string formatting convenience, a trailing space if any text:
4672            exptext += ' '
4673        content = clean_text(''.join(list(e.itertext())))
4674        if reftext is None:
4675            self.die(e, "Found an <xref> without derivedContent: %s" % (etree.tostring(e),))
4676        #
4677        if not section:
4678            if reftext:
4679                if target in self.refname_mapping:
4680                    if format == 'none':
4681                        text = "%s" % exptext
4682                    elif format == 'title':
4683                        if content:
4684                            text = '%s ("%s")' % (exptext, reftext.strip('"'))
4685                        else:
4686                            text = '"%s"' % reftext.strip('"')
4687                    else:
4688                        if content:
4689                            text = "%s[%s]" % (exptext, reftext)
4690                        else:
4691                            text = "[%s]" % reftext
4692                else:
4693                    if format == 'none':
4694                        text = "%s" % exptext
4695                    else:
4696                        if content:
4697                            text = "%s(%s)" % (exptext, reftext)
4698                        else:
4699                            text = "%s" % (exptext or reftext)
4700            else:
4701                text = exptext.strip(stripspace)
4702            pageno = e.get('pageno')
4703            if pageno and pageno.isdigit():
4704                text += '\u2026' '%04d' % int(pageno)
4705        else:
4706            label = 'Section' if section[0].isdigit() else 'Appendix' if re.search(r'^[A-Z](\.|$)', section) else 'Part'
4707            sform  = e.get('sectionFormat')
4708
4709            if   sform == 'of':
4710                text = '%s %s of %s[%s]' % (label, section, exptext, reftext)
4711            elif sform == 'comma':
4712                text = '%s[%s], %s %s' % (exptext, reftext, label, section)
4713            elif sform == 'parens':
4714                text = '%s[%s] (%s %s)' % (exptext, reftext, label, section)
4715            elif sform == 'bare':
4716                if exptext and exptext != section:
4717                    text = '%s (%s)' % (section, exptext.strip(stripspace))
4718                else:
4719                    text = '%s' % (section, )
4720            else:
4721                self.err(e, 'Unexpected value combination: section: %s  sectionFormat: %s' %(section, sform))
4722
4723        # Prevent line breaking on dash
4724        text = text.replace('-', '\u2011')
4725        text += (e.tail or '')
4726
4727        return text
4728
4729    # 2.66.1.  "format" Attribute
4730    #
4731    #    This attribute signals to formatters what the desired format of the
4732    #    reference should be.  Formatters for document types that have linking
4733    #    capability should wrap the displayed text in hyperlinks.
4734    #
4735    #    "counter"
4736    #
4737    #       The "derivedContent" attribute will contain just a counter.  This
4738    #       is used for targets that are <section>, <figure>, <table>, or
4739    #       items in an ordered list.  Using "format='counter'" where the
4740    #       target is any other type of element is an error.
4741    #
4742    #       For example, with an input of:
4743    #
4744    #          <section anchor="overview">Protocol Overview</section>
4745    #          . . .
4746    #          See Section <xref target="overview" format="counter"/>
4747    #          for an overview.
4748    #
4749    #       An HTML formatter might generate:
4750    #
4751    #          See Section <a href="#overview">1.7</a> for an overview.
4752    #
4753    #    "default"
4754    #
4755    #       If the element has no content, the "derivedContent" attribute will
4756    #       contain a text fragment that describes the referenced part
4757    #       completely, such as "XML" for a target that is a <reference>, or
4758    #       "Section 2" or "Table 4" for a target to a non-reference.  (If the
4759    #       element has content, the "derivedContent" attribute is filled with
4760    #       the content.)
4761    #
4762    #       For example, with an input of:
4763    #
4764    #          <section anchor="overview">Protocol Overview</section>
4765    #          . . .
4766    #          See <xref target="overview"/> for an overview.
4767    #
4768    #       An HTML formatter might generate:
4769    #
4770    #          See <a href="#overview">Section 1.7</a> for an overview.
4771    #
4772    #    "none"
4773    #
4774    #       Deprecated.
4775    #
4776    #    "title"
4777    #
4778    #       If the target is a <reference> element, the "derivedContent"
4779    #       attribute will contain the name of the reference, extracted from
4780    #       the <title> child of the <front> child of the reference.  Or, if
4781    #       the target element has a <name> child element, the
4782    #       "derivedContent" attribute will contain the text content of that
4783    #       <name> element concatenated with the text content of each
4784    #       descendant node of <name> (that is, stripping out all of the XML
4785    #       markup, leaving only the text).  Or, if the target element does
4786    #       not contain a <name> child element, the "derivedContent" attribute
4787    #       will contain the name of the "anchor" attribute of that element
4788    #       with no other adornment.
4789    #
4790    #    Allowed values:
4791    #
4792    #    o  "default" (default)
4793    #
4794    #    o  "title"
4795    #
4796    #    o  "counter"
4797    #
4798    #    o  "none"
4799    #
4800    # 2.66.2.  "pageno" Attribute
4801    #
4802    #    Deprecated.
4803    #
4804    #    Allowed values:
4805    #
4806    #    o  "true"
4807    #
4808    #    o  "false" (default)
4809    #
4810    # 2.66.3.  "target" Attribute (Mandatory)
4811    #
4812    #    Identifies the document component being referenced.  The value needs
4813    #    to match the value of the "anchor" attribute of an element in the
4814    #    document; otherwise, it is an error.
4815
4816    # --- class variables ------------------------------------------------------
4817
4818    element_tags = [
4819        'abstract',
4820        'address',
4821        'annotation',
4822        'artset',
4823        'artwork',
4824        'aside',
4825        'author',
4826        'back',
4827        'bcp14',
4828        'blockquote',
4829        'boilerplate',
4830        'br',
4831        'city',
4832        'code',
4833        'country',
4834        'cref',
4835        'date',
4836        'dd',
4837        'displayreference',
4838        'dl',
4839        'dt',
4840        'em',
4841        'email',
4842        'eref',
4843        'figure',
4844        'front',
4845        'iref',
4846        'li',
4847        'link',
4848        'middle',
4849        'name',
4850        'note',
4851        'ol',
4852        'organization',
4853        'phone',
4854        'postal',
4855        'postalLine',
4856        'refcontent',
4857        'reference',
4858        'referencegroup',
4859        'references',
4860        'region',
4861        'relref',
4862        'rfc',
4863        'section',
4864        'seriesInfo',
4865        'sourcecode',
4866        'street',
4867        'strong',
4868        'sub',
4869        'sup',
4870        't',
4871        'table',
4872        'tbody',
4873        'td',
4874        'tfoot',
4875        'th',
4876        'thead',
4877        'title',
4878        'toc',
4879        'tr',
4880        'tt',
4881        'ul',
4882        'uri',
4883        'xref',
4884    ]
4885    deprecated_element_tags = [
4886        'list',
4887        'spanx',
4888        'vspace',
4889        'c',
4890        'texttable',
4891        'ttcol',
4892        'facsimile',
4893        'format',
4894        'preamble',
4895        'postamble',
4896    ]
4897    unused_front_element_renderers = [
4898        'area',
4899        'keyword',
4900        'workgroup',
4901    ]
4902    all_element_tags = element_tags + deprecated_element_tags + unused_front_element_renderers
4903    deprecated_attributes = [
4904        # element, attrbute
4905        ('figure', 'align'),
4906        ('section', 'title'),
4907        ('note', 'title'),
4908        ('figure', 'title'),
4909        ('references', 'title'),
4910        ('texttable', 'title'),
4911        ('figure', 'src'),
4912        ('artwork', 'xml:space'),
4913        ('artwork', 'height'),
4914        ('artwork', 'width'),
4915        ('figure', 'height'),
4916        ('figure', 'width'),
4917        ('xref', 'pageno'),
4918    ]
4919