1# Copyright The IETF Trust 2018, All Rights Reserved 2# -*- coding: utf-8 -*- 3from __future__ import unicode_literals, print_function, division 4 5import copy 6import datetime 7import inspect 8import re 9import sys 10import six 11import textwrap 12 13from codecs import open 14from collections import namedtuple 15from kitchen.text.display import textual_width as displength 16from lxml import etree 17 18try: 19 from xml2rfc import debug 20 debug.debug = True 21except ImportError: 22 debug = None 23 pass 24 25 26from xml2rfc import strings 27from xml2rfc.writers.base import default_options, BaseV3Writer, RfcWriterError 28from xml2rfc import utils 29from xml2rfc.uniscripts import is_script 30from xml2rfc.util.date import extract_date, augment_date, get_expiry_date, format_date 31from xml2rfc.util.name import short_author_name, short_author_ascii_name, short_author_name_parts, short_org_name_set 32 33from xml2rfc.util.name import full_author_name_set 34from xml2rfc.util.num import ol_style_formatter, num_width 35from xml2rfc.util.unicode import expand_unicode_element, textwidth 36from xml2rfc.util.postal import get_normalized_address_info, get_address_format_rules, address_field_mapping 37from xml2rfc.utils import justify_inline, clean_text 38 39 40IndexItem = namedtuple('indexitem', ['item', 'subitem', 'anchor', 'page', ]) 41Joiner = namedtuple('joiner', ['join', 'indent', 'hang', 'overlap', 'do_outdent']) 42# Joiner parts: 43# join string used to join a rendered element to preceding text or lines 44# indent indentation of rendered element 45# hang additional indentation of second and follwoing lines 46# overlap Join the last preceding and the first new line on one line, rather 47# than simply appending new lines (when processing lines). 48# Used to handle <dl newline="false"/"true"> and multiple emails 49# outdent If necessary to fit content within width, use a smaller indent than 50# indicated, in ljoin(). Used for <artwork>. 51 52# We don't use namedtuple for Line, because the resulting objects would be immutable: 53class Line(object): 54 def __init__(self, text, elem): 55 assert isinstance(text, six.text_type) 56 self.text = text 57 self.elem = elem 58 self.page = None 59 self.block = None 60 self.keep = False # keep this line with the previous one 61 62# a couple of factory functions. We may modify the resulting lines later, 63# which is why we can't just use static instances. 64def blankline(): 65 return [ Line('', None) ] 66def pagefeed(): 67 return [ Line('\f', None) ] 68def striplines(lines): 69 while lines and lines[0].text.strip(stripspace) == '': 70 lines = lines[1:] 71 while lines and lines[-1].text.strip(stripspace) == '': 72 lines = lines[:-1] 73 return lines 74 75class Block(object): 76 " Used to hold line block information needed for pagination." 77 def __init__(self, elem, prev, next=None, beg=None, end=None): 78 self.prev = prev # previous block 79 self.next = next # next block 80 self.elem = elem # the block's element 81 self.beg = beg # beginning line of block 82 self.end = end # ending line of block 83 84wrapper = utils.TextWrapper(width=72) 85splitter = utils.TextSplitter(width=67) 86seen = set() 87 88# This is not a complete list of whitespace characters, and isn't intended to be. It's 89# intended to be whitespace characters commonly occuring in XML input text which should be 90# ignored at the beginning and end of text blocks: 91stripspace = " \t\n\r\f\v" 92 93base_joiners = { 94 None: Joiner('\n\n', 0, 0, False, False), 95 etree.Comment: Joiner('', 0, 0, False, False), 96 etree.PI: Joiner('', 0, 0, False, False), 97 } 98 99def set_joiners(kwargs, update): 100 kwargs['joiners'] = copy.copy(base_joiners) 101 kwargs['joiners'].update(update) 102 103def indent(text, indent=3, hang=0): 104 lines = [] 105 text = text.replace('\u2028', '\n') 106 for l in text.split('\n'): 107 if l.strip(stripspace): 108 if lines: 109 lines.append(' '*(indent+hang) + l) 110 else: 111 lines.append(' '*indent + l) 112 else: 113 lines.append('') 114 return '\n'.join(lines) 115 116def lindent(lines, indent=3, hang=0): 117 for i, l in enumerate(lines): 118 if l.text.strip(stripspace): 119 if i == 0: 120 lines[i].text = ' '*(indent+hang) + l.text 121 else: 122 lines[i].text = ' '*(indent) + l.text 123 return lines 124 125def fill(text, **kwargs): 126 kwargs.pop('joiners', None) 127 kwargs.pop('prev', None) 128 # 129 indent = kwargs.pop('indent', 0) 130 hang = kwargs.pop('hang', 0) 131 first = kwargs.pop('first', 0) 132 keep = kwargs.pop('keep_url', False) 133 initial=' '*(first+indent) 134 subsequent_indent = ' '*(indent+hang) 135 if keep: 136 text = utils.urlkeep(text, max=kwargs['width']) 137 result = wrapper.fill(text, initial=initial, subsequent_indent=subsequent_indent, **kwargs) 138 return result 139 140def center(text, width, **kwargs): 141 "Fold and center the given text" 142 # avoid centered text extending all the way to the margins 143 kwargs['width'] = width-4 144 text = text.replace('\u2028', '\n') 145 lines = text.split('\n') 146 if max([ len(l) for l in lines ]+[0]) > width: 147 # need to reflow 148 lines = wrapper.wrap(text, **kwargs) 149 for i, l in enumerate(lines): 150 lines[i] = l.center(width).rstrip(stripspace) 151 text = '\n'.join(lines).replace('\u00A0', ' ') 152 return text 153 154def align(lines, how, width): 155 "Align the given text block left, center, or right, as a block" 156 if not lines: 157 return lines 158 if how == 'left': 159 return lines 160 w = max( len(l.text) for l in lines ) 161 if w >= width: 162 return lines 163 shift = width - w 164 if how == 'center': 165 for i, l in enumerate(lines): 166 if l.text.strip(stripspace): 167 lines[i].text = ' '*(shift//2)+l.text 168 elif how == 'right': 169 for i, l in enumerate(lines): 170 if l.text.strip(stripspace): 171 lines[i].text = ' '*(shift)+l.text 172 else: 173 # XXX TODO: Raise execption, catch in TextWriter, and emit error 174 pass 175 return lines 176 177def mklines(arg, e): 178 if isinstance(arg, six.text_type): 179 # \u2028 and \u2029 are eliminated here, through splitlines() 180 lines = [ Line(t, e) for t in arg.splitlines() ] 181 else: 182 lines = arg 183 return lines 184 185def mktextblock(arg): 186 if isinstance(arg, six.text_type): 187 text = arg 188 else: 189 text = '\u2028'.join([ l.text for l in arg ]) 190 return text 191 192def mktext(arg): 193 if isinstance(arg, six.text_type): 194 text = arg 195 else: 196 text = '\n'.join([ l.text for l in arg ]) 197 return text 198 199def minwidth(arg): 200 text = mktext(arg) 201 words = text.split() 202 return min([ len(w) for w in words ]+[0]) 203 204def stripl(l): 205 while l and l[0].text.strip(stripspace) == '': 206 del l[0] 207 while l and l[-1].text.strip(stripspace) == '': 208 del l[-1] 209 return l 210 211def findblocks(lines): 212 "Iterate through all lines, adding block beg/end and back/fwd links" 213 elem = None # last seen element 214 prev = None # previous block 215 keep = False # True if previous keepWithNext was true 216 block = None 217 for n, l in enumerate(lines): 218 if l.elem == None: 219 if block!=None and not keep and not block.end: 220 block.end = n 221 elif l.elem != elem: 222 elem = l.elem 223 if elem.tag not in ['t', 'dl', 'dt', 'figure', 'ol', 'table', 'ul', ]: 224 keep = l.keep 225 if not keep: 226 block = Block(elem, prev, beg=n) 227 if prev!=None: 228 prev.next = block 229 if not prev.end: 230 prev.end = n 231 prev = block 232 keep = (elem.get('keepWithNext') == 'true' 233 or (elem.getnext()!=None and elem.getnext().get('keepWithPrevious') == 'true') 234 or elem.tag == 'section') 235 l.block = block 236 else: 237 l.block = block 238 block.end = n 239 return lines 240 241def expand_ellipsis(text, width): 242 if re.search(r'\u2026\d+$', text): 243 head, tail = text.rsplit('\u2026', 1) # split on ellipsis 244 head += ' ' 245 if tail != '0000': 246 tail = '%4s' % tail.lstrip('0') # strip leading zeros 247 last = head.split('\n')[-1] 248 lack = width - (len(last) + len(tail)) 249 elip = (' .'*40)[-lack:] 250 text = head + elip + tail 251 return text 252 253# ------------------------------------------------------------------------------ 254# Address formatting functions, based on i18naddress functions, but rewritten to 255# suit the text output format. 256 257def _format_address_line(line_format, address, rules): 258 def _get_field(name): 259 value = address.get(name, '') 260 if name == 'name': 261 role = address.get('role', '') 262 if role: 263 value += ' (%s)' % role 264 return value 265 266 replacements = { 267 '%%%s' % code: _get_field(field_name) 268 for code, field_name in address_field_mapping.items()} 269 270 fields = re.split('(%.)', line_format) 271 has_content = any([ replacements.get(f) for f in fields if (f.startswith('%') and f!= '%%') ]) 272 if not has_content: 273 return '' 274 values = [replacements.get(f, f) for f in fields] 275 return ''.join(values).strip(stripspace).lstrip(', ') 276 277def format_address(address, latin=False, normalize=False): 278 def hasword(item): 279 return re.search(r'\w', item, re.U) != None 280 address_format, rules = get_address_format_rules(address, latin, normalize) 281 address_line_formats = address_format.split('%n') 282 address_lines = [ 283 _format_address_line(lf, address, rules) 284 for lf in address_line_formats] 285 address_lines = filter(hasword, address_lines) 286 return '\n'.join(address_lines) 287 288class TextWriter(BaseV3Writer): 289 290 def __init__(self, xmlrfc, quiet=None, options=default_options, date=datetime.date.today()): 291 super(TextWriter, self).__init__(xmlrfc, quiet=quiet, options=options, date=date) 292 self.options.min_section_start_lines = 5 293 self.refname_mapping = self.get_refname_mapping() 294 self.rendered = None 295 296 def process(self): 297 if not self.rendered: 298 joiners = base_joiners 299 if self.options.pagination: 300 self.add_pageno_placeholders() 301 lines = self.render(self.root, width=72, joiners=joiners) 302 303 if self.options.pagination: 304 lines = findblocks(lines) 305 lines = self.paginate(lines) 306 lines = self.update_toc(lines) 307 if self.options.debug: 308 for i, l in enumerate(lines): 309 tag = l.elem.tag if l.elem!=None else '-' 310 page = l.elem.page if l.elem!=None else '-' 311 if l.block: 312 if six.PY2: 313 sys.stderr.write(("%3d %10s %3d-%3d [%4s] %s\n" % (i, tag, l.block.beg, l.block.end, page, l.text)).encode('utf8')) 314 else: 315 sys.stderr.write(("%3d %10s %3d-%3d [%4s] %s\n" % (i, tag, l.block.beg, l.block.end, page, l.text))) 316 else: 317 if six.PY2: 318 sys.stderr.write(("%3d %10s [%4s] %s\n" % (i, tag, page, l.text)).encode('utf8')) 319 else: 320 sys.stderr.write(("%3d %10s [%4s] %s\n" % (i, tag, page, l.text))) 321 for i, l in enumerate(lines): 322 length = len(l.text) 323 if length > 72: 324 self.warn(l.elem, "Too long line found (L%s), %s characters longer than 72 characters: \n%s" %(i+1, length-72, l.text)) 325 326 text = ('\n'.join( l.text for l in lines )).rstrip(stripspace) + '\n' 327 328 # Replace some code points whose utility has ended 329 text = text.replace(u'\u00A0', u' ') 330 text = text.replace(u'\u2011', u'-') 331 text = text.replace(u'\u200B', u'') 332 text = text.replace(u'\u2060', u'') 333 assert text == text.replace(u'\u2028', u' ') 334 assert text == text.replace(u'\uE060', u'') 335 336 self.rendered = text 337 338 return self.rendered 339 340 def write(self, filename): 341 """Write the document to a file """ 342 343 text = self.process() 344 345 if self.errors: 346 raise RfcWriterError("Not creating output file due to errors (see above)") 347 348 encoding = 'utf-8-sig' if self.options.bom else 'utf-8' 349 with open(filename, 'w', encoding=encoding) as file: 350 file.write(text) 351 352 if not self.options.quiet: 353 self.log(' Created file %s' % filename) 354 355 def render(self, e, width, **kw): 356 if e.tag in (etree.PI, etree.Comment): 357 return e.tail.lstrip(stripspace) if (e.tail and e.tail.strip(stripspace)) else '' 358 kwargs = copy.deepcopy(kw) 359 func_name = "render_%s" % (e.tag.lower(),) 360 func = getattr(self, func_name, self.default_renderer) 361 if func == self.default_renderer: 362 if e.tag in self.__class__.deprecated_element_tags: 363 self.warn(e, "Was asked to render a deprecated element: <%s>" % (e.tag, )) 364 elif not e.tag in seen: 365 self.warn(e, "No renderer for <%s> found" % (e.tag, )) 366 seen.add(e.tag) 367 res = func(e, width, **kwargs) 368 return res 369 370 def add_pageno_placeholders(self): 371 toc = self.root.find('./front/toc/section') 372 for e in toc.xpath('.//xref[2]'): 373 e.set('pageno', '0000') 374 375 def paginate(self, lines): 376 """ 377 The maximum length of page text is 48 lines. Above this there are 4 lines of 378 top header, or 4 blank lines on the first page, below this there are 5 lines 379 of footer, with ^L on the last line and the footer on the next-to-last line. 380 """ 381 header = justify_inline(self.page_top_left(), 382 self.page_top_center(), 383 self.page_top_right()) 384 start_lineno = 0 # start of unbroken text 385 break_target = 4+48 # projected next break 386 page = 1 387 textlen = len(lines) 388 paginated = [] 389 while start_lineno < textlen: 390 footer = justify_inline(self.page_bottom_left(), 391 self.page_bottom_center(), 392 "[Page %s]" % page) 393 # if the current block ends 1 after break_target, we'll have a widow line on 394 # the next page. If the current block starts 1 before break_target, we'll 395 # have an orphan line on this page. In either case, we insert the 396 # page break one line earlier, at break_target-1, and add a filler line. 397 break_lineno = break_target 398 pad = 0 399 if break_lineno >= textlen: 400 # The remaining text fits on the next page, this is the last page break 401 pad = break_lineno - textlen 402 break_target = textlen - 1 # last line 403 else: 404 # See if we need to adjust break point to avoid break right after a section 405 # heading, and avoid creating orphans or widows 406 block = lines[break_target].block 407 if block is None: 408 # check backwards for section start. If we find one, check 409 # again for another, in case it's a subsection. 410 found = None 411 i = break_target 412 while i > break_target-12: 413 for j in range(1,4): 414 k = i - j 415 if lines[k].elem != None and lines[k].elem.tag == 'section': 416 found = True 417 i = k 418 break # break for loop 419 else: 420 break # break while loop 421 if found: 422 pad = break_target - i 423 break_lineno = i 424 else: 425 # Look for orphan and widow cases 426 olen = break_target - block.beg # number of lines left at the end of this page 427 wlen = block.end - break_target # number of lines at the start of next page 428 blen = block.end - block.beg # complete block length 429 elem = lines[block.beg].elem 430 if elem.tag == 'section': 431 tcount = 0 432 for r in range(block.beg, break_target): 433 if lines[r].elem!=None and lines[r].elem.tag != 'section': 434 tcount += 1 435 if wlen == 1 or tcount <= self.options.min_section_start_lines: 436 adj = break_lineno - block.beg 437 pad += adj 438 break_lineno -= adj 439 elif elem.tag in ['artset', 'artwork', 'figure', 'sourcecode', 'table', ]: 440 if blen < 48 or olen <= self.options.min_section_start_lines: 441 adj = break_lineno - block.beg 442 pad += adj 443 break_lineno -= adj 444 else: 445 pass 446 elif ( (olen in range(1, self.options.orphans+1) and blen > olen) 447 or (wlen in range(1, self.options.widows+1) and blen > wlen)): 448 break_lineno -= olen 449 pad += olen 450 else: 451 pass 452 # Transfer lines to next page 453 pagestart = len(paginated) 454 if page > 1: 455 paginated += pagefeed() + mklines(header, None) + blankline()*2 456 paginated += lines[start_lineno:break_lineno] 457 paginated += blankline() * pad 458 paginated += blankline() * 3 + mklines(footer, None) 459 # make note of each line's page 460 for i in range(pagestart, len(paginated)): 461 paginated[i].page = page 462 if paginated[i].elem != None and not isinstance(paginated[i].elem, (etree._ProcessingInstruction, etree._Comment)): 463 paginated[i].elem.page = page 464 # Set the next page start 465 start_lineno = break_lineno 466 # discard blank lines at the top of the next page, if any 467 while start_lineno < textlen and lines[start_lineno].text.strip(stripspace) == '': 468 start_lineno += 1 469 # advance page end to the next potential page break 470 break_target = start_lineno + 48 471 page += 1 472 473 return paginated 474 475 def update_toc(self, lines): 476 if self.root.get('tocInclude') != 'true': 477 return lines 478 toc = self.root.find('./front/toc/section') 479 in_toc = False 480 toc_start = None 481 toc_end = None 482 for i, l in enumerate(lines): 483 if l.elem is None: 484 continue 485 elif l.elem == toc: 486 in_toc = True 487 toc_start = i 488 elif in_toc and l.elem.tag == 'section': 489 # end of toc 490 in_toc = False 491 toc_end = i 492 break 493 elif in_toc and l.elem.tag in ['li', 't']: 494 xref = l.elem.find('.//xref[2]') 495 if xref!= None: 496 id = xref.get('target') 497 target = self.get_element_from_id(id) 498 page = self.get_element_page(target) 499 xref.set('pageno', '%s'%page ) 500 elif in_toc and l.elem!=None: 501 self.error(l.elem, "Unexpected condition. <%s> in toc" % (l.elem.tag)) 502 else: 503 pass 504 # new toc, to be used to replace the old one 505 toclines = self.render(toc, width=72, joiners=base_joiners) 506 if toc_start and toc_end: 507 j = 2 508 for i in range(toc_start+2, toc_end): 509 old = lines[i] 510 if old.elem is None: 511 continue 512 new = toclines[j] 513 lines[i].text = new.text 514 j += 1 515 return lines 516 517 def tjoin(self, text, e, width, **kwargs): 518 ''' 519 Render element e, then format and join it to text using the 520 appropriate settings in joiners. 521 ''' 522 assert isinstance(text, six.text_type) 523 joiners = kwargs['joiners'] 524 j = joiners[e.tag] if e.tag in joiners else joiners[None] 525 width -= j.indent + j.hang 526 if width < minwidth(text): 527 self.die(e, "Trying to render text in a too narrow column: width: %s, text: '%s'" % (width, text)) 528 kwargs['hang'] = j.hang 529 etext = self.render(e, width, **kwargs) 530 itext = indent(etext, j.indent, j.hang) 531 if text: 532 if '\n' in j.join: 533 text += j.join + itext 534 elif j.join.strip(stripspace) and not itext.strip(stripspace): 535 # don't use non-empty joiners with empty content 536 pass 537 else: 538 text += j.join + itext.lstrip(stripspace) 539 else: 540 text = itext 541 return text 542 543 def ljoin(self, lines, e, width, **kwargs): 544 ''' 545 Render element e, then format and join it to preceding text using the 546 appropriate settings in joiners. 547 ''' 548 assert isinstance(lines, list) 549 assert not lines or isinstance(lines[0], Line) 550 joiners = kwargs['joiners'] 551 j = joiners[e.tag] if e.tag in joiners else joiners[None] 552 width -= j.indent 553 kwargs['hang'] = j.hang 554 res = mklines(self.render(e, width, **kwargs), e) 555 if lines: 556 for i in range(j.join.count('\n')-1): 557 lines += blankline() 558 reswidth = max(len(l.text) for l in res) if res else 0 559 indent = j.indent 560 residue = 0 561 if (hasattr(e, 'outdent') and e.outdent) or (j.do_outdent and reswidth > width): 562 outdent = e.outdent if e.outdent else reswidth-width 563 residue = max(0, outdent - indent) 564 if residue: 565 e.getparent().outdent = residue 566 indent -= min(indent, outdent) 567 self.warn(e, "%s too wide, reducing indentation from %s to %s" % (e.tag.capitalize(), j.indent, indent)) 568 nlines = lindent(res, indent, j.hang) 569 if j.overlap and nlines: 570 firstline = nlines[0] 571 nlines = nlines[1:] 572 if firstline.text.strip(stripspace): 573 lines[-1].text += j.join + firstline.text.lstrip(stripspace) 574 lines += nlines 575 return lines 576 577 578 def element(self, tag, line=None, **attribs): 579 e = self.root.makeelement(tag, attrib=attribs) 580 if line: 581 e.sourceline = line 582 return e 583 584 def get_initials(self, author): 585 """author is an rfc2629 author element. Return the author initials, 586 fixed up according to current flavour and policy.""" 587 initials = author.attrib.get('initials', '') 588 589 initials_list = re.split("[. ]+", initials) 590 try: 591 initials_list.remove('') 592 except: 593 pass 594 if len(initials_list) > 0: 595 # preserve spacing, but make sure all parts have a trailing 596 # period 597 initials = initials.strip(stripspace) 598 initials += '.' if not initials.endswith('.') else '' 599 initials = re.sub('([^.]) ', r'\g<1>. ', initials) 600 return initials 601 602 # --- fallback rendering functions ------------------------------------------ 603 604 def default_renderer(self, e, width, **kwargs): 605 # This is a fallback when a more specific function doesn't exist 606 text = "<%s>:%s" % (e.tag, e.text or '') 607 for c in e.getchildren(): 608 ctext = self.render(c, width, **kwargs) 609 if isinstance(ctext, list): 610 ctext = "\n\n".join(ctext) 611 if ctext is None and debug: 612 debug.show('e') 613 debug.show('c') 614 text += '\n' + ctext 615 text += e.tail or '' 616 return text 617 618# def parts_renderer(self, e, width, **kwargs): 619# text = "" 620# for c in e.getchildren(): 621# text = self.tjoin(text, c, width, **kwargs) 622# return text 623 624 def inner_text_renderer(self, e, width=None, **kwargs): 625 text = e.text or '' 626 for c in e.getchildren(): 627 try: 628 text += self.render(c, width, **kwargs) 629 except TypeError: 630 debug.show('c') 631 raise 632 return text.strip(stripspace) 633 634# def text_renderer(self, e, width, **kwargs): 635# text = self.inner_text_renderer(e, **kwargs) 636# text += ' '+e.tail if e.tail else '' 637# return text 638 639 def text_or_block_renderer(self, e, width, **kw): 640 # This handles the case where the element has two alternative content 641 # models, either text or block-level children; deal with them 642 # separately. Return text and whether this was plain text. 643 kwargs = copy.deepcopy(kw) 644 if utils.hastext(e): 645 _tag = e.tag; e.tag = 't' 646 text = mktext(self.ljoin([], e, width, **kwargs)) 647 e.tag = _tag 648 return text, True 649 else: 650 lines = [] 651 for c in e.getchildren(): 652 lines = self.ljoin(lines, c, width, **kwargs) 653 kwargs.pop('first', None) 654 return lines, False 655 656 657 def quote_renderer(self, e, width, prefix, by, cite, **kwargs): 658 set_joiners(kwargs, { 659 None: Joiner('\n', 0, 0, False, False), 660 't': Joiner('\n\n', 0, 0, False, False), 661 'artset': Joiner('\n\n', 0, 0, False, False), 662 'artwork': Joiner('\n\n', 3, 0, False, True), 663 'sourcecode': Joiner('\n\n', 3, 0, False, False), 664 }) 665 width = width if width else 69 666 text, plain = self.text_or_block_renderer(e, width-3, **kwargs) 667 if plain: 668 text = fill(text, width=width-3, **kwargs) 669 lines = mklines(text, e) 670 if by or cite: 671 lines += [ Line('', e) ] 672 if by: 673 lines += mklines("-- %s" % fill(by, width=width-6, hang=3), e) 674 if cite: 675 lines += mklines(" %s\n" % fill(cite, width=width-6, hang=3), e) 676 for i, l in enumerate(lines): 677 lines[i].text = prefix + ' '+l.text 678 lines = lindent(lines, indent=kwargs.get('indent', 0)) 679 return lines 680 681 def null_renderer(self, e, width, **kwargs): 682 self.die(e, "Did not expect to be asked to render <%s> while in %s//%s" % (e.tag, self.part, e.getparent().tag)) 683 return None 684 685 # --- element rendering functions ------------------------------------------ 686 687 # 2.1. <abstract> 688 # 689 # Contains the Abstract of the document. See [RFC7322] for more 690 # information on restrictions for the Abstract. 691 # 692 # This element appears as a child element of <front> (Section 2.26). 693 # 694 # ... 695 # 696 # 2.1.1. "anchor" Attribute 697 # 698 # Document-wide unique identifier for the Abstract. 699 def render_abstract(self, e, width, **kwargs): 700 kwargs['joiners'].update({ None: Joiner('\n\n', 3, 0, False, False), }) 701 lines = [ Line("Abstract", e) ] 702 for c in e.getchildren(): 703 lines = self.ljoin(lines, c, width, **kwargs) 704 return lines 705 706 # 2.2. <address> 707 # 708 # Provides address information for the author. 709 # 710 # This element appears as a child element of <author> (Section 2.7). 711 def render_address(self, e, width, **kwargs): 712 set_joiners(kwargs, { 713 None: Joiner('\n', 0, 0, False, False), 714 'email': Joiner('', 0, 0, True, False), 715 }) 716 lines = [] 717 for c in e.getchildren(): 718 lines = self.ljoin(lines, c, width, **kwargs) 719 return lines 720 721 # 2.3. <annotation> 722 # 723 # Provides additional prose augmenting a bibliographic reference. This 724 # text is intended to be shown after the rest of the generated 725 # reference text. 726 # 727 # This element appears as a child element of <reference> 728 # (Section 2.40). 729 def render_annotation(self, e, width, **kwargs): 730 text = fill(self.inner_text_renderer(e), width=width, **kwargs) 731 return text 732 733 # 2.4. <area> 734 # 735 # Provides information about the IETF area to which this document 736 # relates (currently not used when generating documents). 737 # 738 # The value ought to be either the full name or the abbreviation of one 739 # of the IETF areas as listed on <http://www.ietf.org/iesg/area.html>. 740 # A list of full names and abbreviations will be kept by the RFC Series 741 # Editor. 742 # 743 # This element appears as a child element of <front> (Section 2.26). 744 745 746 # 2.5. <artwork> 747 # 748 # This element allows the inclusion of "artwork" in the document. 749 # <artwork> provides full control of horizontal whitespace and line 750 # breaks; thus, it is used for a variety of things, such as diagrams 751 # ("line art") and protocol unit diagrams. Tab characters (U+0009) 752 # inside of this element are prohibited. 753 # 754 # Alternatively, the "src" attribute allows referencing an external 755 # graphics file, such as a vector drawing in SVG or a bitmap graphic 756 # file, using a URI. In this case, the textual content acts as a 757 # fallback for output representations that do not support graphics; 758 # thus, it ought to contain either (1) a "line art" variant of the 759 # graphics or (2) prose that describes the included image in sufficient 760 # detail. 761 # 762 # In [RFC7749], the <artwork> element was also used for source code and 763 # formal languages; in v3, this is now done with <sourcecode>. 764 # 765 # There are at least five ways to include SVG in artwork in 766 # Internet-Drafts: 767 # 768 # o Inline, by including all of the SVG in the content of the element, 769 # such as: <artwork type="svg"><svg xmlns="http://www.w3.org/2000/ 770 # svg..."> 771 # 772 # o Inline, but using XInclude (see Appendix B.1), such as: <artwork 773 # type="svg"><xi:include href=...> 774 # 775 # o As a data: URI, such as: <artwork type="svg" src="data:image/ 776 # svg+xml,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3..."> 777 # 778 # o As a URI to an external entity, such as: <artwork type="svg" 779 # src="http://www.example.com/..."> 780 # 781 # o As a local file, such as: <artwork type="svg" src="diagram12.svg"> 782 # 783 # The use of SVG in Internet-Drafts and RFCs is covered in much more 784 # detail in [RFC7996]. 785 # 786 # The above methods for inclusion of SVG art can also be used for 787 # including text artwork, but using a data: URI is probably confusing 788 # for text artwork. 789 # 790 # Formatters that do pagination should attempt to keep artwork on a 791 # single page. This is to prevent artwork that is split across pages 792 # from looking like two separate pieces of artwork. 793 # 794 # See Section 5 for a description of how to deal with issues of using 795 # "&" and "<" characters in artwork. 796 797 def render_artset(self, e, width, **kwargs): 798 preflist = ['ascii-art', ] 799 lines = [] 800 for t in preflist: 801 for a in e.xpath('./artwork[@type="%s"]' % t): 802 lines = self.ljoin(lines, a, width, **kwargs) 803 return lines 804 else: 805 a = e[0] 806 if e.text and e.text.strip(stripspace): 807 lines = self.ljoin(lines, a, width, **kwargs) 808 else: 809 self.err(a, "Expected ascii-art text, but found none.") 810 return lines 811 812 def render_artwork(self, e, width, **kwargs): 813 msg = ( "(Artwork only available as %s: %s)" 814 % ( e.get('type', '(unknown type)'), 815 e.get('originalSrc') or e.get('src') or 'No external link available, see %s.html for artwork.'%self.root.get('docName'))) 816 msg = fill(msg, width=width, **kwargs) 817# text = (e.text.strip(stripspace) and e.text.expandtabs()) or msg 818# text = text.strip('\n') 819# text = '\n'.join( [ l.rstrip(stripspace) for l in text.split('\n') ] ) 820 # We need this in order to deal with xml comments inside artwork: 821 text = (e.text or '') + ''.join([ c.tail for c in e.getchildren() ]) 822 text = text.strip('\n') 823 text = (text.strip(stripspace) and text.expandtabs()) or msg 824 text = '\n'.join( [ l.rstrip(stripspace) for l in text.split('\n') ] ) 825 # 826 lines = [ Line(t, e) for t in text.splitlines() ] 827 lines = align(lines, e.get('align', 'left'), width) 828 return lines 829 830 # 2.5.1. "align" Attribute 831 # 832 # Controls whether the artwork appears left justified (default), 833 # centered, or right justified. Artwork is aligned relative to the 834 # left margin of the document. 835 # 836 # Allowed values: 837 # 838 # o "left" (default) 839 # 840 # o "center" 841 # 842 # o "right" 843 844 845 # 2.5.2. "alt" Attribute 846 # 847 # Alternative text description of the artwork (which is more than just 848 # a summary or caption). When the art comes from the "src" attribute 849 # and the format of that artwork supports alternate text, the 850 # alternative text comes from the text of the artwork itself, not from 851 # this attribute. The contents of this attribute are important to 852 # readers who are visually impaired, as well as those reading on 853 # devices that cannot show the artwork well, or at all. 854 855 856 # 2.5.3. "anchor" Attribute 857 # 858 # Document-wide unique identifier for this artwork. 859 860 # 2.5.5. "name" Attribute 861 # 862 # A filename suitable for the contents (such as for extraction to a 863 # local file). This attribute can be helpful for other kinds of tools 864 # (such as automated syntax checkers, which work by extracting the 865 # artwork). Note that the "name" attribute does not need to be unique 866 # for <artwork> elements in a document. If multiple <artwork> elements 867 # have the same "name" attribute, a processing tool might assume that 868 # the elements are all fragments of a single file, and the tool can 869 # collect those fragments for later processing. See Section 7 for a 870 # discussion of possible problems with the value of this attribute. 871 872 # 2.5.6. "src" Attribute 873 # 874 # The URI reference of a graphics file [RFC3986], or the name of a file 875 # on the local disk. This can be a "data" URI [RFC2397] that contains 876 # the contents of the graphics file. Note that the inclusion of art 877 # with the "src" attribute depends on the capabilities of the 878 # processing tool reading the XML document. Tools need to be able to 879 # handle the file: URI, and they should be able to handle http: and 880 # https: URIs as well. The prep tool will be able to handle reading 881 # the "src" attribute. 882 # 883 # If no URI scheme is given in the attribute, the attribute is 884 # considered to be a local filename relative to the current directory. 885 # Processing tools must be careful to not accept dangerous values for 886 # the filename, particularly those that contain absolute references 887 # outside the current directory. Document creators should think hard 888 # before using relative URIs due to possible later problems if files 889 # move around on the disk. Also, documents should most likely use 890 # explicit URI schemes wherever possible. 891 # 892 # In some cases, the prep tool may remove the "src" attribute after 893 # processing its value. See [RFC7998] for a description of this. 894 # 895 # It is an error to have both a "src" attribute and content in the 896 # <artwork> element. 897 898 # 2.5.7. "type" Attribute 899 # 900 # Specifies the type of the artwork. The value of this attribute is 901 # free text with certain values designated as preferred. 902 # 903 # The preferred values for <artwork> types are: 904 # 905 # o ascii-art 906 # 907 # o binary-art 908 # 909 # o call-flow 910 # 911 # o hex-dump 912 # 913 # o svg 914 # 915 # The RFC Series Editor will maintain a complete list of the preferred 916 # values on the RFC Editor web site, and that list is expected to be 917 # updated over time. Thus, a consumer of v3 XML should not cause a 918 # failure when it encounters an unexpected type or no type is 919 # specified. The table will also indicate which type of art can appear 920 # in plain-text output (for example, type="svg" cannot). 921 922 923 924 # 2.6. <aside> 925 # 926 # This element is a container for content that is semantically less 927 # important or tangential to the content that surrounds it. 928 # 929 # This element appears as a child element of <section> (Section 2.46). 930 # 931 # 2.6.1. "anchor" Attribute 932 # 933 # Document-wide unique identifier for this aside. 934 def render_aside(self, e, width, **kwargs): 935 kwargs['joiners'].update({ 't': Joiner('\n\n', 0, 0, False, False), }) 936 prefix = ' | ' 937 width -= len(prefix) 938 text, plain = self.text_or_block_renderer(e, width, **kwargs) 939 if plain: 940 text = fill(text, width=width, **kwargs) 941 lines = mklines(text, e) 942 for i, l in enumerate(lines): 943 lines[i].text = prefix + l.text 944 lines = lindent(lines, indent=kwargs.get('indent', 0)) 945 return lines 946 947 948 # 2.7. <author> 949 # 950 # Provides information about a document's author. This is used both 951 # for the document itself (at the beginning of the document) and for 952 # referenced documents. 953 # 954 # The <author> elements contained within the document's <front> element 955 # are used to fill the boilerplate and also to generate the "Author's 956 # Address" section (see [RFC7322]). 957 # 958 # Note that an "author" can also be just an organization (by not 959 # specifying any of the "name" attributes, but adding the 960 # <organization> child element). 961 # 962 # Furthermore, the "role" attribute can be used to mark an author as 963 # "editor". This is reflected both on the front page and in the 964 # "Author's Address" section, as well as in bibliographic references. 965 # Note that this specification does not define a precise meaning for 966 # the term "editor". 967 # 968 # This element appears as a child element of <front> (Section 2.26). 969 # 970 # ... 971 # 972 # 2.7.1. "asciiFullname" Attribute 973 # 974 # The ASCII equivalent of the author's full name. 975 # 976 # 2.7.2. "asciiInitials" Attribute 977 # 978 # The ASCII equivalent of the author's initials, to be used in 979 # conjunction with the separately specified asciiSurname. 980 # 981 # 2.7.3. "asciiSurname" Attribute 982 # 983 # The ASCII equivalent of the author's surname, to be used in 984 # conjunction with the separately specified asciiInitials. 985 # 986 # 2.7.4. "fullname" Attribute 987 # 988 # The full name (used in the automatically generated "Author's Address" 989 # section). Although this attribute is optional, if one or more of the 990 # "asciiFullname", "asciiInitials", or "asciiSurname" attributes have 991 # values, the "fullname" attribute is required. 992 # 993 # 2.7.5. "initials" Attribute 994 # 995 # An abbreviated variant of the given name(s), to be used in 996 # conjunction with the separately specified surname. It usually 997 # appears on the front page, in footers, and in references. 998 # 999 # Some processors will post-process the value -- for instance, when it 1000 # only contains a single letter (in which case they might add a 1001 # trailing dot). Relying on this kind of post-processing can lead to 1002 # results varying across formatters and thus ought to be avoided. 1003 # 1004 # 2.7.6. "role" Attribute 1005 # 1006 # Specifies the role the author had in creating the document. 1007 # 1008 # Allowed value: 1009 # 1010 # o "editor" 1011 # 1012 # 2.7.7. "surname" Attribute 1013 # 1014 # The author's surname, to be used in conjunction with the separately 1015 # specified initials. It usually appears on the front page, in 1016 # footers, and in references. 1017 def render_author(self, e, width, **kwargs): 1018 """ 1019 Render one author entry for the Authors' Addresses section. 1020 """ 1021 set_joiners(kwargs, { 1022 None: Joiner('\n', 0, 0, False, False), # default 1023 }) 1024 lines = [] 1025 address = e.find('./address') 1026 if address is None: 1027 address = etree.Element('address') 1028 e.append(address) 1029 postal = e.find('./address/postal') 1030 if postal is None: 1031 # We render author name as part of postal, so make sure it's there 1032 address.insert(0, etree.Element('postal')) 1033 # ascii will be set only if name has codepoints not in the Latin script blocks 1034 name, ascii = full_author_name_set(e) 1035 if ascii: 1036 for c in e.iterchildren('address'): 1037 lines = self.ljoin(lines, c, width, latin=True, **kwargs) 1038 lines = striplines(lines) 1039 lines += blankline() 1040 lines += [ Line( 'Additional contact information:', address) ] 1041 lines += blankline() 1042 lines += lindent(self.ljoin([], c, width, latin=False, **kwargs)) 1043 lines = striplines(lines) 1044 else: 1045 for c in e.iterchildren('address'): 1046 lines = self.ljoin(lines, c, width, **kwargs) 1047 lines = striplines(lines) 1048 lines += blankline() 1049 return lines 1050 1051 def render_author_name(self, e, width, **kwargs): 1052 text = '' 1053 organization = self.render_organization(e.find('organization'), width, **kwargs) 1054 fullname = e.attrib.get('fullname', '') 1055 if not fullname: 1056 surname = e.attrib.get('surname', '') 1057 if surname: 1058 initials = self.get_initials(e) 1059 fullname = '%s %s' % (initials, fullname) 1060 if fullname: 1061 text = fullname 1062 if e.attrib.get('role', '') == 'editor': 1063 text += ' (editor)' 1064 if organization: 1065 text += '\n'+ organization 1066 elif organization: 1067 # Use organization instead of name 1068 text = organization 1069 else: 1070 text = '' 1071 return text 1072 1073 def render_contact(self, e, width, **kwargs): 1074 p = e.getparent() 1075 if p.tag == 't': 1076 name, ascii = full_author_name_set(e) 1077 if ascii: 1078 contact = "%s (%s)" % (name, ascii) 1079 else: 1080 contact = name 1081 # Avoid sentence end space doubling 1082 contact = contact.replace('. ', '.\u00a0') 1083 return contact + (e.tail or '') 1084 elif p.tag == 'section': 1085 return self.render_author(e, width, **kwargs) 1086 else: 1087 return self.null_renderer(e, width, **kwargs) 1088 1089 def render_author_front(self, e, **kwargs): 1090 name = short_author_name(e) 1091 if not is_script(name, 'Latin'): 1092 aname = short_author_ascii_name(e) 1093 name = '%s (%s)' % (name, aname) 1094 # 1095 o = e.find('./organization') 1096 1097 if o != None: 1098 if o.get('showOnFrontPage') == 'true': 1099 organization = self.render_front_organization(o, **kwargs) 1100 else: 1101 organization = None 1102 else: 1103 organization = '' 1104 # 1105 if organization and not name: 1106 name = organization 1107 organization = None 1108 # 1109 if e.get('role') == 'editor': 1110 name += ', Ed.' 1111 return name, organization 1112 1113 def render_authors(self, e, width, **kwargs): 1114 """ 1115 Render authors for reference display. This has to take into 1116 consideration the particular presentation of surnames and initials 1117 used by the RFC Editor. 1118 """ 1119 buf = [] 1120 authors = list(e.iterdescendants('author')) 1121 for i, author in enumerate(authors): 1122 if i == len(authors) - 1 and len(authors) > 1: 1123 buf.append('and ') 1124 organization = author.find('organization') 1125 initials, surname = short_author_name_parts(author) 1126 if surname: 1127 initials = initials or '' 1128 if i == len(authors) - 1 and len(authors) > 1: 1129 # Last author is rendered in reverse 1130 if len(initials) > 0: 1131 buf.append(initials + ' ' + \ 1132 surname) 1133 else: 1134 buf.append(surname) 1135 elif len(initials) > 0: 1136 buf.append(surname + ', ' + initials) 1137 else: 1138 buf.append(surname) 1139 if author.attrib.get('role', '') == 'editor': 1140 buf.append(', Ed.') 1141 elif organization is not None and organization.text: 1142 # Use organization instead of name 1143 buf.append(organization.text.strip(stripspace)) 1144 else: 1145 continue 1146 if len(authors) == 2 and i == 0: 1147 buf.append(' ') 1148 elif i < len(authors) - 1: 1149 buf.append(', ') 1150 return ''.join(buf) 1151 1152 # 2.8. <back> 1153 # 1154 # Contains the "back" part of the document: the references and 1155 # appendices. In <back>, <section> elements indicate appendices. 1156 # 1157 # This element appears as a child element of <rfc> (Section 2.45). 1158 def render_back(self, e, width, **kwargs): 1159 lines = [] 1160 for c in e.getchildren(): 1161 lines = self.ljoin(lines, c, width, **kwargs) 1162 return lines 1163 1164 1165 # 2.9. <bcp14> 1166 # 1167 # Marks text that are phrases defined in [BCP14] such as "MUST", 1168 # "SHOULD NOT", and so on. When shown in some of the output 1169 # representations, the text in this element might be highlighted. The 1170 # use of this element is optional. 1171 # 1172 # This element is only to be used around the actual phrase from BCP 14, 1173 # not the full definition of a requirement. For example, it is correct 1174 # to say "The packet <bcp14>MUST</bcp14> be dropped.", but it is not 1175 # correct to say "<bcp14>The packet MUST be dropped.</bcp14>". 1176 # 1177 # This element appears as a child element of <annotation> 1178 # (Section 2.3), <blockquote> (Section 2.10), <dd> (Section 2.18), <dt> 1179 # (Section 2.21), <em> (Section 2.22), <li> (Section 2.29), <preamble> 1180 # (Section 3.6), <refcontent> (Section 2.39), <strong> (Section 2.50), 1181 # <sub> (Section 2.51), <sup> (Section 2.52), <t> (Section 2.53), <td> 1182 # (Section 2.56), <th> (Section 2.58), and <tt> (Section 2.62). 1183 # 1184 # Content model: only text content. 1185 def render_bcp14(self, e, width, **kwargs): 1186 return (e.text or '') + (e.tail or '') 1187 1188 # 2.10. <blockquote> 1189 # 1190 # Specifies that a block of text is a quotation. 1191 # 1192 # This element appears as a child element of <section> (Section 2.46). 1193 # 1194 # 2.10.1. "anchor" Attribute 1195 # 1196 # Document-wide unique identifier for this quotation. 1197 # 1198 # 2.10.2. "cite" Attribute 1199 # 1200 # The source of the citation. This must be a URI. If the "quotedFrom" 1201 # attribute is given, this URI will be used by processing tools as the 1202 # link for the text of that attribute. 1203 # 1204 # 2.10.3. "quotedFrom" Attribute 1205 # 1206 # Name of person or document the text in this element is quoted from. 1207 # A formatter should render this as visible text at the end of the 1208 # quotation. 1209 def render_blockquote(self, e, width, **kwargs): 1210 by = e.get('quotedFrom') 1211 cite = e.get('cite') 1212 return self.quote_renderer(e, width, '|', by, cite, **kwargs) 1213 1214 # 2.11. <boilerplate> 1215 # 1216 # Holds the boilerplate text for the document. This element is filled 1217 # in by the prep tool. 1218 # 1219 # This element contains <section> elements. Every <section> element in 1220 # this element must have the "numbered" attribute set to "false". 1221 # 1222 # This element appears as a child element of <front> (Section 2.26). 1223 def render_boilerplate(self, e, width, **kwargs): 1224 lines = [] 1225 for c in e.getchildren(): 1226 numbered = c.get('numbered') 1227 if not numbered == 'false': 1228 self.err(c, "Expected boilerplate section to have numbered='false', but found '%s'" % (numbered, )) 1229 keep_url = True if self.options.rfc else False 1230 lines = self.ljoin(lines, c, width, keep_url=keep_url, **kwargs) 1231 return lines 1232 1233 # 2.12. <br> 1234 # 1235 # Indicates that a line break should be inserted in the generated 1236 # output by a formatting tool. Multiple successive instances of this 1237 # element are ignored. 1238 # 1239 # This element appears as a child element of <td> (Section 2.56) and 1240 # <th> (Section 2.58). 1241 def render_br(self, e, width, **kwargs): 1242 return '\u2028' + (e.tail or '') 1243 1244 # 2.13. <city> 1245 # 1246 # Gives the city name in a postal address. 1247 # 1248 # This element appears as a child element of <postal> (Section 2.37). 1249 # 1250 # 2.13.1. "ascii" Attribute 1251 # 1252 # The ASCII equivalent of the city name. 1253 render_city = null_renderer # handled in render_address 1254 1255 # 2.14. <code> 1256 # 1257 # Gives the postal region code. 1258 # 1259 # This element appears as a child element of <postal> (Section 2.37). 1260 # 1261 # 2.14.1. "ascii" Attribute 1262 # 1263 # The ASCII equivalent of the postal code. 1264 render_code = null_renderer # handled in render_address 1265 1266 # 2.15. <country> 1267 # 1268 # Gives the country name or code in a postal address. 1269 # 1270 # This element appears as a child element of <postal> (Section 2.37). 1271 # 1272 # 2.15.1. "ascii" Attribute 1273 # 1274 # The ASCII equivalent of the country name. 1275 render_country = null_renderer # handled in render_address 1276 1277 # 2.16. <cref> 1278 # 1279 # Represents a comment. 1280 # 1281 # Comments can be used in a document while it is work in progress. 1282 # They might appear either inline and visually highlighted, at the end 1283 # of the document, or not at all, depending on the formatting tool. 1284 # 1285 # This element appears as a child element of <annotation> 1286 # (Section 2.3), <blockquote> (Section 2.10), <c> (Section 3.1), <dd> 1287 # (Section 2.18), <dt> (Section 2.21), <em> (Section 2.22), <li> 1288 # (Section 2.29), <name> (Section 2.32), <postamble> (Section 3.5), 1289 # <preamble> (Section 3.6), <strong> (Section 2.50), <sub> 1290 # (Section 2.51), <sup> (Section 2.52), <t> (Section 2.53), <td> 1291 # (Section 2.56), <th> (Section 2.58), <tt> (Section 2.62), and <ttcol> 1292 # (Section 3.9). 1293 # 1294 # 2.16.1. "anchor" Attribute 1295 # 1296 # Document-wide unique identifier for this comment. 1297 # 1298 # 2.16.2. "display" Attribute 1299 # 1300 # Suggests whether or not the comment should be displayed by formatting 1301 # tools. This might be set to "false" if you want to keep a comment in 1302 # a document after the contents of the comment have already been dealt 1303 # with. 1304 # 1305 # Allowed values: 1306 # 1307 # o "true" (default) 1308 # 1309 # o "false" 1310 # 1311 # 2.16.3. "source" Attribute 1312 # 1313 # Holds the "source" of a comment, such as the name or the initials of 1314 # the person who made the comment. 1315 def render_cref(self, e, width, **kwargs): 1316 display = e.get('display') == 'true' 1317 source = e.get('source') 1318 if display: 1319 text = '\u2028' + mktextblock(self.quote_renderer(e, width, '//', source, None, **kwargs)) 1320 return text 1321 else: 1322 return '' 1323 1324 # 2.17. <date> 1325 # 1326 # Provides information about the publication date. This element is 1327 # used for two cases: the boilerplate of the document being produced, 1328 # and inside bibliographic references that use the <front> element. 1329 # 1330 # Boilerplate for Internet-Drafts and RFCs: This element defines the 1331 # date of publication for the current document (Internet-Draft or 1332 # RFC). When producing Internet-Drafts, the prep tool uses this 1333 # date to compute the expiration date (see [IDGUIDE]). When one or 1334 # more of "year", "month", or "day" are left out, the prep tool will 1335 # attempt to use the current system date if the attributes that are 1336 # present are consistent with that date. 1337 # 1338 # In dates in <rfc> elements, the month must be a number or a month 1339 # in English. The prep tool will silently change text month names 1340 # to numbers. Similarly, the year must be a four-digit number. 1341 # 1342 # When the prep tool is used to create Internet-Drafts, it will 1343 # reject a submitted Internet-Draft that has a <date> element in the 1344 # boilerplate for itself that is anything other than today. That 1345 # is, the tool will not allow a submitter to specify a date other 1346 # than the day of submission. To avoid this problem, authors might 1347 # simply not include a <date> element in the boilerplate. 1348 # 1349 # Bibliographic references: In dates in <reference> elements, the date 1350 # information can have prose text for the month or year. For 1351 # example, vague dates (year="ca. 2000"), date ranges 1352 # (year="2012-2013"), non-specific months (month="Second quarter"), 1353 # and so on are allowed. 1354 # 1355 # This element appears as a child element of <front> (Section 2.26). 1356 # 1357 # 2.17.1. "day" Attribute 1358 # 1359 # The day of publication. 1360 # 1361 # 2.17.2. "month" Attribute 1362 # 1363 # The month or months of publication. 1364 # 1365 # 2.17.3. "year" Attribute 1366 # 1367 # The year or years of publication. 1368 def render_date(self, e, width, **kwargs): 1369 #pp = e.getparent().getparent() 1370 #if pp.tag == 'rfc': 1371 have_date = e.get('day') or e.get('month') or e.get('year') 1372 year, month, day = extract_date(e, self.date) 1373 p = e.getparent() 1374 if p==None or p.getparent().tag != 'reference': 1375 # don't touch the given date if we're rendering a reference 1376 year, month, day = augment_date(year, month, day, self.date) 1377 date = format_date(year, month, day, self.options.legacy_date_format) 1378 if e.text and have_date: 1379 date = "%s (%s)" % (e.text, date) 1380 elif e.text: 1381 date = e.text 1382 else: 1383 # date = date 1384 pass 1385 return date 1386 1387 # 2.18. <dd> 1388 # 1389 # The definition part of an entry in a definition list. 1390 # 1391 # This element appears as a child element of <dl> (Section 2.20). 1392 # 1393 # 2.18.1. "anchor" Attribute 1394 # 1395 # Document-wide unique identifier for this definition. 1396 def render_dd(self, e, width, **kwargs): 1397 dtwidth = kwargs.pop('dtwidth') 1398 j = kwargs['joiners']['dd'] 1399 kwargs['first'] = dtwidth + len(j.join) - j.indent if j.overlap else 0 1400 r, foldable = self.text_or_block_renderer(e, width, **kwargs) 1401 lines = mklines(r, e) if foldable else r 1402 if lines: 1403 lines[0].keep = True # keep first line of dd with preceding dt 1404 return lines 1405 1406 # 2.19. <displayreference> 1407 # 1408 # This element gives a mapping between the anchor of a reference and a 1409 # name that will be displayed instead. This allows authors to display 1410 # more mnemonic anchor names for automatically included references. 1411 # The mapping in this element only applies to <xref> elements whose 1412 # format is "default". For example, if the reference uses the anchor 1413 # "RFC6949", the following would cause that anchor in the body of 1414 # displayed documents to be "RFC-dev": 1415 # 1416 # <displayreference target="RFC6949" to="RFC-dev"/> 1417 # 1418 # If a reference section is sorted, this element changes the sort 1419 # order. 1420 # 1421 # It is expected that this element will only be valid in input 1422 # documents. It will likely be removed by prep tools when preparing a 1423 # final version after those tools have replaced all of the associated 1424 # anchors, targets, and "derivedContent" attributes. 1425 # 1426 # This element appears as a child element of <back> (Section 2.8). 1427 # 1428 # 2.19.1. "target" Attribute (Mandatory) 1429 # 1430 # This attribute must be the name of an anchor in a <reference> or 1431 # <referencegroup> element. 1432 # 1433 # 2.19.2. "to" Attribute (Mandatory) 1434 # 1435 # This attribute is a name that will be displayed as the anchor instead 1436 # of the anchor that is given in the <reference> element. The string 1437 # given must start with one of the following characters: 0-9, a-z, or 1438 # A-Z. The other characters in the string must be 0-9, a-z, A-Z, "-", 1439 # ".", or "_". 1440 def render_displayreference(self, e, width, **kwargs): 1441 return '' 1442 1443 1444 # 2.20. <dl> 1445 # 1446 # A definition list. Each entry has a pair of elements: a term (<dt>) 1447 # and a definition (<dd>). (This is slightly different and simpler 1448 # than the model used in HTML, which allows for multiple terms for a 1449 # single definition.) 1450 # 1451 # This element appears as a child element of <abstract> (Section 2.1), 1452 # <aside> (Section 2.6), <blockquote> (Section 2.10), <dd> 1453 # (Section 2.18), <li> (Section 2.29), <note> (Section 2.33), <section> 1454 # (Section 2.46), <td> (Section 2.56), and <th> (Section 2.58). 1455 # 1456 # 2.20.1. "anchor" Attribute 1457 # 1458 # Document-wide unique identifier for the list. 1459 # 1460 # 2.20.2. "hanging" Attribute 1461 # 1462 # The "hanging" attribute defines whether or not the term appears on 1463 # the same line as the definition. hanging="true" indicates that the 1464 # term is to the left of the definition, while hanging="false" 1465 # indicates that the term will be on a separate line. 1466 # 1467 # Allowed values: 1468 # 1469 # o "false" 1470 # 1471 # o "true" (default) 1472 # 1473 # 2.20.3. "spacing" Attribute 1474 # 1475 # Defines whether or not there is a blank line between entries. 1476 # spacing="normal" indicates a single blank line, while 1477 # spacing="compact" indicates no space between. 1478 # 1479 # Allowed values: 1480 # 1481 # o "normal" (default) 1482 # 1483 # o "compact" 1484 def render_dl(self, e, width, **kwargs): 1485 newline = e.get('newline') == 'true' 1486 compact = e.get('spacing') == 'compact' 1487 tjoin = '\n' if compact else '\n\n' 1488 # 1489 indent = int(e.get('indent') or '3') 1490 nljoin = Joiner('\n', indent, 0, False, False) 1491 spjoin = Joiner(' ', indent, 0, True, False) 1492 ddjoin = nljoin if newline else spjoin 1493 set_joiners(kwargs, { 1494 None: Joiner(tjoin, 0, 0, False, False), 1495 'dt': Joiner(tjoin, 0, 0, False, False), 1496 'dd': ddjoin, 1497 }) 1498 # child tags which should always render with newline=True 1499 newline_tags = set([ 1500 'artset', 1501 'artwork', 1502 'aside', 1503 'figure', 1504 'ol', 1505 'sourcecode', 1506 'table', 1507 'ul', 1508 ]) 1509 # rendering 1510 lines = [] 1511 text = '' 1512 dtwidth = indent 1513 for c in e.getchildren(): 1514 if ((not newline and c.tag == 'dd' and c.text and c.text.strip(stripspace) 1515 and (width - len(' ') - len(text)) < len(c.text.split(None, 1)[0])) 1516 or (len(c) and c[0].tag in newline_tags)): 1517 # Add a newline if first word of dd text won't fit to the right of dt 1518 kwargs['joiners']['dd'] = nljoin 1519 else: 1520 kwargs['joiners']['dd'] = ddjoin 1521 # 1522 lines = self.ljoin(lines, c, width, dtwidth=dtwidth, **kwargs) 1523 # 1524 if c.tag == 'dt': 1525 dtwidth = len(lines[-1].text) 1526 return lines 1527 1528 1529 # 2.21. <dt> 1530 # 1531 # The term being defined in a definition list. 1532 # 1533 # This element appears as a child element of <dl> (Section 2.20). 1534 # 1535 # 2.21.1. "anchor" Attribute 1536 # 1537 # Document-wide unique identifier for this term. 1538 def render_dt(self, e, width, **kwargs): 1539 kwargs.pop('dtwidth') 1540 indent = kwargs['joiners']['dd'].indent 1541 join = len(kwargs['joiners']['dd'].join) 1542 text = fill(self.inner_text_renderer(e), width=width-3, **kwargs) 1543 if len(text) < indent: 1544 text = text+' '*max(0, indent-join-len(text)) 1545 return mklines(text, e) 1546 1547 1548 # 2.22. <em> 1549 # 1550 # Indicates text that is semantically emphasized. Text enclosed within 1551 # this element will be displayed as italic after processing. This 1552 # element can be combined with other character formatting elements, and 1553 # the formatting will be additive. 1554 def render_em(self, e, width, **kwargs): 1555 # Render text with leading and trailing '_' 1556 text = '_%s_' % self.inner_text_renderer(e) 1557 text += e.tail or '' 1558 return text 1559 1560 # 2.23. <email> 1561 # 1562 # Provides an email address. 1563 # 1564 # The value is expected to be the addr-spec defined in Section 2 of 1565 # [RFC6068]. 1566 # 1567 # This element appears as a child element of <address> (Section 2.2). 1568 # 1569 # Content model: only text content. 1570 # 1571 # 2.23.1. "ascii" Attribute 1572 # 1573 # The ASCII equivalent of the author's email address. This is only 1574 # used if the email address has any internationalized components. 1575 def render_email(self, e, width, **kwargs): 1576 latin = kwargs.pop('latin', None) 1577 prev = e.getprevious() 1578 if prev!=None and prev.tag==e.tag: 1579 text = fill(", %s"%e.text, width=width, **kwargs) if e.text and latin!=False else '' 1580 else: 1581 text = '\n'+fill("Email: %s"%e.text, width=width, **kwargs) if e.text and latin!=False else '' 1582 return text 1583 1584 # 2.24. <eref> 1585 # 1586 # Represents an "external" link (as specified in the "target" 1587 # attribute). This is useful for embedding URIs in the body of a 1588 # document. 1589 # 1590 # If the <eref> element has non-empty text content, formatters should 1591 # use the content as the displayed text that is linked. Otherwise, the 1592 # formatter should use the value of the "target" attribute as the 1593 # displayed text. Formatters will link the displayed text to the value 1594 # of the "target" attribute in a manner appropriate for the output 1595 # format. 1596 # 1597 # For example, with an input of: 1598 # 1599 # This is described at 1600 # <eref target="http://www.example.com/reports/r12.html"/>. 1601 # 1602 # An HTML formatter might generate: 1603 # 1604 # This is described at 1605 # <a href="http://www.example.com/reports/r12.html"> 1606 # http://www.example.com/reports/r12.html</a>. 1607 # 1608 # With an input of: 1609 # 1610 # This is described 1611 # <eref target="http://www.example.com/reports/r12.html"> 1612 # in this interesting report</eref>. 1613 # 1614 # An HTML formatter might generate: 1615 # 1616 # This is described 1617 # <a href="http://www.example.com/reports/r12.html"> 1618 # in this interesting report</a>. 1619 # 1620 # This element appears as a child element of <annotation> 1621 # (Section 2.3), <blockquote> (Section 2.10), <c> (Section 3.1), <cref> 1622 # (Section 2.16), <dd> (Section 2.18), <dt> (Section 2.21), <em> 1623 # (Section 2.22), <li> (Section 2.29), <name> (Section 2.32), 1624 # <postamble> (Section 3.5), <preamble> (Section 3.6), <strong> 1625 # (Section 2.50), <sub> (Section 2.51), <sup> (Section 2.52), <t> 1626 # (Section 2.53), <td> (Section 2.56), <th> (Section 2.58), <tt> 1627 # (Section 2.62), and <ttcol> (Section 3.9). 1628 # 1629 # Content model: only text content. 1630 # 1631 # 2.24.1. "target" Attribute (Mandatory) 1632 # 1633 # URI of the link target [RFC3986]. This must begin with a scheme name 1634 # (such as "https://") and thus not be relative to the URL of the 1635 # current document. 1636 def render_eref(self, e, width, **kwargs): 1637 target = e.get('target', '') 1638 brackets = e.get('brackets', self.attribute_defaults[e.tag]['brackets']) 1639 if not target: 1640 self.warn(e, "Expected the 'target' attribute to have a value, but found %s" % (etree.tostring(e), )) 1641 if brackets == 'none': 1642 if e.text and target: 1643 target = "(%s)" % target 1644 elif brackets == 'angle': 1645 target = "<%s>" % target 1646 else: 1647 self.warn(e, 'Unexpected attribute value in <eref>: brackets="%s"' % brackets) 1648 text = ' '.join([ t for t in [e.text, target] if t ]) 1649 text += e.tail or '' 1650 return text 1651 1652 1653 # 2.25. <figure> 1654 # 1655 # Contains a figure with a caption with the figure number. If the 1656 # element contains a <name> element, the caption will also show that 1657 # name. 1658 # 1659 # This element appears as a child element of <aside> (Section 2.6), 1660 # <blockquote> (Section 2.10), <dd> (Section 2.18), <li> 1661 # (Section 2.29), <section> (Section 2.46), <td> (Section 2.56), and 1662 # <th> (Section 2.58). 1663 # 1664 # Content model: 1665 # 1666 # In this order: 1667 # 1668 # 1. One optional <name> element (Section 2.32) 1669 # 1670 # 2. Optional <iref> elements (Section 2.27) 1671 # 1672 # 3. One optional <preamble> element (Section 3.6) 1673 # 1674 # 4. In any order, but at least one of: 1675 # 1676 # * <artwork> elements (Section 2.5) 1677 # 1678 # * <sourcecode> elements (Section 2.48) 1679 # 1680 # 5. One optional <postamble> element (Section 3.5) 1681 # 1682 # 2.25.1. "align" Attribute 1683 # 1684 # Deprecated. 1685 # 1686 # Note: does not affect title or <artwork> alignment. 1687 # 1688 # Allowed values: 1689 # 1690 # o "left" (default) 1691 # 1692 # o "center" 1693 # 1694 # o "right" 1695 # 1696 # 2.25.2. "alt" Attribute 1697 # 1698 # Deprecated. If the goal is to provide a single URI for a reference, 1699 # use the "target" attribute in <reference> instead. 1700 # 1701 # 2.25.3. "anchor" Attribute 1702 # 1703 # Document-wide unique identifier for this figure. 1704 # 1705 # 2.25.4. "height" Attribute 1706 # 1707 # Deprecated. 1708 # 1709 # 2.25.5. "src" Attribute 1710 # 1711 # Deprecated. 1712 # 1713 # 2.25.6. "suppress-title" Attribute 1714 # 1715 # Deprecated. 1716 # 1717 # Allowed values: 1718 # 1719 # o "true" 1720 # 1721 # o "false" (default) 1722 # 1723 # 2.25.7. "title" Attribute 1724 # 1725 # Deprecated. Use <name> instead. 1726 # 1727 # 2.25.8. "width" Attribute 1728 # 1729 # Deprecated. 1730 def render_figure(self, e, width, **kwargs): 1731 kwargs['joiners'].update({ 1732 'name': Joiner(': ', 0, 0, False, False), 1733 'artset': Joiner('', 0, 0, False, False), 1734 'artwork': Joiner('', 0, 0, False, True), 1735 'sourcecode': Joiner('', 0, 0, False, False), 1736 }) 1737 # 1738 pn = e.get('pn') 1739 num = pn.split('-')[1].capitalize() 1740 children = e.getchildren() 1741 title = "Figure %s" % (num, ) 1742 if len(children) and children[0].tag == 'name': 1743 name = children[0] 1744 children = children[1:] 1745 title = self.tjoin(title, name, width, **kwargs) 1746 lines = [] 1747 for c in children: 1748 lines = self.ljoin(lines, c, width, **kwargs) 1749 title = '\n'+center(title, width).rstrip(stripspace) 1750 lines += mklines(title, e) 1751 return lines 1752 1753 # 2.26. <front> 1754 # 1755 # Represents the "front matter": metadata (such as author information), 1756 # the Abstract, and additional notes. 1757 # 1758 # A <front> element may have more than one <seriesInfo> element. A 1759 # <seriesInfo> element determines the document number (for RFCs) or 1760 # name (for Internet-Drafts). Another <seriesInfo> element determines 1761 # the "maturity level" (defined in [RFC2026]), using values of "std" 1762 # for "Standards Track", "bcp" for "BCP", "info" for "Informational", 1763 # "exp" for "Experimental", and "historic" for "Historic". The "name" 1764 # attributes of those multiple <seriesInfo> elements interact as 1765 # described in Section 2.47. 1766 # 1767 # This element appears as a child element of <reference> (Section 2.40) 1768 # and <rfc> (Section 2.45). 1769 # 1770 # Content model: 1771 # 1772 # In this order: 1773 # 1774 # ... 1775 # 1776 def render_front(self, e, width, **kwargs): 1777 if e.getparent().tag == 'reference': 1778 return self.render_reference_front(e, width, **kwargs) 1779 else: 1780 text = '\n\n\n\n' + self.render_first_page_top(e, width, **kwargs) + '\n' 1781 lines = mklines(text, e) 1782 for c in e.getchildren(): 1783 if c.tag in ['title', 'seriesInfo', 'author', 'date', 'area', 'workgroup', 'keyword', etree.PI, etree.Comment, ]: 1784 # handled in render_first_page_top() or discarded 1785 continue 1786 res = self.render(c, width, **kwargs) 1787 lines += blankline() + res 1788 return lines 1789 1790 def render_first_page_top(self, e, width, **kwargs): 1791 def join_cols(left, right): 1792 "Join left and right columns of page top into page top text" 1793 l = max(len(left), len(right)) 1794 left += ['']*(l-len(left)) 1795 right += ['']*(l-len(right)) 1796 lines = [] 1797 t = len(left) 1798 for i in range(t): 1799 l = left[i] 1800 r = right[i] 1801 #assert displength(l)+displength(r)<70 1802 w = 72-displength(l)-displength(r) 1803 lines.append(l+' '*w+r) 1804 return '\n'.join(lines).rstrip(stripspace)+'\n' 1805 # 1806 def wrap(label, items, left, right, suffix=''): 1807 line = '%s%s%s' % (label, items, suffix) 1808 ll = len(left) 1809 lr = len(right) 1810 width = 48 if ll >= lr else min(48, 72-4-len(right[ll])) 1811 wrapper = textwrap.TextWrapper(width=width, subsequent_indent=' '*len(label)) 1812 return wrapper.wrap(line) 1813 # 1814 def normalize(t): 1815 return re.sub(r',\s*', ', ', t).strip(', ') 1816 1817 def get_left(front, right): 1818 "Get front page top left column" 1819 #left_parts = ['source', 'seriesInfo', 'obsoletes', 'updates', 'category', 'issn', 'expires', ] 1820 left = [] 1821 if self.root.get('ipr') == 'none': 1822 for group in front.xpath('./workgroup'): 1823 if group.text and group.text.strip(stripspace): 1824 found = True 1825 left.append(group.text.strip(stripspace)) 1826 return left 1827 if self.options.rfc: 1828 # 1829 # There is a set of additional information that is needed at the front 1830 # of the RFC. Historically, this has been presented with the 1831 # information below in a left hand column, and the author-related 1832 # information described above in the right. 1833 # 1834 # <document source> This describes the area where the work originates. 1835 # Historically, all RFCs were labeled "Network Working Group". 1836 # Network Working Group refers to the original version of today's 1837 # IETF when people from the original set of ARPANET sites and 1838 # whomever else was interested -- the meetings were open -- got 1839 # together to discuss, design, and document proposed protocols 1840 # [RFC3]. Here, we obsolete the term "Network Working Group" in 1841 # order to indicate the originating stream. 1842 # 1843 # The <document source> is the name of the RFC stream, as defined in 1844 # [RFC4844] and its successors. At the time of this publication, 1845 # the streams, and therefore the possible entries are: 1846 # 1847 # * Internet Engineering Task Force 1848 # * Internet Architecture Board 1849 # * Internet Research Task Force 1850 # * Independent Submission 1851 stream = self.root.get('submissionType') 1852 left.append(strings.stream_name[stream]) 1853 # 1854 # Request for Comments: <RFC number> This indicates the RFC number, 1855 # assigned by the RFC Editor upon publication of the document. This 1856 # element is unchanged. 1857 for item in front.iter('seriesInfo'): 1858 name = item.get('name') 1859 value = item.get('value') 1860 if name == 'RFC': 1861 name = 'Request for Comments' 1862 left.append("%s: %s" % (name, value)) 1863 # <subseries ID> <subseries number> Some document categories are also 1864 # labeled as a subseries of RFCs. These elements appear as 1865 # appropriate for such categories, indicating the subseries and the 1866 # documents number within that series. Currently, there are 1867 # subseries for BCPs [RFC2026] and STDs [RFC1311]. These subseries 1868 # numbers may appear in several RFCs. For example, when a new RFC 1869 # obsoletes or updates an old one, the same subseries number is 1870 # used. Also, several RFCs may be assigned the same subseries 1871 # number: a single STD, for example, may be composed of several 1872 # RFCs, each of which will bear the same STD number. This element 1873 # is unchanged. 1874 category = self.root.get('category', '') 1875 # 1876 ## The following code duplicates series info also generated from <seriesInfo> 1877 ## entries. Commented out. 1878 #series_no = self.root.get('seriesNo') 1879 #if category and category in strings.series_name and series_no: 1880 # left.append('%s: %s' % (strings.series_name[category], series_no)) 1881 #else: 1882 # pass 1883 # [<RFC relation>:<RFC number[s]>] Some relations between RFCs in the 1884 # series are explicitly noted in the RFC header. For example, a new 1885 # RFC may update one or more earlier RFCs. Currently two 1886 # relationships are defined: "Updates" and "Obsoletes" [RFC7322]. 1887 # Variants like "Obsoleted by" are also used (e.g, in [RFC5143]). 1888 # Other types of relationships may be defined by the RFC Editor and 1889 # may appear in future RFCs. 1890 obsoletes = self.root.get('obsoletes') 1891 if obsoletes: 1892 left += wrap('Obsoletes: ', normalize(obsoletes), left, right) 1893 updates = self.root.get('updates') 1894 if updates: 1895 left += wrap('Updates: ', normalize(updates), left, right) 1896 1897 # Category: <category> This indicates the initial RFC document 1898 # category of the publication. These are defined in [RFC2026]. 1899 # Currently, this is always one of: Standards Track, Best Current 1900 # Practice, Experimental, Informational, or Historic. This element 1901 # is unchanged. 1902 if category: 1903 if category in strings.category_name: 1904 left.append('Category: %s' % (strings.category_name[category], )) 1905 else: 1906 self.warn(self.root, "Expected a known category, one of %s, but found '%s'" % (','.join(strings.category_name.keys()), category, )) 1907 else: 1908 self.warn(self.root, "Expected a category, one of %s, but found none" % (','.join(strings.category_name.keys()), )) 1909 # 1910 left.append('ISSN: 2070-1721') 1911 # 1912 else: 1913 # Internet-Draft 1914 found = False 1915 for group in front.xpath('./workgroup'): 1916 if group.text and group.text.strip(stripspace): 1917 found = True 1918 left.append(group.text.strip(stripspace)) 1919 if not found: 1920 left.append('Network Working Group') 1921 left.append('Internet-Draft') 1922 # 1923 category = self.root.get('category', '') 1924 # 1925 ## The following code duplicates series info also generated from <seriesInfo> 1926 ## entries. Commented out. 1927 #series_no = self.root.get('seriesNo') 1928 #if category and series_no and category in strings.series_name: 1929 # left.append('%s: %s (if approved)' % (strings.series_name[category], series_no)) 1930 #else: 1931 # pass 1932 # 1933 obsoletes = self.root.get('obsoletes') 1934 if obsoletes: 1935 left += wrap('Obsoletes: ', normalize(obsoletes), left, right, suffix=' (if approved)') 1936 updates = self.root.get('updates') 1937 if updates: 1938 left += wrap('Updates: ', normalize(updates), left, right, suffix=' (if approved)') 1939 # 1940 if category: 1941 if category in strings.category_name: 1942 left.append('Intended status: %s' % (strings.category_name[category], )) 1943 else: 1944 self.warn(self.root, "Expected a known category, one of %s, but found '%s'" % (','.join(strings.category_name.keys()), category, )) 1945 else: 1946 self.warn(self.root, "Expected a category, one of %s, but found none" % (','.join(strings.category_name.keys()), )) 1947 # 1948 if self.root.get('ipr') != 'none': 1949 exp = get_expiry_date(self.root, self.date) 1950 left.append('Expires: %s' % format_date(exp.year, exp.month, exp.day, self.options.legacy_date_format)) 1951 return left 1952 # 1953 def get_right(front): 1954 "Get front page top right column" 1955 # RFC 7841 RFC Streams, Headers, Boilerplates May 2016 1956 # 1957 # 3.1. The Title Page Header 1958 # 1959 # The information at the front of the RFC includes the name and 1960 # affiliation of the authors as well as the RFC publication month and 1961 # year. 1962 # 1963 #------------------------------------------------------------------------- 1964 # 1965 # RFC 7322 RFC Style Guide September 2014 1966 # 1967 # 4.1.2. Organization 1968 # 1969 # The author's organization is indicated on the line following the 1970 # author's name. 1971 # 1972 # For multiple authors, each author name appears on its own line, 1973 # followed by that author's organization. When more than one author is 1974 # affiliated with the same organization, the organization can be 1975 # "factored out," appearing only once following the corresponding 1976 # Author lines. However, such factoring is inappropriate when it would 1977 # force an unacceptable reordering of author names. 1978 right = [] 1979 auth = namedtuple('author', ['name', 'org']) 1980 prev = auth(None, '') 1981 authors = front.xpath('./author') 1982 for a in authors: 1983 this = auth(*self.render_author_front(a, **kwargs)) 1984 if right and this.name and this.org and this.org == prev.org: 1985 right[-1] = this.name 1986 right.append(this.org or '') 1987 else: 1988 if this.name: 1989 right.append(this.name) 1990 if this.org!=None: 1991 right.append(this.org) 1992 prev = this 1993 # We don't need show a trailing blank line if the last author has a blank organization 1994 if prev.org == '': 1995 right = right[:-1] 1996 right.append(self.render_date(front.find('./date'), width, **kwargs)) 1997 return right 1998 # 1999 # get right first, in order to limit the width of left lines as needed 2000 right = get_right(e) 2001 left = get_left(e, right) 2002 # 2003 first_page_header = join_cols(left, right) 2004 first_page_header += '\n\n' 2005 first_page_header += self.render_title_front(e.find('./title'), width, **kwargs) 2006 return first_page_header 2007 2008 def render_reference_front(self, e, width, **kwargs): 2009 return self.default_renderer(e, width, **kwargs) 2010 2011 # 2.27. <iref> 2012 # 2013 # Provides terms for the document's index. 2014 # 2015 # Index entries can be either regular entries (when just the "item" 2016 # attribute is given) or nested entries (by specifying "subitem" as 2017 # well), grouped under a regular entry. 2018 # 2019 # Index entries generally refer to the exact place where the <iref> 2020 # element occurred. An exception is the occurrence as a child element 2021 # of <section>, in which case the whole section is considered to be 2022 # relevant for that index entry. In some formats, index entries of 2023 # this type might be displayed as ranges. 2024 # 2025 # When the prep tool is creating index content, it collects the items 2026 # in a case-sensitive fashion for both the item and subitem level. 2027 # 2028 # This element appears as a child element of <annotation> 2029 # (Section 2.3), <aside> (Section 2.6), <blockquote> (Section 2.10), 2030 # <c> (Section 3.1), <dd> (Section 2.18), <dt> (Section 2.21), <em> 2031 # (Section 2.22), <figure> (Section 2.25), <li> (Section 2.29), 2032 # <postamble> (Section 3.5), <preamble> (Section 3.6), <section> 2033 # (Section 2.46), <strong> (Section 2.50), <sub> (Section 2.51), <sup> 2034 # (Section 2.52), <t> (Section 2.53), <table> (Section 2.54), <td> 2035 # (Section 2.56), <th> (Section 2.58), <tt> (Section 2.62), and <ttcol> 2036 # (Section 3.9). 2037 # 2038 # Content model: this element does not have any contents. 2039 def render_iref(self, e, width, **kwargs): 2040 p = e.getparent() 2041 self.index_items.append(IndexItem(e.get('item'), e.get('subitem'), p.get('pn'), None)) 2042 return '' if p.tag in ['section', 'figure', 'table', ] else e.tail or '' 2043 2044 # 2.27.1. "item" Attribute (Mandatory) 2045 # 2046 # The item to include. 2047 # 2048 # 2.27.2. "primary" Attribute 2049 # 2050 # Setting this to "true" declares the occurrence as "primary", which 2051 # might cause it to be highlighted in the index. There is no 2052 # restriction on the number of occurrences that can be "primary". 2053 # 2054 # Allowed values: 2055 # 2056 # o "true" 2057 # 2058 # o "false" (default) 2059 # 2060 # 2.27.3. "subitem" Attribute 2061 # 2062 # The subitem to include. 2063 2064 2065 # 2.28. <keyword> 2066 # 2067 # Specifies a keyword applicable to the document. 2068 # 2069 # Note that each element should only contain a single keyword; for 2070 # multiple keywords, the element can simply be repeated. 2071 # 2072 # Keywords are used both in the RFC Index and in the metadata of 2073 # generated document representations. 2074 # 2075 # This element appears as a child element of <front> (Section 2.26). 2076 # 2077 # Content model: only text content. 2078 2079 2080 # 2.29. <li> 2081 # 2082 # A list element, used in <ol> and <ul>. 2083 # 2084 # This element appears as a child element of <ol> (Section 2.34) and 2085 # <ul> (Section 2.63). 2086 # 2087 # Content model: 2088 # 2089 # Either: 2090 # 2091 # In any order, but at least one of: 2092 # 2093 # * <artwork> elements (Section 2.5) 2094 # 2095 # * <dl> elements (Section 2.20) 2096 # 2097 # * <figure> elements (Section 2.25) 2098 # 2099 # * <ol> elements (Section 2.34) 2100 # 2101 # * <sourcecode> elements (Section 2.48) 2102 # 2103 # * <t> elements (Section 2.53) 2104 # 2105 # * <ul> elements (Section 2.63) 2106 # 2107 # Or: 2108 # 2109 # In any order, but at least one of: 2110 # 2111 # * Text 2112 # 2113 # * <bcp14> elements (Section 2.9) 2114 # 2115 # * <cref> elements (Section 2.16) 2116 # 2117 # * <em> elements (Section 2.22) 2118 # 2119 # * <eref> elements (Section 2.24) 2120 # 2121 # * <iref> elements (Section 2.27) 2122 # 2123 # * <relref> elements (Section 2.44) 2124 # 2125 # * <strong> elements (Section 2.50) 2126 # 2127 # * <sub> elements (Section 2.51) 2128 # 2129 # * <sup> elements (Section 2.52) 2130 # 2131 # * <tt> elements (Section 2.62) 2132 # 2133 # * <xref> elements (Section 2.66) 2134 # 2135 # 2.29.1. "anchor" Attribute 2136 # 2137 # Document-wide unique identifier for this list item. 2138 # Text 2139 def render_li(self, e, width, **kwargs): 2140 p = e.getparent() 2141 text = p._initial_text(e, p) 2142 tt, __ = self.text_or_block_renderer(e, width, **kwargs) 2143 if isinstance(tt, list): 2144 lines = stripl(tt) 2145 if lines and lines[0].elem.tag not in ['artwork', 'figure', 'sourcecode', 'li', ]: 2146 lines[0].text = text + lines[0].text.lstrip(stripspace) 2147 else: 2148 text += tt.lstrip(stripspace) 2149 lines = mklines(text, e) 2150 if self.options.pagination: 2151 for i, l in enumerate(lines): 2152 if '\u2026' in l.text: 2153 lines[i].text = expand_ellipsis(l.text, width) 2154 return lines 2155 2156 def get_ol_li_initial_text(self, e, p): 2157 text = p._format % p._int2str(p._counter) 2158 text += ' '*(p._padding-len(text)) 2159 p._counter += 1 2160 return text 2161 2162 def get_ul_li_initial_text(self, e, p): 2163 if p._bare: 2164 text = '' 2165 else: 2166 text = p._symbol 2167 text += ' '*(p._padding-len(text)) 2168 return text 2169 2170 # 2.30. <link> 2171 # 2172 # A link to an external document that is related to the RFC. 2173 # 2174 # The following are the supported types of external documents that can 2175 # be pointed to in a <link> element: 2176 # 2177 # o The current International Standard Serial Number (ISSN) for the 2178 # RFC Series. The value for the "rel" attribute is "item". The 2179 # link should use the form "urn:issn:". 2180 # 2181 # o The Digital Object Identifier (DOI) for this document. The value 2182 # for the "rel" attribute is "describedBy". The link should use the 2183 # form specified in [RFC7669]; this is expected to change in the 2184 # future. 2185 # 2186 # o The Internet-Draft that was submitted to the RFC Editor to become 2187 # the published RFC. The value for the "rel" attribute is 2188 # "convertedFrom". The link should be to an IETF-controlled web 2189 # site that retains copies of Internet-Drafts. 2190 # 2191 # o A representation of the document offered by the document author. 2192 # The value for the "rel" attribute is "alternate". The link can be 2193 # to a personally run web site. 2194 # 2195 # In RFC production mode, the prep tool needs to check the values for 2196 # <link> before an RFC is published. In draft production mode, the 2197 # prep tool might remove some <link> elements during the draft 2198 # submission process. 2199 # 2200 # This element appears as a child element of <rfc> (Section 2.45). 2201 # 2202 # Content model: this element does not have any contents. 2203 def render_link(self, e, width, **kwargs): 2204 return '' 2205 2206 # 2.30.1. "href" Attribute (Mandatory) 2207 # 2208 # The URI of the external document. 2209 # 2210 # 2.30.2. "rel" Attribute 2211 # 2212 # The relationship of the external document to this one. The 2213 # relationships are taken from the "Link Relations" registry maintained 2214 # by IANA [LINKRELATIONS]. 2215 2216 2217 # 2.31. <middle> 2218 # 2219 # Represents the main content of the document. 2220 # 2221 # This element appears as a child element of <rfc> (Section 2.45). 2222 # 2223 # Content model: 2224 # 2225 # One or more <section> elements (Section 2.46) 2226 def render_middle(self, e, width, **kwargs): 2227 kwargs['joiners'] = base_joiners 2228 lines = [] 2229 for c in e.getchildren(): 2230 lines = self.ljoin(lines, c, width, **kwargs) 2231 return lines 2232 2233 # 2.32. <name> 2234 # 2235 # The name of the section, note, figure, or texttable. This name can 2236 # indicate markup of flowing text (for example, including references or 2237 # making some characters use a fixed-width font). 2238 # 2239 # This element appears as a child element of <figure> (Section 2.25), 2240 # <note> (Section 2.33), <references> (Section 2.42), <section> 2241 # (Section 2.46), <table> (Section 2.54), and <texttable> 2242 # (Section 3.8). 2243 # 2244 # Content model: 2245 # 2246 # In any order: 2247 # 2248 # o Text 2249 # 2250 # o <cref> elements (Section 2.16) 2251 # 2252 # o <eref> elements (Section 2.24) 2253 # 2254 # o <relref> elements (Section 2.44) 2255 # 2256 # o <tt> elements (Section 2.62) 2257 # 2258 # o <xref> elements (Section 2.66) 2259 def render_name(self, e, width, **kwargs): 2260 hang=kwargs['joiners'][e.tag].hang 2261 return fill(self.inner_text_renderer(e).strip(stripspace), width=width, hang=hang) 2262 2263 # 2.33. <note> 2264 # 2265 # Creates an unnumbered, titled block of text that appears after the 2266 # Abstract. 2267 # 2268 # It is usually used for additional information to reviewers (Working 2269 # Group information, mailing list, ...) or for additional publication 2270 # information such as "IESG Notes". 2271 # 2272 # This element appears as a child element of <front> (Section 2.26). 2273 # 2274 # Content model: 2275 # 2276 # In this order: 2277 # 2278 # 1. One optional <name> element (Section 2.32) 2279 # 2280 # 2. In any order, but at least one of: 2281 # 2282 # * <dl> elements (Section 2.20) 2283 # 2284 # * <ol> elements (Section 2.34) 2285 # 2286 # * <t> elements (Section 2.53) 2287 # 2288 # * <ul> elements (Section 2.63) 2289 def render_note(self, e, width, **kwargs): 2290 kwargs['joiners'].update( 2291 { 2292 None: Joiner('\n\n', 3, 0, False, False), 2293 'name': Joiner(': ', 0, 0, False, False), 2294 } 2295 ) 2296 lines = [] 2297 if e[0].tag != 'name': 2298 lines.append(Line("Note", e)) 2299 for c in e.getchildren(): 2300 lines = self.ljoin(lines, c, width, **kwargs) 2301 return lines 2302 2303 # 2.33.1. "removeInRFC" Attribute 2304 # 2305 # If set to "true", this note is marked in the prep tool with text 2306 # indicating that it should be removed before the document is published 2307 # as an RFC. That text will be "This note is to be removed before 2308 # publishing as an RFC." 2309 # 2310 # Allowed values: 2311 # 2312 # o "true" 2313 # 2314 # o "false" (default) 2315 # 2316 # 2.33.2. "title" Attribute 2317 # 2318 # Deprecated. Use <name> instead. 2319 2320 2321 # 2.34. <ol> 2322 # 2323 # An ordered list. The labels on the items will be either a number or 2324 # a letter, depending on the value of the style attribute. 2325 # 2326 # This element appears as a child element of <abstract> (Section 2.1), 2327 # <aside> (Section 2.6), <blockquote> (Section 2.10), <dd> 2328 # (Section 2.18), <li> (Section 2.29), <note> (Section 2.33), <section> 2329 # (Section 2.46), <td> (Section 2.56), and <th> (Section 2.58). 2330 # 2331 # Content model: 2332 # 2333 # One or more <li> elements (Section 2.29) 2334 # 2335 # 2.34.1. "anchor" Attribute 2336 # 2337 # Document-wide unique identifier for the list. 2338 # 2339 # 2.34.2. "group" Attribute 2340 # 2341 # When the prep tool sees an <ol> element with a "group" attribute that 2342 # has already been seen, it continues the numbering of the list from 2343 # where the previous list with the same group name left off. If an 2344 # <ol> element has both a "group" attribute and a "start" attribute, 2345 # the group's numbering is reset to the given start value. 2346 # 2347 # 2.34.3. "spacing" Attribute 2348 # 2349 # Defines whether or not there is a blank line between entries. 2350 # spacing="normal" indicates a single blank line, while 2351 # spacing="compact" indicates no space between. 2352 # 2353 # Allowed values: 2354 # 2355 # o "normal" (default) 2356 # 2357 # o "compact" 2358 # 2359 # 2.34.4. "start" Attribute 2360 # 2361 # The ordinal value at which to start the list. This defaults to "1" 2362 # and must be an integer of 0 or greater. 2363 # 2364 # 2.34.5. "type" Attribute 2365 # 2366 # The type of the labels on list items. If the length of the type 2367 # value is 1, the meaning is the same as it is for HTML: 2368 # 2369 # a Lowercase letters (a, b, c, ...) 2370 # 2371 # A Uppercase letters (A, B, C, ...) 2372 # 2373 # 1 Decimal numbers (1, 2, 3, ...) 2374 # 2375 # i Lowercase Roman numerals (i, ii, iii, ...) 2376 # 2377 # I Uppercase Roman numerals (I, II, III, ...) 2378 # 2379 # For types "a" and "A", after the 26th entry, the numbering starts at 2380 # "aa"/"AA", then "ab"/"AB", and so on. 2381 # 2382 # If the length of the type value is greater than 1, the value must 2383 # contain a percent-encoded indicator and other text. The value is a 2384 # free-form text that allows counter values to be inserted using a 2385 # "percent-letter" format. For instance, "[REQ%d]" generates labels of 2386 # the form "[REQ1]", where "%d" inserts the item number as a decimal 2387 # number. 2388 # 2389 # The following formats are supported: 2390 # 2391 # %c Lowercase letters (a, b, c, ...) 2392 # 2393 # %C Uppercase letters (A, B, C, ...) 2394 # 2395 # %d Decimal numbers (1, 2, 3, ...) 2396 # 2397 # %i Lowercase Roman numerals (i, ii, iii, ...) 2398 # 2399 # %I Uppercase Roman numerals (I, II, III, ...) 2400 # 2401 # %% Represents a percent sign 2402 # 2403 # Other formats are reserved for future use. Only one percent encoding 2404 # other than "%%" is allowed in a type string. 2405 # 2406 # It is an error for the type string to be empty. For bulleted lists, 2407 # use the <ul> element. For lists that have neither bullets nor 2408 # numbers, use the <ul> element with the 'empty="true"' attribute. 2409 # 2410 # If no type attribute is given, the default type is the same as 2411 # "type='%d.'". 2412 def render_ol(self, e, width, **kwargs): 2413 # setup and validation 2414 start = e.get('start') 2415 if not start.isdigit(): 2416 self.warn(e, "Expected a numeric value for the 'start' attribute, but found %s" % (etree.tostring(e), )) 2417 start = '1' 2418 e._counter = int(start) 2419 # 2420 type = e.get('type') 2421 if not type: 2422 self.warn(e, "Expected the 'type' attribute to have a string value, but found %s" % (etree.tostring(e), )) 2423 type = '1' 2424 if '%p' in type: 2425 for p in e.iterancestors('li'): 2426 pcounter = p.get('derivedCounter') 2427 type = type.replace('%p', pcounter ) 2428 break 2429 e._type = type 2430 if len(type) > 1: 2431 formspec = re.search('%([cCdiIoOxX])', type) 2432 if formspec: 2433 fchar = formspec.group(1) 2434 fspec = formspec.group(0) 2435 e._format = type.replace(fspec, '%s') 2436 else: 2437 self.err(e, "Expected an <ol> format specification of '%%' followed by upper- or lower-case letter, of one of c,d,i,o,x; but found '%s'" % (type, )) 2438 fchar = 'd' 2439 e._format = '%s' 2440 else: 2441 fchar = type 2442 e._format = '%s.' 2443 e._int2str = ol_style_formatter[fchar] 2444 e._initial_text = self.get_ol_li_initial_text 2445 # 2446 compact = e.get('spacing') == 'compact' 2447 ljoin = '\n' if compact else '\n\n' 2448 # 2449 adaptive_indent = len(e._format % (' '*num_width(fchar, len(list(e))))) + len(' ') 2450 indent_attrib = e.get('indent') or '3' 2451 indent = int(indent_attrib) if indent_attrib.isdigit() else adaptive_indent 2452 e._padding = indent 2453 kwargs['joiners'].update({ 2454 None: Joiner(ljoin, indent, 0, False, False), 2455 'li': Joiner(ljoin, 0, 0, False, False), 2456 't': Joiner(ljoin, indent, 0, False, False), 2457 }) 2458 # rendering 2459 lines = [] 2460 for c in e.getchildren(): 2461 lines = self.ljoin(lines, c, width, **kwargs) 2462 return lines 2463 2464 # 2.35. <organization> 2465 # 2466 # Specifies the affiliation [RFC7322] of an author. 2467 # 2468 # This information appears both in the "Author's Address" section and 2469 # on the front page (see [RFC7322] for more information). If the value 2470 # is long, an abbreviated variant can be specified in the "abbrev" 2471 # attribute. 2472 # 2473 # This element appears as a child element of <author> (Section 2.7). 2474 # 2475 # Content model: only text content. 2476 # 2477 # 2.35.1. "abbrev" Attribute 2478 # 2479 # Abbreviated variant. 2480 # 2481 # 2.35.2. "ascii" Attribute 2482 # 2483 # The ASCII equivalent of the organization's name. 2484 def render_front_organization(self, e, **kwargs): 2485 author = e.getparent() 2486 org, ascii = short_org_name_set(author) 2487 if ascii: 2488 org += ' (%s)' % ascii.strip(stripspace) 2489 return org 2490 2491 def render_organization(self, e, width, **kwargs): 2492 text = '' 2493 if e != None: 2494 org = e.text or '' 2495 org = org.strip(stripspace) 2496 if org and not is_script(org, 'Latin'): 2497 ascii = e.get('ascii') 2498 if ascii and ascii != org: 2499 org += ' (%s)' % ascii.strip(stripspace) 2500 text = fill(org, width=width, **kwargs) 2501 return text 2502 2503 # 2.36. <phone> 2504 # 2505 # Represents a phone number. 2506 # 2507 # The value is expected to be the scheme-specific part of a "tel" URI 2508 # (and so does not include the prefix "tel:"), using the 2509 # "global-number-digits" syntax. See Section 3 of [RFC3966] for 2510 # details. 2511 # 2512 # This element appears as a child element of <address> (Section 2.2). 2513 # 2514 # Content model: only text content. 2515 def render_phone(self, e, width, **kwargs): 2516 latin = kwargs.pop('latin', None) 2517 text = fill("Phone: %s"%e.text, width=width, **kwargs) if e.text and latin!=False else '' 2518 return text 2519 2520 # 2.37. <postal> 2521 # 2522 # Contains optional child elements providing postal information. These 2523 # elements will be displayed in an order that is specific to 2524 # formatters. A postal address can contain only a set of <street>, 2525 # <city>, <region>, <code>, and <country> elements, or only an ordered 2526 # set of <postalLine> elements, but not both. 2527 # 2528 # This element appears as a child element of <address> (Section 2.2). 2529 # 2530 # Content model: 2531 # 2532 # Either: 2533 # 2534 # In any order: 2535 # 2536 # * <city> elements (Section 2.13) 2537 # 2538 # * <code> elements (Section 2.14) 2539 # 2540 # * <country> elements (Section 2.15) 2541 # 2542 # * <region> elements (Section 2.43) 2543 # 2544 # * <street> elements (Section 2.49) 2545 # 2546 # Or: 2547 # 2548 # One or more <postalLine> elements (Section 2.38) 2549 def render_postal(self, e, width, **kwargs): 2550 latin = kwargs.pop('latin', False) 2551 adr = get_normalized_address_info(self, e, latin=latin) 2552 for k in adr: 2553 if isinstance(adr[k], list): 2554 adr[k] = '\n'.join(adr[k]) 2555 set_joiners(kwargs, { None: Joiner('\n', 0, 0, False, False), }) 2556 if adr: 2557 if all(is_script(v, 'Latin') for v in adr.values() if v): 2558 latin = True 2559 try: 2560 text = format_address(adr, latin=latin) 2561 text = text.strip(stripspace)+'\n\n' 2562 return mklines(text, e) 2563 except: 2564 debug.pprint('adr') 2565 raise 2566 else: 2567 author = e.getparent().getparent() 2568 text = self.render_author_name(author, width, **kwargs) 2569 if e.find('./postalLine') != None: 2570 lines = [] 2571 for c in e.getchildren(): 2572 lines = self.ljoin(lines, c, width, **kwargs) 2573 else: 2574 lines = [] 2575 for street in e.findall('street'): 2576 if street.text: 2577 lines.append(Line(street.text, street)) 2578 cityline = [] 2579 city = e.find('city') 2580 if city is not None and city.text: 2581 cityline.append(city.text) 2582 region = e.find('region') 2583 if region is not None and region.text: 2584 if len(cityline) > 0: cityline.append(', '); 2585 cityline.append(region.text) 2586 code = e.find('code') 2587 if code is not None and code.text: 2588 if len(cityline) > 0: cityline.append(' '); 2589 cityline.append(code.text) 2590 if len(cityline) > 0: 2591 lines.append(Line(''.join(cityline), e)) 2592 country = e.find('country') 2593 if country is not None and country.text: 2594 lines.append(Line(country.text, country)) 2595 lines.append( Line('', e) ) 2596 return lines 2597 2598 # 2.38. <postalLine> 2599 # 2600 # Represents one line of a postal address. When more than one 2601 # <postalLine> is given, the prep tool emits them in the order given. 2602 # 2603 # This element appears as a child element of <postal> (Section 2.37). 2604 # 2605 # Content model: only text content. 2606 # 2607 # 2.38.1. "ascii" Attribute 2608 # 2609 # The ASCII equivalent of the text in the address line. 2610 def render_postalline(self, e, width, **kwargs): 2611 text = fill(self.inner_text_renderer(e), width=width, **kwargs) 2612 return text 2613 2614 # 2.39. <refcontent> 2615 # 2616 # Text that should appear between the title and the date of a 2617 # reference. The purpose of this element is to prevent the need to 2618 # abuse <seriesInfo> to get such text in a reference. 2619 # 2620 # For example: 2621 # 2622 # <reference anchor="April1"> 2623 # <front> 2624 # <title>On Being A Fool</title> 2625 # <author initials="K." surname="Phunny" fullname="Knot Phunny"/> 2626 # <date year="2000" month="April"/> 2627 # </front> 2628 # <refcontent>Self-published pamphlet</refcontent> 2629 # </reference> 2630 # 2631 # would render as: 2632 # 2633 # [April1] Phunny, K., "On Being A Fool", Self-published 2634 # pamphlet, April 2000. 2635 # 2636 # This element appears as a child element of <reference> 2637 # (Section 2.40). 2638 # 2639 # Content model: 2640 # 2641 # In any order: 2642 # 2643 # o Text 2644 # 2645 # o <bcp14> elements (Section 2.9) 2646 # 2647 # o <em> elements (Section 2.22) 2648 # 2649 # o <strong> elements (Section 2.50) 2650 # 2651 # o <sub> elements (Section 2.51) 2652 # 2653 # o <sup> elements (Section 2.52) 2654 # 2655 # o <tt> elements (Section 2.62) 2656 def render_refcontent(self, e, width, **kwargs): 2657 text = self.inner_text_renderer(e) 2658 return text 2659 2660 # 2.40. <reference> 2661 # 2662 # Represents a bibliographic reference. 2663 # 2664 # This element appears as a child element of <referencegroup> 2665 # (Section 2.41) and <references> (Section 2.42). 2666 # 2667 # Content model: 2668 # 2669 # In this order: 2670 # 2671 # 1. One <front> element (Section 2.26) 2672 # 2673 # 2. In any order: 2674 # 2675 # * <annotation> elements (Section 2.3) 2676 # 2677 # * <format> elements (Section 3.3) 2678 # 2679 # * <refcontent> elements (Section 2.39) 2680 # 2681 # * <seriesInfo> elements (Section 2.47; deprecated in this 2682 # context) 2683 # 2684 # 2.40.1. "anchor" Attribute (Mandatory) 2685 # 2686 # Document-wide unique identifier for this reference. Usually, this 2687 # will be used both to "label" the reference in the "References" 2688 # section and as an identifier in links to this reference entry. 2689 # 2690 # 2.40.2. "quoteTitle" Attribute 2691 # 2692 # Specifies whether or not the title in the reference should be quoted. 2693 # This can be used to prevent quoting, such as on errata. 2694 # 2695 # Allowed values: 2696 # 2697 # o "true" (default) 2698 # 2699 # o "false" 2700 # 2701 # 2.40.3. "target" Attribute 2702 # 2703 # Holds the URI for the reference. 2704 def render_reference(self, e, width, **kwargs): 2705 # rendering order: authors, title, seriesInfo, date, target, annotation 2706 p = e.getparent() 2707 if p.tag == 'referencegroup': 2708 label = '' 2709 else: 2710 label = self.refname_mapping[e.get('anchor')] 2711 label = ('[%s]' % label).ljust(11) 2712 # ensure the desired ordering 2713 elements = [] 2714 for ctag in ('title', 'refcontent', 'stream', 'seriesInfo', 'date',): 2715 for c in e.iterdescendants(ctag): 2716 if p.tag == 'referencegroup' and c.tag == 'seriesInfo' and c.get('name') == 'DOI': 2717 # Don't render DOI within a reference group 2718 continue 2719 elements.append(c) 2720 if p.tag != 'referencegroup': 2721 target = e.get('target') 2722 if target: 2723 url = self.element('refcontent') 2724 url.text = '<%s>' % target 2725 elements.append(url) 2726 set_joiners(kwargs, { 2727 None: Joiner(', ', 0, 0, False, False), 2728 'annotation': Joiner(' ', 0, 0, False, False), 2729 }) 2730 width = width-11 2731 text = self.render_authors(e, width, **kwargs) 2732 for c in elements: 2733 text = self.tjoin(text, c, width, **kwargs) 2734 text += '.' 2735 for ctag in ('annotation', ): 2736 for c in e.iterdescendants(ctag): 2737 text = self.tjoin(text, c, width, **kwargs) 2738 text = fill(text, width=width, fix_sentence_endings=False, keep_url=True, **kwargs).lstrip(stripspace) 2739 2740 text = indent(text, 11, 0) 2741 if p.tag != 'referencegroup': 2742 if len(label.strip(stripspace)) > 10: 2743 label += '\n' 2744 else: 2745 text = text.lstrip(stripspace) 2746 text = label + text 2747 lines = mklines(text, e) 2748 return lines 2749 2750 2751 2752 # 2.41. <referencegroup> 2753 # 2754 # Represents a list of bibliographic references that will be 2755 # represented as a single reference. This is most often used to 2756 # reference STDs and BCPs, where a single reference (such as "BCP 9") 2757 # may encompass more than one RFC. 2758 # 2759 # This element appears as a child element of <references> 2760 # (Section 2.42). 2761 # 2762 # Content model: 2763 # 2764 # One or more <reference> elements (Section 2.40) 2765 # 2766 # 2.41.1. "anchor" Attribute (Mandatory) 2767 # 2768 # Document-wide unique identifier for this reference group. Usually, 2769 # this will be used both to "label" the reference group in the 2770 # "References" section and as an identifier in links to this reference 2771 # entry. 2772 def render_referencegroup(self, e, width, **kwargs): 2773 kwargs['joiners'].update({ 2774 'reference': Joiner('\n\n', 0, 0, False, False), 2775 't': Joiner('\n\n', 11, 0, False, False), 2776 }) 2777 label = self.refname_mapping[e.get('anchor')] 2778 label = ('[%s]' % label).ljust(11) 2779 lines = [] 2780 for c in e.getchildren(): 2781 lines = self.ljoin(lines, c, width, **kwargs) 2782 target = e.get('target') 2783 if target: 2784 t = self.element('t') 2785 t.text = '<%s>' % target 2786 lines = self.ljoin(lines, t, width, **kwargs) 2787 if len(label.strip(stripspace)) > 10: 2788 lines = [ Line(label, e) ] + lines 2789 else: 2790 lines[0].text = label + lines[0].text.lstrip(stripspace) 2791 return lines 2792 2793 # 2.42. <references> 2794 # 2795 # Contains a set of bibliographic references. 2796 # 2797 # In the early days of the RFC Series, there was only one "References" 2798 # section per RFC. This convention was later changed to group 2799 # references into two sets, "Normative" and "Informative", as described 2800 # in [RFC7322]. This vocabulary supports the split with the <name> 2801 # child element. In general, the title should be either "Normative 2802 # References" or "Informative References". 2803 # 2804 # This element appears as a child element of <back> (Section 2.8). 2805 # 2806 # Content model: 2807 # 2808 # In this order: 2809 # 2810 # 1. One optional <name> element (Section 2.32) 2811 # 2812 # 2. In any order: 2813 # 2814 # * <reference> elements (Section 2.40) 2815 # 2816 # * <referencegroup> elements (Section 2.41) 2817 # 2818 # 2.42.1. "anchor" Attribute 2819 # 2820 # An optional user-supplied identifier for this set of references. 2821 # 2822 # 2.42.2. "title" Attribute 2823 # 2824 # Deprecated. Use <name> instead. 2825 def render_references(self, e, width, **kwargs): 2826 self.part = e.tag 2827 kwargs['joiners'].update({ 2828 None: Joiner('\n\n', 3, 0, False, False), 2829 'name': Joiner(' ' , 0, 0, False, False), 2830 'reference': Joiner('\n\n', 3, 0, False, False), 2831 'references': Joiner('\n\n', 0, 0, False, False), 2832 }) 2833 lines = [] 2834 if e.find('name') != None: 2835 if self.check_refs_numbered(): 2836 pn = e.get('pn') 2837 text = pn.split('-',1)[1].replace('-', ' ').title() +'.' 2838 else: 2839 text = '' 2840 lines += mklines(self.tjoin(text, e[0], width, **kwargs), e) 2841 for c in e[1:]: 2842 lines = self.ljoin(lines, c, width, **kwargs) 2843 return lines 2844 2845 2846 # 2.43. <region> 2847 # 2848 # Provides the region name in a postal address. 2849 # 2850 # This element appears as a child element of <postal> (Section 2.37). 2851 # 2852 # Content model: only text content. 2853 # 2854 # 2.43.1. "ascii" Attribute 2855 # 2856 # The ASCII equivalent of the region name. 2857 render_region = null_renderer # handled in render_address 2858 2859 # 2.44. <relref> 2860 # 2861 # Represents a link to a specific part of a document that appears in a 2862 # <reference> element. Formatters that have links (such as HTML and 2863 # PDF) render <relref> elements as external hyperlinks to the specified 2864 # part of the reference, creating the link target by combining the base 2865 # URI from the <reference> element with the "relative" attribute from 2866 # this element. The "target" attribute is required, and it must be the 2867 # anchor of a <reference> element. 2868 # 2869 # The "section" attribute is required, and the "relative" attribute is 2870 # optional. If the reference is not an RFC or Internet-Draft that is 2871 # in the v3 format, the element needs to have a "relative" attribute; 2872 # in this case, the value of the "section" attribute is ignored. 2873 # 2874 # An example of the <relref> element with text content might be: 2875 # 2876 # See 2877 # <relref section="2.3" target="RFC9999" displayFormat="bare"> 2878 # the protocol overview</relref> 2879 # for more information. 2880 # 2881 # An HTML formatter might generate: 2882 # 2883 # See 2884 # <a href="http://www.rfc-editor.org/rfc/rfc9999.html#s-2.3"> 2885 # the protocol overview</a> 2886 # for more information. 2887 # 2888 # Note that the URL in the above example might be different when the 2889 # RFC Editor deploys the v3 format. 2890 # 2891 # This element appears as a child element of <annotation> 2892 # (Section 2.3), <blockquote> (Section 2.10), <cref> (Section 2.16), 2893 # <dd> (Section 2.18), <dt> (Section 2.21), <em> (Section 2.22), <li> 2894 # (Section 2.29), <name> (Section 2.32), <preamble> (Section 3.6), 2895 # <strong> (Section 2.50), <sub> (Section 2.51), <sup> (Section 2.52), 2896 # <t> (Section 2.53), <td> (Section 2.56), <th> (Section 2.58), and 2897 # <tt> (Section 2.62). 2898 # 2899 # Content model: only text content. 2900 # 2901 # 2.44.1. "displayFormat" Attribute 2902 # 2903 # This attribute is used to signal formatters what the desired format 2904 # of the relative reference should be. Formatters for document types 2905 # that have linking capability should wrap each part of the displayed 2906 # text in hyperlinks. If there is content in the <relref> element, 2907 # formatters will ignore the value of this attribute. 2908 # 2909 # "of" 2910 # 2911 # A formatter should display the relative reference as the word 2912 # "Section" followed by a space, the contents of the "section" 2913 # attribute followed by a space, the word "of", another space, and 2914 # the value from the "target" attribute enclosed in square brackets. 2915 # 2916 # For example, with an input of: 2917 # 2918 # See 2919 # <relref section="2.3" target="RFC9999" displayFormat="of"/> 2920 # for an overview. 2921 # 2922 # An HTML formatter might generate: 2923 # 2924 # See 2925 # <a href="http://www.rfc-editor.org/info/rfc9999#s-2.3"> 2926 # Section 2.3</a> of 2927 # [<a href="#RFC9999">RFC9999</a>] 2928 # for an overview. 2929 # 2930 # Note that "displayFormat='of'" is the default for <relref>, so it 2931 # does not need to be given in a <relref> element if that format is 2932 # desired. 2933 # 2934 # "comma" 2935 # 2936 # A formatter should display the relative reference as the value 2937 # from the "target" attribute enclosed in square brackets, a comma, 2938 # a space, the word "Section" followed by a space, and the "section" 2939 # attribute. 2940 # 2941 # For example, with an input of: 2942 # 2943 # See 2944 # <relref section="2.3" target="RFC9999" displayFormat="comma"/>, 2945 # for an overview. 2946 # 2947 # An HTML formatter might generate: 2948 # 2949 # See 2950 # [<a href="#RFC9999">RFC9999</a>], 2951 # <a href="http://www.rfc-editor.org/info/rfc9999#s-2.3"> 2952 # Section 2.3</a>, for an overview. 2953 # 2954 # "parens" 2955 # 2956 # A formatter should display the relative reference as the value 2957 # from the "target" attribute enclosed in square brackets, a space, 2958 # a left parenthesis, the word "Section" followed by a space, the 2959 # "section" attribute, and a right parenthesis. 2960 # 2961 # For example, with an input of: 2962 # 2963 # See 2964 # <relref section="2.3" target="RFC9999" displayFormat="parens"/> 2965 # for an overview. 2966 # 2967 # An HTML formatter might generate: 2968 # 2969 # See 2970 # [<a href="#RFC9999">RFC9999</a>] 2971 # (<a href="http://www.rfc-editor.org/info/rfc9999#s-2.3"> 2972 # Section 2.3</a>) 2973 # for an overview. 2974 # 2975 # "bare" 2976 # 2977 # A formatter should display the relative reference as the contents 2978 # of the "section" attribute and nothing else. This is useful when 2979 # there are multiple relative references to a single base reference. 2980 # 2981 # For example: 2982 # 2983 # See Sections 2984 # <relref section="2.3" target="RFC9999" displayFormat="bare"/> 2985 # and 2986 # <relref section="2.4" target="RFC9999" displayFormat="of"/> 2987 # for an overview. 2988 # 2989 # An HTML formatter might generate: 2990 # 2991 # See Sections 2992 # <a href="http://www.rfc-editor.org/info/rfc9999#s-2.3"> 2993 # 2.3</a> 2994 # and 2995 # <a href="http://www.rfc-editor.org/info/rfc9999#s-2.4"> 2996 # Section 2.4</a> of 2997 # [<a href="#RFC9999">RFC9999</a>] 2998 # for an overview. 2999 # 3000 # Allowed values: 3001 # 3002 # o "of" (default) 3003 # 3004 # o "comma" 3005 # 3006 # o "parens" 3007 # 3008 # o "bare" 3009 # 3010 # 2.44.2. "relative" Attribute 3011 # 3012 # Specifies a relative reference from the URI in the target reference. 3013 # This value must include whatever leading character is needed to 3014 # create the relative reference; typically, this is "#" for HTML 3015 # documents. 3016 # 3017 # 2.44.3. "section" Attribute (Mandatory) 3018 # 3019 # Specifies a section of the target reference. If the reference is not 3020 # an RFC or Internet-Draft in the v3 format, it is an error. 3021 # 3022 # 2.44.4. "target" Attribute (Mandatory) 3023 # 3024 # The anchor of the reference for this element. If this value is not 3025 # an anchor to a <reference> or <referencegroup> element, it is an 3026 # error. If the reference at the target has no URI, it is an error. 3027 def render_relref(self, e, width, **kwargs): 3028 return self.render_xref(e, width, **kwargs) 3029 3030 # 2.45. <rfc> 3031 # 3032 # This is the root element of the xml2rfc vocabulary. 3033 # 3034 # Content model: 3035 # 3036 # In this order: 3037 # 3038 # 1. Optional <link> elements (Section 2.30) 3039 # 3040 # 2. One <front> element (Section 2.26) 3041 # 3042 # 3. One <middle> element (Section 2.31) 3043 # 3044 # 4. One optional <back> element (Section 2.8) 3045 def render_rfc(self, e, width, **kwargs): 3046 self.part = e.tag 3047 lines = [] 3048 for c in e.getchildren(): 3049 if c.tag in (etree.PI, etree.Comment): 3050 continue 3051 self.part = c.tag 3052 lines = self.ljoin(lines, c, width, **kwargs) 3053 return lines 3054 3055 # 2.45.1. "category" Attribute 3056 # 3057 # Deprecated; instead, use the "name" attribute in <seriesInfo>. 3058 # 3059 # 2.45.2. "consensus" Attribute 3060 # 3061 # Affects the generated boilerplate. Note that the values of "no" and 3062 # "yes" are deprecated and are replaced by "false" (the default) and 3063 # "true". 3064 # 3065 # See [RFC7841] for more information. 3066 # 3067 # Allowed values: 3068 # 3069 # o "no" 3070 # 3071 # o "yes" 3072 # 3073 # o "false" (default) 3074 # 3075 # o "true" 3076 # 3077 # 2.45.3. "docName" Attribute 3078 # 3079 # Deprecated; instead, use the "value" attribute in <seriesInfo>. 3080 # 3081 # 2.45.4. "indexInclude" Attribute 3082 # 3083 # Specifies whether or not a formatter is requested to include an index 3084 # in generated files. If the source file has no <iref> elements, an 3085 # index is never generated. This option is useful for generating 3086 # documents where the source document has <iref> elements but the 3087 # author no longer wants an index. 3088 # 3089 # Allowed values: 3090 # 3091 # o "true" (default) 3092 # 3093 # o "false" 3094 # 3095 # 2.45.5. "ipr" Attribute 3096 # 3097 # Represents the Intellectual Property status of the document. See 3098 # Appendix A.1 for details. 3099 # 3100 # 2.45.6. "iprExtract" Attribute 3101 # 3102 # Identifies a single section within the document for which extraction 3103 # "as is" is explicitly allowed (only relevant for historic values of 3104 # the "ipr" attribute). 3105 # 3106 # 2.45.7. "number" Attribute 3107 # 3108 # Deprecated; instead, use the "value" attribute in <seriesInfo>. 3109 # 3110 # 2.45.8. "obsoletes" Attribute 3111 # 3112 # A comma-separated list of RFC numbers or Internet-Draft names. 3113 # 3114 # The prep tool will parse the attribute value so that incorrect 3115 # references can be detected. 3116 # 3117 # 2.45.9. "prepTime" Attribute 3118 # 3119 # The date that the XML was processed by a prep tool. This is included 3120 # in the XML file just before it is saved to disk. The value is 3121 # formatted using the "date-time" format defined in Section 5.6 of 3122 # [RFC3339]. The "time-offset" should be "Z". 3123 # 3124 # 2.45.10. "seriesNo" Attribute 3125 # 3126 # Deprecated; instead, use the "value" attribute in <seriesInfo>. 3127 # 3128 # 2.45.11. "sortRefs" Attribute 3129 # 3130 # Specifies whether or not the prep tool will sort the references in 3131 # each reference section. 3132 # 3133 # Allowed values: 3134 # 3135 # o "true" 3136 # 3137 # o "false" (default) 3138 # 3139 # 2.45.12. "submissionType" Attribute 3140 # 3141 # The document stream, as described in [RFC7841]. (The RFC Series 3142 # Editor may change the list of allowed values in the future.) 3143 # 3144 # Allowed values: 3145 # 3146 # o "IETF" (default) 3147 # 3148 # o "IAB" 3149 # 3150 # o "IRTF" 3151 # 3152 # o "independent" 3153 # 3154 # 2.45.13. "symRefs" Attribute 3155 # 3156 # Specifies whether or not a formatter is requested to use symbolic 3157 # references (such as "[RFC2119]"). If the value for this is "false", 3158 # the references come out as numbers (such as "[3]"). 3159 # 3160 # Allowed values: 3161 # 3162 # o "true" (default) 3163 # 3164 # o "false" 3165 # 3166 # 2.45.14. "tocDepth" Attribute 3167 # 3168 # Specifies the number of levels of headings that a formatter is 3169 # requested to include in the table of contents; the default is "3". 3170 # 3171 # 2.45.15. "tocInclude" Attribute 3172 # 3173 # Specifies whether or not a formatter is requested to include a table 3174 # of contents in generated files. 3175 # 3176 # Allowed values: 3177 # 3178 # o "true" (default) 3179 # 3180 # o "false" 3181 # 3182 # 2.45.16. "updates" Attribute 3183 # 3184 # A comma-separated list of RFC numbers or Internet-Draft names. 3185 # 3186 # The prep tool will parse the attribute value so that incorrect 3187 # references can be detected. 3188 # 3189 # 2.45.17. "version" Attribute 3190 # 3191 # Specifies the version of xml2rfc syntax used in this document. The 3192 # only expected value (for now) is "3". 3193 3194 3195 # 2.46. <section> 3196 # 3197 # Represents a section (when inside a <middle> element) or an appendix 3198 # (when inside a <back> element). 3199 # 3200 # Subsections are created by nesting <section> elements inside 3201 # <section> elements. Sections are allowed to be empty. 3202 # 3203 # This element appears as a child element of <back> (Section 2.8), 3204 # <boilerplate> (Section 2.11), <middle> (Section 2.31), and <section> 3205 # (Section 2.46). 3206 # 3207 # Content model: 3208 # 3209 # In this order: 3210 # 3211 # 1. One optional <name> element (Section 2.32) 3212 # 3213 # 2. In any order: 3214 # 3215 # * <artwork> elements (Section 2.5) 3216 # 3217 # * <aside> elements (Section 2.6) 3218 # 3219 # * <blockquote> elements (Section 2.10) 3220 # 3221 # * <dl> elements (Section 2.20) 3222 # 3223 # * <figure> elements (Section 2.25) 3224 # 3225 # * <iref> elements (Section 2.27) 3226 # 3227 # * <ol> elements (Section 2.34) 3228 # 3229 # * <sourcecode> elements (Section 2.48) 3230 # 3231 # * <t> elements (Section 2.53) 3232 # 3233 # * <table> elements (Section 2.54) 3234 # 3235 # * <texttable> elements (Section 3.8) 3236 # 3237 # * <ul> elements (Section 2.63) 3238 # 3239 # 3. Optional <section> elements (Section 2.46) 3240 # 3241 # 2.46.1. "anchor" Attribute 3242 # 3243 # Document-wide unique identifier for this section. 3244 # 3245 # 2.46.2. "numbered" Attribute 3246 # 3247 # If set to "false", the formatter is requested to not display a 3248 # section number. The prep tool will verify that such a section is not 3249 # followed by a numbered section in this part of the document and will 3250 # verify that the section is a top-level section. 3251 # 3252 # Allowed values: 3253 # 3254 # o "true" (default) 3255 # 3256 # o "false" 3257 # 3258 # 2.46.3. "removeInRFC" Attribute 3259 # 3260 # If set to "true", this note is marked in the prep tool with text 3261 # indicating that it should be removed before the document is published 3262 # as an RFC. That text will be "This note is to be removed before 3263 # publishing as an RFC." 3264 # 3265 # Allowed values: 3266 # 3267 # o "true" 3268 # 3269 # o "false" (default) 3270 # 3271 # 2.46.4. "title" Attribute 3272 # 3273 # Deprecated. Use <name> instead. 3274 # 3275 # 2.46.5. "toc" Attribute 3276 # 3277 # Indicates to a formatter whether or not the section is to be included 3278 # in a table of contents, if such a table of contents is produced. 3279 # This only takes effect if the level of the section would have 3280 # appeared in the table of contents based on the "tocDepth" attribute 3281 # of the <rfc> element, and of course only if the table of contents is 3282 # being created based on the "tocInclude" attribute of the <rfc> 3283 # element. If this is set to "exclude", any section below this one 3284 # will be excluded as well. The "default" value indicates inclusion of 3285 # the section if it would be included by the tocDepth attribute of the 3286 # <rfc> element. 3287 # 3288 # Allowed values: 3289 # 3290 # o "include" 3291 # 3292 # o "exclude" 3293 # 3294 # o "default" (default) 3295 def render_section(self, e, width, **kwargs): 3296 kwargs['joiners'].update({ 3297 None: Joiner('\n\n', 3, 0, False, False), # default 3298 't': Joiner('\n\n', 3, 0, False, False), 3299 'name': Joiner(' ', 0, 0, False, False), 3300 'iref': Joiner(' ', 0, 0, False, False), 3301 'section': Joiner('\n\n', 0, 0, False, False), 3302 'artset': Joiner('\n\n', 0, 0, False, False), 3303 'artwork': Joiner('\n\n', 3, 0, False, True), 3304 'sourcecode': Joiner('\n\n', 3, 0, False, False), 3305 }) 3306 text = '' 3307 pn = e.get('pn', 'unknown-unknown') 3308 if e.get('numbered') == 'true': 3309 text = pn.split('-',1)[1].replace('-', ' ').title() +'.' 3310 if text.startswith('Appendix'): 3311 text = text.replace('.', ' ', 1) 3312 kwargs['joiners'].update({ 3313 'name': Joiner(' ', len(text)+2, 0, False, False), 3314 }) 3315 lines = [] 3316 name = e.find('name') 3317 if name != None: 3318 lines += mklines(self.tjoin(text, name, width, **kwargs), e) 3319 for c in e.getchildren(): 3320 if c.tag == 'name': 3321 continue 3322 lines = self.ljoin(lines, c, width, **kwargs) 3323 return lines 3324 3325 # 2.47. <seriesInfo> 3326 # 3327 # Specifies the document series in which this document appears, and 3328 # also specifies an identifier within that series. 3329 # 3330 # A processing tool determines whether it is working on an RFC or an 3331 # Internet-Draft by inspecting the "name" attribute of a <seriesInfo> 3332 # element inside the <front> element inside the <rfc> element, looking 3333 # for "RFC" or "Internet-Draft". (Specifying neither value in any of 3334 # the <seriesInfo> elements can be useful for producing other types of 3335 # documents but is out of scope for this specification.) 3336 # 3337 # It is invalid to have multiple <seriesInfo> elements inside the same 3338 # <front> element containing the same "name" value. Some combinations 3339 # of <seriesInfo> "name" attribute values make no sense, such as having 3340 # both <seriesInfo name="rfc"/> and <seriesInfo name="Internet-Draft"/> 3341 # in the same <front> element. 3342 # 3343 # This element appears as a child element of <front> (Section 2.26) and 3344 # <reference> (Section 2.40; deprecated in this context). 3345 # 3346 # Content model: this element does not have any contents. 3347 # 3348 # 2.47.1. "asciiName" Attribute 3349 # 3350 # The ASCII equivalent of the name field. 3351 # 3352 # 2.47.2. "asciiValue" Attribute 3353 # 3354 # The ASCII equivalent of the value field. 3355 # 3356 # 2.47.3. "name" Attribute (Mandatory) 3357 # 3358 # The name of the series. The currently known values are "RFC", 3359 # "Internet-Draft", and "DOI". The RFC Series Editor may change this 3360 # list in the future. 3361 # 3362 # Some of the values for "name" interact as follows: 3363 # 3364 # o If a <front> element contains a <seriesInfo> element with a name 3365 # of "Internet-Draft", it can also have at most one additional 3366 # <seriesInfo> element with a "status" attribute whose value is of 3367 # "standard", "full-standard", "bcp", "fyi", "informational", 3368 # "experimental", or "historic" to indicate the intended status of 3369 # this Internet-Draft, if it were to be later published as an RFC. 3370 # If such an additional <seriesInfo> element has one of those 3371 # statuses, the name needs to be "". 3372 # 3373 # o If a <front> element contains a <seriesInfo> element with a name 3374 # of "RFC", it can also have at most one additional <seriesInfo> 3375 # element with a "status" attribute whose value is of 3376 # "full-standard", "bcp", or "fyi" to indicate the current status of 3377 # this RFC. If such an additional <seriesInfo> element has one of 3378 # those statuses, the "value" attribute for that name needs to be 3379 # the number within that series. That <front> element might also 3380 # contain an additional <seriesInfo> element with the status of 3381 # "info", "exp", or "historic" and a name of "" to indicate the 3382 # status of the RFC. 3383 # 3384 # o A <front> element that has a <seriesInfo> element that has the 3385 # name "Internet-Draft" cannot also have a <seriesInfo> element that 3386 # has the name "RFC". 3387 # 3388 # o The <seriesInfo> element can contain the DOI for the referenced 3389 # document. This cannot be used when the <seriesInfo> element is an 3390 # eventual child element of an <rfc> element -- only as an eventual 3391 # child of a <reference> element. The "value" attribute should use 3392 # the form specified in [RFC7669]. 3393 # 3394 # 2.47.4. "status" Attribute 3395 # 3396 # The status of this document. The currently known values are 3397 # "standard", "informational", "experimental", "bcp", "fyi", and 3398 # "full-standard". The RFC Series Editor may change this list in the 3399 # future. 3400 # 3401 # 2.47.5. "stream" Attribute 3402 # 3403 # The stream (as described in [RFC7841]) that originated the document. 3404 # (The RFC Series Editor may change this list in the future.) 3405 # 3406 # Allowed values: 3407 # 3408 # o "IETF" (default) 3409 # 3410 # o "IAB" 3411 # 3412 # o "IRTF" 3413 # 3414 # o "independent" 3415 # 3416 # 2.47.6. "value" Attribute (Mandatory) 3417 # 3418 # The identifier within the series specified by the "name" attribute. 3419 # 3420 # For BCPs, FYIs, RFCs, and STDs, this is the number within the series. 3421 # For Internet-Drafts, it is the full draft name (ending with the 3422 # two-digit version number). For DOIs, the value is given, such as 3423 # "10.17487/rfc1149", as described in [RFC7669]. 3424 # 3425 # The name in the value should be the document name without any file 3426 # extension. For Internet-Drafts, the value for this attribute should 3427 # be "draft-ietf-somewg-someprotocol-07", not 3428 # "draft-ietf-somewg-someprotocol-07.txt". 3429 def render_seriesinfo(self, e, width, **kwargs): 3430 name = e.get('name') 3431 value = e.get('value') 3432 if name == 'Internet-Draft': 3433 return name + ', ' + value 3434 else: 3435 return name + '\u00A0' + value.replace('/', '/' + '\uE060') 3436 3437 # 2.48. <sourcecode> 3438 # 3439 # This element allows the inclusion of source code into the document. 3440 # 3441 # When rendered, source code is always shown in a monospace font. When 3442 # <sourcecode> is a child of <figure> or <section>, it provides full 3443 # control of horizontal whitespace and line breaks. When formatted, it 3444 # is indented relative to the left margin of the enclosing element. It 3445 # is thus useful for source code and formal languages (such as ABNF 3446 # [RFC5234] or the RNC notation used in this document). (When 3447 # <sourcecode> is a child of other elements, it flows with the text 3448 # that surrounds it.) Tab characters (U+0009) inside of this element 3449 # are prohibited. 3450 # 3451 # For artwork such as character-based art, diagrams of message layouts, 3452 # and so on, use the <artwork> element instead. 3453 # 3454 # Output formatters that do pagination should attempt to keep source 3455 # code on a single page. This is to prevent source code that is split 3456 # across pages from looking like two separate pieces of code. 3457 # 3458 # See Section 5 for a description of how to deal with issues of using 3459 # "&" and "<" characters in source code. 3460 # 3461 # This element appears as a child element of <blockquote> 3462 # (Section 2.10), <dd> (Section 2.18), <figure> (Section 2.25), <li> 3463 # (Section 2.29), <section> (Section 2.46), <td> (Section 2.56), and 3464 # <th> (Section 2.58). 3465 # 3466 # Content model: only text content. 3467 # 3468 # 2.48.1. "anchor" Attribute 3469 # 3470 # Document-wide unique identifier for this source code. 3471 # 3472 # 2.48.2. "name" Attribute 3473 # 3474 # A filename suitable for the contents (such as for extraction to a 3475 # local file). This attribute can be helpful for other kinds of tools 3476 # (such as automated syntax checkers, which work by extracting the 3477 # source code). Note that the "name" attribute does not need to be 3478 # unique for <artwork> elements in a document. If multiple 3479 # <sourcecode> elements have the same "name" attribute, a formatter 3480 # might assume that the elements are all fragments of a single file, 3481 # and such a formatter can collect those fragments for later 3482 # processing. 3483 # 3484 # 2.48.3. "src" Attribute 3485 # 3486 # The URI reference of a source file [RFC3986]. 3487 # 3488 # It is an error to have both a "src" attribute and content in the 3489 # <sourcecode> element. 3490 # 3491 # 2.48.4. "type" Attribute 3492 # 3493 # Specifies the type of the source code. The value of this attribute 3494 # is free text with certain values designated as preferred. 3495 # 3496 # The preferred values for <sourcecode> types are: 3497 # 3498 # o abnf 3499 # 3500 # o asn.1 3501 # 3502 # o bash 3503 # 3504 # o c++ 3505 # 3506 # o c 3507 # 3508 # o cbor 3509 # 3510 # o dtd 3511 # 3512 # o java 3513 # 3514 # o javascript 3515 # 3516 # o json 3517 # 3518 # o mib 3519 # 3520 # o perl 3521 # 3522 # o pseudocode 3523 # 3524 # o python 3525 # 3526 # o rnc 3527 # 3528 # o xml 3529 # 3530 # o yang 3531 # 3532 # The RFC Series Editor will maintain a complete list of the preferred 3533 # values on the RFC Editor web site, and that list is expected to be 3534 # updated over time. Thus, a consumer of v3 XML should not cause a 3535 # failure when it encounters an unexpected type or no type is 3536 # specified. 3537 def render_sourcecode(self, e, width, **kwargs): 3538 markers = e.get('markers') 3539 lines = self.render_artwork(e, width, **kwargs) 3540 if markers == 'true': 3541 text = '<CODE BEGINS>' 3542 file = e.get('name') 3543 marker_lines = [ Line(text, e) ] 3544 if file: 3545 filetext = 'file "%s"' % file 3546 if len(filetext) > (width - len(text)): 3547 marker_lines += [ Line(' ' + filetext, e) ] 3548 else: 3549 marker_lines = [ Line(text + ' ' + filetext, e) ] 3550 lines = marker_lines + lines + [ Line('<CODE ENDS>', e) ] 3551 return lines 3552 3553 3554 def render_stream(self, e, width, **kwargs): 3555 text = e.text 3556 return text 3557 3558 3559 # 2.49. <street> 3560 # 3561 # Provides a street address. 3562 # 3563 # This element appears as a child element of <postal> (Section 2.37). 3564 # 3565 # Content model: only text content. 3566 # 3567 # 2.49.1. "ascii" Attribute 3568 # 3569 # The ASCII equivalent of the street address. 3570 render_street = null_renderer # handled in render_address 3571 3572 # 2.50. <strong> 3573 # 3574 # Indicates text that is semantically strong. Text enclosed within 3575 # this element will be displayed as bold after processing. This 3576 # element can be combined with other character formatting elements, and 3577 # the formatting will be additive. 3578 def render_strong(self, e, width, **kwargs): 3579 text = '*%s*' % self.inner_text_renderer(e) 3580 text += e.tail or '' 3581 return text 3582 3583 3584 # 2.51. <sub> 3585 # 3586 # Causes the text to be displayed as subscript, approximately half a 3587 # letter-height lower than normal text. This element can be combined 3588 # with other character formatting elements, and the formatting will be 3589 # additive. 3590 def render_sub(self, e, width, **kwargs): 3591 text = '_(%s)' % self.inner_text_renderer(e) 3592 text += e.tail or '' 3593 return text 3594 3595 3596 # 2.52. <sup> 3597 # 3598 # Causes the text to be displayed as superscript, approximately half a 3599 # letter-height higher than normal text. This element can be combined 3600 # with other character formatting elements, and the formatting will be 3601 # additive. 3602 def render_sup(self, e, width, **kwargs): 3603 text = '^(%s)' % self.inner_text_renderer(e) 3604 text += e.tail or '' 3605 return text 3606 3607 3608 # 2.53. <t> 3609 # 3610 # Contains a paragraph of text. 3611 # 3612 # This element appears as a child element of <abstract> (Section 2.1), 3613 # <aside> (Section 2.6), <blockquote> (Section 2.10), <dd> 3614 # (Section 2.18), <li> (Section 2.29), <list> (Section 3.4), <note> 3615 # (Section 2.33), <section> (Section 2.46), <td> (Section 2.56), and 3616 # <th> (Section 2.58). 3617 # 3618 # Content model: 3619 # 3620 # In any order: 3621 # 3622 # o Text 3623 # 3624 # o <bcp14> elements (Section 2.9) 3625 # 3626 # o <cref> elements (Section 2.16) 3627 # 3628 # o <em> elements (Section 2.22) 3629 # 3630 # o <eref> elements (Section 2.24) 3631 # 3632 # o <iref> elements (Section 2.27) 3633 # 3634 # o <list> elements (Section 3.4) 3635 # 3636 # o <relref> elements (Section 2.44) 3637 # 3638 # o <spanx> elements (Section 3.7) 3639 # 3640 # o <strong> elements (Section 2.50) 3641 # 3642 # o <sub> elements (Section 2.51) 3643 # 3644 # o <sup> elements (Section 2.52) 3645 # 3646 # o <tt> elements (Section 2.62) 3647 # 3648 # o <vspace> elements (Section 3.10) 3649 # 3650 # o <xref> elements (Section 2.66) 3651 # 3652 # 2.53.1. "anchor" Attribute 3653 # 3654 # Document-wide unique identifier for this paragraph. 3655 # 3656 # 2.53.2. "hangText" Attribute 3657 # 3658 # Deprecated. Instead, use <dd> inside of a definition list (<dl>). 3659 # 3660 # 2.53.3. "keepWithNext" Attribute 3661 # 3662 # Acts as a hint to the output formatters that do pagination to do a 3663 # best-effort attempt to keep the paragraph with the next element, 3664 # whatever that happens to be. For example, the HTML output @media 3665 # print CSS ("CSS" refers to Cascading Style Sheets) might translate 3666 # this to page-break-after: avoid. For PDF, the paginator could 3667 # attempt to keep the paragraph with the next element. Note: this 3668 # attribute is strictly a hint and not always actionable. 3669 # 3670 # Allowed values: 3671 # 3672 # o "false" (default) 3673 # 3674 # o "true" 3675 # 3676 # 2.53.4. "keepWithPrevious" Attribute 3677 # 3678 # Acts as a hint to the output formatters that do pagination to do a 3679 # best-effort attempt to keep the paragraph with the previous element, 3680 # whatever that happens to be. For example, the HTML output @media 3681 # print CSS might translate this to page-break-before: avoid. For PDF, 3682 # the paginator could attempt to keep the paragraph with the previous 3683 # element. Note: this attribute is strictly a hint and not always 3684 # actionable. 3685 # 3686 # Allowed values: 3687 # 3688 # o "false" (default) 3689 # 3690 # o "true" 3691 def render_t(self, e, width, **kwargs): 3692 def rreplace(s, old, new, max): 3693 lst = s.rsplit(old, max) 3694 return new.join(lst) 3695 indent = e.get('indent', None) or '0' 3696 if indent: 3697 kwargs['indent'] = int(indent) 3698 text = self.inner_text_renderer(e) 3699 if kwargs.pop('fill', True): 3700 text = fill(text, width=width, **kwargs) 3701 lines = mklines(text, e) 3702 else: 3703 if isinstance(text, six.binary_type): 3704 text = text.decode('utf-8') 3705 lines = [ Line(text, e) ] 3706 return lines 3707 3708 3709 # 2.54. <table> 3710 # 3711 # Contains a table with a caption with the table number. If the 3712 # element contains a <name> element, the caption will also show that 3713 # name. 3714 # 3715 # Inside the <table> element is, optionally, a <thead> element to 3716 # contain the rows that will be the table's heading and, optionally, a 3717 # <tfoot> element to contain the rows of the table's footer. If the 3718 # XML is converted to a representation that has page breaks (such as 3719 # PDFs or printed HTML), the header and footer are meant to appear on 3720 # each page. 3721 # 3722 # This element appears as a child element of <aside> (Section 2.6) and 3723 # <section> (Section 2.46). 3724 # 3725 # Content model: 3726 # 3727 # In this order: 3728 # 3729 # 1. One optional <name> element (Section 2.32) 3730 # 3731 # 2. Optional <iref> elements (Section 2.27) 3732 # 3733 # 3. One optional <thead> element (Section 2.59) 3734 # 3735 # 4. One or more <tbody> elements (Section 2.55) 3736 # 3737 # 5. One optional <tfoot> element (Section 2.57) 3738 # 3739 # 2.54.1. "anchor" Attribute 3740 # 3741 # Document-wide unique identifier for this table. 3742 def build_table(self, e, width, **kwargs): 3743 # variations on border characters for table styles 3744 style = self.get_relevant_pi(e, 'table_borders') or self.options.table_borders 3745 bchar_sets = { 3746 'full': { '=': '=', 3747 '-': '-', 3748 '+': '+', 3749 '|': '|',}, 3750 'light':{ '=': '-', 3751 '-': None, 3752 '+': '+', 3753 '|': '|',}, 3754 'min': { '=': '-', 3755 '-': None, 3756 '+': ' ', 3757 '|': ' ',}, 3758 } 3759 bchar_sets['minimal'] = bchar_sets['min'] 3760 bchar = bchar_sets[style] 3761 class Cell(object): 3762 type = b'None' 3763 text = None 3764 wrapped = [] 3765 colspan = 1 3766 rowspan = 1 3767 width = None 3768 minwidth= None 3769 height = None 3770 element = None 3771 padding = 0 3772 foldable= True 3773 top = '' 3774 bot = '' 3775 3776 def show(cells, attr='', note=''): 3777 debug.say('') 3778 debug.say('%s %s:' % (attr, note)) 3779 for i in range(len(cells)): 3780 row = [ (c.type[1], getattr(c, attr)) if attr else c for c in cells[i] ] 3781 debug.say(str(row).replace('\u2028', '\u00a4')) 3782 3783 def array(rows, cols, init): 3784 a = [] 3785 for i in range(rows): 3786 a.append([]) 3787 for j in range(cols): 3788 if inspect.isclass(init): 3789 a[i].append(init()) 3790 else: 3791 a[i].append(init) 3792 return a 3793 3794 def intattr(e, name): 3795 attr = e.get(name) 3796 if attr.isdigit(): 3797 attr = int(attr) 3798 else: 3799 attr = 1 3800 return attr 3801 3802 def get_dimensions(e): 3803 cols = 0 3804 rows = 0 3805 # Find the dimensions of the table 3806 for p in e.iterchildren(['thead', 'tbody', 'tfoot']): 3807 for r in p.iterchildren('tr'): 3808 ccols = 0 3809 crows = 0 3810 extrarows = 0 3811 for c in r.iterchildren('td', 'th'): 3812 colspan = intattr(c, 'colspan') 3813 ccols += colspan 3814 rowspan = intattr(c, 'rowspan') 3815 crows = max(crows, rowspan) 3816 cols = max(cols, ccols) 3817 extrarows = max(extrarows, crows) 3818 extrarows -=1 3819 rows += 1 3820 if extrarows > 0: 3821 rows += extrarows 3822 return rows, cols 3823 3824 def justify(cell, line, minpad): 3825 align = cell.element.get('align') 3826 if align == 'center': 3827 padding = 0 3828 width = cell.colwidth 3829 else: 3830 padding = cell.colwidth - textwidth(line) 3831 width = cell.colwidth - min(2, padding) 3832 if align == 'left': 3833 text = line.ljust(width) 3834 elif align == 'center': 3835 text = line.center(width) 3836 elif align == 'right': 3837 text = line.rjust(width) 3838 if align == 'right': 3839 if padding > 1: 3840 text = text + ' ' if minpad > 1 else ' ' + text 3841 if padding > 0: 3842 text = ' ' + text 3843 elif align == 'left': 3844 if padding > 1: 3845 text = ' ' + text if minpad > 1 else text + ' ' 3846 if padding > 0: 3847 text = text + ' ' 3848 else: 3849 pass 3850 return text 3851 3852 def merge_border(c, d): 3853 border = { 3854 '=': { '=':'=', '-':'=', '+':'+', }, 3855 '-': { '=':'=', '-':'-', '+':'+', }, 3856 '+': { '=':'+', '-':'+', '+':'+', '|':'+', }, 3857 '|': { '+':'+', '|':'|', }, 3858 } 3859 if c in border and d in border[c]: 3860 return border[c][d] 3861 return c 3862 3863 def build_line(cells, i, cols, next=True): 3864 def table(e): 3865 return list(e.iterancestors('table'))[0] 3866 line = '' 3867 e = cells[i][0].element 3868 for j in range(cols): 3869 k, l = cells[i][j].origin 3870 # skip colspan cells 3871 if k==i and l<j: 3872 continue 3873 cell = cells[k][l] 3874 part = cell.wrapped[cell.m] 3875 if next: 3876 cell.m += 1 3877 if line: 3878 if bchar['|']: 3879 line = line[:-1] + merge_border(line[-1], part[0]) + part[1:] 3880 else: 3881 line = line + part 3882 else: 3883 line = part 3884 return Line(line, table(e)) 3885 3886 def find_minwidths(e, cells, hyphen_split=False): 3887 """ 3888 Find the minimum column widths of regular cells 3889 """ 3890 i = 0 3891 splitter = utils.TextSplitter(width=67, hyphen_split=hyphen_split) 3892 for p in e.iterchildren(['thead', 'tbody', 'tfoot']): 3893 for r in list(p.iterchildren('tr')): 3894 j = 0 3895 for c in r.iterchildren('td', 'th'): 3896 # skip over cells belonging to an earlier row or column 3897 while j < len(cells[i]) and cells[i][j].element != c: 3898 j += 1 3899 # 3900 cell = cells[i][j] 3901 if cell.foldable: 3902 cell.text = cell.text.strip(stripspace) 3903 cell.minwidth = max([0]+[ len(word.strip(stripspace)) for word in splitter._split(cell.text) ]) if cell.text else 0 3904 else: 3905 cell.minwidth = max([0]+[ len(word.strip(stripspace)) for line in cell.text.splitlines() for word in splitter._split(line) ]) 3906 i += 1 3907 3908 def set_colwidths(cells, rows, cols): 3909 """ 3910 Compute the adjusted cell widths; the same for all rows of each column 3911 """ 3912 for j in range(cols): 3913 colmax = 0 3914 for i in range(rows): 3915 cell = cells[i][j] 3916 if cell.minwidth: 3917 cw = cell.minwidth // cell.colspan 3918 if cw > colmax: 3919 colmax = cw 3920 for i in range(rows): 3921 cells[i][j].colwidth = colmax 3922 3923 # ---------------------------------------------------------------------- 3924 rows, cols = get_dimensions(e) 3925 cells = array(rows, cols, Cell) 3926 3927 # ---------------------------------------------------------------------- 3928 # Iterate through tr and th/td elements, and annotate the cells array 3929 # with rowspan, colspan, and owning element and its origin 3930 i = 0 3931 for p in e.iterchildren(['thead', 'tbody', 'tfoot']): 3932 for r in list(p.iterchildren('tr')): 3933 j = 0 3934 for c in r.iterchildren('td', 'th'): 3935 # skip over cells belonging to an earlier row or column 3936 while j < len(cells[i]) and cells[i][j].element != None: 3937 j += 1 3938 # 3939 cell = cells[i][j] 3940 cell.colspan = intattr(c, 'colspan') 3941 cell.rowspan = intattr(c, 'rowspan') 3942 if len(c) == 1 and c[0].tag == 't': 3943 cell.text, cell.foldable = self.text_or_block_renderer(c[0], width, fill=False, **kwargs) or ('', True) 3944 else: 3945 cell.text, cell.foldable = self.text_or_block_renderer(c, width, fill=False, **kwargs) or ('', True) 3946 cell.text = mktextblock(cell.text) 3947 if cell.foldable: 3948 cell.text = cell.text.strip(stripspace) 3949 cell.minwidth = max([0]+[ len(word) for word in splitter._split(cell.text) ]) if cell.text else 0 3950 else: 3951 cell.minwidth = max([0]+[ len(word) for line in cell.text.splitlines() for word in splitter._split(line) ]) 3952 cell.type = p.tag 3953 if c.tag == 'th': 3954 cell.top = bchar['='] 3955 cell.bot = bchar['='] 3956 else: 3957 cell.top = bchar['-'] if not cell.top else cell.top 3958 cell.bot = bchar['-'] if not cell.bot else cell.bot 3959 for k in range(i, i+cell.rowspan): 3960 for l in range(j, j+cell.colspan): 3961 cells[k][l].element = c 3962 cells[k][l].origin = (i, j) 3963 i += 1 3964 # Ensure we have top and bottom borders 3965 for j in range(len(cells[0])): 3966 if hasattr(cells[0][j], 'origin'): 3967 k, l = cells[0][j].origin 3968 if not cells[k][l].top: 3969 cells[k][l].top = bchar['='] 3970 for j in range(len(cells[-1])): 3971 if hasattr(cells[-1][j], 'origin'): 3972 k, l = cells[-1][j].origin 3973 if not cells[k][l].bot: 3974 cells[k][l].bot = bchar['='] 3975 del k, l 3976 #show(cells, 'origin') 3977 3978 # ---------------------------------------------------------------------- 3979 # Find the minimum column widths of regular cells, and total width 3980 # per row. 3981 find_minwidths(e, cells, hyphen_split=self.options.table_hyphen_breaks) 3982 #show(cells, 'minwidth') 3983 #debug.pprint('totwidth') 3984 3985 # ---------------------------------------------------------------------- 3986 # Compute the adjusted cell widths; the same for all rows of each column 3987 set_colwidths(cells, rows, cols) 3988 reqwidth = sum([ c.colwidth for c in cells[0] ]) + cols + 1 3989 if reqwidth > width: 3990 # Try again, splitting cell content on hyphens this time 3991 find_minwidths(e, cells, hyphen_split=True) 3992 set_colwidths(cells, rows, cols) 3993 #show(cells, 'colwidth', 'after aligned cell widths') 3994 3995 # ---------------------------------------------------------------------- 3996 # Add padding if possible. Pad widest first. 3997 reqwidth = sum([ c.colwidth for c in cells[0] ]) + (cols + 1)*len(bchar['|']) 3998 if reqwidth > width: 3999 self.warn(e, "Total table width (%s) exceeds available width (%s)" % (reqwidth, width)) 4000 excess = width - reqwidth 4001 # 4002 if excess > 0: 4003 widths = [ (c.colwidth, ic) for ic, c in enumerate(cells[0]) ] 4004 widths.sort() 4005 widths.reverse() 4006 for j in [ k for w, k in widths ]: 4007 pad = min(2, excess) 4008 excess -= pad 4009 for i in range(rows): 4010 cells[i][j].colwidth += pad 4011 cells[i][j].padding = pad 4012 #show(cells, 'colwidth', 'after padding') 4013 4014 # ---------------------------------------------------------------------- 4015 # Set up initial cell.wrapped values 4016 for i in range(rows): 4017 for j in range(cols): 4018 cell = cells[i][j] 4019 if cell.text: 4020 if cell.foldable: 4021 cell.wrapped = fill(cell.text, width=cell.colwidth, fix_sentence_endings=True).splitlines() 4022 else: 4023 cell.wrapped = cell.text.splitlines() 4024 4025 # ---------------------------------------------------------------------- 4026 # Make columns wider, if possible 4027 while excess > 0: 4028 maxpos = (None, None) 4029 maxrows = 0 4030 for i in range(rows): 4031 for j in range(cols): 4032 cell = cells[i][j] 4033 if hasattr(cell, 'origin'): 4034 if cell.origin == (i,j): 4035 w = sum([ cells[i][k].colwidth for k in range(j, j+cell.colspan)])+ cell.colspan-1 - cell.padding 4036 r = cell.rowspan 4037 # this is simplified, and doesn't always account for the 4038 # extra line from the missing border line in a rowspan cell: 4039 if cell.text: 4040 if cell.foldable: 4041 cell.wrapped = fill(cell.text, width=w, fix_sentence_endings=True).splitlines() 4042 else: 4043 cell.wrapped = [ l.text for l in self.text_or_block_renderer(cell.element, width=w, fill=True, **kwargs)[0] ] 4044 cell.height = len(cell.wrapped) 4045 if maxrows < cell.height and cell.height > 1: 4046 maxrows = cell.height 4047 maxpos = (i, j) 4048 else: 4049 self.die(e, "Inconsistent table width: Found different row lengths in this table") 4050 4051 # calculate a better width for the cell with the largest number 4052 # of text rows 4053 if maxpos != (None, None): 4054 i, j = maxpos 4055 cell = cells[i][j] 4056 w = sum([ cells[i][k].colwidth for k in range(j, j+cell.colspan)])+ cell.colspan-1 - cell.padding 4057 r = cell.rowspan 4058 h = cell.height 4059 for l in range(1, excess+1): 4060 lines = fill(cell.text, width=w+l, fix_sentence_endings=True).splitlines() 4061 if len(lines) < h: 4062 cell.height = lines 4063 excess -= l 4064 c = h//r 4065 for k in range(rows): 4066 cells[k][j].colwidth += l 4067 break 4068 else: 4069 break 4070 else: 4071 break 4072 4073 #show(cells, 'colwidth', 'after widening wide cells and re-wrapping lines') 4074 #show(cells, 'height') 4075 #show(cells, 'origin') 4076 4077 # ---------------------------------------------------------------------- 4078 # Normalize cell height and lines lists 4079 #show(cells, 'wrapped', 'before height normalization') 4080 #show(cells, 'rowspan', 'before height normalization') 4081 for i in range(rows): 4082 minspan = sys.maxsize 4083 for j in range(cols): 4084 cell = cells[i][j] 4085 k, l = cell.origin 4086 hspan = cell.rowspan+k-i if cell.rowspan else minspan 4087 if hspan > 0 and hspan < minspan: 4088 minspan = hspan 4089 maxlines = 0 4090 for j in range(cols): 4091 cell = cells[i][j] 4092 k, l = cell.origin 4093 hspan = cell.rowspan+k-i if cell.rowspan else minspan 4094 lines = len(cell.wrapped) if cell.wrapped else 0 4095 if hspan == minspan and lines > maxlines: 4096 maxlines = lines 4097 for j in range(cols): 4098 cells[i][j].lines = maxlines 4099 4100 # ---------------------------------------------------------------------- 4101 # Calculate total height for rowspan cells 4102 for i in range(rows): 4103 for j in range(cols): 4104 cells[i][j].m = None 4105 cells[i][j].height = None 4106 k, l = cells[i][j].origin 4107 cell = cells[k][l] 4108 if cell.m is None: 4109 cell.m = 0 4110 cell.height = sum([ cells[n][l].lines for n in range(k, k+cell.rowspan)]) + cell.rowspan-1 4111 4112 # ---------------------------------------------------------------------- 4113 # Calculate total width for colspan cells 4114 for i in range(rows): 4115 for j in range(cols): 4116 k, l = cells[i][j].origin 4117 cell = cells[k][l] 4118 if cell.origin == (i,j): 4119 cell.colwidth = sum([ cells[i][n].colwidth for n in range(j, j+cell.colspan)]) + cell.colspan-1 4120 4121 # ---------------------------------------------------------------------- 4122 # Calculate minimum padding per table column 4123 minpad = [width,]*cols 4124 for i in range(rows): 4125 for j in range(cols): 4126 cell = cells[i][j] 4127 if cell.origin == (i, j): 4128 padding = min([width] + [(cell.colwidth - textwidth(line)) for line in cell.wrapped]) 4129 if padding < minpad[j]: 4130 minpad[j] = padding 4131 4132 # ---------------------------------------------------------------------- 4133 # Add cell borders 4134 x = bchar['+'] 4135 l = bchar['|'] 4136 for i in range(rows): 4137 for j in range(cols): 4138 cell = cells[i][j] 4139 if cell.origin == (i, j): 4140 wrapped = (cell.wrapped + ['']*cell.height)[:cell.height] 4141 lines = ( ([ x + cell.top*cell.colwidth + x ] if cell.top else []) 4142 + ([ l + justify(cell, line, minpad[j]) + l for line in wrapped ]) 4143 + ([ x + cell.bot*cell.colwidth + x ] if cell.bot else []) ) 4144 cell.wrapped = lines 4145 4146 #show(cells, 'lines', 'before assembly') 4147 # ---------------------------------------------------------------------- 4148 # Emit combined cell content, line by line 4149 lines = [] 4150 prev_bottom_border_line = None 4151 for i in range(rows): 4152 # For each table row, render the top cell border (if any) and content. The bottom 4153 # border will be merged with the next row's top border when processing that row. 4154 has_top_border = any( c.top for c in cells[i] if c.wrapped) 4155 has_bot_border = any( c.bot for c in cells[i] if c.wrapped) 4156 for n in range(min(len(c.wrapped) for c in cells[i] if c.wrapped)-int(has_bot_border) ): 4157 line = build_line(cells, i, cols) 4158 lines.append(line) 4159 if prev_bottom_border_line: 4160 if has_top_border: 4161 line = lines[-1] 4162 lines[-1] = Line(''.join(merge_border(prev_bottom_border_line.text[c], line.text[c]) for c in range(len(line.text))), line.elem) 4163 else: 4164 line = lines[-1] 4165 lines[-1] = prev_bottom_border_line 4166 lines.append(line) 4167 prev_bottom_border_line = None 4168 # Get the next line, which will contain the bottom border for completed cells, 4169 # without incrementing the line count (we might have rowspan cells which might 4170 # not have been completely consumed yet): 4171 prev_bottom_border_line = build_line(cells, i, cols, next=False) if has_bot_border else None 4172 lines.append(prev_bottom_border_line) 4173 return lines 4174 4175 def render_table(self, e, width, **kwargs): 4176 kwargs['joiners'].update({ 4177 'name': Joiner(': ', 0, 0, False, False), 4178 'dl': Joiner('\n\n', 0, 0, False, False), 4179 'ol': Joiner('\n\n', 0, 0, False, False), 4180 't': Joiner('\n\n', 0, 0, False, False), 4181 'ul': Joiner('\n\n', 0, 0, False, False), 4182 }) 4183 # 4184 pn = e.get('pn') 4185 num = pn.split('-')[1].capitalize() 4186 children = e.getchildren() 4187 title = "Table %s" % (num, ) 4188 if len(children) and children[0].tag == 'name': 4189 name = children[0] 4190 children = children[1:] 4191 title = self.tjoin(title, name, width, **kwargs) 4192 lines = self.build_table(e, width, **kwargs) 4193 table_width = min([ width, max( len(l.text) for l in lines ) ]) 4194 min_title_width = min([ 26, len(title) ]) 4195 if table_width < min_title_width: 4196 table_width = min_title_width 4197 lines = align(lines, 'center', table_width) 4198 title = '\n'+center(title, table_width).rstrip(stripspace) 4199 lines += mklines(title, e) 4200 lines = align(lines, e.get('align', 'center'), width) 4201 return lines 4202 4203 4204 # 2.55. <tbody> 4205 # 4206 # A container for a set of body rows for a table. 4207 # 4208 # This element appears as a child element of <table> (Section 2.54). 4209 # 4210 # Content model: 4211 # 4212 # One or more <tr> elements (Section 2.61) 4213 # 4214 # 2.55.1. "anchor" Attribute 4215 # 4216 # Document-wide unique identifier for the tbody. 4217 render_tbody = null_renderer # handled in build_table 4218 4219 # 2.56. <td> 4220 # 4221 # A cell in a table row. 4222 # 4223 # This element appears as a child element of <tr> (Section 2.61). 4224 # 4225 # Content model: 4226 # 4227 # Either: 4228 # 4229 # In any order, but at least one of: 4230 # 4231 # * <artwork> elements (Section 2.5) 4232 # 4233 # * <dl> elements (Section 2.20) 4234 # 4235 # * <figure> elements (Section 2.25) 4236 # 4237 # * <ol> elements (Section 2.34) 4238 # 4239 # * <sourcecode> elements (Section 2.48) 4240 # 4241 # * <t> elements (Section 2.53) 4242 # 4243 # * <ul> elements (Section 2.63) 4244 # 4245 # Or: 4246 # 4247 # In any order: 4248 # 4249 # * Text 4250 # 4251 # * <bcp14> elements (Section 2.9) 4252 # 4253 # * <br> elements (Section 2.12) 4254 # 4255 # * <cref> elements (Section 2.16) 4256 # 4257 # * <em> elements (Section 2.22) 4258 # 4259 # * <eref> elements (Section 2.24) 4260 # 4261 # * <iref> elements (Section 2.27) 4262 # 4263 # * <relref> elements (Section 2.44) 4264 # 4265 # * <strong> elements (Section 2.50) 4266 # 4267 # * <sub> elements (Section 2.51) 4268 # 4269 # * <sup> elements (Section 2.52) 4270 # 4271 # * <tt> elements (Section 2.62) 4272 # 4273 # * <xref> elements (Section 2.66) 4274 # 4275 # 2.56.1. "align" Attribute 4276 # 4277 # Controls whether the content of the cell appears left justified 4278 # (default), centered, or right justified. Note that "center" or 4279 # "right" will probably only work well in cells with plain text; any 4280 # other elements might make the contents render badly. 4281 # 4282 # Allowed values: 4283 # 4284 # o "left" (default) 4285 # 4286 # o "center" 4287 # 4288 # o "right" 4289 # 4290 # 2.56.2. "anchor" Attribute 4291 # 4292 # Document-wide unique identifier for the cell. 4293 # 4294 # 2.56.3. "colspan" Attribute 4295 # 4296 # The number of columns that the cell is to span. For example, setting 4297 # "colspan='3'" indicates that the cell occupies the same horizontal 4298 # space as three cells of a row without any "colspan" attributes. 4299 # 4300 # 2.56.4. "rowspan" Attribute 4301 # 4302 # The number of rows that the cell is to span. For example, setting 4303 # "rowspan='3'" indicates that the cell occupies the same vertical 4304 # space as three rows. 4305 render_td = null_renderer # handled in build_table 4306 4307 4308 # 2.57. <tfoot> 4309 # 4310 # A container for a set of footer rows for a table. 4311 # 4312 # This element appears as a child element of <table> (Section 2.54). 4313 # 4314 # Content model: 4315 # 4316 # One or more <tr> elements (Section 2.61) 4317 # 4318 # 2.57.1. "anchor" Attribute 4319 # 4320 # Document-wide unique identifier for the tfoot. 4321 render_tfoot = null_renderer # handled in build_table 4322 4323 4324 # 2.58. <th> 4325 # 4326 # A cell in a table row. When rendered, this will normally come out in 4327 # boldface; other than that, there is no difference between this and 4328 # the <td> element. 4329 # 4330 # This element appears as a child element of <tr> (Section 2.61). 4331 # 4332 # Content model: 4333 # 4334 # Either: 4335 # 4336 # In any order, but at least one of: 4337 # 4338 # * <artwork> elements (Section 2.5) 4339 # 4340 # * <dl> elements (Section 2.20) 4341 # 4342 # * <figure> elements (Section 2.25) 4343 # 4344 # * <ol> elements (Section 2.34) 4345 # 4346 # * <sourcecode> elements (Section 2.48) 4347 # 4348 # * <t> elements (Section 2.53) 4349 # 4350 # * <ul> elements (Section 2.63) 4351 # 4352 # Or: 4353 # 4354 # In any order: 4355 # 4356 # * Text 4357 # 4358 # * <bcp14> elements (Section 2.9) 4359 # 4360 # * <br> elements (Section 2.12) 4361 # 4362 # * <cref> elements (Section 2.16) 4363 # 4364 # * <em> elements (Section 2.22) 4365 # 4366 # * <eref> elements (Section 2.24) 4367 # 4368 # * <iref> elements (Section 2.27) 4369 # 4370 # * <relref> elements (Section 2.44) 4371 # 4372 # * <strong> elements (Section 2.50) 4373 # 4374 # * <sub> elements (Section 2.51) 4375 # 4376 # * <sup> elements (Section 2.52) 4377 # 4378 # * <tt> elements (Section 2.62) 4379 # 4380 # * <xref> elements (Section 2.66) 4381 # 4382 # 2.58.1. "align" Attribute 4383 # 4384 # Controls whether the content of the cell appears left justified 4385 # (default), centered, or right justified. Note that "center" or 4386 # "right" will probably only work well in cells with plain text; any 4387 # other elements might make the contents render badly. 4388 # 4389 # Allowed values: 4390 # 4391 # o "left" (default) 4392 # 4393 # o "center" 4394 # 4395 # o "right" 4396 # 4397 # 2.58.2. "anchor" Attribute 4398 # 4399 # Document-wide unique identifier for the row. 4400 # 4401 # 2.58.3. "colspan" Attribute 4402 # 4403 # The number of columns that the cell is to span. For example, setting 4404 # "colspan='3'" indicates that the cell occupies the same horizontal 4405 # space as three cells of a row without any "colspan" attributes. 4406 # 4407 # 2.58.4. "rowspan" Attribute 4408 # 4409 # The number of rows that the cell is to span. For example, setting 4410 # "rowspan='3'" indicates that the cell occupies the same vertical 4411 # space as three rows. 4412 render_th = null_renderer # handled in build_table 4413 4414 4415 # 2.59. <thead> 4416 # 4417 # A container for a set of header rows for a table. 4418 # 4419 # This element appears as a child element of <table> (Section 2.54). 4420 # 4421 # Content model: 4422 # 4423 # One or more <tr> elements (Section 2.61) 4424 # 4425 # 2.59.1. "anchor" Attribute 4426 # 4427 # Document-wide unique identifier for the thead. 4428 render_thead = null_renderer # handled in build_table 4429 4430 4431 # 2.60. <title> 4432 # 4433 # Represents the document title. 4434 # 4435 # When this element appears in the <front> element of the current 4436 # document, the title might also appear in page headers or footers. If 4437 # it is long (~40 characters), the "abbrev" attribute can be used to 4438 # specify an abbreviated variant. 4439 # 4440 # This element appears as a child element of <front> (Section 2.26). 4441 # 4442 # Content model: only text content. 4443 def render_title(self, e, width, **kwargs): 4444 r = e.getparent().getparent() # <reference> 4445 title = clean_text(' '.join(e.itertext()).strip(stripspace)) 4446 quote_title = r.get('quoteTitle') 4447 if quote_title == 'true': 4448 title = '"%s"' % title 4449 return title 4450 4451 def render_title_front(self, e, width, **kwargs): 4452 pp = e.getparent().getparent() 4453 title = '\u2028'.join(e.itertext()).strip(stripspace) 4454 title = fill(title, width=width, **kwargs) 4455 title = center(title, width) 4456 if self.options.rfc: 4457 return title 4458 else: 4459 if pp.tag == 'rfc': 4460 doc_name = self.root.get('docName') 4461 if doc_name: 4462 title += '\n'+doc_name.strip(stripspace).center(width).rstrip(stripspace) 4463 return title 4464 4465 # 2.60.1. "abbrev" Attribute 4466 # 4467 # Specifies an abbreviated variant of the document title. 4468 # 4469 # 2.60.2. "ascii" Attribute 4470 # 4471 # The ASCII equivalent of the title. 4472 4473 4474 # 2.61. <tr> 4475 # 4476 # A row of a table. 4477 # 4478 # This element appears as a child element of <tbody> (Section 2.55), 4479 # <tfoot> (Section 2.57), and <thead> (Section 2.59). 4480 # 4481 # Content model: 4482 # 4483 # In any order, but at least one of: 4484 # 4485 # o <td> elements (Section 2.56) 4486 # 4487 # o <th> elements (Section 2.58) 4488 # 4489 # 2.61.1. "anchor" Attribute 4490 # 4491 # Document-wide unique identifier for the row. 4492 render_tr = null_renderer # handled in build_table 4493 4494 # <toc> 4495 def render_toc(self, e, width, **kwargs): 4496 lines = [] 4497 for c in e.getchildren(): 4498 numbered = c.get('numbered') 4499 if not numbered == 'false': 4500 self.err(c, "Expected toc section to have numbered='false', but found '%s'" % (numbered, )) 4501 keep_url = True if self.options.rfc else False 4502 lines = self.ljoin(lines, c, width, keep_url=keep_url, **kwargs) 4503 return lines 4504 4505 4506 # 2.62. <tt> 4507 # 4508 # Causes the text to be displayed in a constant-width font. This 4509 # element can be combined with other character formatting elements, and 4510 # the formatting will be additive. 4511 def render_tt(self, e, width, **kwargs): 4512 p = e.getparent() 4513 render_plain = list(p.iterancestors('table')) and not utils.hastext(p, ignore=['tt']) 4514 if render_plain: 4515 text = '%s' % self.inner_text_renderer(e) 4516 else: 4517 text = '"%s"' % self.inner_text_renderer(e) 4518 text += e.tail or '' 4519 return text 4520 4521 4522 # 2.63. <ul> 4523 # 4524 # An unordered list. The labels on the items will be symbols picked by 4525 # the formatter. 4526 # 4527 # This element appears as a child element of <abstract> (Section 2.1), 4528 # <aside> (Section 2.6), <blockquote> (Section 2.10), <dd> 4529 # (Section 2.18), <li> (Section 2.29), <note> (Section 2.33), <section> 4530 # (Section 2.46), <td> (Section 2.56), and <th> (Section 2.58). 4531 # 4532 # Content model: 4533 # 4534 # One or more <li> elements (Section 2.29) 4535 # 4536 # 2.63.1. "anchor" Attribute 4537 # 4538 # Document-wide unique identifier for the list. 4539 # 4540 # 2.63.2. "empty" Attribute 4541 # 4542 # Defines whether or not the label is empty. empty="true" indicates 4543 # that no label will be shown. 4544 # 4545 # Allowed values: 4546 # 4547 # o "false" (default) 4548 # 4549 # o "true" 4550 # 4551 # 2.63.3. "spacing" Attribute 4552 # 4553 # Defines whether or not there is a blank line between entries. 4554 # spacing="normal" indicates a single blank line, while 4555 # spacing="compact" indicates no space between. 4556 # 4557 # Allowed values: 4558 # 4559 # o "normal" (default) 4560 # 4561 # o "compact" 4562 def render_ul(self, e, width, **kwargs): 4563 # setup and validation 4564 empty = e.get('empty') == 'true' 4565 e._bare = empty and e.get('bare') == 'true' 4566 e._initial_text = self.get_ul_li_initial_text 4567 # 4568 compact = e.get('spacing') == 'compact' 4569 ljoin = '\n' if compact else '\n\n' 4570 # 4571 depth = len([ a for a in e.iterancestors(e.tag) ]) 4572 symbols = self.options.list_symbols 4573 e._symbol = ' ' if empty else symbols[depth%len(symbols)] 4574 4575 # 4576 indent = len(e._symbol)+2 4577 if e._bare: 4578 first = mktextblock(self.render(e[-1], width, **kwargs)) 4579 if first: 4580 indent = min(8, len(first.split()[0])+2) 4581 padding = indent 4582 indent = int( e.get('indent') or indent ) 4583 hang = max(padding, indent) - indent 4584 e._padding = indent 4585 # 4586 kwargs['joiners'].update({ 4587 None: Joiner(ljoin, indent, 0, False, False), 4588 'li': Joiner(ljoin, 0, 0, False, False), 4589 't': Joiner(ljoin, indent, hang, False, False), 4590 }) 4591 # rendering 4592 lines = [] 4593 for c in e.getchildren(): 4594 lines = self.ljoin(lines, c, width, **kwargs) 4595 return lines 4596 4597 4598 def render_u(self, e, width, **kwargs): 4599 try: 4600 text = expand_unicode_element(e) 4601 except (RuntimeError, ValueError) as exception: 4602 text = '' 4603 self.err(e, exception) 4604 anchor = e.get('anchor') 4605 xref = self.root.find('.//xref[@target="%s"]'%anchor) if anchor else None 4606 if xref != None: 4607 # render only literal here 4608 text = e.text 4609 text += e.tail or '' 4610 return text 4611 4612 # 2.64. <uri> 4613 # 4614 # Contains a web address associated with the author. 4615 # 4616 # The contents should be a valid URI; this most likely will be an 4617 # "http:" or "https:" URI. 4618 # 4619 # This element appears as a child element of <address> (Section 2.2). 4620 # 4621 # Content model: only text content. 4622 def render_uri(self, e, width, **kwargs): 4623 latin = kwargs.pop('latin', None) 4624 text = fill("URI:\u00a0\u00a0 %s"%e.text, width=width, **kwargs) if e.text and latin!=False else '' 4625 return text 4626 4627 # 2.65. <workgroup> 4628 # 4629 # This element is used to specify the Working Group (IETF) or Research 4630 # Group (IRTF) from which the document originates, if any. The 4631 # recommended format is the official name of the Working Group (with 4632 # some capitalization). 4633 # 4634 # In Internet-Drafts, this is used in the upper left corner of the 4635 # boilerplate, replacing the "Network Working Group" string. 4636 # Formatting software can append the words "Working Group" or "Research 4637 # Group", depending on the "submissionType" property of the <rfc> 4638 # element (Section 2.45.12). 4639 # 4640 # This element appears as a child element of <front> (Section 2.26). 4641 # 4642 # Content model: only text content. 4643 4644 4645 # 2.66. <xref> 4646 # 4647 # A reference to an anchor in this document. Formatters that have 4648 # links (such as HTML and PDF) are likely to render <xref> elements as 4649 # internal hyperlinks. This element is useful for referring to 4650 # references in the "References" section, to specific sections of this 4651 # document, to specific figures, and so on. The "target" attribute is 4652 # required. 4653 # 4654 # This element appears as a child element of <annotation> 4655 # (Section 2.3), <blockquote> (Section 2.10), <c> (Section 3.1), <cref> 4656 # (Section 2.16), <dd> (Section 2.18), <dt> (Section 2.21), <em> 4657 # (Section 2.22), <li> (Section 2.29), <name> (Section 2.32), 4658 # <postamble> (Section 3.5), <preamble> (Section 3.6), <strong> 4659 # (Section 2.50), <sub> (Section 2.51), <sup> (Section 2.52), <t> 4660 # (Section 2.53), <td> (Section 2.56), <th> (Section 2.58), <tt> 4661 # (Section 2.62), and <ttcol> (Section 3.9). 4662 # 4663 # Content model: only text content. 4664 def render_xref(self, e, width, **kwargs): 4665 target = e.get('target') 4666 section = e.get('section') 4667 format = e.get('format') 4668 reftext = e.get('derivedContent').strip(stripspace) 4669 exptext = self.inner_text_renderer(e, width, **kwargs) 4670 if exptext: 4671 # for later string formatting convenience, a trailing space if any text: 4672 exptext += ' ' 4673 content = clean_text(''.join(list(e.itertext()))) 4674 if reftext is None: 4675 self.die(e, "Found an <xref> without derivedContent: %s" % (etree.tostring(e),)) 4676 # 4677 if not section: 4678 if reftext: 4679 if target in self.refname_mapping: 4680 if format == 'none': 4681 text = "%s" % exptext 4682 elif format == 'title': 4683 if content: 4684 text = '%s ("%s")' % (exptext, reftext.strip('"')) 4685 else: 4686 text = '"%s"' % reftext.strip('"') 4687 else: 4688 if content: 4689 text = "%s[%s]" % (exptext, reftext) 4690 else: 4691 text = "[%s]" % reftext 4692 else: 4693 if format == 'none': 4694 text = "%s" % exptext 4695 else: 4696 if content: 4697 text = "%s(%s)" % (exptext, reftext) 4698 else: 4699 text = "%s" % (exptext or reftext) 4700 else: 4701 text = exptext.strip(stripspace) 4702 pageno = e.get('pageno') 4703 if pageno and pageno.isdigit(): 4704 text += '\u2026' '%04d' % int(pageno) 4705 else: 4706 label = 'Section' if section[0].isdigit() else 'Appendix' if re.search(r'^[A-Z](\.|$)', section) else 'Part' 4707 sform = e.get('sectionFormat') 4708 4709 if sform == 'of': 4710 text = '%s %s of %s[%s]' % (label, section, exptext, reftext) 4711 elif sform == 'comma': 4712 text = '%s[%s], %s %s' % (exptext, reftext, label, section) 4713 elif sform == 'parens': 4714 text = '%s[%s] (%s %s)' % (exptext, reftext, label, section) 4715 elif sform == 'bare': 4716 if exptext and exptext != section: 4717 text = '%s (%s)' % (section, exptext.strip(stripspace)) 4718 else: 4719 text = '%s' % (section, ) 4720 else: 4721 self.err(e, 'Unexpected value combination: section: %s sectionFormat: %s' %(section, sform)) 4722 4723 # Prevent line breaking on dash 4724 text = text.replace('-', '\u2011') 4725 text += (e.tail or '') 4726 4727 return text 4728 4729 # 2.66.1. "format" Attribute 4730 # 4731 # This attribute signals to formatters what the desired format of the 4732 # reference should be. Formatters for document types that have linking 4733 # capability should wrap the displayed text in hyperlinks. 4734 # 4735 # "counter" 4736 # 4737 # The "derivedContent" attribute will contain just a counter. This 4738 # is used for targets that are <section>, <figure>, <table>, or 4739 # items in an ordered list. Using "format='counter'" where the 4740 # target is any other type of element is an error. 4741 # 4742 # For example, with an input of: 4743 # 4744 # <section anchor="overview">Protocol Overview</section> 4745 # . . . 4746 # See Section <xref target="overview" format="counter"/> 4747 # for an overview. 4748 # 4749 # An HTML formatter might generate: 4750 # 4751 # See Section <a href="#overview">1.7</a> for an overview. 4752 # 4753 # "default" 4754 # 4755 # If the element has no content, the "derivedContent" attribute will 4756 # contain a text fragment that describes the referenced part 4757 # completely, such as "XML" for a target that is a <reference>, or 4758 # "Section 2" or "Table 4" for a target to a non-reference. (If the 4759 # element has content, the "derivedContent" attribute is filled with 4760 # the content.) 4761 # 4762 # For example, with an input of: 4763 # 4764 # <section anchor="overview">Protocol Overview</section> 4765 # . . . 4766 # See <xref target="overview"/> for an overview. 4767 # 4768 # An HTML formatter might generate: 4769 # 4770 # See <a href="#overview">Section 1.7</a> for an overview. 4771 # 4772 # "none" 4773 # 4774 # Deprecated. 4775 # 4776 # "title" 4777 # 4778 # If the target is a <reference> element, the "derivedContent" 4779 # attribute will contain the name of the reference, extracted from 4780 # the <title> child of the <front> child of the reference. Or, if 4781 # the target element has a <name> child element, the 4782 # "derivedContent" attribute will contain the text content of that 4783 # <name> element concatenated with the text content of each 4784 # descendant node of <name> (that is, stripping out all of the XML 4785 # markup, leaving only the text). Or, if the target element does 4786 # not contain a <name> child element, the "derivedContent" attribute 4787 # will contain the name of the "anchor" attribute of that element 4788 # with no other adornment. 4789 # 4790 # Allowed values: 4791 # 4792 # o "default" (default) 4793 # 4794 # o "title" 4795 # 4796 # o "counter" 4797 # 4798 # o "none" 4799 # 4800 # 2.66.2. "pageno" Attribute 4801 # 4802 # Deprecated. 4803 # 4804 # Allowed values: 4805 # 4806 # o "true" 4807 # 4808 # o "false" (default) 4809 # 4810 # 2.66.3. "target" Attribute (Mandatory) 4811 # 4812 # Identifies the document component being referenced. The value needs 4813 # to match the value of the "anchor" attribute of an element in the 4814 # document; otherwise, it is an error. 4815 4816 # --- class variables ------------------------------------------------------ 4817 4818 element_tags = [ 4819 'abstract', 4820 'address', 4821 'annotation', 4822 'artset', 4823 'artwork', 4824 'aside', 4825 'author', 4826 'back', 4827 'bcp14', 4828 'blockquote', 4829 'boilerplate', 4830 'br', 4831 'city', 4832 'code', 4833 'country', 4834 'cref', 4835 'date', 4836 'dd', 4837 'displayreference', 4838 'dl', 4839 'dt', 4840 'em', 4841 'email', 4842 'eref', 4843 'figure', 4844 'front', 4845 'iref', 4846 'li', 4847 'link', 4848 'middle', 4849 'name', 4850 'note', 4851 'ol', 4852 'organization', 4853 'phone', 4854 'postal', 4855 'postalLine', 4856 'refcontent', 4857 'reference', 4858 'referencegroup', 4859 'references', 4860 'region', 4861 'relref', 4862 'rfc', 4863 'section', 4864 'seriesInfo', 4865 'sourcecode', 4866 'street', 4867 'strong', 4868 'sub', 4869 'sup', 4870 't', 4871 'table', 4872 'tbody', 4873 'td', 4874 'tfoot', 4875 'th', 4876 'thead', 4877 'title', 4878 'toc', 4879 'tr', 4880 'tt', 4881 'ul', 4882 'uri', 4883 'xref', 4884 ] 4885 deprecated_element_tags = [ 4886 'list', 4887 'spanx', 4888 'vspace', 4889 'c', 4890 'texttable', 4891 'ttcol', 4892 'facsimile', 4893 'format', 4894 'preamble', 4895 'postamble', 4896 ] 4897 unused_front_element_renderers = [ 4898 'area', 4899 'keyword', 4900 'workgroup', 4901 ] 4902 all_element_tags = element_tags + deprecated_element_tags + unused_front_element_renderers 4903 deprecated_attributes = [ 4904 # element, attrbute 4905 ('figure', 'align'), 4906 ('section', 'title'), 4907 ('note', 'title'), 4908 ('figure', 'title'), 4909 ('references', 'title'), 4910 ('texttable', 'title'), 4911 ('figure', 'src'), 4912 ('artwork', 'xml:space'), 4913 ('artwork', 'height'), 4914 ('artwork', 'width'), 4915 ('figure', 'height'), 4916 ('figure', 'width'), 4917 ('xref', 'pageno'), 4918 ] 4919